I’m just getting started with Julia, so I’m trying to optimize every piece of code, not just for practicality but also to learn more about the language. I ran into an interesting case where the number of allocations seems to be inversely related to run time (the version with the fewest allocations is the slowest), and I can’t really figure out how to optimize this code:

```
using Profile, BenchmarkTools
"""
    double_sort1(base_arr::Array{Int64, 2}) -> Array{Int64, 2}

Return an `n × 4` matrix built from the first two columns of the `n`-row `base_arr`.

Columns `1:2` of the result hold the rows of `base_arr` reordered by the sort
permutation of its first column; columns `3:4` hold the same rows reordered by the
sort permutation of its second column.

A single permutation buffer is reused for both sort keys, and each key column is
sorted through a view, so no temporary copy of the column is made.
"""
function double_sort1(base_arr::Array{Int64, 2})::Array{Int64, 2}
    n = size(base_arr, 1)
    arr = Matrix{Int64}(undef, n, 4)
    p = Vector{Int64}(undef, n)  # permutation buffer, overwritten for each sort key
    # A tuple of ranges avoids allocating a Vector just to drive the loop.
    for (side, r) in enumerate((1:2, 3:4))
        # `view` instead of `base_arr[:, side]`: the slice would copy the whole column.
        sortperm!(p, view(base_arr, :, side))
        for (i_out, i_in) in enumerate(p)
            for (j_in, j_out) in enumerate(r)
                arr[i_out, j_out] = base_arr[i_in, j_in]
            end
        end
    end
    return arr
end
"""
    double_sort2(base_arr::Array{Int64, 2}) -> Array{Int64, 2}

Return an `n × 4` matrix built from the first two columns of the `n`-row `base_arr`.

For each key column `k` in `1:2`, the rows of `base_arr` are reordered by
`sortperm` of column `k` and written to output columns `2k - 1` and `2k`.
"""
function double_sort2(base_arr::Array{Int64, 2})::Array{Int64, 2}
    nrows = size(base_arr, 1)
    sorted = Matrix{Int64}(undef, nrows, 4)
    for key_col in 1:2
        # NOTE(review): the column is copied into a plain Vector before sorting;
        # `sortperm` may be faster on a dense integer Vector than on a view —
        # benchmark before replacing this slice with `view`.
        order = sortperm(base_arr[:, key_col])
        col_shift = 2 * (key_col - 1)  # 0 for the first key, 2 for the second
        for dst_row in eachindex(order)
            src_row = order[dst_row]
            for src_col in 1:2
                sorted[dst_row, col_shift + src_col] = base_arr[src_row, src_col]
            end
        end
    end
    return sorted
end
"""
    double_sort3(base_arr::Array{Int64, 2}) -> Array{Int64, 2}

Return an `n × 4` matrix made of two row-reordered copies of `base_arr`, side by side.

Columns `1:2` receive `base_arr` with rows ordered by `sortperm` of its first column;
columns `3:4` receive `base_arr` with rows ordered by `sortperm` of its second column.
`base_arr` is expected to have exactly two columns so that each reordered copy fits
its two-column destination slice.
"""
function double_sort3(base_arr::Array{Int64, 2})::Array{Int64, 2}
    nrows = size(base_arr, 1)
    out = Matrix{Int64}(undef, nrows, 4)
    for (key_col, dest_cols) in zip(1:2, (1:2, 3:4))
        order = sortperm(base_arr[:, key_col])
        # Materialize the reordered rows, then copy them into the destination block.
        out[:, dest_cols] = base_arr[order, :]
    end
    return out
end
"""
    double_sort4(base_arr::Array{Int64, 2}) -> Array{Int64, 2}

Return two row-reordered copies of `base_arr` concatenated horizontally.

The left copy has its rows ordered by `sortperm` of the first column of `base_arr`,
the right copy by `sortperm` of the second column. For a two-column input the result
is `n × 4`.
"""
function double_sort4(base_arr::Array{Int64, 2})::Array{Int64, 2}
    # Two named intermediates plus `hcat` replace splatting a temporary array of
    # matrices into `cat(...; dims=2)`; the concatenated result is the same.
    by_first = base_arr[sortperm(base_arr[:, 1]), :]
    by_second = base_arr[sortperm(base_arr[:, 2]), :]
    return hcat(by_first, by_second)
end
# Benchmark input: a 3_000_000 × 2 matrix of integers sampled from 1:200000.
a = rand(1:200000, (3_000_000, 2));
```

```
# Sanity check: compare the outputs of the four implementations pairwise on the same input.
double_sort1(a) == double_sort2(a), double_sort2(a) == double_sort3(a), double_sort3(a) == double_sort4(a)
```

```
# Time each implementation on the same input; `$a` interpolates the global `a`
# into the benchmark expression.
@btime double_sort1($a);
@btime double_sort2($a);
@btime double_sort3($a);
@btime double_sort4($a);
```

1.725 s (11 allocations: 160.22 MiB)

427.311 ms (15 allocations: 186.16 MiB)

417.247 ms (19 allocations: 277.71 MiB)

403.048 ms (55 allocations: 277.71 MiB)