How can I optimize the following functions that copy a line-, column- or both- permutation (pl
, pc
, pcl
, respectively) of a src
array into a dest
array?
In particular, can the temp
array be avoided in pcl
?
using BenchmarkTools
function pl!(dest, src, lines)
n = lines
@views for i = 1:fld(n, 2)
dest[i,:] = src[n-i+1,:];
dest[n-i+1,:] = src[i,:];
end
end
function pc!(dest, src, columns)
n = columns
@views for i = 1:fld(n, 2)
dest[:,i] = src[:,n-i+1];
dest[:,n-i+1] = src[:,i];
end
end
function pcl!(dest, src, lines, columns)
temp = Array(dest)
pc!(temp, src, columns)
pl!(dest, temp, lines)
end
n = 1000;
m = 900;
a = zeros(n,m);
b = rand(n,m);
@btime pl!(a, b, n); # 14.170 ms (1000 allocations: 46.88 KiB)
@btime pc!(a, b, m); # 1.410 ms (900 allocations: 42.19 KiB)
@btime pcl!(a, b, n, m); # 16.194 ms (1902 allocations: 6.95 MiB)