Hello,
I’m trying to write a piece of code involving calculating the product of permutations, as defined here, for each column of a matrix. Here are three MWEs:
function apply_perm1!(A::AbstractMatrix{<:Integer}, B::AbstractMatrix{<:Integer})
for n in axes(B, 2)[2:end]
A[:, n] .= B[view(A, :, n - 1), n]
end
return A
end
function apply_perm2!(A::AbstractMatrix{<:Integer}, B::AbstractMatrix{<:Integer})
for n in axes(B, 2)[2:end], m in axes(B, 1)
A[m, n] = B[A[m, n-1], n]
end
return A
end
function apply_perm3!(A::AbstractMatrix{<:Integer}, B::AbstractMatrix{<:Integer})
for n in axes(B, 2)[2:end]
A[1, n] = B[A[1, n-1], n]
A[2, n] = B[A[2, n-1], n]
A[3, n] = B[A[3, n-1], n]
end
return A
end
All three functions yield the same output but a quick benchmark yields:
A = Matrix{Int}(undef, 3, 1000)
for n in axes(A, 2)
A[:, n] .= randperm(M)
end
B = deepcopy(A)
@btime apply_perm1!($A, $B) # 27.083 μs (999 allocations: 78.05 KiB)
@btime apply_perm2!($A, $B) # 5.875 μs (0 allocations: 0 bytes)
@btime apply_perm3!($A, $B) # 3.083 μs (0 allocations: 0 bytes)
As far as I understand, apply_perm1!
creates copies of B
’s columns, which I don’t know how to avoid. (view(B, view(A, :, n - 1), n)
) allocates more and becomes 50x slower). apply_perm3!
is also not preferred, as in my actual project the size of these matrices may vary.
I appreciate your suggestion!