Hello,

I’m trying to write a piece of code involving calculating the product of permutations, as defined here, for each column of a matrix. Here are three MWEs:

```
function apply_perm1!(A::AbstractMatrix{<:Integer}, B::AbstractMatrix{<:Integer})
for n in axes(B, 2)[2:end]
A[:, n] .= B[view(A, :, n - 1), n]
end
return A
end
```

```
function apply_perm2!(A::AbstractMatrix{<:Integer}, B::AbstractMatrix{<:Integer})
for n in axes(B, 2)[2:end], m in axes(B, 1)
A[m, n] = B[A[m, n-1], n]
end
return A
end
```

```
function apply_perm3!(A::AbstractMatrix{<:Integer}, B::AbstractMatrix{<:Integer})
for n in axes(B, 2)[2:end]
A[1, n] = B[A[1, n-1], n]
A[2, n] = B[A[2, n-1], n]
A[3, n] = B[A[3, n-1], n]
end
return A
end
```

All three functions yield the same output but a quick benchmark yields:

```
A = Matrix{Int}(undef, 3, 1000)
for n in axes(A, 2)
A[:, n] .= randperm(M)
end
B = deepcopy(A)
@btime apply_perm1!($A, $B) # 27.083 μs (999 allocations: 78.05 KiB)
@btime apply_perm2!($A, $B) # 5.875 μs (0 allocations: 0 bytes)
@btime apply_perm3!($A, $B) # 3.083 μs (0 allocations: 0 bytes)
```

As far as I understand, `apply_perm1!`

creates copies of `B`

’s columns, which I don’t know how to avoid. (`view(B, view(A, :, n - 1), n)`

) allocates more and becomes 50x slower). `apply_perm3!`

is also not preferred, as in my actual project the size of these matrices may vary.

I appreciate your suggestion!