I am trying to apply in-place matrix multiplication to a matrix of SVectors. However, there are 3 allocations if I call mul!
directly on the whole matrices. If I apply the multiplication column by column instead, the allocations disappear. Here is an example:
# Inputs for the SVector case: A (10×100) and B (100×100) hold zero SVector{3,Float64}
# elements; T is a 100×10 matrix of ones.
A = zeros(SVector{3,Float64},10,100)
B = zeros(SVector{3,Float64},100,100)
T = ones(100,10)
# test(B, T, A)
#
# Compute B = T*A in place, one column at a time: for every column index j of B,
# column j of A is multiplied by T with mul! and the result is stored in column j of B.
# Returns nothing; B is modified.
function test(B, T, A)
    # Loop over the actual columns of B rather than a hard-coded 1:100, so the
    # function works for any number of columns.
    for j in axes(B, 2)
        # @views turns both column slices into views, so mul! writes straight into B.
        @views mul!(B[:, j], T, A[:, j])
    end
    return nothing
end
# Benchmark the in-place product B = T*A: first as a single mul! call on the whole
# matrices, then column by column via test.
@btime mul!($B,$T,$A)
@btime test($B,$T,$A)
90.117 μs (3 allocations: 24.62 KiB)
46.061 μs (0 allocations: 0 bytes)
For reference, if I use regular Float64 matrices, there are no allocations in either case:
# Inputs for the plain Float64 case: A is 10×100 and B is 100×100, both filled with
# zeros; T is a 100×10 matrix of ones.
A = zeros(Float64,10,100)
B = zeros(Float64,100,100)
T = ones(100,10)
# test(B, T, A)
#
# Compute B = T*A in place, one column at a time: for every column index j of B,
# column j of A is multiplied by T with mul! and the result is stored in column j of B.
# Returns nothing; B is modified.
function test(B, T, A)
    # Loop over the actual columns of B rather than a hard-coded 1:100, so the
    # function works for any number of columns.
    for j in axes(B, 2)
        # @views turns both column slices into views, so mul! writes straight into B.
        @views mul!(B[:, j], T, A[:, j])
    end
    return nothing
end
# Benchmark the in-place product B = T*A: first as a single mul! call on the whole
# matrices, then column by column via test.
@btime mul!($B,$T,$A)
@btime test($B,$T,$A)
3.650 μs (0 allocations: 0 bytes)
9.995 μs (0 allocations: 0 bytes)
Does anyone know why this happens? Thanks in advance!