julia> ZC = rand(1800, 1680); M = rand(ComplexF64, 1800, 1800);
julia> @time ZC' * M;
9.169792 seconds (3 allocations: 46.143 MiB)
julia> @time permutedims(ZC) * M;
0.877853 seconds (6 allocations: 115.357 MiB, 1.55% gc time)
julia> @which ZC' * M
*(A::AbstractMatrix, B::AbstractMatrix) in LinearAlgebra at /home/jishnu/Downloads/julia/julia-1.7.0/share/julia/stdlib/v1.7/LinearAlgebra/src/matmul.jl:151
julia> @which permutedims(ZC) * M
*(A::StridedMatrix{var"#s859"} where var"#s859"<:Union{Float32, Float64}, B::StridedMatrix{var"#s858"} where var"#s858"<:Union{Float32, Float64, ComplexF32, ComplexF64}) in LinearAlgebra at /home/jishnu/Downloads/julia/julia-1.7.0/share/julia/stdlib/v1.7/LinearAlgebra/src/matmul.jl:158
The multiplication involving the lazy transpose seems to hit a generic fallback if M
is complex. This seems to be because
julia> ZC' isa StridedMatrix
false
Why though? Shouldnβt this be a StridedMatrix
as well, given that the stride along each dimension is a constant?
Interestingly, despite this method being hit, the performance is almost identical if M
is real:
julia> ZC = rand(1800, 1680); M = rand(1800, 1800);
julia> @time ZC' * M;
0.261731 seconds (3 allocations: 23.071 MiB, 2.57% gc time)
julia> @time permutedims(ZC) * M;
0.278280 seconds (4 allocations: 46.143 MiB)
julia> @which ZC' * M
*(A::AbstractMatrix, B::AbstractMatrix) in LinearAlgebra at /home/jishnu/Downloads/julia/julia-1.7.0/share/julia/stdlib/v1.7/LinearAlgebra/src/matmul.jl:151
julia> @which permutedims(ZC) * M
*(A::StridedMatrix{var"#s859"} where var"#s859"<:Union{Float32, Float64}, B::StridedMatrix{var"#s858"} where var"#s858"<:Union{Float32, Float64, ComplexF32, ComplexF64}) in LinearAlgebra at /home/jishnu/Downloads/julia/julia-1.7.0/share/julia/stdlib/v1.7/LinearAlgebra/src/matmul.jl:158
Looking a bit into it, it appears that the mul!
for the complex matrix hits generic_matmatmul!
which is slow, while it doesnβt for real matrices.
# For complex matrices
@inline function mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
alpha::Number, beta::Number)
A = adjA.parent
return generic_matmatmul!(C, 'C', 'N', A, B, MulAddMul(alpha, beta))
end
# For real matrices
@inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
alpha::Real, beta::Real) where {T<:BlasReal}
A = adjA.parent
return mul!(C, transpose(A), B, alpha, beta)
end
Perhaps such a method may be added for complex numbers as well?