julia> using LinearAlgebra
julia> function lmul2!(s::Number, X::AbstractArray)
# Scale X in place by the scalar s (each element becomes s*X[I]) and return X.
# Shortcut: when s is zero, skip the multiplication and overwrite every element
# with s via fill!.
# NOTE(review): the shortcut is not equivalent to the loop when X holds non-finite
# values (e.g. 0 * NaN is NaN for floating-point elements, whereas fill! stores a
# plain zero) -- confirm that this difference is acceptable.
iszero(s) && return fill!(X, s)
# The indices come from eachindex(X), so every access below is within bounds.
@simd for I in eachindex(X)
@inbounds X[I] = s*X[I]
end
X
end
lmul2! (generic function with 1 method)
julia> A = rand(1000, 1000);  # 1000×1000 test matrix shared by the benchmarks
julia> @btime (A -> lmul!(0, A))($A);  # baseline: LinearAlgebra's lmul! with a zero scalar
290.313 μs (0 allocations: 0 bytes)
julia> @btime (A -> lmul2!(0, A))($A);  # same call through lmul2!, which takes the iszero/fill! branch
222.413 μs (0 allocations: 0 bytes)
Perhaps the compiler should transform the loop to a `fill!`. It does seem to handle `1` specially, transforming the loop to a no-op.
julia> @btime (A -> lmul!(1, A))($A);  # same benchmark, but scaling by 1 instead of 0
2.883 ns (0 allocations: 0 bytes)
If performing this transformation in the compiler is complicated, adding an explicit `iszero` check to `lmul!` may help with performance.