Hi there,
consider the following simple function:
# `I` (used below) and `mul!` (used by the functions in this post) are provided by the
# LinearAlgebra standard library; without this line `I` is undefined in a fresh session.
# The `@btime` calls further down additionally require `using BenchmarkTools`.
using LinearAlgebra

# 2×2 matrix used as the right-hand factor in the benchmarks.
M = [1.0 2.0;
     3.0 4.0]
# Number of rows of M; every buffer below is n×n.
n = size(M, 1)
# Dense n×n identity matrix, materialised from the lazy `I`.
I_n = Matrix{Float64}(I, n, n)
# Preallocated n×n work buffers that the benchmarked functions overwrite in place.
A_temp = zeros(n, n)
diff_temp = zeros(n, n)
# Return the sum of the squared entries of `A*M - I_n`, where the entries of `A` are
# read from `args`. Note that, despite "inf_norm" in the name, the returned value is a
# sum of squares, not a maximum-based norm.
#
# Arguments:
#   args   - indexable collection; `args[i]` is read for every index `i` of `A_temp`
#   M      - right-hand factor of the product
#   I_n    - array subtracted element-wise from the product (the setup in this post
#            passes the n×n identity matrix)
#   A_temp - work buffer, overwritten with the values taken from `args`
#   diff   - work buffer, overwritten with `A_temp*M - I_n`
#
# Both `A_temp` and `diff` are mutated; the caller supplies them preallocated.
function inf_norm_diff_type_stable_no_allocs2(args, M, I_n, A_temp, diff)
# Copy the inputs into the work buffer, one entry per index of A_temp
for i in eachindex(A_temp)
A_temp[i] = args[i]
end
# Compute the difference A*M - I: first write the product A_temp*M into `diff` ...
mul!(diff, A_temp, M)
# ... then subtract I_n element-wise, updating `diff` in place (`@.` dots the statement)
@. diff -= I_n
# Sum of the squared entries of `diff`
return mapreduce(x -> x^2, +, diff)
end
As expected, this has no allocations:
@btime inf_norm_diff_type_stable_no_allocs2($(rand(4)), $(M), $(I_n), $(A_temp), $(diff_temp))
21.314 ns (0 allocations: 0 bytes)
However, consider the following modification, which passes the scalar inputs as varargs and the remaining arguments as kwargs:
# Keyword-argument variant: the scalar inputs arrive as varargs (`args...`) and the
# matrices/buffers as required keyword arguments. Returns the sum of the squared
# entries of `A*M - I_n`, where the entries of `A` are read from `args` (despite
# "inf_norm" in the name, this is a sum of squares, not a maximum-based norm).
#
# Arguments:
#   args...  - positional inputs; `args[i]` is read for every index `i` of `A_temp`
# Keywords:
#   M        - right-hand factor of the product
#   I_n      - array subtracted element-wise from the product
#   A_temp   - work buffer, overwritten with the values taken from `args`
#   diff     - work buffer, overwritten with `A_temp*M - I_n`
#
# NOTE(review): the benchmark in this post reports 5 allocations for this version and
# none once the `@.` line is removed. A possible cause -- TODO confirm -- is that Julia
# does not always specialize on `Vararg` arguments that are merely forwarded to another
# method (presumably what the keyword-argument wrapper does when the body is not
# inlined). Declaring the varargs as `args::Vararg{Any,N}) where {N}` forces
# specialization and is worth trying.
function inf_norm_diff_type_stable(args...; M, I_n, A_temp, diff)
# Copy the scalar inputs into the work buffer, one entry per index of A_temp
for i in eachindex(A_temp)
A_temp[i] = args[i]
end
# Compute the difference A*M - I: first write the product A_temp*M into `diff` ...
mul!(diff, A_temp, M)
# ... then subtract I_n element-wise, updating `diff` in place (`@.` dots the statement)
@. diff -= I_n
# Sum of the squared entries of `diff`
return mapreduce(x -> x^2, +, diff)
end
julia> @btime inf_norm_diff_type_stable($(rand()), $(rand()), $(rand()), $(rand()); M = $(M), I_n = $(I_n), A_temp = $(A_temp), diff = $(diff_temp))
36.374 ns (5 allocations: 80 bytes)
It does allocate!
Interestingly, the issue seems to come from the `@.` line, since commenting it out removes the allocations:
# Experimental variant of the keyword-argument function with the broadcast subtraction
# disabled. Returns the sum of the squared entries of the product `A*M`, where the
# entries of `A` are read from `args`.
#
# Arguments:
#   args...  - positional inputs; `args[i]` is read for every index `i` of `A_temp`
# Keywords:
#   M        - right-hand factor of the product
#   I_n      - accepted for signature parity, but unused while the subtraction below
#              is commented out
#   A_temp   - work buffer, overwritten with the values taken from `args`
#   diff     - work buffer, overwritten with `A_temp*M`
function inf_norm_diff_type_stable2(args...; M, I_n, A_temp, diff)
# Copy the scalar inputs into the work buffer, one entry per index of A_temp
for i in eachindex(A_temp)
A_temp[i] = args[i]
end
# Compute the product A*M in place into `diff`
mul!(diff, A_temp, M)
# Subtraction of I_n deliberately disabled to isolate its effect on allocations:
#@. diff -= I_n
# Sum of the squared entries of `diff`
return mapreduce(x -> x^2, +, diff)
end
julia> @btime inf_norm_diff_type_stable2($(rand()), $(rand()), $(rand()), $(rand()); M = $(M), I_n = $(I_n), A_temp = $(A_temp), diff = $(diff_temp))
12.929 ns (0 allocations: 0 bytes)
Comparing the `@code_warntype` and `@code_lowered` outputs of each version has not helped me identify the issue. Can someone let me know what I’m missing here?
Thanks in advance!