I have these 4 examles I’m benchmarking:
using SparseArrays
using LinearAlgebra
using BenchmarkTools
using Random
function myfun1(m::Integer, n::Integer)
Random.seed!(1)
A = rand(Float64, n, n)
B = sparse(rand(Float64, m, m))
temp = A' * kron(sparse(I, m, m), B) * A
end
function myfun2(m::Integer, n::Integer)
Random.seed!(1)
A = rand(Float64, n, n)
B = sparse(rand(Float64, m, m))
temp1 = kron(sparse(I, m, m), B) * A
temp = A' * temp1
end
function myfun3(m::Integer, n::Integer)
Random.seed!(1)
A = rand(Float64, n, n)
B = sparse(rand(Float64, m, m))
temp = Array{Float64}(undef, n, n)
mul!(temp, kron(sparse(I, m, m), B), A)
B = A' * temp
end
function myfun4(m::Integer, n::Integer)
Random.seed!(1)
A = rand(Float64, n, n)
B = sparse(rand(Float64, m, m))
temp = Array{Float64}(undef, n, n)
mul!(temp, kron(sparse(I, m, m), B), A)
B = Array{Float64}(undef, n, n)
mul!(B, A', temp)
end
function myfun5(m::Integer, n::Integer)
Random.seed!(1)
A = rand(Float64, n, n)
B = sparse(rand(Float64, m, m))
temp1 = Array{Float64}(undef, n, n)
temp = Array{Float64}(undef, n, n)
temp1 .= kron(sparse(I, m, m), B) * A
temp .= A' * temp1
end
m::Int32 = 100
n::Int32 = m*m
@btime myfun1(m, n)
@btime myfun2(m, n)
@btime myfun3(m, n)
@btime myfun4(m, n)
@btime myfun5(m, n)
Results are:
36.307 s (28 allocations: 2.25 GiB)
24.707 s (28 allocations: 2.25 GiB)
25.310 s (28 allocations: 2.25 GiB)
25.378 s (28 allocations: 2.25 GiB)
26.327 s (32 allocations: 3.74 GiB)
Perhaps I could have created A and B out of the functions, but the seed is set, so there is only a fixed time issue.
What I’m wondering is why is the first example so far of the others?
Regarding the little differences of the others the conclusion I reach is that:
If you have to overwrite an existing matrix use mul! rather than simple multiplication.
If you don’t have to overwrite it, use simple multiplication without preallocation.
Am I right?