There seem to be efficient methods for `mul!(out, ::UpperTriangular, ::Matrix)` and `mul!(out, ::LowerTriangular, ::Matrix)`. However, when you reverse the arguments and use `mul!(out, ::Matrix, ::UpperTriangular)` (or the `LowerTriangular` equivalent), the performance is much, much worse — it is even far slower than just using `mul!(out, ::Matrix, ::Matrix)`.
Benchmarks
using LinearAlgebra
using BenchmarkTools
# Problem size: every operand and the output buffer are n×n.
n = 1000
# Output buffer shared by all benchmarks; interpolated with `$` below so it is
# not accessed as an untyped global inside the benchmarked expression.
prealloc = Matrix{Float64}(undef, n, n)

# Case 1: dense * dense.
@benchmark mul!($prealloc, lhs, rhs) setup=begin
    lhs = rand(n, n)
    rhs = rand(n, n)
end # 12 ms

# Case 2: UpperTriangular * dense (triangular operand on the left).
@benchmark mul!($prealloc, lhs, rhs) setup=begin
    lhs = UpperTriangular(rand(n, n))
    rhs = rand(n, n)
end # 8 ms

# Case 3: LowerTriangular * dense (triangular operand on the left).
@benchmark mul!($prealloc, lhs, rhs) setup=begin
    lhs = LowerTriangular(rand(n, n))
    rhs = rand(n, n)
end # 8 ms

# Case 4: dense * UpperTriangular (triangular operand on the right).
@benchmark mul!($prealloc, lhs, rhs) setup=begin
    lhs = rand(n, n)
    rhs = UpperTriangular(rand(n, n))
end # 800 ms

# Case 5: dense * LowerTriangular (triangular operand on the right).
@benchmark mul!($prealloc, lhs, rhs) setup=begin
    lhs = rand(n, n)
    rhs = LowerTriangular(rand(n, n))
end # 800 ms
This is a bug, right?