I made some tests with the 2. option of @stevengj, which seems very promising for small values of n.
I assume that only foo3
is doing what you want, so we compare these two functions:
function foo3(A, v, n, t)
Av = zeros(n, t)
for i in axes(Av, 2)
Av[:, i] = view(A, :, (i - 1) * n + 1 : i * n) * v;
end
return Av
end
foo4(A, v) = A .* Ref(v)
Note that foo3
here uses n
and t
as inputs, not as globals.
The function foo4
needs a different matrix A2
and vector v2
:
function get_A2(A, n, t)
A2 = Vector{SMatrix{n, n, Float64, n^2}}(undef, t)
for i in eachindex(A2)
A2[i] = SMatrix{n,n}(A[:, (i - 1) * n + 1 : i * n])
end
return A2
end
A1, v1 = rand(n, n * t), rand(n)
A2, v2 = get_A2(A1, n, t), SVector{n}(v1)
julia> typeof(A2)
Vector{SMatrix{3, 3, Float64, 9}} (alias for Array{SArray{Tuple{3, 3}, Float64, 2, 9}, 1})
julia> typeof(v2)
SVector{3, Float64} (alias for SArray{Tuple{3}, Float64, 1, 3})
Note that foo4
returns a Vector{SVector{n, Float64}}
and not a Matrix{Float64}
like foo3
!
A conversion can be done with a function like this:
function convert_to_matrix(Av, n, t)
Av_matrix = zeros(n, t)
for i in axes(Av_matrix, 2)
Av_matrix[:, i] = Av[i]
end
return Av_matrix
end
Benchmark results:
function benchmark(n::Int, t::Int=500)
A1, v1 = rand(n, n * t), rand(n)
A2, v2 = get_A2(A1, n, t), SVector{n}(v1)
@btime foo3($A1, $v1, $n, $t)
@btime foo4($A2, $v2)
@test foo3(A1, v1, n, t) ≈ convert_to_matrix(foo4(A2, v2), n, t)
end
julia> benchmark(3)
40.797 μs (501 allocations: 50.94 KiB)
1.486 μs (1 allocation: 11.88 KiB)
Test Passed
julia> benchmark(10)
58.837 μs (502 allocations: 109.42 KiB)
8.749 μs (2 allocations: 39.11 KiB)
Test Passed
julia> benchmark(40)
176.178 μs (502 allocations: 351.61 KiB)
140.093 μs (2 allocations: 156.30 KiB)
Test Passed
julia> benchmark(50)
284.541 μs (502 allocations: 437.55 KiB)
335.238 μs (2 allocations: 195.36 KiB)
Test Passed
Complete MWE
using LinearAlgebra, StaticArrays, BenchmarkTools, Test
function foo3(A, v)
Av = zeros(n, t)
for i in axes(Av, 2)
Av[:, i] = view(A, :, (i - 1) * n + 1 : i * n) * v;
end
return Av
end
foo4(A, v) = A .* Ref(v)
function get_A2(A, n, t)
A2 = Vector{SMatrix{n, n, Float64, n^2}}(undef, t)
for i in eachindex(A2)
A2[i] = SMatrix{n,n}(A[:, (i - 1) * n + 1 : i * n])
end
return A2
end
function convert_to_matrix(Av, n, t)
Av_ = zeros(n, t)
for i in axes(Av_, 2)
Av_[:, i] = Av[i]
end
return Av_
end
function benchmark(n::Int, t::Int=500)
A1, v1 = rand(n, n * t), rand(n)
A2, v2 = get_A2(A1, n, t), SVector{n}(v1)
@btime foo3($A1, $v1, $n, $t)
@btime foo4($A2, $v2)
@test foo3(A1, v1, n, t) ≈ convert_to_matrix(foo4(A2, v2), n, t)
end
benchmark(3)
benchmark(10)
benchmark(40)
benchmark(50)