Here is an example. Regular matrices and static arrays are used in identical loops. The static arrays are orders of magnitude slower. The code using static arrays also allocates. Any idea what’s going on?
The code:
module mbuffers13
using FinEtools
using StaticArrays
function test_tda(N, NLOOP)
Kedim = N
mdim = 2
gradN = rand(N, 2)
Ke = fill(0.0, Kedim, Kedim)
multiplier = 2.0
t = @elapsed for loop = 1:NLOOP
for nx = 1:Kedim # Do: Ce = Ce + gradN*((Jac*w[j]))*gradN' ;
@inbounds for px = 1:mdim
a = (multiplier)*gradN[nx, px]
@inbounds for mx = 1:nx # only the upper triangle
Ke[mx, nx] += gradN[mx, px] * a
end
end
end
end
return t ./ NLOOP
end
function test_tsa(N, NLOOP)
Kedim = N
mdim = 2
gradN = MMatrix{N, 2, Float64}(rand(N, 2))
Ke = MMatrix{N, N, Float64}(fill(0.0, Kedim, Kedim))
multiplier = 2.0
t = @elapsed for loop = 1:NLOOP
for nx = 1:Kedim # Do: Ce = Ce + gradN*((Jac*w[j]))*gradN' ;
@inbounds for px = 1:mdim
a = (multiplier)*gradN[nx, px]
@inbounds for mx = 1:nx # only the upper triangle
Ke[mx, nx] += gradN[mx, px] * a
end
end
end
end
return t ./ NLOOP
end
function test(N)
println("N = $(N)")
NLOOP = 10000
@time tda = test_tda(N, NLOOP)
@time tsa = test_tsa(N, NLOOP)
vec([tda tsa])
end
end
using .mbuffers13
NS = [3, 9, 16, 25, 36] # , 225, 900
ts = []
for N in NS
push!(ts, mbuffers13.test(N))
end
@show ts
The results:
julia> include("test/test_buffers5.jl")
N = 3
0.000173 seconds (2 allocations: 288 bytes)
0.504165 seconds (1.55 M allocations: 54.700 MiB, 2.43% gc time)
N = 9
0.001661 seconds (2 allocations: 960 bytes)
0.411852 seconds (5.09 M allocations: 86.726 MiB, 2.85% gc time)
N = 16
0.002520 seconds (2 allocations: 2.516 KiB)
0.970008 seconds (14.67 M allocations: 239.903 MiB, 1.46% gc time)
N = 25
0.003897 seconds (2 allocations: 5.594 KiB)
2.162746 seconds (34.20 M allocations: 549.774 MiB, 1.07% gc time)
N = 36
0.007959 seconds (2 allocations: 10.906 KiB)
15.484236 seconds (69.35 M allocations: 1.083 GiB, 0.34% gc time)
ts = Any[[1.65101e-8, 1.11647e-5], [1.6589e-7, 2.52754e-5], [2.5164e-7, 6.97781e-5], [3.8924e-7, 0.000139628], [7.94341e-7, 0.000328319]]
Obtained with Version 0.7.0-beta2.12 on Windows.