It is enough with the single dot to get things correct, but then you miss out on loop fusion, and you get extra unnecessary allocations. The right side of
Ke .+= dNdx * dNdx’ * w[i]will create both a temporary vector and a temporary matrix, neither of which you need. ( Edit: I think maybe it’s even two temporary matrices instead of one matrix and one vector.)
Actually, I don’t see any difference. I tried this code:
using StaticArrays
function foo1(::Val{nelementnodes}, le::Float64, area::Float64, Ey::Float64) where nelementnodes
xi = gausspoints1d()
w = gaussweights1d()
Ke = zeros(MMatrix{nelementnodes,nelementnodes})
@inbounds for i = 1:10
dNdx = shapefunder1d(Val(nelementnodes), xi[i])
Ke .+= dNdx * dNdx' * w[i]
end
Ke .*= Ey * area * 2.0 / le
return Ke
end
function foo2(::Val{nelementnodes}, le::Float64, area::Float64, Ey::Float64) where nelementnodes
xi = gausspoints1d()
w = gaussweights1d()
Ke = zeros(MMatrix{nelementnodes,nelementnodes})
@inbounds for i = 1:10
dNdx = shapefunder1d(Val(nelementnodes), xi[i])
Ke .+= dNdx .* dNdx' .* w[i]
end
Ke .*= Ey * area * 2.0 / le
return Ke
end
function shapefunder1d(::Val{2}, xi::Float64)
return @SVector [-0.5, 0.5]
end
function shapefunder1d(::Val{3}, xi::Float64)
return @SVector [-0.5 + xi, -2.0 * xi, 0.5 + xi]
end
function gausspoints1d()
return @SVector [- 0.973906528517171720077964012084,
- 0.865063366688984510732096688423,
- 0.679409568299024406234327365115,
- 0.433395394129247290799265943166,
- 0.148874338981631210884826001130,
0.148874338981631210884826001130,
0.433395394129247290799265943166,
0.679409568299024406234327365115,
0.865063366688984510732096688423,
0.973906528517171720077964012084]
end
function gaussweights1d()
return @SVector [0.0666713443086881375935688098933,
0.149451349150580593145776339658,
0.219086362515982043995534934228,
0.269266719309996355091226921569,
0.295524224714752870173892994651,
0.295524224714752870173892994651,
0.269266719309996355091226921569,
0.219086362515982043995534934228,
0.149451349150580593145776339658,
0.0666713443086881375935688098933]
end
# tests
nelementnodes = 3
time1() = @time foo1(Val(nelementnodes), 400.0, 0.8, 1000000.0)
time2() = @time foo2(Val(nelementnodes), 400.0, 0.8, 1000000.0)
time1()
time2()
and got this:
0.033151 seconds (88.24 k allocations: 3.923 MiB)
0.033586 seconds (87.64 k allocations: 3.883 MiB)