Type stability issues when using StaticArrays

It is enough with the single dot to get things correct, but then you miss out on loop fusion, and you get extra unnecessary allocations. The right side of Ke .+= dNdx * dNdx’ * w[i] will create both a temporary vector and a temporary matrix, neither of which you need. ( Edit: I think maybe it’s even two temporary matrices instead of one matrix and one vector.)

Actually, I don’t see any difference. I tried this code:

using StaticArrays

function foo1(::Val{nelementnodes}, le::Float64, area::Float64, Ey::Float64) where nelementnodes
	xi = gausspoints1d()
	w = gaussweights1d()
	Ke = zeros(MMatrix{nelementnodes,nelementnodes})
	@inbounds for i = 1:10
		dNdx = shapefunder1d(Val(nelementnodes), xi[i])
		Ke .+= dNdx * dNdx' * w[i]
	end
	Ke .*= Ey * area * 2.0 / le
	return Ke
end

function foo2(::Val{nelementnodes}, le::Float64, area::Float64, Ey::Float64) where nelementnodes
	xi = gausspoints1d()
	w = gaussweights1d()
	Ke = zeros(MMatrix{nelementnodes,nelementnodes})
	@inbounds for i = 1:10
		dNdx = shapefunder1d(Val(nelementnodes), xi[i])
		Ke .+= dNdx .* dNdx' .* w[i]
	end
	Ke .*= Ey * area * 2.0 / le
	return Ke
end

function shapefunder1d(::Val{2}, xi::Float64)
  return @SVector [-0.5, 0.5]
end

function shapefunder1d(::Val{3}, xi::Float64)
  return @SVector [-0.5 + xi, -2.0 * xi, 0.5 + xi]
end

function gausspoints1d()
	return @SVector [- 0.973906528517171720077964012084,
	     						- 0.865063366688984510732096688423,
			 						- 0.679409568299024406234327365115,
			 						- 0.433395394129247290799265943166,
			 						- 0.148874338981631210884826001130,
			 						0.148874338981631210884826001130,
			 						0.433395394129247290799265943166,
			 						0.679409568299024406234327365115,
			 						0.865063366688984510732096688423,
				 					0.973906528517171720077964012084]
end

function gaussweights1d()
	return @SVector [0.0666713443086881375935688098933,
									0.149451349150580593145776339658,
									0.219086362515982043995534934228,
									0.269266719309996355091226921569,
									0.295524224714752870173892994651,
									0.295524224714752870173892994651,
									0.269266719309996355091226921569,
									0.219086362515982043995534934228,
									0.149451349150580593145776339658,
									0.0666713443086881375935688098933]
end

# tests
nelementnodes = 3
time1() = @time foo1(Val(nelementnodes), 400.0, 0.8, 1000000.0)
time2() = @time foo2(Val(nelementnodes), 400.0, 0.8, 1000000.0)

time1()
time2()

and got this:

0.033151 seconds (88.24 k allocations: 3.923 MiB)
0.033586 seconds (87.64 k allocations: 3.883 MiB)