Type stability issues when using StaticArrays

Hi all: looking at a piece of code I am developing, I wanted to make sure that there are no type stability issues. So, in the process of reviewing my code I found a type stability issue when using StaticArrays. Below, I provide three working codes that exemplify the issue.

The first one makes @inferred to throw an error about the type stability issue:
ERROR: return type MArray{Tuple{2,2},Float64,2,4} does not match inferred return type Any
Stacktrace:
[1] top-level scope at none:0

(I had to copy and paste this code in the REPL as running it as a file didn’t write the error to the REPL.)

using StaticArrays
using Test

# Assemble and return a dense nelementnodes x nelementnodes matrix via 10-point
# quadrature (looks like a 1D element stiffness assembly — TODO confirm).
# NOTE(review): type-unstable by construction — the MMatrix dimensions depend on
# the *value* of `nelementnodes`, which inference cannot see from the types.
function foo(nelementnodes::Int, le::Float64, area::Float64, Ey::Float64)
	xi = gausspoints1d()   # 10 quadrature points
	w = gaussweights1d()   # 10 quadrature weights
	# Heap-allocates zeros(...) and converts; size is a runtime value.
	Ke = MMatrix{nelementnodes,nelementnodes}(zeros(nelementnodes,nelementnodes))
	@inbounds for i = 1:10
		dNdx = shapefunder1d(nelementnodes, xi[i])
		Ke += dNdx * dNdx' * w[i]   # rebinds Ke; not an in-place update
	end
	Ke *= Ey * area * 2.0 / le
	return Ke
end

# Derivatives of the 1D shape functions at local coordinate `xi` for a
# 2-node (linear) or 3-node (quadratic) element.
# Throws ArgumentError for any other `nelementnodes`.
function shapefunder1d(nelementnodes::Int, xi::Float64)
  dNdxi = MVector{nelementnodes}(zeros(nelementnodes))
  if nelementnodes == 2
    dNdxi[1] = -0.5
    dNdxi[2] = 0.5
  elseif nelementnodes == 3
    dNdxi[1] = -0.5 + xi
    dNdxi[2] = -2.0 * xi
    dNdxi[3] = 0.5 + xi
  else
    # Fix: throw a proper Exception instead of a raw String — throwing
    # non-Exception values is discouraged and hard for callers to catch.
    throw(ArgumentError("shapefunder1d(): illegal value of nelementnodes: $nelementnodes"))
  end
  return dNdxi
end

# 10-point Gauss–Legendre quadrature points on [-1, 1].
function gausspoints1d()
	x = MVector{10}(zeros(10))
	x[1] =  - 0.973906528517171720077964012084
	x[2] =  - 0.865063366688984510732096688423
	x[3] =  - 0.679409568299024406234327365115
	x[4] =  - 0.433395394129247290799265943166
	x[5] =  - 0.148874338981631210884826001130
	x[6] =    0.148874338981631210884826001130
	x[7] =    0.433395394129247290799265943166
	x[8] =    0.679409568299024406234327365115
	x[9] =    0.865063366688984510732096688423
	x[10] =   0.973906528517171720077964012084
	# BUG FIX: without this, the function returned the value of the last
	# assignment (a Float64), not the vector itself.
	return x
end

# 10-point Gauss–Legendre quadrature weights (weights sum to 2).
function gaussweights1d()
	w = MVector{10}(zeros(10))
	w[1] =  0.0666713443086881375935688098933
	w[2] =  0.149451349150580593145776339658
	w[3] =  0.219086362515982043995534934228
	w[4] =  0.269266719309996355091226921569
	w[5] =  0.295524224714752870173892994651
	w[6] =  0.295524224714752870173892994651
	w[7] =  0.269266719309996355091226921569
	w[8] =  0.219086362515982043995534934228
	w[9] =  0.149451349150580593145776339658
	w[10] = 0.0666713443086881375935688098933
	# BUG FIX: without this, the function returned the value of the last
	# assignment (a Float64), not the vector itself.
	return w
end

# Fails: foo's return type cannot be inferred from the argument types alone.
@inferred foo(2, 400.0, 0.8, 1000000.0)

I believe the problem is in the branching inside the function shapefunder1d(). I suspect this because when I remove the “if” statement, the @inferred correctly infers the type:
2×2 MArray{Tuple{2,2},Float64,2,4}:
666.713 -666.713
-666.713 666.713

This is the code with the “if” statement removed from the shapefunder1d() function:

using StaticArrays
using Test

# Same assembly as before, but with the matrix size hard-coded to 2x2, so the
# return type is fixed and inference succeeds.
function foo(nelementnodes::Int, le::Float64, area::Float64, Ey::Float64)
	xi = gausspoints1d()
	w = gaussweights1d()
	# Size is now a compile-time constant in the type MMatrix{2,2}.
	Ke = MMatrix{2,2}(zeros(2,2))
	@inbounds for i = 1:10
		dNdx = shapefunder1d(nelementnodes, xi[i])
		Ke += dNdx * dNdx' * w[i]
	end
	Ke *= Ey * area * 2.0 / le
	return Ke
end

# 2-node shape-function derivatives (the "if" was removed for this demo, so
# `nelementnodes` and `xi` are intentionally unused here).
function shapefunder1d(nelementnodes::Int, xi::Float64)
  # Construct the MVector directly from its values; building a heap-allocated
  # zeros(2) Vector first and converting it is an unnecessary allocation.
  return MVector{2}(-0.5, 0.5)
end

# 10-point Gauss–Legendre quadrature points on [-1, 1].
function gausspoints1d()
	x = MVector{10}(zeros(10))
	x[1] =  - 0.973906528517171720077964012084
	x[2] =  - 0.865063366688984510732096688423
	x[3] =  - 0.679409568299024406234327365115
	x[4] =  - 0.433395394129247290799265943166
	x[5] =  - 0.148874338981631210884826001130
	x[6] =    0.148874338981631210884826001130
	x[7] =    0.433395394129247290799265943166
	x[8] =    0.679409568299024406234327365115
	x[9] =    0.865063366688984510732096688423
	x[10] =   0.973906528517171720077964012084
	# BUG FIX: return the vector, not the last assignment's Float64 value.
	return x
end

# 10-point Gauss–Legendre quadrature weights (weights sum to 2).
function gaussweights1d()
	w = MVector{10}(zeros(10))
	w[1] =  0.0666713443086881375935688098933
	w[2] =  0.149451349150580593145776339658
	w[3] =  0.219086362515982043995534934228
	w[4] =  0.269266719309996355091226921569
	w[5] =  0.295524224714752870173892994651
	w[6] =  0.295524224714752870173892994651
	w[7] =  0.269266719309996355091226921569
	w[8] =  0.219086362515982043995534934228
	w[9] =  0.149451349150580593145776339658
	w[10] = 0.0666713443086881375935688098933
	# BUG FIX: return the vector, not the last assignment's Float64 value.
	return w
end

# Passes: with the fixed 2x2 size, the return type is concretely inferred.
@inferred foo(2, 400.0, 0.8, 1000000.0)

Finally, I tested the first code (the one with the “if” statement) but using standard Julia arrays. This has no type stability problems. The @inferred correctly infers the type:
2×2 Array{Float64,2}:
666.713 -666.713
-666.713 666.713

The code using standard Julia arrays is:

using Test

# Plain-Array version of the assembly: the return type is Array{Float64,2}
# regardless of the value of `nelementnodes`, so inference succeeds.
function foo(nelementnodes::Int, le::Float64, area::Float64, Ey::Float64)
	pts = gausspoints1d()
	wts = gaussweights1d()
	stiff = zeros(nelementnodes, nelementnodes)
	@inbounds for q = 1:10
		der = shapefunder1d(nelementnodes, pts[q])
		stiff = stiff + der * der' * wts[q]
	end
	return stiff * (Ey * area * 2.0 / le)
end

# Derivatives of the 1D shape functions at local coordinate `xi` for a
# 2-node (linear) or 3-node (quadratic) element.
# Throws ArgumentError for any other `nelementnodes`.
function shapefunder1d(nelementnodes::Int, xi::Float64)
  dNdxi = zeros(nelementnodes)
  if nelementnodes == 2
    dNdxi[1] = -0.5
    dNdxi[2] = 0.5
  elseif nelementnodes == 3
    dNdxi[1] = -0.5 + xi
    dNdxi[2] = -2.0 * xi
    dNdxi[3] = 0.5 + xi
  else
    # Fix: throw a proper Exception instead of a raw String value.
    throw(ArgumentError("shapefunder1d(): illegal value of nelementnodes: $nelementnodes"))
  end
  return dNdxi
end

# 10-point Gauss–Legendre quadrature points on [-1, 1].
function gausspoints1d()
	x = zeros(10)
	x[1] =  - 0.973906528517171720077964012084
	x[2] =  - 0.865063366688984510732096688423
	x[3] =  - 0.679409568299024406234327365115
	x[4] =  - 0.433395394129247290799265943166
	x[5] =  - 0.148874338981631210884826001130
	x[6] =    0.148874338981631210884826001130
	x[7] =    0.433395394129247290799265943166
	x[8] =    0.679409568299024406234327365115
	x[9] =    0.865063366688984510732096688423
	x[10] =   0.973906528517171720077964012084
	# BUG FIX: return the vector, not the last assignment's Float64 value.
	return x
end

# 10-point Gauss–Legendre quadrature weights (weights sum to 2).
function gaussweights1d()
	w = zeros(10)
	w[1] =  0.0666713443086881375935688098933
	w[2] =  0.149451349150580593145776339658
	w[3] =  0.219086362515982043995534934228
	w[4] =  0.269266719309996355091226921569
	w[5] =  0.295524224714752870173892994651
	w[6] =  0.295524224714752870173892994651
	w[7] =  0.269266719309996355091226921569
	w[8] =  0.219086362515982043995534934228
	w[9] =  0.149451349150580593145776339658
	w[10] = 0.0666713443086881375935688098933
	# BUG FIX: return the vector, not the last assignment's Float64 value.
	return w
end

# Passes: plain Array{Float64,2} return type does not depend on the value.
@inferred foo(2, 400.0, 0.8, 1000000.0)

Any idea how to fix the type stability issue in the first code without removing the “if” statement as shapefunder1d() will have to compute arrays of different sizes depending on “nelementnodes”? Also, I would like to insist on using StaticArrays as compared to standard Julia arrays they perform better for small sized arrays.

Thanks.


EDIT: SOLUTION

Taking the advice of rdeits and Kristoffer.carlsson I modified the code that uses StaticArrays. No more type stability issues.

Solution 1:

using StaticArrays
using Test

# Type-stable version: the element size travels in the *type* Val{nelementnodes},
# so inference knows the SMatrix dimensions from the argument types alone.
function foo(::Val{nelementnodes}, le::Float64, area::Float64, Ey::Float64) where nelementnodes
	xi = gausspoints1d()
	w = gaussweights1d()
	Ke = zeros(SMatrix{nelementnodes,nelementnodes})  # zero-valued static matrix, no heap Array
	@inbounds for i = 1:10
		dNdx = shapefunder1d(Val(nelementnodes), xi[i])
		Ke += dNdx * dNdx' * w[i]  # SMatrix is immutable; this rebinds Ke
	end
	Ke *= Ey * area * 2.0 / le
	return Ke
end

# Shape-function derivatives for the 2-node element; constant in xi.
shapefunder1d(::Val{2}, xi::Float64) = SVector(-0.5, 0.5)

# Shape-function derivatives for the 3-node element, evaluated at xi.
shapefunder1d(::Val{3}, xi::Float64) = SVector(-0.5 + xi, -2.0 * xi, 0.5 + xi)

# 10-point Gauss–Legendre quadrature points on [-1, 1], as a static vector.
function gausspoints1d()
	return SVector(-0.973906528517171720077964012084,
	               -0.865063366688984510732096688423,
	               -0.679409568299024406234327365115,
	               -0.433395394129247290799265943166,
	               -0.148874338981631210884826001130,
	               0.148874338981631210884826001130,
	               0.433395394129247290799265943166,
	               0.679409568299024406234327365115,
	               0.865063366688984510732096688423,
	               0.973906528517171720077964012084)
end

# 10-point Gauss–Legendre quadrature weights, as a static vector.
function gaussweights1d()
	return SVector(0.0666713443086881375935688098933,
	               0.149451349150580593145776339658,
	               0.219086362515982043995534934228,
	               0.269266719309996355091226921569,
	               0.295524224714752870173892994651,
	               0.295524224714752870173892994651,
	               0.269266719309996355091226921569,
	               0.219086362515982043995534934228,
	               0.149451349150580593145776339658,
	               0.0666713443086881375935688098933)
end

# Both element orders now infer concretely, because the size travels in Val.
nelementnodes = 2
@inferred foo(Val(nelementnodes), 400.0, 0.8, 1000000.0)

nelementnodes = 3
@inferred foo(Val(nelementnodes), 400.0, 0.8, 1000000.0)

Solution 2:

using StaticArrays
using Test

# Variant taking the output matrix as an argument; dispatch on the static 2x2
# size selects this method. Mutates Ke in place; the final `.*=` expression is
# also the method's return value (Ke itself).
function foo(Ke::MMatrix{2,2,Float64}, le::Float64, area::Float64, Ey::Float64)
	xi = gausspoints1d()
	w = gaussweights1d()
	@inbounds for i = 1:10
		# NOTE(review): type stability here relies on the literal 2 being
		# constant-propagated into shapefunder1d — confirm with @code_warntype.
		dNdx = shapefunder1d(2, xi[i])
		Ke .+= dNdx * dNdx' * w[i]
	end
	Ke .*= Ey * area * 2.0 / le
end

# Same as the 2x2 method, dispatched on the static 3x3 size. Mutates Ke in
# place and returns it (value of the final `.*=` expression).
function foo(Ke::MMatrix{3,3,Float64}, le::Float64, area::Float64, Ey::Float64)
	xi = gausspoints1d()
	w = gaussweights1d()
	@inbounds for i = 1:10
		# NOTE(review): relies on constant propagation of the literal 3.
		dNdx = shapefunder1d(3, xi[i])
		Ke .+= dNdx * dNdx' * w[i]
	end
	Ke .*= Ey * area * 2.0 / le
end

# Shape-function derivatives for a 2- or 3-node element at coordinate xi.
function shapefunder1d(nelementnodes::Int, xi::Float64)
	nelementnodes == 2 && return @SVector [-0.5, 0.5]
	nelementnodes == 3 && return @SVector [-0.5 + xi, -2.0 * xi, 0.5 + xi]
	# BUG FIX: previously this fell through and silently returned `nothing`
	# for any other value (also widening the inferred return type).
	throw(ArgumentError("shapefunder1d(): illegal value of nelementnodes: $nelementnodes"))
end

# 10-point Gauss–Legendre quadrature points on [-1, 1], as a static vector.
function gausspoints1d()
	return SVector(-0.973906528517171720077964012084,
	               -0.865063366688984510732096688423,
	               -0.679409568299024406234327365115,
	               -0.433395394129247290799265943166,
	               -0.148874338981631210884826001130,
	               0.148874338981631210884826001130,
	               0.433395394129247290799265943166,
	               0.679409568299024406234327365115,
	               0.865063366688984510732096688423,
	               0.973906528517171720077964012084)
end

# 10-point Gauss–Legendre quadrature weights, as a static vector.
function gaussweights1d()
	return SVector(0.0666713443086881375935688098933,
	               0.149451349150580593145776339658,
	               0.219086362515982043995534934228,
	               0.269266719309996355091226921569,
	               0.295524224714752870173892994651,
	               0.295524224714752870173892994651,
	               0.269266719309996355091226921569,
	               0.219086362515982043995534934228,
	               0.149451349150580593145776339658,
	               0.0666713443086881375935688098933)
end

# Caller allocates Ke; dispatch on its static size picks the right foo method.
nelementnodes = 2
Ke = zeros(MMatrix{nelementnodes,nelementnodes})
@inferred foo(Ke, 400.0, 0.8, 1000000.0)

nelementnodes = 3
Ke = zeros(MMatrix{nelementnodes,nelementnodes})
@inferred foo(Ke, 400.0, 0.8, 1000000.0)

You’re asking for something that is pretty much the definition of type-unstable code: the type of your mvector depends on the value of nelementnodes, a value which is not generally available during code inference. The fact that this works in any of your examples is actually pretty cool, as it’s only possible because Julia is propagating the constant value 2 for nelementnodes.

One simple way around the problem would be to move the value of nelementnodes into the type domain using Val. For example (not tested because I’m typing on a phone):

function foo(::Val{nelementnodes}, ...) where {nelementnodes}
  x = MVector{nelementnodes}()
  ...
end

You would call this function with: foo(Val(2), ...).

Furthermore, such a design would allow you to split up your if statement into separate methods, such as:

function shapefunder1d(::Val{2}, ...)
  ...
end

function shapefunder1d (::Val{3}, ...)
  ...
end

Alternatively, you could have foo take Ke as an input argument instead. That would allow the calling code to choose the kind of array it wants, which might be useful in the future.

2 Likes

FWIW; I would use SVector instead of MVector for things like gauss-points and evaluations of local shape-functions since you won’t be modifying them after creation anyway.

The type stability question has already been answered, creating a separate type for each interpolation (like in JuAFEM.jl/interpolations.jl at master · KristofferC/JuAFEM.jl · GitHub) should work.

1 Like

Thank you rdeits! I tested all your suggested approaches and they worked as expected. I will put the modified codes in my original post.

Very useful kristoffer. Thank you! I will put the modified codes in the main body of my original post.

Great! By the way, instead of doing: x = [1, 2, 3]; y = SVector{3}(x), try doing just: y = SVector(1, 2, 3) or y = @SVector [1, 2, 3]. That will save avoiding allocating the Vector x and should improve performance quite a bit.

Sure! Thank you!

There’s an inconsistency here:

You create an MMatrix, Ke, but you don’t mutate it. You should either update it in-place:

Ke .+= dNdx .* dNdx' .* w[i]
Ke .*= Ey * area * 2 / le

or use an SMatrix instead.

Also, this is not the right way to zero-initialize a StaticArray:

Here, you first make an ordinary, heap-allocated, zero-matrix, and then convert it to an MMatrix. Instead you should directly create a zero-valued StaticArray, like this:

Ke = zero(MMatrix{N, N})

BTW, is there any particular reason that you have to restrict your inputs to be Int and Float64? Seems to me it should work equally well, if you allow Integer and Real instead.

As a general principle, it’s advantageous to not overly restrict input types. It’s a bit inconvenient to have to always type, say, 10.0 instead of 10 for inputs, and also your code will then be less compatible with other codes – for example, it will not work with automatic differentiation.

1 Like

This is just a side note, but I noticed that you define some constants with very high precision:

These are outside the precision of Float64:

julia> x = 0.26926671930999635509122692156
0.26926671930999635

julia> prevfloat(x), x, nextfloat(x)
(0.2692667193099963, 0.26926671930999635, 0.2692667193099964)

Maybe they are just arbitrary numbers, but if you need full precision you have to use BigFloats, like this:

julia> big"0.26926671930999635509122692156"
2.692667193099963550912269215600000000000000000000000000000000000000000000000014e-01
1 Like

Thanks @DNF … I actually thought that by updating the complete matrix that would require a mutating matrix. Now, I learned that it is not true. Ok with the zero-valued static array. I updated the codes.

@DNF, because “le”, “area” and “Ey” in the computations are expected to be float numbers. They take place in the entries of matrices. I read somewhere in the performance tips that using concrete types performs better than using abstract types in containers. BTW, I don’t see the advantage of using Integer rather than Int. I get that Int is just an alias for the concrete type Int64 or Int32 depending on the machine. Once again, I get from the performance tips that it is better, when possible, to use concrete types rather than abstract types.

In any case, my complete code has custom aliases for standard types. For instance, for floats I use this:

const Flt = Float64
export Flt

and my code uses ::Flt rather than ::Float64. So basically, I can change Flt to be Real if later I need to make my code to work with Real types.

@DNF, I got these constants from somewhere else. They are defined as is. The complete file (here I just put one case) has more than 500 of these constants. I prefer not to manually cut the numbers. The system will cut them automatically anyway.

@DNF, BTW, why did you put the dots here? dNdx .* dNdx’ .* w[i]. I think that Ke .+= dNdx * dNdx’ * w[i] is enough.

OK, the question is what should happen if they’re not Float64? Should the code fail, or just handle it? What is the desired behaviour? And if someone (maybe even yourself) wants to apply automatic differentiation on the code, then you have to change the code; or maybe you want to run it on a GPU, where Float32 is much faster.

Maybe this is very unrealistic and unimportant to you, and that’s fine. I just happen to think that it is a good general principle to not restrict input types unless you have a specific reason for it.

And no, there is no performance penalty. If you use e.g. le::Real the input types will never be abstract, it just means it will accept any concrete subtype of Real.

Ah, I see. I just wondered, and thought that you might not be aware.

It is enough with the single dot to get things correct, but then you miss out on loop fusion, and you get extra unnecessary allocations. The right side of Ke .+= dNdx * dNdx’ * w[i] will create both a temporary vector and a temporary matrix, neither of which you need. (Edit: I think maybe it’s even two temporary matrices instead of one matrix and one vector.)

1 Like

@DNF. Thanks for the advices. My actual code (not the portion I put here) uses custom type aliases for integer and float numbers and these are defined in a separate module:

const JInt = Int
export JInt
const JFlt = Float64
export JFlt

Then, I only use ::JInt and ::JFlt in my code.

If I needed something else for the types (like the examples you mentioned), I would just redefine JInt and JFlt in the separate module.

It is enough with the single dot to get things correct, but then you miss out on loop fusion, and you get extra unnecessary allocations. The right side of Ke .+= dNdx * dNdx’ * w[i] will create both a temporary vector and a temporary matrix, neither of which you need. ( Edit: I think maybe it’s even two temporary matrices instead of one matrix and one vector.)

Actually, I don’t see any difference. I tried this code:

using StaticArrays

# Benchmark variant 1: RHS of the accumulation is NOT dotted
# (dNdx * dNdx' * w[i]); with StaticArrays the intermediates are
# stack-allocated, so this should allocate the same as foo2.
function foo1(::Val{nelementnodes}, le::Float64, area::Float64, Ey::Float64) where nelementnodes
	xi = gausspoints1d()
	w = gaussweights1d()
	Ke = zeros(MMatrix{nelementnodes,nelementnodes})
	@inbounds for i = 1:10
		dNdx = shapefunder1d(Val(nelementnodes), xi[i])
		Ke .+= dNdx * dNdx' * w[i]
	end
	Ke .*= Ey * area * 2.0 / le
	return Ke
end

# Benchmark variant 2: fully dotted (fused) RHS, for comparison against foo1.
function foo2(::Val{nelementnodes}, le::Float64, area::Float64, Ey::Float64) where nelementnodes
	xi = gausspoints1d()
	w = gaussweights1d()
	Ke = zeros(MMatrix{nelementnodes,nelementnodes})
	@inbounds for i = 1:10
		dNdx = shapefunder1d(Val(nelementnodes), xi[i])
		Ke .+= dNdx .* dNdx' .* w[i]
	end
	Ke .*= Ey * area * 2.0 / le
	return Ke
end

# Shape-function derivatives for the 2-node element; constant in xi.
shapefunder1d(::Val{2}, xi::Float64) = SVector(-0.5, 0.5)

# Shape-function derivatives for the 3-node element, evaluated at xi.
shapefunder1d(::Val{3}, xi::Float64) = SVector(-0.5 + xi, -2.0 * xi, 0.5 + xi)

# 10-point Gauss–Legendre quadrature points on [-1, 1], as a static vector.
function gausspoints1d()
	return SVector(-0.973906528517171720077964012084,
	               -0.865063366688984510732096688423,
	               -0.679409568299024406234327365115,
	               -0.433395394129247290799265943166,
	               -0.148874338981631210884826001130,
	               0.148874338981631210884826001130,
	               0.433395394129247290799265943166,
	               0.679409568299024406234327365115,
	               0.865063366688984510732096688423,
	               0.973906528517171720077964012084)
end

# 10-point Gauss–Legendre quadrature weights, as a static vector.
function gaussweights1d()
	return SVector(0.0666713443086881375935688098933,
	               0.149451349150580593145776339658,
	               0.219086362515982043995534934228,
	               0.269266719309996355091226921569,
	               0.295524224714752870173892994651,
	               0.295524224714752870173892994651,
	               0.269266719309996355091226921569,
	               0.219086362515982043995534934228,
	               0.149451349150580593145776339658,
	               0.0666713443086881375935688098933)
end

# tests
# NOTE(review): @time measures each call including first-run compilation —
# run twice, or prefer BenchmarkTools.@btime, as discussed below.
nelementnodes = 3
time1() = @time foo1(Val(nelementnodes), 400.0, 0.8, 1000000.0)
time2() = @time foo2(Val(nelementnodes), 400.0, 0.8, 1000000.0)

time1()
time2()

and got this:

0.033151 seconds (88.24 k allocations: 3.923 MiB)
0.033586 seconds (87.64 k allocations: 3.883 MiB)

You are just measuring compilation time. If you want to do it like this, you need at least to run the timing twice.

On the other hand, you are actually right. If these were ordinary arrays, it would make a difference, but StaticArrays don’t cause heap allocations anyway.

But, really, you should always use BenchmarkTools for benchmarking.

You are just measuring compilation time. If you want to do it like this, you need at least to run the timing twice.

Yes, I ran it several times before putting these numbers.

julia> time1();
  0.000046 seconds (1 allocation: 80 bytes)

julia> time2();
  0.000055 seconds (1 allocation: 80 bytes)

But, as I said, always use BenchmarkTools for benchmarking.

1 Like