I have an example of type instability. My goal is to evaluate a function on a grid of points, but the dimensionality of the grid is determined by determining the rank of the Hessian matrix.
The grid and various StaticArrays are then parameterized in the dimensionality.
A simple example without any parametrization:
julia> function construct_random_svector1(p::Int)
x = SVector{p}(Vector{Float64}(p))
end
construct_random_svector1 (generic function with 1 method)
julia> function mult_rand1(p::Int)
x = construct_random_svector(p)
y = construct_random_svector(p)
dot(x,y)
end
mult_rand1 (generic function with 1 method)
julia> function svw1(p::Int)
rand_prod = mult_rand(p)
end
svw1 (generic function with 1 method)
julia> @code_warntype svw1(5)
Variables:
#self#::#svw1
p::Int64
rand_prod::Any
x@_4::Any
y::Any
x@_6::Any
x@_7::Any
Body:
begin
$(Expr(:inbounds, false))
# meta: location REPL[6] mult_rand 2
# meta: location REPL[5] construct_random_svector 2
SSAValue(1) = ((Core.apply_type)(Main.SVector, p::Int64)::Type{StaticArrays.SArray{Tuple{_},T,1,_} where T} where _)($(Expr(:invoke, MethodInstance for randn!(::MersenneTwister, ::Array{Float64,1}), :(Base.Random.randn!), :(Base.Random.GLOBAL_RNG), :($(Expr(:foreigncall, :(:jl_alloc_array_1d), Array{Float64,1}, svec(Any, Int64), Array{Float64,1}, 0, :(p), 0))))))::Any
# meta: pop location # line 3:
# meta: location REPL[5] construct_random_svector 2
SSAValue(3) = ((Core.apply_type)(Main.SVector, p::Int64)::Type{StaticArrays.SArray{Tuple{_},T,1,_} where T} where _)($(Expr(:invoke, MethodInstance for randn!(::MersenneTwister, ::Array{Float64,1}), :(Base.Random.randn!), :(Base.Random.GLOBAL_RNG), :($(Expr(:foreigncall, :(:jl_alloc_array_1d), Array{Float64,1}, svec(Any, Int64), Array{Float64,1}, 0, :(p), 0))))))::Any
# meta: pop location
# meta: pop location
$(Expr(:inbounds, :pop))
SSAValue(0) = (Main.dot)(SSAValue(1), SSAValue(3))::Any
return SSAValue(0)
end::Any
We can pretty-up the @code_warntype via adding annotations:
julia> function mult_rand2(p::Int)
x = construct_random_svector1(p)::SVector{p,Float64}
y = construct_random_svector1(p)::SVector{p,Float64}
dot(x,y)::Float64
end
mult_rand2 (generic function with 1 method)
julia> function svw2(p::Int)
rand_prod = mult_rand2(p)
end
svw2 (generic function with 1 method)
julia> @code_warntype svw2(5)
Variables:
#self#::#svw2
p::Int64
rand_prod::Float64
Body:
begin
SSAValue(0) = $(Expr(:invoke, MethodInstance for mult_rand2(::Int64), :(Main.mult_rand2), :(p)))
return SSAValue(0)
end::Float64
But this actually worsens performance:
julia> using BenchmarkTools
julia> a = 5;
julia> @benchmark svw1($a)
BenchmarkTools.Trial:
memory estimate: 1.86 KiB
allocs estimate: 29
--------------
minimum time: 2.624 ÎĽs (0.00% GC)
median time: 2.713 ÎĽs (0.00% GC)
mean time: 3.037 ÎĽs (8.31% GC)
maximum time: 404.043 ÎĽs (97.26% GC)
--------------
samples: 10000
evals/sample: 9
julia> @benchmark svw2($a)
BenchmarkTools.Trial:
memory estimate: 3.61 KiB
allocs estimate: 57
--------------
minimum time: 5.842 ÎĽs (0.00% GC)
median time: 5.949 ÎĽs (0.00% GC)
mean time: 6.628 ÎĽs (8.02% GC)
maximum time: 679.427 ÎĽs (97.31% GC)
--------------
samples: 10000
evals/sample: 6
If p were known ahead of time, we could make it known to the compiler:
julia> struct num{p}
end
julia> b = num{a}
num{5}
julia> function construct_random_svector3(::Type{num{p}}) where {p}
x = SVector{p}(Vector{Float64}(p))
x
end
construct_random_svector3 (generic function with 1 method)
julia> function mult_rand3(::Type{num{p}}) where {p}
x = construct_random_svector3(num{p})
y = construct_random_svector3(num{p})
dot(x,y)
end
mult_rand3 (generic function with 1 method)
julia> function svw3(::Type{num{p}}) where {p}
rand_prod = mult_rand3(num{p})
end
svw3 (generic function with 1 method)
julia> @code_warntype svw3(b)
Variables:
#self#::#svw3
#unused#::Any
rand_prod::Float64
Body:
begin
SSAValue(0) = $(Expr(:invoke, MethodInstance for mult_rand3(::Type{num{5}}), :(Main.mult_rand3), num{5}))
return SSAValue(0)
end::Float64
julia> @benchmark svw3($b)
BenchmarkTools.Trial:
memory estimate: 272 bytes
allocs estimate: 3
--------------
minimum time: 134.391 ns (0.00% GC)
median time: 137.253 ns (0.00% GC)
mean time: 151.221 ns (5.48% GC)
maximum time: 1.434 ÎĽs (81.61% GC)
--------------
samples: 10000
evals/sample: 888
So far, the best solution I have been able to come up with is
julia> function mult_rand4(::Type{num{p}}) where {p}
x = construct_random_svector3(num{p})
y = construct_random_svector3(num{p})
dot(x,y)::Float64
end
mult_rand4 (generic function with 1 method)
julia> function svw4(p::Int)
rand_prod = mult_rand4(num{p})
end
svw4 (generic function with 1 method)
julia> @code_warntype svw4(a)
Variables:
#self#::#svw4
p::Int64
rand_prod::Float64
Body:
begin
SSAValue(0) = (Main.mult_rand4)((Core.apply_type)(Main.num, p::Int64)::Type{num{_}} where _)::Float64
return SSAValue(0)
end::Float64
julia> @benchmark svw4($a)
BenchmarkTools.Trial:
memory estimate: 272 bytes
allocs estimate: 3
--------------
minimum time: 424.704 ns (0.00% GC)
median time: 435.721 ns (0.00% GC)
mean time: 466.005 ns (3.25% GC)
maximum time: 7.972 ÎĽs (88.91% GC)
--------------
samples: 10000
evals/sample: 199
Not nearly matching the performance of being parametrized in the first place.
Are there work arounds I am missing?
EDIT:
Also worth pointing out:
julia> function construct_random_vector5(p::Int)
x = Vector{Float64}(p)
x
end
construct_random_vector5 (generic function with 1 method)
julia> function mult_rand5(p::Int)
x = construct_random_vector5(p)
y = construct_random_vector5(p)
dot(x,y)
end
mult_rand5 (generic function with 1 method)
julia> function svw5(p::Int)
rand_prod = mult_rand5(p)
end
svw5 (generic function with 1 method)
julia> @code_warntype svw5(a)
Variables:
#self#::#svw5
p::Int64
rand_prod::Float64
x@_4::Any
y::Any
x@_6::Array{Float64,1}
x@_7::Array{Float64,1}
Body:
begin
$(Expr(:inbounds, false))
# meta: location REPL[4] mult_rand5 2
# meta: location REPL[3] construct_random_vector5 2
x@_6::Array{Float64,1} = $(Expr(:foreigncall, :(:jl_alloc_array_1d), Array{Float64,1}, svec(Any, Int64), Array{Float64,1}, 0, :(p), 0))
# meta: pop location # line 3:
# meta: location REPL[3] construct_random_vector5 2
x@_7::Array{Float64,1} = $(Expr(:foreigncall, :(:jl_alloc_array_1d), Array{Float64,1}, svec(Any, Int64), Array{Float64,1}, 0, :(p), 0))
# meta: pop location
# meta: pop location
$(Expr(:inbounds, :pop))
SSAValue(0) = $(Expr(:invoke, MethodInstance for dot(::Array{Float64,1}, ::Array{Float64,1}), :(Main.dot), :(x@_6), :(x@_7)))
return SSAValue(0)
end::Float64
julia> @benchmark svw5($a)
BenchmarkTools.Trial:
memory estimate: 256 bytes
allocs estimate: 2
--------------
minimum time: 74.900 ns (0.00% GC)
median time: 78.476 ns (0.00% GC)
mean time: 84.285 ns (4.32% GC)
maximum time: 631.964 ns (77.53% GC)
--------------
samples: 10000
evals/sample: 973
That it is worth considering cost benefit of StaticArrays vs type instability (vs throwing an error when the Hessian isn’t full rank).