I have a type of which I want to create specific instances. To illustrate why I want this, think of a Car
with fields model::String
, year::Int
, acceleration::Function
. So there’s some “identifier” fields, some “parameter” fields, and then there’s fields holding a function. Here, acceleration
could be a function of current speed and accelerator-pedal position.
I could be using “empty” subtypes of a single abstract type Car
, but that does not work out well later on (ran into dynamic dispatch, I guess). So my current idea are factory functions, for example FordModelT
that creates an appropriate instance of Car
. The problem is the performance of the ::Function
fields. Consider this simple example:
struct TheType
f::Function
end
# the original function
function fun(ϑ)
c_ν = (1.99218000e-05, 3.29378459e-03, -4.11466399e-02)
return (c_ν[1] + (c_ν[2] + c_ν[3]/ϑ)/ϑ)/ϑ
end
# the function as a captured variable
function Factory1()
function fun(ϑ)
c_ν = (1.99218000e-05, 3.29378459e-03, -4.11466399e-02)
return (c_ν[1] + (c_ν[2] + c_ν[3]/ϑ)/ϑ)/ϑ
end
return TheType(fun)
end
# accessing captured variable in return expression of function
function Factory2()
c_ν = (1.99218000e-05, 3.29378459e-03, -4.11466399e-02)
return TheType(ϑ -> (c_ν[1] + (c_ν[2] + c_ν[3]/ϑ)/ϑ)/ϑ)
end
# quoting parameters into return expression
function Factory3()
c_ν = (1.99218000e-05, 3.29378459e-03, -4.11466399e-02)
return TheType(@eval ϑ -> ($(c_ν[1]) + ($(c_ν[2]) + $(c_ν[3])/ϑ)/ϑ)/ϑ)
end
# generating the original function
function Factory4()
return TheType(@eval function(ϑ)
c_ν = (1.99218000e-05, 3.29378459e-03, -4.11466399e-02)
return (c_ν[1] + (c_ν[2] + c_ν[3]/ϑ)/ϑ)/ϑ
end)
end
t1 = Factory1()
t2 = Factory2()
t3 = Factory3()
t4 = Factory4()
Using @btime
calling fun(92.95)
or t1.f(92.95)
, I get 0.001ns (= nothing) for fun
and around 35ns for all the others (with -O3
). However, when I look at @code_warntype
and @code_llvm
, everything looks exactly the same, except for t2.f
(which I expected since it is the only version accessing a captured variable).
Also, timing everything for one million evaluations yields the same result:
function the_loop()
for i=1:1_000_000
t1.f(92.95)
end
end
the_loop()
t_el = @elapsed the_loop()
println("Time for 1 eval: $(t_el*1000)ns")
Since also 35ns is almost nothing; am I hunting ghosts here?