Update: yes — it seems the difference was that RuntimeGeneratedFunctions.jl was caching the expressions it evaluated, making it appear faster than it really was inside a @btime loop.
Here’s a macrobenchmark that avoids the caching:
"""
    bench(f, x)

Drive `f(src, x)` over 400 freshly generated anonymous-function source
strings (4 operators × 4 operators × 5 values × 5 values), so that any
expression-caching layer inside `f` cannot reuse previous work.
"""
function bench(f, x)
    operators = ["/", "*", "+", "-"]
    operands = rand(5)   # fresh random literals each run → unique sources
    for lhs_op ∈ operators, rhs_op ∈ operators
        for a ∈ operands, b ∈ operands
            # Same text as "(x) -> x $lhs_op $a $rhs_op $b", built explicitly.
            src = string("(x) -> x ", lhs_op, " ", a, " ", rhs_op, " ", b)
            f(src, x)
        end
    end
end
julia> @time bench(apply_eval, 1.0)
1.077235 seconds (449.57 k allocations: 20.770 MiB, 82.07% compilation time)
julia> @time bench(apply_rgf, 1.0)
1.115373 seconds (976.42 k allocations: 45.832 MiB, 0.66% gc time, 97.86% compilation time)
julia> @time bench(apply_interp, 1.0)
0.111546 seconds (138.85 k allocations: 6.111 MiB)