Less than 2x slower for me with a degree-6 test polynomial (7 coefficients).
julia> function evalpolyloop!(f, y, x)
           # Benchmark kernel: store f(x[i], coeffs) into y[i] for every index
           # shared by y and x. `f` is called as f(point, coefficient_tuple);
           # the coefficients are fixed to this 7-entry test polynomial.
           coeffs = (
               0.6666666666667333541,
               0.3999999999635251990,
               0.2857142932794299317,
               0.2222214519839380009,
               0.1818605932937785996,
               0.1525629051003428716,
               0.1532076988502701353,
           )
           # eachindex(y, x) only yields indices valid for both arrays (it throws
           # if their axes disagree), which is what makes @inbounds safe here.
           @inbounds for idx in eachindex(y, x)
               y[idx] = f(x[idx], coeffs)
           end
           return nothing
       end
evalpolyloop! (generic function with 1 method)
julia> x = rand(256) .+ 0.5; y = similar(x);
julia> @inline function exthorner(x, p::Tuple)
           # Horner evaluation at `x` of the polynomial whose coefficients are
           # stored lowest degree first in `p`. The running value is carried as
           # a pair: `hi` is the ordinary Horner result, `lo` accumulates the
           # rounding errors of each step. Returns the tuple (hi, lo).
           top = length(p)
           hi = p[top]
           lo = zero(x)
           for k in (top - 1):-1:1
               c = p[k]
               hx = hi * x
               # fma evaluates hi*x - hx with one rounding, recovering the
               # rounding error of the product above.
               hx_err = fma(hi, x, -hx)
               s = c + hx
               # Rounding error of the sum c + hx; this short form presumably
               # relies on |c| >= |hx| to be exact -- TODO confirm for inputs.
               sum_err = hx - (s - c)
               # Propagate the old correction through the multiply by x and
               # add this step's two error terms.
               lo = fma(lo, x, sum_err + hx_err)
               hi = s
           end
           return hi, lo
       end
exthorner (generic function with 1 method)
julia> @benchmark evalpolyloop!(evalpoly, $y, $x)
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 37.663 ns (0.00% GC)
median time: 37.959 ns (0.00% GC)
mean time: 38.003 ns (0.00% GC)
maximum time: 90.504 ns (0.00% GC)
--------------
samples: 10000
evals/sample: 992
julia> @benchmark evalpolyloop!((a,b) -> first(exthorner(a,b)), $y, $x)
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 63.381 ns (0.00% GC)
median time: 63.487 ns (0.00% GC)
mean time: 63.574 ns (0.00% GC)
maximum time: 99.733 ns (0.00% GC)
--------------
samples: 10000
evals/sample: 980