The fully accurate version is:
"""
    two_sum(a, b)

Return the tuple `(hi, lo)`, where `hi` is the computed sum `a + b` and `lo` is a
correction term recovered from it without branching on the magnitudes of `a` and `b`.

For floating-point inputs this is the classic branch-free two-sum sequence: `lo` is
the rounding error committed when forming `hi`, so `hi + lo` stands for the sum of
`a` and `b` (barring overflow). For exact arithmetic such as integers, `lo` is zero.
"""
@inline function two_sum(a, b)
    total = a + b
    # Reconstruct the share of each operand that actually made it into `total`.
    a_seen = total - b
    b_seen = total - a_seen
    # Whatever each operand lost during rounding adds up to the error term.
    return total, (a - a_seen) + (b - b_seen)
end
"""
    exthorner(x, p::Tuple)

Evaluate the polynomial with coefficients `p` at `x` using Horner's rule while
tracking the rounding errors, and return the pair `(hi, lo)`.

`p` is read from `p[end]` (leading coefficient) down to `p[1]` (constant term), so
`p[i]` multiplies `x^(i-1)`. `hi` is the ordinary Horner result; `lo` accumulates the
error of every multiplication (recovered with `fma`) and of every addition (recovered
with `two_sum`), propagated through the same recurrence. `p` must be non-empty, since
`p[end]` is read unconditionally.
"""
@inline function exthorner(x, p::Tuple)
    hi = p[end]
    lo = zero(x)
    for k in (length(p) - 1):-1:1
        coeff = p[k]
        # Rounded product plus its exact rounding error, obtained through a fused
        # multiply-add.
        scaled = hi * x
        mul_err = fma(hi, x, -scaled)
        # Rounded sum plus the rounding error of the addition.
        hi, add_err = two_sum(coeff, scaled)
        # Advance the error polynomial by one Horner step and fold in both new errors.
        lo = fma(lo, x, mul_err + add_err)
    end
    return hi, lo
end
It should cost roughly another 2x slowdown, but it will retain precision for polynomials whose high-order terms are larger in magnitude than the low-order ones.