I’m working on developing a solver that requires piecewise-defined polynomial functions to be evaluated very frequently. The easiest way to define such a function would be something like
function piecewise_hardcoded(x)
if x < -1
return 0.0
elseif x < 0
return 1.0 - x^2
elseif x < 1
return x^2 -1.0
else
return 0.0
end
end
This performs very well:
julia> @btime piecewise_hardcoded(x) setup=(x=4*rand()-2)
2.499 ns (0 allocations: 0 bytes)
However, I’d like to be able to take arbitrary derivatives and integrals of such functions as well without rewriting the whole function, so I want to be able to store the functions and breakpoints in a struct like so:
using Polynomials
struct PiecewisePolynomial{P<:AbstractPolynomial, T<:Real} <: Function
polynomials::Vector{P}
breakpoints::Vector{T}
end
function evalpiecewise(functions, breakpoints, x)
for (i, b) in enumerate(breakpoints)
if b < x
return functions[i](x)
end
end
return functions[end](x)
end
(p::PiecewisePolynomial)(x) = evalpiecewise(p.polynomials, p.breakpoints, x)
const polynomials = [
Polynomial([0.0]), Polynomial([1.0, 0.0, -1.0]), Polynomial([-1.0, 0.0, 1.0]), Polynomial([0.0])
]
const breakpoints = [-1.0, 0.0, 1.0]
const mypiecewise = PiecewisePolynomial(polynomials, breakpoints)
This is reasonably fast, but still 5-10 times slower than the hardcoded version
julia> @btime mypiecewise(x) setup=(x=4*rand()-2)
17.734 ns (0 allocations: 0 bytes)
My next idea was to compute a closure for the evaluation and store that in the struct, in the hopes that the compiler could do some constant propagation on evalfunc
struct PiecewisePolynomial_2{F<:Function, P<:AbstractPolynomial, T<:Real} <: Function
polynomials::Vector{P}
breakpoints::Vector{T}
evalfunc::F
function PiecewisePolynomial_2(polynomials::Vector{P}, breakpoints::Vector{T}) where {P, T}
# define closure
evalfunc = let p = polynomials, b = breakpoints
x -> evalpiecewise(p, b, x)
end
return new{typeof(evalfunc), P, T}(polynomials, breakpoints, evalfunc)
end
end
(p::PiecewisePolynomial_2)(x) = p.evalfunc(x)
const mypiecewise_withclosure = PiecewisePolynomial_2(polynomials, breakpoints)
However, this did not measurably impact performance
julia> @btime mypiecewise_withclosure(x) setup=(x=4*rand()-2)
17.734 ns (0 allocations: 0 bytes)
The last thing I tried was using NTuples instead of vectors so that the number of segments is inferrable from the type:
struct PiecewisePolynomial_Tuples{F<:Function, P<:AbstractPolynomial, T<:Real, M} <: Function
polynomials::NTuple{N, P} where N
breakpoints::NTuple{M, T}
evalfunc::F
function PiecewisePolynomial_Tuples(polynomials::NTuple{N, P}, breakpoints::NTuple{M, T}) where {P, N, T, M}
@assert N == M + 1
@assert issorted(breakpoints)
evalfunc = let p = polynomials, b = breakpoints
x -> evalpiecewise(polynomials, breakpoints, x)
end
return new{typeof(evalfunc), P, T, M}(polynomials, breakpoints, evalfunc)
end
end
const poly_tuple = (
Polynomial([0.0]), Polynomial([1.0, 0.0, -1.0]), Polynomial([-1.0, 0.0, 1.0]), Polynomial([0.0])
)
const breakpoint_tuple = (-1.0, 0.0, 1.0)
const mypiecewise_tuples = PiecewisePolynomial_Tuples(poly_tuple, breakpoint_tuple)
and that got me another 5 ns over the vectors, but it’s still 6 times slower than the hardcoded version.
julia> @btime mypiecewise_tuples(x) setup=(x=4*rand()-2)
12.512 ns (0 allocations: 0 bytes)
Does anyone have any ideas for further speedups?