Avoiding allocations and run-time dispatch with ForwardDiff

I have an optimization problem that involves solving an ODE and finding the input to the "system" such that an objective is minimized. An MWE of a toy problem is below:

import Optimization
import ForwardDiff
import OptimizationOptimJL
import LabelledArrays
using Test
using BenchmarkTools
using JET

p = LabelledArrays.LVector(;
    V=450.0, 
    T_bar=21.0,
    alpha=4.8,
    cp=4200.0, 
    rho=0.998, 
)

function f!(x_, x, u, p, t, tS)
    x_[1] = x[1] + tS * 1 / (p.V * p.cp * p.rho) * (p.alpha * (p.T_bar - x[1]) + (u[2] + u[1]))
    return x_
end
function g!(y_, x, u, p, t, tS)
    y_[1] = x[1]
    return y_
end

function simulate(
    f!, g!, u::Array{U,2}, x0::AbstractVector{X}, p::AbstractVector{PT}, t::AbstractVector{T}, tS::Int; nx, ny
) where {U,X,PT,T}
    N = length(t)
    x = zeros(promote_type(X, U, PT), nx, N)
    y = zeros(promote_type(X, U, PT), ny, N)
    @views begin
        x[:, 1] = x0
        g!(y[:, 1], x[:, 1], u[:, 1], p, t[1], tS)
        for k in 2:N
            f!(x[:, k], x[:, k - 1], u[:, k - 1], p, t[k - 1], tS)
            g!(y[:, k], x[:, k], u[:, k], p, t[k], tS)
        end
    end
    return x, y
end

tS = 900
t_sim = 0:tS:(24 * 3600 - tS)
x0 = [67.0]
u = [
    0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 16.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1728.0 5556.0 4480.0 3496.0 16.0 16.0 16.0 16.0 20.0 16.0 16.0 16.0 16.0 16.0 20.0 768.0 3188.0 1368.0 16.0 16.0 16.0 16.0 20.0 8.0 16.0 16.0 12.0 8.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    15.44 15.44 -55.17 -20.83 13.67 -21.8 -22.44 -22.92 11.42 11.09 10.45 -24.85 -60.64 -96.11 -97.07 42.38 -27.91 -28.71 75.91 40.61 5.14 4.66 -65.79 -66.59 2.89 2.41 1.93 -49.54 0.8 -34.5 -70.29 -70.77 -71.58 67.87 102.69 -2.58 -143.15 -1837.14 -5280.8 -1405.35 220.73 905.78 -39.25 -4.91 -5.23 -79.84 -6.51 28.15 -7.32 27.18 -8.12 -12.77 -201.36 -1080.17 396.75 209.82 -35.39 -71.17 -1.69 28.81 5.51 -3.14 -3.46 -70.07 3.25 -24.05 10.13 44.95 9.32 9.0 -26.62 -62.09 7.07 41.89 -28.71 5.79 5.3 39.8 4.5 3.86 3.53 -67.08 -67.72 1.6 71.25 0.96 0.32 34.98 -70.45 -71.26 33.21 -2.1 -37.72 -3.22 -38.69 -38.69
]
y = [
    67.77 67.7 67.57 67.43 67.33 67.23 67.1 67.0 66.87 66.8 66.67 66.6 66.43 66.33 66.13 66.03 65.97 65.8 65.73 65.67 65.57 65.47 65.37 65.2 65.1 65.0 64.9 64.8 64.67 64.6 64.43 64.33 64.17 64.07 64.03 63.97 63.83 63.63 63.53 63.7 66.27 67.03 66.93 66.8 66.73 66.6 66.47 66.4 66.3 66.2 66.13 66.0 65.93 66.33 67.73 67.8 67.73 67.57 67.47 67.37 67.3 67.17 67.1 66.97 66.83 66.77 66.6 66.57 66.43 66.37 66.23 66.13 65.97 65.93 65.8 65.7 65.6 65.5 65.43 65.3 65.23 65.1 64.97 64.83 64.77 64.7 64.57 64.5 64.4 64.23 64.13 64.07 63.93 63.83 63.73 63.6
]
x_sim, y_sim = simulate(f!, g!, u, x0, p, t_sim, tS; nx=1, ny=1)

Checking allocations, type stability, and run-time dispatch, this seems OK:

@btime simulate(f!, g!, u, x0, p, t_sim, tS; nx=1, ny=1)
@inferred simulate(f!, g!, u, x0, p, t_sim, tS; nx=1, ny=1)
@report_opt simulate(f!, g!, u, x0, p, t_sim, tS; nx=1, ny=1)

Since the objective will be evaluated many times when running the optimization, I want to pre-allocate.

# A buffer in order to avoid allocations
struct OptimBuffer{U,Y,X}
    u::Matrix{U}
    y::Matrix{Y}
    x::Matrix{X}
end
# objective function
function objective(
    u_hat::AbstractVector{U},
    f!::Function,
    g!::Function,
    x0::AbstractVector{Float64},
    p::AbstractVector{PT},
    t::AbstractVector{T},
    tS::Int,
    optim_buffer::OptimBuffer,
    y::Array{Float64,2},
) where {U<:Real,PT<:Real,T}
    # Forward Euler simulation
    N = size(optim_buffer.u, 2)
    @views begin
        optim_buffer.u[2, :] = u_hat
        optim_buffer.x[:, 1] = x0
        g!(optim_buffer.y[:, 1], optim_buffer.x[:, 1], optim_buffer.u[:, 1], p, t[1], tS)
        for k in 2:N
            f!(optim_buffer.x[:, k], optim_buffer.x[:, k - 1], optim_buffer.u[:, k - 1], p, t[k - 1], tS)
            g!(optim_buffer.y[:, k], optim_buffer.x[:, k], optim_buffer.u[:, k], p, t[k], tS)
        end
    end
    # evaluate objective
    return sum(abs2, y - optim_buffer.y)
end
optim_buffer = OptimBuffer(zeros(2, length(t_sim)), zeros(1, length(t_sim)), zeros(1, length(t_sim)))
optim_buffer.u .= u

u_hat_guess = zeros(length(t_sim)) # this is what is optimized
objective(u_hat_guess, f!, g!, x0, p, t_sim, tS, optim_buffer, y)
@btime objective(u_hat_guess, f!, g!, x0, p, t_sim, tS, optim_buffer, y)
@inferred objective(u_hat_guess, f!, g!, x0, p, t_sim, tS, optim_buffer, y)
@report_opt objective(u_hat_guess, f!, g!, x0, p, t_sim, tS, optim_buffer, y)

However, this approach cannot be used with ForwardDiff.jl, because its Dual numbers cannot be stored in the Float64 buffers:

optf = Optimization.OptimizationFunction(
    (x, _) -> objective(x, f!, g!, x0, p, t_sim, tS, optim_buffer, y), Optimization.AutoForwardDiff()
)
prob = Optimization.OptimizationProblem(optf, u_hat_guess)
sol = Optimization.solve(prob, OptimizationOptimJL.LBFGS(); maxtime=60, maxiters=100_000)
ERROR: MethodError: no method matching Float64(::ForwardDiff.Dual{ForwardDiff.Tag{DifferentiationInterface.FixTail{…}, Float64}, Float64, 12})

A solution would be to use

optim_buffer = OptimBuffer(zeros(Real, 2, length(t_sim)), zeros(Real, 1, length(t_sim)), zeros(Real, 1, length(t_sim)))

to accommodate both Float64 and ForwardDiff.Dual. However, now I have run-time dispatch.
I could allocate inside objective, just as I did in simulate; however, that allocation would then happen potentially thousands of times.

Is there a solution to this problem?

I think the way to do this with ForwardDiff would be to use PreallocationTools.jl.
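
A minimal sketch of that approach, assuming PreallocationTools.jl's DiffCache/get_tmp API (objective_cached and the caches tuple are names made up here; the fixed first input row now has to be passed in explicitly, since get_tmp hands back a freshly re-typed buffer on every call):

import PreallocationTools

# One DiffCache per buffer. get_tmp returns the stored Float64 array for a
# Float64 input and a Dual-eltype view of pre-allocated storage for a Dual
# input. If the optimizer's chunk size differs from the default, pass it
# explicitly as DiffCache(A, chunk_size).
caches = (
    u=PreallocationTools.DiffCache(zeros(2, length(t_sim))),
    x=PreallocationTools.DiffCache(zeros(1, length(t_sim))),
    y=PreallocationTools.DiffCache(zeros(1, length(t_sim))),
)

function objective_cached(u_hat, f!, g!, x0, u_fixed, p, t, tS, caches, y)
    # Fetch buffers whose eltype matches u_hat (Float64 or ForwardDiff.Dual)
    u_buf = PreallocationTools.get_tmp(caches.u, u_hat)
    x_buf = PreallocationTools.get_tmp(caches.x, u_hat)
    y_buf = PreallocationTools.get_tmp(caches.y, u_hat)
    N = size(u_buf, 2)
    @views begin
        u_buf[1, :] = u_fixed[1, :]  # known input row
        u_buf[2, :] = u_hat          # decision variables
        x_buf[:, 1] = x0
        g!(y_buf[:, 1], x_buf[:, 1], u_buf[:, 1], p, t[1], tS)
        for k in 2:N
            f!(x_buf[:, k], x_buf[:, k - 1], u_buf[:, k - 1], p, t[k - 1], tS)
            g!(y_buf[:, k], x_buf[:, k], u_buf[:, k], p, t[k], tS)
        end
    end
    return sum(abs2, y - y_buf)
end

optf = Optimization.OptimizationFunction(
    (x, _) -> objective_cached(x, f!, g!, x0, u, p, t_sim, tS, caches, y),
    Optimization.AutoForwardDiff(),
)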

However, you could also just use AutoEnzyme() (and Enzyme.jl): the solve should work fine without any changes, and it would almost certainly be faster than ForwardDiff.
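
A hedged sketch of that swap, reusing the Float64 objective and optim_buffer from above unchanged (this assumes Enzyme.jl is installed and that Optimization.jl exposes the backend as Optimization.AutoEnzyme(), analogous to AutoForwardDiff above):

import Enzyme

# Per the suggestion above, Enzyme supports in-place mutation of the Float64
# buffers directly, so no Dual-compatible storage is needed.
optf = Optimization.OptimizationFunction(
    (x, _) -> objective(x, f!, g!, x0, p, t_sim, tS, optim_buffer, y),
    Optimization.AutoEnzyme(),
)
prob = Optimization.OptimizationProblem(optf, u_hat_guess)
sol = Optimization.solve(prob, OptimizationOptimJL.LBFGS(); maxtime=60, maxiters=100_000)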

Also note that DifferentiationInterface.jl has an experimental caching mechanism; it remains unoptimized at the moment but should eventually replace PreallocationTools.jl in many scenarios.
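
For reference, a toy sketch of what that might look like (the DI.Cache context API is experimental, so treat the exact call signature as an assumption; f_toy is a hypothetical stand-in for the real objective):

import DifferentiationInterface as DI
import ForwardDiff

# A buffer wrapped in DI.Cache is passed to the function as an extra argument
# and may be re-typed by the backend, so it can be mutated under ForwardDiff.
f_toy(u, buf) = (buf .= u .^ 2; sum(buf))
DI.gradient(f_toy, DI.AutoForwardDiff(), u_hat_guess, DI.Cache(similar(u_hat_guess)))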
