Using a sparse Hessian in Optim.jl

Okay i found a solution. It requires that you initially set up a TwiceDifferentiable object with the initial hessian, gradient values etc. as follows:

function smooth(
    lds::LinearDynamicalSystem{S,O}, y::Matrix{T}
) where {T<:Real,S<:GaussianStateModel{T},O<:GaussianObservationModel{T}}
    T_steps, D = size(y, 2), lds.latent_dim

    X₀ = zeros(T, D * T_steps)

    function nll(vec_x::Vector{T})
        x = reshape(vec_x, D, T_steps)
        return -loglikelihood(x, lds, y)
    end

    function g!(g::Vector{T}, vec_x::Vector{T})
        x = reshape(vec_x, D, T_steps)
        grad = Gradient(lds, y, x)
        return g .= vec(-grad)
    end

    function h!(h::AbstractSparseMatrix, vec_x::Vector{T})
        x = reshape(vec_x, D, T_steps)
        H, _, _, _ = Hessian(lds, y)
        copyto!(h, -H)
        return 
    end

    # set up initial values
    initial_f = nll(X₀)
    
    inital_g = similar(X₀)
    g!(inital_g, X₀)

    initial_h = spzeros(Float64, length(X₀), length(X₀))
    h!(initial_h, X₀)

    # set up a TwiceDifferentiable object i guess?
    td = TwiceDifferentiable(nll, g!, h!, X₀, initial_f, inital_g, initial_h)

    # set up Optim.Options
    opts = Optim.Options(g_tol=1e-12, x_tol=1e-12, f_tol=1e-12, iterations=100)

    # Go!
    res = optimize(td, X₀, Newton(linesearch=LineSearches.BackTracking()), opts)
    
    # Profit
    x = reshape(res.minimizer, D, T_steps)

    H, main, super, sub = Hessian(lds, y)
    p_smooth, inverse_offdiag = block_tridiagonal_inverse(-sub, -main, -super)

    gauss_entropy = 0.5 * (size(H, 1) * log(2π) + logdet(-H))

    @inbounds for i in 1:T_steps
        p_smooth[:, :, i] .= 0.5 .* (p_smooth[:, :, i] .+ p_smooth[:, :, i]')
    end

    inverse_offdiag = cat(zeros(T, D, D), inverse_offdiag; dims=3)

    return x, p_smooth, inverse_offdiag, gauss_entropy
end

This makes it s.t. the initial Hessian is a Sparse matrix. I’m going to take a peek into Optim I think and see if there is a way the interface could be changed to infer the type of the Hessian at the first iteration as I found this to be be a bit unintuitive. This also made my runtime go from ~20s for 100 iterations to <0.5s <8s so this was a massive performance bite. Would love to talk to a developer from Optim to understand if this is a worthwhile feature. But overall happy i found a solution. Thanks @gdalle for the help :slight_smile:

2 Likes