Okay i found a solution. It requires that you initially set up a TwiceDifferentiable
object with the initial hessian, gradient values etc. as follows:
function smooth(
lds::LinearDynamicalSystem{S,O}, y::Matrix{T}
) where {T<:Real,S<:GaussianStateModel{T},O<:GaussianObservationModel{T}}
T_steps, D = size(y, 2), lds.latent_dim
X₀ = zeros(T, D * T_steps)
function nll(vec_x::Vector{T})
x = reshape(vec_x, D, T_steps)
return -loglikelihood(x, lds, y)
end
function g!(g::Vector{T}, vec_x::Vector{T})
x = reshape(vec_x, D, T_steps)
grad = Gradient(lds, y, x)
return g .= vec(-grad)
end
function h!(h::AbstractSparseMatrix, vec_x::Vector{T})
x = reshape(vec_x, D, T_steps)
H, _, _, _ = Hessian(lds, y)
copyto!(h, -H)
return
end
# set up initial values
initial_f = nll(X₀)
inital_g = similar(X₀)
g!(inital_g, X₀)
initial_h = spzeros(Float64, length(X₀), length(X₀))
h!(initial_h, X₀)
# set up a TwiceDifferentiable object i guess?
td = TwiceDifferentiable(nll, g!, h!, X₀, initial_f, inital_g, initial_h)
# set up Optim.Options
opts = Optim.Options(g_tol=1e-12, x_tol=1e-12, f_tol=1e-12, iterations=100)
# Go!
res = optimize(td, X₀, Newton(linesearch=LineSearches.BackTracking()), opts)
# Profit
x = reshape(res.minimizer, D, T_steps)
H, main, super, sub = Hessian(lds, y)
p_smooth, inverse_offdiag = block_tridiagonal_inverse(-sub, -main, -super)
gauss_entropy = 0.5 * (size(H, 1) * log(2π) + logdet(-H))
@inbounds for i in 1:T_steps
p_smooth[:, :, i] .= 0.5 .* (p_smooth[:, :, i] .+ p_smooth[:, :, i]')
end
inverse_offdiag = cat(zeros(T, D, D), inverse_offdiag; dims=3)
return x, p_smooth, inverse_offdiag, gauss_entropy
end
This makes it s.t. the initial Hessian is a Sparse matrix. I’m going to take a peek into Optim I think and see if there is a way the interface could be changed to infer the type of the Hessian at the first iteration as I found this to be be a bit unintuitive. This also made my runtime go from ~20s for 100 iterations to <0.5s <8s so this was a massive performance bite. Would love to talk to a developer from Optim to understand if this is a worthwhile feature. But overall happy i found a solution. Thanks @gdalle for the help