When running the ML Estimation example given here, everything works fine. With my own log-likelihood, I can get it to work with LBFGS() and Newton(), but plain BFGS() throws the error pasted at the end of this post. My code is nearly identical to that in the example, save for a log-likelihood that is a bit more involved, of course. The code that calls the optimizer:
using Optim, ForwardDiff, NLSolversBase, NamedArrays
using LinearAlgebra: diag, I
using Statistics: mean

func_llevel = TwiceDifferentiable(x -> FixedVarianceLocalLevelLogLikAllObs(x, NamedArray([]), hcat(mu_hat_i[1,1,:], tv_observational_noise_variance)), s_v; autodiff = :forward)
opt_llevel = optimize(func_llevel, s_v, BFGS())
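Here s_v holds my starting values as a NamedArray; schematically it looks like the following (the placeholder value is hypothetical, but "Noise level" is the parameter name the log-likelihood looks up):

using NamedArrays

# hypothetical placeholder value; my real s_v has this shape and naming
s_v = NamedArray([0.5], (["Noise level"],))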
The log-likelihood itself is given immediately below.
Can someone help me debug this? Thank you!
function FixedVarianceLocalLevelLogLikAllObs(param::AbstractVector{T}, restricted_parameters, data, provideprocess = false, forecast_periods = 0) where T
    full_parameter_vector = vcat(param, restricted_parameters)
    sample_size = size(data, 1)
    data = vcat([0 0], data) # so the first observation is zero
    p = 1
    # we have 1 + sample_size so that we can use all observations
    steps = 1 + sample_size + forecast_periods
    prior_state = zeros(T, steps, p)
    prior_covariance = zeros(T, steps, p, p)
    forecast_error = zeros(T, steps, 1)
    MSE_covariance = zeros(T, steps, 1, 1)
    neg_ll_one_obs = zeros(T, steps, 1)
    kalman_gain = zeros(T, steps, p)
    posterior_state = zeros(T, steps, p)
    posterior_covariance = zeros(T, steps, p, p)
    # measurement variance: recall that this is time-dependent
    # process noise variance, restricted to the same
    Q1 = full_parameter_vector["Noise level"] * full_parameter_vector["Noise level"]
    neg_ll::T = 0.
    # initialise
    posterior_covariance[1,:,:] = [20]
    kalman_gain[1,:] = T[0]
    neg_ll_one_obs[1] = 0.
    prior_state[1,:] = T[1]
    prior_covariance[1,:,:] = [20]
    forecast_error[1] = 0.
    MSE_covariance[1,1,1] = 20
    posterior_state[1,:] = T[1]
    for t in 2:steps
        # Prediction step starts with the prior estimate
        Z = ones(T, 1, 1)
        transition_matrix = ones(T, 1, 1)
        prior_state[t,:] = transition_matrix * posterior_state[t-1,:] # + [param[2]]
        # make sure it's a variance matrix proportional to the identity
        process_noise_covariance_timeindep = Q1 * ones(T, 1, 1)
        prior_covariance[t,:,:] = transition_matrix * posterior_covariance[t-1,:,:] * transition_matrix' + process_noise_covariance_timeindep
        # try two things: Psi^star as slope AND FD as slope
        if t <= (sample_size + 1)
            # Measurement step: collect the measurement and the forecast (which here
            # equals the prior state), calculate the log-likelihood, and update the
            # filter with the measurement
            # if a missing value is encountered, then ...
            forecast_error[t] = data[t,1] - (Z * prior_state[t,:])[1]
            MSE_covariance[t,:,:] = Z * prior_covariance[t,:,:] * Z' + data[t,2] * I(1) # scalar
            kalman_gain[t,:] = (prior_covariance[t,:,:] * Z') / MSE_covariance[t,:,:] # need to multiply by the Z matrix, which is unity here
            neg_ll_one_obs[t] = 0.5 * log(MSE_covariance[t,1,1]) + 0.5 * forecast_error[t]^2 / MSE_covariance[t,1,1]
            neg_ll += neg_ll_one_obs[t]
            posterior_state[t,:] = prior_state[t,:] + kalman_gain[t,:] * forecast_error[t]
            # Joseph-form covariance update, then symmetrise
            posterior_covariance[t,:,:] = (I(p) - kalman_gain[t,:] * Z) * prior_covariance[t,:,:] * (I(p) - kalman_gain[t,:] * Z)' + kalman_gain[t,:] * data[t,2] * I(1) * kalman_gain[t,:]'
            posterior_covariance[t,:,:] = (posterior_covariance[t,:,:] + posterior_covariance[t,:,:]') / 2
            # then t rolls one forward so that the t-1 posteriors are used in the next iteration for the priors
        else
            # Forecast horizon: no measurement, so keep the filter rolling forward without updating
            forecast_error[t] = 0
            MSE_covariance[t,:,:] = Z * prior_covariance[t,:,:] * Z' + data[sample_size+1,2] * I(1) # scalar
            kalman_gain[t,:] = T[0]
            neg_ll_one_obs[t] = 0
            neg_ll += neg_ll_one_obs[t]
            posterior_state[t,:] = prior_state[t,:]
            posterior_covariance[t,:,:] = prior_covariance[t,:,:]
            posterior_covariance[t,:,:] = (posterior_covariance[t,:,:] + posterior_covariance[t,:,:]') / 2
        end
    end
    if provideprocess
        stud_res = (forecast_error[1:end] .- mean(forecast_error[1:end])) ./ sqrt.(MSE_covariance[1:end])
        # columns: 1 prior_state, 2 prior_covariance, 3 forecast_error, 4 MSE_covariance,
        #          5 neg_ll_one_obs, 6 kalman_gain, 7 posterior_state, 8 posterior_covariance, 9 stud_res
        return hcat(prior_state[:,1], prior_covariance[:,1,1], forecast_error, MSE_covariance[:,1], neg_ll_one_obs, kalman_gain[:,1], posterior_state[:,1], posterior_covariance[:,1,1], stud_res)
    else
        return neg_ll
    end
end
Finally, the error from the BFGS() call:

DimensionMismatch: destination axes (Base.OneTo(1),) are not compatible with source axes (Base.OneTo(1), Base.OneTo(1))
Stacktrace:
[1] throwdm(axdest::Tuple{Base.OneTo{Int64}}, axsrc::Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}})
@ Base.Broadcast ./broadcast.jl:1080
[2] copyto!
@ ./broadcast.jl:992 [inlined]
[3] copyto!
@ ./broadcast.jl:956 [inlined]
[4] copy
@ ./broadcast.jl:928 [inlined]
[5] materialize
@ ./broadcast.jl:903 [inlined]
[6] _init_identity_matrix(x::NamedVector{Float64, Vector{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}}}, scale::Float64)
@ Optim ~/.julia/packages/Optim/V8ZEC/src/multivariate/solvers/first_order/bfgs.jl:63
[7] _init_identity_matrix(x::NamedVector{Float64, Vector{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}}})
@ Optim ~/.julia/packages/Optim/V8ZEC/src/multivariate/solvers/first_order/bfgs.jl:62
[8] initial_state(method::BFGS{LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Nothing, Nothing, Flat}, options::Optim.Options{Float64, Nothing}, d::TwiceDifferentiable{Float64, NamedVector{Float64, Vector{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}}}, NamedMatrix{Float64, Matrix{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}, OrderedCollections.OrderedDict{String, Int64}}}, NamedVector{Float64, Vector{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}}}}, initial_x::NamedVector{Float64, Vector{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}}})
@ Optim ~/.julia/packages/Optim/V8ZEC/src/multivariate/solvers/first_order/bfgs.jl:101
[9] optimize(d::TwiceDifferentiable{Float64, NamedVector{Float64, Vector{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}}}, NamedMatrix{Float64, Matrix{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}, OrderedCollections.OrderedDict{String, Int64}}}, NamedVector{Float64, Vector{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}}}}, initial_x::NamedVector{Float64, Vector{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}}}, method::BFGS{LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Nothing, Nothing, Flat}, options::Optim.Options{Float64, Nothing})
@ Optim ~/.julia/packages/Optim/V8ZEC/src/multivariate/optimize/optimize.jl:36
[10] optimize(d::TwiceDifferentiable{Float64, NamedVector{Float64, Vector{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}}}, NamedMatrix{Float64, Matrix{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}, OrderedCollections.OrderedDict{String, Int64}}}, NamedVector{Float64, Vector{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}}}}, initial_x::NamedVector{Float64, Vector{Float64}, Tuple{OrderedCollections.OrderedDict{String, Int64}}}, method::BFGS{LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Nothing, Nothing, Flat})
@ Optim ~/.julia/packages/Optim/V8ZEC/src/multivariate/optimize/optimize.jl:36
[11] top-level scope
@ ~/Documents/impact-bam/dementia_forecast5.ipynb:6
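Judging by the stack trace, the failure happens inside Optim's _init_identity_matrix when it receives my NamedVector starting point, before my log-likelihood is even evaluated. So I would guess the following minimal sketch (untested, independent of my model) reproduces it:

using Optim, NamedArrays

x0 = NamedArray([0.5], (["Noise level"],))  # same kind of NamedArray starting point as above
f(x) = sum(x .^ 2)                          # any smooth objective should do

optimize(f, x0, LBFGS())  # the analogous call works on my real problem
optimize(f, x0, BFGS())   # this call pattern is what raises the DimensionMismatch for me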