Hello! I would like to automatically differentiate a log-likelihood to obtain gradients and Hessians but have trouble propagating ForwardDiff.Dual types through my function.
I have the same questions as here and tried to adapt the solution. However, despite declaring my parameter vector as AbstractVector{T}
I am still getting the error MethodError: no method matching Float64(::ForwardDiff.Dual{ForwardDiff.Tag{var"#173#174", Float64}, Float64, 2})
Can someone please help me rewrite the function so that it can be auto differentiated? The function call that produces the error is:
using Optim, ForwardDiff, NLSolversBase
s_v = [0.05; 0.05]
g = ForwardDiff.gradient(x -> FixedVarianceLogLik(x,hcat(ones(15),ones(15))), s_v)
And the definition of the contentious function is:
function FixedVarianceLogLik(param::AbstractVector{T},data,provideprocess = false,forecast_periods = 0) where T
sample_size = size(data)[1];
p=2;
steps = sample_size + forecast_periods;
prior_state = zeros(T,steps,p);
posterior_state = zeros(T,steps,p);
forecast_error = zeros(T,steps,1);
prior_covariance = zeros(T,steps,p,p);
forecast_error = zeros(T,steps,1);
MSE_covariance = zeros(T,steps,1,1);
neg_ll_one_obs = zeros(T,steps,1);
kalman_gain = zeros(T,steps,p);
posterior_state = zeros(T,steps,p);
posterior_covariance = zeros(T,steps,p,p);
#measurement variance
#recall that this is time-dependent
#process noise variance, restricted to the same
Q1 = param[1]*param[1];
Q2 = param[2]*param[2];
neg_ll::T = 0.;
#initialise
posterior_covariance[1,:,:] = I(p);
forecast_error[1] = 0.;
kalman_gain[1,:] = T[0;0];
neg_ll_one_obs[1] = 0.;
prior_state[1,:] = T[ data[1];0 ] ;
prior_covariance[1,:,:] = I(2);
forecast_error[1] = 0.;
MSE_covariance[1,1,1] = 1;
posterior_state[1,:] = T[data[1];0];
for t in 2:steps
#Prediction step starts with the prior estimate
Z = T[1 0];
transition_matrix = T[ 1 1;
0 1;];
prior_state[t,:] = transition_matrix * posterior_state[t-1,:];
#make sure it's a variance matrix proportional to the identity
process_noise_covariance_timeindep = T[ Q1*I(1) zeros(1,1);
zeros(1,1) Q2*I(1)]
prior_covariance[t,:,:] = transition_matrix * posterior_covariance[t-1,:,:] * transition_matrix' + process_noise_covariance_timeindep;
#try two things: Psi^star as slope AND FD as slope
if t <= sample_size
#Measurement step
#Collect measurement and forecast (which here equals the prior state) and calculate log-likelihood
#Update the filter with the measurements
forecast_error[t] = data[t,1] - (Z * prior_state[t,:])[1];
MSE_covariance[t,:,:] = Z * prior_covariance[t,:,:] * Z' + data[t,2] * I(1); #scalar
#
kalman_gain[t,:] = (prior_covariance[t,:,:] * Z') / MSE_covariance[t,:,:]; #need to multiply by the Z matrix which is unity here
neg_ll_one_obs[t] = 0.5 * log(MSE_covariance[t,1,1]) .+ 0.5 * (forecast_error[t]^2 .* inv(MSE_covariance[t,1,1]) );
neg_ll += neg_ll_one_obs[t];
posterior_state[t,:] = prior_state[t,:] + kalman_gain[t,:] * forecast_error[t];
posterior_covariance[t,:,:] = (I(p) - kalman_gain[t,:] * Z) * prior_covariance[t,:,:] * (I(p) - kalman_gain[t,:] * Z)' + kalman_gain[t,:] * data[t,2] * I(1) * kalman_gain[t,:]';
posterior_covariance[t,:,:] = (posterior_covariance[t,:,:] + posterior_covariance[t,:,:]') / 2
#then t rolls one forward so that t-1 posteriors are used in the next iteration for the priors
else
#Measurement step
#Collect measurement and forecast (which here equals the prior state) and calculate log-likelihood
#Update the filter with the measurements
forecast_error[t] = 0;
MSE_covariance[t,:,:] = Z * prior_covariance[t,:,:] * Z' + data[sample_size,2] * I(1); #scalar
#
kalman_gain[t,:] = T[0;0];
neg_ll_one_obs[t] = 0;
neg_ll += neg_ll_one_obs[t];
#keep the filter rolling forward without updating
posterior_state[t,:] = prior_state[t,:];
posterior_covariance[t,:,:] = prior_covariance[t,:,:];
posterior_covariance[t,:,:] = (posterior_covariance[t,:,:] + posterior_covariance[t,:,:]') / 2;
end
end
if provideprocess
# 1 2 3 4 5 6 7 8 9 10 11 12
return hcat(prior_state[:,1],prior_state[:,2],prior_covariance[:,1,1],prior_covariance[:,2,2],forecast_error, MSE_covariance[:,1], neg_ll_one_obs, kalman_gain[:,1], posterior_state[:,1],posterior_state[:,2],posterior_covariance[:,1,1],posterior_covariance[:,2,2]);
else
return neg_ll;
end
end