I’m trying to use the optimize
function from the Optim.jl package with gradients that I supply. The problem I’m having is that the function isn’t actually iterating: it computes the objective function once and then claims success, i.e.,
res = optimize(Optim.only_fg!((F,G,𝜹) -> fg!(F,G,𝜹,storage)), zeros(J),
               LBFGS(), Optim.Options(show_trace = true, iterations = 3))
results in
Iter     Function value   Gradient norm
     0     6.931472e+00             NaN
So it appears that G (the gradient) isn’t getting updated inside the fg!
function I defined. I double-checked that I can call my gradient function manually and get the correct gradient values back. I’m new to Julia (coming from R), so the idea of a function updating a variable’s values in place, without explicitly reassigning anything to that variable, still feels a bit like black magic.
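For example, here is a toy snippet (my own, separate from the model code below) showing the kind of behavior I mean:
v = [1.0, 2.0, 3.0]
f(x)  = (x = zeros(3); x)   # plain assignment inside the function
g!(x) = (x .= 0.0; x)       # broadcast (dotted) assignment inside the function
f(v);  println(v)           # [1.0, 2.0, 3.0] -- v is unchanged
g!(v); println(v)           # [0.0, 0.0, 0.0] -- v is changed in place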
Below is my code for full reproducibility. Any help would be appreciated.
using DataFrames
using Optim
function m(df::DataFrame, 𝜹::Vector{Float64})
    # Inputs:
    #   df - DataFrame with one row per pairwise comparison:
    #     jlo (Int64) - indexes the lower-indexed item in the comparison,
    #     jhi (Int64) - indexes the higher-indexed item in the comparison,
    #     y (Float64) - indicates whether the lower-indexed item was passed
    #       while the higher-indexed item was not passed.
    #   𝜹 - Vector{Float64} of item difficulties.
    # Output: ŷ, the predicted probability that the lower-indexed item is passed
    #   while the higher-indexed item is not.
    # Extract the relevant input/response information from the DataFrame
    jlo = df[:,:jlo]; jhi = df[:,:jhi]
    # For model identification, the first parameter is constrained to a value of zero
    𝜹c = [0; 𝜹]
    # Compute fij (like ŷ); see Zwinderman (1995), Eqns 1-3
    ŷ = exp.(𝜹c[jlo]) ./ (exp.(𝜹c[jlo]) .+ exp.(𝜹c[jhi]))
    return ŷ # \hat{y}
end
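# (Check on m: with 𝜹 = zeros(J), every entry of ŷ reduces to exp(0)/(exp(0)+exp(0)) = 0.5.)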
function loss(ŷ::Vector{Float64}, y::Vector{Float64})
    # Inputs:
    #   ŷ - the predicted probabilities
    #   y - the actual responses (binary)
    # Output: the negative pseudo-log-likelihood loss value
    𝓁oss = -1.0*sum(y.*log.(ŷ) + y.*log.(1.0 .- ŷ)) # negative to make it a minimization problem
    return 𝓁oss
end
function gradloss(ŷ::Vector{Float64}, y::Vector{Float64}, jlo::Vector{Int64}, jhi::Vector{Int64}, J::Int64)
    # Accumulate the gradient over all comparisons; entry 1 corresponds to the
    # constrained first parameter and is dropped before returning.
    ∂𝜹c = zeros(J+1)
    for i = 1:length(y)
        jilo = jlo[i]
        jihi = jhi[i]
        ∂i = y[i]*(1.0-ŷ[i]) - (1.0-y[i])*ŷ[i]
        ∂𝜹c[jilo] -= ∂i # flipped sign to make it a minimization problem
        ∂𝜹c[jihi] += ∂i # flipped sign to make it a minimization problem
    end
    return ∂𝜹c[2:(J+1)]
end
function fg!(F, G, 𝜹, storage)
    # Retrieve quantities out of storage
    J = storage["J"]
    x = storage["x"]
    y = x[:,:y]
    jlo = x[:,:jlo]
    jhi = x[:,:jhi]
    # Do the common calculations once
    ŷ = m(x,𝜹)
    if G != nothing
        G = gradloss(ŷ,y,jlo,jhi,J)
    end
    if F != nothing
        return loss(ŷ,y)
    end
end
# Make toy data
x = DataFrame(y   = [1.,1.,1.,0.,1.,1.],
              jlo = [1,1,1,2,2,3],
              jhi = [2,3,4,3,4,4])
J = 3
storage = Dict("x" => x, "J" => J)
Gtest = gradloss(m(x,zeros(J)), x[:,:y], x[:,:jlo], x[:,:jhi], storage["J"])
println(string("Gradient: ", Gtest)) #[-0.5,1.5,0]
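# Quick loss check: with 𝜹 = 0 every ŷ is 0.5, so this should print 10*log(2) ≈ 6.931472,
# the same function value shown in the trace above.
println(string("Loss: ", loss(m(x,zeros(J)), x[:,:y])))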
res = optimize(Optim.only_fg!((F,G,𝜹) -> fg!(F,G,𝜹,storage)), zeros(J),
               LBFGS(), Optim.Options(show_trace = true, iterations = 3))