I am trying to port the ordinal logistic regression to a Fisher Scoring implementation. I need to validate the objective function, gradient, and hessian for these purposes. I am struggling with the Hessian and would appreciate some help.
Here is the problem,
using Distributions, FillArrays, LinearAlgebra, Optim, RDatasets, StatsBase,
StatsModels
# Ordinal Logistic Regression
data = dataset("Ecdat", "Kakadu")[[:RecParks, :Sex, :Age, :Schooling, :Income]]
data.RecParks = categorical(data.RecParks, ordered = true)
formula = @formula(RecParks ~ Sex + Age + Schooling + Income)
terms = StatsModels.Terms(formula)
terms.intercept = false
mf = ModelFrame(terms, data)
mm = ModelMatrix(mf)
X = mm.m
y = model_response(mf)
wts = FrequencyWeights(Ones(y))
l = levels(y)
y = get.(Ref(Dict(zip(l, eachindex(l)))), y, nothing)
@views function f(β)
η = X * β[1:size(X, 2)]
thresholds = vcat(-Inf, β[size(X, 2) + 1:end], Inf)
-sum(wᵢ * log(cdf(Logistic(), thresholds[yᵢ + 1] - ηᵢ) -
cdf(Logistic(), thresholds[yᵢ] - ηᵢ))
for (yᵢ, ηᵢ, wᵢ) in zip(y, η, wts))
end
@views function g!(G, β)
η = X * β[1:size(X, 2)]
thresholds = vcat(-Inf, β[size(X, 2) + 1:end], Inf)
Y₀ = thresholds[y] .- η
Y₁ = thresholds[y .+ 1] .- η
F₀ = cdf.(Logistic(), Y₀)
F₁ = cdf.(Logistic(), Y₁)
f₀ = replace!(pdf.(Logistic(), Y₀), NaN => 0)
f₁ = pdf.(Logistic(), Y₁)
G[1:size(X, 2)] = -(X' * ((f₀ - f₁) ./ (F₁ - F₀)))
G[size(X, 2) + 1:end] =
map(x -> -sum((f₁ .* (y .== x) - f₀ .* (y .== (x + 1))) ./ (F₁ - F₀)),
1:length(β) - size(X, 2))
G
end
β = append!(zeros(size(X, 2)), range(0, 1, length = length(l) - 1))
td₀ = TwiceDifferentiable(f, g!, β)
β = optimize(td₀, β) |> Optim.minimizer
H = Optim.hessian!(td₀, β)
# Implementation of Hessian
η = X * β[1:size(X, 2)]
thresholds = vcat(-Inf, β[size(X, 2) + 1:end], Inf)
Y₀ = thresholds[y] .- η
Y₁ = thresholds[y .+ 1] .- η
F₀ = cdf.(Logistic(), Y₀)
F₁ = cdf.(Logistic(), Y₁)
f₀ = replace!(pdf.(Logistic(), Y₀), NaN => 0)
f₁ = pdf.(Logistic(), Y₁)
# Up to here same as gradient which is correct
H₀ = zero(H)
b1 = H[1:4,1:4] # Replace with ∂β/∂β
b2 = H[1:4,5:end] # Replace with ∂β/∂α
b3 = H[5:end,5:end] # Replace with ∂α/∂α
H₀[1:4,1:4] = b1
H₀[1:4,5:end] = b2
H₀[5:end,5:end] = b3
H₀ = Hermitian(H₀)
H ≈ H₀ # this should be true
The Hessian is described in McKelvey and Zavoina (1975).