I’m having a hard time replicating the results of an ordered logit regression in Econometrics.jl
. The code for the estimation is done here. The objective function is here.
Here is my implementation (you can download the data from here)
using ComponentArrays
using UnPack
using Econometrics
using Distributions
using Optim
using CategoricalArrays
using DataFrames
using CSV
datadf = DataFrame(CSV.File("auto.csv"))
datadf.foreign = categorical(datadf.foreign)
datadf.foreign = recode(datadf.foreign, "Domestic" => 0, "Foreign" => 1)
datadf.rep77 = categorical(datadf.rep77, ordered=true)
datadf.rep77 = recode(datadf.rep77, "Poor" => 1, "Fair" => 2, "Average" => 3, "Good" => 4, "Excellent" => 5)
datadfnm = dropmissing(datadf[!, [:rep77, :foreign, :length, :mpg]])
# Econometrics.jl results
ecn = fit(EconometricModel, @formula(rep77 ~ foreign + length + mpg), datadfnm)
# my implementation
θ = ComponentArray{Float64}(β=[0.1, 0.1, 0.1], K=(1:4)./4)
function Λ(x)
cdf(Logistic(), x)
end
function mycumsum(K)
n = length(K)
s = similar(K)
s[1] = K[1]
for i in 2:n
s[i] = abs(K[i]) + s[i-1]
end
return s
end
function Pi(j, θ, x)
@unpack β, K = θ
KK = mycumsum(K)
kk = length(K)
n = length(x)
xb = sum(x[i]*β[i] for i in 1:n)
if 2 <= j <= kk
return Λ(KK[j] - xb) - Λ(KK[j-1] - xb)
elseif j == 1
return Λ(KK[1] - xb)
elseif j == kk+1
return 1 - Λ(KK[kk] - xb)
end
end
function boole(x, pred)
x == pred
end
function logℒ(θ; Yvec=Yvec, Xmat=Xmat)
lev = unique(Yvec)
s = zero(eltype(θ))
for i in 1:size(Xmat, 1), j in 1:length(lev)
xi = @view(Xmat[i, :])
s += log(Pi(j, θ, xi)) * boole(Yvec[i], lev[j])
end
return s
end
Xmat = Matrix(datadfnm[!, [:foreign, :length, :mpg]])
Yvec = datadfnm[!, :rep77]
θ0 = similar(θ)
θ0.β .= 0.0
θ0.K .= log.(1:4)
opt = optimize(θ -> -logℒ(θ), θ, Newton(), Optim.Options(g_tol = 1e-10, iterations=10_000); autodiff=:forward)
# my coefficients
opt.minimizer.β
# my thresholds
opt.minimizer.K |> mycumsum
# Econometrics.jl coefficients and thresholds:
ecn.β
Can any one give some advice?