Thanks, this is very kind of you. Please see my code below:
using Pkg, Optim, GLM, Random, Statistics, Distributions, QuadGK, StatsFuns, ForwardDiff
Random.seed!(1234)
function choice_prob_naive(θ::Array{Float64,1}, r::Float64, x::Array{Float64,1}, y::Array{Float64,1}, R::Int64, c::Float64, d::Int64, p0::Float64)
α = θ[1]
βX = θ[2:5]
βY = θ[6:10]
γ0 = θ[11]
γ1 = θ[12:16]
logτ = θ[17]
logκ = θ[18]
s(ϵs) = d + 1.0/sqrt(exp(logτ)) * ϵs
p(ϵs) = p0/(p0 + (1.0 - p0) * exp((0.5 - s(ϵs))*exp(logτ)))
Eu(ϵs) = α*r + x'*βX + y'*βY + 0.05*α*R*r - ((γ0 + y'*γ1)*(1.0 - c) + 0.05*α*(1.0 + r - c))*p(ϵs) - (logκ - 1.0)
integrand(ϵs) = normcdf(0.0, 1.0, Eu(ϵs))*(1/sqrt(2.0*π)*exp(-(ϵs^2.0)/2.0))
(expectation, err) = quadgk(ϵ -> integrand(ϵ), -4.0, 4.0, rtol = 0.0001)
return expectation
end
function loglike(θ::Array{Float64,1}, r::Array{Float64,1}, x::Array{Float64,2}, y::Array{Float64,2}, R::Array{Int64,1}, c::Array{Float64,1}, d::Array{Int64,1}, p0::Array{Float64,1}, B::Array{Int64,1})
prob = zeros(size(B,1))
Threads.@threads for i = 1:size(B,1)
prob[i] = choice_prob_naive(θ, r[i], x[i,:], y[i,:], R[i], c[i], d[i], p0[i])
end
negloglike = - (B'*log.(prob) + (1.0 .- B)'*log.(1.0 .- prob))
return negloglike
end
td = TwiceDifferentiable(θ -> loglike(θ, r, x, y, R, c, d, p0, B), theta; autodiff = :forward);
@time theta_naive = optimize(td, theta, BFGS(), Optim.Options(f_tol=10^-3, iterations=100_000, show_trace=true, show_every=1))
The below creates a random sample. You can adjust the size with N, which is around 1 million in my actual sample.
N = 100000;
r = 400 .+ 200*randn(N);
x = [6 .+ 1*randn(N) rand(Binomial(1,0.15), N) 0.6 .+ 0.6*randn(N) 7 .+ 0.5*randn(N)];
y = [0.5 .+ 0.2*randn(N) 2.8 .+ 2.2*randn(N) rand(Binomial(1,0.12), N) 7 .+ 8.0*randn(N) 1.3 .+ 6.0*randn(N)];
R = rand(Binomial(1,0.1), N);
c = 0.5 .+ 0.2*randn(N);
d = rand(Binomial(1,0.03), N);
p0 = 0.5 .+ 0.2*randn(N);
B = rand(Binomial(1,0.3), N);
# initial value
alpha = 0.001;betaX = [0.02; 0.1;0.01;0.02]; betaY = [0.05; -0.01; -0.1; 0.25; 0.11]; gamma0 = 0.05; gamma1 = [0.1; -0.05; -0.0; -0.1; -0.1]; tau = 1; kappa = 2;
theta = [alpha; betaX; betaY; gamma0; gamma1; tau; kappa]
I also checked the NaN issue. It seems to occur when the search attempts very small value of parameters. For example:
choice_prob_naive(-5000*theta, r[1], x[1,:], y[1,:], R[1], c[1], d[1], p0[1])
ERROR: DomainError with 0.0:
integrand produced NaN in the interval (-4.0, 4.0)
Update: I found the issue is that when τ approaches zero, for example when logτ = -1000, exp(logτ) returns exactly zero, instead of a tiny positive value. In this case, s(ϵ) becomes Inf and p(ϵ) becomes NaN.
Not sure if there is a good solution to this issue. I tried to add
if logτ < -100
logτ = -100
end
This change made the program much slower.