NaNs during MCMC sampling?

Hi,
I’m trying to perform model calibration with MCMC. When sampling from my posterior, the sampler produces NaNs after about one sample.
I’m quite new to MCMC sampling and am wondering whether there are any obvious knobs to turn.


using AbstractMCMC
using AdvancedHMC
using CairoMakie
using Distributions
using ForwardDiff
using Random
using StatsFuns

using LogDensityProblems
using LogDensityProblemsAD
using LinearAlgebra

using AbstractGPs


##### 
# Perform calibration using MCMC on the toy example from McClarren, 11.1.1, p.277.

# Dropping an object in vacuum from an unknown height.
# Measure the time it takes for the object to fall to the bottom

# Collected measurements
x_i = [11.3315, 10.1265, 10.5592, 11.7906, 10.8204]
y_i = [1.51877, 1.43567, 1.46605, 1.54926, 1.48409]

# Define the model we wish to calibrate - the parameter we wish to calibrate is g
@. η(x, g) = sqrt(2.0*x/g)

# Define the number of simulations to run.
N = 10
# Sample x_star and t_star, the inputs to our simulator.
x_star = 11.0 .+ (randn(N) .* 0.1)
t_star = 9.81 .+ (randn(N) .* 0.01)

# run "simulations" on the simulator input
η_star = map(η, x_star, t_star)

######
# With this setup, we now perform calibration using MCMC
#

# Define the vector z:
z = [y_i; η_star]

#####
#
# Set up the Bayesian calibration through MCMC
#
# The vector params contains the parameters we wish to infer:
# params = [t_c, γ]

# Returns a closure that computes the log-likelihood of z for a given parameter vector params.
function gp_loglikelihood(x, z)
    function loglikelihood(params)
        # Map the raw γ parameter into its valid range (0, 2]
        γ = min(softplus(params[2]) + eps(Float64), 2.0)

        kernel = GammaExponentialKernel(; γ=γ)
        Σ_y = kernelmatrix(kernel, RowVecs(z[1:5]))
        
        Σ_η_test = kernelmatrix(kernel, RowVecs([x [params[1] * ones(5); t_star]]))
        Σ_all = Σ_η_test
        Σ_all[1:5, 1:5] .+= Σ_y
        
        det_Σ_all = det(Σ_all)

        # return f(z | t_c, Σ_y)
        return log(1.0 / sqrt(det_Σ_all) * exp(-0.5 * z' * z / det_Σ_all))
    end
end

x = [x_i; x_star]
x_m = x .- mean(x)
z_m = (z .- mean(z)) ./ std(z)


# Instantiate the GP log-likelihood with the available measurements and
# simulations, and test that it returns something reasonable
loglik_train = gp_loglikelihood(x, z)
loglik_train([9.81, -4.5])

# The prior for the parameters is a normal distribution 
logprior(params) = logpdf(MvNormal([9.8, -4.5], [0.1 0.0; 0.0 0.1]), params)

n_samples = 10_000
n_adapts = 1_000

# Boilerplate code for HMC
struct LogJointTrain end
# Log joint density we want to sample from.
# This is the sum of the GP log-likelihood and the priors of all parameters.
LogDensityProblems.logdensity(::LogJointTrain, θ) = loglik_train(θ) + logprior(θ)

# The parameter space is two-dimensional
LogDensityProblems.dimension(::LogJointTrain) = 2

# `LogJointTrain` does not allow evaluating derivatives of the log-density function,
# so it only has zeroth-order capabilities; the gradient is added via ADgradient below.
function LogDensityProblems.capabilities(::Type{LogJointTrain})
    return LogDensityProblems.LogDensityOrder{0}()
end

const logjoint_train = ADgradient(Val(:ForwardDiff), LogJointTrain())

# Initial point is a sample from the priors
initial_θ = [rand(Normal(9.8, 1.0)), rand(Normal(-4.5, 0.1))]

model = AdvancedHMC.LogDensityModel(logjoint_train)
nuts = NUTS(0.8)
samples = AbstractMCMC.sample(model, nuts, n_adapts+n_samples, n_adapts=n_adapts, initial_params=initial_θ)

samples_constrained = [[p.z.θ[1], softplus(p.z.θ[2])] for p in samples]
mean_samples = mean(samples_constrained)

This fails with:

ERROR: ArgumentError: GammaExponentialKernel: γ = Dual{ForwardDiff.Tag{Base.Fix1{typeof(LogDensityProblems.logdensity), LogJointTrain}, Float64}}(NaN,NaN,NaN) does not satisfy the constraint γ ∈ (0, 2].
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/KernelFunctions/LoO92/src/utils.jl:5 [inlined]
  [2] GammaExponentialKernel
    @ ~/.julia/packages/KernelFunctions/LoO92/src/basekernels/exponential.jl:129 [inlined]
  [3] #GammaExponentialKernel#23
    @ ~/.julia/packages/KernelFunctions/LoO92/src/basekernels/exponential.jl:135 [inlined]
  [4] GammaExponentialKernel
    @ ~/.julia/packages/KernelFunctions/LoO92/src/basekernels/exponential.jl:134 [inlined]
  [5] (::var"#loglikelihood#5"{Vector{…}, Vector{…}})(params::Vector{ForwardDiff.Dual{…}})
    @ Main ~/julia_envs/bayes_fun/src/gp_fit05.jl:98
  [6] logdensity(::LogJointTrain, θ::Vector{ForwardDiff.Dual{ForwardDiff.Tag{Base.Fix1{…}, Float64}, Float64, 2}})
    @ Main ~/julia_envs/bayes_fun/src/gp_fit05.jl:136
  [7] Fix1
    @ ./operators.jl:1118 [inlined]
  [8] vector_mode_dual_eval!
    @ ~/.julia/packages/ForwardDiff/PcZ48/src/apiutils.jl:24 [inlined]
  [9] vector_mode_gradient!(result::DiffResults.MutableDiffResult{…}, f::Base.Fix1{…}, x::Vector{…}, cfg::ForwardDiff.GradientConfig{…})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/PcZ48/src/gradient.jl:96
 [10] gradient!
    @ ~/.julia/packages/ForwardDiff/PcZ48/src/gradient.jl:37 [inlined]
 [11] gradient!
    @ ~/.julia/packages/ForwardDiff/PcZ48/src/gradient.jl:35 [inlined]
 [12] logdensity_and_gradient
    @ ~/.julia/packages/LogDensityProblemsAD/OQ0BL/ext/LogDensityProblemsADForwardDiffExt.jl:118 [inlined]
 [13] Fix1
    @ ./operators.jl:1118 [inlined]
 [14] ∂H∂θ(h::Hamiltonian{DiagEuclideanMetric{…}, GaussianKinetic, Base.Fix1{…}, Base.Fix1{…}}, θ::Vector{Float64})
    @ AdvancedHMC ~/.julia/packages/AdvancedHMC/AlvV4/src/hamiltonian.jl:38
 [15] step(lf::Leapfrog{…}, h::Hamiltonian{…}, z::AdvancedHMC.PhasePoint{…}, n_steps::Int64; fwd::Bool, full_trajectory::Val{…})
    @ AdvancedHMC ~/.julia/packages/AdvancedHMC/AlvV4/src/integrator.jl:229
 [16] step (repeats 2 times)
    @ ~/.julia/packages/AdvancedHMC/AlvV4/src/integrator.jl:199 [inlined]
 [17] A
    @ ~/.julia/packages/AdvancedHMC/AlvV4/src/trajectory.jl:759 [inlined]
 [18] find_good_stepsize(rng::TaskLocalRNG, h::Hamiltonian{…}, θ::Vector{…}; max_n_iters::Int64)
    @ AdvancedHMC ~/.julia/packages/AdvancedHMC/AlvV4/src/trajectory.jl:781
 [19] find_good_stepsize
    @ ~/.julia/packages/AdvancedHMC/AlvV4/src/trajectory.jl:765 [inlined]
 [20] make_step_size(rng::TaskLocalRNG, integrator::Symbol, T::Type{…}, hamiltonian::Hamiltonian{…}, initial_params::Vector{…})
    @ AdvancedHMC ~/.julia/packages/AdvancedHMC/AlvV4/src/abstractmcmc.jl:315
 [21] make_step_size(rng::TaskLocalRNG, spl::NUTS{…}, hamiltonian::Hamiltonian{…}, initial_params::Vector{…})
    @ AdvancedHMC ~/.julia/packages/AdvancedHMC/AlvV4/src/abstractmcmc.jl:288
 [22] step(rng::TaskLocalRNG, model::AbstractMCMC.LogDensityModel{…}, spl::NUTS{…}; initial_params::Vector{…}, kwargs::@Kwargs{…})
    @ AdvancedHMC ~/.julia/packages/AdvancedHMC/AlvV4/src/abstractmcmc.jl:121
 [23] step
    @ ~/.julia/packages/AdvancedHMC/AlvV4/src/abstractmcmc.jl:102 [inlined]
 [24] macro expansion
    @ ~/.julia/packages/AbstractMCMC/YrmkI/src/sample.jl:130 [inlined]
 [25] macro expansion
    @ ~/.julia/packages/AbstractMCMC/YrmkI/src/logging.jl:16 [inlined]
 [26] mcmcsample(rng::TaskLocalRNG, model::AbstractMCMC.LogDensityModel{…}, sampler::NUTS{…}, N::Int64; progress::Bool, progressname::String, callback::AdvancedHMC.HMCProgressCallback{…}, discard_initial::Int64, thinning::Int64, chain_type::Type, initial_state::Nothing, kwargs::@Kwargs{…})
    @ AbstractMCMC ~/.julia/packages/AbstractMCMC/YrmkI/src/sample.jl:120
 [27] sample(rng::TaskLocalRNG, model::AbstractMCMC.LogDensityModel{…}, sampler::NUTS{…}, N::Int64; n_adapts::Int64, progress::Bool, verbose::Bool, callback::Nothing, kwargs::@Kwargs{…})
    @ AdvancedHMC ~/.julia/packages/AdvancedHMC/AlvV4/src/abstractmcmc.jl:55
 [28] sample
    @ AdvancedHMC ~/.julia/packages/AdvancedHMC/AlvV4/src/abstractmcmc.jl:39 [inlined]
 [29] #sample#17
    @ AbstractMCMC ~/.julia/packages/AbstractMCMC/YrmkI/src/sample.jl:21 [inlined]
 [30] top-level scope
Some type information was truncated. Use `show(err)` to see complete types.
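
One check that can help narrow this kind of failure down (a minimal sketch, assuming the definitions above, in particular logjoint_train and initial_θ, are in scope): evaluate the log density and its ForwardDiff gradient at the initial point before handing things to AdvancedHMC, and confirm that both are finite.

# Sanity check before sampling: the log joint and its gradient should both be
# finite at the initial point. A NaN or Inf here points at the likelihood itself
# rather than at the sampler settings.
lp, grad = LogDensityProblems.logdensity_and_gradient(logjoint_train, initial_θ)
@show lp grad
@assert isfinite(lp) && all(isfinite, grad)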

Posting a solution here for posterity:

  1. If the sampler proposes parameters that would be out of range, the likelihood function can return 0:
function gp_loglikelihood(x, z)
    function loglikelihood(params)
        # Return 0 if parameters are out of range
        params[3] < -1.0 && return 0.0
        params[4] < -1.0 && return 0.0
        [...]
    end
end
  2. Using specialized functions such as logdet aids numerical stability
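
To make the second point concrete, here is a minimal sketch of the return expression from the question rewritten with logdet. It computes the same value (Σ_all and z as in the question), but never takes the log of a possibly under- or overflowed det(Σ_all):

# Equivalent to log(1/sqrt(det(Σ_all)) * exp(-0.5 * z' * z / det(Σ_all)))
ld = logdet(Σ_all)
loglik = -0.5 * ld - 0.5 * (z' * z) * exp(-ld)

logdet sums the logarithms of the factorization's diagonal instead of multiplying the entries, so it stays finite in cases where det itself would under- or overflow.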

I would suggest -Inf instead of 0.0. The log of a very small positive number is very negative, not zero.
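
Applied to the two-parameter toy problem above, the two suggestions could be combined roughly like this. This is only a sketch: build_covariance is a hypothetical placeholder for the Σ_all construction in the question, and the -1.0 cutoff is just an example bound on the raw (pre-softplus) γ parameter.

function gp_loglikelihood_guarded(x, z)
    function loglikelihood(params)
        # Reject out-of-range proposals with log-likelihood -Inf (likelihood zero),
        # so the sampler discards them instead of propagating NaNs downstream
        params[2] < -1.0 && return -Inf

        # Hypothetical stand-in for the kernel-matrix construction in the question
        # (observation block plus simulation block)
        Σ_all = build_covariance(x, z, params)

        # Log-likelihood kept in log space via logdet, as in the sketch above
        ld = logdet(Σ_all)
        return -0.5 * ld - 0.5 * (z' * z) * exp(-ld)
    end
end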
