I am attempting a problem where my observed variable (tested_positive
) has missing values. I am wondering how to attempt such a problem using Turing.jl.
In my cases I have popn
covariates which have no missing values, tested_cases
which also have no missing values (This is “n” in Binomial(n,p)). But my tested_positive
(y) has missing values. “p” is modelled as gp + covariates.
I have also indicated at the very end how this can be done in numpyro
Variables
popn = vcat(df_lo_n.tot_popn, df_hi_n.tot_popn) #(58,)
tested_cases = vcat(df_lo_n.tot_specs, df_hi_n.tot_specs) #(58,)
tested_positive = vcat(df_lo_n.tot_cases, fill(missing, size(df_hi_n,1))) #(58,) with 49 missing
observed_idxs = findall(!ismissing,tested_positive) #(9,)
I have written the following so far, where I am guessing during MCMC you ignore the missing cases but during inference it should account for them - I am not sure how to do this in Turing. This code below is likely incorrect and I get an error. I am guessing this error comes up due to the missing values in the vector. Any idea how to solve such a problem ?
@model function prev_vaegp_bin(popn, tested_cases, tested_positive, observed_idxs)
z ~ MvNormal(zeros(40), Diagonal(ones(40)))
μ ~ Normal(0,1)
β ~ Normal(0,1)
# Reconstruct GP from latent variable z
gp_recon, _ = decode(vae, z, trained_params, trained_states) #(58,)
logits_prev = @. μ + gp_recon + β * popn #(58,)
prev = logistic.(logits_prev)
for i in 1:size(tested_cases,1)
if i in observed_idxs
tested_positive[i] ~ Binomial(tested_cases[i], prev[i])
else
tested_positive[i] ~ Binomial(tested_cases[i], prev[i]) # prior predictive
end
end
end
{
"name": "ErrorException",
"message": "failed to find valid initial parameters in 1000 tries. This may indicate an error with the model or AD backend; please open an issue at https://github.com/TuringLang/Turing.jl/issues",
"stack": "failed to find valid initial parameters in 1000 tries. This may indicate an error with the model or AD backend; please open an issue at https://github.com/TuringLang/Turing.jl/issues
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] find_initial_params(rng::TaskLocalRNG, model::DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, varinfo::DynamicPPL.VarInfo{@NamedTuple{z::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:z, typeof(identity)}, Int64}, Vector{DiagNormal}, Vector{AbstractPPL.VarName{:z, typeof(identity)}}, Vector{Float64}}, μ::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:μ, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:μ, typeof(identity)}}, Vector{Float64}}, β::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:β, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:β, typeof(identity)}}, Vector{Float64}}, tested_positive::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}, Int64}, Vector{Binomial{Float64}}, Vector{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}}, Vector{Int64}}}, Float64}, hamiltonian::AdvancedHMC.Hamiltonian{AdvancedHMC.DiagEuclideanMetric{Float64, Vector{Float64}}, AdvancedHMC.GaussianKinetic, Base.Fix1{typeof(LogDensityProblems.logdensity), LogDensityFunction{DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, DynamicPPL.VarInfo{@NamedTuple{z::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:z, typeof(identity)}, Int64}, Vector{DiagNormal}, Vector{AbstractPPL.VarName{:z, typeof(identity)}}, Vector{Float64}}, μ::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:μ, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:μ, typeof(identity)}}, Vector{Float64}}, β::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:β, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:β, typeof(identity)}}, Vector{Float64}}, tested_positive::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}, Int64}, Vector{Binomial{Float64}}, Vector{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}}, Vector{Int64}}}, Float64}, DynamicPPL.SamplingContext{DynamicPPL.Sampler{NUTS{AutoForwardDiff{nothing, Nothing}, AdvancedHMC.DiagEuclideanMetric}}, DynamicPPL.DefaultContext, TaskLocalRNG}, AutoForwardDiff{12, ForwardDiff.Tag{DynamicPPL.DynamicPPLTag, Float64}}}}, Base.Fix1{typeof(LogDensityProblems.logdensity_and_gradient), LogDensityFunction{DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, DynamicPPL.VarInfo{@NamedTuple{z::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:z, typeof(identity)}, Int64}, Vector{DiagNormal}, Vector{AbstractPPL.VarName{:z, typeof(identity)}}, Vector{Float64}}, μ::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:μ, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:μ, typeof(identity)}}, Vector{Float64}}, β::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:β, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:β, typeof(identity)}}, Vector{Float64}}, tested_positive::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}, Int64}, Vector{Binomial{Float64}}, Vector{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}}, Vector{Int64}}}, Float64}, DynamicPPL.SamplingContext{DynamicPPL.Sampler{NUTS{AutoForwardDiff{nothing, Nothing}, AdvancedHMC.DiagEuclideanMetric}}, DynamicPPL.DefaultContext, TaskLocalRNG}, AutoForwardDiff{12, ForwardDiff.Tag{DynamicPPL.DynamicPPLTag, Float64}}}}}; max_attempts::Int64)
@ Turing.Inference ~/.julia/packages/Turing/1Egt9/src/mcmc/hmc.jl:170
[3] find_initial_params(rng::TaskLocalRNG, model::DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, varinfo::DynamicPPL.VarInfo{@NamedTuple{z::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:z, typeof(identity)}, Int64}, Vector{DiagNormal}, Vector{AbstractPPL.VarName{:z, typeof(identity)}}, Vector{Float64}}, μ::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:μ, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:μ, typeof(identity)}}, Vector{Float64}}, β::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:β, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:β, typeof(identity)}}, Vector{Float64}}, tested_positive::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}, Int64}, Vector{Binomial{Float64}}, Vector{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}}, Vector{Int64}}}, Float64}, hamiltonian::AdvancedHMC.Hamiltonian{AdvancedHMC.DiagEuclideanMetric{Float64, Vector{Float64}}, AdvancedHMC.GaussianKinetic, Base.Fix1{typeof(LogDensityProblems.logdensity), LogDensityFunction{DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, DynamicPPL.VarInfo{@NamedTuple{z::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:z, typeof(identity)}, Int64}, Vector{DiagNormal}, Vector{AbstractPPL.VarName{:z, typeof(identity)}}, Vector{Float64}}, μ::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:μ, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:μ, typeof(identity)}}, Vector{Float64}}, β::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:β, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:β, typeof(identity)}}, Vector{Float64}}, tested_positive::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}, Int64}, Vector{Binomial{Float64}}, Vector{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}}, Vector{Int64}}}, Float64}, DynamicPPL.SamplingContext{DynamicPPL.Sampler{NUTS{AutoForwardDiff{nothing, Nothing}, AdvancedHMC.DiagEuclideanMetric}}, DynamicPPL.DefaultContext, TaskLocalRNG}, AutoForwardDiff{12, ForwardDiff.Tag{DynamicPPL.DynamicPPLTag, Float64}}}}, Base.Fix1{typeof(LogDensityProblems.logdensity_and_gradient), LogDensityFunction{DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, DynamicPPL.VarInfo{@NamedTuple{z::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:z, typeof(identity)}, Int64}, Vector{DiagNormal}, Vector{AbstractPPL.VarName{:z, typeof(identity)}}, Vector{Float64}}, μ::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:μ, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:μ, typeof(identity)}}, Vector{Float64}}, β::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:β, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:β, typeof(identity)}}, Vector{Float64}}, tested_positive::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}, Int64}, Vector{Binomial{Float64}}, Vector{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}}, Vector{Int64}}}, Float64}, DynamicPPL.SamplingContext{DynamicPPL.Sampler{NUTS{AutoForwardDiff{nothing, Nothing}, AdvancedHMC.DiagEuclideanMetric}}, DynamicPPL.DefaultContext, TaskLocalRNG}, AutoForwardDiff{12, ForwardDiff.Tag{DynamicPPL.DynamicPPLTag, Float64}}}}})
@ Turing.Inference ~/.julia/packages/Turing/1Egt9/src/mcmc/hmc.jl:145
[4] initialstep(rng::TaskLocalRNG, model::DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, spl::DynamicPPL.Sampler{NUTS{AutoForwardDiff{nothing, Nothing}, AdvancedHMC.DiagEuclideanMetric}}, vi_original::DynamicPPL.VarInfo{@NamedTuple{z::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:z, typeof(identity)}, Int64}, Vector{DiagNormal}, Vector{AbstractPPL.VarName{:z, typeof(identity)}}, Vector{Float64}}, μ::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:μ, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:μ, typeof(identity)}}, Vector{Float64}}, β::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:β, typeof(identity)}, Int64}, Vector{Normal{Float64}}, Vector{AbstractPPL.VarName{:β, typeof(identity)}}, Vector{Float64}}, tested_positive::DynamicPPL.Metadata{Dict{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}, Int64}, Vector{Binomial{Float64}}, Vector{AbstractPPL.VarName{:tested_positive, Accessors.IndexLens{Tuple{Int64}}}}, Vector{Int64}}}, Float64}; initial_params::Nothing, nadapts::Int64, kwargs::@Kwargs{})
@ Turing.Inference ~/.julia/packages/Turing/1Egt9/src/mcmc/hmc.jl:210
[5] step(rng::TaskLocalRNG, model::DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, spl::DynamicPPL.Sampler{NUTS{AutoForwardDiff{nothing, Nothing}, AdvancedHMC.DiagEuclideanMetric}}; initial_params::Nothing, kwargs::@Kwargs{nadapts::Int64})
@ DynamicPPL ~/.julia/packages/DynamicPPL/nxcz4/src/sampler.jl:125
[6] macro expansion
@ ~/.julia/packages/AbstractMCMC/7f1oY/src/sample.jl:0 [inlined]
[7] (::AbstractMCMC.var\"#25#26\"{Nothing, Int64, Int64, Int64, UnionAll, Nothing, @Kwargs{nadapts::Int64}, TaskLocalRNG, DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, DynamicPPL.Sampler{NUTS{AutoForwardDiff{nothing, Nothing}, AdvancedHMC.DiagEuclideanMetric}}, Int64, Float64, Int64, Int64})()
@ AbstractMCMC ~/.julia/packages/AbstractMCMC/7f1oY/src/logging.jl:134
[8] with_logstate(f::AbstractMCMC.var\"#25#26\"{Nothing, Int64, Int64, Int64, UnionAll, Nothing, @Kwargs{nadapts::Int64}, TaskLocalRNG, DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, DynamicPPL.Sampler{NUTS{AutoForwardDiff{nothing, Nothing}, AdvancedHMC.DiagEuclideanMetric}}, Int64, Float64, Int64, Int64}, logstate::Base.CoreLogging.LogState)
@ Base.CoreLogging ./logging/logging.jl:524
[9] with_logger(f::Function, logger::LoggingExtras.TeeLogger{Tuple{LoggingExtras.EarlyFilteredLogger{TerminalLoggers.TerminalLogger, AbstractMCMC.var\"#1#3\"{Module}}, LoggingExtras.EarlyFilteredLogger{Base.CoreLogging.SimpleLogger, AbstractMCMC.var\"#2#4\"{Module}}}})
@ Base.CoreLogging ./logging/logging.jl:635
[10] with_progresslogger(f::Function, _module::Module, logger::Base.CoreLogging.SimpleLogger)
@ AbstractMCMC ~/.julia/packages/AbstractMCMC/7f1oY/src/logging.jl:157
[11] macro expansion
@ ~/.julia/packages/AbstractMCMC/7f1oY/src/logging.jl:133 [inlined]
[12] mcmcsample(rng::TaskLocalRNG, model::DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, sampler::DynamicPPL.Sampler{NUTS{AutoForwardDiff{nothing, Nothing}, AdvancedHMC.DiagEuclideanMetric}}, N::Int64; progress::Bool, progressname::String, callback::Nothing, num_warmup::Int64, discard_initial::Int64, thinning::Int64, chain_type::Type, initial_state::Nothing, kwargs::@Kwargs{nadapts::Int64})
@ AbstractMCMC ~/.julia/packages/AbstractMCMC/7f1oY/src/sample.jl:151
[13] sample(rng::TaskLocalRNG, model::DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, sampler::DynamicPPL.Sampler{NUTS{AutoForwardDiff{nothing, Nothing}, AdvancedHMC.DiagEuclideanMetric}}, N::Int64; chain_type::Type, resume_from::Nothing, initial_state::Nothing, progress::Bool, nadapts::Int64, discard_adapt::Bool, discard_initial::Int64, kwargs::@Kwargs{})
@ Turing.Inference ~/.julia/packages/Turing/1Egt9/src/mcmc/hmc.jl:117
[14] sample
@ ~/.julia/packages/Turing/1Egt9/src/mcmc/hmc.jl:86 [inlined]
[15] #sample#101
@ ~/.julia/packages/Turing/1Egt9/src/mcmc/abstractmcmc.jl:29 [inlined]
[16] sample
@ ~/.julia/packages/Turing/1Egt9/src/mcmc/abstractmcmc.jl:20 [inlined]
[17] #sample#100
@ ~/.julia/packages/Turing/1Egt9/src/mcmc/abstractmcmc.jl:17 [inlined]
[18] sample(model::DynamicPPL.Model{typeof(prev_vaegp_bin), (:popn, :tested_cases, :tested_positive, :observed_idxs), (), (), Tuple{Vector{Int64}, Vector{Union{Missing, Int64}}, Vector{Union{Missing, Int64}}, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}, alg::NUTS{AutoForwardDiff{nothing, Nothing}, AdvancedHMC.DiagEuclideanMetric}, N::Int64)
@ Turing.Inference ~/.julia/packages/Turing/1Egt9/src/mcmc/abstractmcmc.jl:14
[19] top-level scope
@ ~/workspace/Lo2Hi-IV-Est-AggVAE/julia/jl_notebook_cell_df34fa98e69747e1a8f8a730347b8e2f_X36sZmlsZQ==.jl:6"
}
In numpyro you can use the numpyro.handlers.mask
to handle missing values.
def prev_vaegp_betabin(...)
# ...
if not inference:
n_positive_hi:Float[Array, "n_regions_hi"] = jnp.repeat(jnp.nan, M_hi.shape[0]) #e.g. (49,)
n_positive = jnp.concatenate([n_positive_lo, n_positive_hi], axis = 0) #e.g. (58,)
region_mask = ~jnp.isnan(n_positive) # e.g [True x 9 ... False x 49]
# We need to use numpyro.handlers.mask to make sure we can account for NaN values in observations
with numpyro.handlers.mask(mask = region_mask):
n_positive_obs = numpyro.sample(
"n_positive_obs",
dist.BetaBinomial(total_count=n_tested, concentration1=alpha, concentration0=beta),
obs = n_positive.astype(jnp.float32)
)
else:
# Inference mode, we do not have n_positive_obs
n_positive_obs = numpyro.sample(
"n_positive_obs",
dist.BetaBinomial(total_count=n_tested, concentration1=alpha, concentration0=beta),
)