Problem with ReverseDiff when using Turing

I’m receiving the following error when using ReverseDiff to fit my model. Does anyone have a clue what may be causing it? I’m on ReverseDiff 1.7.0, and ForwardDiff works fine for the same model. The sampler is NUTS(200, 0.65) and I’m using MCMCDistributed() for parallelism.
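For context, the backend and sampler are configured roughly like this (a sketch, not my exact script; model stands for the instantiated model shown further down, and the draw/chain counts are placeholders):

Turing.setadbackend(:reversediff)  # switch from the ForwardDiff default
chain = sample(model, NUTS(200, 0.65), MCMCDistributed(), 1_000, 3)

The run then fails with: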

Sampling (3 processes)   0%|                            |  ETA: N/A
Sampling (3 processes) 100%|████████████████████████████| Time: 0:01:24
ERROR: LoadError: TaskFailedException

    nested task error: On worker 2:
    MethodError: no method matching increment_deriv!(::Int64, ::Float64)
    Closest candidates are:
      increment_deriv!(!Matched::ReverseDiff.TrackedArray, ::Real, !Matched::Any) at ~/.julia/packages/ReverseDiff/iHmB4/src/derivatives/propagation.jl:34
      increment_deriv!(!Matched::AbstractArray, ::Real, !Matched::Any) at ~/.julia/packages/ReverseDiff/iHmB4/src/derivatives/propagation.jl:36
      increment_deriv!(!Matched::AbstractArray, ::Any) at ~/.julia/packages/ReverseDiff/iHmB4/src/derivatives/propagation.jl:38
      ...
    Stacktrace:
      [1] increment_deriv!
        @ ~/.julia/packages/ReverseDiff/iHmB4/src/derivatives/propagation.jl:36 [inlined]
      [2] broadcast_increment_deriv!
        @ ~/.julia/packages/ReverseDiff/iHmB4/src/derivatives/propagation.jl:145
      [3] special_reverse_exec!
        @ ~/.julia/packages/ReverseDiff/iHmB4/src/derivatives/elementwise.jl:442
      [4] reverse_exec!
        @ ~/.julia/packages/ReverseDiff/iHmB4/src/tape.jl:93
      [5] reverse_pass!
        @ ~/.julia/packages/ReverseDiff/iHmB4/src/tape.jl:87
      [6] reverse_pass!
        @ ~/.julia/packages/ReverseDiff/iHmB4/src/api/tape.jl:36 [inlined]
      [7] seeded_reverse_pass!
        @ ~/.julia/packages/ReverseDiff/iHmB4/src/api/utils.jl:31
      [8] seeded_reverse_pass!
        @ ~/.julia/packages/ReverseDiff/iHmB4/src/api/tape.jl:47
      [9] gradient!
        @ ~/.julia/packages/ReverseDiff/iHmB4/src/api/gradients.jl:42
     [10] gradient!
        @ ~/.julia/packages/ReverseDiff/iHmB4/src/api/gradients.jl:41 [inlined]
     [11] logdensity_and_gradient
        @ ~/.julia/packages/LogDensityProblems/oAYeE/src/AD_ReverseDiff.jl:55
     [12] ∂logπ∂θ
        @ ~/.julia/packages/Turing/szPqN/src/inference/hmc.jl:166 [inlined]
     [13] ∂H∂θ
        @ ~/.julia/packages/AdvancedHMC/iWHPQ/src/hamiltonian.jl:31 [inlined]
     [14] phasepoint
        @ ~/.julia/packages/AdvancedHMC/iWHPQ/src/hamiltonian.jl:76 [inlined]
     [15] phasepoint
        @ ~/.julia/packages/AdvancedHMC/iWHPQ/src/hamiltonian.jl:153
     [16] #initialstep#43
        @ ~/.julia/packages/Turing/szPqN/src/inference/hmc.jl:170
     [17] #step#23
        @ ~/.julia/packages/DynamicPPL/zPOYL/src/sampler.jl:104
     [18] macro expansion
        @ ~/.julia/packages/AbstractMCMC/fnRmh/src/sample.jl:120 [inlined]
     [19] macro expansion
        @ ~/.julia/packages/AbstractMCMC/fnRmh/src/logging.jl:16 [inlined]
     [20] #mcmcsample#20
        @ ~/.julia/packages/AbstractMCMC/fnRmh/src/sample.jl:111
     [21] #sample#42
        @ ~/.julia/packages/Turing/szPqN/src/inference/hmc.jl:133 [inlined]
     [22] sample_chain
        @ ~/.julia/packages/AbstractMCMC/fnRmh/src/sample.jl:448
     [23] sample_chain
        @ ~/.julia/packages/AbstractMCMC/fnRmh/src/sample.jl:445 [inlined]
     [24] #exec_from_cache#213
        @ /cluster/apps/nss/gcc-6.3.0/julia/1.7.3/x86_64/share/julia/stdlib/v1.7/Distributed/src/workerpool.jl:343
     [25] exec_from_cache
        @ /cluster/apps/nss/gcc-6.3.0/julia/1.7.3/x86_64/share/julia/stdlib/v1.7/Distributed/src/workerpool.jl:342
     [26] #110
        @ /cluster/apps/nss/gcc-6.3.0/julia/1.7.3/x86_64/share/julia/stdlib/v1.7/Distributed/src/process_messages.jl:278
     [27] run_work_thunk
        @ /cluster/apps/nss/gcc-6.3.0/julia/1.7.3/x86_64/share/julia/stdlib/v1.7/Distributed/src/process_messages.jl:63
     [28] macro expansion
        @ /cluster/apps/nss/gcc-6.3.0/julia/1.7.3/x86_64/share/julia/stdlib/v1.7/Distributed/src/process_messages.jl:278 [inlined]
     [29] #109
        @ ./task.jl:429
    Stacktrace:
      [1] (::Base.var"#898#900")(x::Task)
        @ Base ./asyncmap.jl:177
      [2] foreach(f::Base.var"#898#900", itr::Vector{Any})
        @ Base ./abstractarray.jl:2712
      [3] maptwice(wrapped_f::Function, chnl::Channel{Any}, worker_tasks::Vector{Any}, c::Vector{UInt64})
        @ Base ./asyncmap.jl:177
      [4] wrap_n_exec_twice
        @ ./asyncmap.jl:153 [inlined]
      [5] #async_usemap#883
        @ ./asyncmap.jl:103 [inlined]
      [6] #asyncmap#882
        @ ./asyncmap.jl:81 [inlined]
      [7] pmap(f::Function, p::CachingPool, c::Vector{UInt64}; distributed::Bool, batch_size::Int64, on_error::Nothing, retry_delays::Vector{Any}, retry_check::Nothing)
        @ Distributed /cluster/apps/nss/gcc-6.3.0/julia/1.7.3/x86_64/share/julia/stdlib/v1.7/Distributed/src/pmap.jl:126
      [8] pmap
        @ /cluster/apps/nss/gcc-6.3.0/julia/1.7.3/x86_64/share/julia/stdlib/v1.7/Distributed/src/pmap.jl:101 [inlined]
      [9] macro expansion
        @ ~/.julia/packages/AbstractMCMC/fnRmh/src/sample.jl:465 [inlined]
     [10] (::AbstractMCMC.var"#60#71"{Bool, Nothing, Base.Pairs{Symbol, UnionAll, Tuple{Symbol}, NamedTuple{(:chain_type,), Tuple{UnionAll}}}, Random._GLOBAL_RNG, DynamicPPL.Model{typeof(optimal_observer_model), (:sa_bi, :a_response_bi, :sv_bi, :v_response_bi, :behavioral_params), (), (), Tuple{Vector{Int64}, Vector{Float64}, Vector{Int64}, Vector{Float64}, Dict{Symbol, Int64}}, Tuple{}, DynamicPPL.DefaultContext}, DynamicPPL.Sampler{NUTS{Turing.Essential.ReverseDiffAD{false}, (), AdvancedHMC.DiagEuclideanMetric}}, Int64, CachingPool, Vector{UInt64}})()
        @ AbstractMCMC ./task.jl:429
Stacktrace:
  [1] sync_end(c::Channel{Any})
    @ Base ./task.jl:381
  [2] macro expansion
    @ ./task.jl:400 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/AbstractMCMC/fnRmh/src/sample.jl:421 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/ProgressLogging/6KXlp/src/ProgressLogging.jl:328 [inlined]
  [5] (::AbstractMCMC.var"#57#68"{Bool, String, Nothing, Base.Pairs{Symbol, UnionAll, Tuple{Symbol}, NamedTuple{(:chain_type,), Tuple{UnionAll}}}, Random._GLOBAL_RNG, DynamicPPL.Model{typeof(optimal_observer_model), (:sa_bi, :a_response_bi, :sv_bi, :v_response_bi, :behavioral_params), (), (), Tuple{Vector{Int64}, Vector{Float64}, Vector{Int64}, Vector{Float64}, Dict{Symbol, Int64}}, Tuple{}, DynamicPPL.DefaultContext}, DynamicPPL.Sampler{NUTS{Turing.Essential.ReverseDiffAD{false}, (), AdvancedHMC.DiagEuclideanMetric}}, Int64, Int64, CachingPool, Vector{UInt64}})()
    @ AbstractMCMC ~/.julia/packages/AbstractMCMC/fnRmh/src/logging.jl:12
  [6] with_logstate(f::Function, logstate::Any)
    @ Base.CoreLogging ./logging.jl:511
  [7] with_logger(f::Function, logger::LoggingExtras.TeeLogger{Tuple{LoggingExtras.EarlyFilteredLogger{TerminalLoggers.TerminalLogger, AbstractMCMC.var"#1#3"{Module}}, LoggingExtras.EarlyFilteredLogger{Logging.ConsoleLogger, AbstractMCMC.var"#2#4"{Module}}}})
    @ Base.CoreLogging ./logging.jl:623
  [8] with_progresslogger(f::Function, _module::Module, logger::Logging.ConsoleLogger)
    @ AbstractMCMC ~/.julia/packages/AbstractMCMC/fnRmh/src/logging.jl:36
  [9] macro expansion
    @ ~/.julia/packages/AbstractMCMC/fnRmh/src/logging.jl:11 [inlined]
 [10] mcmcsample(rng::Random._GLOBAL_RNG, model::DynamicPPL.Model{typeof(optimal_observer_model), (:sa_bi, :a_response_bi, :sv_bi, :v_response_bi, :behavioral_params), (), (), Tuple{Vector{Int64}, Vector{Float64}, Vector{Int64}, Vector{Float64}, Dict{Symbol, Int64}}, Tuple{}, DynamicPPL.DefaultContext}, sampler::DynamicPPL.Sampler{NUTS{Turing.Essential.ReverseDiffAD{false}, (), AdvancedHMC.DiagEuclideanMetric}}, ::MCMCDistributed, N::Int64, nchains::Int64; progress::Bool, progressname::String, init_params::Nothing, kwargs::Base.Pairs{Symbol, UnionAll, Tuple{Symbol}, NamedTuple{(:chain_type,), Tuple{UnionAll}}})
    @ AbstractMCMC ~/.julia/packages/AbstractMCMC/fnRmh/src/sample.jl:415
 [11] sample(rng::Random._GLOBAL_RNG, model::DynamicPPL.Model{typeof(optimal_observer_model), (:sa_bi, :a_response_bi, :sv_bi, :v_response_bi, :behavioral_params), (), (), Tuple{Vector{Int64}, Vector{Float64}, Vector{Int64}, Vector{Float64}, Dict{Symbol, Int64}}, Tuple{}, DynamicPPL.DefaultContext}, sampler::DynamicPPL.Sampler{NUTS{Turing.Essential.ReverseDiffAD{false}, (), AdvancedHMC.DiagEuclideanMetric}}, ensemble::MCMCDistributed, N::Int64, n_chains::Int64; chain_type::Type, progress::Bool, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Turing.Inference ~/.julia/packages/Turing/szPqN/src/inference/Inference.jl:220
 [12] sample
    @ ~/.julia/packages/Turing/szPqN/src/inference/Inference.jl:220 [inlined]
 [13] #sample#6
    @ ~/.julia/packages/Turing/szPqN/src/inference/Inference.jl:205 [inlined]
 [14] sample
    @ ~/.julia/packages/Turing/szPqN/src/inference/Inference.jl:205 [inlined]
 [15] #sample#5
    @ ~/.julia/packages/Turing/szPqN/src/inference/Inference.jl:192 [inlined]
 [16] sample(model::DynamicPPL.Model{typeof(optimal_observer_model), (:sa_bi, :a_response_bi, :sv_bi, :v_response_bi, :behavioral_params), (), (), Tuple{Vector{Int64}, Vector{Float64}, Vector{Int64}, Vector{Float64}, Dict{Symbol, Int64}}, Tuple{}, DynamicPPL.DefaultContext}, alg::NUTS{Turing.Essential.ReverseDiffAD{false}, (), AdvancedHMC.DiagEuclideanMetric}, ensemble::MCMCDistributed, N::Int64, n_chains::Int64)
    @ Turing.Inference ~/.julia/packages/Turing/szPqN/src/inference/Inference.jl:192
 [17] macro expansion
    @ /cluster/work/tnu/jachou/msi_project/model_inversion/cluster_uniform_bimodal_fit_all.jl:106 [inlined]
 [18] top-level scope
    @ timing.jl:220

My model is approximately of the following form:

@everywhere @model function optimal_observer_model(x1, y1, x2, y2, behavioral_params)
    # fixed (non-sampled) parameters that I set
    sigma_m2 = behavioral_params[:sigma_m2]

    # priors
    param1 ~ Uniform(0, 1)
    var_1 ~ Uniform(0.1, 200)
    var_2 ~ Uniform(0.1, 500)

    trial_num = length(x1)  # x2 has the same length

    # latent noisy measurements of the stimuli
    z1 ~ MvNormal(x1, sqrt(var_1) .* ones(trial_num))
    z2 ~ MvNormal(x2, sqrt(var_2) .* ones(trial_num))

    # deterministic mapping from measurements to estimates
    x1_est, x2_est = some_deterministic_transform(z1, z2, var_1, var_2, param1)

    # observed responses
    y1 ~ MvNormal(x1_est, sqrt(sigma_m2) .* ones(trial_num))
    y2 ~ MvNormal(x2_est, sqrt(sigma_m2) .* ones(trial_num))
end

Both x1 and x2 look like the following:

x = collect(-45:15:45)
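For concreteness, the model would be instantiated roughly as follows (the response vectors and the sigma_m2 value are made-up placeholders; the element types match those in the stack trace above):

x1 = collect(-45:15:45)                   # Vector{Int64}, as in the stack trace
x2 = collect(-45:15:45)
y1 = randn(length(x1))                    # placeholder Float64 responses
y2 = randn(length(x2))
behavioral_params = Dict(:sigma_m2 => 4)  # placeholder value

model = optimal_observer_model(x1, y1, x2, y2, behavioral_params)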

Without an MWE, it is difficult to diagnose. My suspicion is that ReverseDiff is not loaded on the other workers. Typically, all of the dependencies are loaded via the @everywhere macro:

@everywhere begin 
  using ReverseDiff
  ....
  include("my_code.jl")
end
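In full, the setup including worker creation might look like this (the worker count and file name are illustrative):

using Distributed
addprocs(3)                      # e.g. one worker per chain

@everywhere begin
    using Turing, ReverseDiff    # every dependency must be loaded on each worker
    include("my_code.jl")        # and so must the model definition
end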

Another solution is to use MCMCThreads() instead of MCMCDistributed(). In that case, you will not need @everywhere.
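For example, assuming Julia was started with multiple threads (e.g. julia --threads 3) and the same sampler settings (the draw count is a placeholder):

chain = sample(model, NUTS(200, 0.65), MCMCThreads(), 1_000, 3)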


Thank you again for the help! I will definitely provide an MWE if the other approaches I’m trying out don’t work. Sorry for the vague description in the question.