Error with ForwardDiff Dual numbers (DifferentialEquations and Turing)

I am trying to estimate parameters for an ODE system using Turing, ForwardDiff, and DifferentialEquations. However, I consistently get the following error:

julia> TypeError: in setfield!, expected Float64, got ForwardDiff.Dual{Nothing,Float64,12}
Stacktrace:
[1] setproperty! at .\Base.jl:34 [inlined]
[2] set_W_γdt! at C:\Users\d.julia\packages\OrdinaryDiffEq\VPJBD\src\nlsolve\utils.jl:34 [inlined]
[3] set_W_γdt! at C:\Users\d.julia\packages\OrdinaryDiffEq\VPJBD\src\nlsolve\utils.jl:32 [inlined] …
I tried logging the full stack trace, which can be viewed here:
https://cl1p.net/dfasdkmjnf384h

I found some related Q&As here, including [1], [2], [3], [4], [5], and [6]. Following the advice there, I tried to make my code play well with Dual numbers, but haven’t found a solution yet. I’ve included (what I think are) the relevant parts of my code below. Any help would be greatly appreciated. Thanks!

using Glob, CSV, Plots, StatsPlots
using DifferentialEquations, OrdinaryDiffEq, DiffEqSensitivity
using DiffEqBase: get_tmp, dualcache
using Turing, MCMCChains
using ForwardDiff
using Random, Distributions

function tm(v::Float64, θ::AbstractVector{T}) where T
    #elements of Q are functions of theta and `v`, e.g. a1 = theta[1]*(v/theta[2])
    
    a0, sa, b0, sb, c0, sc, d0, sd, g1, g2, h1, h2 = θ

    a1 = a0*exp(-v/sa)
    b1 = b0*exp(v/sb)
    a2 = c0*exp(-v/sc)
    b2 = d0*exp(v/sd)

    Q = T[a1 -(b1+g1+a1) -a1 h1-a1;
        g2 -g2 -(a2+h2+g2) b2-g2;
        0.0 g1 a2 -(b2 + h1)]

    return Q 
end 

# get starting values
function get_inits(Qh)
    return Qh[:,2:end] \ (-1 .* Qh[:,1])
end 

# ODE
function rhs!(du, u, Q, t)
    du .= Q[:,2:end]*u + Q[:,1] 
end 

# This gives a different error from the other things I've tried:
# ERROR: LoadError: MethodError: no method matching get_tmp(::Float64, ::Array{Float64,1})
# function rhs!(du, u, (Q, tmp), t)
#     tmp = DiffEqBase.get_tmp(tmp, u)
#     tmp .= Q[:,2:end]*u + Q[:,1] 
#     @. du = u + tmp 
#     nothing 
# end
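# For reference, my (possibly wrong) understanding of how dualcache/get_tmp are
# meant to be wired up: the cache is built once from a Float64 array and passed
# through the parameters, and get_tmp then hands back a Dual-sized buffer when
# u is a Dual array. Untested sketch using the names above and the
# get_tmp/dualcache imported from DiffEqBase:
# tmp_cache = dualcache(zeros(3))                   # build the cache once
# function rhs!(du, u, (Q, tmp_cache), t)
#     tmp = get_tmp(tmp_cache, u)                   # Dual buffer when u is Dual
#     tmp .= Q[:,2:end]*u .+ Q[:,1]
#     du .= tmp
#     nothing
# end
# prob = ODEProblem(rhs!, u1, ts, (Qv, tmp_cache))  # pass the cache via p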

times = collect(1.0:1000.0)
Nt = length(times)  
voltages = collect(-150.0:50.0:-50.0)
holds = [-20.0, -180.0]
data_to_fit = rand(Uniform(0,1), (length(voltages), length(holds)*Nt))

function do_sim(θ::AbstractVector{T};
            ydata = data_to_fit, times=times, 
            Nt=Nt, voltages=voltages, holds=holds) where {T<:Real} 
    """
    θ = parameters; used to construct a matrix when solving ODEs 
    data = Array{Float64,2}
    time, voltages, holds = Array{Float64,1}
    Nt = Int64
    """
    # array to hold simulation values
    ysim = zeros(T, size(ydata))
    
    ts = (times[1], times[end])

    # find initial values
    Qh1 = tm(holds[1], θ)
    u1 = convert.(T, Qh1[:,2:end] \ (-1 .* Qh1[:,1]))
    
    # tried this, too (doesn't work)
    # u1 = get_inits(Qh1)

    Qh2 = tm(holds[2], θ)
    u2 = convert.(T, Qh2[:,2:end] \ (-1 .* Qh2[:,1]))

    for (j, vt) in enumerate(voltages)
        Qv = tm(vt, θ)
        
        prob1 = ODEProblem(rhs!, u1, ts, Qv)
        prob2 = ODEProblem(rhs!, u2, ts, Qv)

        # tried this with `theta` and `Qv` to make u1/u2 the same type
        # prob1 = ODEProblem(rhs!, eltype(θ).(u1), ts, Qv)

        # solve ODEs
        sol1 = solve(prob1, TRBDF2(), saveat=times)
        sol2 = solve(prob2, TRBDF2(), saveat=times)
        # tried TRBDF2(autodiff=false)

        # add to array
        ysim[j,1:Nt] = sum(sol1[2:end,:], dims=1)
        ysim[j,1+Nt:end] = sum(sol2[2:end,:], dims=1)
    end 
    return ysim 
end 

# some parameters 
p0 = [-10.783723199314082, 2.768277944084355, -0.9532448651472752, 3.806992452948674,
      -12.864645880101888, 2.262134729959288, -3.8200088837688195, 3.1458924991336534,
      -4.992066945500882, -7.758380598719609, -9.485975086743023, -4.298650310672511]

Np = length(p0) 
Nv = length(voltages)

Turing.setadbackend(:forwarddiff)
@model function ChTurMod(y, ::Type{T} = Float64) where T
    """
    y = data array
    """
    # What I have right now, doesn't work.
    θ = Vector{T}(undef, Np)
    for i = 1:Np
        θ[i] ~ Normal(0, 1)
    end 

    # Tried this as well, didn't work.   
    # θ ~ MvNormal(zeros(T, 12),1)
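    # Another way of writing the prior loop above that I believe is equivalent
    # (untested; filldist should be re-exported by Turing):
    # θ ~ filldist(Normal(0, 1), Np)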

    ysim = do_sim( exp.(θ .+ p0) )
    for j = 1:Nv
        y[j,:] ~ MvNormal(ysim[j,:], 0.02)
    end 
end

# run a simulation
# ysim = do_sim(exp.(p0))   

# Output of the above should look like:
# 3×2000 Array{Float64,2}:
# 0.030866  0.0319676  0.0342911  …  0.988392  0.988392
# 0.030866  0.0309165  0.0310629     0.945324  0.945307
# 0.030866  0.0308674  0.0308714     0.133803  0.133578

# sampling
to_fit = ChTurMod(data_to_fit)
chain = sample(to_fit, Turing.NUTS(0.65), 1000)

# plot simulation with random data and default parameters 
#=
ysim = do_sim(exp.(p0))
plt = plot(size=(400,280))
plt2 = plot(size=(400,280))
for j = 1:size(ysim)[1]
    plot!(plt, times, ysim[j,1:Nt], lw=2, label="")
    plot!(plt2, times, ysim[j, Nt+1:end], lw=2, label="")

    # plot random data 
    plot!(plt, times, data_to_fit[j,1:Nt], 
        lalpha=0.2, label="")
    plot!(plt2, times, data_to_fit[j, Nt+1:end], 
        lalpha=0.2, label="")
end 
display(plt) 
display(plt2) 
=#

a1 isn’t defined, etc. Post runnable code. If you can’t share the equations, pare it down to a simpler model with the same error.

Thanks for the reply! I edited my post with runnable code and have included a hyperlink to a full stack trace, if that helps.

I’m not able to reproduce your error; instead I’m just getting NaN dts. Can you reduce this a little further to something that is reproducible? It would be helpful if it were just a ForwardDiff.gradient call instead of the Bayesian stuff, because the Bayesian part adds a bunch of randomness that shouldn’t be necessary to trigger the error. I would assume TRBDF2(autodiff=false) would fix your issue, so the comment saying you tried it is a little odd. prob1 = ODEProblem(rhs!, eltype(Qv).(u1), ts, Qv) should’ve worked too. Without a way to run this myself, I’m not sure I can debug it.
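Something along these lines would be the kind of reduction I mean (an untested sketch reusing the names from your post): differentiate a scalar summary of do_sim directly, with no Turing model in between. If this already throws the Dual TypeError, the problem is in do_sim/solve rather than in NUTS.

ForwardDiff.gradient(p -> sum(do_sim(exp.(p .+ p0))), zeros(Np))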

Thanks for the reply; as you suggested, I checked whether ForwardDiff.gradient works on my ODE-containing function do_sim. It doesn’t work when the output of do_sim is an array (return ysim), but it does work when I change the output to a scalar, e.g. return sum(ysim). Could this be related to my issue above? I also tried re-installing the packages I’m using, but the error persists.

Below is some similar, simplified code that simulates an ODE and returns either an array or a scalar. As with my original code, ForwardDiff.gradient works for the scalar output but not for the array output. A stack trace for the error is here: cl1p.net - The internet clipboard.

Thanks again for your time.

using DifferentialEquations, OrdinaryDiffEq, DiffEqSensitivity
using ForwardDiff
using Random

# ODE 
function new_rhs!(du, u, p, t)
    du[1] = p[1]*u[1] - p[2]*u[2] 
    du[2] = -p[2]*u[2] + p[1]*u[1]
end 

function array_output(θ::AbstractVector{T}) where T 
    u0 = T[1, 0]
    times = collect(0.0:5.0)
    tspan = (times[1], times[end])
    ysim = zeros(T, (length(times), 3)) 
    for i = 1:3
        prob = ODEProblem(new_rhs!, u0, tspan, θ)
        sol = solve(prob, Tsit5(), saveat=times)
        
        ysim[:,i] = sol[2,:]
        θ[2] += 1         
    end 
    return ysim 
end 

function scalar_output(θ::AbstractVector{T}) where T 
    u0 = T[1, 0]
    times = collect(0.0:5.0)
    tspan = (times[1], times[end])
    ysim = zeros(T, (length(times), 3)) 
    for i = 1:3
        prob = ODEProblem(new_rhs!, u0, tspan, θ)
        sol = solve(prob, TRBDF2(autodiff=false), saveat=times)
        
        ysim[:,i] = sol[2,:]
        θ[2] += 1         
    end 
    return sum(ysim)  
end 

# random parameters
θ = randn(2) 
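# note: both array_output and scalar_output mutate θ[2] in place (θ[2] += 1),
# so each call below sees a slightly different parameter vector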

array_output(θ)
# ┌ Info: Array output
# │   array_output(θ) = [0.0 0.0 0.0; 0.13249229591693032 0.08322799790397031 0.05659841019035259; 0.2754917416690064 0.11627431135675093 0.06486529436360391; 0.4298316363541616 0.12939465908359235 0.0660733333966357; 0.5964112521539043 0.13460393725843148 0.06624889275865706; 0.7762013193326375 0.13667245482004048 0.06627475261593223]

scalar_output(θ)
# ┌ Info: Scalar output
# │   scalar_output(θ) = 0.5066808613440488

dFdx_array = θ -> ForwardDiff.gradient(array_output, θ)
dFdx_scalar = θ -> ForwardDiff.gradient(scalar_output, θ)

dFdx_scalar(θ) # runs
# [1.6678760944510782, 0.3135090543379836]

dFdx_array(θ)   # doesn't run
# ERROR: LoadError: MethodError: no method matching extract_gradient!(::Type{ForwardDiff.Tag{typeof(array_output),Float64}}, ::Array{Array{ForwardDiff.Dual{ForwardDiff.Tag{typeof(array_output),Float64},Float64,2},2},1}, ::Array{ForwardDiff.Dual{ForwardDiff.Tag{typeof(array_output),Float64},Float64,2},2})
# Closest candidates are:
#   extract_gradient!(::Type{T}, ::AbstractArray, ::ForwardDiff.Dual) where T at C:\Users\d\.julia\packages\ForwardDiff\qTmqf\src\gradient.jl:79
#   extract_gradient!(::Type{T}, ::AbstractArray, ::Real) where T at C:\Users\d\.julia\packages\ForwardDiff\qTmqf\src\gradient.jl:78
#   extract_gradient!(::Type{T}, ::DiffResults.DiffResult, ::ForwardDiff.Dual) where T at C:\Users\d\.julia\packages\ForwardDiff\qTmqf\src\gradient.jl:73
# See file in post for logged stack trace 

The gradient of a function with a non-scalar output isn’t something that exists. Did you mean Jacobian?
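If an array-valued derivative is what you’re after, ForwardDiff.jacobian is the call to use; it treats the output array as a flattened vector, so each row of the result is the gradient of one output element. For your example, something like this (untested sketch reusing your array_output and θ) should return an 18×2 matrix (6 time points × 3 columns, by 2 parameters):

dFdx_jac = θ -> ForwardDiff.jacobian(array_output, θ)
dFdx_jac(θ)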