Issues with Convergence in Differential Equation Solver Using Dual Numbers

Hello Julia Community,

I’m currently working on a differential equation problem related to ice flow modeling and am running into convergence issues when using Dual numbers. The problem converges as expected with Float64 types, but when I switch to Dual numbers for automatic differentiation, the solver doesn’t seem to converge (or at least not in the way I expect).

Here’s a snippet of my code:

iceflow_prob = ODEProblem{true,SciMLBase.FullSpecialize}(SIA2D!, model.iceflow.H, params.simulation.tspan, tstops=params.solver.tstops, simulation)
iceflow_sol = solve(iceflow_prob, 
                    params.solver.solver, 
                    callback=cb, 
                    tstops=params.solver.tstops, 
                    reltol=params.solver.reltol, 
                    save_everystep=params.solver.save_everystep, 
                    progress=params.solver.progress, 
                    progress_steps=params.solver.progress_steps)

function SIA2D!(dH::Matrix{F}, H::Matrix{F}, simulation::SIM, t::AbstractFloat) where {F <: DualFloat, SIM <: Simulation}
    N=DualFloat
    # Retrieve parameters
    SIA2D_model::SIA2Dmodel = simulation.model.iceflow
    glacier::Sleipnir.Glacier2D = simulation.glaciers[simulation.model.iceflow.glacier_idx[]]
    params::Sleipnir.Parameters = simulation.parameters
    int_type = simulation.parameters.simulation.int_type
    H̄::Matrix{<:N} = SIA2D_model.H̄
    A::Ref{N} = SIA2D_model.A
    n::Ref{N} = SIA2D_model.n
    B::Matrix{<:N} = glacier.B
    S::Matrix{<:N} = SIA2D_model.S
    dSdx::Matrix{<:N} = SIA2D_model.dSdx
    dSdy::Matrix{<:N} = SIA2D_model.dSdy
    D::Matrix{<:N} = SIA2D_model.D
    Dx::Matrix{<:N} = SIA2D_model.Dx
    Dy::Matrix{<:N} = SIA2D_model.Dy
    dSdx_edges::Matrix{<:N} = SIA2D_model.dSdx_edges
    dSdy_edges::Matrix{<:N} = SIA2D_model.dSdy_edges
    ∇S::Matrix{<:N} = SIA2D_model.∇S
    ∇Sx::Matrix{<:N} = SIA2D_model.∇Sx
    ∇Sy::Matrix{<:N} = SIA2D_model.∇Sy
    Fx::Matrix{<:N} = SIA2D_model.Fx
    Fy::Matrix{<:N} = SIA2D_model.Fy
    Fxx::Matrix{<:N} = SIA2D_model.Fxx
    Fyy::Matrix{<:N} = SIA2D_model.Fyy
    Δx::N = glacier.Δx
    Δy::N = glacier.Δy
    Γ::Ref{N} = SIA2D_model.Γ
    ρ::F = simulation.parameters.physical.ρ
    g::F = simulation.parameters.physical.g

    # First, enforce values to be positive
    map!(x -> ifelse(x>0.0,x,0.0), H, H)
    
    S .= B .+ H

    # All grid variables computed in a staggered grid
    # Compute surface gradients on edges
    diff_x!(dSdx, S, Δx)  
    diff_y!(dSdy, S, Δy) 
    avg_y!(∇Sx, dSdx)
    avg_x!(∇Sy, dSdy)
    
    ∇S .= (∇Sx.^2 .+ ∇Sy.^2).^((n[] - 1)/2) 

    # @infiltrate
    avg!(H̄, H)
    Γ[] = 2.0 * A[] * (ρ * g)^n[] / (n[]+2) # 1 / m^3 s 
    D .= Γ[] .* H̄.^(n[] + 2) .* ∇S

    # Compute flux components
    @views diff_x!(dSdx_edges, S[:,2:end - 1], Δx)
    @views diff_y!(dSdy_edges, S[2:end - 1,:], Δy)

    # Cap surface elevation differences with the upstream ice thickness to
    # impose the boundary condition of the SIA equation
    η₀ = params.physical.η₀
    dSdx_edges .= @views @. min(dSdx_edges,  η₀ * H[2:end, 2:end-1]/Δx)
    dSdx_edges .= @views @. max(dSdx_edges, -η₀ * H[1:end-1, 2:end-1]/Δx)
    dSdy_edges .= @views @. min(dSdy_edges,  η₀ * H[2:end-1, 2:end]/Δy)
    dSdy_edges .= @views @. max(dSdy_edges, -η₀ * H[2:end-1, 1:end-1]/Δy)

    avg_y!(Dx, D)
    avg_x!(Dy, D)
    Fx .= .-Dx .* dSdx_edges
    Fy .= .-Dy .* dSdy_edges 

    #  Flux divergence
    diff_x!(Fxx, Fx, Δx)
    diff_y!(Fyy, Fy, Δy)
    inn(dH) .= .-(Fxx .+ Fyy) 
end

Also, see the ODE solver options below:

solver: RDPK3Sp35(; stage_limiter! = trivial_limiter!, step_limiter! = trivial_limiter!, thread = static(false),)
reltol: 1.0
step: 0.08333333333333333
tstops: [2010.0833333333333, 2010.1666666666667, 2010.25, 2010.3333333333333, 2010.4166666666667, 2010.5, 2010.5833333333333, 2010.6666666666667, 2010.75, 2010.8333333333333, 2010.9166666666667, 2011.0, 2011.0833333333333, 2011.1666666666667, 2011.25, 2011.3333333333333, 2011.4166666666667, 2011.5, 2011.5833333333333, 2011.6666666666667, 2011.75, 2011.8333333333333, 2011.9166666666667, 2012.0, 2012.0833333333333, 2012.1666666666667, 2012.25, 2012.3333333333333, 2012.4166666666667, 2012.5, 2012.5833333333333, 2012.6666666666667, 2012.75, 2012.8333333333333, 2012.9166666666667, 2013.0, 2013.0833333333333, 2013.1666666666667, 2013.25, 2013.3333333333333, 2013.4166666666667, 2013.5, 2013.5833333333333, 2013.6666666666667, 2013.75, 2013.8333333333333, 2013.9166666666667, 2014.0, 2014.0833333333333, 2014.1666666666667, 2014.25, 2014.3333333333333, 2014.4166666666667, 2014.5, 2014.5833333333333, 2014.6666666666667, 2014.75, 2014.8333333333333, 2014.9166666666667, 2015.0]
save_everystep: false
progress: true
progress_steps: 10

I’ve noticed that when I set the relative tolerance (reltol) to 1, the solver converges quickly, which suggests a sensitivity to tolerance settings or some other numerical stability issue. It is also worth mentioning that I am using a custom union type, const DualFloat = Union{AbstractFloat, ForwardDiff.Dual}.

I’m seeking insights or suggestions on the following:

  1. Are there specific considerations or best practices when using Dual numbers in differential equation solvers, particularly regarding solver settings like reltol?
  2. Could this issue be indicative of a deeper problem with numerical stability in my implementation, and how might I address it?
  3. Any recommendations for alternative solvers or approaches within the SciML ecosystem that are known to work well with Dual numbers?

Any help or guidance on this would be greatly appreciated!

Thank you in advance!


This is not a good idea. I’d fix that first. I presume your caches aren’t set up right.
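
To make the cache issue concrete (a minimal sketch, not code from the post above): a buffer preallocated as Float64 cannot hold Dual numbers, which is what happens when the cached Float64 matrices are reused while ForwardDiff differentiates through the model.

using ForwardDiff

buf = zeros(Float64, 3)           # a preallocated Float64 cache, like the model's matrices
d   = ForwardDiff.Dual(1.0, 1.0)  # value 1.0 carrying one partial derivative
# buf[1] = d                      # errors: a Dual cannot be stored in a Float64 array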

The tolerance during differentiation is slightly stricter since it’s also doing error control on the derivative.

Why this solver? It’s not generally considered one of the recommended ones for efficiency or lower tolerance convergence. I would try one of the recommended ones.
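
For example (illustrative settings, not taken from the original post), one of the standard recommended explicit solvers could be tried as a drop-in replacement:

# assumes `using OrdinaryDiffEq`; Tsit5 is one of the commonly recommended explicit methods
iceflow_sol = solve(iceflow_prob, Tsit5();
                    callback=cb,
                    tstops=params.solver.tstops,
                    reltol=params.solver.reltol)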

Thank you for your answer!

I will certainly look into PreallocationTools.jl.

Concerning the solver: I recycled the code from someone else, so I am not sure why this solver was chosen. It makes sense to try one of the recommended ones.

Did you get it working?

And a swap of RDPK3Sp35 to a potentially higher order method?

I got the forward model to work even without implementing DiffCache from PreallocationTools.jl; it seems the union type was indeed causing issues because I had not implemented it correctly.

However, I will still try to implement DiffCache, since I can benefit from the memory preallocation.

Concerning the solver: after benchmarking, this specific solver turned out to be the best of the explicit solvers.

A question about the implementation of DiffCache: should I set up the caches before the ODE is solved (assuming I only need to do this once), pass the caches on to the SIA2D forward model, and then access the caches within the model?

Yes
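
A minimal sketch of that pattern (the names and the toy right-hand side here are illustrative, assuming PreallocationTools.jl and OrdinaryDiffEq.jl):

using OrdinaryDiffEq, PreallocationTools

function rhs!(du, u, p, t)
    tmp = get_tmp(p.cache, u)   # returns a buffer matching eltype(u), Float64 or Dual
    @. tmp = u^2
    @. du = -tmp
    return nothing
end

u0 = rand(8)
p = (cache = DiffCache(similar(u0)),)      # build the cache once, before solving
prob = ODEProblem(rhs!, u0, (0.0, 1.0), p)
sol = solve(prob, Rosenbrock23())          # implicit solver exercises the Dual path via ForwardDiff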


Okay, I have implemented DiffCache and tried some other solvers, but I am getting the following warning (reltol = 1e-6):

┌ Warning: dt(2.2737367544323206e-13) <= dtmin(2.2737367544323206e-13) at t=2010.097388613536, and step error estimate = 52082.68508677296. Aborting. There is either an error in your model specification or the true solution is unstable.
└ @ SciMLBase ~/.julia/packages/SciMLBase/8XHkk/src/integrator_interface.jl:599

Also, when applying Rosenbrock23() as the solver method, I get the following error:

ERROR: First call to automatic differentiation for the Jacobian
failed. This means that the user `f` function is not compatible
with automatic differentiation. Methods to fix this include:

1. Turn off automatic differentiation (e.g. Rosenbrock23() becomes
   Rosenbrock23(autodiff=false)). More details can be found at
   https://docs.sciml.ai/DiffEqDocs/stable/features/performance_overloads/
2. Improving the compatibility of `f` with ForwardDiff.jl automatic 
   differentiation (using tools like PreallocationTools.jl). More details
   can be found at https://docs.sciml.ai/DiffEqDocs/stable/basics/faq/#Autodifferentiation-and-Dual-Numbers
3. Defining analytical Jacobians. More details can be
   found at https://docs.sciml.ai/DiffEqDocs/stable/types/ode_types/#SciMLBase.ODEFunction

Note: turning off automatic differentiation tends to have a very minimal
performance impact (for this use case, because it's forward mode for a
square Jacobian. This is different from optimization gradient scenarios).
However, one should be careful as some methods are more sensitive to
accurate gradients than others. Specifically, Rodas methods like `Rodas4`
and `Rodas5P` require accurate Jacobians in order to have good convergence,
while many other methods like BDF (`QNDF`, `FBDF`), SDIRK (`KenCarp4`),
and Rosenbrock-W (`Rosenbrock23`) do not. Thus if using an algorithm which
is sensitive to autodiff and solving at a low tolerance, please change the
algorithm as well.

And the following warning:

┌ Warning: The supplied DiffCache was too small and was enlarged. This incurs allocations
│     on the first call to `get_tmp`. If few calls to `get_tmp` occur and optimal performance is essential,
│     consider changing 'N'/chunk size of this DiffCache to 25.
└ @ PreallocationTools ~/.julia/packages/PreallocationTools/mJSsc/src/PreallocationTools.jl:151
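
(Side note: the chunk size mentioned in this last warning can be set explicitly when a DiffCache is constructed; a minimal sketch following the suggested size of 25, using one of the cache fields shown below:)

H̄_cache = DiffCache(model.iceflow.H̄, 25)   # the second argument sets the chunk size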

This leads me to believe that my implementation is not correct. I did the following in my forward model:

iceflow_caches = (
        H̄ = DiffCache(model.iceflow.H̄),
        S = DiffCache(model.iceflow.S),
        D = DiffCache(model.iceflow.D),
        Dx = DiffCache(model.iceflow.Dx),
        Dy = DiffCache(model.iceflow.Dy),
        ∇S = DiffCache(model.iceflow.∇S),
        ∇Sx = DiffCache(model.iceflow.∇Sx),
        ∇Sy = DiffCache(model.iceflow.∇Sy),
        Fx = DiffCache(model.iceflow.Fx),
        Fy = DiffCache(model.iceflow.Fy),
        Fxx = DiffCache(model.iceflow.Fxx),
        Fyy = DiffCache(model.iceflow.Fyy),
        dSdx = DiffCache(model.iceflow.dSdx),
        dSdy = DiffCache(model.iceflow.dSdy),
        dSdx_edges = DiffCache(model.iceflow.dSdx_edges),
        dSdy_edges = DiffCache(model.iceflow.dSdy_edges)
    )

p = (simulation,iceflow_caches)

# Define problem to be solved
iceflow_prob = ODEProblem{true,SciMLBase.FullSpecialize}(du, model.iceflow.H, params.simulation.tspan, tstops=params.solver.tstops, p)
iceflow_sol = solve(iceflow_prob, 
                        params.solver.solver, 
                        callback=cb, 
                        tstops=params.solver.tstops, 
                        reltol=params.solver.reltol, 
                        save_everystep=params.solver.save_everystep, 
                        progress=params.solver.progress, 
                        progress_steps=params.solver.progress_steps)

function SIA2D!(dH::Matrix{<:Real}, H::Matrix{<:Real}, p, t::F) where {F <: Real}

    simulation = p[1]
    caches = p[2]
    
    # Retrieve parameters
    SIA2D_model::SIA2Dmodel = simulation.model.iceflow
    glacier::Sleipnir.Glacier2D = simulation.glaciers[simulation.model.iceflow.glacier_idx[]]
    params::Sleipnir.Parameters = simulation.parameters
    int_type = simulation.parameters.simulation.int_type
    H̄ = get_tmp(caches.H̄,H)
    A = SIA2D_model.A
    n = SIA2D_model.n
    B = glacier.B
    S = get_tmp(caches.S,H)
    dSdx = get_tmp(caches.dSdx,H)
    dSdy = get_tmp(caches.dSdy,H)
    D = get_tmp(caches.D,H)
    Dx = get_tmp(caches.Dx,H)
    Dy = get_tmp(caches.Dy,H)
    dSdx_edges = get_tmp(caches.dSdx_edges,H)
    dSdy_edges = get_tmp(caches.dSdy_edges,H)
    ∇S = get_tmp(caches.∇S,H)
    ∇Sx = get_tmp(caches.∇Sx,H)
    ∇Sy = get_tmp(caches.∇Sy,H)
    Fx = get_tmp(caches.Fx,H)
    Fy = get_tmp(caches.Fy,H)
    Fxx = get_tmp(caches.Fxx,H)
    Fyy = get_tmp(caches.Fyy,H)
    Δx::F = glacier.Δx
    Δy::F = glacier.Δy
    Γ = SIA2D_model.Γ
    ρ::F = simulation.parameters.physical.ρ
    g::F = simulation.parameters.physical.g

    # First, enforce values to be positive
    map!(x -> ifelse(x>0.0,x,0.0), H, H)
    
    # Update glacier surface altimetry
    S .= B .+ H

    # All grid variables computed in a staggered grid
    # Compute surface gradients on edges
    diff_x!(dSdx, S, Δx)  
    diff_y!(dSdy, S, Δy) 
    avg_y!(∇Sx, dSdx)
    avg_x!(∇Sy, dSdy)
    ∇S .= (∇Sx.^2 .+ ∇Sy.^2).^((n[] - 1)/2) 

    # @infiltrate
    avg!(H̄, H)
    Γ[] = 2.0 * A[] * (ρ * g)^n[] / (n[]+2) # 1 / m^3 s 
    D .= Γ[] .* H̄.^(n[] + 2) .* ∇S

    # Compute flux components
    @views diff_x!(dSdx_edges, S[:,2:end - 1], Δx)
    @views diff_y!(dSdy_edges, S[2:end - 1,:], Δy)

    # Cap surface elevation differences with the upstream ice thickness to
    # impose the boundary condition of the SIA equation
    η₀ = params.physical.η₀
    dSdx_edges .= @views @. min(dSdx_edges,  η₀ * H[2:end, 2:end-1]/Δx)
    dSdx_edges .= @views @. max(dSdx_edges, -η₀ * H[1:end-1, 2:end-1]/Δx)
    dSdy_edges .= @views @. min(dSdy_edges,  η₀ * H[2:end-1, 2:end]/Δy)
    dSdy_edges .= @views @. max(dSdy_edges, -η₀ * H[2:end-1, 1:end-1]/Δy)

    avg_y!(Dx, D)
    avg_x!(Dy, D)
    Fx .= .-Dx .* dSdx_edges
    Fy .= .-Dy .* dSdy_edges 

    #  Flux divergence
    diff_x!(Fxx, Fx, Δx)
    diff_y!(Fyy, Fy, Δy)
    inn(dH) .= .-(Fxx .+ Fyy) 
end

Any idea what I could be doing wrong?

Edit: For completeness, here are some of the helper functions used above.


# Helper functions for the staggered grid

####  Non-allocating functions  ####

diff_x!(O, I, Δx) = @views @. O = (I[begin+1:end,:] - I[1:end-1,:]) / Δx

diff_y!(O, I, Δy) = @views @. O = (I[:,begin+1:end] - I[:,1:end - 1]) / Δy

avg!(O, I) = @views @. O = (I[1:end-1,1:end-1] + I[2:end,1:end-1] + I[1:end-1,2:end] + I[2:end,2:end]) * 0.25

avg_x!(O, I) = @views @. O = (I[1:end-1,:] + I[2:end,:]) * 0.5

avg_y!(O, I) = @views @. O = (I[:,1:end-1] + I[:,2:end]) * 0.5

####  Allocating functions  ####

"""
    avg(A)

4-point average of a matrix
"""
@views avg(A) = 0.25 .* ( A[1:end-1,1:end-1] .+ A[2:end,1:end-1] .+ A[1:end-1,2:end] .+ A[2:end,2:end] )


"""
    avg_x(A)

2-point average of a matrix's X axis
"""
@views avg_x(A) = 0.5 .* ( A[1:end-1,:] .+ A[2:end,:] )

"""
    avg_y(A)

2-point average of a matrix's Y axis
"""
@views avg_y(A) = 0.5 .* ( A[:,1:end-1] .+ A[:,2:end] )

"""
    diff_x(A)

2-point differential of a matrix's X axis
"""
@views diff_x(A) = (A[begin + 1:end, :] .- A[1:end - 1, :])

"""
    diff_y(A)

2-point differential of a matrix's Y axis
"""
@views diff_y(A) = (A[:, begin + 1:end] .- A[:, 1:end - 1])

"""
    inn(A)

Access the inner part of the matrix (the result is 2 smaller in each dimension)
"""
@views inn(A) = A[2:end-1,2:end-1]

"""
    inn1(A)

Access the matrix without its last row and column (the result is 1 smaller in each dimension)
"""
@views inn1(A) = A[1:end-1,1:end-1]
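
For reference, a small usage sketch of the non-allocating helpers showing the array shapes they expect (the grid size and spacing here are illustrative); the preallocated cache matrices have to match these shapes:

S    = rand(10, 8)       # surface on the full grid
dSdx = zeros(9, 8)       # diff_x! drops one row
dSdy = zeros(10, 7)      # diff_y! drops one column
H̄    = zeros(9, 7)       # avg! lives on the staggered grid

diff_x!(dSdx, S, 50.0)   # Δx = 50.0
diff_y!(dSdy, S, 50.0)   # Δy = 50.0
avg!(H̄, S)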

Continuation of this post here: Resolving Julia Warning: dt(2.27e-13) <= dtmin(2.27e-13) in implementation of dual numbers