How to autodifferentiate the results of NLsolve?

How can I auto differentiate the function cd with respect to p?

using ForwardDiff
using NLsolve

function cd(; p=[1.0, 1.0], i=100)
    function sys(sto, x; p=p, i=i)
        x1 = x[1]
        x2 = x[2]
        p1 = p[1]
        p2 = p[2]
        sto[1] = x1/p2 - x2/p1
        sto[2] = i - p1*x1 - p2*x2
    end
    sol = nlsolve(sys, [1.0, 1.0], method=:trust_region, autodiff=:forward)
    return sol.zero
end

I get an error:

julia> ForwardDiff.jacobian(p -> cd(; p=p), [1.0, 1.0])
ERROR: MethodError: no method matching Float64(::ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2})
Closest candidates are:
  (::Type{T})(::Real, ::RoundingMode) where T<:AbstractFloat at rounding.jl:200
  (::Type{T})(::T) where T<:Number at boot.jl:760
  (::Type{T})(::AbstractChar) where T<:Union{AbstractChar, Number} at char.jl:50
  ...
Stacktrace:
  [1] convert(#unused#::Type{Float64}, x::ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2})
    @ Base ./number.jl:7
  [2] convert(#unused#::Type{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, d::ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}, 2})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/dual.jl:378
  [3] setindex!(A::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, x::ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}, 2}, i1::Int64)
    @ Base ./array.jl:839
  [4] (::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"})(sto::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, x::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}; p::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, i::Int64)
    @ Main ./REPL[4]:8
  [5] (::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"})(sto::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, x::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}})
    @ Main ./REPL[4]:4
  [6] vector_mode_dual_eval(f!::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, y::Vector{Float64}, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/apiutils.jl:44
  [7] vector_mode_jacobian!(result::DiffResults.MutableDiffResult{1, Vector{Float64}, Tuple{Matrix{Float64}}}, f!::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, y::Vector{Float64}, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:172
  [8] jacobian!(result::DiffResults.MutableDiffResult{1, Vector{Float64}, Tuple{Matrix{Float64}}}, f!::Function, y::Vector{Float64}, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}}, ::Val{false})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:78
  [9] (::NLSolversBase.var"#fj_forwarddiff!#24"{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Vector{Float64}, ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}}})(F::Vector{Float64}, J::Matrix{Float64}, x::Vector{Float64})
    @ NLSolversBase ~/.julia/packages/NLSolversBase/GRQ1x/src/objective_types/oncedifferentiable.jl:160
 [10] value_jacobian!!(obj::OnceDifferentiable{Vector{Float64}, Matrix{Float64}, Vector{Float64}}, F::Vector{Float64}, J::Matrix{Float64}, x::Vector{Float64})
    @ NLSolversBase ~/.julia/packages/NLSolversBase/GRQ1x/src/interface.jl:124
 [11] value_jacobian!!
    @ ~/.julia/packages/NLSolversBase/GRQ1x/src/interface.jl:122 [inlined]
 [12] trust_region_(df::OnceDifferentiable{Vector{Float64}, Matrix{Float64}, Vector{Float64}}, initial_x::Vector{Float64}, xtol::Float64, ftol::Float64, iterations::Int64, store_trace::Bool, show_trace::Bool, extended_trace::Bool, factor::Float64, autoscale::Bool, cache::NLsolve.NewtonTrustRegionCache{Vector{Float64}})
    @ NLsolve ~/.julia/packages/NLsolve/gJL1I/src/solvers/trust_region.jl:119
 [13] trust_region (repeats 2 times)
    @ ~/.julia/packages/NLsolve/gJL1I/src/solvers/trust_region.jl:235 [inlined]
 [14] nlsolve(df::OnceDifferentiable{Vector{Float64}, Matrix{Float64}, Vector{Float64}}, initial_x::Vector{Float64}; method::Symbol, xtol::Float64, ftol::Float64, iterations::Int64, store_trace::Bool, show_trace::Bool, extended_trace::Bool, linesearch::Static, linsolve::NLsolve.var"#29#31", factor::Float64, autoscale::Bool, m::Int64, beta::Int64, aa_start::Int64, droptol::Float64)
    @ NLsolve ~/.julia/packages/NLsolve/gJL1I/src/nlsolve/nlsolve.jl:26
 [15] nlsolve(f::Function, initial_x::Vector{Float64}; method::Symbol, autodiff::Symbol, inplace::Bool, kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ NLsolve ~/.julia/packages/NLsolve/gJL1I/src/nlsolve/nlsolve.jl:52
 [16] cd(; p::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, i::Int64)
    @ Main ./REPL[4]:12
 [17] #9
    @ ./REPL[5]:1 [inlined]
 [18] vector_mode_dual_eval
    @ ~/.julia/packages/ForwardDiff/5gUap/src/apiutils.jl:37 [inlined]
 [19] vector_mode_jacobian(f::var"#9#10", x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:147
 [20] jacobian(f::Function, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}}, ::Val{true})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:21
 [21] jacobian(f::Function, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}}) (repeats 2 times)
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:19
 [22] top-level scope
    @ REPL[5]:1

Running AD through a solver is in general inefficient (and sometimes not possible if the solver uses functionality that you can’t easily AD through, like LAPACK). Fortunately, you can use the chain rule to express the sensitivity you seek in terms of the partial derivatives of your residual function evaluated at the point of the solution returned by the solver. In short, you have:

r(p, x(p)) = 0 =>
dr(p, x(p)) / dp = 0 =>
∂r/∂p + ∂r/∂x * dx/dp = 0 =>
dx/dp = - inv(∂r/∂x) * ∂r/∂p

In code this would look like:

using ForwardDiff
using NLsolve

function r(sto, x; p=p, i=i)
    x1 = x[1]
    x2 = x[2]
    p1 = p[1]
    p2 = p[2]
    sto[1] = x1/p2 - x2/p1
    sto[2] = i - p1*x1 - p2*x2
end


function dxdp(; p=[1.0, 1.0], i=100)
    sto = zeros(2)
    sol = nlsolve((g, x) -> r(g, x; p=p, i=i), [1.0, 1.0], method=:trust_region, autodiff=:forward)
    drdx = ForwardDiff.jacobian((sto, x) -> r(sto, x; p=p, i=i), sto, sol.zero)
    drdp = ForwardDiff.jacobian((sto, p) -> r(sto, sol.zero; p=p, i=i), sto, p)
    return -(drdx \ drdp)
end

giving:

julia> dxdp()
2×2 Matrix{Float64}:
 -50.0   -0.0
   0.0  -50.0

You can double-check with e.g. finite difference:

function cd(; p=[1.0, 1.0], i=100)
    sol = nlsolve((sto, x) -> r(sto, x; p=p, i=i), [1.0, 1.0], method=:trust_region, autodiff=:forward)
    return sol.zero
end

using FiniteDiff
FiniteDiff.finite_difference_jacobian(p -> cd(; p=p), [1.0, 1.0])

giving

julia> FiniteDiff.finite_difference_jacobian(p -> cd(; p=p), [1.0, 1.0])
2×2 Matrix{Float64}:
 -50.0    0.0
   0.0  -50.0
10 Likes

See Reverse differentiation through nlsolve · Issue #205 · JuliaNLSolvers/NLsolve.jl · GitHub for a related discussion

1 Like

What if I really wanted to find a way to be able to AD the function cd? I am asking because cd is part of long program that I need to optimize and I’m having a hard time using AD because function cd “blocks” the way.

If you want it to work “seamlessly” with an existing AD framework you would need to look up how to add a custom derivative. For ForwardDiff it might be to overload f(x::Vector{<:ForwardDiff.Dual}).

You might be able to pull something from an old PR I made: wip: implement macro for users to define their own derivatives by KristofferC · Pull Request #165 · JuliaDiff/ForwardDiff.jl · GitHub.

1 Like

But isn’t the error I’m getting because at some point something gets converted Float64 so that ForwardDiff loses track of the derivatives? I was hoping to simply fix that, but I can’t spot where the problem happens.

Geneally, you won’t be able to AD through an iterative solver. There is no “simple fix”, other than defining the derivative from the equation(s) that implicitly characterize the solution, as suggested by @kristoffer.carlsson.

1 Like

But doesn’t AD handle differentiation of algorithms (except when they are not written in Julia)? I thought that Optim outsources something to a library outside Julia, and so I though could write a version of some optimization algorithm in pure Julia and get away with it, no?

If I am not misunderstanding your question:
In theory you can differentiate an iterative algorithm.

For example, I think a lot of functions yielding trandendentials and irrationals might end up using an iterative algorithm (Newton Raphson, the Babylonian method, etc) depending on the precision. So to auto differentiate sqrt you could run the AD through that iterative algorithm if it was implemented in Julia… But it would be grossly inefficient when we already know calculus. So you teach the AD system the derivative of these simple functions and it doesn’t care how the original function was calculated (ie, register a rule like ChainRules.jl/fastmath_able.jl at master · JuliaDiff/ChainRules.jl · GitHub)

Similarly, you could in principle differentiate the algorithm calculating the system of equations itself, but you are much better off teaching the AD system the derivative (using the implicit function theorem and the chain rule local to the calculation of the system). The more people register those rules, the more coverage we will have on important software packages so that this becomes seamless for users.

Moreover, it doesn’t matter if the calculation of the “primal” value was calculated in Julia or called to an external library. It just matters if the AD system is taught the derivative of a julia function (at which point it doesn’t care how it is implemented) vs. needs to go inside of the function and recursively try to differentiate its contents (at which point it eventually hits functions with rules or dies trying).

Yes, I understand that. I guess I was surprised that ForwardDiff throws an error. The issue still stands: suppose I am not worried about efficiency, what can I do to get that differentiation to work?

About registering the rule, I don’t know how to do it. I am more of a user than a developer of Julia. I have contributed to some packages, but those contributions are rather trivial. I wish I knew more about packages and developing Julia. Often times I find myself programming some functionality that should probably go to a package, but I don’t know how to start so I end up writing a script rather than programming a package :frowning: . I’m willing to walk the path though, if someone gives me directions.

This, unfortunately, isn’t true in general. At least not in the sense of being able to blindly AD through any iterative algorithm. As an example, consider bisection to find x such that x + a = 0, from a bracket of floor(-abs(a)), ceil(abs(a)). Your stopping rule would guarantee some bracket width or similar, but no derivative of a would propagate at all.

What multiple people in this discussion already told you: define a rule using an implicit function based on the original equation. There is no shortcut.

1 Like

I know very little about AD, but my impression is that you don’t have to be a “developer” to define rules, you just use ChainRulesCore and write a rule like you would write another function (or maybe a good comparison is a plot recipe for Plots.jl), see

FAQ · ChainRules?

Yes, I was looking at that, but it seems to me that I have to use DiffRules.jl, because I think that’s what ForwardDiff.jl uses.

That might be, and apologies if I’ve added to the confusion, my point was rather that you don’t need to develop a package (potentially involving tests, documentation, GitHub CI etc etc) but that writing a rule isn’t much different from writing a regular function in your script as far as I understand.

ForwardDiff doesn’t really have any vector rules so there isn’t much in place to easily do this. In order to get this correct you need to propagate the derivative from your function with the partials of your input, see:

in my PR to add this to ForwardDiff.

3 Likes

Also, this univariate worked example may help to get you started:

1 Like

This seems to work though

using FiniteDiff
using ForwardDiff
using NLsolve
using DiffRules

function sys(sto, x; p=[1.0, 1.0], i=100)
    x1 = x[1]
    x2 = x[2]
    p1 = p[1]
    p2 = p[2]
    sto[1] = x1/p2 - x2/p1
    sto[2] = i - p1*x1 - p2*x2
end

function cd(p=[1.0, 1.0]; i=100)
    sol = nlsolve((sto, x) -> sys(sto, x; p=p, i=i), p, method=:trust_region, autodiff=:forward)
    return sol.zero
end

function dxdp( p=[1.0, 1.0]; i=100)
    sto = zeros(2)
    sol = cd(p; i=i)
    drdx = ForwardDiff.jacobian((sto, x) -> sys(sto, x; p=p, i=i), sto, sol)
    drdp = ForwardDiff.jacobian((sto, p) -> sys(sto, sol; p=p, i=i), sto, p)
    return -(drdx \ drdp)
end

DiffRules.@define_diffrule Main.cd(p) = :(dxdp($p))
julia> ForwardDiff.jacobian(cd, [1.0, 1.0])
2×2 Matrix{Float64}:
 -50.0    3.55271e-15
   0.0  -50.0

julia> FiniteDiff.finite_difference_jacobian(cd, [1.0, 1.0])
2×2 Matrix{Float64}:
 -50.0    0.0
   0.0  -50.0

julia> dxdp()
2×2 Matrix{Float64}:
 -50.0   -0.0
   0.0  -50.0

I find strange that I don’t get the exact same result as dxdp().