# How to autodifferentiate the results of NLsolve?

How can I auto differentiate the function cd with respect to p?

using ForwardDiff
using NLsolve

function cd(; p=[1.0, 1.0], i=100)
function sys(sto, x; p=p, i=i)
x1 = x[1]
x2 = x[2]
p1 = p[1]
p2 = p[2]
sto[1] = x1/p2 - x2/p1
sto[2] = i - p1*x1 - p2*x2
end
sol = nlsolve(sys, [1.0, 1.0], method=:trust_region, autodiff=:forward)
return sol.zero
end


I get an error:

julia> ForwardDiff.jacobian(p -> cd(; p=p), [1.0, 1.0])
ERROR: MethodError: no method matching Float64(::ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2})
Closest candidates are:
(::Type{T})(::Real, ::RoundingMode) where T<:AbstractFloat at rounding.jl:200
(::Type{T})(::T) where T<:Number at boot.jl:760
(::Type{T})(::AbstractChar) where T<:Union{AbstractChar, Number} at char.jl:50
...
Stacktrace:
[1] convert(#unused#::Type{Float64}, x::ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2})
@ Base ./number.jl:7
[2] convert(#unused#::Type{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, d::ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}, 2})
@ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/dual.jl:378
[3] setindex!(A::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, x::ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}, 2}, i1::Int64)
@ Base ./array.jl:839
[4] (::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"})(sto::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, x::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}; p::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, i::Int64)
@ Main ./REPL[4]:8
[5] (::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"})(sto::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, x::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}})
@ Main ./REPL[4]:4
[6] vector_mode_dual_eval(f!::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, y::Vector{Float64}, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}})
@ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/apiutils.jl:44
[7] vector_mode_jacobian!(result::DiffResults.MutableDiffResult{1, Vector{Float64}, Tuple{Matrix{Float64}}}, f!::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, y::Vector{Float64}, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}})
@ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:172
[8] jacobian!(result::DiffResults.MutableDiffResult{1, Vector{Float64}, Tuple{Matrix{Float64}}}, f!::Function, y::Vector{Float64}, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}}, ::Val{false})
@ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:78
[9] (::NLSolversBase.var"#fj_forwarddiff!#24"{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Vector{Float64}, ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}}})(F::Vector{Float64}, J::Matrix{Float64}, x::Vector{Float64})
@ NLSolversBase ~/.julia/packages/NLSolversBase/GRQ1x/src/objective_types/oncedifferentiable.jl:160
[10] value_jacobian!!(obj::OnceDifferentiable{Vector{Float64}, Matrix{Float64}, Vector{Float64}}, F::Vector{Float64}, J::Matrix{Float64}, x::Vector{Float64})
@ NLSolversBase ~/.julia/packages/NLSolversBase/GRQ1x/src/interface.jl:124
[11] value_jacobian!!
@ ~/.julia/packages/NLSolversBase/GRQ1x/src/interface.jl:122 [inlined]
[12] trust_region_(df::OnceDifferentiable{Vector{Float64}, Matrix{Float64}, Vector{Float64}}, initial_x::Vector{Float64}, xtol::Float64, ftol::Float64, iterations::Int64, store_trace::Bool, show_trace::Bool, extended_trace::Bool, factor::Float64, autoscale::Bool, cache::NLsolve.NewtonTrustRegionCache{Vector{Float64}})
@ NLsolve ~/.julia/packages/NLsolve/gJL1I/src/solvers/trust_region.jl:119
[13] trust_region (repeats 2 times)
@ ~/.julia/packages/NLsolve/gJL1I/src/solvers/trust_region.jl:235 [inlined]
[14] nlsolve(df::OnceDifferentiable{Vector{Float64}, Matrix{Float64}, Vector{Float64}}, initial_x::Vector{Float64}; method::Symbol, xtol::Float64, ftol::Float64, iterations::Int64, store_trace::Bool, show_trace::Bool, extended_trace::Bool, linesearch::Static, linsolve::NLsolve.var"#29#31", factor::Float64, autoscale::Bool, m::Int64, beta::Int64, aa_start::Int64, droptol::Float64)
@ NLsolve ~/.julia/packages/NLsolve/gJL1I/src/nlsolve/nlsolve.jl:26
[15] nlsolve(f::Function, initial_x::Vector{Float64}; method::Symbol, autodiff::Symbol, inplace::Bool, kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ NLsolve ~/.julia/packages/NLsolve/gJL1I/src/nlsolve/nlsolve.jl:52
[16] cd(; p::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, i::Int64)
@ Main ./REPL[4]:12
[17] #9
@ ./REPL[5]:1 [inlined]
[18] vector_mode_dual_eval
@ ~/.julia/packages/ForwardDiff/5gUap/src/apiutils.jl:37 [inlined]
[19] vector_mode_jacobian(f::var"#9#10", x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}})
@ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:147
[20] jacobian(f::Function, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}}, ::Val{true})
@ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:21
[21] jacobian(f::Function, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}}) (repeats 2 times)
@ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:19
[22] top-level scope
@ REPL[5]:1


Running AD through a solver is in general inefficient (and sometimes not possible if the solver uses functionality that you canât easily AD through, like LAPACK). Fortunately, you can use the chain rule to express the sensitivity you seek in terms of the partial derivatives of your residual function evaluated at the point of the solution returned by the solver. In short, you have:

r(p, x(p)) = 0 =>
dr(p, x(p)) / dp = 0 =>
âr/âp + âr/âx * dx/dp = 0 =>
dx/dp = - inv(âr/âx) * âr/âp


In code this would look like:

using ForwardDiff
using NLsolve

function r(sto, x; p=p, i=i)
x1 = x[1]
x2 = x[2]
p1 = p[1]
p2 = p[2]
sto[1] = x1/p2 - x2/p1
sto[2] = i - p1*x1 - p2*x2
end

function dxdp(; p=[1.0, 1.0], i=100)
sto = zeros(2)
sol = nlsolve((g, x) -> r(g, x; p=p, i=i), [1.0, 1.0], method=:trust_region, autodiff=:forward)
drdx = ForwardDiff.jacobian((sto, x) -> r(sto, x; p=p, i=i), sto, sol.zero)
drdp = ForwardDiff.jacobian((sto, p) -> r(sto, sol.zero; p=p, i=i), sto, p)
return -(drdx \ drdp)
end


giving:

julia> dxdp()
2Ă2 Matrix{Float64}:
-50.0   -0.0
0.0  -50.0


You can double-check with e.g. finite difference:

function cd(; p=[1.0, 1.0], i=100)
sol = nlsolve((sto, x) -> r(sto, x; p=p, i=i), [1.0, 1.0], method=:trust_region, autodiff=:forward)
return sol.zero
end

using FiniteDiff
FiniteDiff.finite_difference_jacobian(p -> cd(; p=p), [1.0, 1.0])


giving

julia> FiniteDiff.finite_difference_jacobian(p -> cd(; p=p), [1.0, 1.0])
2Ă2 Matrix{Float64}:
-50.0    0.0
0.0  -50.0

10 Likes

See Reverse differentiation through nlsolve Âˇ Issue #205 Âˇ JuliaNLSolvers/NLsolve.jl Âˇ GitHub for a related discussion

1 Like

What if I really wanted to find a way to be able to AD the function cd? I am asking because cd is part of long program that I need to optimize and Iâm having a hard time using AD because function cd âblocksâ the way.

If you want it to work âseamlesslyâ with an existing AD framework you would need to look up how to add a custom derivative. For ForwardDiff it might be to overload f(x::Vector{<:ForwardDiff.Dual}).

You might be able to pull something from an old PR I made: wip: implement macro for users to define their own derivatives by KristofferC Âˇ Pull Request #165 Âˇ JuliaDiff/ForwardDiff.jl Âˇ GitHub.

1 Like

But isnât the error Iâm getting because at some point something gets converted Float64 so that ForwardDiff loses track of the derivatives? I was hoping to simply fix that, but I canât spot where the problem happens.

Geneally, you wonât be able to AD through an iterative solver. There is no âsimple fixâ, other than defining the derivative from the equation(s) that implicitly characterize the solution, as suggested by @kristoffer.carlsson.

1 Like

But doesnât AD handle differentiation of algorithms (except when they are not written in Julia)? I thought that Optim outsources something to a library outside Julia, and so I though could write a version of some optimization algorithm in pure Julia and get away with it, no?

If I am not misunderstanding your question:
In theory you can differentiate an iterative algorithm.

For example, I think a lot of functions yielding trandendentials and irrationals might end up using an iterative algorithm (Newton Raphson, the Babylonian method, etc) depending on the precision. So to auto differentiate sqrt you could run the AD through that iterative algorithm if it was implemented in JuliaâŚ But it would be grossly inefficient when we already know calculus. So you teach the AD system the derivative of these simple functions and it doesnât care how the original function was calculated (ie, register a rule like ChainRules.jl/fastmath_able.jl at master Âˇ JuliaDiff/ChainRules.jl Âˇ GitHub)

Similarly, you could in principle differentiate the algorithm calculating the system of equations itself, but you are much better off teaching the AD system the derivative (using the implicit function theorem and the chain rule local to the calculation of the system). The more people register those rules, the more coverage we will have on important software packages so that this becomes seamless for users.

Moreover, it doesnât matter if the calculation of the âprimalâ value was calculated in Julia or called to an external library. It just matters if the AD system is taught the derivative of a julia function (at which point it doesnât care how it is implemented) vs. needs to go inside of the function and recursively try to differentiate its contents (at which point it eventually hits functions with rules or dies trying).

Yes, I understand that. I guess I was surprised that ForwardDiff throws an error. The issue still stands: suppose I am not worried about efficiency, what can I do to get that differentiation to work?

About registering the rule, I donât know how to do it. I am more of a user than a developer of Julia. I have contributed to some packages, but those contributions are rather trivial. I wish I knew more about packages and developing Julia. Often times I find myself programming some functionality that should probably go to a package, but I donât know how to start so I end up writing a script rather than programming a package . Iâm willing to walk the path though, if someone gives me directions.

This, unfortunately, isnât true in general. At least not in the sense of being able to blindly AD through any iterative algorithm. As an example, consider bisection to find x such that x + a = 0, from a bracket of floor(-abs(a)), ceil(abs(a)). Your stopping rule would guarantee some bracket width or similar, but no derivative of a would propagate at all.

What multiple people in this discussion already told you: define a rule using an implicit function based on the original equation. There is no shortcut.

1 Like

I know very little about AD, but my impression is that you donât have to be a âdeveloperâ to define rules, you just use ChainRulesCore and write a rule like you would write another function (or maybe a good comparison is a plot recipe for Plots.jl), see

Yes, I was looking at that, but it seems to me that I have to use DiffRules.jl, because I think thatâs what ForwardDiff.jl uses.

That might be, and apologies if Iâve added to the confusion, my point was rather that you donât need to develop a package (potentially involving tests, documentation, GitHub CI etc etc) but that writing a rule isnât much different from writing a regular function in your script as far as I understand.

ForwardDiff doesnât really have any vector rules so there isnât much in place to easily do this. In order to get this correct you need to propagate the derivative from your function with the partials of your input, see:

in my PR to add this to ForwardDiff.

3 Likes

Also, this univariate worked example may help to get you started:

1 Like

This seems to work though

using FiniteDiff
using ForwardDiff
using NLsolve
using DiffRules

function sys(sto, x; p=[1.0, 1.0], i=100)
x1 = x[1]
x2 = x[2]
p1 = p[1]
p2 = p[2]
sto[1] = x1/p2 - x2/p1
sto[2] = i - p1*x1 - p2*x2
end

function cd(p=[1.0, 1.0]; i=100)
sol = nlsolve((sto, x) -> sys(sto, x; p=p, i=i), p, method=:trust_region, autodiff=:forward)
return sol.zero
end

function dxdp( p=[1.0, 1.0]; i=100)
sto = zeros(2)
sol = cd(p; i=i)
drdx = ForwardDiff.jacobian((sto, x) -> sys(sto, x; p=p, i=i), sto, sol)
drdp = ForwardDiff.jacobian((sto, p) -> sys(sto, sol; p=p, i=i), sto, p)
return -(drdx \ drdp)
end

DiffRules.@define_diffrule Main.cd(p) = :(dxdp(\$p))

julia> ForwardDiff.jacobian(cd, [1.0, 1.0])
2Ă2 Matrix{Float64}:
-50.0    3.55271e-15
0.0  -50.0

julia> FiniteDiff.finite_difference_jacobian(cd, [1.0, 1.0])
2Ă2 Matrix{Float64}:
-50.0    0.0
0.0  -50.0

julia> dxdp()
2Ă2 Matrix{Float64}:
-50.0   -0.0
0.0  -50.0


I find strange that I donât get the exact same result as dxdp().