How to autodifferentiate the results of NLsolve?

amrods · August 2, 2021, 1:22pm

How can I auto differentiate the function cd with respect to p?

using ForwardDiff
using NLsolve

function cd(; p=[1.0, 1.0], i=100)
    function sys(sto, x; p=p, i=i)
        x1 = x[1]
        x2 = x[2]
        p1 = p[1]
        p2 = p[2]
        sto[1] = x1/p2 - x2/p1
        sto[2] = i - p1*x1 - p2*x2
    end
    sol = nlsolve(sys, [1.0, 1.0], method=:trust_region, autodiff=:forward)
    return sol.zero
end

I get an error:

julia> ForwardDiff.jacobian(p -> cd(; p=p), [1.0, 1.0])
ERROR: MethodError: no method matching Float64(::ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2})
Closest candidates are:
  (::Type{T})(::Real, ::RoundingMode) where T<:AbstractFloat at rounding.jl:200
  (::Type{T})(::T) where T<:Number at boot.jl:760
  (::Type{T})(::AbstractChar) where T<:Union{AbstractChar, Number} at char.jl:50
  ...
Stacktrace:
  [1] convert(#unused#::Type{Float64}, x::ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2})
    @ Base ./number.jl:7
  [2] convert(#unused#::Type{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, d::ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}, 2})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/dual.jl:378
  [3] setindex!(A::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, x::ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}, 2}, i1::Int64)
    @ Base ./array.jl:839
  [4] (::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"})(sto::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, x::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}; p::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, i::Int64)
    @ Main ./REPL[4]:8
  [5] (::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"})(sto::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, x::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}})
    @ Main ./REPL[4]:4
  [6] vector_mode_dual_eval(f!::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, y::Vector{Float64}, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/apiutils.jl:44
  [7] vector_mode_jacobian!(result::DiffResults.MutableDiffResult{1, Vector{Float64}, Tuple{Matrix{Float64}}}, f!::var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, y::Vector{Float64}, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:172
  [8] jacobian!(result::DiffResults.MutableDiffResult{1, Vector{Float64}, Tuple{Matrix{Float64}}}, f!::Function, y::Vector{Float64}, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}}, ::Val{false})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:78
  [9] (::NLSolversBase.var"#fj_forwarddiff!#24"{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Vector{Float64}, ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2, Tuple{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#sys#7"{Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, Int64, var"#sys#6#8"}, Float64}, Float64, 2}}}}})(F::Vector{Float64}, J::Matrix{Float64}, x::Vector{Float64})
    @ NLSolversBase ~/.julia/packages/NLSolversBase/GRQ1x/src/objective_types/oncedifferentiable.jl:160
 [10] value_jacobian!!(obj::OnceDifferentiable{Vector{Float64}, Matrix{Float64}, Vector{Float64}}, F::Vector{Float64}, J::Matrix{Float64}, x::Vector{Float64})
    @ NLSolversBase ~/.julia/packages/NLSolversBase/GRQ1x/src/interface.jl:124
 [11] value_jacobian!!
    @ ~/.julia/packages/NLSolversBase/GRQ1x/src/interface.jl:122 [inlined]
 [12] trust_region_(df::OnceDifferentiable{Vector{Float64}, Matrix{Float64}, Vector{Float64}}, initial_x::Vector{Float64}, xtol::Float64, ftol::Float64, iterations::Int64, store_trace::Bool, show_trace::Bool, extended_trace::Bool, factor::Float64, autoscale::Bool, cache::NLsolve.NewtonTrustRegionCache{Vector{Float64}})
    @ NLsolve ~/.julia/packages/NLsolve/gJL1I/src/solvers/trust_region.jl:119
 [13] trust_region (repeats 2 times)
    @ ~/.julia/packages/NLsolve/gJL1I/src/solvers/trust_region.jl:235 [inlined]
 [14] nlsolve(df::OnceDifferentiable{Vector{Float64}, Matrix{Float64}, Vector{Float64}}, initial_x::Vector{Float64}; method::Symbol, xtol::Float64, ftol::Float64, iterations::Int64, store_trace::Bool, show_trace::Bool, extended_trace::Bool, linesearch::Static, linsolve::NLsolve.var"#29#31", factor::Float64, autoscale::Bool, m::Int64, beta::Int64, aa_start::Int64, droptol::Float64)
    @ NLsolve ~/.julia/packages/NLsolve/gJL1I/src/nlsolve/nlsolve.jl:26
 [15] nlsolve(f::Function, initial_x::Vector{Float64}; method::Symbol, autodiff::Symbol, inplace::Bool, kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ NLsolve ~/.julia/packages/NLsolve/gJL1I/src/nlsolve/nlsolve.jl:52
 [16] cd(; p::Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}, i::Int64)
    @ Main ./REPL[4]:12
 [17] #9
    @ ./REPL[5]:1 [inlined]
 [18] vector_mode_dual_eval
    @ ~/.julia/packages/ForwardDiff/5gUap/src/apiutils.jl:37 [inlined]
 [19] vector_mode_jacobian(f::var"#9#10", x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:147
 [20] jacobian(f::Function, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}}, ::Val{true})
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:21
 [21] jacobian(f::Function, x::Vector{Float64}, cfg::ForwardDiff.JacobianConfig{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2, Vector{ForwardDiff.Dual{ForwardDiff.Tag{var"#9#10", Float64}, Float64, 2}}}) (repeats 2 times)
    @ ForwardDiff ~/.julia/packages/ForwardDiff/5gUap/src/jacobian.jl:19
 [22] top-level scope
    @ REPL[5]:1

kristoffer.carlsson · August 2, 2021, 1:58pm

Running AD through a solver is in general inefficient (and sometimes not possible if the solver uses functionality that you can’t easily AD through, like LAPACK). Fortunately, you can use the chain rule to express the sensitivity you seek in terms of the partial derivatives of your residual function evaluated at the point of the solution returned by the solver. In short, you have:

r(p, x(p)) = 0 =>
dr(p, x(p)) / dp = 0 =>
∂r/∂p + ∂r/∂x * dx/dp = 0 =>
dx/dp = - inv(∂r/∂x) * ∂r/∂p

In code this would look like:

using ForwardDiff
using NLsolve

function r(sto, x; p=p, i=i)
    x1 = x[1]
    x2 = x[2]
    p1 = p[1]
    p2 = p[2]
    sto[1] = x1/p2 - x2/p1
    sto[2] = i - p1*x1 - p2*x2
end


function dxdp(; p=[1.0, 1.0], i=100)
    sto = zeros(2)
    sol = nlsolve((g, x) -> r(g, x; p=p, i=i), [1.0, 1.0], method=:trust_region, autodiff=:forward)
    drdx = ForwardDiff.jacobian((sto, x) -> r(sto, x; p=p, i=i), sto, sol.zero)
    drdp = ForwardDiff.jacobian((sto, p) -> r(sto, sol.zero; p=p, i=i), sto, p)
    return -(drdx \ drdp)
end

giving:

julia> dxdp()
2×2 Matrix{Float64}:
 -50.0   -0.0
   0.0  -50.0

You can double-check with e.g. finite difference:

function cd(; p=[1.0, 1.0], i=100)
    sol = nlsolve((sto, x) -> r(sto, x; p=p, i=i), [1.0, 1.0], method=:trust_region, autodiff=:forward)
    return sol.zero
end

using FiniteDiff
FiniteDiff.finite_difference_jacobian(p -> cd(; p=p), [1.0, 1.0])

giving

julia> FiniteDiff.finite_difference_jacobian(p -> cd(; p=p), [1.0, 1.0])
2×2 Matrix{Float64}:
 -50.0    0.0
   0.0  -50.0

baggepinnen · August 2, 2021, 2:31pm

See https://github.com/JuliaNLSolvers/NLsolve.jl/issues/205 for a related discussion

amrods · August 2, 2021, 11:50pm

What if I really wanted to find a way to be able to AD the function cd? I am asking because cd is part of long program that I need to optimize and I’m having a hard time using AD because function cd “blocks” the way.

kristoffer.carlsson · August 3, 2021, 7:35am

If you want it to work “seamlessly” with an existing AD framework you would need to look up how to add a custom derivative. For ForwardDiff it might be to overload f(x::Vector{<:ForwardDiff.Dual}).

You might be able to pull something from an old PR I made: wip: implement macro for users to define their own derivatives by KristofferC · Pull Request #165 · JuliaDiff/ForwardDiff.jl · GitHub.

amrods · August 3, 2021, 7:44am

But isn’t the error I’m getting because at some point something gets converted Float64 so that ForwardDiff loses track of the derivatives? I was hoping to simply fix that, but I can’t spot where the problem happens.

Tamas_Papp · August 3, 2021, 8:26am

Geneally, you won’t be able to AD through an iterative solver. There is no “simple fix”, other than defining the derivative from the equation(s) that implicitly characterize the solution, as suggested by @kristoffer.carlsson.

amrods · August 3, 2021, 10:45am

But doesn’t AD handle differentiation of algorithms (except when they are not written in Julia)? I thought that Optim outsources something to a library outside Julia, and so I though could write a version of some optimization algorithm in pure Julia and get away with it, no?

jlperla · August 4, 2021, 2:34am

If I am not misunderstanding your question:
In theory you can differentiate an iterative algorithm.

For example, I think a lot of functions yielding trandendentials and irrationals might end up using an iterative algorithm (Newton Raphson, the Babylonian method, etc) depending on the precision. So to auto differentiate sqrt you could run the AD through that iterative algorithm if it was implemented in Julia… But it would be grossly inefficient when we already know calculus. So you teach the AD system the derivative of these simple functions and it doesn’t care how the original function was calculated (ie, register a rule like https://github.com/JuliaDiff/ChainRules.jl/blob/master/src/rulesets/Base/fastmath_able.jl#L1)

Similarly, you could in principle differentiate the algorithm calculating the system of equations itself, but you are much better off teaching the AD system the derivative (using the implicit function theorem and the chain rule local to the calculation of the system). The more people register those rules, the more coverage we will have on important software packages so that this becomes seamless for users.

Moreover, it doesn’t matter if the calculation of the “primal” value was calculated in Julia or called to an external library. It just matters if the AD system is taught the derivative of a julia function (at which point it doesn’t care how it is implemented) vs. needs to go inside of the function and recursively try to differentiate its contents (at which point it eventually hits functions with rules or dies trying).

amrods · August 4, 2021, 5:54am

Yes, I understand that. I guess I was surprised that ForwardDiff throws an error. The issue still stands: suppose I am not worried about efficiency, what can I do to get that differentiation to work?

About registering the rule, I don’t know how to do it. I am more of a user than a developer of Julia. I have contributed to some packages, but those contributions are rather trivial. I wish I knew more about packages and developing Julia. Often times I find myself programming some functionality that should probably go to a package, but I don’t know how to start so I end up writing a script rather than programming a package . I’m willing to walk the path though, if someone gives me directions.

Tamas_Papp · August 5, 2021, 10:56am

This, unfortunately, isn’t true in general. At least not in the sense of being able to blindly AD through any iterative algorithm. As an example, consider bisection to find x such that x + a = 0, from a bracket of floor(-abs(a)), ceil(abs(a)). Your stopping rule would guarantee some bracket width or similar, but no derivative of a would propagate at all.

What multiple people in this discussion already told you: define a rule using an implicit function based on the original equation. There is no shortcut.

nilshg · August 5, 2021, 12:09pm

I know very little about AD, but my impression is that you don’t have to be a “developer” to define rules, you just use ChainRulesCore and write a rule like you would write another function (or maybe a good comparison is a plot recipe for Plots.jl), see

FAQ · ChainRules?

amrods · August 5, 2021, 12:16pm

Yes, I was looking at that, but it seems to me that I have to use DiffRules.jl, because I think that’s what ForwardDiff.jl uses.

nilshg · August 5, 2021, 12:18pm

That might be, and apologies if I’ve added to the confusion, my point was rather that you don’t need to develop a package (potentially involving tests, documentation, GitHub CI etc etc) but that writing a rule isn’t much different from writing a regular function in your script as far as I understand.

kristoffer.carlsson · August 5, 2021, 12:24pm

ForwardDiff doesn’t really have any vector rules so there isn’t much in place to easily do this. In order to get this correct you need to propagate the derivative from your function with the partials of your input, see:

github.com/JuliaDiff/ForwardDiff.jl

wip: implement macro for users to define their own derivatives

JuliaDiff:master ← KristofferC:kc/user_defined_derivatives

opened 02:21PM - 03 Dec 16 UTC

KristofferC

+452 -0

This is a start towards an official API for fixing https://github.com/JuliaDiff/…ForwardDiff.jl/issues/89. It provides macros for derivatives, gradients and jacobians such that user defined derivatives are easily injected: ```jl julia> f(x) = x^2 + x; julia> Df(x) = (println("I got called"); 2x + 1); julia> ForwardDiff.@implement_derivative f Df f (generic function with 2 methods) julia> ForwardDiff.derivative(f, 2.0) I got called 5.0 ``` For gradients and jacobians there are two "config" objects that can be used, which cache intermediate arrays needed. I don't think we can support user defined Hessians at the moment because of the composition of gradient and jacobian that is currently being used. TODOs: - [ ] The documentation is a bit so-so. Just wanted to get something up quickly. - [x] Something weird happens when I use this in a function: ```jl julia> function test() h(x) = 3x dh(x) = 3 ForwardDiff.@implement_derivative h dh ForwardDiff.derivative(h, 2.0) end test (generic function with 1 method) julia> test() ------ UndefVarError ------------------- Stacktrace (most recent call last) [1] — test() at REPL[2]:2 UndefVarError: dh not defined ``` I opened a thread on the forum: https://discourse.julialang.org/t/question-about-scoping-inside-a-function/709 **Conclusion** Lowering bug in julia 0.5. Need to define the derivative before the function. - [x] Performance checks, if function + derivative is non allocating, then the call to ForwardDiff should be non allocating assuming that all the cache functionality is used. - [x] Figure out why this is needed: https://github.com/JuliaDiff/ForwardDiff.jl/pull/165/files#diff-a51666f82528f840f37fcc362fb74659R16 [Fixed by https://github.com/JuliaDiff/ForwardDiff.jl/pull/162]

in my PR to add this to ForwardDiff.

Tamas_Papp · August 5, 2021, 12:24pm

Also, this univariate worked example may help to get you started:

amrods · August 5, 2021, 12:30pm

This seems to work though

using FiniteDiff
using ForwardDiff
using NLsolve
using DiffRules

function sys(sto, x; p=[1.0, 1.0], i=100)
    x1 = x[1]
    x2 = x[2]
    p1 = p[1]
    p2 = p[2]
    sto[1] = x1/p2 - x2/p1
    sto[2] = i - p1*x1 - p2*x2
end

function cd(p=[1.0, 1.0]; i=100)
    sol = nlsolve((sto, x) -> sys(sto, x; p=p, i=i), p, method=:trust_region, autodiff=:forward)
    return sol.zero
end

function dxdp( p=[1.0, 1.0]; i=100)
    sto = zeros(2)
    sol = cd(p; i=i)
    drdx = ForwardDiff.jacobian((sto, x) -> sys(sto, x; p=p, i=i), sto, sol)
    drdp = ForwardDiff.jacobian((sto, p) -> sys(sto, sol; p=p, i=i), sto, p)
    return -(drdx \ drdp)
end

DiffRules.@define_diffrule Main.cd(p) = :(dxdp($p))

julia> ForwardDiff.jacobian(cd, [1.0, 1.0])
2×2 Matrix{Float64}:
 -50.0    3.55271e-15
   0.0  -50.0

julia> FiniteDiff.finite_difference_jacobian(cd, [1.0, 1.0])
2×2 Matrix{Float64}:
 -50.0    0.0
   0.0  -50.0

julia> dxdp()
2×2 Matrix{Float64}:
 -50.0   -0.0
   0.0  -50.0

I find strange that I don’t get the exact same result as dxdp().

elbert5770 · February 3, 2023, 4:13pm

Just to further expand, this is the code above in the context of an optimization problem. Here, the desired solution is x at p = [1.0,1.0], but the starting values of p are [0.5,0.5]. Seems to work though. This is relevant to the case where the state variables are measured at the steady state.

using FiniteDiff
using ForwardDiff
using NLsolve
using DiffRules
using Optimization
using OptimizationOptimJL

function sys(sto, x; p=[1.0, 1.0], i=100)
    x1 = x[1]
    x2 = x[2]
    p1 = p[1]
    p2 = p[2]
    sto[1] = x1/p2 - x2/p1
    sto[2] = i - p1*x1 - p2*x2
end

function cd(p=[1.0, 1.0]; i=100)
    sol = nlsolve((sto, x) -> sys(sto, x; p=p, i=i), p, method=:trust_region, autodiff=:forward)
    return sol.zero
end

function dxdp( p=[1.0, 1.0]; i=100)
    sto = zeros(2)
    sol = cd(p; i=i)
    drdx = ForwardDiff.jacobian((sto, x) -> sys(sto, x; p=p, i=i), sto, sol)
    drdp = ForwardDiff.jacobian((sto, p) -> sys(sto, sol; p=p, i=i), sto, p)
    return -(drdx \ drdp)
end

#@show dxdp()
DiffRules.@define_diffrule Main.cd(p) = :(dxdp($p))
@show Main.cd([0.5,0.5])

function set_p(u0,a)
    #@show u0
    sol = cd(u0)
    return (sol[1]-50.0)^2 + (sol[2]-50.0)^2
end

optf = OptimizationFunction(set_p,Optimization.AutoForwardDiff())
u0 = [0.5,0.5]
i = 100
a=0
prob_set_kf = OptimizationProblem(optf,u0, a)
sol_kf = solve(prob_set_kf,BFGS())
@show sol_kf.u

elbert5770 · February 13, 2023, 6:13pm

Oops, there is a fundamental problem present in the original solution. When nlsolve is called, the ‘u0’ that is delivered is actually ‘p’. If u0 is delivered (for example as a global variable or through a closure), it errors because it will always be a Float64. However, as suggested here: (automatic differentiation - Julia Roots find_zero with ForwardDiff.Dual type? - Stack Overflow), one just create a new variable that can be a float or a dual when needed. I don’t know if this whole thing is necessary, but ‘u0_ = ones(eltype(p)).*u0’ works.

using FiniteDiff
using ForwardDiff
using NLsolve
using DiffRules
using Optimization
using OptimizationOptimJL

function sys(sto, x; p=[1.0, 1.0], i=100)
    x1 = x[1]
    x2 = x[2]
    p1 = p[1]
    p2 = p[2]
    @show x
    @show p
    
    sto[1] = x1/p2 - x2/p1
    sto[2] = i - p1*x1 - p2*x2
    @show sto
    
    return sto

end


function cd(p)
    @show u0
    @show i
    @show p
    u0_ = ones(eltype(p)).*u0
    sol = nlsolve((sto, x) -> sys(sto, x; p=p, i=i), u0_, method=:trust_region, autodiff=:forward)
    return sol.zero
end



function dxdp(p)
    
    sol = cd(p)
    drdx = ForwardDiff.jacobian((sto, x) -> sys(sto, x; p=p, i=i), sto, sol)
    drdp = ForwardDiff.jacobian((sto, p) -> sys(sto, sol; p=p, i=i), sto, p)
    return -(drdx \ drdp)
end


function set_p(p,a)  
    sol = cd(p)
    return (sol[1]-50.0)^2 + (sol[2]-50.0)^2
end

a=0
i = 100
p = [0.1,0.1]
u0 = [1000.0,1000.0]
sto = zeros(2)

DiffRules.@define_diffrule Main.cd(p) = :(dxdp($p))
@show p -> ForwardDiff.jacobian(cd,u0)

sol = cd(p)

optf = OptimizationFunction(set_p,Optimization.AutoForwardDiff())

prob_set_kf = OptimizationProblem(optf,u0, a)
sol_kf = solve(prob_set_kf,BFGS())
@show sol_kf.u

elbert5770 · February 13, 2023, 6:46pm

Also suggested by Chris Rackauckas was to avoid nlsolve’s direct use and use NonlinearSolve. This is what it looks like (bonus: no global variables):

using ForwardDiff
using Optimization
using OptimizationOptimJL
using NonlinearSolve
using DifferentialEquations
using SteadyStateDiffEq

function sys(sto, x, p)
    # @show x
    # @show p
    x1 = x[1]
    x2 = x[2]
    p1 = p[1]
    p2 = p[2]
    sto[1] = x1/p2 - x2/p1
    sto[2] = 100.0 - p1*x1 - p2*x2
    #@show sto
    
    return sto
end

function set_p_outer(probN)
    function set_p_inner(p,u0)

        sol = solve(remake(probN,u0=u0,p=p))
        return (sol[1]-50.0)^2 + (sol[2]-50.0)^2
    end
end

function main()
    a=0
    p = [0.5,0.5]
    u0 = [1000.0,1000.0]
    probN = NonlinearProblem{true}(sys,u0,p)
    sol = solve(probN)

    @show sol
    set_p = set_p_outer(probN)
    sto = similar(p)
    @show sys(sto,sol.u,p)
    optf = OptimizationFunction(set_p,Optimization.AutoForwardDiff())


    prob_set_kf = OptimizationProblem(optf,p,u0)
    sol_kf = solve(prob_set_kf,BFGS())
    @show sol_kf.u
end

main()

Topic		Replies	Views
How to implement AD for this function? General Usage differentiation , autodiff	9	645	May 7, 2021
Autodiff-ing a function defined by the result of Roots.find_zero fails General Usage roots , forwarddiff , autodiff	12	795	September 29, 2022
Differentiating through optimization problem New to Julia question , autodiff , enzyme	17	915	May 20, 2024
Obtain jacobian used by NLsolve General Usage question , nlsolve	2	321	July 12, 2023
Method Error for ForwardDiff.hessian when ForwardDiff.Jacobian and ForwardDiff.gradient works perfectly fine. How to resolve? New to Julia question , forwarddiff	18	359	June 7, 2024

How to autodifferentiate the results of NLsolve?

Related topics