sciml_train with BFGS() (after training with ADAM) causes the following error

I am able to minimize the loss with ADAM to some extent, but here is the error I get when I then try to reduce it further using BFGS().
Any ideas on how to resolve this?

Warning: AD methods failed, using numerical differentiation. To debug, try ForwardDiff.gradient(loss, θ) or Zygote.gradient(loss, θ)
└ @ DiffEqFlux ~/.julia/packages/DiffEqFlux/jpIWG/src/train.jl:71
ERROR: MethodError: objects of type Float32 are not callable
Stacktrace:
  [1] (::DiffEqFlux.var"#82#87"{Float32})(x::Vector{Float32}, p::Nothing)
    @ DiffEqFlux ~/.julia/packages/DiffEqFlux/jpIWG/src/train.jl:84
  [2] (::GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing})(::Vector{Float32})
    @ GalacticOptim ~/.julia/packages/GalacticOptim/DHxE0/src/function/finitediff.jl:11
  [3] (::GalacticOptim.var"#181#187"{Tuple{}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}})(x::Vector{Float32})
    @ GalacticOptim ~/.julia/packages/GalacticOptim/DHxE0/src/function/finitediff.jl:14
  [4] #finite_difference_gradient!#16
    @ ~/.julia/packages/FiniteDiff/msXcU/src/gradients.jl:240 [inlined]
  [5] finite_difference_gradient!
    @ ~/.julia/packages/FiniteDiff/msXcU/src/gradients.jl:224 [inlined]
  [6] (::GalacticOptim.var"#180#186"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}})(::Vector{Float32}, ::Vector{Float32})
    @ GalacticOptim ~/.julia/packages/GalacticOptim/DHxE0/src/function/finitediff.jl:14
  [7] (::GalacticOptim.var"#130#138"{OptimizationProblem{false, OptimizationFunction{false, GalacticOptim.AutoFiniteDiff, OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, GalacticOptim.var"#180#186"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#182#188"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#184#190", Nothing, Nothing, Nothing}, Vector{Float32}, SciMLBase.NullParameters, Nothing, Nothing, Nothing, Nothing, Base.Iterators.Pairs{Symbol, Any, Tuple{Symbol, Symbol}, NamedTuple{(:cb, :g_tol), Tuple{var"#76#80", Float32}}}}, GalacticOptim.var"#129#137"{OptimizationProblem{false, OptimizationFunction{false, GalacticOptim.AutoFiniteDiff, OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, GalacticOptim.var"#180#186"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#182#188"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#184#190", Nothing, Nothing, Nothing}, Vector{Float32}, SciMLBase.NullParameters, Nothing, Nothing, Nothing, Nothing, Base.Iterators.Pairs{Symbol, Any, Tuple{Symbol, Symbol}, NamedTuple{(:cb, :g_tol), Tuple{var"#76#80", Float32}}}}, OptimizationFunction{false, GalacticOptim.AutoFiniteDiff, OptimizationFunction{false, GalacticOptim.AutoFiniteDiff, OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, GalacticOptim.var"#180#186"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#182#188"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#184#190", Nothing, Nothing, Nothing}, GalacticOptim.var"#180#186"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, 
GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#182#188"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#184#190", Nothing, Nothing, Nothing}}, OptimizationFunction{false, GalacticOptim.AutoFiniteDiff, OptimizationFunction{false, GalacticOptim.AutoFiniteDiff, OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, GalacticOptim.var"#180#186"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#182#188"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#184#190", Nothing, Nothing, Nothing}, GalacticOptim.var"#180#186"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#182#188"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#184#190", Nothing, Nothing, Nothing}})(G::Vector{Float32}, θ::Vector{Float32})
    @ GalacticOptim ~/.julia/packages/GalacticOptim/DHxE0/src/solve/optim.jl:93
  [8] value_gradient!!(obj::TwiceDifferentiable{Float32, Vector{Float32}, Matrix{Float32}, Vector{Float32}}, x::Vector{Float32})
    @ NLSolversBase ~/.julia/packages/NLSolversBase/cfJrN/src/interface.jl:82
  [9] initial_state(method::BFGS{LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Nothing, Float32, Flat}, options::Optim.Options{Float64, GalacticOptim.var"#_cb#136"{var"#76#80", BFGS{LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Nothing, Float32, Flat}, Base.Iterators.Cycle{Tuple{GalacticOptim.NullData}}}}, d::TwiceDifferentiable{Float32, Vector{Float32}, Matrix{Float32}, Vector{Float32}}, initial_x::Vector{Float32})
    @ Optim ~/.julia/packages/Optim/rES57/src/multivariate/solvers/first_order/bfgs.jl:94
 [10] optimize(d::TwiceDifferentiable{Float32, Vector{Float32}, Matrix{Float32}, Vector{Float32}}, initial_x::Vector{Float32}, method::BFGS{LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Nothing, Float32, Flat}, options::Optim.Options{Float64, GalacticOptim.var"#_cb#136"{var"#76#80", BFGS{LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Nothing, Float32, Flat}, Base.Iterators.Cycle{Tuple{GalacticOptim.NullData}}}})
    @ Optim ~/.julia/packages/Optim/rES57/src/multivariate/optimize/optimize.jl:35
 [11] ___solve(prob::OptimizationProblem{false, OptimizationFunction{false, GalacticOptim.AutoFiniteDiff, OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, GalacticOptim.var"#180#186"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#182#188"{Vector{Float32}, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, GalacticOptim.var"#179#185"{OptimizationFunction{true, GalacticOptim.AutoFiniteDiff{Val{:forward}, Val{:hcentral}}, DiffEqFlux.var"#82#87"{Float32}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Nothing}}, GalacticOptim.var"#184#190", Nothing, Nothing, Nothing}, Vector{Float32}, SciMLBase.NullParameters, Nothing, Nothing, Nothing, Nothing, Base.Iterators.Pairs{Symbol, Any, Tuple{Symbol, Symbol}, NamedTuple{(:cb, :g_tol), Tuple{var"#76#80", Float32}}}}, opt::BFGS{LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Nothing, Float32, Flat}, data::Base.Iterators.Cycle{Tuple{GalacticOptim.NullData}}; cb::Function, maxiters::Int64, maxtime::Nothing, abstol::Nothing, reltol::Nothing, progress::Bool, kwargs::Base.Iterators.Pairs{Symbol, Float32, Tuple{Symbol}, NamedTuple{(:g_tol,), Tuple{Float32}}})
    @ GalacticOptim ~/.julia/packages/GalacticOptim/DHxE0/src/solve/optim.jl:129
 [12] #__solve#127
    @ ~/.julia/packages/GalacticOptim/DHxE0/src/solve/optim.jl:49 [inlined]
 [13] #solve#476
    @ ~/.julia/packages/SciMLBase/x3z0g/src/solve.jl:3 [inlined]
 [14] sciml_train(::Float32, ::Vector{Float32}, ::BFGS{LineSearches.InitialStatic{Float64}, LineSearches.HagerZhang{Float64, Base.RefValue{Bool}}, Nothing, Float32, Flat}, ::Nothing; lower_bounds::Nothing, upper_bounds::Nothing, maxiters::Int64, kwargs::Base.Iterators.Pairs{Symbol, Any, Tuple{Symbol, Symbol}, NamedTuple{(:cb, :g_tol), Tuple{var"#76#80", Float32}}})
    @ DiffEqFlux ~/.julia/packages/DiffEqFlux/jpIWG/src/train.jl:89
 [15] optimise_p(p_init::Vector{Float32}, prob::ODEProblem{Vector{Float32}, Tuple{Float32, Float32}, true, Vector{Float32}, ODEFunction{true, ModelingToolkit.var"#f#253"{RuntimeGeneratedFunctions.RuntimeGeneratedFunction{(:ˍ₋arg1, :ˍ₋arg2, :t), ModelingToolkit.var"#_RGF_ModTag", ModelingToolkit.var"#_RGF_ModTag", (0x2f80bcbe, 0xac1fa29b, 0xb0dcfa15, 0x5640cd1e, 0x8eaf4a5d)}, RuntimeGeneratedFunctions.RuntimeGeneratedFunction{(:ˍ₋out, :ˍ₋arg1, :ˍ₋arg2, :t), ModelingToolkit.var"#_RGF_ModTag", ModelingToolkit.var"#_RGF_ModTag", (0x0ecdd92a, 0xc8ee1a18, 0xcba5708e, 0x17895940, 0xca0c871c)}}, UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Vector{Symbol}, Symbol, ModelingToolkit.var"#246#generated_observed#260"{Bool, ODESystem, Dict{Any, Any}}, Nothing}, Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, i::Int64, tend::Float32)
    @ Main ~/nikhil/s rao reduction of chem reaction/model_red_julia/mainusing jacobian.jl:234
 [16] top-level scope
    @ none:1

Fix your loss function.

@ChrisRackauckas
Something like this?
https://diffeqflux.sciml.ai/dev/examples/divergence/

No, your loss function most likely doesn’t even work if you call it outside of sciml_train. You didn’t share anything, so all I can really say is that the function literally fails if you just do loss(p); fix that before trying to solve an inverse problem with it.

Actually it works with ADAM, but I was trying to follow the docs tutorial on parameter estimation for stiff equations, i.e. running BFGS() or other Optim optimizers after ADAM.
I am using the loss function below, training over multiple tspans by increasing tend iteratively (a rough sketch of how I call it follows the function) to get a good parameter estimate.
Js is the Jacobian, the same as in the tutorial.
i is simply an index to keep the datasets the same size (my predicted ODEProblem is a reduced system with some state variables deleted, and I am trying to estimate good parameters for this reduced system).

function optimise_p(p_init, prob, i, tend)
    # Reference data: the full ("true") ODEProblem solved with the true parameters
    sol_real = solve(remake(oprob, tspan = (0.0f0, tend)),
                saveat = 1.0, save_idxs = important_specs[i])
    function predict_adjoint(p)
        p = exp.(p)  # parameters are optimized in log-space
        # Reduced ODEProblem with fewer state variables than `oprob`
        _prob = remake(prob, tspan = (0.0f0, tend), p = p)
        solve(_prob, saveat = sol_real.t)
    end
    function loss(p)
        pred = predict_adjoint(p)
        # Weight the squared residuals by the Jacobian blocks (Js), as in the tutorial
        diff = map((J, u, data) -> J[important_specs[i], important_specs[i]] * abs2.(u .- data),
                    Js, pred.u, sol_real.u)
        l = sum(abs2, sum(diff))
        return l, pred
    end
    cb = function (p, l, pred)
        println("loss : $l")
        println("Parameters : $(exp.(p))")
#        plot(sol_new.t, sol_new[7,:], lw=3) |> display
#        new_pred = solve(remake(prob, tspan = (0.0, 700.0), p=p), saveat = tsteps)
#        plot!(new_pred.t, new_pred[7,:]) |> display
        return false # do not halt the optimization
    end
    res = DiffEqFlux.sciml_train(loss, p_init, ADAM(0.01f0), cb = cb, maxiters = 1000)
    return res
end
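For context, I call it roughly like this (a sketch only; p_guess and the tend schedule are placeholders, not my actual values, and prob/i are the same arguments as in optimise_p):

let p0 = log.(p_guess)                        # initial guess, optimized in log-space
    for tend in (50.0f0, 100.0f0, 200.0f0)    # placeholder schedule of increasing end times
        res = optimise_p(p0, prob, i, tend)
        p0 = res.minimizer                    # warm-start the next, longer window (may be res.u in newer versions)
    end
end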

Also, could you help me understand the tutorial on parameter estimation for stiff equations:

  1. What purpose does exp.(p) serve?
  2. What purpose does scaling the loss by the Jacobian matrix serve?
     Or: why was a plain sum(abs2, sol .- data) not enough?

Those are just common tricks for stiff equations: the exp.(p) stabilizes the loss, which generally has values changing over orders of magnitude, and the Jacobian scaling is a form of problem-based normalization for relative scale.
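Schematically (a rough sketch, not tutorial code; p_guess, data_u, and Js are placeholder names), the two tricks look like this:

θ0 = log.(p_guess)                                # optimize θ = log(p): unconstrained and well-scaled
predict(θ) = solve(remake(prob, p = exp.(θ)), saveat = sol_real.t)   # exp.(θ) keeps every parameter positive

plain_loss(θ)  = sum(abs2, reduce(hcat, predict(θ).u) .- reduce(hcat, data_u))  # large-magnitude states dominate
scaled_loss(θ) = sum(abs2, sum(map((J, u, d) -> J * abs2.(u .- d),             # Jacobian re-weights residuals so
                                   Js, predict(θ).u, data_u)))                 # each state counts at its own scale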

That’s still not enough to run, so it’s not debuggable, but anyway: just fix wherever you did x(0f0) or whatnot, where x is a floating-point number and not a function, and you’re good.
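Concretely, the stack trace shows sciml_train(::Float32, ...), i.e. the first argument of the BFGS call is a number rather than the loss function. The second call should look roughly like this (a sketch; the result field may be res.minimizer or res.u depending on the DiffEqFlux/GalacticOptim versions):

res  = DiffEqFlux.sciml_train(loss, p_init, ADAM(0.01f0), cb = cb, maxiters = 1000)
res2 = DiffEqFlux.sciml_train(loss, res.minimizer, BFGS(initial_stepnorm = 0.01f0),
                              cb = cb, maxiters = 100)   # pass the function `loss`, not a loss value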