Solving a UDE segfaults or runs with poor performance

Hi,
I’m trying to optimize a set of circuit equations with a UDE, following this example from the SciML docs.

U = Lux.Chain(Lux.Dense(3, 32, Lux.relu), Lux.Dense(32, 32, Lux.relu),
              Lux.Dense(32, 32, Lux.relu), Lux.Dense(32, 2))
p_nn, st_nn = Lux.setup(rng, U)

# RHS of the UDE: two physical states (charge Q and current I) plus a
# neural-network correction Û added to both equations.
function ude_dynamics!(du, u, p_nn, t, p_ode)
    Q, I = u[1], u[2]
    Û = U([Q, I, t], p_nn, st_nn)[1]   # Lux returns (output, state); keep the output

    du[1] = -I + Û[1]
    du[2] = (1 / L_mod - n_mod * L̃) * (Q / C_mod - (R_mod + n_mod * R_line) * I) + Û[2]
end

# Closure to be used in the ODEProblem. The physical parameters come from global
# scope, so pass nothing for p_ode.
nn_dynamics!(du, u, p, t) = ude_dynamics!(du, u, p, t, nothing)

u0 = [n_mod * V₀ * C_mod, 0.0]

prob_ude = ODEProblem(nn_dynamics!, u0, (trg_ode[1], trg_ode[end]), p_nn)
sol_ude = solve(prob_ude, Tsit5(), reltol=1e-6, abstol=1e-6)

# Re-solve the ODE with the current NN parameters θ.
function predict(θ, u0=u0, trg=trg_ode)
    _prob = remake(prob_ude, u0=u0, tspan=(trg[1], trg[end]), p=θ)
    Array(solve(_prob, Tsit5(), saveat=trg, reltol=1e-6, abstol=1e-6))
end

# Loss is L2-distance to true current.
true_I = load_true_I(trg_ode)
function loss(θ)
    Î = predict(θ)[2,:]
    mean(abs2, Î - true_I)
end

losses = Float64[]

callback = function (p, l)
    push!(losses, l)
    if length(losses) % 50 == 0
        println("Current loss after $(length(losses)) iterations: $(losses[end])")
    end
    return false
end

adtype = Optimization.AutoZygote()
optf = Optimization.OptimizationFunction((x, p) -> loss(x), adtype)
optprob = Optimization.OptimizationProblem(optf, ComponentVector{Float64}(p_nn))
res1 = Optimization.solve(optprob, ADAM(), callback = callback, maxiters = 5000)
println("Training loss after $(length(losses)) iterations: $(losses[end])")

I’m having difficulties running this code. Most of the time I get a segfault:


julia> res1 = Optimization.solve(optprob, ADAM(), callback = callback, maxiters = 10)
┌ Warning: Using fallback BLAS replacements, performance may be degraded
└ @ Enzyme.Compiler ~/.julia/packages/GPUCompiler/cy24l/src/utils.jl:56
GC error (probable corruption) :
Allocations: 288657556 (Pool: 288579160; Big: 78396); GC: 414

!!! ERROR in jl_ -- ABORTING !!!
0x165000000: Queued root: 0x2f9b18920 :: 0x120d301c0 (bits: 3)
        of type Core.MethodInstance
0x165000018: Queued root: 0x172dd98d0 :: 0x120d2de10 (bits: 3)
        of type Array{Any, 1}
0x165000030: Queued root: 0x2bab6d990 :: 0x120d2de10 (bits: 3)
        of type Array{Any, 1}
0x165000048: Queued root: 0x2a628e410 :: 0x120d2de10 (bits: 7)
        of type Array{Any, 1}
0x165000060: Queued root: 0x10ea39b30 :: 0x120d308d0 (bits: 3)
        of type Core.SimpleVector
0x165000078: Queued root: 0x10e129c90 :: 0x120d2de10 (bits: 3)
        of type Array{Any, 1}
0x165000090: Queued root: 0x1733ccc90 :: 0x120d308d0 (bits: 3)
        of type Core.SimpleVector
0x1650000a8: Queued root: 0x2f1c8cec0 :: 0x120d301c0 (bits: 3)
        of type Core.MethodInstance
0x1650000c0: Queued root: 0x2e6b00a30 :: 0x120d2de10 (bits: 3)
        of type Array{Any, 1}
0x1650000d8: Queued root: 0x10c020b80 :: 0x120d2de10 (bits: 3)
        of type Array{Any, 1}
0x1650000f0: Queued root: 0x2f0edb460 :: 0x120d2de10 (bits: 3)
        of type Array{Any, 1}
0x165000108: Queued root: 0x2f0ed9c00 :: 0x120d2de10 (bits: 3)
        of type Array{Any, 1}
0x165000120: Queued root: 0x2b9ec8cd0 :: 0x120d2de10 (bits: 3)
        of type Array{Any, 1}
0x165000138: Queued root: 0x17240d560 :: 0x120d308d0 (bits: 3)
        of type Core.SimpleVector
0x165000150: Queued root: 0x111833250 :: 0x120d2de10 (bits: 7)
        of type Array{Any, 1}
0x165000168: Queued root: 0x111832fc0 :: 0x120d2de10 (bits: 7)
        of type Array{Any, 1}
0x165000180: Queued root: 0x10c52f450 :: 0x120d2de10 (bits: 3)
        of type Array{Any, 1}
[...]
0x165001ed8: Queued root: 0x10b32d2d0 :: 0x120d2e000 (bits: 3)
        of type Task
0x165001ef0:  r-- Stack frame 0x16d020700 -- 66 of 134 (direct)
0x165001f18:   `- Object (16bit) 0x2bbfa6710 :: 0x2e7b9d6d1 -- [2, 96)
        of type Enzyme.Compiler.Tape{NamedTuple{(Symbol("1"), Symbol("2")), Tuple{NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8"), Symbol("9"), Symbol("10"), Symbol("11")), Tuple{NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8"), Symbol("9"), Symbol("10"), Symbol("11"), Symbol("12"), Symbol("13"), Symbol("14"), Symbol("15"), Symbol("16"), Symbol("17"), Symbol("18"), Symbol("19"), Symbol("20"), Symbol("21"), Symbol("22"), Symbol("23"), Symbol("24"), Symbol("25")), Tuple{Any, Any, NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8"), Symbol("9"), Symbol("10")), Tuple{NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8")), Tuple{Core.LLVMPtr{UInt8, 0}, Core.LLVMPtr{Float64, 0}, Core.LLVMPtr{Float64, 0}, UInt64, Core.LLVMPtr{UInt8, 0}, UInt64, Core.LLVMPtr{Float64, 0}, Core.LLVMPtr{Float64, 0}}}, Any, Any, Any, Any, Any, Any, Any, Any, UInt64}}, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, UInt64, Bool, Bool, Bool, UInt64, Bool, Bool, Core.LLVMPtr{Bool, 0}, Core.LLVMPtr{Bool, 0}}}, NamedTuple{(Symbol("1"),), Tuple{Any}}, NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8"), Symbol("9"), Symbol("10"), Symbol("11"), Symbol("12"), Symbol("13"), Symbol("14"), Symbol("15"), Symbol("16"), Symbol("17"), Symbol("18"), Symbol("19"), Symbol("20"), Symbol("21"), Symbol("22"), Symbol("23"), Symbol("24"), Symbol("25")), Tuple{Any, Any, NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8"), Symbol("9"), Symbol("10")), Tuple{NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8")), Tuple{Core.LLVMPtr{UInt8, 0}, Core.LLVMPtr{Float64, 0}, Core.LLVMPtr{Float64, 0}, UInt64, Core.LLVMPtr{UInt8, 0}, UInt64, Core.LLVMPtr{Float64, 0}, Core.LLVMPtr{Float64, 0}}}, Any, Any, Any, Any, Any, Any, Any, Any, UInt64}}, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, UInt64, Bool, Bool, Bool, UInt64, Bool, Bool, Core.LLVMPtr{Bool, 0}, Core.LLVMPtr{Bool, 0}}}, NamedTuple{(Symbol("1"),), Tuple{Any}}, NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8"), Symbol("9"), Symbol("10"), Symbol("11"), Symbol("12"), Symbol("13"), Symbol("14"), Symbol("15"), Symbol("16"), Symbol("17"), Symbol("18"), Symbol("19"), Symbol("20"), Symbol("21"), Symbol("22"), Symbol("23"), Symbol("24"), Symbol("25")), Tuple{Any, Any, NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8"), Symbol("9"), Symbol("10")), Tuple{NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8")), Tuple{Core.LLVMPtr{UInt8, 0}, Core.LLVMPtr{Float64, 0}, Core.LLVMPtr{Float64, 0}, UInt64, Core.LLVMPtr{UInt8, 0}, UInt64, Core.LLVMPtr{Float64, 0}, Core.LLVMPtr{Float64, 0}}}, Any, Any, Any, Any, Any, Any, Any, Any, UInt64}}, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, UInt64, Bool, Bool, Bool, UInt64, Bool, Bool, Core.LLVMPtr{Bool, 0}, Core.LLVMPtr{Bool, 0}}}, NamedTuple{(Symbol("1"),), Tuple{Any}}, NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8"), 
Symbol("9"), Symbol("10"), Symbol("11"), Symbol("12"), Symbol("13"), Symbol("14"), Symbol("15")), Tuple{Any, Any, NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8"), Symbol("9"), Symbol("10")), Tuple{NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3"), Symbol("4"), Symbol("5"), Symbol("6"), Symbol("7"), Symbol("8")), Tuple{Core.LLVMPtr{UInt8, 0}, Core.LLVMPtr{Float64, 0}, Core.LLVMPtr{Float64, 0}, UInt64, Core.LLVMPtr{UInt8, 0}, UInt64, Core.LLVMPtr{Float64, 0}, Core.LLVMPtr{Float64, 0}}}, Any, Any, Any, Any, Any, Any, Any, Any, UInt64}}, Any, Any, Any, Any, Any, Any, Any, Any, Any, Bool, Bool, Bool}}, NamedTuple{(Symbol("1"),), Tuple{Any}}, Any, Any, Any}}, NamedTuple{(Symbol("1"),), Tuple{Any}}}}, Nothing, Tuple{Array{Float64, 1}, NamedTuple{(:layer_1, :layer_2, :layer_3, :layer_4), NTuple{4, NamedTuple{(), Tuple{}}}}}}

[71577] signal (6): Abort trap: 6
in expression starting at REPL[21]:1
__pthread_kill at /usr/lib/system/libsystem_kernel.dylib (unknown line)
Allocations: 288657556 (Pool: 288579160; Big: 78396); GC: 414
[1]    71577 abort      /Applications/Julia-1.9.app/Contents/Resources/julia/bin/julia --project=. 

This is on an M2 Mac with Julia 1.9.

The code runs on a different machine (AMD-based) with Julia 1.9, but only when loading the script from the REPL. In that case, this warning appears at every optimizer step:

┌ Warning: EnzymeVJP tried and failed in the automated AD choice algorithm with the following error. (To turn off this printing, add `verbose = false` to the `solve` call)
└ @ SciMLSensitivity ~/.julia/packages/SciMLSensitivity/E8w3Z/src/concrete_solve.jl:21
MethodError: no method matching asprogress(::Base.CoreLogging.LogLevel, ::String, ::Module, ::Symbol, ::Symbol, ::String, ::Int64)
The applicable method may be too new: running in world age 33663, while current world is 34006.

Closest candidates are:
  asprogress(::Any, ::Any, ::Any, ::Any, ::Any, ::Any, ::Any; progress, kwargs...) (method too new to be called from this world context.)
   @ ProgressLogging ~/.julia/packages/ProgressLogging/6KXlp/src/ProgressLogging.jl:156
  asprogress(::Any, ::ProgressLogging.Progress, ::Any...; _...) (method too new to be called from this world context.)
   @ ProgressLogging ~/.julia/packages/ProgressLogging/6KXlp/src/ProgressLogging.jl:155
  asprogress(::Any, ::ProgressLogging.ProgressString, ::Any...; _...) (method too new to be called from this world context.)
   @ ProgressLogging ~/.julia/packages/ProgressLogging/6KXlp/src/ProgressLogging.jl:200

Also, the performance is quite slow; it runs only on a single core.

Any ideas on how to get this code to run, and how to increase performance (multi-threading or GPU?), are appreciated.

Not necessarily a solution to your immediate problem, but you should definitely make U type stable by declaring it as const U = Lux.Chain(...).
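For reference, a minimal sketch of that change applied to the code above (making st_nn const as well is my addition, since ude_dynamics! also reads it from global scope):

# Marking the globals captured by the RHS as const makes them type stable.
const U = Lux.Chain(Lux.Dense(3, 32, Lux.relu), Lux.Dense(32, 32, Lux.relu),
                    Lux.Dense(32, 32, Lux.relu), Lux.Dense(32, 2))
const p_nn, st_nn = Lux.setup(rng, U)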

@ChrisRackauckas can you update the SciML docs to have the performance-correct version by default?

I fixed the segfaults by thinking really hard about how the neural network should integrate with the right-hand side of the ODE system.

Now I’m mostly left with warnings like

┌ Warning: EnzymeVJP tried and failed in the automated AD choice algorithm with the following error. (To turn off this printing, add `verbose = false` to the `solve` call)
└ @ SciMLSensitivity ~/.julia/packages/SciMLSensitivity/M3EmS/src/concrete_solve.jl:21
Enzyme execution failed.
Mismatched activity for:   store i64 %32, i64 addrspace(10)* %31, align 8, !dbg !41, !tbaa !61 const val:   %32 = load i64, i64 addrspace(11)* %30, align 8, !dbg !41, !tbaa !61
Type tree: {[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}
You may be using a constant variable as temporary storage for active memory (https://enzyme.mit.edu/julia/stable/#Activity-of-temporary-storage). If not, please open an issue, and either rewrite this variable to not be conditionally active or use Enzyme.API.runtimeActivity!(true) as a workaround for now

This goes away when I set runtimeActivity!(true). The warning refers to this RHS:

function ude_dynamics!(du, u, p_nn, t, p_ode)
    Q, I = u[1], u[2]
    # The NN now predicts a single scalar correction to the inductance.
    Û = U([Q, scaling1 * I, scaling2 * t], p_nn, st_nn)[1][1]

    L_nn = scaling3 * Û
    du[1] = -I
    du[2] = (1 / (L_mod + L_nn) - n_mod * L̃) * (Q / C_mod - (R_mod + n_mod * R_line) * I)
end

but I don’t understand how this function uses a constant variable as temporary storage for active memory. The factors scaling1, scaling2, and scaling3 are just scaling constants.

┌ Warning: EnzymeVJP tried and failed in the automated AD choice algorithm with the following error. (To turn off this printing, add `verbose = false` to the `solve` call)
└ @ SciMLSensitivity ~/.julia/packages/SciMLSensitivity/M3EmS/src/concrete_solve.jl:21
MethodError: no method matching asprogress(::Base.CoreLogging.LogLevel, ::String, ::Module, ::Symbol, ::Symbol, ::String, ::Int64)

or

┌ Warning: EnzymeVJP tried and failed in the automated AD choice algorithm with the following error. (To turn off this printing, add `verbose = false` to the `solve` call)
└ @ SciMLSensitivity ~/.julia/packages/SciMLSensitivity/M3EmS/src/concrete_solve.jl:21
AssertionError: state != MixedState

and

Warning: Using fallback BLAS replacements, performance may be degraded

The optimization runs single-threaded, using 100% of one CPU core, and I’d like to improve that. Should I look into fixing the BLAS warning to get better CPU utilization, or would moving to the GPU help for this use case?

Interesting, I cannot reproduce. @rkube can you share your ]st? Are you on Enzyme v0.11 like the docs build? I am testing on an M2 and it was fine.

Those micro-optimizations don’t really matter all that much. Before:

42.903593 seconds (312.79 M allocations: 62.209 GiB, 30.13% gc time)

after:

35.588541 seconds (218.71 M allocations: 56.801 GiB, 32.23% gc time)

I don’t think the first tutorial should go ham on all the performance details. If you actually want to make this fast, then you should do the SimpleChains version, which improves it by more than an order of magnitude.

But I also wouldn’t suggest every user be doing that kind of optimization. That’s just for those who really need the extra performance.
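For those who do want to go that route, a rough sketch of what the network above might look like in SimpleChains (written from memory of the SimpleChains.jl API, not taken from the tutorial, so check its docs before relying on it):

using SimpleChains

# Same 3-32-32-32-2 architecture; SimpleChains keeps parameters in a flat vector
# and has no separate state, so the network is called as U_sc(x, p_sc).
U_sc = SimpleChain(
    static(3),                  # input: [Q, I, t]
    TurboDense(tanh, 32),       # the Lux version used relu; tanh shown here
    TurboDense(tanh, 32),
    TurboDense(tanh, 32),
    TurboDense(identity, 2),
)
p_sc = SimpleChains.init_params(U_sc)   # flat parameter vector to optimize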

Let me explain what’s going on here, somewhat for @wsmoses’s sake. The adjoint of an ODE is an ODE, so the adjoint overload of solve is to solve a new ODE. That new ODE is “basically” the vjp of the original ODE. So what the adjoint definition does is take the original ODE and the sensealg keyword argument, and use the autojacvec choice to define the reverse ODE.
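(Concretely, this is the standard continuous adjoint result: for du/dt = f(u, p, t), the adjoint λ(t) satisfies its own ODE, dλ/dt = -(∂f/∂u)ᵀ λ, solved backwards in time, and the parameter gradient is accumulated from terms of the form λᵀ ∂f/∂p. Both of those right-hand sides are vector-Jacobian products of f, which is exactly what the autojacvec choice computes.)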

If this is not specified (as in the tutorial, and as is recommended for almost all users), then it uses a polyalgorithm to choose the adjoint. The polyalgorithm isn’t all that hard to read (https://github.com/SciML/SciMLSensitivity.jl/blob/v7.31.0/src/concrete_solve.jl#L6-L69). For an in-place ODE definition (out-of-place has a different tree), it’s basically:

  • Try Enzyme. If Enzyme doesn’t error (try/catch) then use it. Why? Because it’s generally the fastest, but supports the least amount of code.
  • Next run Cassette to check the branching behavior in the code. If the code satisfies certain qualities, then use ReverseDiff with compiled tapes, otherwise use ReverseDiff without compiled tapes.

And this served us well for a very long time, because Enzyme used to have the property that it generally only worked on code for which it was the fastest. But there have been a few recent developments:

  1. Enzyme v0.11 supports a lot more code, so it’s being chosen a lot more often.
  2. The warnings are quite new. EnzymeVJP tried and failed in the automated AD choice algorithm with the following error. (To turn off this printing, add verbose = false to the solve call) is something we added very recently. It throws the warning and then shares the stacktrace for why Enzyme failed; before, it was just invisible. I think it’s good to share this information to help people end up with code that differentiates better, but it is quite noisy in a loop, and we might want to reconsider sharing the stack trace, or provide a function that helps people recreate it locally.
  3. Enzyme now supports some code for which it’s not the fastest, notably its behavior with BLAS.

So what’s going on here is that it does the try/catch with Enzyme, fails, and falls back to ReverseDiff with compiled tape in the first scenario, and then in the scenario with runtimeActivity!(true) it’s just Enzyme slow mode.

Now, don’t look at the try/catch itself as the performance issue. Profile if you don’t believe me, but an ODE solve does hundreds to thousands of vjp calls, so the single vjp call in the try/catch is minuscule. What’s more likely the culprit here is that the printing to IO is not light, so if you follow what the warning says and add verbose=false you should be down to about the 40 second training time for 5000 iterations (and if you then set it up with StaticArrays and SimpleChains, you should get something closer to 4 seconds).
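Concretely, that is just adding the keyword to the inner solve in the predict function from the first post (a sketch reusing the original names):

function predict(θ, u0=u0, trg=trg_ode)
    _prob = remake(prob_ude, u0=u0, tspan=(trg[1], trg[end]), p=θ)
    # verbose=false silences the per-step "EnzymeVJP tried and failed" warnings
    Array(solve(_prob, Tsit5(), saveat=trg, reltol=1e-6, abstol=1e-6, verbose=false))
end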

But again, since I cannot reproduce this right now (maybe a different version of Enzyme?) and am not getting any warnings, I do not know how much of a timing difference the printing is making, so please post explicit timings before and after verbose=false. I am curious whether it would make a difference in your case.

Or of course, the other way to make it quieter is to not use the polyalgorithm and instead explicitly choose ReverseDiffVJP like:

function predict(θ, X = Xₙ[:, 1], T = t)
    _prob = remake(prob_nn, u0 = X, tspan = (T[1], T[end]), p = θ)
    Array(solve(_prob, Vern7(), saveat = T,
                abstol = 1e-6, reltol = 1e-6, sensealg = QuadratureAdjoint(autojacvec=ReverseDiffVJP(true))))
end

That’s all deep in the docs (SciMLSensitivity: Automatic Differentiation and Adjoints for (Differential) Equation Solvers · SciMLSensitivity.jl) for when someone really feels the need to optimize, but I hope most people don’t need to be swapping sensealgs. It does bring this example down to:

29.977247 seconds (94.56 M allocations: 9.165 GiB, 11.49% gc time)

So it does cut off a nice chunk of time, but again, I wouldn’t put that into a first tutorial, because otherwise people are going to copy-paste sensealg = QuadratureAdjoint(autojacvec=ReverseDiffVJP(true)) and use it in places where it’s not appropriate. It’s not the kind of thing to put into a first example that doesn’t dive into detail on the adjoint choices (which this tutorial shouldn’t be doing; that’s for the SciMLSensitivity package and the people who truly care).

With all that said, I think this is just showcasing a few things that are a bit up in the air in my mind right now:

  1. Should we throw these kinds of warnings by default, or just silently choose the best AD for the vjp and move on? Until about 2 weeks ago it was silent unless a numerical error occurred; now it’s noisy whenever Enzyme fails.
  2. Is there a better way to test for Enzyme compatibility other than running the code? I asked @wsmoses before and the answer was no.
  3. Is the warning message good and clear? From the look of how this thread has gone, it seems the user didn’t even see the verbose=false option or understand what alternatives were available, so I think the warning message was both noisy and failed to do its job.

@rkube I assume you’re on an unreleased main branch of Enzyme.jl since that error about runtime activity has not been tagged in a release version.

That error is an over-aggressive warning that an operation being performed could (but will not necessarily) lead to incorrect results. It’s over-aggressive at the moment because we figure it’s better for you to have confidence that your answer is correct, even if it means seeing the error and turning on runtimeActivity more often than strictly necessary.

That said, we are working both to make the error less aggressive and to resolve the underlying issue.
Also, while enabling runtime activity does lead to a performance reduction, it should not be substantial.

You should also get a backtrace of where and why the runtime activity error is occurring, but unfortunately it’s limited to basically a few function calls back until the corresponding PRs to Julia and LLVM.jl land to enable better backtraces (Add Interface to julia OJIT by gbaraldi · Pull Request #346 · maleadt/LLVM.jl · GitHub), (https://github.com/JuliaLang/julia/pull/49858). I’m told this will only enable it for the (not yet released) Julia 1.10, but maybe if you ask nicely enough someone can be convinced to backport it.

By turning on the runtime activity flag, the program appears to succeed (and should be correct, at the cost of that minor performance reduction).
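For anyone following along, enabling the flag is the one-liner the warning itself points to, set before the first gradient/solve call:

import Enzyme
Enzyme.API.runtimeActivity!(true)   # global switch; relaxes Enzyme's activity analysis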

The much more substantial performance issue is this:

Warning: Using fallback BLAS replacements, performance may be degraded

In essence, this says we have not finished implementing our internal BLAS and have a fallback implementation which exists and is correct, but is single core and may be substantially slower. Ironically we are presently on the verge of merging the much faster one for most cases of dot product, matrix multiply, and matrix vector multiply (Gemv by ZuseZ4 · Pull Request #1208 · EnzymeAD/Enzyme · GitHub) with more coming soon. cc @ZuseZ4 who is leading that effort (though FYI that branch doesn’t support runtime activity yet).

The BLAS warning isn’t something you can fix on your end; it refers to an internal feature that is under development. That said, you can work around it by writing a custom Enzyme rule (Custom rules · Enzyme.jl). Finding where to write it, however, may be a bit tricky without the better backtraces having landed in LLVM.jl.


Thanks @ChrisRackauckas and @wsmoses for explaining these issues. Setting verbose=false gets rid of all warnings and brings the runtime down. To speed it up, I’ll try SimpleChains and StaticArrays. Here is the package status you asked for:

(circuit_simulator) pkg> st
Project circuit_simulator v0.1.0
Status `~/julia_envs/circuit_simulator/Project.toml`
  [13f3f980] CairoMakie v0.10.6
  [b0b7db55] ComponentArrays v0.13.12
  [2445eb08] DataDrivenDiffEq v1.2.0
  [5b588203] DataDrivenSparse v0.1.2
⌃ [0c46a032] DifferentialEquations v7.7.0
  [7da242da] Enzyme v0.11.1 `https://github.com/EnzymeAD/Enzyme.jl.git#main`
  [e9467ef8] GLMakie v0.8.6
  [a98d9a8b] Interpolations v0.14.7
  [b2108857] Lux v0.4.54
  [15e1cf62] NPZ v0.4.3
⌃ [7f7a1694] Optimization v3.14.0
  [36348300] OptimizationOptimJL v0.1.8
  [42dfb2eb] OptimizationOptimisers v0.1.2
⌃ [1dea7af3] OrdinaryDiffEq v6.52.0
⌃ [1ed8b502] SciMLSensitivity v7.29.0
  [860ef19b] StableRNGs v1.0.0
  [e88e6eb3] Zygote v0.6.62

Running this script, I noticed that for some choices of initial network parameters, res1.u does not update and the loss remains at its initial level. Is this some kind of silent failure where errors are suppressed by verbose=false?

Not any that is known, at least. It should be parameter dependent. Is the gradient a true zero or just small? It could well be a local minimum.
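One quick way to check is to evaluate the gradient at the returned parameters directly, e.g. with Zygote (already in the environment above); just a sketch:

using Zygote
g = Zygote.gradient(loss, res1.u)[1]   # res1.u holds the parameters after training
println(maximum(abs, g))               # distinguishes an exact zero from a merely tiny gradient

If it is tiny but nonzero, a flat region or local minimum is more likely than a silent failure.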