Fitting a dynamic system with an exogenous input (nonhomogeneous neural ODE) via DiffEqFlux

This PR fixes your original code: https://github.com/SciML/DiffEqSensitivity.jl/pull/341 and it’ll be released later tonight if all tests pass.


True story… Thanks again :slight_smile:

Hi @ChrisRackauckas,

I would like to ask you for some help again. I have four topics where I have questions:

1.
First, I changed the implementation a little bit so that the model fits the nonlinear state-space representation \underline{\dot{x}}(t) = f(t, \underline{x}(t), \underline{u}(t)), i.e. du[1] = f(u[1], ex), where f is a neural network and ex is the excitation / input (here just a constant / step input).

using DifferentialEquations, Flux, Optim, DiffEqFlux, DiffEqSensitivity, Plots
using LaTeXStrings
plotly()

using JLD2

tspan = (0.1f0, Float32(10.0))
tsteps = range(tspan[1], tspan[2], length = 100)
t_vec = collect(tsteps)
ex = vec(ones(length(tsteps), 1))
f(x) = (atan(8.0 * x - 4.0) + atan(4.0)) / (2.0 * atan(4.0))

# Discrete-time Hammerstein benchmark: static nonlinearity f followed by a first-order linear filter
function hammerstein_system(u)
    y = zeros(size(u))
    for k in 2:length(u)
        y[k] = 0.2 * f(u[k-1]) + 0.8 * y[k-1]
    end
    return y
end

y = Float32.(hammerstein_system(ex))
plot(collect(tsteps), y, ticks=:native)

nn_model = FastChain(FastDense(2,25, tanh), FastDense(25, 25, tanh), FastDense(25, 1))
p_model = initial_params(nn_model)

u0 = Float32.([0.0])

function dudt(du, u, p, t)
    #input_val = u_vals[Int(round(t*10)+1)]
    du[1] = nn_model(reshape(vcat(u[1], ex[Int(round(t*10))]), 2, 1), p)[1]
end

nn_model(reshape(vcat(0.0, ex[Int(round(10*0.1))]), 2, 1), p_model)[1]

prob = ODEProblem(dudt,u0,tspan,nothing)

function predict_neuralode(p)
    _prob = remake(prob,p=p)
    Array(solve(_prob,Tsit5(), saveat=tsteps[tsteps .<= 0.5f0], abstol = 1e-8, reltol = 1e-6))
end

function loss(p)
    sol = predict_neuralode(p)
    N = length(sol)
    return sum(abs2.(y[1:N] .- sol))/N
end

# start optimization (I played around with several different optimizers with no success)
res0 = DiffEqFlux.sciml_train(loss,p_model ,ADAM(0.005), maxiters=300)
res1 = DiffEqFlux.sciml_train(loss,res0.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)

loss(res0.minimizer)
loss(res1.minimizer)

sol = predict_neuralode(res1.minimizer)
y_model = vec(Array(sol))

scatter(t_vec[1:length(y_model)], [y[1:length(y_model)] y_model], labels = [L"y"; L"\hat{y}"] |> permutedims, ticks=:native,xlabel=L"time\ in\ s", ylabel=L"y",extra_plot_kwargs = KW(
    :include_mathjax => "cdn",
    :yaxis => KW(:automargin => true),
    :xaxis => KW(:domain => "auto")
    ))


This implementation leads to the following error message:

ERROR: MethodError: *(::ReverseDiff.TrackedArray{Float32,Float32,2,Array{Float32,2},Array{Float32,2}}, ::ReverseDiff.TrackedArray{Float64,Float64,2,Array{Float64,2},Array{Float64,2}}) is ambiguous. Candidates:
  *(x::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA, y::AbstractArray{T,2} where T) where {V, D} in ReverseDiff at C:\Users\k582186.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:213
  *(x::AbstractArray{T,2} where T, y::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA) where {V, D} in ReverseDiff at C:\Users\k582186.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:214
  *(x::ReverseDiff.TrackedArray{V,D,N,VA,DA} where DA where VA where N, y::AbstractArray{T,2} where T) where {V, D} in ReverseDiff at C:\Users\k582186.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:213
  *(x::AbstractArray{T,2} where T, y::ReverseDiff.TrackedArray{V,D,N,VA,DA} where DA where VA where N) where {V, D} in ReverseDiff at C:\Users\k582186.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:214
  *(x::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA, y::AbstractArray) where {V, D} in ReverseDiff at C:\Users\k582186.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:213
  *(x::AbstractArray, y::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA) where {V, D} in ReverseDiff at C:\Users\k582186.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:214
  *(x::ReverseDiff.TrackedArray{V,D,N,VA,DA} where DA where VA where N, y::AbstractArray) where {V, D} in ReverseDiff at C:\Users\k582186.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:213
  *(x::AbstractArray, y::ReverseDiff.TrackedArray{V,D,N,VA,DA} where DA where VA where N) where {V, D} in ReverseDiff at C:\Users\k582186.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:214
Possible fix, define
  *(::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA, ::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA) where {V, D, V, D}

Do you have an idea why I get this error message?

If I change the function dudt to use two separate neural networks, i.e. du[1] = nn_model1(u, p[1:31])[1] + nn_model2(ex[Int(round(t*10.0))], p[32:end])[1], then it optimizes.

using DifferentialEquations, Flux, Optim, DiffEqFlux, DiffEqSensitivity, Plots
using LaTeXStrings
plotly()

# excitation of the system
tspan = (0.1f0, Float32(10.0))
tsteps = range(tspan[1], tspan[2], length = 100)
t_vec = collect(tsteps)
ex = vec(ones(length(tsteps), 1))
f(x) = (atan(8.0 * x - 4.0) + atan(4.0)) / (2.0 * atan(4.0))

function hammerstein_system(u)
    y = zeros(size(u))
    for k in 2:length(u)
        y[k] = 0.2 * f(u[k-1]) + 0.8 * y[k-1]
    end
    return y
end

y = Float32.(hammerstein_system(ex))

plot(collect(tsteps), y, ticks=:native)

nn_model1 = FastChain(FastDense(1,10, tanh), FastDense(10,1))
nn_model2 = FastChain(FastDense(1,10, tanh), FastDense(10,1))

p_model1 = initial_params(nn_model1)
p_model2 = initial_params(nn_model2)

u0 = Float32.([0.0])

function dudt(du, u, p, t)
    du[1] = nn_model1(u,p[1:31])[1] + nn_model2(ex[Int(round(t*10.0))],p[32:end])[1]
end

prob = ODEProblem(dudt,u0,tspan,nothing)

function predict_neuralode(p)
    _prob = remake(prob,p=p)
    Array(solve(_prob,Tsit5(), saveat=tsteps[tsteps .<= 0.3f0], abstol = 1e-8, reltol = 1e-6))
end

function loss(p)
    sol = predict_neuralode(p)
    N = length(sol)
    return sum(abs2.(y[1:N] .- sol))/N
end

res0 = DiffEqFlux.sciml_train(loss,vcat(p_model1,p_model2) ,ADAM(), maxiters=1000)
res1 = DiffEqFlux.sciml_train(loss,res0.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)

loss(res0.minimizer)
loss(res1.minimizer)

sol = predict_neuralode(res1.minimizer)
y_model = vec(Array(sol))

scatter(t_vec[1:length(y_model)], [y[1:length(y_model)] y_model], labels = [L"y"; L"\hat{y}"] |> permutedims, ticks=:native,xlabel=L"time\ in\ s", ylabel=L"y",extra_plot_kwargs = KW(
    :include_mathjax => "cdn",
    :yaxis => KW(:automargin => true),
    :xaxis => KW(:domain => "auto")
    ))



2.
Second, I'm not able to get the UDE to converge to a fit that is accurate enough. I tried to follow the instructions for avoiding local minima at https://diffeqflux.sciml.ai/stable/examples/local_minima/, and I also tried several different optimizers and model structures, but had no success.
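
The kind of interval-growing loop I mean looks roughly like this (just a rough sketch on top of the code above; the schedule of end times is arbitrary):

p_opt = p_model
for t_end in (1.0f0, 2.5f0, 5.0f0, 10.0f0)   # illustrative schedule of growing end times
    idx = tsteps .<= t_end
    # loss restricted to the current subinterval
    loss_partial = p -> begin
        sol = Array(solve(remake(prob, p = p), Tsit5(), saveat = tsteps[idx]))
        sum(abs2, y[idx] .- vec(sol)) / sum(idx)
    end
    res = DiffEqFlux.sciml_train(loss_partial, p_opt, ADAM(0.01), maxiters = 100)
    global p_opt = res.minimizer   # warm-start the next, longer interval
end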

[figure: ude_example]

Do you maybe have a tip for me?

3.
Third, I'm a little confused that I can't find examples analogous to my task (identification of a nonlinear state-space representation of an excited system, i.e. a non-autonomous or nonhomogeneous ODE, with UDEs). Is it really so uncommon? I think many systems need an excitation, or at least a nonzero initial state, to produce useful data about the system.

4.
Fourth, can you maybe explain why I should use UDEs instead of neural ODEs? In my understanding, UDEs should be used when one wants to incorporate additional knowledge about the system in the form of equations, but if the whole derivative is modeled by a neural network, the UDE is just a neural ODE. Or am I missing something?

I wouldn't use ReverseDiff or the in-place form here. Another mistake: notice that the solution has one row per output component, so in the loss function you were broadcasting a row vector against a column vector, which generates a matrix. That's not what you wanted.
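
To see the pitfall in isolation (a standalone snippet with made-up sizes, not part of the code below):

# The solver output has one row per state, so for a single state it is a 1×N
# row matrix, while the data is an N-element vector. Broadcasting the two gives
# an N×N matrix of all pairwise differences instead of N residuals.
sol_demo = ones(1, 5)            # stand-in for Array(solve(...)) with one state
y_demo   = collect(1.0f0:5.0f0)  # stand-in for the data vector
size(y_demo .- sol_demo)         # (5, 5) -- a full matrix, not what you wanted
size(y_demo .- vec(sol_demo))    # (5,)   -- elementwise residuals; transposing the solution works too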

Here's code that trains:

using DifferentialEquations, Flux, Optim, DiffEqFlux, DiffEqSensitivity, Plots

tspan = (0.1f0, Float32(10.0))
tsteps = range(tspan[1], tspan[2], length = 100)
t_vec = collect(tsteps)
ex = vec(ones(Float32,length(tsteps), 1))
f(x) = (atan(8.0 * x - 4.0) + atan(4.0)) / (2.0 * atan(4.0))

function hammerstein_system(u)
    y = zeros(size(u))
    for k in 2:length(u)
        y[k] = 0.2 * f(u[k-1]) + 0.8 * y[k-1]
    end
    return y
end

y = Float32.(hammerstein_system(ex))
plot(collect(tsteps), y, ticks=:native)

nn_model = FastChain(FastDense(2,8, tanh), FastDense(8, 1))
p_model = initial_params(nn_model)

u0 = Float32.([0.0])

function dudt(u, p, t)
    #input_val = u_vals[Int(round(t*10)+1)]
    nn_model(vcat(u[1], ex[Int(round(10*0.1))]), p)
end

prob = ODEProblem(dudt,u0,tspan,nothing)

function predict_neuralode(p)
    _prob = remake(prob,p=p)
    Array(solve(_prob, Tsit5(), saveat=tsteps, abstol = 1e-8, reltol = 1e-6))
end

function loss(p)
    sol = predict_neuralode(p)
    N = length(sol)
    return sum(abs2.(y[1:N] .- sol'))/N
end

# start optimization (I played around with several different optimizers with no success)
res0 = DiffEqFlux.sciml_train(loss,p_model ,ADAM(0.01), maxiters=100)
res1 = DiffEqFlux.sciml_train(loss,res0.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)

Flux.gradient(loss,res1.minimizer)

sol = predict_neuralode(res1.minimizer)
plot(tsteps,sol')
N = length(sol)
scatter!(tsteps,y[1:N])

savefig("trained.png")

Nope, I don’t think it’s too uncommon. Would you mind if I added this as an example to the documentation?

If you don't have very much data but you do have a model, then this can be a way to get a better-predicting method, since a pure neural network will likely overfit and attempt to over-generalize. There are also a ton of other reasons, like how discretized physics-informed neural networks are a subset of UDEs and how high-dimensional PDE solvers can be written as UDEs. See https://arxiv.org/abs/2001.04385 for all of the details.
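
To make the contrast concrete for this example, here is a purely hypothetical sketch (the -2.0f0 relaxation rate is made up and not part of the system above): in a UDE you keep whatever structure you trust and let the network learn only the missing piece.

# Hypothetical UDE right-hand side: an assumed known linear relaxation term
# plus a neural network that only has to learn the unknown input nonlinearity.
function dudt_ude(u, p, t)
    known   = -2.0f0 .* u                                       # assumed known physics (illustrative)
    learned = nn_model(vcat(u[1], ex[Int(round(10*t))]), p)     # learned correction term
    known .+ learned
end

If instead the whole right-hand side is the network, as in dudt above, the UDE reduces to a plain neural ODE.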

[figure: trained.png — trained model prediction vs. data]


Thank you very much :smiley:

Sure, you can use it for the documentation.

Oh no, I hadn't seen this. So simple… This was probably the main reason why it didn't converge.

Thanks, I will read it carefully.

I extended the code a little bit so that, after the first step from 0 to 1, another step from 1 to 0 occurs, to make this toy example more general. I also added the variables t0 and t1 to define a subinterval of the tspan. That way one can easily apply the fourth strategy for avoiding local minima, which is explained here: https://diffeqflux.sciml.ai/stable/examples/local_minima/. Thanks again.

using DifferentialEquations, Flux, Optim, DiffEqFlux, DiffEqSensitivity, Plots
plotly()

tspan = (0.1f0, Float32(8.0))
tsteps = range(tspan[1], tspan[2], length = 80)
t_vec = collect(tsteps)

ex = vcat(ones(Float32,40), zeros(Float32, 40))
#ex = vcat(zeros(Float32,40), ones(Float32,40))
f(x) = (atan(8.0 * x - 4.0) + atan(4.0)) / (2.0 * atan(4.0))

function hammerstein_system(u)
    y = zeros(size(u))
    for k in 2:length(u)
        y[k] = 0.2 * f(u[k-1]) + 0.8 * y[k-1]
    end
    return y
end

y = Float32.(hammerstein_system(ex))
plot(collect(tsteps), y, ticks=:native)

nn_model = FastChain(FastDense(2,8, tanh), FastDense(8, 1))
p_model = initial_params(nn_model)


function dudt(u, p, t)
    #input_val = u_vals[Int(round(t*10)+1)]
    nn_model(vcat(u[1], ex[Int(round(10*t))]), p)
end


t0 = 0.1f0
t1 = 4.0f0
idx = t0 .<= tsteps .<= t1
u0 = Float32.(y[t0 .== tsteps])
prob = ODEProblem(dudt,u0,tspan,nothing)

function predict_neuralode(p)
    _prob = remake(prob,p=p)
    Array(solve(_prob, Tsit5(), saveat=tsteps[idx], abstol = 1e-8, reltol = 1e-6))
end

function loss(p)
    sol = predict_neuralode(p)    
    N = sum(idx)
    return sum(abs2.(y[idx] .- sol'))/N
end

# start optimization (I played around with several different optimizers with no success)
res0 = DiffEqFlux.sciml_train(loss,p_model ,ADAM(0.01), maxiters=100)
res1 = DiffEqFlux.sciml_train(loss,res0.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)

res2 = DiffEqFlux.sciml_train(loss,res1.minimizer ,ADAM(), maxiters=100)
res3 = DiffEqFlux.sciml_train(loss,res2.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)
res4 = DiffEqFlux.sciml_train(loss,res3.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)

Flux.gradient(loss,res4.minimizer)

sol = predict_neuralode(res4.minimizer)
N = length(sol)
plot(tsteps[idx],sol')


scatter!(tsteps[idx],y[idx], ticks=:native, leg=:none)


savefig("trained.png")

Hi Chris… Trying to understand this example… The new ex(ogenous) input does not appear to depend on time here: nn_model(vcat(u[1], ex[Int(round(10*0.1))]), p). Is this correct? Also, would it be possible to use an interpolator function instead? If so, how do I make the solver "trigger" at the specific times where ex(t) changes (a lot)?

For me this one works:

function dudt(u, p, t)
    #input_val = u_vals[Int(round(t*10)+1)]
    nn_model(vcat(u[1], ex[Int(round(10*t))]), p)
end

If you copy the code in the post above

then you have a good toy problem, where you can also play around with avoiding local minima by first optimizing on subsequences of the full signal.
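
Regarding the interpolator question: something like the following might work (an untested sketch; it assumes DataInterpolations.jl for the interpolant, and the switch time passed to tstops is just illustrative):

using DataInterpolations   # assumed package providing ConstantInterpolation

# Piecewise-constant interpolation of the exogenous input over the sample times
ex_itp = ConstantInterpolation(ex, t_vec)

function dudt_itp(u, p, t)
    nn_model(vcat(u[1], ex_itp(t)), p)
end

prob_itp = ODEProblem(dudt_itp, u0, tspan, nothing)
# tstops makes the solver step exactly onto the time where ex switches,
# so the discontinuity is not smoothed over by large adaptive steps.
sol = solve(prob_itp, Tsit5(), saveat = tsteps, tstops = [4.0f0])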

I have a similar error to this using Flux.train!() @ChrisRackauckas:
MethodError: no method matching copyto!(::Float64, ::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0},Tuple{},typeof(muladd),Tuple{Float64,Float64,Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0},Nothing,typeof(*),Tuple{Float64,Float64}}}})
Closest candidates are:
copyto!(::DataFrames.ColReplaceDataFrame, ::Base.Broadcast.Broadcasted) at /home/antonino/.julia/packages/DataFrames/nxjiD/src/other/broadcasting.jl:278
copyto!(::OrdinaryDiffEq.ArrayFuse{AT,T,P}, ::Base.Broadcast.Broadcasted{F1,Axes,F,Args}) where {AT, T, P, F1<:Base.Broadcast.AbstractArrayStyle{0}, Axes, F, Args<:Tuple} at /home/antonino/.julia/packages/OrdinaryDiffEq/VPJBD/src/wrappers.jl:28
copyto!(::Union{Base.RefValue{#s23} where #s23<:(GPUArrays.AbstractGPUArray{T,N} where N), Union{GPUArrays.AbstractGPUArray{T,N}, Base.LogicalIndex{T,#s16} where #s16<:GPUArrays.AbstractGPUArray, Base.ReinterpretArray{T,N,#s12,#s13} where #s13<:Union{SubArray{#s14,#s15,#s24,I,L} where L where I where #s15 where #s14, #s24} where #s12 where #s24<:GPUArrays.AbstractGPUArray, Base.ReshapedArray{T,N,#s15,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where #s15<:Union{Base.ReinterpretArray{#s12,#s16,#s121,#s13} where #s13<:Union{SubArray{#s14,#s15,#s25,I,L} where L where I where #s15 where #s14, #s25} where #s121 where #s16 where #s12, SubArray{#s14,#s13,#s25,I,L} where L where I where #s13 where #s14, #s25} where #s25<:GPUArrays.AbstractGPUArray, SubArray{T,N,#s16,I,L} where L where I where #s16<:Union{Base.ReinterpretArray{#s13,#s12,#s121,#s131} where #s131<:Union{SubArray{#s14,#s15,#s26,I,L} where L where I where #s15 where #s14, #s26} where #s121 where #s12 where #s13, Base.ReshapedArray{#s15,#s14,#s151,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where #s151<:Union{Base.ReinterpretArray{#s12,#s16,#s121,#s13} where #s13<:Union{SubArray{#s14,#s15,#s26,I,L} where L where I where #s15 where #s14, #s26} where #s121 where #s16 where #s12, SubArray{#s14,#s13,#s26,I,L} where L where I where #s13 where #s14, #s26} where #s14 where #s15, #s26} where #s26<:GPUArrays.AbstractGPUArray, LinearAlgebra.Adjoint{T,#s12} where #s12<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.Diagonal{T,#s22} where #s22<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.LowerTriangular{T,#s18} where #s18<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.Transpose{T,#s17} where #s17<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.Tridiagonal{T,#s23} where #s23<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.UnitLowerTriangular{T,#s19} where #s19<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.UnitUpperTriangular{T,#s21} where #s21<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.UpperTriangular{T,#s20} where #s20<:GPUArrays.AbstractGPUArray{T,N}, PermutedDimsArray{T,N,#s15,#s14,#s13} where #s13<:GPUArrays.AbstractGPUArray where #s14 where #s15} where N} where T, ::Base.Broadcast.Broadcasted{#s23,Axes,F,Args} where Args<:Tuple where F where Axes where #s23<:Base.Broadcast.AbstractArrayStyle{0}) at /home/antonino/.julia/packages/GPUArrays/uaFZh/src/host/broadcast.jl:76

I can't paste the full code, but it works until I use Flux.train!().

hidden_layer = 50
ann = FastChain(FastDense(9,hidden_layer,tanh),FastDense(hidden_layer,hidden_layer),FastDense(hidden_layer,1)) |> gpu
p = initial_params(ann)

function T_fun(u,p,t)
    ann(vcat(u[1],(Sys_Out_temp_(t)-MA_temp_(t)),outT_(t),outRH_(t),wind_speed_(t),wind_direction_(t),diff_solar_rad_(t),direct_solar_rad_(t),Occupancy_flag_(t)),p)[1]
end

T0 = inT_avg[1]

nn = ODEProblem(T_fun,T0,t_span,saveat = 1,nothing)

function pred(p)
 _prob = remake(nn,p=p)
  Array(solve(_prob, Tsit5(), saveat = 1, abstol = 1e-8, reltol = 1e-6)) 
end

plot(time_,inT_avg,title = "Temperature ",xlabel = "Time",ylabel ="degC",label = "T_avarage")
Plots.scatter!(pred(p))

dt = 1
loss() = Flux.mse(pred(p),inT_avg) |> gpu
data = Iterators.repeated((), 1000)
opt = ADAM(0.01)
cb = function () #callback function to observe training
        display(loss())
        pl = plot(inT_avg, label = "T",color = "black")
        Plots.scatter!(pred(p),label = " nn_ode T")
        display(plot(pl))
end
cb()
@time begin
Flux.train!(loss, p, data, opt, cb = throttle(cb, 50))|> gpu
end

Did you try the fixed code?


Yes, I've tried to fix my code, but the error persists.

Can you open an issue with an MWE?

I can't provide an MWE, but I can open a new issue.

If there's no code I can copy-paste to reproduce it, then an issue won't be helpful: I already fixed the one in this thread, with a tutorial in the documentation showing the fix, so whatever your issue is, it needs an example.
