This PR fixes your original code: https://github.com/SciML/DiffEqSensitivity.jl/pull/341 and it’ll be released later tonight if all tests pass.
True story… Thanks again
Hi @ChrisRackauckas,
I would like to ask you for some help again. I have questions on four topics:
1.
First I changed the implementation a little bit, so that the model fits the nonlinear state-space representation \underline{\dot{x}}(t) = f(t, \underline{x}(t), \underline{u}(t)), i.e. du[1] = f(u[1], ex), where f is a neural network and ex is the excitation / input (here I just use a constant / step response).
using DifferentialEquations, Flux, Optim, DiffEqFlux, DiffEqSensitivity, Plots
using LaTeXStrings
plotly()
using JLD2
tspan = (0.1f0, Float32(10.0))
tsteps = range(tspan[1], tspan[2], length = 100)
t_vec = collect(tsteps)
ex = vec(ones(length(tsteps), 1))
f(x) = (atan(8.0 * x - 4.0) + atan(4.0)) / (2.0 * atan(4.0))
function hammerstein_system(u)
    y = zeros(size(u))
    for k in 2:length(u)
        y[k] = 0.2 * f(u[k-1]) + 0.8 * y[k-1]
    end
    return y
end
y = Float32.(hammerstein_system(ex))
plot(collect(tsteps), y, ticks=:native)
nn_model = FastChain(FastDense(2,25, tanh), FastDense(25, 25, tanh), FastDense(25, 1))
p_model = initial_params(nn_model)
u0 = Float32.([0.0])
function dudt(du, u, p, t)
    # input_val = u_vals[Int(round(t*10)+1)]
    du[1] = nn_model(reshape(vcat(u[1], ex[Int(round(t*10))]), 2, 1), p)[1]
end
nn_model(reshape(vcat(0.0, ex[Int(round(10*0.1))]), 2, 1), p_model)[1]
prob = ODEProblem(dudt,u0,tspan,nothing)
function predict_neuralode(p)
    _prob = remake(prob, p=p)
    Array(solve(_prob, Tsit5(), saveat=tsteps[tsteps .<= 0.5f0], abstol = 1e-8, reltol = 1e-6))
end
function loss(p)
    sol = predict_neuralode(p)
    N = length(sol)
    return sum(abs2.(y[1:N] .- sol))/N
end
# start optimization (I played around with several different optimizers with no success)
res0 = DiffEqFlux.sciml_train(loss,p_model ,ADAM(0.005), maxiters=300)
res1 = DiffEqFlux.sciml_train(loss,res0.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)
loss(res0.minimizer)
loss(res1.minimizer)
sol = predict_neuralode(res1.minimizer)
y_model = vec(Array(sol))
scatter(t_vec[1:length(y_model)], [y[1:length(y_model)] y_model], labels = [L"y"; L"\hat{y}"] |> permutedims, ticks=:native, xlabel=L"time\ in\ s", ylabel=L"y", extra_plot_kwargs = KW(
    :include_mathjax => "cdn",
    :yaxis => KW(:automargin => true),
    :xaxis => KW(:domain => "auto")
))
This implementation leads to the following error message:
ERROR: MethodError: *(::ReverseDiff.TrackedArray{Float32,Float32,2,Array{Float32,2},Array{Float32,2}}, ::ReverseDiff.TrackedArray{Float64,Float64,2,Array{Float64,2},Array{Float64,2}}) is ambiguous. Candidates:
  *(x::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA, y::AbstractArray{T,2} where T) where {V, D} in ReverseDiff at C:\Users\k582186\.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:213
  *(x::AbstractArray{T,2} where T, y::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA) where {V, D} in ReverseDiff at C:\Users\k582186\.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:214
  *(x::ReverseDiff.TrackedArray{V,D,N,VA,DA} where DA where VA where N, y::AbstractArray{T,2} where T) where {V, D} in ReverseDiff at C:\Users\k582186\.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:213
  *(x::AbstractArray{T,2} where T, y::ReverseDiff.TrackedArray{V,D,N,VA,DA} where DA where VA where N) where {V, D} in ReverseDiff at C:\Users\k582186\.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:214
  *(x::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA, y::AbstractArray) where {V, D} in ReverseDiff at C:\Users\k582186\.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:213
  *(x::AbstractArray, y::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA) where {V, D} in ReverseDiff at C:\Users\k582186\.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:214
  *(x::ReverseDiff.TrackedArray{V,D,N,VA,DA} where DA where VA where N, y::AbstractArray) where {V, D} in ReverseDiff at C:\Users\k582186\.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:213
  *(x::AbstractArray, y::ReverseDiff.TrackedArray{V,D,N,VA,DA} where DA where VA where N) where {V, D} in ReverseDiff at C:\Users\k582186\.julia\packages\ReverseDiff\jFRo1\src\derivatives\linalg\arithmetic.jl:214
Possible fix, define
  *(::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA, ::ReverseDiff.TrackedArray{V,D,2,VA,DA} where DA where VA) where {V, D, V, D}
Do you have an idea why I get this error message?
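One guess on my side (not verified): the ambiguity pairs a Float32 tracked array with a Float64 one, so maybe I'm mixing precisions, because ex and the literals in f are Float64 while u0 and the network parameters are Float32? Keeping everything in Float32 might avoid that particular method clash, e.g.:
ex = ones(Float32, length(tsteps))  # Float32 input signal instead of Float64
f(x) = (atan(8f0 * x - 4f0) + atan(4f0)) / (2f0 * atan(4f0))  # Float32 literals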
If I change the function dudt to du[1] = nn_model1(u,p[1:31])[1] + nn_model2(input_vec[Int(round(t*10.0))],p[32:end])[1], i.e. split it into two separate neural networks, it optimizes.
using DifferentialEquations, Flux, Optim, DiffEqFlux, DiffEqSensitivity, Plots
using LaTeXStrings
plotly()
# excitation of the system
tspan = (0.1f0, Float32(10.0))
tsteps = range(tspan[1], tspan[2], length = 100)
t_vec = collect(tsteps)
ex = vec(ones(length(tsteps), 1))
f(x) = (atan(8.0 * x - 4.0) + atan(4.0)) / (2.0 * atan(4.0))
function hammerstein_system(u)
    y = zeros(size(u))
    for k in 2:length(u)
        y[k] = 0.2 * f(u[k-1]) + 0.8 * y[k-1]
    end
    return y
end
y = Float32.(hammerstein_system(ex))
plot(collect(tsteps), y, ticks=:native)
nn_model1 = FastChain(FastDense(1,10, tanh), FastDense(10,1))
nn_model2 = FastChain(FastDense(1,10, tanh), FastDense(10,1))
p_model1 = initial_params(nn_model1)
p_model2 = initial_params(nn_model2)
u0 = Float32.([0.0])
function dudt(du, u, p, t)
    du[1] = nn_model1(u, p[1:31])[1] + nn_model2(ex[Int(round(t*10.0))], p[32:end])[1]
end
prob = ODEProblem(dudt,u0,tspan,nothing)
function predict_neuralode(p)
    _prob = remake(prob, p=p)
    Array(solve(_prob, Tsit5(), saveat=tsteps[tsteps .<= 0.3f0], abstol = 1e-8, reltol = 1e-6))
end
function loss(p)
    sol = predict_neuralode(p)
    N = length(sol)
    return sum(abs2.(y[1:N] .- sol))/N
end
res0 = DiffEqFlux.sciml_train(loss,vcat(p_model1,p_model2) ,ADAM(), maxiters=1000)
res1 = DiffEqFlux.sciml_train(loss,res0.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)
loss(res0.minimizer)
loss(res1.minimizer)
sol = predict_neuralode(res1.minimizer)
y_model = vec(Array(sol))
scatter(t_vec[1:length(y_model)], [y[1:length(y_model)] y_model], labels = [L"y"; L"\hat{y}"] |> permutedims, ticks=:native, xlabel=L"time\ in\ s", ylabel=L"y", extra_plot_kwargs = KW(
    :include_mathjax => "cdn",
    :yaxis => KW(:automargin => true),
    :xaxis => KW(:domain => "auto")
))
2.
Second, I'm not able to get a nicely converged UDE that fits the data accurately enough. I tried to follow the instructions for avoiding local minima at
https://diffeqflux.sciml.ai/stable/examples/local_minima/, and I also tried several different optimizers and model structures, but I had no success.
Do you maybe have a tip for me?
3.
Third, I'm a little bit confused that I can't find examples analogous to my task (identification of a nonlinear state-space representation of an excited system, i.e. a non-autonomous / nonhomogeneous ODE, with UDEs). Is it so uncommon? I think many systems need an excitation, or at least a nonzero initial state, to produce useful data about the system.
4.
Fourth, can you maybe explain to me why I should use UDEs instead of NODEs? In my understanding, UDEs should be used if one would like to incorporate additional knowledge about the system in the form of equations, but if the whole derivative is modeled by a neural network, the UDE is just a NODE. Or am I missing something there?
I wouldn’t use ReverseDiff or the in-place form here. Another mistake you made: notice that the solution is a row vector for each output component, so in the loss function you were broadcasting a row vector against a column vector, which generates a matrix. That’s not what you wanted.
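To make the broadcasting issue concrete, here is a tiny illustration (stand-in arrays, not your actual data):
sol = ones(Float32, 1, 3)  # the solution of a one-state ODE comes back as a 1×N row matrix
y3 = Float32[1, 2, 3]      # the data is a length-N column vector
size(y3 .- sol)            # (3, 3) -- every data point minus every solution point, an N×N matrix
size(y3 .- sol')           # (3, 1) -- transposing the solution gives the N residuals you want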
Here’s a code that trains:
using DifferentialEquations, Flux, Optim, DiffEqFlux, DiffEqSensitivity, Plots
tspan = (0.1f0, Float32(10.0))
tsteps = range(tspan[1], tspan[2], length = 100)
t_vec = collect(tsteps)
ex = vec(ones(Float32,length(tsteps), 1))
f(x) = (atan(8.0 * x - 4.0) + atan(4.0)) / (2.0 * atan(4.0))
function hammerstein_system(u)
    y = zeros(size(u))
    for k in 2:length(u)
        y[k] = 0.2 * f(u[k-1]) + 0.8 * y[k-1]
    end
    return y
end
y = Float32.(hammerstein_system(ex))
plot(collect(tsteps), y, ticks=:native)
nn_model = FastChain(FastDense(2,8, tanh), FastDense(8, 1))
p_model = initial_params(nn_model)
u0 = Float32.([0.0])
function dudt(u, p, t)
    # input_val = u_vals[Int(round(t*10)+1)]
    nn_model(vcat(u[1], ex[Int(round(10*0.1))]), p)
end
prob = ODEProblem(dudt,u0,tspan,nothing)
function predict_neuralode(p)
    _prob = remake(prob, p=p)
    Array(solve(_prob, Tsit5(), saveat=tsteps, abstol = 1e-8, reltol = 1e-6))
end
function loss(p)
    sol = predict_neuralode(p)
    N = length(sol)
    return sum(abs2.(y[1:N] .- sol'))/N
end
# start optimization (I played around with several different optimizers with no success)
res0 = DiffEqFlux.sciml_train(loss,p_model ,ADAM(0.01), maxiters=100)
res1 = DiffEqFlux.sciml_train(loss,res0.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)
Flux.gradient(loss,res1.minimizer)
sol = predict_neuralode(res1.minimizer)
plot(tsteps,sol')
N = length(sol)
scatter!(tsteps,y[1:N])
savefig("trained.png")
Nope, I don’t think it’s too uncommon. Would you mind if I added this as an example to the documentation?
If you don’t have very much data but you have a model then this could be a way to get a better predicting method since the neural network will likely overfit and attempt to over-generalize. There’s also a ton of other reasons, like how discretized physics-informed neural networks are a subset of UDEs and how high dimensional PDE solvers can be written as a UDE. See https://arxiv.org/abs/2001.04385 for all of the details.
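As a rough illustration of the distinction (a sketch, not code from the paper): in a UDE only the unknown part of the right-hand side is a neural network, while the known structure stays as explicit equations. For the toy problem above that could look like this, where the first-order dynamics are assumed known (the rate constant 2.0f0 is purely illustrative) and only the static input nonlinearity is learned:
nn_nl = FastChain(FastDense(1, 8, tanh), FastDense(8, 1))  # learns only the nonlinearity
p_nl = initial_params(nn_nl)
function dudt_ude(u, p, t)
    known_input = ex[Int(round(10 * t))]
    2.0f0 .* (nn_nl([known_input], p) .- u)  # known linear relaxation, learned nonlinearity
end
If the whole right-hand side is the network, as in dudt above, then yes, the UDE reduces to a neural ODE.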
Thank you very much
Sure you can use it for the documentation.
Oh no, I hadn't seen this. So simple… This was probably the main reason why it didn't converge.
Thanks, I will read it carefully.
I extended the code a little bit so that after the first step response from 0 to 1, another step response from 1 to 0 occurs, to make this toy example more general. I also added the variables t0 and t1 to define a subinterval of the tspan. Therefore one can easily apply the fourth strategy for avoiding local minima, which is explained here: https://diffeqflux.sciml.ai/stable/examples/local_minima/. Thanks again.
using DifferentialEquations, Flux, Optim, DiffEqFlux, DiffEqSensitivity, Plots
plotly()
tspan = (0.1f0, Float32(8.0))
tsteps = range(tspan[1], tspan[2], length = 80)
t_vec = collect(tsteps)
ex = vcat(ones(Float32,40), zeros(Float32, 40))
#ex = vcat(zeros(Float32,40), ones(Float32,40))
f(x) = (atan(8.0 * x - 4.0) + atan(4.0)) / (2.0 * atan(4.0))
function hammerstein_system(u)
    y = zeros(size(u))
    for k in 2:length(u)
        y[k] = 0.2 * f(u[k-1]) + 0.8 * y[k-1]
    end
    return y
end
y = Float32.(hammerstein_system(ex))
plot(collect(tsteps), y, ticks=:native)
nn_model = FastChain(FastDense(2,8, tanh), FastDense(8, 1))
p_model = initial_params(nn_model)
function dudt(u, p, t)
    # input_val = u_vals[Int(round(t*10)+1)]
    nn_model(vcat(u[1], ex[Int(round(10*t))]), p)
end
t0 = 0.1f0
t1 = 4.0f0
idx = [t0 .<= tsteps .<= t1][1]
u0 = Float32.(y[t0 .== tsteps])
prob = ODEProblem(dudt,u0,tspan,nothing)
function predict_neuralode(p)
    _prob = remake(prob, p=p)
    Array(solve(_prob, Tsit5(), saveat=tsteps[idx], abstol = 1e-8, reltol = 1e-6))
end
function loss(p)
    sol = predict_neuralode(p)
    N = sum(idx)
    return sum(abs2.(y[idx] .- sol'))/N
end
# start optimization (I played around with several different optimizers with no success)
res0 = DiffEqFlux.sciml_train(loss,p_model ,ADAM(0.01), maxiters=100)
res1 = DiffEqFlux.sciml_train(loss,res0.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)
res2 = DiffEqFlux.sciml_train(loss,res1.minimizer ,ADAM(), maxiters=100)
res3 = DiffEqFlux.sciml_train(loss,res2.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)
res4 = DiffEqFlux.sciml_train(loss,res3.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)
Flux.gradient(loss,res4.minimizer)
sol = predict_neuralode(res1.minimizer)
N = length(sol)
plot(tsteps[idx],sol')
scatter!(tsteps[idx],y[idx], ticks=:native, leg=:none)
savefig("trained.png")
Hi Chris… Trying to understand this example… The new ex(ogenous) input does not appear to depend on time here: nn_model(vcat(u[1], ex[Int(round(10*0.1))]), p). Is this correct? Also, would it be possible to use an interpolator function instead? If so, how do you make the solver “trigger” at the specific times when ex(t) changes (a lot)?
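Something like this is what I have in mind (an untested sketch, assuming DataInterpolations.jl and reusing ex, t_vec, nn_model, u0, tspan, tsteps from the posts above):
using DataInterpolations  # assumed available; ConstantInterpolation is a zero-order hold
ex_itp = ConstantInterpolation(ex, t_vec)  # piecewise-constant interpolant of the input signal
function dudt_itp(u, p, t)
    nn_model(vcat(u[1], ex_itp(t)), p)
end
prob_itp = ODEProblem(dudt_itp, u0, tspan, nothing)
switch_times = t_vec[[false; diff(ex) .!= 0]]  # instants where ex(t) jumps
# tstops forces the adaptive solver to step exactly onto the jump times instead of over them
sol_itp = solve(prob_itp, Tsit5(), saveat = tsteps, tstops = switch_times)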
For me this one works
function dudt(u, p, t)
    # input_val = u_vals[Int(round(t*10)+1)]
    nn_model(vcat(u[1], ex[Int(round(10*t))]), p)
end
If you copy the code from the post above, you have a good toy problem where you can also play around with avoiding local minima by first optimizing on subsequences of the full signal.
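For example, a rough sketch of that strategy (my own untested variation, reusing the loss, tsteps, t0, idx, and p_model from the code above): grow the fitted subinterval stage by stage and warm-start each stage from the previous optimum.
p_cur = p_model
for t1_stage in (2.0f0, 4.0f0, 6.0f0, 8.0f0)
    global idx = t0 .<= tsteps .<= t1_stage  # loss and predict_neuralode read this global
    global p_cur = DiffEqFlux.sciml_train(loss, p_cur, ADAM(0.01), maxiters = 200).minimizer
end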
I have a similar error to this one when using Flux.train!() @ChrisRackauckas
MethodError: no method matching copyto!(::Float64, ::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0},Tuple{},typeof(muladd),Tuple{Float64,Float64,Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0},Nothing,typeof(*),Tuple{Float64,Float64}}}})
Closest candidates are:
copyto!(::DataFrames.ColReplaceDataFrame, ::Base.Broadcast.Broadcasted) at /home/antonino/.julia/packages/DataFrames/nxjiD/src/other/broadcasting.jl:278
copyto!(::OrdinaryDiffEq.ArrayFuse{AT,T,P}, ::Base.Broadcast.Broadcasted{F1,Axes,F,Args}) where {AT, T, P, F1<:Base.Broadcast.AbstractArrayStyle{0}, Axes, F, Args<:Tuple} at /home/antonino/.julia/packages/OrdinaryDiffEq/VPJBD/src/wrappers.jl:28
copyto!(::Union{Base.RefValue{#s23} where #s23<:(GPUArrays.AbstractGPUArray{T,N} where N), Union{GPUArrays.AbstractGPUArray{T,N}, Base.LogicalIndex{T,#s16} where #s16<:GPUArrays.AbstractGPUArray, Base.ReinterpretArray{T,N,#s12,#s13} where #s13<:Union{SubArray{#s14,#s15,#s24,I,L} where L where I where #s15 where #s14, #s24} where #s12 where #s24<:GPUArrays.AbstractGPUArray, Base.ReshapedArray{T,N,#s15,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where #s15<:Union{Base.ReinterpretArray{#s12,#s16,#s121,#s13} where #s13<:Union{SubArray{#s14,#s15,#s25,I,L} where L where I where #s15 where #s14, #s25} where #s121 where #s16 where #s12, SubArray{#s14,#s13,#s25,I,L} where L where I where #s13 where #s14, #s25} where #s25<:GPUArrays.AbstractGPUArray, SubArray{T,N,#s16,I,L} where L where I where #s16<:Union{Base.ReinterpretArray{#s13,#s12,#s121,#s131} where #s131<:Union{SubArray{#s14,#s15,#s26,I,L} where L where I where #s15 where #s14, #s26} where #s121 where #s12 where #s13, Base.ReshapedArray{#s15,#s14,#s151,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where #s151<:Union{Base.ReinterpretArray{#s12,#s16,#s121,#s13} where #s13<:Union{SubArray{#s14,#s15,#s26,I,L} where L where I where #s15 where #s14, #s26} where #s121 where #s16 where #s12, SubArray{#s14,#s13,#s26,I,L} where L where I where #s13 where #s14, #s26} where #s14 where #s15, #s26} where #s26<:GPUArrays.AbstractGPUArray, LinearAlgebra.Adjoint{T,#s12} where #s12<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.Diagonal{T,#s22} where #s22<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.LowerTriangular{T,#s18} where #s18<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.Transpose{T,#s17} where #s17<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.Tridiagonal{T,#s23} where #s23<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.UnitLowerTriangular{T,#s19} where #s19<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.UnitUpperTriangular{T,#s21} where #s21<:GPUArrays.AbstractGPUArray{T,N}, LinearAlgebra.UpperTriangular{T,#s20} where #s20<:GPUArrays.AbstractGPUArray{T,N}, PermutedDimsArray{T,N,#s15,#s14,#s13} where #s13<:GPUArrays.AbstractGPUArray where #s14 where #s15} where N} where T, ::Base.Broadcast.Broadcasted{#s23,Axes,F,Args} where Args<:Tuple where F where Axes where #s23<:Base.Broadcast.AbstractArrayStyle{0}) at /home/antonino/.julia/packages/GPUArrays/uaFZh/src/host/broadcast.jl:76
I can’t paste the full code, but it works until I use Flux.train!().
hidden_layer = 50
ann = FastChain(FastDense(9,hidden_layer,tanh),FastDense(hidden_layer,hidden_layer),FastDense(hidden_layer,1)) |> gpu
p = initial_params(ann)
function T_fun(u, p, t)
    ann(vcat(u[1], (Sys_Out_temp_(t)-MA_temp_(t)), outT_(t), outRH_(t), wind_speed_(t), wind_direction_(t), diff_solar_rad_(t), direct_solar_rad_(t), Occupancy_flag_(t)), p)[1]
end
T0 = inT_avg[1]
nn = ODEProblem(T_fun, T0, t_span, nothing, saveat = 1)
function pred(p)
    _prob = remake(nn, p=p)
    Array(solve(_prob, Tsit5(), saveat = 1, abstol = 1e-8, reltol = 1e-6))
end
plot(time_,inT_avg,title = "Temperature ",xlabel = "Time",ylabel ="degC",label = "T_avarage")
Plots.scatter!(pred(p))
dt = 1
loss() = Flux.mse(pred(p),inT_avg) |> gpu
data = Iterators.repeated((), 1000)
opt = ADAM(0.01)
cb = function () # callback function to observe training
    display(loss())
    pl = plot(inT_avg, label = "T", color = "black")
    Plots.scatter!(pred(p), label = " nn_ode T")
    display(plot(pl))
end
cb()
@time begin
    Flux.train!(loss, p, data, opt, cb = throttle(cb, 50)) |> gpu
end
Did you try the fixed code?
Yes, I’ve tried to fix my code, but the error persists.
Can you open an issue with an MWE?
I can’t make an MWE, but I can open a new issue.
If there’s no code I can copy-paste to reproduce it, then an issue won’t be helpful. I already fixed the one in this thread, with a tutorial in the documentation showing the fix, so whatever your issue is, it needs an example.
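For reference, a self-contained example along those lines can be quite small. This is only a sketch with synthetic stand-ins for the data and inputs (not your actual measurements or interpolants), mirroring the structure of the fixed code above:
using DifferentialEquations, DiffEqFlux, Flux
tsteps_mwe = 0f0:1f0:50f0
data_mwe = Float32.(sin.(0.1f0 .* tsteps_mwe))  # stand-in for the measured temperature
drive(t) = sin(0.05f0 * t)                      # stand-in for the exogenous inputs
ann_mwe = FastChain(FastDense(2, 16, tanh), FastDense(16, 1))
p0 = initial_params(ann_mwe)
T_fun_mwe(u, p, t) = ann_mwe(vcat(u[1], drive(t)), p)
prob_mwe = ODEProblem(T_fun_mwe, [data_mwe[1]], (tsteps_mwe[1], tsteps_mwe[end]), p0)
function loss_mwe(p)
    pred = Array(solve(remake(prob_mwe, p = p), Tsit5(), saveat = tsteps_mwe))
    sum(abs2, vec(pred) .- data_mwe) / length(data_mwe)
end
res_mwe = DiffEqFlux.sciml_train(loss_mwe, p0, ADAM(0.01), maxiters = 100)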