Fitting a dynamic system with an exogenous input (nonhomogenous neural ode) via DiffEqFlux

I extended the code a little bit, so that after the first step response from 0 to 1 another step response from 1 to 0 occurs to make this toy example more general. I also added the variables t0 and t1 to define a subinterval of the the tspan. Therefore one can easily apply the fourth strategy of avoiding local minima, which is explained here https://diffeqflux.sciml.ai/stable/examples/local_minima/. Thanks again.

using DifferentialEquations, Flux, Optim, DiffEqFlux, DiffEqSensitivity, Plots
plotly()

tspan = (0.1f0, Float32(8.0))
tsteps = range(tspan[1], tspan[2], length = 80)
t_vec = collect(tsteps)

ex = vcat(ones(Float32,40), zeros(Float32, 40))
#ex = vcat(zeros(Float32,40), ones(Float32,40))
f(x) = (atan(8.0 * x - 4.0) + atan(4.0)) / (2.0 * atan(4.0))

function hammerstein_system(u)
    y= zeros(size(u))
    for k in 2:length(u)
        y[k] = 0.2 * f(u[k-1]) + 0.8 * y[k-1]
    end
    return y
end

y = Float32.(hammerstein_system(ex))
plot(collect(tsteps), y, ticks=:native)

nn_model = FastChain(FastDense(2,8, tanh), FastDense(8, 1))
p_model = initial_params(nn_model)


function dudt(u, p, t)
    #input_val = u_vals[Int(round(t*10)+1)]
    nn_model(vcat(u[1], ex[Int(round(10*t))]), p)
end


t0 = 0.1f0
t1 = 4.0f0
idx = [t0 .<= tsteps .<= t1][1]
u0 = Float32.(y[t0 .== tsteps])
prob = ODEProblem(dudt,u0,tspan,nothing)

function predict_neuralode(p)
    _prob = remake(prob,p=p)
    Array(solve(_prob, Tsit5(), saveat=tsteps[idx], abstol = 1e-8, reltol = 1e-6))
end

function loss(p)
    sol = predict_neuralode(p)    
    N = sum(idx)
    return sum(abs2.(y[idx] .- sol'))/N
end

# start optimization (I played around with several different optimizers with no success)
res0 = DiffEqFlux.sciml_train(loss,p_model ,ADAM(0.01), maxiters=100)
res1 = DiffEqFlux.sciml_train(loss,res0.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)

res2 = DiffEqFlux.sciml_train(loss,res1.minimizer ,ADAM(), maxiters=100)
res3 = DiffEqFlux.sciml_train(loss,res2.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)
res4 = DiffEqFlux.sciml_train(loss,res3.minimizer,BFGS(initial_stepnorm=0.01), maxiters=300, allow_f_increases = true)

Flux.gradient(loss,res4.minimizer)

sol = predict_neuralode(res1.minimizer)
N = length(sol)
plot(tsteps[idx],sol')


scatter!(tsteps[idx],y[idx], ticks=:native, leg=:none)


savefig("trained.png")