Hello everybody, I’m trying to train a NeuralODE as a surrogate model of a mechanical device.
Could you help me figure out how to constrain the loss function so that it takes the nonlinearity of the model into account?
The problem I formulate is

$$
\min_{\theta \in \mathbb{R}^{n}} \; \text{LossFunction}\big(\tilde y(\theta),\, y\big)
\qquad \text{subject to} \qquad \tilde y \leq k
$$
where $\tilde y$ is the NeuralODE prediction and $k$ is a constant.
I want to train the NN so that it embeds the physical limits of the system I am trying to mimic.
I tried to set this up with Optimization.jl, but I am having trouble understanding how to pass the NeuralODE prediction $\tilde y$ to cons.
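If a hard constraint turns out to be too difficult to set up, would a soft penalty added to the loss be an acceptable alternative? A minimal sketch of what I mean, reusing predict_neuralode, ϕ and DX_cap from the script below, with an arbitrary penalty weight λ:

λ = 1.0f0   # penalty weight, purely illustrative and would need tuning
function loss_penalized(p)
    ỹ = predict_neuralode(p)
    # penalize only the amount by which the predicted velocity exceeds the cap
    violation = max.(ỹ[:, 2] .- DX_cap, 0)
    return Flux.mse(ϕ, ỹ) + λ * sum(abs2, violation), ỹ
end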
using CSV, DataFrames, DifferentialEquations, Plots, Flux, DiffEqFlux, JLD, Random, Lux, ComponentArrays, Optimization, OptimizationOptimJL
using Flux: throttle
using Statistics
using IterTools: ncycle
using DiffEqFlux: group_ranges
rng = MersenneTwister(1234) # fixed seed so the random numbers are reproducible
# Layer representing the device dynamics
struct LawLayer{F1} <: Lux.AbstractExplicitLayer
    force::Float32
    parameters::F1
end
function LawLayer(force::Float32; parameters = Lux.glorot_uniform)
    return LawLayer{typeof(parameters)}(force, parameters)
end
function Lux.initialparameters(rng::AbstractRNG, l::LawLayer)
    weight = l.parameters(rng, 3, 1)
    return (weight = weight,)   # Lux expects a NamedTuple of parameters
end
Lux.initialstates(::AbstractRNG, ::LawLayer) = NamedTuple()
Lux.parameterlength(::LawLayer) = 3
function (l::LawLayer)(u::AbstractArray, ps, st::NamedTuple)
    F = l.force
    a, b, c = ps.weight          # the three learned coefficients
    z1 = u[2]
    z2 = F * a - (b * u[1] + c * u[2])
    return [z1, z2], st
end
# Container that chains the sub-layers in sequence
struct CustomLayer{L1,L2,L3,L4,L5,L6} <: Lux.AbstractExplicitContainerLayer{(:dense1, :dense2, :dynamic, :dense3, :dense4, :dense5)}
    dense1::L1
    dense2::L2
    dynamic::L3
    dense3::L4
    dense4::L5
    dense5::L6
end
function (CL::CustomLayer)(u::AbstractArray, ps, st::NamedTuple)
    y1, st_de1 = CL.dense1(u, ps.dense1, st.dense1)
    y2, st_de2 = CL.dense2(y1, ps.dense2, st.dense2)
    y,  st_dy  = CL.dynamic(y2, ps.dynamic, st.dynamic)
    y3, st_de3 = CL.dense3(y, ps.dense3, st.dense3)
    y4, st_de4 = CL.dense4(y3, ps.dense4, st.dense4)
    y5, st_de5 = CL.dense5(y4, ps.dense5, st.dense5)
    out = y5 # [y1[1,:]; y4]
    return out, (dynamic = st_dy, dense1 = st_de1, dense2 = st_de2, dense3 = st_de3, dense4 = st_de4, dense5 = st_de5)
end
# Data acquisition: increase the number of points (try a different downsampling factor)
t = Data.tout[1:10:end]
Errout = Data.Errout[1:10:end]
XSout = Data.XSout[1:10:end]
i_cmdout = Data.i_cmdout[1:10:end]
P12out = Data.P12out[1:10:end]
DXJout = Data.DXJout[1:10:end]
XJout = Data.XJout[1:10:end]
DXJout = 100 * DXJout # from m/s to mm/s
XJout = 100 * XJout # from m to mm
#Validation Data
DXJout_val = 100 * [Data_val1.DXJout, Data_val2.DXJout, Data_val3.DXJout]
XJout_val = 100 * [Data_val1.XJout, Data_val2.XJout, Data_val3.XJout]
DX_cap = 15*100 # upper bound k on the predicted velocity, in the same units as DXJout
# datap = plot(t,XJout,label = "Data",lw = 1.8, title = "Position")
# valp = plot(t,XJout_val,label = "Data_val",lw = 1)
# datav = plot(t,DXJout,label = "Data",lw = 1.8, title = "Velocity")
# valv = plot(t,DXJout_val,label = "Data_val",lw = 1) # plot(datap,valp,datav,valv,layout = (4,1),size = (800,1200))
ϕ = [XJout DXJout]
tspan = Float32[t[1], t[end]]
x0 = Float32[XJout[1]; DXJout[1]]
act = swish
hidden_1 = 5 # cap at 50, 35, 25
hidden_2 = 5
hidden_3 = 5
learning_rate = 1e-3
# opt = ADAM(learning_rate, (0.9, 0.8)) # dynamic learning rate, to be tuned
# opt = BFGS()
opt = IPNewton() # interior-point method, used here because the problem has general constraints
LOSS = []
F = Float32.([100 50 200 500])
NN = CustomLayer(Lux.Dense(2,hidden_1,act),
Lux.Dense(hidden_1,2,act),
LawLayer(F[1]),
Lux.Dense(2,hidden_2,act),
Lux.Dense(hidden_2,hidden_3),
Lux.Dense(hidden_3,2))
# NN = Lux.Chain(Lux.Dense(2,hidden_1,act),
# Lux.Dense(hidden_1,2,act),
# LawLayer(F[1]),
# Lux.Dense(2,hidden_2,act),
# Lux.Dense(hidden_2,2))
p, st = Lux.setup(rng, NN)
prob_neuralode = NeuralODE(NN, tspan, Tsit5(), saveat = t)
p_init = ComponentArray(p)
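# Sanity check (not part of training): one forward pass of the full network before
# relying on the NeuralODE, to make sure the shapes and states line up.
du0, _ = NN(x0, p, st) # should return a 2-element vector like x0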
function predict_neuralode(p)
    # solution as a (length(t) × 2) matrix: column 1 = position, column 2 = velocity
    prediction = Array(prob_neuralode(x0, p, st)[1])'
    return prediction
end
function loss_neuralode(p)
    ỹ = predict_neuralode(p)
    loss = Flux.mse(ϕ, ỹ)
    return loss, ỹ
end
# Constraint for Optimization.jl: the constraint function has to be in-place,
# cons(res, x, p), writing one value per constraint into res. Here I constrain the
# predicted velocity (second column of the prediction) at every saved time point.
function cons(res, x, p)
    res .= predict_neuralode(x)[:, 2]
    return nothing
end
res0 = zeros(length(t))
cons(res0, p_init, nothing) # sanity check: fills length(t) constraint values
# Callback: plot the data against the current prediction and log the loss
cb = function (p, l, y_tilde)
    train = plot(t, ϕ[:, 1:2], layout = (2, 1), size = (800, 800), linewidth = 1.25,
                 label = ["position" "speed"], grid = :y, gridwidth = 1.5,
                 xlabel = "time", yguidefontrotation = 90, aspect_ratio = 1)
    Plots.scatter!(t, y_tilde[:, 1:2], label = ["position_NN" "speed_NN"])
    push!(LOSS, l)
    display(train)
    println("LOSS: ", LOSS[end])
    # returning true stops the optimization once the loss is small enough
    return l <= 2e-8
end
# adtype = Optimization.AutoZygote()
adtype = Optimization.AutoForwardDiff() # ForwardDiff for both the objective and the constraints
optf = Optimization.OptimizationFunction((x, p) -> loss_neuralode(x), adtype; cons = cons)
optprob = Optimization.OptimizationProblem(optf, p_init,
                                           lcons = fill(-Inf, length(t)),          # no lower bound on the velocity
                                           ucons = fill(float(DX_cap), length(t))) # one upper bound per time point
result_neuralode = Optimization.solve(optprob,opt,callback = throttle(cb,20),maxiters = 1000)
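# Assuming the solve succeeds, result_neuralode.u holds the optimized parameters,
# so the prediction can be re-run to check that the velocity stays below the cap.
ỹ_final = predict_neuralode(result_neuralode.u)
maximum(ỹ_final[:, 2]) <= DX_cap # should be true if the constraint was enforced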