PiNN model not working with an LSTM layer

Hi everyone,
It hase been a while that I have an issue with a PINN model. When using a chain of dense layers, the model works fine. However, when using a chain of DENSE and LSTM layers, there seems to be something wrong. During training, the value of the loss function doesn’t change but remains constant across iterations.
Here are both the working and non-working codes.
Can someone help me understand where I’m making a mistake?

CSV Files - > CSV - Google Drive

WORKING CODE

using DifferentialEquations, Lux, ComponentArrays, DiffEqFlux, Optimization,
OptimizationOptimJL, Plots, Random, OptimizationOptimisers, DataInterpolations, CSV, DataFrames, Statistics
data_fn = raw"File csv/Testerna totale.csv"
data_df= CSV.read(data_fn, DataFrame; header = false);
Testerna = convert.(Float32, data_df[!, 1])

phih_fn = raw"File csv/phi heating tutto.csv"
phih_df = CSV.read(phih_fn, DataFrame; header = false);

phih_fn2 = raw"File csv/phi heating tutto.csv"
phih_df2 = CSV.read(phih_fn2, DataFrame; header = false);

sun_fn = raw"File csv/phisun totale.csv"
sun_df = CSV.read(sun_fn, DataFrame; header = false);

int_fn= raw"File csv/Internal gains.csv"
int_df= CSV.read(int_fn, DataFrame; header = false);

phih = convert.(Float32, phih_df[!, 1]);
phisun = convert.(Float32, sun_df[!, 1]);
intg = convert.(Float32, int_df[!, 1]);
phih2= convert.(Float32, phih_df2[!,1]);
tspan= (0.0f0, 31536.f0)
t= range(tspan[1], tspan[2], length=31536)

disturbance = LinearInterpolation(Testerna,t);
#disturbance2 = LinearInterpolation(phih,t);
disturbance2 = LinearInterpolation(phih2,t);
disturbance3 = LinearInterpolation(phisun,t);
disturbance4= LinearInterpolation(intg,t);

function Text(t)
return disturbance(t)
end

function phi(t)
return disturbance2(t)
end

function phis(t)
return disturbance3(t)
end

function intgain(t)
return disturbance4(t)
end

function f(du, u, p, t)
Rv = p[1]
Ci = p[2]
du[1] = 1/(RvCi) . (Text(t) .- u[1]) .+ phi(t)/Ci .+ phis(t)/Ci .+ intgain(t)/Ci

end

u0= [19.3]
p= [0.005, 27800.0]

prob = ODEProblem(f, u0, tspan, p)
#sol = solve(prob, VCABM(), saveat=t, reltol = sqrt(eps(one(Float32))), abstol = eps(one(Float32))),
sol = solve( prob, VCABM();
saveat = t, reltol = sqrt(eps(one(Float32))),abstol = eps(one(Float32)),)
tplot=t

Visualizza il risultato

plot(sol,title = “Soluzione reale”, xlabel=“Tempo”, ylabel=“Temperatura [°C]”, legend=false)
Tmedia= mean(Testerna)
Tstd=std(Testerna)

phihmedio= mean(phih)
phihstd= std(phih)

phisunmedio= mean(phisun)
phisunstd= std(phisun)

intgmedio= mean(intg)
intgstd= std(intg)

function Tnorm(t)
return (Text(t) .- Tmedia) ./ Tstd
end

function phinorm(t)
return (phi(t) .- phihmedio) ./ phihstd
end

function phisunnorm(t)
return (phis(t) .- phisunmedio) ./ phisunstd
end

function intgnorm(t)
return (intgain(t) .- intgmedio) ./ intgstd
end
intgnorm (generic function with 1 method)
rng=Random.default_rng()

nn_model = Lux.Chain(Lux.Dense(5, 8, σ), Lux.Dense(8, 3), Lux.Dense(3,1))
p_model, st = Lux.setup(rng, nn_model)

function dudt(u, p, t)
global st
out, st = nn_model(vcat(u[1], Tnorm(t),phinorm(t), phisunnorm(t), intgnorm(t)), p, st)
return out
end

prob = ODEProblem(dudt, u0, tspan, nothing)

function predict_neuralode(p)
_prob = remake(prob; p = p)
Array( solve( _prob, VCABM(); saveat = t, reltol = sqrt(eps(one(Float32))),abstol = eps(one(Float32)),),)
end

function loss(p)
pred = predict_neuralode(p)
N = length(pred)
return Flux.mse(sol[1,:], pred’)
end

callback = function(p,l)
println(l)
return false
end

adtype = Optimization.AutoZygote()
pinit= ComponentArray(p_model)
optf = Optimization.OptimizationFunction((x, p) → loss(x), adtype)
optprob = Optimization.OptimizationProblem(optf, pinit)
OptimizationProblem. In-place: true

@time begin
res0 = Optimization.solve(optprob, Optimisers.Adam(0.08), callback=callback, maxiters = 300)
end

pred= predict_neuralode(res0.u)
pred=pred’
plot(t,pred, label= “prediction”)
plot!(t, sol[1,:], label= “data”, xlabel=“Tempo”, ylabel=“Temperatura [°C]”)

NON WORKING CODE WITH LSTM

using DifferentialEquations, Flux, ComponentArrays, DiffEqFlux, Optimization, Lux,
OptimizationOptimJL, Plots, Random, OptimizationOptimisers, DataInterpolations, CSV, DataFrames, Statistics
data_fn = raw"File csv/Testerna totale.csv"
data_df= CSV.read(data_fn, DataFrame; header = false);
Testerna = convert.(Float32, data_df[!, 1])

phih_fn = raw"File csv/phi heating tutto.csv"
phih_df = CSV.read(phih_fn, DataFrame; header = false);

phih_fn2 = raw"File csv/phi heating modificato.csv"
phih_df2 = CSV.read(phih_fn2, DataFrame; header = false);

sun_fn = raw"File csv/phisun totale.csv"
sun_df = CSV.read(sun_fn, DataFrame; header = false);

int_fn= raw"File csv/Internal gains.csv"
int_df= CSV.read(int_fn, DataFrame; header = false);

phih = convert.(Float32, phih_df[!, 1]);
phisun = convert.(Float32, sun_df[!, 1]);
intg = convert.(Float32, int_df[!, 1]);
phih2= convert.(Float32, phih_df2[!,1]);
tspan= (0.0f0, 26280.0f0)
t= range(tspan[1], tspan[2], length=26280)

disturbance = LinearInterpolation(Testerna,t);
disturbance2 =LinearInterpolation(phisun,t)

function Text(t)
return disturbance(t)
end

function phis(t)
return disturbance2(t)
end

function f(du, u, p, t)
Rv = p[1]
Ci = p[2]
du[1] = 1/(RvCi) . (Text(t) .- u[1]) .+ phis(t)/Ci

end

u0= [19.3]
p= [0.005, 27800.0]
#p= [0.01 , 1000.0]

prob = ODEProblem(f, u0, tspan, p)
#sol = solve(prob, VCABM(), saveat=t, reltol = sqrt(eps(one(Float32))), abstol = eps(one(Float32))),
sol = solve( prob, VCABM();
saveat = t, reltol = sqrt(eps(one(Float32))),abstol = eps(one(Float32)),)
tplot=t

plot(sol,title = “Soluzione reale”, xlabel=“Tempo”, ylabel=“Temperatura [°C]”, legend=false)
Tmedia= mean(Testerna)
Tstd=std(Testerna)

phihmedio= mean(phih)
phihstd= std(phih)

phisunmedio= mean(phisun)
phisunstd= std(phisun)

intgmedio= mean(intg)
intgstd= std(intg)

function Tnorm(t)
return (Text(t) .- Tmedia) ./ Tstd
end

function phinorm(t)
return (phi(t) .- phihmedio) ./ phihstd
end

function phisunnorm(t)
return (phis(t) .- phisunmedio) ./ phisunstd
end

function intgnorm(t)
return (intgain(t) .- intgmedio) ./ intgstd
end

rng=Random.default_rng()
#nn_model = Lux.Chain(Lux.LSTMCell(5 => 2),Lux.Dense(2,1))
#nn_model = Lux.LSTMCell(3 => 30)
nn_model = Lux.Chain(Lux.Dense(3=> 8,σ), Lux.Recurrence(Lux.LSTMCell(8 => 4)), Lux.Dense(4 => 1))
p_model, st = Lux.setup(rng, nn_model)

function dudt(u, p, t)
global st

Tnorm(t) = reshape(Tnorm(t), (1,1,length(Tnorm(t))));

phisunnorm(t) = reshape(phisunnorm(t), (1,1,length(Tnorm(t))));

u[1] = reshape(u[1], (1,1, length(u[1])));

input = cat(u[1], Tnorm(t), phisunnorm(t), dims = 1)

# out, st = nn_model(input, p, st)

out, st = nn_model(vcat(u[1], Tnorm(t), phisunnorm(t))', p, st)

out, st = nn_model(vcat(u[1]', Testerna',phisun'),p, st)

out, st = nn_model(vcat(u[1], Text(t),phi(t), phis(t)), p, st)

return out

end

function predict_neuralode(p)
_prob = remake(prob; p = p)
Array( solve( _prob, VCABM(); saveat = t, reltol = sqrt(eps(one(Float32))),abstol = eps(one(Float32)),),)
end

function loss(p)
pred = predict_neuralode(p)
N = length(pred)
#return sum(abs2.(sol[1,:] .- pred’))
return Flux.mse(sol[1,:], pred’)
#return Flux.mae(sol[1,:], pred)
end

callback = function(p,l)
println(l)
return false
end

adtype = Optimization.AutoZygote()
pinit= ComponentArray(p_model)
optf = Optimization.OptimizationFunction((x, p) → loss(x), adtype)
optprob = Optimization.OptimizationProblem(optf, pinit)

predict_neuralode(pinit)

res0 = Optimization.solve(optprob, Optimisers.Adam(0.1), callback=callback, maxiters = 300)

pred= predict_neuralode(res0.u)
pred=pred’
plot(t,pred, label= “prediction”)
plot!(t, sol[1,:], label= “data”, xlabel=“Tempo”, ylabel=“Temperatura [°C]”)

1 Like