Thank you for your help. This is my first Turing project so I am still getting to grips with everything.
It’s quite difficult to add a MWE, but here is the most important code from the notebook I am working in:
Code
### imports
using ModelingToolkit
using ModelingToolkitStandardLibrary.Blocks
using ModelingToolkit: t_nounits as t, D_nounits as D
using DataInterpolations
using OrdinaryDiffEq
using DifferentialEquations
using Symbolics
using Plots
using Latexify
using LaTeXStrings
using LinearAlgebra
using Distributions
using Random
using Measures
### constants
# time-varying ambient temperature
T_zeroC = 273.15 # [K]
T_bias = T_zeroC + 20 # [K]
Random.seed!(42)
rng = MersenneTwister(1234);
# time at end of simulation
t_end = 100.0
Δt = 0.2
time = 0:Δt:t_end
### Ambient temperature
# sinusoidal function around the bias temperature
T_a(t) = T_bias #+ 10 * sin((π/16)*t)
T_a_data = T_a.(time)
plot(time, T_a_data, xlabel = "t [sec]", ylabel = L"T_a(t)", legend=false)
### Heat input signals
# heat pump control signal
values_hp = zeros(Int(t_end))
rand_hp = max.(repeat(300*randn(rng, Int(t_end/4)), 1, 4)'[:], 0)
values_hp[begin:length(rand_hp)] = rand_hp
Φ_hp(t) = max(t >= t_end ? values_hp[end] : values_hp[Int(floor(t)) + 1], 0)
# boiler control signal
values_cv = zeros(Int(t_end))
rand_cv = max.(repeat(600*randn(rng, Int(t_end/4)), 1, 4)'[:], 0)
values_cv[begin:length(rand_cv)] = rand_cv
Φ_cv(t) = max(t >= t_end ? values_cv[end] : values_cv[Int(floor(t)) + 1], 0)
# plot control signals
Φ_hp_data = Φ_hp.(time)
Φ_cv_data = Φ_cv.(time)
plot(time, Φ_hp_data, xlabel = "t [sec]", ylabel = L"Φ [W]", label=L"Φ_\mathrm{hp}(t)", legendfontsize=12)
plot!(time, Φ_cv_data, xlabel = "t [sec]", label=L"Φ_\mathrm{cv}(t)")
### Solar irradiance
# functions for the solar irradiance
# Φ_s(t) = 100 * sin((π / t_end) * t)
Φ_s(t) = 100
Φ_s_data = Φ_s.(time)
plot(time, Φ_s_data, xlabel = "t [sec]", ylabel = L"Φ_\mathrm{s}(t)", label=L"Φ_\mathrm{s}(t)", legendfontsize=12)
### Model parameters
# :name => [component_value, initial_condition]
C_i_true = 3.0
C_e_true = 2.0
C_h_true = 1.0
R_ie_true = 0.2
R_ea_true = 0.5
R_ih_true = 0.1
A_w_true = 3.0
A_e_true = 1.0
# true noise
σ_true = 5.0
params_d = Dict(
:lC_i => log(C_i_true),
:lC_e => log(C_e_true),
:lC_h => log(C_h_true),
:lR_ie => log(R_ie_true),
:lR_ea => log(R_ea_true),
:lR_ih => log(R_ih_true),
:lA_w => log(A_w_true),
:lA_e => log(A_e_true),
)
### Model equations
using ModelingToolkit
using ModelingToolkit: t_nounits as t, D_nounits as D
using ModelingToolkitStandardLibrary.Blocks
using DataInterpolations
using OrdinaryDiffEq
function System(input_funcs::Dict, params::Dict; name)
@named T_a_out = TimeVaryingFunction(input_funcs[:T_a])
@named Φ_hp_out = TimeVaryingFunction(input_funcs[:Φ_hp])
@named Φ_cv_out = TimeVaryingFunction(input_funcs[:Φ_cv])
@named Φ_s_out = TimeVaryingFunction(input_funcs[:Φ_s])
vars = @variables T_i(t) T_e(t) T_h(t) T_a(t) Φ_hp(t) Φ_cv(t) Φ_s(t)
pars = @parameters lC_i=params[:lC_i] lC_e=params[:lC_e] lC_h=params[:lC_h] lR_ie=params[:lR_ie] lR_ea=params[:lR_ea] lR_ih=params[:lR_ih] lA_w=params[:lA_w] lA_e=params[:lA_e]
eqs = [
# inputs
T_a ~ T_a_out.output.u
Φ_hp ~ Φ_hp_out.output.u
Φ_cv ~ Φ_cv_out.output.u
Φ_s ~ Φ_s_out.output.u
# dynamics
exp(lC_i)*D(T_i) ~ exp(lA_w)*Φ_s + exp(-lR_ih)*(T_h - T_i) + exp(-lR_ie)*(T_e - T_i)
exp(lC_e)*D(T_e) ~ exp(lA_e)*Φ_s + exp(-lR_ea)*(T_a - T_e) + exp(-lR_ie)*(T_i - T_e)
exp(lC_h)*D(T_h) ~ Φ_cv + Φ_hp + exp(-lR_ih)*(T_i - T_h)
]
ODESystem(eqs, t, vars, pars; systems = [T_a_out, Φ_hp_out, Φ_cv_out, Φ_s_out], name)
end
# input functions
T_a_linf = LinearInterpolation(T_a_data, time)
Φ_hp_linf = LinearInterpolation(Φ_hp_data, time)
Φ_cv_linf = LinearInterpolation(Φ_cv_data, time)
Φ_s_linf = LinearInterpolation(Φ_s_data, time)
input_funcs = Dict(
:T_a => T_a_linf,
:Φ_hp => Φ_hp_linf,
:Φ_cv => Φ_cv_linf,
:Φ_s => Φ_s_linf
)
@named system = System(input_funcs, params_d)
sys = structural_simplify(system)
prob = ODEProblem(sys, [T_bias, T_bias, T_bias], (0, time[end]))
sol = solve(prob, Tsit5());
### Discrete data generator
using DataFrames
function generate_data(time, noise::Float64, inputs::Dict, prob::ODEProblem, system::ODESystem)
"""
Generate data for the RC Toy Model
Args:
time (StepRange): time vector
noise (Float64): noise level
inputs (Dict): dictionary with input functions
prob (ODEProblem): Problem statement
Returns:
output (DataFrame): DataFrame with columns keys(inputs) + prob.u
"""
output = DataFrame()
# get discrete inputs
for (key, input_func) in inputs
input_data = input_func.(time)
output[!, key] = input_data
end
# get output variable names
output_vars = Symbol.(replace.(string.(unknowns(sys)), r"\(t\)" => ""))
d_sol = Array(solve(prob, Tsit5(); saveat=Float64(time.step)))
d_sol_noisy = noise * randn(size(d_sol)) .+ d_sol
d_sol_df = DataFrame(d_sol_noisy', output_vars)
# merge df's and return
return hcat(output, d_sol_df)
end
data = generate_data(time, σ_true, input_funcs, prob, sys)
first(data,5)
### Model prior parameters
using LinearAlgebra
n_states = length(unknowns(sys))
n_measured = 2
# identity matrices
IdT = Matrix(1.0I, n_states, n_states)
IdM = Matrix(1.0I, n_measured, n_measured)
# thermal capacitance prior
μ_C = [0 for _ in 1:n_states]
Σ_C = IdT * 1e0
# thermal resistance prior
μ_R = [-1 for _ in 1:n_states]
Σ_R = IdT * 1e0
# area prior
n_areas = 2
μ_A = [1 for _ in 1:n_areas]
Σ_A = IdM * 1e0
# measurement noise
ν_m = n_measured
Σ_m = IdM * 1e1
Λ_m = (1/n_measured) * inv(Σ_m)
# check that the prior is positive definite
@show isposdef(Σ_C)
@show isposdef(Σ_R)
@show isposdef(Σ_A)
@show isposdef(Σ_m)
@show isposdef(Λ_m)
# buid prior params dict
prior_params = Dict{Symbol,Any}(
:μ_C => μ_C,
:Σ_C => Σ_C,
:μ_R => μ_R,
:Σ_R => Σ_R,
:μ_A => μ_A,
:Σ_A => Σ_A,
:ν_m => ν_m,
:Λ_m => Λ_m
)
### Inference with Turing
using Turing
using DifferentialEquations
@model function LGDS(x::AbstractArray, prob, params::Dict, t_int::Float64)
# prior distributions
σ ~ InverseGamma(2, 3)
lC ~ MvNormal(params[:μ_C], params[:Σ_C])
lR ~ MvNormal(params[:μ_R], params[:Σ_R])
lA ~ MvNormal(params[:μ_A], params[:Σ_A])
p = [lC; lR; lA]
# we only save [T_i, T_h] because the real dataset won't have T_e
T_ode = solve(prob, Tsit5(); p=p, saveat=t_int, save_idxs=[1,3], verbose=false)
if length(T_ode) != length(x)
return
end
# observations
for i in 1:length(x)
x[i] ~ MvNormal(T_ode[i], σ^2 * IdM)
end
return nothing
end
# instantiate model
input_data = Array(data[!, [:T_i, :T_h]])
model = LGDS(input_data, prob, prior_params, Δt)
# sample independent chains
n_chains = 5
chain = sample(model, NUTS(0.65), MCMCSerial(), 1000, n_chains; progress=true)
### Turing results
using StatsPlots
plot(chain)
cols = ["$(param)[$(i)]" for param in ["lC", "lR", "lA"] for i in 1:n_states][begin:end-1]
# vector to hold the sum(MSE) for each chain
error_chain = zeros(n_chains)
n_samples = 100
T_i_sample = data[!, :T_i]
for i in 1:n_chains
# get the chain
ch = chain[:, :, i]
# get the posterior samples of C and R
posterior_samples = DataFrame(sample(chain[:, :, 1], n_samples; replace=false))[!, cols]
for p in eachrow(Array(posterior_samples))
# solve the ODE with the posterior samples [C, R]
sol_p = solve(prob, Tsit5(); p=p, saveat=Δt, save_idxs=1, verbose=false)
if length(sol_p) != length(T_i_sample)
# error_chain[i] = Inf
continue
end
# calculate the normalized root mean squared error between sol_p and T_i_sample
error_chain[i] += sum((sol_p .- T_i_sample).^2) / length(T_i_sample)
end
end
chain_sel = argmin(error_chain)
println("Mean squared error for each chain: ", error_chain)
println("Chain with lowest error: ", chain_sel)
### Data retrodiction
plot(; legend=false)
posterior_samples = DataFrame(sample(chain[:, :, chain_sel], n_samples; replace=false))[!, cols]
nrmse = 0.0
# loop over posterior samples to plot the simulation
for p in eachrow(Array(posterior_samples))
# solve the ODE model
sol_p = solve(prob, Tsit5(); p=p, saveat=Δt, save_idxs=1, verbose=false)
if length(sol_p) != length(T_i_sample)
continue
end
# compute the NRMSE
nrmse += sum((sol_p .- T_i_sample).^2) / length(T_i_sample)
# plot the posterior samples
plot!(sol_p; alpha=0.5, color="#BBBBBB")
end
# Plot simulation and noisy observations.
plot!(sol; idxs=[:T_i], size=(800,400), color=[1 2], linewidth=1,
title="NRMSE: $(nrmse/100)",
xlabel="t",
ylabel="T_i",
ylim=(minimum(T_i_sample)-5,
maximum(T_i_sample)+5)
)
scatter!(0:4*Δt:t_end, T_i_sample[1:4:end], label="T_i (observed)", color=[1 2])
I can also share the full notebook if needed but the website here won’t let me upload .ipynb files.
You are right, this seemed to help. What also helped speed it up is setting priors with low variance.
3 capacitors, 3 resistors and 2 areas so only 8 parameters in total. For context, the third-order RC network is supposed to model the dominant thermal dynamics of a house, the code above is toy model for which I am trying to estimate the known parameters with simulated data.
The posteriors have almost the same means as the priors, so it seems like there is no ‘learning’ being done at all, even when I set a large number of iterations. The means of the posterior are also too far off from the true values to be of any use. I simulated 100 seconds worth of data in ModelingToolkit.jl and I tried dt = 0.1, 0.2 and 0.5. So between 1000 and 200 samples. I also tried toning down the noise, but it doesn’t really have an effect.
I adapted the ODE model so that instead of finding C, R and A I try to estimate log(C), log(R) and log(A). From the first order toy model experiments I noticed this was much more numerically stable because it avoids the singularity at C=R=A=0. It also enforced the parameters to be positive and makes it possible to sample from standard normal distributions, so I think this works well.
All of this worked very well in the strongly simplified first order toy model, but I am unable to reproduce the good results with the third order toy model.