I’m hoping to gather some advice on how to speed up the following example, which attempts to train a NN to learn part of the dynamics of a DAE system.
The first part below simply solves the true DAE in order to generate data to compare against during training. The DAE is formulated as a mass-matrix ODE with 4 differential variables and 6 algebraic variables.
using DifferentialEquations, DiffEqFlux, OrdinaryDiffEq, Flux, Optim, Plots,
    NLsolve, Sundials, Statistics, DiffEqSensitivity
#Time range and density of points
tspan = (0.0f0,3.0f0)
datasize = 100;
ts = Float32.(collect(range(tspan[1],tspan[2], length=datasize)))
#Constants
p = [0.5, 0.5]
#Calculate the true solution
function ODE_true(dx, x, p, t)
#unpack dynamic and algebraic states
δ₁,ω₁,δ₂,ω₂,pe₁,pe₂,V₁,θ₁,V₂,θ₂ = x
#unpack constants
Eq₁,Xq₁,H₁,D₁,Eq₂,Xq₂,H₂,D₂,V₀,θ₀,Xl₁₂,Xl₁₀,Xl₂₀,Ωb = [
#Gen 1 constants
0.9, #Eq_1
0.25, #Xq_1
5.0, #H_1
2.0, #D_1
#Gen 2 constants
0.9, #Eq_2
0.25, #Xq_2
5.0, #H_2
2.0, #D_2
#IB constants:
1.0, #V0
0.0, #θ0
#Network constants
0.1, #Xl_12
0.1, #Xl_10
0.1, #Xl_20
2*pi*60.0 # Ωb
]
pm₁ = p[1]
pm₂ = p[2]
#Compute dq parameters for gen 1
vd₁ = V₁ * sin(δ₁ - θ₁)
vq₁ = V₁ * cos(δ₁ - θ₁)
id₁ = (1.0 / Xq₁) * (Eq₁ - vq₁)
iq₁ = (1.0 / Xq₁) * vd₁
#Compute dq parameters for gen 2
vd₂ = V₂* sin(δ₂ - θ₂)
vq₂ = V₂ * cos(δ₂ - θ₂)
id₂ = (1.0 / Xq₂) * (Eq₂ - vq₂)
iq₂ = (1.0 / Xq₂) * vd₂
#Compute ODEs
dx[1] = (ω₁ - 1.0)
dx[2] = (pm₁ - pe₁ - D₁ * (ω₁ - 1.0))
dx[3] = (ω₂ - 1.0)
dx[4]= (pm₂ - pe₂ - D₂ * (ω₂ - 1.0))
#generator output power equations (2)
dx[5] = vd₁ * id₁ + vq₁ * iq₁ - pe₁
dx[6] = vd₂ * id₂ + vq₂ * iq₂ - pe₂
#line flow equations (bus 1)
dx[7] = vd₁ * id₁ + vq₁ * iq₁ - (V₁ * V₀ / Xl₁₀) * sin(θ₁ - θ₀) - (V₁ * V₂ / Xl₁₂) * sin(θ₁ - θ₂)
dx[8] = vq₁ * id₁ - vd₁ * iq₁ - V₁^2/Xl₁₀ + (V₁ * V₀ / Xl₁₀) * cos(θ₁ - θ₀) - V₁^2/Xl₁₂ + (V₁ * V₂ / Xl₁₂) * cos(θ₁ - θ₂)
#line flow equations (bus 2)
dx[9] = vd₂ * id₂ + vq₂ * iq₂ - (V₂ * V₁ / Xl₁₂) * sin(θ₂ - θ₁) - (V₂ * V₀ / Xl₂₀) * sin(θ₂ - θ₀)
dx[10] = vq₂ * id₂ - vd₂ * iq₂ - V₂^2/Xl₁₂ + (V₂ * V₁ / Xl₁₂) * cos(θ₂ - θ₁) - V₂^2/Xl₂₀ + (V₂ * V₀ / Xl₂₀) * cos(θ₂ - θ₀)
nothing
end
#Mass matrix
M = zeros(10,10)
M[1,1] = 1/(2*pi*60.0) #1/Ωb
M[2,2] = 2*5.0 #2*H1
M[3,3] = 1/(2*pi*60.0) #1/Ωb
M[4,4] = 2*5.0 #2*H2
trueODEfunc = ODEFunction(ODE_true, mass_matrix = M)
p = [0.5,0.5]
# Solve for initial condition
f = (dx, x) -> trueODEfunc(dx, x, p, 0.0)
initial_guess = [0.2,1.0,0.2,1.0,0.5,0.5,1.0,0.0,1.0,0.0]
init_sol = nlsolve(f, initial_guess, ftol = 1e-10)
x0 = init_sol.zero
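As a quick optional check, the residual of the full DAE at this point should be essentially zero, confirming that x0 is a consistent steady state:
res0 = similar(x0)
f(res0, x0)            # evaluate the full residual (differential + algebraic) at x0
maximum(abs, res0)     # should be on the order of the nlsolve ftol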
#introduce a disturbance to get a dynamic response
p[1]=0.65
prob = ODEProblem(trueODEfunc,x0,tspan)
ode_sol = solve(prob, Rodas5(autodiff=false), p=p, saveat=ts, dtmax=0.02)
ode_data = Array(ode_sol)
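For a quick visual check of the data being fit, the true trajectories can be plotted directly (Plots is already loaded):
plt_true = plot(ode_sol, title = "True DAE solution")
display(plt_true)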
The section below defines the new DAE, which now includes the NN, and trains it using sciml_train:
n_neurons = 5
#Small NN has 16 parameters and takes ~2.2 seconds for 1 training iteration
ann_gen1 = FastChain(FastDense(1,n_neurons,tanh),FastDense(n_neurons,1))
#ann_gen1 = FastChain(FastDense(1,n_neurons,tanh),FastDense(n_neurons,n_neurons),FastDense(n_neurons,1))
θ = initial_params(ann_gen1)
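The parameter count referenced in the comment above can be checked directly:
length(θ)  # 1*5 + 5 + 5*1 + 1 = 16 parameters for the single-hidden-layer network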
function UODE(dx,x,p,t)
#unpack dynamic and algebraic states
δ₁,ω₁,δ₂,ω₂,pe₁,pe₂,V₁,θ₁,V₂,θ₂ = x
#unpack constants
Eq₁,Xq₁,H₁,D₁,Eq₂,Xq₂,H₂,D₂,V₀,θ₀,Xl₁₂,Xl₁₀,Xl₂₀,Ωb = [
#Gen 1 constants
0.9, #Eq_1
0.25, #Xq_1
5.0, #H_1
2.0, #D_1
#Gen 2 constants
0.9, #Eq_2
0.25, #Xq_2
5.0, #H_2
2.0, #D_2
#IB constants:
1.0, #V0
0.0, #θ0
#Network constants
0.1, #Xl_12
0.1, #Xl_10
0.1, #Xl_20
2*pi*60.0 # Ωb
]
pm₁ = 0.65
pm₂ = 0.5
#Compute dq parameters for gen 1
vd₁ = V₁ * sin(δ₁ - θ₁)
vq₁ = V₁ * cos(δ₁ - θ₁)
id₁ = (1.0 / Xq₁) * (Eq₁ - vq₁)
iq₁ = (1.0 / Xq₁) * vd₁
#Compute dq parameters for gen 2
vd₂ = V₂* sin(δ₂ - θ₂)
vq₂ = V₂ * cos(δ₂ - θ₂)
id₂ = (1.0 / Xq₂) * (Eq₂ - vq₂)
iq₂ = (1.0 / Xq₂) * vd₂
#Compute ODEs
dx[1] = ann_gen1([t],p)[1] #NN replaces the (ω₁ - 1.0) term from the true model, as a function of time
dx[2] = (pm₁ - pe₁ - D₁ * (ω₁ - 1.0))
dx[3] = (ω₂ - 1.0)
dx[4]= (pm₂ - pe₂ - D₂ * (ω₂ - 1.0))
#generator output power equations (2)
dx[5] = vd₁ * id₁ + vq₁ * iq₁ - pe₁
dx[6] = vd₂ * id₂ + vq₂ * iq₂ - pe₂
#line flow equations (bus 1)
dx[7] = vd₁ * id₁ + vq₁ * iq₁ - (V₁ * V₀ / Xl₁₀) * sin(θ₁ - θ₀) - (V₁ * V₂ / Xl₁₂) * sin(θ₁ - θ₂)
dx[8] = vq₁ * id₁ - vd₁ * iq₁ - V₁^2/Xl₁₀ + (V₁ * V₀ / Xl₁₀) * cos(θ₁ - θ₀) - V₁^2/Xl₁₂ + (V₁ * V₂ / Xl₁₂) * cos(θ₁ - θ₂)
#line flow equations (bus 2)
dx[9] = vd₂ * id₂ + vq₂ * iq₂ - (V₂ * V₁ / Xl₁₂) * sin(θ₂ - θ₁) - (V₂ * V₀ / Xl₂₀) * sin(θ₂ - θ₀)
dx[10] = vq₂ * id₂ - vd₂ * iq₂ - V₂^2/Xl₁₂ + (V₂ * V₁ / Xl₁₂) * cos(θ₂ - θ₁) - V₂^2/Xl₂₀ + (V₂ * V₀ / Xl₂₀) * cos(θ₂ - θ₀)
nothing
end
UODEfunc = ODEFunction(UODE, mass_matrix = M)
prob_univ = ODEProblem(UODEfunc,x0,tspan,θ)
function predict_adjoint(θ)
# Array(solve(prob_univ,Rodas5(autodiff=false),p=θ,saveat=ts,sensealg=InterpolatingAdjoint()))
Array(solve(prob_univ,Rodas5(autodiff=false),p=θ,saveat=ts,sensealg=ForwardDiffSensitivity())) #Best for under 100 parameters
end
pred = @time predict_adjoint(θ) #first call includes compilation time
function loss_adjoint(θ)
pred = predict_adjoint(θ)
loss = sum(abs2, pred- ode_data)
return loss, pred
end
@time predict_adjoint(θ) #second call: solve time without compilation
l, pred = @time loss_adjoint(θ)
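Since the per-iteration cost is usually dominated by the gradient rather than the forward solve, it can also help to time a single gradient call directly. A minimal sketch, assuming Zygote is available (DiffEqFlux depends on it and sciml_train uses it for the outer gradient by default):
using Zygote
@time Zygote.gradient(p -> first(loss_adjoint(p)), θ) #one reverse-mode gradient of the scalar loss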
cb = function (θ,l,pred; doplot=true)
display(l)
if doplot
p = plot(solve(remake(prob_univ,p=θ),Rodas5(autodiff=false),saveat=ts), ylims=(0,1.5), color=:red, title="Blue = True Solution, Red = Universal ODE prediction")
plot!(p,ode_sol, color = :blue)
display(p)
end
return false
end
@time cb(θ,l,pred)
loss1 = loss_adjoint(θ)
res1 = @time DiffEqFlux.sciml_train(loss_adjoint, θ, BFGS(initial_stepnorm=0.01), cb = cb, maxiters = 2)
When I increase the size of the NN in this example, training becomes very slow. I'm struggling to tease out which combination of options (solver, NN size, autodiff, sensealg, etc.) makes sense for my particular (stiff) DAE, and how those options affect the training time. Is there any general advice on how to start optimizing these trade-offs?