Hi,
I’ve been trying to train a universal differential equation (UDE) model on a set of time series in parallel using DiffEqGPU and EnsembleProblem, where each trajectory has its own stored initial condition (u0) and parameter vector (p). The goal is to compute the losses for every trajectory in a batch simultaneously, utilising the power of my GPU. I’ve encountered a few issues along the way, mainly with EnsembleGPUArray(), and I’m unsure where to go from here:
# Scalar indexing of GPU arrays is enabled for everything that follows.
CUDA.allowscalar(true)
# Report the input shapes; observed output: (44,)(1, 44)(2, 104, 44)
println(size(pars), size(tranges), size(fit_data))
# Network with 3 inputs, one 50-unit tanh layer and a single output.
NN = FastChain(
    FastDense(3, 50, tanh),
    FastDense(50, 1),
)
# Initial network weights, passed through `Lux.gpu` in the same expression.
nn_ps = Lux.gpu(initial_params(NN))
# Index 1 along the second axis of `fit_data` supplies the initial states (wrapped by `cu`).
u0s = cu(fit_data[:, 1, :])
# Right-hand side of the UDE: sinusoidal forcing minus an odd polynomial in `x`
# (plus a term linear in `y`), corrected by the first output of the network.
#   u     - state, unpacked as (x, y)
#   nn_ps - network weights handed to `NN`
#   t     - time
#   p     - coefficient container; entries 1 through 9 are read
#   NN    - callable network, invoked as NN(input, nn_ps)
# Returns the two derivatives collected into an array via `cu`.
function ude(u, nn_ps, t, p, NN)
    phase = p[6] * t
    forcing = (p[7] * cos(phase) + p[8] * sin(phase)) * p[9]
    x, y = u
    # Network input is (forcing, x, y); named to avoid shadowing `Base.in`.
    nn_out = NN(cu([forcing, x, y]), nn_ps)
    restoring = p[3] * y + p[1] * x + p[2] * x^3 + p[4] * x^5 + p[5] * x^7
    dx = y
    dy = forcing - restoring + nn_out[1]
    return cu([dx, dy])
end
# Solve the ensemble `prob` with Tsit5 using the EnsembleGPUArray ensemble algorithm.
# One trajectory is requested per entry of the outer `tranges`, and every
# trajectory is saved at the time points `tranges[1]`.
function predict_adjoint(prob)
    n_traj = length(tranges)
    save_times = tranges[1]
    return solve(
        prob, Tsit5(), EnsembleGPUArray();
        trajectories = n_traj, saveat = save_times,
    )
end
# Sum of squared differences between index 1 along the first axis of `batch`
# and the same slice of the ensemble solution produced by `predict_adjoint(prob)`.
function loss_adjoint(prob, batch)
    sol = predict_adjoint(prob)
    residual = batch[1, :, :] .- sol[1, :, :]
    return sum(abs2, residual)
end
# Integration span: first and last entries of the first element of `tranges`.
t_span = let ts = tranges[1]
    (ts[1], ts[end])
end
# Build one ODE problem per stored trajectory and return the ensemble loss.
#   nn_ps   - network weights (captured by the right-hand-side closure)
#   batch   - data passed on to `loss_adjoint`
#   tranges - accepted for interface compatibility; the span comes from the outer `t_span`
#   pars    - indexable collection of per-trajectory coefficient sets; `pars[i]` is
#             used for trajectory `i`
# Prints the loss and returns it.
function loss_p(nn_ps, batch, tranges, pars)
    # The right-hand side must read the coefficients from the problem's `p` slot.
    # Hard-coding `pars[1]` in the closure would make every trajectory use the first
    # coefficient set, regardless of the `p = pars[i]` assigned in `prob_func`.
    rhs = (u, p, t) -> ude(u, nn_ps, t, p, NN)
    prob_NN = ODEProblem{false}(rhs, u0s[:, 1], t_span, pars[1])
    function prob_func(prob, i, repeat)
        # Only the initial state and coefficients change per trajectory. The save
        # grid is chosen at `solve` time, so `t_span` is not passed as `saveat` here.
        return remake(prob; u0 = u0s[:, i], p = pars[i])
    end
    prob_b = EnsembleProblem(prob_NN, prob_func = prob_func)
    l = loss_adjoint(prob_b, batch)
    println(l)
    return l
end
# Objective handed to the optimiser; forwards all four arguments unchanged to `loss_p`.
MINIMISE(nn_ps, batch, time_batch, ode_ps) = loss_p(nn_ps, batch, time_batch, ode_ps)
# Optimise the network weights with ADAM, using finite-difference gradients.
adtype = Optimization.AutoFiniteDiff()
optfunc = OptimizationFunction((nn_ps, _) -> MINIMISE(nn_ps, fit_data, tranges, pars), adtype)
opt = ADAM(0.1)
optprob = Optimization.OptimizationProblem(optfunc, nn_ps)
# Loss history. It has to be created and filled here, otherwise the summary line
# below fails with an UndefVarError because nothing else defines `losses`.
losses = Float64[]
# Callback run by the optimiser each iteration: store the loss and return `false`
# so that optimisation continues. Extra positional arguments are ignored.
record_loss = function (_state, l, _extras...)
    push!(losses, l)
    return false
end
res = Optimization.solve(optprob, opt; maxiters = 50, callback = record_loss)
println("Training loss after $(length(losses)) iterations: $(losses[end])")
My error message at the moment is:
ERROR: ArgumentError: tuple length should be ≥ 0, got -4
Stacktrace:
[1] _ntuple(f::Adapt.var"#1#4"{DiffEqFlux.FastDense{typeof(NNlib.tanh_fast), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing}}, n::Int64)
@ Base .\ntuple.jl:36
[2] ntuple
@ .\ntuple.jl:19 [inlined]
[3] adapt_structure(to::CUDA.Adaptor, f::DiffEqFlux.FastDense{typeof(NNlib.tanh_fast), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing})
@ Adapt C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\base.jl:19
[4] adapt
@ C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\Adapt.jl:40 [inlined]
[5] Fix1
@ .\operators.jl:1096 [inlined]
[6] map
@ .\tuple.jl:222 [inlined]
[7] adapt_structure
@ C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\base.jl:3 [inlined]
[8] adapt
@ C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\Adapt.jl:40 [inlined]
[9] Fix1
@ .\operators.jl:1096 [inlined]
[10] map
@ .\tuple.jl:221 [inlined]
[11] adapt_structure
@ C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\base.jl:3 [inlined]
[12] adapt(to::CUDA.Adaptor, x::Tuple{Tuple{DiffEqFlux.FastDense{typeof(NNlib.tanh_fast), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing}, DiffEqFlux.FastDense{typeof(identity), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing}}})
@ Adapt C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\Adapt.jl:40
[13] adapt_structure(to::CUDA.Adaptor, f::DiffEqFlux.FastChain{Tuple{DiffEqFlux.FastDense{typeof(NNlib.tanh_fast), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing}, DiffEqFlux.FastDense{typeof(identity), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing}}})
@ Adapt C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\base.jl:24
[14] adapt
@ C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\Adapt.jl:40 [inlined]
[15] Fix1
@ .\operators.jl:1096 [inlined]
[16] map
@ .\tuple.jl:223 [inlined]
[17] map
@ .\tuple.jl:224 [inlined]
[18] adapt_structure
@ C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\base.jl:3 [inlined]
[19] adapt
@ C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\Adapt.jl:40 [inlined]
[20] adapt_structure(to::CUDA.Adaptor, f::Main.EnergyHarvesterModel.var"#33#43"{CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Vector{CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Main.EnergyHarvesterModel.var"#ude#39", DiffEqFlux.FastChain{Tuple{DiffEqFlux.FastDense{typeof(NNlib.tanh_fast), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing}, DiffEqFlux.FastDense{typeof(identity), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing}}}})
@ Adapt C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\base.jl:24
[21] adapt(to::CUDA.Adaptor, x::Function)
@ Adapt C:\Users\Stefan\.julia\packages\Adapt\0zP2x\src\Adapt.jl:40
[22] cudaconvert(arg::Function)
@ CUDA C:\Users\Stefan\.julia\packages\CUDA\BbliS\src\compiler\execution.jl:152
[23] map(f::typeof(CUDA.cudaconvert), t::Tuple{Main.EnergyHarvesterModel.var"#33#43"{CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Vector{CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Main.EnergyHarvesterModel.var"#ude#39", DiffEqFlux.FastChain{Tuple{DiffEqFlux.FastDense{typeof(NNlib.tanh_fast), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing}, DiffEqFlux.FastDense{typeof(identity), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing}}}}, CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Float32}) (repeats 2 times)
@ Base .\tuple.jl:224
[24] macro expansion
@ C:\Users\Stefan\.julia\packages\CUDA\BbliS\src\compiler\execution.jl:100 [inlined]
.......................
[49] macro expansion
@ C:\Users\Stefan\.julia\packages\OptimizationOptimisers\KGKWE\src\OptimizationOptimisers.jl:36 [inlined]
[50] macro expansion
@ C:\Users\Stefan\.julia\packages\Optimization\aPPOg\src\utils.jl:37 [inlined]
[51] __solve(prob::SciMLBase.OptimizationProblem{true, SciMLBase.OptimizationFunction{true, Optimization.AutoFiniteDiff{Val{:forward}, Val{:forward}, Val{:hcentral}}, Main.EnergyHarvesterModel.var"#34#46"{Main.EnergyHarvesterModel.var"#MINIMISE#45"{Main.EnergyHarvesterModel.var"#loss_p#42"{Tuple{Float32, Float32}, Main.EnergyHarvesterModel.var"#loss_adjoint#41"{Main.EnergyHarvesterModel.var"#predict_adjoint#40"}, Main.EnergyHarvesterModel.var"#ude#39", CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, DiffEqFlux.FastChain{Tuple{DiffEqFlux.FastDense{typeof(NNlib.tanh_fast), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing}, DiffEqFlux.FastDense{typeof(identity), DiffEqFlux.var"#initial_params#107"{Vector{Float32}}, Nothing}}}}}}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, SciMLBase.NullParameters, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}}, opt::Optimisers.Adam{Float64}, data::Base.Iterators.Cycle{Tuple{Optimization.NullData}}; maxiters::Int64, callback::Function, progress::Bool, save_best::Bool, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ OptimizationOptimisers C:\Users\Stefan\.julia\packages\OptimizationOptimisers\KGKWE\src\OptimizationOptimisers.jl:35
[52] #solve#540
@ C:\Users\Stefan\.julia\packages\SciMLBase\QqtZA\src\solve.jl:84 [inlined]
[53] fit_df(train_df::DataFrames.SubDataFrame{DataFrames.DataFrame, DataFrames.Index, Vector{Int64}}, val_df::DataFrames.SubDataFrame{DataFrames.DataFrame, DataFrames.Index, Vector{Int64}}, par::ComponentArrays.ComponentVector{Float32, Vector{Float32}, Tuple{ComponentArrays.Axis{(ωn = 1, μ = 2, b = 3, ν = 4, ρ = 5, ω = 6, A = 7, B = 8, ugain = 9)}}})
@ Main.EnergyHarvesterModel c:\Users\Stefan\VScode projects\EnergyHarvester\src\NeuralODEModel.jl:270
[54] top-level scope
@ .\timing.jl:262 [inlined]
I’m very new to this workflow and have tried to follow the tutorials as best as I can, so any help would be greatly appreciated.
Thanks.