CUDA Error: ArgumentError: Objects are on devices with different types: CPUDevice and CUDADevice

Hi, I am trying to use CUDA to speed up my code. I am running into an error at the last step, where the optimizer runs the training iterations.

This is the message I am getting:
ERROR: ArgumentError: Objects are on devices with different types: CPUDevice and CUDADevice.

The code is below:

using ComponentArrays
using DiffEqFlux
using Lux
using Optimization
using OptimizationOptimJL
using OptimizationOptimisers
using OrdinaryDiffEq
using Plots
using Random
using CUDA
using LinearAlgebra
using LuxCUDA
println("Use NN to solve SIR ODE model")

CUDA.allowscalar(true)  # allow scalar indexing on GPU arrays (convenient, but very slow)
# u = [s(t), I(t), R(t)]
function trueSirModel!(du, u, p, t)
    beta, gamma, N = p
    du[1] = -(beta * u[1] * u[2]) / N
    du[2] = ((beta * u[1] * u[2]) / N) - (gamma * u[2])
    du[3] = gamma * u[2]
end

# Initial conditions

N = 1000
i0 = 1
r0 = 0
s0 = (N - i0 - r0)
u0 = cu(Float32[s0, i0, r0])

# constants
beta = 0.3
gamma = 0.1
p = cu(Float32[beta, gamma, N])
# time duration
tspan = (0.0, 160.0)
datasize = 160
tsteps = cu(range(tspan[1], tspan[2]; length=datasize))

# Solving the ODE solution

trueOdeProblem = ODEProblem(trueSirModel!, u0, tspan, p)
trueOdeData = cu(solve(trueOdeProblem, Tsit5(), saveat=tsteps))
# Defining the neural network

rng = Random.default_rng()

# After several experiments, a network with two 150-unit hidden layers fit the true data very well.

sirNN = Lux.Chain(Lux.Dense(3, 150, tanh), Lux.Dense(150, 150, tanh), Lux.Dense(150, 3))
p, st = Lux.setup(rng, sirNN)
sirNNOde = cu(NeuralODE(sirNN, tspan, Tsit5(), saveat=tsteps))

# Prediction function evaluated at every iteration
function prediction(p)
    cu(sirNNOde(u0, p, st)[1])
end

# Loss represents the difference between the original data and the predicted output
function loss(p)
    pred = prediction(p)
    loss = sum(abs2, trueOdeData .- pred)
    return loss, pred
end
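# (Returning the prediction alongside the loss lets the extra value reach the
# callback below as its `pred` argument.)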

# A callback function to plot the true data and the predicted output for the susceptible, infected, and recovered populations
callback = function (p, l, pred; doplot=true)
    println(l)
    if doplot
        plt = scatter(tsteps, trueOdeData[1, :]; label="true susceptible")
        scatter!(plt, tsteps, pred[1, :]; label="prediction susceptible")
        iplt = scatter(tsteps, trueOdeData[2, :]; label="true infected")
        scatter!(iplt, tsteps, pred[2, :]; label="prediction infected")
        rplt = scatter(tsteps, trueOdeData[3, :]; label="true recovered")
        scatter!(rplt, tsteps, pred[3, :]; label="prediction recovered")
        display(plot(plt, iplt, rplt))
    end
    return false
end

# Defining optimization techniques

pinit = ComponentArray(p)
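# NOTE: this ComponentArray lives on the CPU, while u0 and trueOdeData are CuArrays;
# feeding CPU parameters and GPU states into the NeuralODE is one likely source of
# the device-mismatch error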
adtype = Optimization.AutoZygote()
optimizeFunction = Optimization.OptimizationFunction((x, p) -> loss(x), adtype)

# Defining the problem to be optimized
neuralProblem = Optimization.OptimizationProblem(optimizeFunction, pinit)

# Train the NN with the Adam optimizer (300 iterations)

result = Optimization.solve(neuralProblem, Optimisers.Adam(0.05); callback=callback,
    maxiters=300)

Could this be due to the plotting?
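
One way to narrow it down is to check which device each object actually lives on. A minimal sketch, assuming the `get_device` helper from MLDataDevices (the package that handles devices for Lux):

using MLDataDevices
get_device(u0)        # CUDADevice() after cu(...)
get_device(pinit)     # CPUDevice(); ComponentArray(p) stays on the CPU
get_device(tsteps)    # CUDADevice() here as well, since tsteps was wrapped in cu(...)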

I have been able to solve the CUDA device-mismatch error by reading the online help and modifying the code as below:

using Lux, Optimization, OptimizationOptimisers, Zygote, OrdinaryDiffEq, Plots, LuxCUDA,
SciMLSensitivity, Random, ComponentArrays
import DiffEqFlux: NeuralODE
println("Use NN to solve SIR ODE model")
CUDA.allowscalar(false)
gdev = gpu_device()

# u = [s(t), I(t), R(t)]
function trueSirModel!(du, u, p, t)
    beta, gamma, N = p
    du[1] = -(beta * u[1] * u[2]) / N
    du[2] = ((beta * u[1] * u[2]) / N) - (gamma * u[2])
    du[3] = gamma * u[2]
end

# Initial conditions
N = 1000
i0 = 1
r0 = 0
s0 = (N - i0 - r0)
u0 = Float32[s0, i0, r0]

# constants
beta = 0.3
gamma = 0.1
p = Float32[beta, gamma, N]
# time duration
tspan = Float32[0.0, 160.0]
datasize = 160
tsteps = range(tspan[1], tspan[2]; length=datasize)

# Solving the ODE solution

trueOdeProblem = ODEProblem(trueSirModel!, u0, tspan, p)
trueOdeData = solve(trueOdeProblem, Tsit5(); saveat=tsteps)
trueOdeData = Array(ode_data) |> gdev
# Defining the neural network
# rng for Lux.setup
rng = Xoshiro(0)

# After several experiments, a network with two 150-unit hidden layers fit the true data very well.

sirNN = Lux.Chain(Lux.Dense(3, 150, tanh), Lux.Dense(150, 150, tanh), Lux.Dense(150, 3))
u0 = Float32[s0, i0, r0] |> gdev
p, st = Lux.setup(rng, sirNN)
p = p |> ComponentArray |> gdev
st = st |> gdev
sirNNOde = NeuralODE(sirNN, tspan, Tsit5(); saveat=tsteps)

# Prediction function evaluated at every iteration; reduce(hcat, ...) stacks the
# saved states into a 3 x datasize matrix
function prediction(p)
    reduce(hcat, first(sirNNOde(u0, p, st)).u)
end

# Loss represents the difference between the original data and the predicted output
function loss(p)
    pred = prediction(p)
    loss = sum(abs2, trueOdeData .- pred)
    return loss, pred
end

# A callback function to plot the true data and the predicted output for the susceptible, infected, and recovered populations
callback = function (p, l, pred; doplot=true)
    println(l)
    if doplot
        plt = scatter(tsteps, Array(trueOdeData[1, :]); label="true susceptible")
        scatter!(plt, tsteps, Array(pred[1, :]); label="prediction susceptible")
        iplt = scatter(tsteps, Array(trueOdeData[2, :]); label="true infected")
        scatter!(iplt, tsteps, Array(pred[2, :]); label="prediction infected")
        rplt = scatter(tsteps, Array(trueOdeData[3, :]); label="true recovered")
        scatter!(rplt, tsteps, Array(pred[3, :]); label="prediction recovered")
        display(plot(plt, iplt, rplt))
    end
    return false
end

# Defining optimization techniques

adtype = Optimization.AutoZygote()
optimizeFunction = Optimization.OptimizationFunction((x, p) -> loss(x), adtype)

# Defining the problem to be optimized
neuralProblem = Optimization.OptimizationProblem(optimizeFunction, p)

# Train the NN with the Adam optimizer (300 iterations)

result = Optimization.solve(neuralProblem, Optimisers.Adam(0.05); callback=callback, maxiters=300)

I removed all the CuArray (`cu`) conversions and used `gdev` as needed.
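
The pattern change in miniature, where `gdev` is the device object returned by `gpu_device()`:

# before: hard-coded CUDA arrays
u0 = cu(Float32[s0, i0, r0])
# after: move to whatever GPU is available (falls back to the CPU if none is found)
u0 = Float32[s0, i0, r0] |> gdev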

However, now I have a new error, and it is also generated from the last step:

# Train the NN with the Adam optimizer (300 iterations)

result = Optimization.solve(neuralProblem, Optimisers.Adam(0.05); callback=callback,
    maxiters=300)

**Error message:**
ERROR: DimensionMismatch: arrays could not be broadcast to a common size: a has axes Base.OneTo(2) and b has axes Base.OneTo(3)
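
For reference, the broadcast failure itself is generic; it just means the two sides of `trueOdeData .- pred` in the loss no longer have matching shapes. A two-line sketch reproduces the same message:

a = rand(Float32, 2)
b = rand(Float32, 3)
a .- b   # ERROR: DimensionMismatch: ... Base.OneTo(2) and Base.OneTo(3)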

Any insights / help much appreciated.

Stacktrace:

1. `_bcs1` at broadcast.jl
2. `_bcs` at broadcast.jl
3. `broadcast_shape` at broadcast.jl
4. `combine_axes` at broadcast.jl
5. `instantiate` at broadcast.jl
6. `materialize` at broadcast.jl
7. `adjoint` at broadcast.jl
8. `_pullback(__context__::Zygote.Context{…}, ::typeof(Base.Broadcast.broadcasted), ::typeof(-), x::Matrix{…}, y::Matrix{…})` at adjoint.jl
9. `loss` at SRI_NN_gpu.jl
10. `_pullback(ctx::Zygote.Context{false}, f::typeof(loss), args::ComponentVector{Float32, Vector{Float32}, Tuple{Axis{…}}})` at interface2.jl
11. `#156` at SRI_NN_gpu.jl
12. `_pullback(::Zygote.Context{…}, ::var"#156#157", ::ComponentVector{…}, ::SciMLBase.NullParameters)` at interface2.jl
13. `pullback(::Function, ::Zygote.Context{false}, ::ComponentVector{Float32, Vector{…}, Tuple{…}}, ::Vararg{Any})` at interface.jl
14. `pullback(::Function, ::ComponentVector{Float32, Vector{Float32}, Tuple{Axis{…}}}, ::SciMLBase.NullParameters)` at interface.jl
15. `withgradient(::Function, ::ComponentVector{Float32, Vector{Float32}, Tuple{Axis{…}}}, ::Vararg{Any})` at interface.jl
16. `value_and_gradient` at DifferentiationInterfaceZygoteExt.jl
17. `value_and_gradient!(f::Function, grad::ComponentVector{…}, prep::DifferentiationInterface.NoGradientPrep, backend::AutoZygote, x::ComponentVector{…}, contexts::DifferentiationInterface.Constant{…})` at DifferentiationInterfaceZygoteExt.jl
18. `(::OptimizationZygoteExt.var"#fg!#16"{…})(res::ComponentVector{…}, θ::ComponentVector{…})` at OptimizationZygoteExt.jl
19. `macro expansion` at OptimizationOptimisers.jl
20. `macro expansion` at OptimizationOptimisers.jl
21. `macro expansion` at utils.jl
22. `__solve(cache::OptimizationCache{…})` at OptimizationOptimisers.jl
23. `solve!(cache::OptimizationCache{…})` at solve.jl
24. `solve(::OptimizationProblem{…}, ::Optimisers.Adam; kwargs::Base.Pairs{…})` at solve.jl
25. `kwcall(::@NamedTuple{…}, ::typeof(solve), ::OptimizationProblem{…}, ::Optimisers.Adam)` at solve.jl
26. `top-level scope` at SRI_NN_gpu.jl

Some type information was truncated. Use `show(err)` to see complete types.


I think I found the problem, on line 35 of the code: `trueOdeData = Array(ode_data) |> gdev` referenced `ode_data` instead of `trueOdeData`. Julia did not raise an error because a variable with that wrong name still existed in the workspace from a previous program run.
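
In miniature, the offending line and its fix:

# wrong: ode_data is a stale variable from an earlier run (with a different shape),
# so Julia used it without complaint
trueOdeData = Array(ode_data) |> gdev
# right: convert the solution that was just computed
trueOdeData = Array(trueOdeData) |> gdev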

The following code works without error.

using Lux, Optimization, OptimizationOptimisers, Zygote, OrdinaryDiffEq, Plots, LuxCUDA,
SciMLSensitivity, Random, ComponentArrays
import DiffEqFlux: NeuralODE
println("Use NN to solve SIR ODE model")
CUDA.allowscalar(false)
gdev = gpu_device()
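# gpu_device() comes from MLDataDevices (re-exported by Lux/LuxCUDA); x |> gdev
# moves x to the GPU, falling back to the CPU if no functional GPU is found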
# u = [s(t), I(t), R(t)]
function trueSirModel!(du, u, p, t)
    beta, gamma, N = p
    du[1] = -(beta * u[1] * u[2]) / N
    du[2] = ((beta * u[1] * u[2]) / N) - (gamma * u[2])
    du[3] = gamma * u[2]
end

# Initial conditions
N = 1000
i0 = 1
r0 = 0
s0 = (N - i0 - r0)
u0 = Float32[s0, i0, r0]

# constants
beta = 0.3
gamma = 0.1
p = Float32[beta, gamma, N]
# time duration
tspan = Float32[0.0, 160.0]
datasize = 160
tsteps = range(tspan[1], tspan[2]; length=datasize)

# Solving the ODE solution

trueOdeProblem = ODEProblem(trueSirModel!, u0, tspan, p)
trueOdeData = solve(trueOdeProblem, Tsit5(); saveat=tsteps)
trueOdeData = Array(trueOdeData) |> gdev
# Defining the neural network
# rng for Lux.setup
rng = Xoshiro(0)

# After several experiments, a network with two 150-unit hidden layers fit the true data very well.

sirNN = Lux.Chain(Lux.Dense(3, 150, tanh), Lux.Dense(150, 150, tanh), Lux.Dense(150, 3))
u0 = Float32[s0, i0, r0] |> gdev
p, st = Lux.setup(rng, sirNN)
p = p |> ComponentArray |> gdev
st = st |> gdev
sirNNOde = NeuralODE(sirNN, tspan, Tsit5(); saveat=tsteps)

# Prediction function evaluated at every iteration; Array(...) collects the saved
# states of the ODE solution into a 3 x datasize matrix
function prediction(p)
    Array(sirNNOde(u0, p, st)[1])
end

# Loss represents the difference between the original data and the predicted output
function loss(p)
    pred = prediction(p)
    loss = sum(abs2, trueOdeData .- pred)
    return loss, pred
end

# A callback function to plot the true data and the predicted output for the susceptible, infected, and recovered populations
callback = function (p, l, pred; doplot=true)
    println(l)
    if doplot
        plt = scatter(tsteps, Array(trueOdeData[1, :]); label="true susceptible")
        scatter!(plt, tsteps, Array(pred[1, :]); label="prediction susceptible")
        iplt = scatter(tsteps, Array(trueOdeData[2, :]); label="true infected")
        scatter!(iplt, tsteps, Array(pred[2, :]); label="prediction infected")
        rplt = scatter(tsteps, Array(trueOdeData[3, :]); label="true recovered")
        scatter!(rplt, tsteps, Array(pred[3, :]); label="prediction recovered")
        display(plot(plt, iplt, rplt))
    end
    return false
end

# Defining optimization techniques

adtype = Optimization.AutoZygote()
optimizeFunction = Optimization.OptimizationFunction((x, p) -> loss(x), adtype)

# Defining the problem to be optimized
neuralProblem = Optimization.OptimizationProblem(optimizeFunction, p)

# Train the NN with the Adam optimizer (3000 iterations)

result = Optimization.solve(neuralProblem, Optimisers.Adam(0.05); callback=callback, maxiters=3000)

The code runs slowly; I don't see much performance improvement over the CPU-only code.

Edit: the GPU code is 40-50 times faster (based on timing from the screen output).
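
For anyone comparing the two versions, wrapping the solve call in `CUDA.@time` (or a plain `@time`) gives a more direct measurement than timing the screen output; a minimal sketch:

CUDA.@time result = Optimization.solve(neuralProblem, Optimisers.Adam(0.05);
    callback=callback, maxiters=3000)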