Hi. I’m experimenting with using GPUs to solve large systems of ODEs. As an intial test I am running the following
using DifferentialEquations, CUDA, LinearAlgebra
function dynamics!(du, u, p, t)
mul!(du, p, u)
end
n = 100
A = CuArray(rand(n, n))
u0 = CuArray(rand(n))
tspan = (0.0f0, 100.0f0)
prob = ODEProblem(dynamics!, u0, tspan, A)
sol = solve(prob)
And I keep getting ERROR: LoadError: UndefVarError
: N not defined
with a huge stacktrace
Stacktrace:
[1] ndims(#unused#::Type{Symmetric{Float64, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}})
@ Adapt ~/.julia/packages/Adapt/yYvku/src/wrappers.jl:132
[2] backend(W::Type{Symmetric{Float64, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}})
@ GPUArrays ~/.julia/packages/GPUArrays/dAUOE/src/host/broadcast.jl:13
[3] backend(x::Symmetric{Float64, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}})
@ GPUArraysCore ~/.julia/packages/GPUArraysCore/uOYfN/src/GPUArraysCore.jl:149
[4] gpu_call(::typeof(GPUArrays.linear_copy_kernel!), ::Symmetric{Float64, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, ::Int64, ::Symmetric{Float64, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, ::Int64, ::Int64; target::Symmetric{Float64, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, elements::Int64, threads::Nothing, blocks::Nothing, name::Nothing)
@ GPUArrays ~/.julia/packages/GPUArrays/dAUOE/src/device/execution.jl:61
[5] copyto!(dest::Symmetric{Float64, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, dstart::Int64, src::Symmetric{Float64, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, sstart::Int64, n::Int64)
@ GPUArrays ~/.julia/packages/GPUArrays/dAUOE/src/host/abstractarray.jl:193
[6] copyto!
@ ~/.julia/packages/GPUArrays/dAUOE/src/host/abstractarray.jl:171 [inlined]
[7] cholcopy(A::Symmetric{Float64, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}})
@ CUDA.CUSOLVER ~/.julia/packages/CUDA/YIj5X/lib/cusolver/linalg.jl:354
[8] cholesky(A::Symmetric{Float64, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, ::NoPivot; check::Bool)
@ LinearAlgebra ~/julia-1.9.4/share/julia/stdlib/v1.9/LinearAlgebra/src/cholesky.jl:400
[9] cholesky
@ ~/julia-1.9.4/share/julia/stdlib/v1.9/LinearAlgebra/src/cholesky.jl:400 [inlined]
[10] cholesky_instance
@ ~/.julia/packages/ArrayInterface/Ic7Rb/src/ArrayInterface.jl:453 [inlined]
[11] init_cacheval(alg::NormalCholeskyFactorization{NoPivot}, A::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, b::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, u::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, Pl::LinearSolve.InvPreconditioner{Diagonal{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Pr::Diagonal{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, maxiters::Int64, abstol::Float64, reltol::Float64, verbose::Bool, assumptions::OperatorAssumptions{Bool})
@ LinearSolve ~/.julia/packages/LinearSolve/8EgR1/src/factorization.jl:910
[12] macro expansion
@ ~/.julia/packages/LinearSolve/8EgR1/src/default.jl:310 [inlined]
[13] init_cacheval(alg::LinearSolve.DefaultLinearSolver, A::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, b::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, u::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, Pl::LinearSolve.InvPreconditioner{Diagonal{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Pr::Diagonal{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, maxiters::Int64, abstol::Float64, reltol::Float64, verbose::Bool, assump::OperatorAssumptions{Bool})
@ LinearSolve ~/.julia/packages/LinearSolve/8EgR1/src/default.jl:293
[14] init(::LinearProblem{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, true, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, SciMLBase.NullParameters, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}}, ::LinearSolve.DefaultLinearSolver; alias_A::Bool, alias_b::Bool, abstol::Float64, reltol::Float64, maxiters::Int64, verbose::Bool, Pl::LinearSolve.InvPreconditioner{Diagonal{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Pr::Diagonal{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, assumptions::OperatorAssumptions{Bool}, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ LinearSolve ~/.julia/packages/LinearSolve/8EgR1/src/common.jl:165
[15] init
@ ~/.julia/packages/LinearSolve/8EgR1/src/common.jl:122 [inlined]
[16] #init#80
@ ~/.julia/packages/LinearSolve/8EgR1/src/default.jl:273 [inlined]
[17] init
@ ~/.julia/packages/LinearSolve/8EgR1/src/default.jl:268 [inlined]
[18] build_nlsolver(alg::TRBDF2{1, false, Nothing, NLNewton{Rational{Int64}, Rational{Int64}, Rational{Int64}, Rational{Int64}}, typeof(OrdinaryDiffEq.DEFAULT_PRECS), Val{:forward}, true, nothing}, nlalg::NLNewton{Rational{Int64}, Rational{Int64}, Rational{Int64}, Rational{Int64}}, u::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, uprev::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, p::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, t::Float32, dt::Float32, f::ODEFunction{true, SciMLBase.AutoSpecialize, typeof(dynamics!), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, rate_prototype::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, #unused#::Type{Float64}, #unused#::Type{Float64}, #unused#::Type{Float32}, γ::Float64, c::Float32, α::Int64, #unused#::Val{true})
@ OrdinaryDiffEq ~/.julia/packages/OrdinaryDiffEq/Y6bIX/src/nlsolve/utils.jl:198
[19] build_nlsolver
@ ~/.julia/packages/OrdinaryDiffEq/Y6bIX/src/nlsolve/utils.jl:146 [inlined]
[20] build_nlsolver
@ ~/.julia/packages/OrdinaryDiffEq/Y6bIX/src/nlsolve/utils.jl:136 [inlined]
[21] alg_cache(alg::TRBDF2{1, false, Nothing, NLNewton{Rational{Int64}, Rational{Int64}, Rational{Int64}, Rational{Int64}}, typeof(OrdinaryDiffEq.DEFAULT_PRECS), Val{:forward}, true, nothing}, u::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, rate_prototype::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, #unused#::Type{Float64}, #unused#::Type{Float64}, #unused#::Type{Float32}, uprev::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, uprev2::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, f::ODEFunction{true, SciMLBase.AutoSpecialize, typeof(dynamics!), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, t::Float32, dt::Float32, reltol::Float64, p::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, calck::Bool, #unused#::Val{true})
@ OrdinaryDiffEq ~/.julia/packages/OrdinaryDiffEq/Y6bIX/src/caches/sdirk_caches.jl:157
[22] alg_cache
@ ~/.julia/packages/OrdinaryDiffEq/Y6bIX/src/caches/basic_caches.jl:25 [inlined]
[23] __init(prob::ODEProblem{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, Tuple{Float32, Float32}, true, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, ODEFunction{true, SciMLBase.AutoSpecialize, typeof(dynamics!), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, alg::CompositeAlgorithm{Tuple{Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, TRBDF2{1, false, Nothing, NLNewton{Rational{Int64}, Rational{Int64}, Rational{Int64}, Rational{Int64}}, typeof(OrdinaryDiffEq.DEFAULT_PRECS), Val{:forward}, true, nothing}}, AutoSwitch{Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, TRBDF2{0, false, Nothing, NLNewton{Rational{Int64}, Rational{Int64}, Rational{Int64}, Rational{Int64}}, typeof(OrdinaryDiffEq.DEFAULT_PRECS), Val{:forward}, true, nothing}, Rational{Int64}, Int64}}, timeseries_init::Tuple{}, ts_init::Tuple{}, ks_init::Tuple{}, recompile::Type{Val{true}}; saveat::Tuple{}, tstops::Tuple{}, d_discontinuities::Tuple{}, save_idxs::Nothing, save_everystep::Bool, save_on::Bool, save_start::Bool, save_end::Nothing, callback::Nothing, dense::Bool, calck::Bool, dt::Float32, dtmin::Nothing, dtmax::Float32, force_dtmin::Bool, adaptive::Bool, gamma::Rational{Int64}, abstol::Nothing, reltol::Nothing, qmin::Rational{Int64}, qmax::Int64, qsteady_min::Int64, qsteady_max::Int64, beta1::Nothing, beta2::Nothing, qoldinit::Rational{Int64}, controller::Nothing, fullnormalize::Bool, failfactor::Int64, maxiters::Int64, internalnorm::typeof(DiffEqBase.ODE_DEFAULT_NORM), internalopnorm::typeof(opnorm), isoutofdomain::typeof(DiffEqBase.ODE_DEFAULT_ISOUTOFDOMAIN), unstable_check::typeof(DiffEqBase.ODE_DEFAULT_UNSTABLE_CHECK), verbose::Bool, timeseries_errors::Bool, dense_errors::Bool, advance_to_tstop::Bool, stop_at_next_tstop::Bool, initialize_save::Bool, progress::Bool, progress_steps::Int64, progress_name::String, progress_message::typeof(DiffEqBase.ODE_DEFAULT_PROG_MESSAGE), progress_id::Symbol, userdata::Nothing, allow_extrapolation::Bool, initialize_integrator::Bool, alias_u0::Bool, alias_du0::Bool, initializealg::OrdinaryDiffEq.DefaultInit, kwargs::Base.Pairs{Symbol, Bool, Tuple{Symbol, Symbol}, NamedTuple{(:default_set, :second_time), Tuple{Bool, Bool}}})
@ OrdinaryDiffEq ~/.julia/packages/OrdinaryDiffEq/Y6bIX/src/solve.jl:334
[24] __init (repeats 5 times)
@ ~/.julia/packages/OrdinaryDiffEq/Y6bIX/src/solve.jl:10 [inlined]
[25] #__solve#746
@ ~/.julia/packages/OrdinaryDiffEq/Y6bIX/src/solve.jl:5 [inlined]
[26] __solve
@ ~/.julia/packages/OrdinaryDiffEq/Y6bIX/src/solve.jl:1 [inlined]
[27] solve_call(_prob::ODEProblem{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, Tuple{Float32, Float32}, true, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, ODEFunction{true, SciMLBase.AutoSpecialize, typeof(dynamics!), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, args::CompositeAlgorithm{Tuple{Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, TRBDF2{1, false, Nothing, NLNewton{Rational{Int64}, Rational{Int64}, Rational{Int64}, Rational{Int64}}, typeof(OrdinaryDiffEq.DEFAULT_PRECS), Val{:forward}, true, nothing}}, AutoSwitch{Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, TRBDF2{0, false, Nothing, NLNewton{Rational{Int64}, Rational{Int64}, Rational{Int64}, Rational{Int64}}, typeof(OrdinaryDiffEq.DEFAULT_PRECS), Val{:forward}, true, nothing}, Rational{Int64}, Int64}}; merge_callbacks::Bool, kwargshandle::Nothing, kwargs::Base.Pairs{Symbol, Bool, Tuple{Symbol, Symbol}, NamedTuple{(:default_set, :second_time), Tuple{Bool, Bool}}})
@ DiffEqBase ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:561
[28] solve_call
@ ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:527 [inlined]
[29] #solve_up#42
@ ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:1010 [inlined]
[30] solve_up
@ ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:996 [inlined]
[31] #solve#40
@ ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:933 [inlined]
[32] __solve(::ODEProblem{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, Tuple{Float32, Float32}, true, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, ODEFunction{true, SciMLBase.AutoSpecialize, typeof(dynamics!), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, ::Nothing; default_set::Bool, kwargs::Base.Pairs{Symbol, Bool, Tuple{Symbol}, NamedTuple{(:second_time,), Tuple{Bool}}})
@ DifferentialEquations ~/.julia/packages/DifferentialEquations/Tu7HS/src/default_solve.jl:14
[33] __solve
@ ~/.julia/packages/DifferentialEquations/Tu7HS/src/default_solve.jl:1 [inlined]
[34] #__solve#61
@ ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:1323 [inlined]
[35] __solve
@ ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:1316 [inlined]
[36] solve_call(::ODEProblem{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, Tuple{Float32, Float32}, true, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, ODEFunction{true, SciMLBase.AutoSpecialize, typeof(dynamics!), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}; merge_callbacks::Bool, kwargshandle::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ DiffEqBase ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:561
[37] solve_call(::ODEProblem{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, Tuple{Float32, Float32}, true, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, ODEFunction{true, SciMLBase.AutoSpecialize, typeof(dynamics!), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem})
@ DiffEqBase ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:527
[38] #solve_up#42
@ ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:1002 [inlined]
[39] solve_up
@ ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:996 [inlined]
[40] solve(::ODEProblem{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, Tuple{Float32, Float32}, true, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, ODEFunction{true, SciMLBase.AutoSpecialize, typeof(dynamics!), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}; sensealg::Nothing, u0::Nothing, p::Nothing, wrap::Val{true}, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ DiffEqBase ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:933
[41] solve(::ODEProblem{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, Tuple{Float32, Float32}, true, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, ODEFunction{true, SciMLBase.AutoSpecialize, typeof(dynamics!), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem})
@ DiffEqBase ~/.julia/packages/DiffEqBase/8lmsn/src/solve.jl:923
[42] top-level scope
that I haven’t been able to make sense of. I’m pretty stumped on this. Here is some nvidia-smi
output if that is helpful
Fri Dec 8 21:28:59 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17 Driver Version: 525.105.17 CUDA Version: 12.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA L4 Off | 00000000:00:03.0 Off | 0 |
| N/A 64C P0 33W / 72W | 0MiB / 23034MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
Is there something wrong with the GPU I am using or the software I have installed to communicate between CPU and GPU? Any help is appreciated!