Here’s an MWE for my problem. I’ve been trying to use Lux on the GPU for a problem similar to this MWE. The error usually comes from the U(u, p, st)[1] line: sometimes it’s a scalar-indexing error, other times a GPU compiler error. Is there a better way to access the parameters of the neural network inside the differential equation?
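For context, the network call itself behaves as expected outside the solver, in the usual Lux apply form. A quick standalone check on the CPU (separate from the failing GPU run below):

using Lux, ComponentArrays, Random

U = Lux.Chain(Lux.Dense(3, 32, tanh), Lux.Dense(32, 3))
ps, st = Lux.setup(Random.default_rng(), U)
y, _ = U(Float32[1.0, 0.0, 0.0], ComponentArray(ps), st)   # y is a length-3 Vector{Float32}

The full MWE: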
using DiffEqGPU, OrdinaryDiffEq, CUDA
using Lux, LuxCUDA, ComponentArrays
using Random
U = Lux.Chain(
    Lux.Dense(3, 32, tanh),
    Lux.Dense(32, 3)
)
p, st = Lux.setup(Random.default_rng(), U)
p = p |> ComponentArray |> gpu_device()
st = st |> gpu_device()
function lorenz(du, u, p, t)
    u = cu(u)
    p = cu(p)
    du[1] = p[1] .* (u[2] .- U(u, p, st)[1])
    du[2] = u[1] * (p[2] - u[3]) - u[2]
    du[3] = u[1] * u[2] - p[3] * u[3]
end
u0 = Float32[1.0; 0.0; 0.0]
tspan = (0.0f0, 100.0f0)
p = [10.0f0, 28.0f0, 8 / 3.0f0]
prob = ODEProblem(lorenz, u0, tspan, p)
prob_func = (prob, i, repeat) -> remake(prob, p = rand(Float32, 3) .* p)
monteprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
sol = solve(monteprob, Tsit5(), EnsembleGPUArray(CUDA.CUDABackend()), trajectories = 10000, saveat = 1.0f0);
Running this throws a huge compiler error:
ERROR: LoadError: GPUCompiler.InvalidIRError(GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}(MethodInstance for DiffEqGPU.gpu_gpu_kernel(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, ::typeof(lorenz), ::CuDeviceMatrix{Float32, 1}, ::CuDeviceMatrix{Float32, 1}, ::CuDeviceMatrix{Float32, 1}, ::Float32), GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}(GPUCompiler.PTXCompilerTarget(v"8.9.0", v"7.8.0", true, nothing, nothing, nothing, nothing, false, nothing, nothing), CUDA.CUDACompilerParams(v"8.9.0", v"8.6.0"), true, nothing, :specfunc, false, 2, true, true, true, true, false, true, false), 0x0000000000006999), Tuple{String, Vector{Base.StackTraces.StackFrame}, Any}[("dynamic function invocation", [setindex! at array.jl:177, setindex! at subarray.jl:384, lorenz at ensemble-multi-species-gpu.jl:17, macro expansion at kernels.jl:43, gpu_gpu_kernel at macros.jl:322, gpu_gpu_kernel at none:0], convert), ("dynamic function invocation", [lorenz at ensemble-multi-species-gpu.jl:17, macro expansion at kernels.jl:43, gpu_gpu_kernel at macros.jl:322, gpu_gpu_kernel at none:0], nothing), ("dynamic function invocation", [lorenz at ensemble-multi-species-gpu.jl:17, macro expansion at kernels.jl:43, gpu_gpu_kernel at macros.jl:322, gpu_gpu_kernel at none:0], getindex), ("dynamic function invocation", [lorenz at ensemble-multi-species-gpu.jl:17, macro expansion at kernels.jl:43, gpu_gpu_kernel at macros.jl:322, gpu_gpu_kernel at none:0], Base.Broadcast.broadcasted), ("dynamic function invocation", [lorenz at ensemble-multi-species-gpu.jl:17, macro expansion at kernels.jl:43, gpu_gpu_kernel at macros.jl:322, gpu_gpu_kernel at none:0], Base.Broadcast.materialize)])
Stacktrace:
[1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2MI6e/src/validation.jl:167
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/2MI6e/src/driver.jl:382 [inlined]
[3] emit_llvm(job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/2MI6e/src/utils.jl:110
[4] emit_llvm(job::GPUCompiler.CompilerJob)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2MI6e/src/utils.jl:108
[5] compile_unhooked(output::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/2MI6e/src/driver.jl:95
[6] compile_unhooked
@ ~/.julia/packages/GPUCompiler/2MI6e/src/driver.jl:80 [inlined]
[7] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/2MI6e/src/driver.jl:67
[8] compile
@ ~/.julia/packages/GPUCompiler/2MI6e/src/driver.jl:55 [inlined]
[9] #1171
@ ~/.julia/packages/CUDA/TW8fL/src/compiler/compilation.jl:255 [inlined]
[10] JuliaContext(f::CUDA.var"#1171#1174"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/2MI6e/src/driver.jl:34
[11] JuliaContext(f::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2MI6e/src/driver.jl:25
[12] compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/.julia/packages/CUDA/TW8fL/src/compiler/compilation.jl:254
[13] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/2MI6e/src/execution.jl:245
[14] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/2MI6e/src/execution.jl:159
[15] macro expansion
@ ~/.julia/packages/CUDA/TW8fL/src/compiler/execution.jl:373 [inlined]
[16] macro expansion
@ ./lock.jl:273 [inlined]
[17] cufunction(f::typeof(DiffEqGPU.gpu_gpu_kernel), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, typeof(lorenz), CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, Float32}}; kwargs::@Kwargs{always_inline::Bool, maxthreads::Nothing})
@ CUDA ~/.julia/packages/CUDA/TW8fL/src/compiler/execution.jl:368
[18] macro expansion
@ ~/.julia/packages/CUDA/TW8fL/src/compiler/execution.jl:112 [inlined]
[19] (::KernelAbstractions.Kernel{CUDABackend, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, typeof(DiffEqGPU.gpu_gpu_kernel)})(::Function, ::Vararg{Any}; ndrange::Int64, workgroupsize::Int64)
@ CUDA.CUDAKernels ~/.julia/packages/CUDA/TW8fL/src/CUDAKernels.jl:103
[20] Kernel
@ ~/.julia/packages/CUDA/TW8fL/src/CUDAKernels.jl:89 [inlined]
[21] #12
@ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpuarray/problem_generation.jl:10 [inlined]
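For reference, the parameter layout I’m aiming for is roughly the sketch below: the physical constants and the network weights side by side in one ComponentArray, so remake only perturbs the physical part. The names (lorenz_ude!, p_all) are just illustrative and this is CPU-only; what I can’t figure out is how to express the same thing so it compiles under EnsembleGPUArray.

using OrdinaryDiffEq, Lux, ComponentArrays, Random

U = Lux.Chain(Lux.Dense(3, 32, tanh), Lux.Dense(32, 3))
p_nn, st = Lux.setup(Random.default_rng(), U)

# physical constants and network weights carried together in a single ComponentArray
p_all = ComponentArray(physical = Float32[10.0, 28.0, 8 / 3], nn = p_nn)

function lorenz_ude!(du, u, p, t)
    nn_out = first(U(u, p.nn, st))          # network output, length 3
    du[1] = p.physical[1] * (u[2] - nn_out[1])
    du[2] = u[1] * (p.physical[2] - u[3]) - u[2]
    du[3] = u[1] * u[2] - p.physical[3] * u[3]
    return nothing
end

prob_cpu = ODEProblem(lorenz_ude!, Float32[1.0, 0.0, 0.0], (0.0f0, 1.0f0), p_all)
sol_cpu = solve(prob_cpu, Tsit5(), saveat = 0.1f0)   # short CPU sanity check

On the CPU this formulation is straightforward; my question is whether there’s an equivalent way to reach p.nn (or the Lux parameters in general) from inside the right-hand side when the ensemble is run through EnsembleGPUArray.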