External functions in GPU ODE example

xztt · July 7, 2024, 12:48pm

Hi all,

I have tried adapting the DiffEqGPU example to my own example which depends on some external functions. The error is quite dense so if someone could point me in the right direction this would be a big help.

Example

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA, Statistics, Distributions, QuasiMonteCarlo, JLD, GlobalSensitivity, LinearAlgebra

function Valve(R, deltaP, open)
    dq = 0.0f0
    if (-open) < 0.0f0 
        dq =  deltaP/R
    else
        dq = 0.0f0
    end
    return SVector{1}(dq)

end

function ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ)
    τ = 1.0f0
    Eshift = 0.0f0

    tᵢ = rem(t + (1.0f0 - Eshift) * τ, τ)

    Eₚ = (tᵢ <= τₑₛ) * (1.0f0 - cos(tᵢ / τₑₛ * (π*1.0f0))) / 2.0f0 +
         (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (1.0f0 + cos((tᵢ - τₑₛ) / (τₑₚ - τₑₛ) * (π*1.0f0))) / 2.0f0 +
         (tᵢ <= τₑₚ) * 0.0f0

    E = Eₘᵢₙ + (Eₘₐₓ - Eₘᵢₙ) * Eₚ

    return SVector{1}(E)
end

function DShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ)
    τ = 1.0f0
    Eshift = 0.0f0

    tᵢ = rem(t + (1.0f0 - Eshift) * τ, τ)

    DEₚ = (tᵢ <= τₑₛ) * (π*1.0f0) / τₑₛ * sin(tᵢ / τₑₛ * (π*1.0f0)) / 2.0f0 +
          (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (π*1.0f0) / (τₑₚ - τₑₛ) * sin((τₑₛ - tᵢ) / (τₑₚ - τₑₛ) * (π*1.0f0)) / 2.0
    (tᵢ <= τₑₚ) * 0.0f0
    DE = (Eₘₐₓ - Eₘᵢₙ) * DEₚ

    return SVector{1}(DE)
end

 
function NIK(u, p, t)
    τₑₛ = p[1] 
    τₑₚ = p[2]
    Rmv = p[3]
    Zao = p[4] 
    Rs  = p[5] 
    Csa = p[6] 
    Csv = p[7] 
    Eₘₐₓ =p[8]  
    Eₘᵢₙ =p[9]
    # pressures (more readable names)
# the differential equations
    du1 = (u[6] - u[5]) * ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ) + u[1] / ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ) * DShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ)
    du2 = (u[5] - u[7] ) / Csa #Systemic arteries     
    du3 = (u[7] - u[6]) / Csv # Venous
    du4 = u[6] - u[5] # volume
    du5    = Valve(Zao, (du1 - du2), u[1] - u[2])  # AV 
    du6   = Valve(Rmv, (du3 - du1), u[3] - u[1])  # MV
    du7     = (du2 - du3) / Rs # Systemic flow
    return SVector{7}(du1,du2,du3,du4,du5,du6,du7)
end
##

u0 = @SVector [6.0f0, 6.0f0, 6.0f0, 200.0f0, 0.0f0, 0.0f0, 0.0f0]

p = @SVector [0.3f0, 0.45f0, 0.006f0, 0.033f0, 1.11f0, 1.13f0, 11.0f0, 1.5f0, 0.03f0]

tspan = (0f0, 10f0)

prob = ODEProblem{false}(NIK, u0, tspan, p)

x = collect(range(7.0f0, stop = 8.0f0, length = 250))

#Perform GSA
f1 = function (p)
    prob_func(prob, i, repeat) = remake(prob; p = p[:, i])
    ensemble_prob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
    sol = solve(
        ensemble_prob, GPUVern7(),  reltol = 1e-5, abstol = 1e-5, EnsembleGPUKernel(CUDA.CUDABackend()); saveat = x, trajectories = size(p, 2))
    # Now sol[i] is the solution for the ith set of parameters
    out = zeros(2, size(p, 2))
    for i in 1:size(p, 2)
        out[1, i] = mean(sol[i][1, :]) #Mean LVP
        out[2, i] = maximum(sol[i][2, :]) # Max Psa
    end
    out
end

samples = 100000
lb = @SVector [0.27f0, 0.405f0, 0.0054f0, 0.0297f0, 0.999f0, 1.017f0, 09.9f0, 1.35f0, 0.027f0]
ub = @SVector [0.33f0, 0.495f0, 0.0066f0, 0.0363f0, 1.221f0, 1.243f0, 12.1f0, 1.65f0, 0.033f0]
sampler = SobolSample()
A, B = QuasiMonteCarlo.generate_design_matrices(samples, lb, ub, sampler)

@time sobol_result = gsa(f1, Sobol(), A, B, batch = true)

Error

ERROR: LoadError: InvalidIRError: compiling MethodInstance for DiffEqGPU.gpu_ode_asolve_kernel(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, ::CuDeviceVector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, @NamedTuple{}}, SciMLBase.StandardODEProblem}, 1}, ::GPUVern7, ::CuDeviceMatrix{SVector{7, Float32}, 1}, ::CuDeviceMatrix{Float32, 1}, ::Float32, ::CallbackSet{Tuple{}, Tuple{}}, ::Nothing, ::Float32, ::Float32, ::CuDeviceVector{Float32, 1}, ::Val{false}) resulted in invalid LLVM IR
Reason: unsupported call to an unknown function (call to julia.new_gc_frame)
Reason: unsupported call to an unknown function (call to julia.push_gc_frame)
Reason: unsupported call to an unknown function (call to julia.get_gc_frame_slot)
Reason: unsupported dynamic function invocation (call to (f::ODEFunction)(args...) @ SciMLBase ~/.julia/packages/SciMLBase/DbQVU/src/scimlfunctions.jl:2290)
Stacktrace:
 [1] step!
   @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/perform_step/gpu_vern7_perform_step.jl:104
 [2] macro expansion
   @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/kernels.jl:97
 [3] gpu_ode_asolve_kernel
   @ ~/.julia/packages/KernelAbstractions/zPAn3/src/macros.jl:95
 [4] gpu_ode_asolve_kernel
   @ ./none:0
Reason: unsupported call to an unknown function (call to julia.pop_gc_frame)
Stacktrace:
  [1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/validation.jl:147
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:445 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/RsWnF/src/TimerOutput.jl:253 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:444 [inlined]
  [5] emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:92
  [6] emit_llvm
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:86 [inlined]
  [7] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:134
  [8] codegen
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:115 [inlined]
  [9] compile(target::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:111
 [10] compile
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:103 [inlined]
 [11] #1116
    @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/compilation.jl:247 [inlined]
 [12] JuliaContext(f::CUDA.var"#1116#1119"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:52
 [13] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:42
 [14] compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/jdJ7Z/src/compiler/compilation.jl:246
 [15] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/execution.jl:128
 [16] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/execution.jl:103
 [17] macro expansion
    @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:367 [inlined]
 [18] macro expansion
    @ ./lock.jl:267 [inlined]
 [19] cufunction(f::typeof(DiffEqGPU.gpu_ode_asolve_kernel), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, CuDeviceVector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1}, GPUVern7, CuDeviceMatrix{SVector{7, Float32}, 1}, CuDeviceMatrix{Float32, 1}, Float32, CallbackSet{Tuple{}, Tuple{}}, Nothing, Float32, Float32, CuDeviceVector{Float32, 1}, Val{false}}}; kwargs::@Kwargs{always_inline::Bool, maxthreads::Nothing})
    @ CUDA ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:362
 [20] macro expansion
    @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:112 [inlined]
 [21] (::KernelAbstractions.Kernel{CUDABackend, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, typeof(DiffEqGPU.gpu_ode_asolve_kernel)})(::CuArray{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, ::Vararg{Any}; ndrange::Int64, workgroupsize::Nothing)
    @ CUDA.CUDAKernels ~/.julia/packages/CUDA/jdJ7Z/src/CUDAKernels.jl:119
 [22] Kernel
    @ ~/.julia/packages/CUDA/jdJ7Z/src/CUDAKernels.jl:105 [inlined]
 [23] vectorized_asolve(probs::CuArray{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, prob::ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, alg::GPUVern7; dt::Float32, saveat::Vector{Float32}, save_everystep::Bool, abstol::Float64, reltol::Float64, debug::Bool, callback::CallbackSet{Tuple{}, Tuple{}}, tstops::Nothing, kwargs::@Kwargs{unstable_check::DiffEqGPU.var"#114#120"})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/lowerlevel_solve.jl:232
 [24] vectorized_asolve
    @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/lowerlevel_solve.jl:179 [inlined]
 [25] batch_solve_up_kernel(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, probs::Vector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}, I::UnitRange{Int64}, adaptive::Bool; kwargs::@Kwargs{saveat::Vector{Float32}, unstable_check::DiffEqGPU.var"#114#120", reltol::Float64, abstol::Float64})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:267
 [26] batch_solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}, I::UnitRange{Int64}, adaptive::Bool; kwargs::@Kwargs{unstable_check::DiffEqGPU.var"#114#120", reltol::Float64, abstol::Float64, saveat::Vector{Float32}})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:168
 [27] macro expansion
    @ ./timing.jl:395 [inlined]
 [28] __solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}; trajectories::Int64, batch_size::Int64, unstable_check::Function, adaptive::Bool, kwargs::@Kwargs{reltol::Float64, abstol::Float64, saveat::Vector{Float32}})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:55
 [29] __solve
    @ ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:1 [inlined]
 [30] #solve#55
    @ ~/.julia/packages/DiffEqBase/O8cUq/src/solve.jl:1096 [inlined]
 [31] (::var"#1#2")(p::Matrix{Float32})
    @ Main ~/harry/NIK_GPU.jl:81
 [32] gsa(f::var"#1#2", method::Sobol, A::Matrix{Float32}, B::Matrix{Float32}; batch::Bool, Ei_estimator::Symbol, distributed::Val{false}, kwargs::@Kwargs{})
    @ GlobalSensitivity ~/.julia/packages/GlobalSensitivity/pLty1/src/sobol_sensitivity.jl:138
 [33] macro expansion
    @ ./timing.jl:279 [inlined]
 [34] top-level scope
    @ ~/harry/NIK_GPU.jl:269
in expression starting at /net/people/plgrid/plghs1454/harry/NIK_GPU.jl:98

ChrisRackauckas · July 7, 2024, 11:37pm

As a first check, if you remove your function does it compile?

xztt · July 8, 2024, 8:16am

Yes if I remove the external functions and place them all inside the system definition as below the calculation performs on GPU as expected.

Is it best practice to define everything inside the system function instead of defining external functions and trying to call them inside the system?

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA, Statistics,  QuasiMonteCarlo, GlobalSensitivity, LinearAlgebra

 
function NIK(u, p, t)
    τₑₛ = p[1] 
    τₑₚ = p[2]
    Rmv = p[3]
    Zao = p[4] 
    Rs  = p[5] 
    Csa = p[6] 
    Csv = p[7] 
    Eₘₐₓ =p[8]  
    Eₘᵢₙ =p[9]
    # pressures (more readable names)
# the differential equations
    tᵢ = (t +  1.0f0) % 1.0f0

    DEₚ = (tᵢ <= τₑₛ) * (π*1.0f0) / τₑₛ * sin(tᵢ / τₑₛ * (π*1.0f0)) / 2.0f0 +
      (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (π*1.0f0) / (τₑₚ - τₑₛ) * sin((τₑₛ - tᵢ) / (τₑₚ - τₑₛ) * (π*1.0f0)) / 2.0f0
    (tᵢ <= τₑₚ) * 0.0f0

    DE = (Eₘₐₓ - Eₘᵢₙ) * DEₚ

    Eₚ = (tᵢ <= τₑₛ) * (1.0f0 - cos(tᵢ / τₑₛ * (π*1.0f0))) / 2.0f0 +
    (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (1.0f0 + cos((tᵢ - τₑₛ) / (τₑₚ - τₑₛ) * (π*1.0f0))) / 2.0f0 +
    (tᵢ <= τₑₚ) * 0.0f0

    E = Eₘᵢₙ + (Eₘₐₓ - Eₘᵢₙ) * Eₚ

    du1 = (u[6] - u[5]) * E + (u[1] / E) * DE # Left ventricle
    du2 = (u[5] - u[7] ) / Csa #Systemic arteries     
    du3 = (u[7] - u[6]) / Csv # Venous
    du4 = u[6] - u[5] # volume
    du5 = ifelse(-(u[1] - u[2]) < 0.0f0, (du1 - du2)/Zao, 0.0f0) #Valve(Zao, (du1 - du2), u[1] - u[2])  # AV 
    du6 = ifelse(-(u[3] - u[1]) < 0.0f0,(du3 - du1)/Rmv, 0.0f0) #Valve(Rmv, (du3 - du1), u[3] - u[1])  # MV
    du7 = (du2 - du3) / Rs # Systemic flow
    return SVector{7}(du1,du2,du3,du4,du5,du6,du7)
end
##

u0 = @SVector [6.0f0, 6.0f0, 6.0f0, 200.0f0, 0.0f0, 0.0f0, 0.0f0]

p = @SVector [0.3f0, 0.45f0, 0.006f0, 0.033f0, 1.11f0, 1.13f0, 11.0f0, 1.5f0, 0.03f0]

tspan = (0f0, 10f0)

prob = ODEProblem{false}(NIK, u0, tspan, p)

x = collect(range(7.0f0, stop = 8.0f0, length = 250))

#Perform GSA
f1 = function (p)
    prob_func(prob, i, repeat) = remake(prob; p = (p[:, i]))
    ensemble_prob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
    sol = solve(
        ensemble_prob, GPUVern7(),  EnsembleGPUKernel(CUDA.CUDABackend()); saveat = x, trajectories = size(p, 2))
    # Now sol[i] is the solution for the ith set of parameters
    out = zeros(2, size(p, 2))
    for i in 1:size(p, 2)
        out[1, i] = mean(sol[i][1, :]) #Mean LVP
        out[2, i] = maximum(sol[i][2, :]) # Max Psa
    end
    out
end

samples = 100000
lb = @SVector [0.27f0, 0.405f0, 0.0054f0, 0.0297f0, 0.999f0, 1.017f0, 09.9f0, 1.35f0, 0.027f0]
ub = @SVector [0.33f0, 0.495f0, 0.0066f0, 0.0363f0, 1.221f0, 1.243f0, 12.1f0, 1.65f0, 0.033f0]
sampler = SobolSample()
A, B = QuasiMonteCarlo.generate_design_matrices(samples, lb, ub, sampler)

@time sobol_result = gsa(f1, Sobol(), A, B, batch = true)

ChrisRackauckas · July 9, 2024, 5:08am

No, that seems like it might be a KernelAbstractions.jl bug. It’s supposed to have a pass that inlines everything for you? @vchuravy

vchuravy · July 9, 2024, 5:26am

If a function can’t be inlined/inferred it would look like this.

xztt · July 9, 2024, 12:09pm

What can you do to ensure that the functions can be inferred?

ChrisRackauckas · July 10, 2024, 3:38am

The functions in this case look like they would be simple enough? Maybe try making const pi = π*1.0f0 and using that instead of the irrational?

xztt · July 11, 2024, 9:07pm

Issue opened

github.com/JuliaGPU/KernelAbstractions.jl

External functions in GPU ODE example

opened 09:05PM - 11 Jul 24 UTC

H-Sax

When writing an ODE function to utilise the DiffEqGPU package external functions… can not be inferred in the definition. However when I write the functions within the ODE definition the solve computes on the GPU as example. **Working Example** ``` using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA, Statistics, QuasiMonteCarlo, GlobalSensitivity, LinearAlgebra function NIK(u, p, t) τₑₛ = p[1] τₑₚ = p[2] Rmv = p[3] Zao = p[4] Rs = p[5] Csa = p[6] Csv = p[7] Eₘₐₓ =p[8] Eₘᵢₙ =p[9] # pressures (more readable names) # the differential equations tᵢ = (t + 1.0f0) % 1.0f0 DEₚ = (tᵢ <= τₑₛ) * (π*1.0f0) / τₑₛ * sin(tᵢ / τₑₛ * (π*1.0f0)) / 2.0f0 + (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (π*1.0f0) / (τₑₚ - τₑₛ) * sin((τₑₛ - tᵢ) / (τₑₚ - τₑₛ) * (π*1.0f0)) / 2.0f0 (tᵢ <= τₑₚ) * 0.0f0 DE = (Eₘₐₓ - Eₘᵢₙ) * DEₚ Eₚ = (tᵢ <= τₑₛ) * (1.0f0 - cos(tᵢ / τₑₛ * (π*1.0f0))) / 2.0f0 + (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (1.0f0 + cos((tᵢ - τₑₛ) / (τₑₚ - τₑₛ) * (π*1.0f0))) / 2.0f0 + (tᵢ <= τₑₚ) * 0.0f0 E = Eₘᵢₙ + (Eₘₐₓ - Eₘᵢₙ) * Eₚ du1 = (u[6] - u[5]) * E + (u[1] / E) * DE # Left ventricle du2 = (u[5] - u[7] ) / Csa #Systemic arteries du3 = (u[7] - u[6]) / Csv # Venous du4 = u[6] - u[5] # volume du5 = ifelse(-(u[1] - u[2]) < 0.0f0, (du1 - du2)/Zao, 0.0f0) #Valve(Zao, (du1 - du2), u[1] - u[2]) # AV du6 = ifelse(-(u[3] - u[1]) < 0.0f0,(du3 - du1)/Rmv, 0.0f0) #Valve(Rmv, (du3 - du1), u[3] - u[1]) # MV du7 = (du2 - du3) / Rs # Systemic flow return SVector{7}(du1,du2,du3,du4,du5,du6,du7) end ## u0 = @SVector [6.0f0, 6.0f0, 6.0f0, 200.0f0, 0.0f0, 0.0f0, 0.0f0] p = @SVector [0.3f0, 0.45f0, 0.006f0, 0.033f0, 1.11f0, 1.13f0, 11.0f0, 1.5f0, 0.03f0] tspan = (0f0, 10f0) prob = ODEProblem{false}(NIK, u0, tspan, p) x = collect(range(7.0f0, stop = 8.0f0, length = 250)) #Perform GSA f1 = function (p) prob_func(prob, i, repeat) = remake(prob; p = (p[:, i])) ensemble_prob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false) sol = solve( ensemble_prob, GPUVern7(), EnsembleGPUKernel(CUDA.CUDABackend()); saveat = x, trajectories = size(p, 2)) # Now sol[i] is the solution for the ith set of parameters out = zeros(2, size(p, 2)) for i in 1:size(p, 2) out[1, i] = mean(sol[i][1, :]) #Mean LVP out[2, i] = maximum(sol[i][2, :]) # Max Psa end out end samples = 100000 lb = @SVector [0.27f0, 0.405f0, 0.0054f0, 0.0297f0, 0.999f0, 1.017f0, 09.9f0, 1.35f0, 0.027f0] ub = @SVector [0.33f0, 0.495f0, 0.0066f0, 0.0363f0, 1.221f0, 1.243f0, 12.1f0, 1.65f0, 0.033f0] sampler = SobolSample() A, B = QuasiMonteCarlo.generate_design_matrices(samples, lb, ub, sampler) @time sobol_result = gsa(f1, Sobol(), A, B, batch = true) ``` **Example that Errors** ``` using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA, Statistics, Distributions, QuasiMonteCarlo, JLD, GlobalSensitivity, LinearAlgebra function Valve(R, deltaP, open) dq = 0.0f0 if (-open) < 0.0f0 dq = deltaP/R else dq = 0.0f0 end return SVector{1}(dq) end function ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ) τ = 1.0f0 Eshift = 0.0f0 tᵢ = rem(t + (1.0f0 - Eshift) * τ, τ) Eₚ = (tᵢ <= τₑₛ) * (1.0f0 - cos(tᵢ / τₑₛ * (π*1.0f0))) / 2.0f0 + (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (1.0f0 + cos((tᵢ - τₑₛ) / (τₑₚ - τₑₛ) * (π*1.0f0))) / 2.0f0 + (tᵢ <= τₑₚ) * 0.0f0 E = Eₘᵢₙ + (Eₘₐₓ - Eₘᵢₙ) * Eₚ return SVector{1}(E) end function DShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ) τ = 1.0f0 Eshift = 0.0f0 tᵢ = rem(t + (1.0f0 - Eshift) * τ, τ) DEₚ = (tᵢ <= τₑₛ) * (π*1.0f0) / τₑₛ * sin(tᵢ / τₑₛ * (π*1.0f0)) / 2.0f0 + (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (π*1.0f0) / (τₑₚ - τₑₛ) * sin((τₑₛ - tᵢ) / (τₑₚ - τₑₛ) * (π*1.0f0)) / 2.0f0 (tᵢ <= τₑₚ) * 0.0f0 DE = (Eₘₐₓ - Eₘᵢₙ) * DEₚ return SVector{1}(DE) end function NIK(u, p, t) τₑₛ = p[1] τₑₚ = p[2] Rmv = p[3] Zao = p[4] Rs = p[5] Csa = p[6] Csv = p[7] Eₘₐₓ =p[8] Eₘᵢₙ =p[9] # pressures (more readable names) # the differential equations du1 = (u[6] - u[5]) * ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ) + u[1] / ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ) * DShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ) du2 = (u[5] - u[7] ) / Csa #Systemic arteries du3 = (u[7] - u[6]) / Csv # Venous du4 = u[6] - u[5] # volume du5 = Valve(Zao, (du1 - du2), u[1] - u[2]) # AV du6 = Valve(Rmv, (du3 - du1), u[3] - u[1]) # MV du7 = (du2 - du3) / Rs # Systemic flow return SVector{7}(du1,du2,du3,du4,du5,du6,du7) end ## u0 = @SVector [6.0f0, 6.0f0, 6.0f0, 200.0f0, 0.0f0, 0.0f0, 0.0f0] p = @SVector [0.3f0, 0.45f0, 0.006f0, 0.033f0, 1.11f0, 1.13f0, 11.0f0, 1.5f0, 0.03f0] tspan = (0f0, 10f0) prob = ODEProblem{false}(NIK, u0, tspan, p) x = collect(range(7.0f0, stop = 8.0f0, length = 250)) #Perform GSA f1 = function (p) prob_func(prob, i, repeat) = remake(prob; p = p[:, i]) ensemble_prob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false) sol = solve( ensemble_prob, GPUVern7(), reltol = 1e-5, abstol = 1e-5, EnsembleGPUKernel(CUDA.CUDABackend()); saveat = x, trajectories = size(p, 2)) # Now sol[i] is the solution for the ith set of parameters out = zeros(2, size(p, 2)) for i in 1:size(p, 2) out[1, i] = mean(sol[i][1, :]) #Mean LVP out[2, i] = maximum(sol[i][2, :]) # Max Psa end out end samples = 100000 lb = @SVector [0.27f0, 0.405f0, 0.0054f0, 0.0297f0, 0.999f0, 1.017f0, 09.9f0, 1.35f0, 0.027f0] ub = @SVector [0.33f0, 0.495f0, 0.0066f0, 0.0363f0, 1.221f0, 1.243f0, 12.1f0, 1.65f0, 0.033f0] sampler = SobolSample() A, B = QuasiMonteCarlo.generate_design_matrices(samples, lb, ub, sampler) @time sobol_result = gsa(f1, Sobol(), A, B, batch = true) ``` **Stacktrace** ``` ERROR: LoadError: InvalidIRError: compiling MethodInstance for DiffEqGPU.gpu_ode_asolve_kernel(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, ::CuDeviceVector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, @NamedTuple{}}, SciMLBase.StandardODEProblem}, 1}, ::GPUVern7, ::CuDeviceMatrix{SVector{7, Float32}, 1}, ::CuDeviceMatrix{Float32, 1}, ::Float32, ::CallbackSet{Tuple{}, Tuple{}}, ::Nothing, ::Float32, ::Float32, ::CuDeviceVector{Float32, 1}, ::Val{false}) resulted in invalid LLVM IR Reason: unsupported call to an unknown function (call to julia.new_gc_frame) Reason: unsupported call to an unknown function (call to julia.push_gc_frame) Reason: unsupported call to an unknown function (call to julia.get_gc_frame_slot) Reason: unsupported dynamic function invocation (call to (f::ODEFunction)(args...) @ SciMLBase ~/.julia/packages/SciMLBase/DbQVU/src/scimlfunctions.jl:2290) Stacktrace: [1] step! @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/perform_step/gpu_vern7_perform_step.jl:104 [2] macro expansion @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/kernels.jl:97 [3] gpu_ode_asolve_kernel @ ~/.julia/packages/KernelAbstractions/zPAn3/src/macros.jl:95 [4] gpu_ode_asolve_kernel @ ./none:0 Reason: unsupported call to an unknown function (call to julia.pop_gc_frame) Stacktrace: [1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module) @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/validation.jl:147 [2] macro expansion @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:445 [inlined] [3] macro expansion @ ~/.julia/packages/TimerOutputs/RsWnF/src/TimerOutput.jl:253 [inlined] [4] macro expansion @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:444 [inlined] [5] emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool) @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:92 [6] emit_llvm @ ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:86 [inlined] [7] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing) @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:134 [8] codegen @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:115 [inlined] [9] compile(target::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool) @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:111 [10] compile @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:103 [inlined] [11] #1116 @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/compilation.jl:247 [inlined] [12] JuliaContext(f::CUDA.var"#1116#1119"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{}) @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:52 [13] JuliaContext(f::Function) @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:42 [14] compile(job::GPUCompiler.CompilerJob) @ CUDA ~/.julia/packages/CUDA/jdJ7Z/src/compiler/compilation.jl:246 [15] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link)) @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/execution.jl:128 [16] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function) @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/execution.jl:103 [17] macro expansion @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:367 [inlined] [18] macro expansion @ ./lock.jl:267 [inlined] [19] cufunction(f::typeof(DiffEqGPU.gpu_ode_asolve_kernel), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, CuDeviceVector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1}, GPUVern7, CuDeviceMatrix{SVector{7, Float32}, 1}, CuDeviceMatrix{Float32, 1}, Float32, CallbackSet{Tuple{}, Tuple{}}, Nothing, Float32, Float32, CuDeviceVector{Float32, 1}, Val{false}}}; kwargs::@Kwargs{always_inline::Bool, maxthreads::Nothing}) @ CUDA ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:362 [20] macro expansion @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:112 [inlined] [21] (::KernelAbstractions.Kernel{CUDABackend, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, typeof(DiffEqGPU.gpu_ode_asolve_kernel)})(::CuArray{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, ::Vararg{Any}; ndrange::Int64, workgroupsize::Nothing) @ CUDA.CUDAKernels ~/.julia/packages/CUDA/jdJ7Z/src/CUDAKernels.jl:119 [22] Kernel @ ~/.julia/packages/CUDA/jdJ7Z/src/CUDAKernels.jl:105 [inlined] [23] vectorized_asolve(probs::CuArray{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, prob::ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, alg::GPUVern7; dt::Float32, saveat::Vector{Float32}, save_everystep::Bool, abstol::Float64, reltol::Float64, debug::Bool, callback::CallbackSet{Tuple{}, Tuple{}}, tstops::Nothing, kwargs::@Kwargs{unstable_check::DiffEqGPU.var"#114#120"}) @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/lowerlevel_solve.jl:232 [24] vectorized_asolve @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/lowerlevel_solve.jl:179 [inlined] [25] batch_solve_up_kernel(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, probs::Vector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}, I::UnitRange{Int64}, adaptive::Bool; kwargs::@Kwargs{saveat::Vector{Float32}, unstable_check::DiffEqGPU.var"#114#120", reltol::Float64, abstol::Float64}) @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:267 [26] batch_solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}, I::UnitRange{Int64}, adaptive::Bool; kwargs::@Kwargs{unstable_check::DiffEqGPU.var"#114#120", reltol::Float64, abstol::Float64, saveat::Vector{Float32}}) @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:168 [27] macro expansion @ ./timing.jl:395 [inlined] [28] __solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}; trajectories::Int64, batch_size::Int64, unstable_check::Function, adaptive::Bool, kwargs::@Kwargs{reltol::Float64, abstol::Float64, saveat::Vector{Float32}}) @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:55 [29] __solve @ ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:1 [inlined] [30] #solve#55 @ ~/.julia/packages/DiffEqBase/O8cUq/src/solve.jl:1096 [inlined] [31] (::var"#1#2")(p::Matrix{Float32}) @ Main ~/harry/NIK_GPU.jl:81 [32] gsa(f::var"#1#2", method::Sobol, A::Matrix{Float32}, B::Matrix{Float32}; batch::Bool, Ei_estimator::Symbol, distributed::Val{false}, kwargs::@Kwargs{}) @ GlobalSensitivity ~/.julia/packages/GlobalSensitivity/pLty1/src/sobol_sensitivity.jl:138 [33] macro expansion @ ./timing.jl:279 [inlined] [34] top-level scope @ ~/harry/NIK_GPU.jl:269 in expression starting at /net/people/plgrid/plghs1454/harry/NIK_GPU.jl:98 ``` This issue is prompted from a discussion on discorse here[https://discourse.julialang.org/t/external-functions-in-gpu-ode-example/116719](url) As far as I understand as long as external functions are GPU compatible they should then be inferred by the ode function, this is motivated by discussions at JuliaCon. Thanks :)

Topic		Replies	Views
Kernel Error while running code on GPU GPU	12	415	January 18, 2024
DiffEqGPU throws InvalidIR Error for simple example General Usage diffeq , differentialequation	3	329	February 14, 2023
Error when running DiffEqGPU example from documentation GPU diffeq , differentialequation	2	303	May 26, 2023
Diffusion by using CUDA.jl, DifferentialEquations and DiffEqGPU GPU question , diffeq	6	1229	March 29, 2022
Trouble with CPU+GPU adjoints for UDEs General Usage question , optimization , sciml , diffeqflux	1	113	June 6, 2024

External functions in GPU ODE example

Related topics