External functions in GPU ODE example

Hi all,

I have tried adapting the DiffEqGPU example to my own example which depends on some external functions. The error is quite dense so if someone could point me in the right direction this would be a big help.

Example

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA, Statistics, Distributions, QuasiMonteCarlo, JLD, GlobalSensitivity, LinearAlgebra

function Valve(R, deltaP, open)
    dq = 0.0f0
    if (-open) < 0.0f0 
        dq =  deltaP/R
    else
        dq = 0.0f0
    end
    return SVector{1}(dq)

end

function ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ)
    τ = 1.0f0
    Eshift = 0.0f0

    tᵢ = rem(t + (1.0f0 - Eshift) * τ, τ)

    Eₚ = (tᵢ <= τₑₛ) * (1.0f0 - cos(tᵢ / τₑₛ * (π*1.0f0))) / 2.0f0 +
         (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (1.0f0 + cos((tᵢ - τₑₛ) / (τₑₚ - τₑₛ) * (π*1.0f0))) / 2.0f0 +
         (tᵢ <= τₑₚ) * 0.0f0

    E = Eₘᵢₙ + (Eₘₐₓ - Eₘᵢₙ) * Eₚ

    return SVector{1}(E)
end

function DShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ)
    τ = 1.0f0
    Eshift = 0.0f0

    tᵢ = rem(t + (1.0f0 - Eshift) * τ, τ)

    DEₚ = (tᵢ <= τₑₛ) * (π*1.0f0) / τₑₛ * sin(tᵢ / τₑₛ * (π*1.0f0)) / 2.0f0 +
          (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (π*1.0f0) / (τₑₚ - τₑₛ) * sin((τₑₛ - tᵢ) / (τₑₚ - τₑₛ) * (π*1.0f0)) / 2.0
    (tᵢ <= τₑₚ) * 0.0f0
    DE = (Eₘₐₓ - Eₘᵢₙ) * DEₚ

    return SVector{1}(DE)
end

 
function NIK(u, p, t)
    τₑₛ = p[1] 
    τₑₚ = p[2]
    Rmv = p[3]
    Zao = p[4] 
    Rs  = p[5] 
    Csa = p[6] 
    Csv = p[7] 
    Eₘₐₓ =p[8]  
    Eₘᵢₙ =p[9]
    # pressures (more readable names)
# the differential equations
    du1 = (u[6] - u[5]) * ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ) + u[1] / ShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ) * DShiElastance(t, Eₘᵢₙ, Eₘₐₓ, τₑₛ, τₑₚ)
    du2 = (u[5] - u[7] ) / Csa #Systemic arteries     
    du3 = (u[7] - u[6]) / Csv # Venous
    du4 = u[6] - u[5] # volume
    du5    = Valve(Zao, (du1 - du2), u[1] - u[2])  # AV 
    du6   = Valve(Rmv, (du3 - du1), u[3] - u[1])  # MV
    du7     = (du2 - du3) / Rs # Systemic flow
    return SVector{7}(du1,du2,du3,du4,du5,du6,du7)
end
##

u0 = @SVector [6.0f0, 6.0f0, 6.0f0, 200.0f0, 0.0f0, 0.0f0, 0.0f0]

p = @SVector [0.3f0, 0.45f0, 0.006f0, 0.033f0, 1.11f0, 1.13f0, 11.0f0, 1.5f0, 0.03f0]

tspan = (0f0, 10f0)

prob = ODEProblem{false}(NIK, u0, tspan, p)

x = collect(range(7.0f0, stop = 8.0f0, length = 250))

#Perform GSA
f1 = function (p)
    prob_func(prob, i, repeat) = remake(prob; p = p[:, i])
    ensemble_prob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
    sol = solve(
        ensemble_prob, GPUVern7(),  reltol = 1e-5, abstol = 1e-5, EnsembleGPUKernel(CUDA.CUDABackend()); saveat = x, trajectories = size(p, 2))
    # Now sol[i] is the solution for the ith set of parameters
    out = zeros(2, size(p, 2))
    for i in 1:size(p, 2)
        out[1, i] = mean(sol[i][1, :]) #Mean LVP
        out[2, i] = maximum(sol[i][2, :]) # Max Psa
    end
    out
end

samples = 100000
lb = @SVector [0.27f0, 0.405f0, 0.0054f0, 0.0297f0, 0.999f0, 1.017f0, 09.9f0, 1.35f0, 0.027f0]
ub = @SVector [0.33f0, 0.495f0, 0.0066f0, 0.0363f0, 1.221f0, 1.243f0, 12.1f0, 1.65f0, 0.033f0]
sampler = SobolSample()
A, B = QuasiMonteCarlo.generate_design_matrices(samples, lb, ub, sampler)

@time sobol_result = gsa(f1, Sobol(), A, B, batch = true)

Error

ERROR: LoadError: InvalidIRError: compiling MethodInstance for DiffEqGPU.gpu_ode_asolve_kernel(::KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, ::CuDeviceVector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, @NamedTuple{}}, SciMLBase.StandardODEProblem}, 1}, ::GPUVern7, ::CuDeviceMatrix{SVector{7, Float32}, 1}, ::CuDeviceMatrix{Float32, 1}, ::Float32, ::CallbackSet{Tuple{}, Tuple{}}, ::Nothing, ::Float32, ::Float32, ::CuDeviceVector{Float32, 1}, ::Val{false}) resulted in invalid LLVM IR
Reason: unsupported call to an unknown function (call to julia.new_gc_frame)
Reason: unsupported call to an unknown function (call to julia.push_gc_frame)
Reason: unsupported call to an unknown function (call to julia.get_gc_frame_slot)
Reason: unsupported dynamic function invocation (call to (f::ODEFunction)(args...) @ SciMLBase ~/.julia/packages/SciMLBase/DbQVU/src/scimlfunctions.jl:2290)
Stacktrace:
 [1] step!
   @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/perform_step/gpu_vern7_perform_step.jl:104
 [2] macro expansion
   @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/kernels.jl:97
 [3] gpu_ode_asolve_kernel
   @ ~/.julia/packages/KernelAbstractions/zPAn3/src/macros.jl:95
 [4] gpu_ode_asolve_kernel
   @ ./none:0
Reason: unsupported call to an unknown function (call to julia.pop_gc_frame)
Stacktrace:
  [1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/validation.jl:147
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:445 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/RsWnF/src/TimerOutput.jl:253 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:444 [inlined]
  [5] emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:92
  [6] emit_llvm
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:86 [inlined]
  [7] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:134
  [8] codegen
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:115 [inlined]
  [9] compile(target::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:111
 [10] compile
    @ ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:103 [inlined]
 [11] #1116
    @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/compilation.jl:247 [inlined]
 [12] JuliaContext(f::CUDA.var"#1116#1119"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:52
 [13] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/driver.jl:42
 [14] compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/jdJ7Z/src/compiler/compilation.jl:246
 [15] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/execution.jl:128
 [16] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/kqxyC/src/execution.jl:103
 [17] macro expansion
    @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:367 [inlined]
 [18] macro expansion
    @ ./lock.jl:267 [inlined]
 [19] cufunction(f::typeof(DiffEqGPU.gpu_ode_asolve_kernel), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, CuDeviceVector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1}, GPUVern7, CuDeviceMatrix{SVector{7, Float32}, 1}, CuDeviceMatrix{Float32, 1}, Float32, CallbackSet{Tuple{}, Tuple{}}, Nothing, Float32, Float32, CuDeviceVector{Float32, 1}, Val{false}}}; kwargs::@Kwargs{always_inline::Bool, maxthreads::Nothing})
    @ CUDA ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:362
 [20] macro expansion
    @ ~/.julia/packages/CUDA/jdJ7Z/src/compiler/execution.jl:112 [inlined]
 [21] (::KernelAbstractions.Kernel{CUDABackend, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, typeof(DiffEqGPU.gpu_ode_asolve_kernel)})(::CuArray{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, ::Vararg{Any}; ndrange::Int64, workgroupsize::Nothing)
    @ CUDA.CUDAKernels ~/.julia/packages/CUDA/jdJ7Z/src/CUDAKernels.jl:119
 [22] Kernel
    @ ~/.julia/packages/CUDA/jdJ7Z/src/CUDAKernels.jl:105 [inlined]
 [23] vectorized_asolve(probs::CuArray{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, 1, CUDA.Mem.DeviceBuffer}, prob::ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, alg::GPUVern7; dt::Float32, saveat::Vector{Float32}, save_everystep::Bool, abstol::Float64, reltol::Float64, debug::Bool, callback::CallbackSet{Tuple{}, Tuple{}}, tstops::Nothing, kwargs::@Kwargs{unstable_check::DiffEqGPU.var"#114#120"})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/lowerlevel_solve.jl:232
 [24] vectorized_asolve
    @ ~/.julia/packages/DiffEqGPU/I999k/src/ensemblegpukernel/lowerlevel_solve.jl:179 [inlined]
 [25] batch_solve_up_kernel(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, probs::Vector{DiffEqGPU.ImmutableODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}, I::UnitRange{Int64}, adaptive::Bool; kwargs::@Kwargs{saveat::Vector{Float32}, unstable_check::DiffEqGPU.var"#114#120", reltol::Float64, abstol::Float64})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:267
 [26] batch_solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}, I::UnitRange{Int64}, adaptive::Bool; kwargs::@Kwargs{unstable_check::DiffEqGPU.var"#114#120", reltol::Float64, abstol::Float64, saveat::Vector{Float32}})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:168
 [27] macro expansion
    @ ./timing.jl:395 [inlined]
 [28] __solve(ensembleprob::EnsembleProblem{ODEProblem{SVector{7, Float32}, Tuple{Float32, Float32}, false, SVector{9, Float32}, ODEFunction{false, SciMLBase.AutoSpecialize, typeof(NIK), UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing, Nothing, Nothing}, @Kwargs{}, SciMLBase.StandardODEProblem}, var"#prob_func#3"{Matrix{Float32}}, typeof(SciMLBase.DEFAULT_OUTPUT_FUNC), typeof(SciMLBase.DEFAULT_REDUCTION), Nothing}, alg::GPUVern7, ensemblealg::EnsembleGPUKernel{CUDABackend}; trajectories::Int64, batch_size::Int64, unstable_check::Function, adaptive::Bool, kwargs::@Kwargs{reltol::Float64, abstol::Float64, saveat::Vector{Float32}})
    @ DiffEqGPU ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:55
 [29] __solve
    @ ~/.julia/packages/DiffEqGPU/I999k/src/solve.jl:1 [inlined]
 [30] #solve#55
    @ ~/.julia/packages/DiffEqBase/O8cUq/src/solve.jl:1096 [inlined]
 [31] (::var"#1#2")(p::Matrix{Float32})
    @ Main ~/harry/NIK_GPU.jl:81
 [32] gsa(f::var"#1#2", method::Sobol, A::Matrix{Float32}, B::Matrix{Float32}; batch::Bool, Ei_estimator::Symbol, distributed::Val{false}, kwargs::@Kwargs{})
    @ GlobalSensitivity ~/.julia/packages/GlobalSensitivity/pLty1/src/sobol_sensitivity.jl:138
 [33] macro expansion
    @ ./timing.jl:279 [inlined]
 [34] top-level scope
    @ ~/harry/NIK_GPU.jl:269
in expression starting at /net/people/plgrid/plghs1454/harry/NIK_GPU.jl:98

As a first check, if you remove your function does it compile?

Yes if I remove the external functions and place them all inside the system definition as below the calculation performs on GPU as expected.

Is it best practice to define everything inside the system function instead of defining external functions and trying to call them inside the system?

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA, Statistics,  QuasiMonteCarlo, GlobalSensitivity, LinearAlgebra

 
function NIK(u, p, t)
    τₑₛ = p[1] 
    τₑₚ = p[2]
    Rmv = p[3]
    Zao = p[4] 
    Rs  = p[5] 
    Csa = p[6] 
    Csv = p[7] 
    Eₘₐₓ =p[8]  
    Eₘᵢₙ =p[9]
    # pressures (more readable names)
# the differential equations
    tᵢ = (t +  1.0f0) % 1.0f0

    DEₚ = (tᵢ <= τₑₛ) * (π*1.0f0) / τₑₛ * sin(tᵢ / τₑₛ * (π*1.0f0)) / 2.0f0 +
      (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (π*1.0f0) / (τₑₚ - τₑₛ) * sin((τₑₛ - tᵢ) / (τₑₚ - τₑₛ) * (π*1.0f0)) / 2.0f0
    (tᵢ <= τₑₚ) * 0.0f0

    DE = (Eₘₐₓ - Eₘᵢₙ) * DEₚ

    Eₚ = (tᵢ <= τₑₛ) * (1.0f0 - cos(tᵢ / τₑₛ * (π*1.0f0))) / 2.0f0 +
    (tᵢ > τₑₛ) * (tᵢ <= τₑₚ) * (1.0f0 + cos((tᵢ - τₑₛ) / (τₑₚ - τₑₛ) * (π*1.0f0))) / 2.0f0 +
    (tᵢ <= τₑₚ) * 0.0f0

    E = Eₘᵢₙ + (Eₘₐₓ - Eₘᵢₙ) * Eₚ

    du1 = (u[6] - u[5]) * E + (u[1] / E) * DE # Left ventricle
    du2 = (u[5] - u[7] ) / Csa #Systemic arteries     
    du3 = (u[7] - u[6]) / Csv # Venous
    du4 = u[6] - u[5] # volume
    du5 = ifelse(-(u[1] - u[2]) < 0.0f0, (du1 - du2)/Zao, 0.0f0) #Valve(Zao, (du1 - du2), u[1] - u[2])  # AV 
    du6 = ifelse(-(u[3] - u[1]) < 0.0f0,(du3 - du1)/Rmv, 0.0f0) #Valve(Rmv, (du3 - du1), u[3] - u[1])  # MV
    du7 = (du2 - du3) / Rs # Systemic flow
    return SVector{7}(du1,du2,du3,du4,du5,du6,du7)
end
##

u0 = @SVector [6.0f0, 6.0f0, 6.0f0, 200.0f0, 0.0f0, 0.0f0, 0.0f0]

p = @SVector [0.3f0, 0.45f0, 0.006f0, 0.033f0, 1.11f0, 1.13f0, 11.0f0, 1.5f0, 0.03f0]

tspan = (0f0, 10f0)

prob = ODEProblem{false}(NIK, u0, tspan, p)

x = collect(range(7.0f0, stop = 8.0f0, length = 250))

#Perform GSA
f1 = function (p)
    prob_func(prob, i, repeat) = remake(prob; p = (p[:, i]))
    ensemble_prob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
    sol = solve(
        ensemble_prob, GPUVern7(),  EnsembleGPUKernel(CUDA.CUDABackend()); saveat = x, trajectories = size(p, 2))
    # Now sol[i] is the solution for the ith set of parameters
    out = zeros(2, size(p, 2))
    for i in 1:size(p, 2)
        out[1, i] = mean(sol[i][1, :]) #Mean LVP
        out[2, i] = maximum(sol[i][2, :]) # Max Psa
    end
    out
end

samples = 100000
lb = @SVector [0.27f0, 0.405f0, 0.0054f0, 0.0297f0, 0.999f0, 1.017f0, 09.9f0, 1.35f0, 0.027f0]
ub = @SVector [0.33f0, 0.495f0, 0.0066f0, 0.0363f0, 1.221f0, 1.243f0, 12.1f0, 1.65f0, 0.033f0]
sampler = SobolSample()
A, B = QuasiMonteCarlo.generate_design_matrices(samples, lb, ub, sampler)

@time sobol_result = gsa(f1, Sobol(), A, B, batch = true)

No, that seems like it might be a KernelAbstractions.jl bug. It’s supposed to have a pass that inlines everything for you? @vchuravy

If a function can’t be inlined/inferred it would look like this.

What can you do to ensure that the functions can be inferred?

The functions in this case look like they would be simple enough? Maybe try making const pi = π*1.0f0 and using that instead of the irrational?

Issue opened