QuadGK don't work in CUDA kernel

I have problem with executing Julia function:

function biotSavartCalculation(x,y,z,Segment,I,SegmentsOnElement,segmentlength,Bx,By,Bz)
  xIndex=(blockIdx().x-1) * blockDim().x + threadIdx().x
  yIndex=(blockIdx().y-1) * blockDim().y + threadIdx().y
  zIndex=(blockIdx().z-1) * blockDim().z + threadIdx().z

  offset=xIndex+(yIndex-1)*blockDim().x*gridDim().x+(zIndex-1)*blockDim().x*gridDim().x*blockDim().y*gridDim().y
  for j=1:segmentlength
    if variant[j]==1.0f0
      for i=1:SegmentsOnElement-1
        L=Segment[i+SegmentsOnElement*2+1,j]-Segment[i+SegmentsOnElement*2+1,j]
        δx=x[xIndex]-Segment[i,j]
        δy=y[yIndex]-Segment[i+SegmentsOnElement,j]
        q=δx*δx+δy*δy
        integral=0.0f0
        try
          integral = quadgk(u -> ((u-z)^2+q)^(-3/2), 0, L, rtol=1f-8)[1]
        catch
        end
        Bx[offset]+=I[j]*integral*δy
        By[offset]+=I[j]*integral*δx
      end
    else
      ...

   return nothing
end

The error comunicate is:

ERROR: LoadError: CUDAnative.InvalidIRError(CUDAnative.CompilerJob(BiotSavartCalculation.biotSavartCalculation, Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Complex{Float32},2,CUDAnative.AS.Global},Int64,Int64,CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,3,CUDAnative.AS.Global}}, v"6.1.0", true, nothing, nothing, nothing, nothing, nothing), Tuple{String,Array{Base.StackTraces.StackFrame,1},Any}[("call to the Julia runtime", [biotSavartCalculation at BiotSavartCalculation.jl:22], "jl_excstack_state"), ("call to the Julia runtime", [biotSavartCalculation at BiotSavartCalculation.jl:22], "jl_enter_handler"), ("call to the Julia runtime", [biotSavartCalculation at BiotSavartCalculation.jl:22], "jl_setjmp"), ("call to the Julia runtime", [biotSavartCalculation at BiotSavartCalculation.jl:23], "jl_pop_handler"), ("call to the Julia runtime", [biotSavartCalculation at BiotSavartCalculation.jl:23], "jl_restore_excstack"), ("call through a literal pointer", [Type at boot.jl:433, Type at complex.jl:37, convert at number.jl:7, macro expansion at base.jl:43, macro expansion at pointer.jl:167, unsafe_store! at pointer.jl:167, setindex! at array.jl:84, biotSavartCalculation at BiotSavartCalculation.jl:26], :jl_symbol_n), ("call through a literal pointer", [Type at boot.jl:433, Type at complex.jl:37, convert at number.jl:7, macro expansion at base.jl:43, macro expansion at pointer.jl:167, unsafe_store! at pointer.jl:167, setindex! at array.jl:84, biotSavartCalculation at BiotSavartCalculation.jl:27], :jl_symbol_n), ("call to the Julia runtime", [biotSavartCalculation at BiotSavartCalculation.jl:51], "jl_excstack_state"), ("call to the Julia runtime", [biotSavartCalculation at BiotSavartCalculation.jl:51], "jl_enter_handler"), ("call to the Julia runtime", [biotSavartCalculation at BiotSavartCalculation.jl:51], "jl_setjmp")  …  ("dynamic function invocation", [#quadgk#20 at adapt.jl:151, #quadgk at none:0, biotSavartCalculation at BiotSavartCalculation.jl:52], (::getfield(QuadGK, Symbol("#kw##quadgk")))(::Any, ::typeof(QuadGK.quadgk), f, a::T, b::T, c::T...) where T in QuadGK), ("call to the Julia runtime", [print_to_string at io.jl:113,
multiple call sites at unknown:0], "jl_f_tuple"), ("call to the Julia runtime", [getindex at tuple.jl:24, iterate at tuple.jl:43, iterate at tuple.jl:43, print_to_string at io.jl:122, multiple call sites at unknown:0], "jl_f_getfield"), ("dynamic function invocation", [print_to_string at io.jl:123, multiple call sites at unknown:0], print), ("call through a literal pointer", [_growend! at array.jl:812, resize! at array.jl:1004, print_to_string at io.jl:125, multiple call sites at unknown:0], :jl_array_grow_end), ("call through a literal pointer", [_deleteend! at array.jl:821, resize! at array.jl:1009, print_to_string at io.jl:125, multiple call sites at unknown:0], :jl_array_del_end), ("call through a literal pointer", [Type at string.jl:39, print_to_string at io.jl:125, multiple call
sites at unknown:0], :jl_array_to_string), ("call through a literal pointer", [_string_n at string.jl:60, StringVector at iobuffer.jl:31, #IOBuffer#316 at iobuffer.jl:114, Type at none:0, print_to_string at io.jl:121, multiple call sites at unknown:0], :jl_alloc_string), ("call through a literal pointer", [unsafe_wrap at string.jl:71, StringVector at iobuffer.jl:31, #IOBuffer#316 at iobuffer.jl:114, Type at none:0, print_to_string at io.jl:121, multiple call sites at unknown:0], :jl_string_to_array), ("call through a literal pointer", [fill! at array.jl:365, #IOBuffer#316 at iobuffer.jl:121, Type at none:0, print_to_string at io.jl:121, multiple call sites at unknown:0], :memset)])

All the "call to the Julia runtime" reffering to line 23-27 that is:

        try
          integral = quadgk(u -> ((u-z)^2+q)^(-3/2), 0, L, rtol=1f-8)[1]
        catch
        end
        Bx[offset]+=I[j]*integral*δy
        By[offset]+=I[j]*integral*δx

Ok, there some progress, however there still is problem with QuadGK:

using CUDAnative
using CuArrays
using CUDAdrv
using QuadGK

x=50
y=50
z=50

a=cu(rand(x,y,z))
b=cu(rand(x,y,z))
c=cu(rand(x,y,z))

number=512
number2=2
variant=cu(ones(number2))

function testfunction(a,b,c,number2,variant)
  offset= blockIdx().x + (blockIdx().y-1)*gridDim().x + (blockIdx().z-1)*gridDim().x*gridDim().y

  while number2>0
    number2-=1
    variant[number2+1]==1.0f0 ?
    (
      @atomic a[offset]+=quadgk(x->exp(-x^2),0,1)[1];
      @atomic b[offset]+=quadgk(x->exp(-x^2),0,1)[1];
      @atomic c[offset]+=quadgk(x->exp(-x^2),0,1)[1];
    ) :
    (@atomic a[offset]+=quadgk(x->exp(-x^2),0,1)[1];
    @atomic b[offset]+=quadgk(x->exp(-x^2),0,1)[1];
    @atomic c[offset]+=quadgk(x->exp(-x^2),0,1)[1])
  end
  return nothing
end

@cuda blocks=x,y,z threads=number-1,1,1 testfunction(a,b,c,number2,variant)

It have problem with that QuadGK statement, but I need that in my project. The next problem is that not for every phrase it will work. I show it on example. On CPU version of my programs i did it this way:

 integral=0.0f0;
 try
     integral = quadgk(u -> ((u-p)^2+q)^(-3/2), 0, L, rtol=1f-8)[1];
 catch
 end

and next the integral variable was used for remaining operations.
So what should I do with this? I get error of:
Reason: unsupported dynamic function invocation (call to quadgk(f, a::T, b::T, c::T...) where T in QuadGK at C:\Users\Wiktor\.julia\packages\QuadGK\v6Zrs\src\adapt.jl:155)
and stacktrace:

[1] atomic_arrayset at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\device\cuda\atomics.jl:365
 [2] atomic_arrayset at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\device\cuda\atomics.jl:349
 [3] macro expansion at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\device\cuda\atomics.jl:344

What should I do with that?

Any idea how to run quadgk inside Cuda kernel?

Some ideas about that or maybe different method?

There are a number of things going on here, but the crux is that QuadGK.jl is not written for CUDA, and is not likely to be rewritten, since adaptive 1D quadrature is too branch-heavy and not sufficiently data-parallel to port well to a GPU. Your integral has an analytical solution, though:

\int_0^L ((u-z)^2 + q)^{-\frac{3}{2}} \, \mathrm{d}u = \frac{L-z}{q \sqrt{(L-z)^2+q}}+\frac{z}{q \sqrt{q+z^2}}
4 Likes

Some months ago, I calculate this integral, but there was problem with it(that wasn’t good solution for every data), because for some data it gave different result(I led the original denominator part of equation to cannonical form, but there wasn’t 100% certainty if the denomiantor equation have real solutions, than the all equation have two solutions that didn’t cover with thereselves), but I will check it again, maybe I missing something.

It’s probably good answer, however when I analyzed that I must have thinked about it:

sin(arctan(\frac{L-p}{\sqrt{q}})) = \pm \frac{L-p}{\sqrt{(L-p)^2+q}} and sin(arctan(\frac{-p}{\sqrt{q}})) = \pm \frac{-p}{\sqrt{p^2+q}} sign, because L is sure greater than zero, so I have to look for this p element(because all of these L, p and q are just phrases of multiplied/divided of difference of coordinates, nodes or nodes and coordinates and these are quite long phrases) and from what I see p can be positive/negative and be different value. If it goes about all of these values it’s fine, because only thing that now I need to deal with only Inf and NaN values, which isn’t that hard as error handling in CUDAnative. Anyway, Thank you. Maybe I should just avoid this numerical integration.