Trigonometric functions on GPU

I tried executing a simple function involving sin and cos as below:

using CUDAdrv,CUDAnative,CuArrays
# Float32-only wrappers that simply forward to the unqualified `sin` and `cos`.
sine(x::Float32) = sin(x)
cosine(x::Float32) = cos(x)
# NOTE(review): `CuArrays.@cufunc` is applied here to `Real`-typed definitions of the
# same two names. Presumably it registers GPU-compatible variants for use by CuArrays;
# confirm whether it has any effect on calls made from a hand-written `@cuda` kernel.
CuArrays.@cufunc sine(x::Real) =  sin(x)
CuArrays.@cufunc cosine(x::Real) =  cos(x)

# GPU kernel: walks `cedg` with a grid-stride loop and, for every non-zero entry,
# adds `cedg[k] * sine(b[k])` to `s` and `cedg[k] * cosine(b[k])` to `c`.
#
# `s += ...` and `c += ...` only rebind the local variables `s` and `c`; nothing is
# stored into an array and the kernel returns `nothing`, so the accumulated values
# are not visible to the caller.
function update(cedg, b, s, c)
    # Global 1-based index of this thread, and the total number of launched threads.
    start = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    hop = blockDim().x * gridDim().x
    for k in start:hop:length(cedg)
        # Zero entries contribute nothing; skip them.
        cedg[k] == 0 && continue
        @inbounds s = s + cedg[k] * sine(b[k])
        @inbounds c = c + cedg[k] * cosine(b[k])
    end
    return nothing
end

On trying to run it,

# Host-side setup: two scalar Float32 accumulators and two 10000-element random
# device arrays (evaluated left to right, so `cedg` is drawn before `b`).
s, c = 0f0, 0f0
cedg, b = CuArrays.rand(10000), CuArrays.rand(10000)
# 256 threads per block; round the block count up so all 10000 elements are covered.
numblocks = cld(10000, 256)
# NOTE(review): the kernel shown above is defined as `update`, but this launches
# `update!`. The warnings below point at `update! at In[30]`, so this presumably
# runs an older definition from the session — confirm which function is intended.
@cuda threads=256 blocks=numblocks update!(cedg, b, s, c)

I get the following error

┌ Warning: calls to Base intrinsics might be GPU incompatible
│   exception = (CUDAnative.MethodSubstitutionWarning(sin(x::T) where T<:Union{Float32, Float64} in Base.Math at special/trig.jl:30, sin(x::Float32) in CUDAnative at /root/.julia/packages/CUDAnative/2WQzk/src/device/cuda/math.jl:13), Base.StackTraces.StackFrame[sin at trig.jl:30, update! at In[30]:2])
└ @ CUDAnative /root/.julia/packages/CUDAnative/2WQzk/src/compiler/irgen.jl:116
┌ Warning: calls to Base intrinsics might be GPU incompatible
│   exception = (CUDAnative.MethodSubstitutionWarning(cos(x::T) where T<:Union{Float32, Float64} in Base.Math at special/trig.jl:100, cos(x::Float32) in CUDAnative at /root/.julia/packages/CUDAnative/2WQzk/src/device/cuda/math.jl:6), Base.StackTraces.StackFrame[cos at trig.jl:100, update! at In[30]:2])
└ @ CUDAnative /root/.julia/packages/CUDAnative/2WQzk/src/compiler/irgen.jl:116
LLVM error: Cannot select: 0xc22bf70: i64,glue = sube Constant:i64<0>, 0xc22bea0, 0xc22bf08:1
  0x514c820: i64 = Constant<0>
  0xc22bea0: i64 = add 0x514c618, 0xc22be38
    0x514c618: i64 = add 0x514cb60, 0x514c958
      0x514cb60: i64 = mul 0xc22b000, 0x514c1a0
        0xc22b000: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %13
          0x514cf70: i64 = Register %13
        0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
          0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
            0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
              0x514cd00: i64 = Register %0
            0x514c068: i64 = Constant<4503599627370495>
          0x514c8f0: i64 = Constant<4503599627370496>
      0x514c958: i64 = mulhu 0xc22b2d8, 0x514c1a0
        0xc22b2d8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %14
          0x514cc30: i64 = Register %14
        0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
          0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
            0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
              0x514cd00: i64 = Register %0
            0x514c068: i64 = Constant<4503599627370495>
          0x514c8f0: i64 = Constant<4503599627370496>
    0xc22be38: i64 = select 0xc22bdd0, Constant:i64<1>, 0x514c4e0
      0xc22bdd0: i1 = setcc 0xc22bc98, 0xc22bc30, setult:ch
        0xc22bc98: i64 = add 0xc22b1a0, 0xc22bc30
          0xc22b1a0: i64 = mul 0xc22b2d8, 0x514c1a0
            0xc22b2d8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %14
              0x514cc30: i64 = Register %14
            0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
              0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
                0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
                  0x514cd00: i64 = Register %0
                0x514c068: i64 = Constant<4503599627370495>
              0x514c8f0: i64 = Constant<4503599627370496>
          0xc22bc30: i64 = mulhu 0x514c7b8, 0x514c1a0
            0x514c7b8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %15
              0x514c3a8: i64 = Register %15
            0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
              0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
                0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
                  0x514cd00: i64 = Register %0
                0x514c068: i64 = Constant<4503599627370495>
              0x514c8f0: i64 = Constant<4503599627370496>
        0xc22bc30: i64 = mulhu 0x514c7b8, 0x514c1a0
          0x514c7b8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %15
            0x514c3a8: i64 = Register %15
          0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
            0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
              0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
                0x514cd00: i64 = Register %0
              0x514c068: i64 = Constant<4503599627370495>
            0x514c8f0: i64 = Constant<4503599627370496>
      0x514c478: i64 = Constant<1>
      0x514c4e0: i64 = zero_extend 0xc22bd00
        0xc22bd00: i1 = setcc 0xc22bc98, 0xc22b1a0, setult:ch
          0xc22bc98: i64 = add 0xc22b1a0, 0xc22bc30
            0xc22b1a0: i64 = mul 0xc22b2d8, 0x514c1a0
              0xc22b2d8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %14
                0x514cc30: i64 = Register %14
              0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
                0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
                  0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0

                  0x514c068: i64 = Constant<4503599627370495>
                0x514c8f0: i64 = Constant<4503599627370496>
            0xc22bc30: i64 = mulhu 0x514c7b8, 0x514c1a0
              0x514c7b8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %15
                0x514c3a8: i64 = Register %15
              0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
                0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
                  0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0

                  0x514c068: i64 = Constant<4503599627370495>
                0x514c8f0: i64 = Constant<4503599627370496>
          0xc22b1a0: i64 = mul 0xc22b2d8, 0x514c1a0
            0xc22b2d8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %14
              0x514cc30: i64 = Register %14
            0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
              0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
                0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
                  0x514cd00: i64 = Register %0
                0x514c068: i64 = Constant<4503599627370495>
              0x514c8f0: i64 = Constant<4503599627370496>
  0xc22bf08: i64,glue = subc Constant:i64<0>, 0xc22bc98
    0x514c820: i64 = Constant<0>
    0xc22bc98: i64 = add 0xc22b1a0, 0xc22bc30
      0xc22b1a0: i64 = mul 0xc22b2d8, 0x514c1a0
        0xc22b2d8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %14
          0x514cc30: i64 = Register %14
        0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
          0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
            0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
              0x514cd00: i64 = Register %0
            0x514c068: i64 = Constant<4503599627370495>
          0x514c8f0: i64 = Constant<4503599627370496>
      0xc22bc30: i64 = mulhu 0x514c7b8, 0x514c1a0
        0x514c7b8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %15
          0x514c3a8: i64 = Register %15
        0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
          0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
            0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
              0x514cd00: i64 = Register %0
            0x514c068: i64 = Constant<4503599627370495>
          0x514c8f0: i64 = Constant<4503599627370496>
In function: julia_paynehanek_19687

Stacktrace:
 [1] handle_error(::Cstring) at /root/.julia/packages/LLVM/ICZSf/src/core/context.jl:103
 [2] macro expansion at /root/.julia/packages/LLVM/ICZSf/src/base.jl:18 [inlined]
 [3] LLVMTargetMachineEmitToMemoryBuffer at /root/.julia/packages/LLVM/ICZSf/lib/6.0/libLLVM_h.jl:2726 [inlined]
 [4] emit(::LLVM.TargetMachine, ::LLVM.Module, ::LLVM.API.LLVMCodeGenFileType) at /root/.julia/packages/LLVM/ICZSf/src/targetmachine.jl:42
 [5] mcgen(::CUDAnative.CompilerJob, ::LLVM.Module, ::LLVM.Function) at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/mcgen.jl:87
 [6] macro expansion at /root/.julia/packages/TimerOutputs/7Id5J/src/TimerOutput.jl:214 [inlined]
 [7] macro expansion at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:204 [inlined]
 [8] macro expansion at /root/.julia/packages/TimerOutputs/7Id5J/src/TimerOutput.jl:214 [inlined]
 [9] #codegen#152(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.codegen), ::Symbol, ::CUDAnative.CompilerJob) at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:201
 [10] #codegen at ./none:0 [inlined]
 [11] #compile#151(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.compile), ::Symbol, ::CUDAnative.CompilerJob) at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:47
 [12] #compile at ./none:0 [inlined]
 [13] #compile#150 at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:28 [inlined]
 [14] #compile at ./none:0 [inlined] (repeats 2 times)
 [15] macro expansion at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:403 [inlined]
 [16] #cufunction#194(::Nothing, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(cufunction), ::typeof(update!), ::Type{Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},Float32,Float32}}) at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:368
 [17] cufunction(::Function, ::Type) at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:368
 [18] top-level scope at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:176
 [19] top-level scope at gcutils.jl:91
 [20] top-level scope at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:173
 [21] top-level scope at In[60]:1

How do I fix this?

Hello,

this is what I’ve found on this issue (see the bottom of the page):

Many mathematical functions are provided by the libdevice library, and are wrapped by CUDAnative.jl. These functions implement interfaces that are similar to existing functions in Base, albeit often with support for fewer types.

and

in kernel code, call CUDAnative.sin instead of plain sin

I tried what you suggested,

# Same kernel as before, but calling `CUDAnative.sin` / `CUDAnative.cos` directly.
# For every non-zero `cedg[k]` reached by the grid-stride loop it adds
# `cedg[k] * CUDAnative.sin(b[k])` to `s` and `cedg[k] * CUDAnative.cos(b[k])` to `c`.
#
# Both accumulators are plain local rebinding (`s = s + ...`); the kernel returns
# `nothing` and writes to no array, so the caller never sees the sums.
function update(cedg, b, s, c)
    # Global 1-based index of this thread, and the total number of launched threads.
    start = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    hop = blockDim().x * gridDim().x
    for k in start:hop:length(cedg)
        # Zero entries contribute nothing; skip them.
        cedg[k] == 0 && continue
        @inbounds s = s + cedg[k] * CUDAnative.sin(b[k])
        @inbounds c = c + cedg[k] * CUDAnative.cos(b[k])
    end
    return nothing
end

I still get the following error:

┌ Warning: calls to Base intrinsics might be GPU incompatible
│   exception = (CUDAnative.MethodSubstitutionWarning(sin(x::T) where T<:Union{Float32, Float64} in Base.Math at special/trig.jl:30, sin(x::Float32) in CUDAnative at /root/.julia/packages/CUDAnative/2WQzk/src/device/cuda/math.jl:13), Base.StackTraces.StackFrame[sin at trig.jl:30, update! at In[30]:2])
└ @ CUDAnative /root/.julia/packages/CUDAnative/2WQzk/src/compiler/irgen.jl:116
┌ Warning: calls to Base intrinsics might be GPU incompatible
│   exception = (CUDAnative.MethodSubstitutionWarning(cos(x::T) where T<:Union{Float32, Float64} in Base.Math at special/trig.jl:100, cos(x::Float32) in CUDAnative at /root/.julia/packages/CUDAnative/2WQzk/src/device/cuda/math.jl:6), Base.StackTraces.StackFrame[cos at trig.jl:100, update! at In[30]:2])
└ @ CUDAnative /root/.julia/packages/CUDAnative/2WQzk/src/compiler/irgen.jl:116
LLVM error: Cannot select: 0xe206f70: i64,glue = sube Constant:i64<0>, 0xe206ea0, 0xe206f08:1
  0xc228820: i64 = Constant<0>
  0xe206ea0: i64 = add 0xc228618, 0xe206e38
    0xc228618: i64 = add 0xc228b60, 0xc228958
      0xc228b60: i64 = mul 0xe206000, 0xc2281a0
        0xe206000: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %13
          0xc228f70: i64 = Register %13
        0xc2281a0: i64 = or 0xc2282d8, Constant:i64<4503599627370496>
          0xc2282d8: i64 = and 0xc228410, Constant:i64<4503599627370495>
            0xc228410: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %0
              0xc228d00: i64 = Register %0
            0xc228068: i64 = Constant<4503599627370495>
          0xc2288f0: i64 = Constant<4503599627370496>
      0xc228958: i64 = mulhu 0xe2062d8, 0xc2281a0
        0xe2062d8: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %14
          0xc228c30: i64 = Register %14
        0xc2281a0: i64 = or 0xc2282d8, Constant:i64<4503599627370496>
          0xc2282d8: i64 = and 0xc228410, Constant:i64<4503599627370495>
            0xc228410: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %0
              0xc228d00: i64 = Register %0
            0xc228068: i64 = Constant<4503599627370495>
          0xc2288f0: i64 = Constant<4503599627370496>
    0xe206e38: i64 = select 0xe206dd0, Constant:i64<1>, 0xc2284e0
      0xe206dd0: i1 = setcc 0xe206c98, 0xe206c30, setult:ch
        0xe206c98: i64 = add 0xe2061a0, 0xe206c30
          0xe2061a0: i64 = mul 0xe2062d8, 0xc2281a0
            0xe2062d8: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %14
              0xc228c30: i64 = Register %14
            0xc2281a0: i64 = or 0xc2282d8, Constant:i64<4503599627370496>
              0xc2282d8: i64 = and 0xc228410, Constant:i64<4503599627370495>
                0xc228410: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %0
                  0xc228d00: i64 = Register %0
                0xc228068: i64 = Constant<4503599627370495>
              0xc2288f0: i64 = Constant<4503599627370496>
          0xe206c30: i64 = mulhu 0xc2287b8, 0xc2281a0
            0xc2287b8: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %15
              0xc2283a8: i64 = Register %15
            0xc2281a0: i64 = or 0xc2282d8, Constant:i64<4503599627370496>
              0xc2282d8: i64 = and 0xc228410, Constant:i64<4503599627370495>
                0xc228410: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %0
                  0xc228d00: i64 = Register %0
                0xc228068: i64 = Constant<4503599627370495>
              0xc2288f0: i64 = Constant<4503599627370496>
        0xe206c30: i64 = mulhu 0xc2287b8, 0xc2281a0
          0xc2287b8: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %15
            0xc2283a8: i64 = Register %15
          0xc2281a0: i64 = or 0xc2282d8, Constant:i64<4503599627370496>
            0xc2282d8: i64 = and 0xc228410, Constant:i64<4503599627370495>
              0xc228410: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %0
                0xc228d00: i64 = Register %0
              0xc228068: i64 = Constant<4503599627370495>
            0xc2288f0: i64 = Constant<4503599627370496>
      0xc228478: i64 = Constant<1>
      0xc2284e0: i64 = zero_extend 0xe206d00
        0xe206d00: i1 = setcc 0xe206c98, 0xe2061a0, setult:ch
          0xe206c98: i64 = add 0xe2061a0, 0xe206c30
            0xe2061a0: i64 = mul 0xe2062d8, 0xc2281a0
              0xe2062d8: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %14
                0xc228c30: i64 = Register %14
              0xc2281a0: i64 = or 0xc2282d8, Constant:i64<4503599627370496>
                0xc2282d8: i64 = and 0xc228410, Constant:i64<4503599627370495>
                  0xc228410: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %0

                  0xc228068: i64 = Constant<4503599627370495>
                0xc2288f0: i64 = Constant<4503599627370496>
            0xe206c30: i64 = mulhu 0xc2287b8, 0xc2281a0
              0xc2287b8: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %15
                0xc2283a8: i64 = Register %15
              0xc2281a0: i64 = or 0xc2282d8, Constant:i64<4503599627370496>
                0xc2282d8: i64 = and 0xc228410, Constant:i64<4503599627370495>
                  0xc228410: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %0

                  0xc228068: i64 = Constant<4503599627370495>
                0xc2288f0: i64 = Constant<4503599627370496>
          0xe2061a0: i64 = mul 0xe2062d8, 0xc2281a0
            0xe2062d8: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %14
              0xc228c30: i64 = Register %14
            0xc2281a0: i64 = or 0xc2282d8, Constant:i64<4503599627370496>
              0xc2282d8: i64 = and 0xc228410, Constant:i64<4503599627370495>
                0xc228410: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %0
                  0xc228d00: i64 = Register %0
                0xc228068: i64 = Constant<4503599627370495>
              0xc2288f0: i64 = Constant<4503599627370496>
  0xe206f08: i64,glue = subc Constant:i64<0>, 0xe206c98
    0xc228820: i64 = Constant<0>
    0xe206c98: i64 = add 0xe2061a0, 0xe206c30
      0xe2061a0: i64 = mul 0xe2062d8, 0xc2281a0
        0xe2062d8: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %14
          0xc228c30: i64 = Register %14
        0xc2281a0: i64 = or 0xc2282d8, Constant:i64<4503599627370496>
          0xc2282d8: i64 = and 0xc228410, Constant:i64<4503599627370495>
            0xc228410: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %0
              0xc228d00: i64 = Register %0
            0xc228068: i64 = Constant<4503599627370495>
          0xc2288f0: i64 = Constant<4503599627370496>
      0xe206c30: i64 = mulhu 0xc2287b8, 0xc2281a0
        0xc2287b8: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %15
          0xc2283a8: i64 = Register %15
        0xc2281a0: i64 = or 0xc2282d8, Constant:i64<4503599627370496>
          0xc2282d8: i64 = and 0xc228410, Constant:i64<4503599627370495>
            0xc228410: i64,ch = CopyFromReg 0x2d09c40, Register:i64 %0
              0xc228d00: i64 = Register %0
            0xc228068: i64 = Constant<4503599627370495>
          0xc2288f0: i64 = Constant<4503599627370496>
In function: julia_paynehanek_19754

Stacktrace:
 [1] handle_error(::Cstring) at /root/.julia/packages/LLVM/ICZSf/src/core/context.jl:103
 [2] macro expansion at /root/.julia/packages/LLVM/ICZSf/src/base.jl:18 [inlined]
 [3] LLVMTargetMachineEmitToMemoryBuffer at /root/.julia/packages/LLVM/ICZSf/lib/6.0/libLLVM_h.jl:2726 [inlined]
 [4] emit(::LLVM.TargetMachine, ::LLVM.Module, ::LLVM.API.LLVMCodeGenFileType) at /root/.julia/packages/LLVM/ICZSf/src/targetmachine.jl:42
 [5] mcgen(::CUDAnative.CompilerJob, ::LLVM.Module, ::LLVM.Function) at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/mcgen.jl:87
 [6] macro expansion at /root/.julia/packages/TimerOutputs/7Id5J/src/TimerOutput.jl:214 [inlined]
 [7] macro expansion at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:204 [inlined]
 [8] macro expansion at /root/.julia/packages/TimerOutputs/7Id5J/src/TimerOutput.jl:214 [inlined]
 [9] #codegen#152(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.codegen), ::Symbol, ::CUDAnative.CompilerJob) at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:201
 [10] #codegen at ./none:0 [inlined]
 [11] #compile#151(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.compile), ::Symbol, ::CUDAnative.CompilerJob) at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:47
 [12] #compile at ./none:0 [inlined]
 [13] #compile#150 at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:28 [inlined]
 [14] #compile at ./none:0 [inlined] (repeats 2 times)
 [15] macro expansion at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:403 [inlined]
 [16] #cufunction#194(::Nothing, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(cufunction), ::typeof(update!), ::Type{Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},Float32,Float32}}) at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:368
 [17] cufunction(::Function, ::Type) at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:368
 [18] top-level scope at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:176
 [19] top-level scope at gcutils.jl:91
 [20] top-level scope at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:173
 [21] top-level scope at In[72]:1

Those error messages seem to be identical. Did you update the function definition?

You were right. For some reason, the redefinition of the function was not taking effect. And strangely, both versions of the function now work! I have another question, though: I initialise s and c outside the function, but after executing the function their values remain unchanged. Why is this so?