I tried executing a simple function involving sin
and cos
as below:
using CUDAdrv,CUDAnative,CuArrays
sine(x::Float32) = sin(x)
cosine(x::Float32) = cos(x)
CuArrays.@cufunc sine(x::Real) = sin(x)
CuArrays.@cufunc cosine(x::Real) = cos(x)
function update(cedg,b,s,c)
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
stride = blockDim().x * gridDim().x
for i = index:stride:length(cedg)
if(cedg[i]!=0)
@inbounds s+=cedg[i]*sine(b[i])
@inbounds c+=cedg[i]*cosine(b[i])
end
end
return nothing
end
On trying to run it,
cedg=CuArrays.rand(10000)
b=CuArrays.rand(10000)
s=0f0
c=0f0
numblocks = ceil(Int, 10000/256)
@cuda threads=256 blocks=numblocks update!(cedg,b,s,c)
I get the following error
┌ Warning: calls to Base intrinsics might be GPU incompatible
│ exception = (CUDAnative.MethodSubstitutionWarning(sin(x::T) where T<:Union{Float32, Float64} in Base.Math at special/trig.jl:30, sin(x::Float32) in CUDAnative at /root/.julia/packages/CUDAnative/2WQzk/src/device/cuda/math.jl:13), Base.StackTraces.StackFrame[sin at trig.jl:30, update! at In[30]:2])
└ @ CUDAnative /root/.julia/packages/CUDAnative/2WQzk/src/compiler/irgen.jl:116
┌ Warning: calls to Base intrinsics might be GPU incompatible
│ exception = (CUDAnative.MethodSubstitutionWarning(cos(x::T) where T<:Union{Float32, Float64} in Base.Math at special/trig.jl:100, cos(x::Float32) in CUDAnative at /root/.julia/packages/CUDAnative/2WQzk/src/device/cuda/math.jl:6), Base.StackTraces.StackFrame[cos at trig.jl:100, update! at In[30]:2])
└ @ CUDAnative /root/.julia/packages/CUDAnative/2WQzk/src/compiler/irgen.jl:116
LLVM error: Cannot select: 0xc22bf70: i64,glue = sube Constant:i64<0>, 0xc22bea0, 0xc22bf08:1
0x514c820: i64 = Constant<0>
0xc22bea0: i64 = add 0x514c618, 0xc22be38
0x514c618: i64 = add 0x514cb60, 0x514c958
0x514cb60: i64 = mul 0xc22b000, 0x514c1a0
0xc22b000: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %13
0x514cf70: i64 = Register %13
0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
0x514cd00: i64 = Register %0
0x514c068: i64 = Constant<4503599627370495>
0x514c8f0: i64 = Constant<4503599627370496>
0x514c958: i64 = mulhu 0xc22b2d8, 0x514c1a0
0xc22b2d8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %14
0x514cc30: i64 = Register %14
0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
0x514cd00: i64 = Register %0
0x514c068: i64 = Constant<4503599627370495>
0x514c8f0: i64 = Constant<4503599627370496>
0xc22be38: i64 = select 0xc22bdd0, Constant:i64<1>, 0x514c4e0
0xc22bdd0: i1 = setcc 0xc22bc98, 0xc22bc30, setult:ch
0xc22bc98: i64 = add 0xc22b1a0, 0xc22bc30
0xc22b1a0: i64 = mul 0xc22b2d8, 0x514c1a0
0xc22b2d8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %14
0x514cc30: i64 = Register %14
0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
0x514cd00: i64 = Register %0
0x514c068: i64 = Constant<4503599627370495>
0x514c8f0: i64 = Constant<4503599627370496>
0xc22bc30: i64 = mulhu 0x514c7b8, 0x514c1a0
0x514c7b8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %15
0x514c3a8: i64 = Register %15
0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
0x514cd00: i64 = Register %0
0x514c068: i64 = Constant<4503599627370495>
0x514c8f0: i64 = Constant<4503599627370496>
0xc22bc30: i64 = mulhu 0x514c7b8, 0x514c1a0
0x514c7b8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %15
0x514c3a8: i64 = Register %15
0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
0x514cd00: i64 = Register %0
0x514c068: i64 = Constant<4503599627370495>
0x514c8f0: i64 = Constant<4503599627370496>
0x514c478: i64 = Constant<1>
0x514c4e0: i64 = zero_extend 0xc22bd00
0xc22bd00: i1 = setcc 0xc22bc98, 0xc22b1a0, setult:ch
0xc22bc98: i64 = add 0xc22b1a0, 0xc22bc30
0xc22b1a0: i64 = mul 0xc22b2d8, 0x514c1a0
0xc22b2d8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %14
0x514cc30: i64 = Register %14
0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
0x514c068: i64 = Constant<4503599627370495>
0x514c8f0: i64 = Constant<4503599627370496>
0xc22bc30: i64 = mulhu 0x514c7b8, 0x514c1a0
0x514c7b8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %15
0x514c3a8: i64 = Register %15
0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
0x514c068: i64 = Constant<4503599627370495>
0x514c8f0: i64 = Constant<4503599627370496>
0xc22b1a0: i64 = mul 0xc22b2d8, 0x514c1a0
0xc22b2d8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %14
0x514cc30: i64 = Register %14
0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
0x514cd00: i64 = Register %0
0x514c068: i64 = Constant<4503599627370495>
0x514c8f0: i64 = Constant<4503599627370496>
0xc22bf08: i64,glue = subc Constant:i64<0>, 0xc22bc98
0x514c820: i64 = Constant<0>
0xc22bc98: i64 = add 0xc22b1a0, 0xc22bc30
0xc22b1a0: i64 = mul 0xc22b2d8, 0x514c1a0
0xc22b2d8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %14
0x514cc30: i64 = Register %14
0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
0x514cd00: i64 = Register %0
0x514c068: i64 = Constant<4503599627370495>
0x514c8f0: i64 = Constant<4503599627370496>
0xc22bc30: i64 = mulhu 0x514c7b8, 0x514c1a0
0x514c7b8: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %15
0x514c3a8: i64 = Register %15
0x514c1a0: i64 = or 0x514c2d8, Constant:i64<4503599627370496>
0x514c2d8: i64 = and 0x514c410, Constant:i64<4503599627370495>
0x514c410: i64,ch = CopyFromReg 0x9fa5040, Register:i64 %0
0x514cd00: i64 = Register %0
0x514c068: i64 = Constant<4503599627370495>
0x514c8f0: i64 = Constant<4503599627370496>
In function: julia_paynehanek_19687
Stacktrace:
[1] handle_error(::Cstring) at /root/.julia/packages/LLVM/ICZSf/src/core/context.jl:103
[2] macro expansion at /root/.julia/packages/LLVM/ICZSf/src/base.jl:18 [inlined]
[3] LLVMTargetMachineEmitToMemoryBuffer at /root/.julia/packages/LLVM/ICZSf/lib/6.0/libLLVM_h.jl:2726 [inlined]
[4] emit(::LLVM.TargetMachine, ::LLVM.Module, ::LLVM.API.LLVMCodeGenFileType) at /root/.julia/packages/LLVM/ICZSf/src/targetmachine.jl:42
[5] mcgen(::CUDAnative.CompilerJob, ::LLVM.Module, ::LLVM.Function) at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/mcgen.jl:87
[6] macro expansion at /root/.julia/packages/TimerOutputs/7Id5J/src/TimerOutput.jl:214 [inlined]
[7] macro expansion at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:204 [inlined]
[8] macro expansion at /root/.julia/packages/TimerOutputs/7Id5J/src/TimerOutput.jl:214 [inlined]
[9] #codegen#152(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.codegen), ::Symbol, ::CUDAnative.CompilerJob) at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:201
[10] #codegen at ./none:0 [inlined]
[11] #compile#151(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.compile), ::Symbol, ::CUDAnative.CompilerJob) at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:47
[12] #compile at ./none:0 [inlined]
[13] #compile#150 at /root/.julia/packages/CUDAnative/2WQzk/src/compiler/driver.jl:28 [inlined]
[14] #compile at ./none:0 [inlined] (repeats 2 times)
[15] macro expansion at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:403 [inlined]
[16] #cufunction#194(::Nothing, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(cufunction), ::typeof(update!), ::Type{Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},Float32,Float32}}) at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:368
[17] cufunction(::Function, ::Type) at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:368
[18] top-level scope at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:176
[19] top-level scope at gcutils.jl:91
[20] top-level scope at /root/.julia/packages/CUDAnative/2WQzk/src/execution.jl:173
[21] top-level scope at In[60]:1
How do I fix this?