I have this error while running Cuda
ERROR: LoadError: CuError(1, nothing)
Stacktrace:
[1] (::getfield(CUDAdrv, Symbol("##25#26")){Bool,Int64,CuStream,CuFunction})(::Array{Ptr{Nothing},1}) at C:\Users\Wiktor\.julia\packages\CUDAdrv\WVU1H\src\base.jl:147
[2] macro expansion at .\gcutils.jl:87 [inlined]
[3] macro expansion at C:\Users\Wiktor\.julia\packages\CUDAdrv\WVU1H\src\execution.jl:61 [inlined]
[4] pack_arguments(::getfield(CUDAdrv, Symbol("##25#26")){Bool,Int64,CuStream,CuFunction}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,2,CUDAnative.AS.Global}, ::CuDeviceArray{Complex{Float32},2,CUDAnative.AS.Global}, ::Int64, ::Int64, ::CuDeviceArray{Float32,3,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,3,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,3,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}) at C:\Users\Wiktor\.julia\packages\CUDAdrv\WVU1H\src\execution.jl:40
[5] #launch#24(::Tuple{Int64,Int64,Int64}, ::Tuple{Int64,Int64,Int64}, ::Bool, ::Int64, ::CuStream, ::Function, ::CuFunction, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::Vararg{Any,N} where N) at C:\Users\Wiktor\.julia\packages\CUDAdrv\WVU1H\src\execution.jl:90
[6] #launch at .\none:0 [inlined]
[7] #30 at C:\Users\Wiktor\.julia\packages\CUDAdrv\WVU1H\src\execution.jl:179 [inlined]
[8] macro expansion at .\gcutils.jl:87 [inlined]
[9] macro expansion at C:\Users\Wiktor\.julia\packages\CUDAdrv\WVU1H\src\execution.jl:139 [inlined]
[10] convert_arguments at C:\Users\Wiktor\.julia\packages\CUDAdrv\WVU1H\src\execution.jl:123 [inlined]
[11] #cudacall#29 at C:\Users\Wiktor\.julia\packages\CUDAdrv\WVU1H\src\execution.jl:178 [inlined]
[12] #cudacall at .\none:0 [inlined]
[13] #cudacall#160 at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\execution.jl:279 [inlined]
[14] #cudacall at .\none:0 [inlined]
[15] macro expansion at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\execution.jl:260 [inlined]
[16] #call#148(::Base.Iterators.Pairs{Symbol,Tuple{Int64,Int64,Int64},Tuple{Symbol,Symbol},NamedTuple{(:blocks, :threads),Tuple{Tuple{Int64,Int64,Int64},Tuple{Int64,Int64,Int64}}}}, ::typeof(CUDAnative.call), ::CUDAnative.HostKernel{BiotSavartCalculation.biotSavartCalculation,Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Complex{Float32},2,CUDAnative.AS.Global},Int64,Int64,CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global}}}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,2,CUDAnative.AS.Global}, ::CuDeviceArray{Complex{Float32},2,CUDAnative.AS.Global}, ::Int64, ::Int64, ::CuDeviceArray{Float32,3,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,3,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,3,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}) at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\execution.jl:237
[17] (::getfield(CUDAnative, Symbol("#kw##call")))(::NamedTuple{(:blocks, :threads),Tuple{Tuple{Int64,Int64,Int64},Tuple{Int64,Int64,Int64}}}, ::typeof(CUDAnative.call), ::CUDAnative.HostKernel{BiotSavartCalculation.biotSavartCalculation,Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Complex{Float32},2,CUDAnative.AS.Global},Int64,Int64,CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global}}}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::Vararg{Any,N} where N) at .\none:0
[18] #call#163(::Base.Iterators.Pairs{Symbol,Tuple{Int64,Int64,Int64},Tuple{Symbol,Symbol},NamedTuple{(:blocks, :threads),Tuple{Tuple{Int64,Int64,Int64},Tuple{Int64,Int64,Int64}}}}, ::CUDAnative.HostKernel{BiotSavartCalculation.biotSavartCalculation,Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Complex{Float32},2,CUDAnative.AS.Global},Int64,Int64,CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global}}}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::Vararg{Any,N} where N) at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\execution.jl:406
[19] (::getfield(CUDAnative, Symbol("#kw#HostKernel")))(::NamedTuple{(:blocks, :threads),Tuple{Tuple{Int64,Int64,Int64},Tuple{Int64,Int64,Int64}}}, ::CUDAnative.HostKernel{BiotSavartCalculation.biotSavartCalculation,Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Complex{Float32},2,CUDAnative.AS.Global},Int64,Int64,CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,3,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global}}}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::Vararg{Any,N} where N) at .\none:0
[20] macro expansion at .\gcutils.jl:87 [inlined]
[21] macro expansion at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\execution.jl:171 [inlined]
[22] PrepareArrangement(::Base.RefValue{Bool}) at c:\Users\Wiktor\MagneticField3DGPUVersion\src\generateMap.jl:59
[23] top-level scope at c:\Users\Wiktor\MagneticField3DGPUVersion\src\MagneticField3D.jl:95 [inlined]
[24] top-level scope at .\none:0
[25] include_string(::Module, ::String, ::String) at .\loading.jl:1008
[26] (::getfield(Main._vscodeserver, Symbol("##8#10")){String,Int64,Int64,String})() at c:\Users\Wiktor\.vscode\extensions\julialang.language-julia-0.12.0\scripts\terminalserver\terminalserver.jl:153
[27] hideprompt(::getfield(Main._vscodeserver, Symbol("##8#10")){String,Int64,Int64,String}) at c:\Users\Wiktor\.vscode\extensions\julialang.language-julia-0.12.0\scripts\terminalserver\repl.jl:28
[28] macro expansion at c:\Users\Wiktor\.vscode\extensions\julialang.language-julia-0.12.0\scripts\terminalserver\terminalserver.jl:148 [inlined]
[29] (::getfield(Main._vscodeserver, Symbol("##7#9")))() at .\task.jl:259
in expression starting at c:\Users\Wiktor\MagneticField3DGPUVersion\src\MagneticField3D.jl:82
Here’s the moment when I run function:
if CImGui.Button("Generate plot")
x=cu(collect(X[1]:X[2]:X[3]))
y=cu(collect(Y[1]:Y[2]:Y[3]))
z=cu(collect(Z[1]:Z[2]:Z[3]))
# try
lenX=length(x)
lenY=length(y)
lenZ=length(z)
segmentlength=length(S)
variant=cu(zeros(segmentlength))
d_Segment=cu(hcat(S...))
I=cu(hcat(Itab))
SegmentsCalculated=CuArray{Float32}(undef,numberOnLine*3,segmentlength)
optimalBlocks=2*attribute(CuDevice(0),CUDAdrv.MULTIPROCESSOR_COUNT)
@cuda blocks=optimalBlocks,1,1 threads=Int(numberOnLine/optimalBlocks),3,segmentlength divideLine(d_Segment,numberOnLine,SegmentsCalculated)
@cuda blocks=segmentlength threads=1 checkvariant(d_Segment,variant)
# SegmentsCalculated=Array{Float32}(collect(SegmentsCalculated))
# variant=Array{Float32}(collect(variant))
B=cu(zeros(lenX,lenY,lenZ))
Bx=By=Bz=B
@cuda blocks=1,1,1 threads = lenX,lenY,lenZ biotSavartCalculation(x,y,z,SegmentsCalculated,I,numberOnLine,segmentlength,Bx,By,Bz,variant)
...
On CPU version it works perfect. The problem starts in last line.
Here’s the program(but very simplified):
function biotSavartCalculation(x,y,z,Segment,I,SegmentsOnElement,segmentlength,Bx,By,Bz,variant)
xIndex=(blockIdx().x-1) * blockDim().x + threadIdx().x
yIndex=(blockIdx().y-1) * blockDim().y + threadIdx().y
zIndex=(blockIdx().z-1) * blockDim().z + threadIdx().z
offset=xIndex+(yIndex-1)*blockDim().x*gridDim().x+(zIndex-1)*blockDim().x*gridDim().x*blockDim().y*gridDim().y
while segmentlength>0
segmentlength-=1
columnX=3*SegmentsOnElement*segmentlength
columnY=Int(3*SegmentsOnElement*segmentlength+SegmentsOnElement/3)
columnZ=Int(3*SegmentsOnElement*segmentlength+SegmentsOnElement*2/3)
i=SegmentsOnElement-1
while i>0
δx=x[xIndex]-Segment[columnX+i]
δy=y[yIndex]-Segment[columnY+i]
integral=1.0f0
variant[segmentlength+1]==1.0f0 ? (L=Segment[columnZ+i+1]-Segment[columnZ+i];
q=δx*δx+δy*δy;
Bx[offset]+=integral*δy; By[offset]+=integral*δx) :
(Sx=Segment[columnX+i+1]-Segment[columnX+i]; Sy=Segment[columnY+i+1]-Segment[columnY+i];
L=sqrt(Sx*Sx+Sy*Sy);
mi=(Segment[columnZ+i+1]-Segment[columnZ+i])/Sy;
α=Sx/L; β=Sy/L;
a=1+mi*mi;δz=z[zIndex]+mi*Segment[columnY+i]-Segment[columnZ+i]; p=(α*δx+β*δy+mi*δz)/a; q=δx*δx+δy*δy+δz*δz-p*p*a;
Bx[offset]+=integral*(β*δz-mi*δy); By[offset]+=integral*(α*δz-mi*δx); Bz[offset]+=integral*(α*δy-β*δx))
i-=1
end
end
return nothing
end
I think that there isn’t any detail problem(like wrong indexing), but more global problem like using incorrect loops, wrong data types, wrong dependencies in package or something like that, because details(checked and tested by me) looks fine.