I have two CUDA kernel launches in my code:
@cuda blocks=3 threads=numberofsegments+1 divideLine(Segment,numberofsegments+1,SegmentsCalculated)
@cuda blocks=lenX,lenY,lenZ threads=numberofsegments,2,1 biotGPU(x,y,z,SegmentsCalculated,Bx,By,Bz, Current)
I don’t want to go into details, but I am almost sure that the problem is with the number of threads. However, by my calculation I use 1021 threads in total, which is below the limit (the limit is 1024, and I use 341 + 340*2 = 1021 threads).
The numberofsegments variable depends on my input, so for small enough inputs it works fine, but when numberofsegments is large enough that I use about 1000 threads, my program crashes.
And this is the error:
ERROR: LoadError: CuError(701, nothing)
Stacktrace:
[1] (::getfield(CUDAdrv, Symbol("##25#26")){Bool,Int64,CuStream,CuFunction})(::Array{Ptr{Nothing},1}) at C:\Users\Wiktor\.julia\packages\CUDAdrv\ADRHQ\src\base.jl:145
[2] macro expansion at .\gcutils.jl:87 [inlined]
[3] macro expansion at C:\Users\Wiktor\.julia\packages\CUDAdrv\ADRHQ\src\execution.jl:61 [inlined]
[4] pack_arguments(::getfield(CUDAdrv, Symbol("##25#26")){Bool,Int64,CuStream,CuFunction}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,2,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,2,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,2,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}) at C:\Users\Wiktor\.julia\packages\CUDAdrv\ADRHQ\src\execution.jl:40
[5] #launch#24(::Tuple{Int64,Int64,Int64}, ::Tuple{Int64,Int64,Int64}, ::Bool, ::Int64, ::CuStream, ::Function, ::CuFunction, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::Vararg{Any,N} where N) at C:\Users\Wiktor\.julia\packages\CUDAdrv\ADRHQ\src\execution.jl:90
[6] #launch at .\none:0 [inlined]
[7] #30 at C:\Users\Wiktor\.julia\packages\CUDAdrv\ADRHQ\src\execution.jl:179 [inlined]
[8] macro expansion at .\gcutils.jl:87 [inlined]
[9] macro expansion at C:\Users\Wiktor\.julia\packages\CUDAdrv\ADRHQ\src\execution.jl:139 [inlined]
[10] convert_arguments at C:\Users\Wiktor\.julia\packages\CUDAdrv\ADRHQ\src\execution.jl:123 [inlined]
[11] #cudacall#29 at C:\Users\Wiktor\.julia\packages\CUDAdrv\ADRHQ\src\execution.jl:178 [inlined]
[12] #cudacall at .\none:0 [inlined]
[13] #cudacall#160 at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\execution.jl:279 [inlined]
[14] #cudacall at .\none:0 [inlined]
[15] macro expansion at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\execution.jl:260 [inlined]
[16] #call#148(::Base.Iterators.Pairs{Symbol,Tuple{Int64,Int64,Int64},Tuple{Symbol,Symbol},NamedTuple{(:blocks, :threads),Tuple{Tuple{Int64,Int64,Int64},Tuple{Int64,Int64,Int64}}}}, ::typeof(CUDAnative.call), ::CUDAnative.HostKernel{ConvertCoordinates.biotGPU,Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global}}}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,2,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,2,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,2,CUDAnative.AS.Global}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}) at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\execution.jl:237
[17] (::getfield(CUDAnative, Symbol("#kw##call")))(::NamedTuple{(:blocks, :threads),Tuple{Tuple{Int64,Int64,Int64},Tuple{Int64,Int64,Int64}}}, ::typeof(CUDAnative.call), ::CUDAnative.HostKernel{ConvertCoordinates.biotGPU,Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global}}}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::Vararg{Any,N} where N) at .\none:0
[18] #call#163(::Base.Iterators.Pairs{Symbol,Tuple{Int64,Int64,Int64},Tuple{Symbol,Symbol},NamedTuple{(:blocks, :threads),Tuple{Tuple{Int64,Int64,Int64},Tuple{Int64,Int64,Int64}}}}, ::CUDAnative.HostKernel{ConvertCoordinates.biotGPU,Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global}}}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::Vararg{Any,N} where N) at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\execution.jl:406
[19] (::getfield(CUDAnative, Symbol("#kw#HostKernel")))(::NamedTuple{(:blocks, :threads),Tuple{Tuple{Int64,Int64,Int64},Tuple{Int64,Int64,Int64}}}, ::CUDAnative.HostKernel{ConvertCoordinates.biotGPU,Tuple{CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,1,CUDAnative.AS.Global}}}, ::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, ::Vararg{Any,N} where N) at .\none:0
[20] macro expansion at .\gcutils.jl:87 [inlined]
[21] macro expansion at C:\Users\Wiktor\.julia\packages\CUDAnative\nItlk\src\execution.jl:171 [inlined]
[22] PrepareArrangement(::Base.RefValue{Bool}) at c:\Users\Wiktor\MagneticField3DGPUVersionReal\src\generateMap.jl:102
[23] MainMenu(::Base.RefValue{Bool}) at c:\Users\Wiktor\MagneticField3DGPUVersionReal\src\mainMenu.jl:42
[24] top-level scope at c:\Users\Wiktor\MagneticField3DGPUVersionReal\src\MagneticField3D.jl:102
[25] include_string(::Module, ::String, ::String) at .\loading.jl:1008
[26] (::getfield(Main._vscodeserver, Symbol("##9#12")){String,Int64,Int64,String})() at c:\Users\Wiktor\.vscode\extensions\julialang.language-julia-0.12.2\scripts\terminalserver\terminalserver.jl:153
[27] withpath(::getfield(Main._vscodeserver, Symbol("##9#12")){String,Int64,Int64,String}, ::String) at c:\Users\Wiktor\.vscode\extensions\julialang.language-julia-0.12.2\scripts\terminalserver\repl.jl:62
[28] (::getfield(Main._vscodeserver, Symbol("##8#11")){String,Int64,Int64,String})() at c:\Users\Wiktor\.vscode\extensions\julialang.language-julia-0.12.2\scripts\terminalserver\terminalserver.jl:152
[29] hideprompt(::getfield(Main._vscodeserver, Symbol("##8#11")){String,Int64,Int64,String}) at c:\Users\Wiktor\.vscode\extensions\julialang.language-julia-0.12.2\scripts\terminalserver\repl.jl:28
[30] macro expansion at c:\Users\Wiktor\.vscode\extensions\julialang.language-julia-0.12.2\scripts\terminalserver\terminalserver.jl:148 [inlined]
[31] (::getfield(Main._vscodeserver, Symbol("##7#10")))() at .\task.jl:259
in expression starting at c:\Users\Wiktor\MagneticField3DGPUVersionReal\src\MagneticField3D.jl:89
Line 102 of generateMap.jl, referenced in frame [22] of the stack trace, is the second kernel launch shown above (the biotGPU call).