Is it possible that an issue remains with atomic operations in shared memory following the fix in CUDAnative #642? The following variation on the test that was introduced with that fix works as expected:
using CUDA
# Working baseline: performs ONE atomic add into shared memory, then
# accumulates shared[1] into x[1] with an atomic add.
#
# `x` is the device array that receives the result; only its first element is
# written (via `pointer(x, 1)`).
function kernel3(x)
# Per-thread index within the block, used to select this thread's shared slot.
tid = threadIdx().x
# Statically sized shared-memory array holding 4 Float32 values.
shared = @cuStaticSharedMem(Float32, 4)
# Every thread that runs the kernel sets all elements to 1f0.
fill!(shared, 1f0)
# Barrier so the initialisation is complete before any atomic add reads it.
sync_threads()
# Atomically add shared[tid + 2] into shared[tid].
# NOTE(review): with the 2-thread launch used in this report and 1-based
# thread indices, this presumably writes slots 1-2 and reads slots 3-4 — confirm.
CUDA.atomic_add!(pointer(shared, tid), shared[tid + 2])
# Barrier before shared[1] is read back below.
sync_threads()
# Each thread atomically adds the current shared[1] into x[1].
CUDA.atomic_add!(pointer(x, 1), shared[1])
return
end
# Zero-initialised 4-element device array; the kernel only accumulates into x[1].
x = CUDA.zeros(4)
# Launch the baseline kernel with 2 threads.
@cuda threads = 2 kernel3(x)
# Display the result at the REPL (reported as working for this variant).
x
However, it throws an error if the atomic add within shared memory is repeated a second time (a simplification of the iterations that would happen within a loop). The operations seem quite legitimate to me, given the sync_threads() call
that occurs between those atomic adds. Am I missing something?
# Failing reproduction: identical to kernel3 except that the atomic add into
# shared memory is performed TWICE, with a barrier between the two adds.
# The report states that this variant ends in ERROR_ILLEGAL_ADDRESS (code 700).
#
# `x` is the device array that receives the result; only its first element is
# written (via `pointer(x, 1)`).
function kernel4(x)
# Per-thread index within the block, used to select this thread's shared slot.
tid = threadIdx().x
# Statically sized shared-memory array holding 4 Float32 values.
shared = @cuStaticSharedMem(Float32, 4)
# Every thread that runs the kernel sets all elements to 1f0.
fill!(shared, 1f0)
# Barrier so the initialisation is complete before any atomic add reads it.
sync_threads()
# First atomic add: shared[tid] += shared[tid + 2].
CUDA.atomic_add!(pointer(shared, tid), shared[tid + 2])
# Barrier separating the two shared-memory atomic adds.
sync_threads()
# Second, repeated atomic add on the same slots — the only difference from
# kernel3 and the statement the report associates with the failure.
CUDA.atomic_add!(pointer(shared, tid), shared[tid + 2])
# Barrier before shared[1] is read back below.
sync_threads()
# Each thread atomically adds the current shared[1] into x[1].
CUDA.atomic_add!(pointer(x, 1), shared[1])
return
end
# Fresh zero-initialised 4-element device array for the failing variant.
x = CUDA.zeros(4)
# Launch the failing kernel with 2 threads.
@cuda threads = 2 kernel4(x)
# Displaying x copies it to the host (cuMemcpyDtoH_v2 in the trace below);
# that copy is where the illegal-memory-access error is reported.
x
4-element CuArray{Float32,1}:
ERROR: CUDA error: an illegal memory access was encountered (code 700, ERROR_ILLEGAL_ADDRESS)
Stacktrace:
[1] throw_api_error(::CUDA.cudaError_enum) at C:\Users\jerem\.julia\packages\CUDA\YeS8q\lib\cudadrv\error.jl:97
[2] macro expansion at C:\Users\jerem\.julia\packages\CUDA\YeS8q\lib\cudadrv\error.jl:104 [inlined]
[3] cuMemcpyDtoH_v2(::Ptr{Float32}, ::CuPtr{Float32}, ::Int64) at C:\Users\jerem\.julia\packages\CUDA\YeS8q\lib\utils\call.jl:93
[4] #unsafe_copyto!#6 at C:\Users\jerem\.julia\packages\CUDA\YeS8q\lib\cudadrv\memory.jl:395 [inlined]
[5] unsafe_copyto! at C:\Users\jerem\.julia\packages\CUDA\YeS8q\lib\cudadrv\memory.jl:388 [inlined]
[6] unsafe_copyto! at C:\Users\jerem\.julia\packages\CUDA\YeS8q\src\array.jl:299 [inlined]
[7] copyto!(::Array{Float32,1}, ::Int64, ::CuArray{Float32,1}, ::Int64, ::Int64) at C:\Users\jerem\.julia\packages\CUDA\YeS8q\src\array.jl:268
[8] copyto! at C:\Users\jerem\.julia\packages\CUDA\YeS8q\src\array.jl:272 [inlined]
[9] copyto_axcheck! at .\abstractarray.jl:946 [inlined]
[10] Array at .\array.jl:562 [inlined]
[11] Array at .\boot.jl:430 [inlined]
[12] convert at .\array.jl:554 [inlined]
[13] adapt_storage at C:\Users\jerem\.julia\packages\CUDA\YeS8q\src\array.jl:243 [inlined]
[14] adapt_structure at C:\Users\jerem\.julia\packages\Adapt\8kQMV\src\Adapt.jl:42 [inlined]
[15] adapt at C:\Users\jerem\.julia\packages\Adapt\8kQMV\src\Adapt.jl:40 [inlined]
[16] convert_to_cpu at C:\Users\jerem\.julia\packages\GPUArrays\jhRU7\src\host\abstractarray.jl:45 [inlined]
[17] print_array at C:\Users\jerem\.julia\packages\GPUArrays\jhRU7\src\host\abstractarray.jl:50 [inlined]
[18] show(::IOContext{REPL.Terminals.TTYTerminal}, ::MIME{Symbol("text/plain")}, ::CuArray{Float32,1}) at .\arrayshow.jl:358
[19] display(::REPL.REPLDisplay, ::MIME{Symbol("text/plain")}, ::Any) at C:\Users\jerem\AppData\Local\Programs\Julia-1.5.2\share\julia\stdlib\v1.5\REPL\src\REPL.jl:214
[20] display(::REPL.REPLDisplay, ::Any) at C:\Users\jerem\AppData\Local\Programs\Julia-1.5.2\share\julia\stdlib\v1.5\REPL\src\REPL.jl:218
[21] display(::Any) at .\multimedia.jl:328
[22] #invokelatest#1 at .\essentials.jl:710 [inlined]
[23] invokelatest at .\essentials.jl:709 [inlined]
[24] (::VSCodeServer.var"#61#65"{String,Int64,Int64,String,Module,Bool,VSCodeServer.ReplRunCodeRequestParams})() at c:\Users\jerem\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\eval.jl:157
[25] withpath(::VSCodeServer.var"#61#65"{String,Int64,Int64,String,Module,Bool,VSCodeServer.ReplRunCodeRequestParams}, ::String) at c:\Users\jerem\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\repl.jl:124
[26] (::VSCodeServer.var"#60#64"{String,Int64,Int64,String,Module,Bool,Bool,VSCodeServer.ReplRunCodeRequestParams})() at c:\Users\jerem\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\eval.jl:142
[27] hideprompt(::VSCodeServer.var"#60#64"{String,Int64,Int64,String,Module,Bool,Bool,VSCodeServer.ReplRunCodeRequestParams}) at c:\Users\jerem\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\repl.jl:36
[28] (::VSCodeServer.var"#59#63"{String,Int64,Int64,String,Module,Bool,Bool,VSCodeServer.ReplRunCodeRequestParams})() at c:\Users\jerem\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\eval.jl:110
[29] with_logstate(::Function, ::Any) at .\logging.jl:408
[30] with_logger at .\logging.jl:514 [inlined]
[31] (::VSCodeServer.var"#58#62"{VSCodeServer.ReplRunCodeRequestParams})() at c:\Users\jerem\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\eval.jl:109
[32] #invokelatest#1 at .\essentials.jl:710 [inlined]
[33] invokelatest(::Any) at .\essentials.jl:709
[34] macro expansion at c:\Users\jerem\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\eval.jl:27 [inlined]
[35] (::VSCodeServer.var"#56#57")() at .\task.jl:356