I’m not sure whether I should file an issue on GitHub because this is a bug, or post a plea for help here because I’m doing something wrong. Riffing on the “multiple GPUs per process” example, I added an Array of CuArrays to it and tried to be careful about garbage collection, but I’m getting “ERROR: CUDA error: an illegal memory access was encountered”:
julia> using CUDA
julia> function alloc(x)
# Reduce the argument with `sum`; the result is discarded, so this call only
# matters for whatever GPU work it triggers.
sum(x) # commenting out this line results in no errors
# Return a new 3-element zero-filled array from `CUDA.zeros`
# (presumably allocated on the currently active device — confirm).
CUDA.zeros(3)
end
alloc (generic function with 1 method)
julia> results = Vector{Any}(undef, ndevices())  # one uninitialized slot per device; filled by the per-device tasks below
2-element Vector{Any}:
#undef
#undef
julia> not_used = [(device!(idevice-1); CuArray([1,2,3])) for idevice=1:ndevices()];  # one CuArray per device, each created right after switching to device idevice-1
julia> synchronize()  # presumably waits for outstanding GPU work before the tasks start — confirm which device/stream this covers
julia> GC.@preserve not_used begin
# Start one task per device; `@sync` waits for all of them before continuing.
@sync for idevice = 1:ndevices()
@async begin
# Loop index is 1-based, `device!` is called with idevice-1 (0 .. ndevices()-1).
device!(idevice-1)
# NOTE(review): every task passes the whole `not_used` vector, whose elements were
# created under different active devices. `sum` inside `alloc` therefore combines
# CuArrays from different devices while a single device is active — presumably the
# source of the illegal memory access; confirm.
# `Array(...)` copies the returned GPU array to host memory before storing it.
results[idevice] = Array(alloc(not_used))
end
end
end
ERROR: CUDA error: an illegal memory access was encountered (code 700, ERROR_ILLEGAL_ADDRESS)
Stacktrace:
[1] throw_api_error(res::CUDA.cudaError_enum)
@ CUDA /groups/scicompsoft/home/arthurb/.julia/packages/CUDA/s0e3j/lib/cudadrv/libcuda.jl:27
[2] isdone
@ /groups/scicompsoft/home/arthurb/.julia/packages/CUDA/s0e3j/lib/cudadrv/stream.jl:109 [inlined]
[3] nonblocking_synchronize
@ /groups/scicompsoft/home/arthurb/.julia/packages/CUDA/s0e3j/lib/cudadrv/stream.jl:139 [inlined]
[4] nonblocking_synchronize
@ /groups/scicompsoft/home/arthurb/.julia/packages/CUDA/s0e3j/lib/cudadrv/context.jl:325 [inlined]
[5] device_synchronize()
@ CUDA /groups/scicompsoft/home/arthurb/.julia/packages/CUDA/s0e3j/lib/cudadrv/context.jl:319
[6] top-level scope
@ /groups/scicompsoft/home/arthurb/.julia/packages/CUDA/s0e3j/src/initialization.jl:164
caused by: WARNING: Error while freeing DeviceBuffer(12 bytes at 0x0000000402000400):
CUDA.CuError(code=CUDA.cudaError_enum(0x000002bc), meta=nothing)