Variable scoping issue when using multiple GPUs in CUDA.jl

I ended up using a `Vector` to hold a reference to each device's `CuArray`, so that the arrays created in the first loop are still accessible in the second loop:

    ndevices = length(CUDA.devices())

    # One slot per device: holding each device array here keeps it reachable
    # after the launch loop, where the loop-local binding is gone.
    a_gpu_list = Vector{CuArray{T, 1}}(undef, ndevices)

    # Upload each device's slice of `a` and launch the kernel on that device
    for idev in 1:ndevices
        CUDA.device!(idev - 1)  # loop index is 1-based; device! is given idev - 1
        chunk = idx[idev]
        dev_chunk = CuArray(view(a, chunk))

        # One thread per element of this device's slice
        @cuda threads=length(chunk) kernel!(dev_chunk)
        a_gpu_list[idev] = dev_chunk
    end

    # Copy each device's result back into the matching slice of `a`
    for idev in 1:ndevices
        CUDA.device!(idev - 1)
        host_chunk = Array(a_gpu_list[idev])
        a[idx[idev]] .= host_chunk
    end

I’m open to more efficient solutions to handle this.