Hello,
I am trying to port some CPU code to the GPU. The code iterates over a high-dimensional array and performs some operations on each element. Below is an MWE (minimal working example) of what I am doing on the CPU:
"""
    sequential_rand!(x, test)

Add a fresh `rand()` draw to each entry of `x` whose index tuple lies in the
Cartesian product of the ranges `1:length(t)`, one range per element `t` of
`test`. Only the lengths of the elements of `test` are used.

Entries of `x` outside that product are left untouched. Returns `nothing`.
"""
function sequential_rand!(x, test)
    # One 1-based index range per element of `test`.
    extents = (1:length(axis) for axis in test)
    # `Iterators.product` advances its first range fastest, so the draws are
    # consumed in the same order as a column-major walk over the indices.
    foreach(Iterators.product(extents...)) do idx
        x[idx...] = x[idx...] + rand()
    end
    return nothing
end
# Number of array dimensions; every dimension has five entries.
N = 10
# One `1:5` range per dimension (only the lengths are used by the callee).
chromArray = [1:5 for _ in 1:N]
# N-dimensional array of ones with extent 5 along each dimension.
myarr = ones(fill(5, N)...)
sequential_rand!(myarr, chromArray)
For the GPU, I found the following generic for-loop kernel in the CUDA.jl introduction tutorial (https://juliagpu.gitlab.io/CUDA.jl/tutorials/introduction/):
"""
    gpu_add3!(y, x)

Kernel body that adds `x[i]` into `y[i]` for the indices handled by the calling
thread. Each thread starts at its own global 1-based position and then jumps
ahead by the total number of launched threads until it passes `length(y)`.

Bounds checks are disabled with `@inbounds`, so `x` must be indexable at every
index visited in `y`. Returns `nothing`.
"""
function gpu_add3!(y, x)
    # Global 1-based position of this thread across all blocks.
    first_idx = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # Total number of threads in the launch (threads per block × blocks).
    nthreads = gridDim().x * blockDim().x
    for k in first_idx:nthreads:length(y)
        @inbounds y[k] = y[k] + x[k]
    end
    return nothing
end
# Enough 256-thread blocks to cover N elements, rounding up.
numblocks = cld(N, 256)
# NOTE(review): `y_d` and `x_d` are not defined in this snippet; presumably
# they are the device arrays from the tutorial — confirm before running.
fill!(y_d, 2)
@cuda threads=256 blocks=numblocks gpu_add3!(y_d, x_d)
# Copy the result back to the host and check every entry.
@test all(==(3.0f0), Array(y_d))
Is there a way to write such a GPU kernel using `Iterators.product`?