I’m writing my first small programs with CUDA, using the FLoops interface. I am probably missing some basics here, so a brief explanation of this error would likely point me in the right direction:
julia> using CUDA, FLoops, FoldsCUDA
julia> function test(x, ex = ThreadedEx())
# Sum the elements of `x` with an `@floop` loop run on the executor `ex`
# (defaults to `ThreadedEx()`; the post also calls it with `CUDAEx()`).
@floop ex for val in x
# `u` is the reduction accumulator: it starts from `zero(eltype(x))` and each
# `val` is combined into it with `+`.
@reduce(u = zero(eltype(x)) + val)
end
# `u` is introduced by `@reduce` above; return the reduced value.
return u
end
test (generic function with 2 methods)
julia> x = Float32[1,2,3];
julia> test(x)
6.0f0
julia> test(CuArray(x),CUDAEx())
ERROR: accumulator type must be `isbits` or `isbitsunion`; got: Union{}
What is wrong with the definition of my accumulator (`u = zero(eltype(x)) + val`)?
@tkf, I seem to get the same error with the example from the FoldsCUDA docs:
julia> function counters(n)
# Build an evenly spaced range of `UInt64` counter values: start at 0, step by
# `typemax(UInt64) ÷ n`, and stop no later than `typemax(UInt64) - stride`.
# NOTE(review): presumably meant to yield about `n` counters — confirm for the
# values of `n` actually used.
stride = typemax(UInt64) ÷ n
return UInt64(0):stride:typemax(UInt64)-stride
end
counters (generic function with 1 method)
julia> function monte_carlo_pi(n, m = 10_000, ex = has_cuda_gpu() ? CUDAEx() : ThreadedEx())
# Monte Carlo estimate of pi: for each of the counters from `counters(n)`, draw
# `m` random points and count how many satisfy `x^2 + y^2 < 1`; the counts are
# summed across iterations and scaled by `4 / (n * m)`.
# The executor defaults to `CUDAEx()` when `has_cuda_gpu()` is true, otherwise
# `ThreadedEx()`.
# NOTE(review): `Philox2x` and `set_counter!` do not appear to be provided by
# the packages on the `using` line shown earlier in this transcript (they look
# like Random123 names) — confirm `using Random123` was run before this call.
@floop ex for ctr in counters(n)
# One RNG per iteration, positioned at this iteration's counter value.
rng = set_counter!(Philox2x(0), ctr)
nhits = 0
for _ in 1:m
x = rand(rng)
y = rand(rng)
# The `Bool` comparison result is added as 0 or 1.
nhits += x^2 + y^2 < 1
end
# `tot` is the reduction accumulator: starts at the `Int` literal 0, combined
# with `+` across iterations.
@reduce(tot = 0 + nhits)
end
return 4 * tot / (n * m)
end
monte_carlo_pi (generic function with 3 methods)
julia> monte_carlo_pi(2^12)
ERROR: accumulator type must be `isbits` or `isbitsunion`; got: Tuple{Tuple{Any}}