Hi everyone,
I am trying to use the FLoops and FoldsCUDA libraries in Julia as an alternative to OpenACC in CUDA C++ and Fortran. Consider the following code:
using Random, CUDA, FLoops, Test, FoldsCUDA

function main()
    N = 100000
    A = Vector{Int}(shuffle(collect(1:N)))
    C = CuArray{Int}(copy(A))

    # CPU loop: increments the first 10 elements, then exits early
    @floop for i in 1:N
        A[i] = A[i] + 1
        if i == 10
            break
        end
    end

    # GPU loop: intended to do the same on the device, but this one errors
    @floop CUDAEx() for j in 1:N
        C[j] = C[j] + 1
        if j == 10
            break
        end
    end

    println(A == C)
end

main()
The second loop, which runs on the GPU, fails with a strange error. Can someone please explain how to resolve this?
Error:
ERROR: DivideError: integer division error
Stacktrace:
[1] div
@ ./int.jl:288 [inlined]
[2] _derived_array
@ ~/.julia/packages/CUDA/DfvRa/src/array.jl:730 [inlined]
[3] reinterpret(#unused#::Type{Tuple{}}, a::CuArray{Nothing, 1, CUDA.Mem.DeviceBuffer})
@ CUDA ~/.julia/packages/CUDA/DfvRa/src/array.jl:754
[4] #18
@ ~/.julia/packages/UnionArrays/ubZMg/src/impl/vectors.jl:51 [inlined]
[5] macro expansion
@ ~/.julia/packages/Transducers/DSfBv/src/core.jl:155 [inlined]
[6] foldlargs
@ ~/.julia/packages/UnionArrays/ubZMg/src/impl/utils.jl:19 [inlined]
[7] UnionArrays.Impl.UnionVector(ETS::Type{Union{Tuple{}, Transducers.Reduced{Tuple{}}}}, data::CuArray{Nothing, 1, CUDA.Mem.DeviceBuffer}, typeid::CuArray{UInt8, 1, CUDA.Mem.DeviceBuffer})
@ UnionArrays.Impl ~/.julia/packages/UnionArrays/ubZMg/src/impl/vectors.jl:50
[8] UnionArrays.Impl.UnionVector(#unused#::UndefInitializer, DataVectorType::Type{CuArray{T, 1} where T}, TypeTagVectorType::Type{CuArray{UInt8, 1}}, ETS::Type{Union{Tuple{}, Transducers.Reduced{Tuple{}}}}, n::Int64)
@ UnionArrays.Impl ~/.julia/packages/UnionArrays/ubZMg/src/impl/vectors.jl:98
[9] UnionArrays.Impl.UnionVector(undef::UndefInitializer, VectorType::Type{CuArray{T, 1} where T}, ETS::Type{Union{Tuple{}, Transducers.Reduced{Tuple{}}}}, n::Int64)
@ UnionArrays.Impl ~/.julia/packages/UnionArrays/ubZMg/src/impl/vectors.jl:85
[10] #UnionVector#1
@ ~/.julia/packages/UnionArrays/ubZMg/src/UnionArrays.jl:17 [inlined]
[11] UnionArray
@ ~/.julia/packages/UnionArrays/ubZMg/src/UnionArrays.jl:17 [inlined]
[12] allocate_buffer(#unused#::Type{Union{Tuple{}, Transducers.Reduced{Tuple{}}}}, n::Int64)
@ FoldsCUDA ~/.julia/packages/FoldsCUDA/Mo35m/src/kernels.jl:83
[13] _transduce!(buf::Nothing, rf::Transducers.Reduction{Transducers.Map{typeof(first)}, Transducers.BottomRF{Transducers.AdHocRF{var"#__##oninit_function#1359#741", typeof(identity), InitialValues.AdjoinIdentity{var"#__##reducing_function#1360#742"{CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}}}, typeof(identity), typeof(identity), var"#__##combine_function#1361#743"}}}, init::Transducers.InitOf{Transducers.DefaultInitOf}, arrays::UnitRange{Int64})
@ FoldsCUDA ~/.julia/packages/FoldsCUDA/Mo35m/src/kernels.jl:138
[14] transduce_impl(rf::Transducers.Reduction{Transducers.Map{typeof(first)}, Transducers.BottomRF{Transducers.AdHocRF{var"#__##oninit_function#1359#741", typeof(identity), InitialValues.AdjoinIdentity{var"#__##reducing_function#1360#742"{CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}}}, typeof(identity), typeof(identity), var"#__##combine_function#1361#743"}}}, init::Transducers.InitOf{Transducers.DefaultInitOf}, arrays::UnitRange{Int64})
@ FoldsCUDA ~/.julia/packages/FoldsCUDA/Mo35m/src/kernels.jl:32
[15] _transduce_cuda(op::Function, init::Transducers.InitOf{Transducers.DefaultInitOf}, xs::UnitRange{Int64})
@ FoldsCUDA ~/.julia/packages/FoldsCUDA/Mo35m/src/kernels.jl:18
[16] #_transduce_cuda#5
@ ~/.julia/packages/FoldsCUDA/Mo35m/src/kernels.jl:1 [inlined]
[17] _transduce_cuda
@ ~/.julia/packages/FoldsCUDA/Mo35m/src/kernels.jl:1 [inlined]
[18] transduce
@ ~/.julia/packages/FoldsCUDA/Mo35m/src/api.jl:45 [inlined]
[19] _fold
@ ~/.julia/packages/FLoops/6PVny/src/reduce.jl:851 [inlined]
[20] macro expansion
@ ~/.julia/packages/FLoops/6PVny/src/reduce.jl:829 [inlined]
[21] main()
@ Main ~/logistics_updated/logistics_nethaji/removing_element copy2.jl:18
[22] top-level scope
@ ~/logistics_updated/logistics_nethaji/removing_element copy2.jl:28
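For reference, one workaround I am considering (untested, and I am not sure it is the right approach) is to drop the break and shrink the iteration range instead, since the loop only ever touches the first 10 elements before exiting anyway:

    # Untested idea: express the early exit as a smaller range instead of a
    # `break`, so the GPU fold never needs to terminate early.
    @floop CUDAEx() for j in 1:min(10, N)
        C[j] = C[j] + 1
    end

But I would still like to understand why the version with break fails on the GPU while the CPU loop runs fine.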