Hello,
I’m trying to use `reduce` (and `mapreduce` and its variants) in the loss function of a Flux neural network, but Zygote throws an error when it runs on the GPU (it works fine on the CPU, though). Are these functions unsupported on GPUs for some particular reason?
Here’s an MWE:
julia> gradient(x -> reduce(+, x), rand(10))
([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],)
julia> gradient(x -> reduce(+, x), CUDA.rand(10))
ERROR: `llvmcall` must be compiled to be called
Stacktrace:
[1] macro expansion
@ ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0 [inlined]
[2] _pullback(::Zygote.Context{false}, ::Core.IntrinsicFunction, ::String, ::Type{Int64}, ::Type{Tuple{Ptr{Int64}}}, ::Ptr{Int64})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:9
[3] _pullback
@ ./atomics.jl:358 [inlined]
[4] _pullback(ctx::Zygote.Context{false}, f::typeof(getindex), args::Base.Threads.Atomic{Int64})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[5] _pullback
@ ~/.julia/packages/CUDA/ZdCxS/lib/utils/threading.jl:25 [inlined]
[6] _pullback
@ ~/.julia/packages/CUDA/ZdCxS/lib/utils/threading.jl:24 [inlined]
[7] _pullback
@ ~/.julia/packages/CUDA/ZdCxS/src/compiler/gpucompiler.jl:5 [inlined]
[8] _pullback(ctx::Zygote.Context{false}, f::typeof(CUDA.device_properties), args::CuDevice)
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[9] _pullback
@ ~/.julia/packages/CUDA/ZdCxS/src/compiler/gpucompiler.jl:49 [inlined]
[10] _pullback(::Zygote.Context{false}, ::CUDA.var"##CUDACompilerTarget#203", ::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, ::typeof(CUDA.CUDACompilerTarget), ::CuDevice)
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[11] _pullback
@ ~/.julia/packages/CUDA/ZdCxS/src/compiler/gpucompiler.jl:48 [inlined]
[12] _pullback
@ ~/.julia/packages/CUDA/ZdCxS/src/compiler/execution.jl:303 [inlined]
[13] _pullback(::Zygote.Context{false}, ::CUDA.var"##cufunction#218", ::Nothing, ::Bool, ::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, ::typeof(cufunction), ::typeof(CUDA.partial_mapreduce_grid), ::Type{Tuple{typeof(identity), typeof(+), Float32, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Val{true}, CuDeviceMatrix{Float32, 1}, CuDeviceVector{Float32, 1}}})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[14] _pullback
@ ~/.julia/packages/CUDA/ZdCxS/src/compiler/execution.jl:299 [inlined]
[15] _pullback(::Zygote.Context{false}, ::typeof(cufunction), ::typeof(CUDA.partial_mapreduce_grid), ::Type{Tuple{typeof(identity), typeof(+), Float32, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Val{true}, CuDeviceMatrix{Float32, 1}, CuDeviceVector{Float32, 1}}})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[16] macro expansion
@ ~/.julia/packages/CUDA/ZdCxS/src/compiler/execution.jl:102 [inlined]
[17] _pullback
@ ~/.julia/packages/CUDA/ZdCxS/src/mapreduce.jl:234 [inlined]
[18] _pullback(::Zygote.Context{false}, ::CUDA.var"##mapreducedim!#282", ::Float32, ::typeof(GPUArrays.mapreducedim!), ::typeof(identity), ::typeof(+), ::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, ::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[19] _pullback
@ ~/.julia/packages/CUDA/ZdCxS/src/mapreduce.jl:169 [inlined]
[20] _pullback(::Zygote.Context{false}, ::GPUArrays.var"#mapreducedim!##kw", ::NamedTuple{(:init,), Tuple{Float32}}, ::typeof(GPUArrays.mapreducedim!), ::typeof(identity), ::typeof(+), ::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, ::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[21] _pullback
@ ~/.julia/packages/GPUArrays/g2pOV/src/host/mapreduce.jl:69 [inlined]
[22] _pullback(::Zygote.Context{false}, ::GPUArrays.var"##_mapreduce#33", ::Colon, ::Nothing, ::typeof(GPUArrays._mapreduce), ::typeof(identity), ::typeof(+), ::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[23] _apply(::Function, ::Vararg{Any})
@ Core ./boot.jl:816
[24] adjoint
@ ~/.julia/packages/Zygote/g2w9o/src/lib/lib.jl:203 [inlined]
[25] _pullback
@ ~/.julia/packages/ZygoteRules/AIbCs/src/adjoint.jl:65 [inlined]
[26] _pullback
@ ~/.julia/packages/GPUArrays/g2pOV/src/host/mapreduce.jl:35 [inlined]
[27] _pullback(::Zygote.Context{false}, ::GPUArrays.var"#_mapreduce##kw", ::NamedTuple{(:dims, :init), Tuple{Colon, Nothing}}, ::typeof(GPUArrays._mapreduce), ::typeof(identity), ::typeof(+), ::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[28] _apply(::Function, ::Vararg{Any})
@ Core ./boot.jl:816
[29] adjoint
@ ~/.julia/packages/Zygote/g2w9o/src/lib/lib.jl:203 [inlined]
[30] _pullback
@ ~/.julia/packages/ZygoteRules/AIbCs/src/adjoint.jl:65 [inlined]
[31] _pullback
@ ~/.julia/packages/GPUArrays/g2pOV/src/host/mapreduce.jl:31 [inlined]
[32] _pullback(::Zygote.Context{false}, ::GPUArrays.var"##mapreduce#31", ::Colon, ::Nothing, ::typeof(mapreduce), ::typeof(identity), ::typeof(+), ::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[33] _apply(::Function, ::Vararg{Any})
@ Core ./boot.jl:816
[34] adjoint
@ ~/.julia/packages/Zygote/g2w9o/src/lib/lib.jl:203 [inlined]
[35] _pullback
@ ~/.julia/packages/ZygoteRules/AIbCs/src/adjoint.jl:65 [inlined]
[36] _pullback
@ ~/.julia/packages/GPUArrays/g2pOV/src/host/mapreduce.jl:31 [inlined]
[37] _pullback(::Zygote.Context{false}, ::typeof(mapreduce), ::typeof(identity), ::typeof(+), ::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[38] _pullback (repeats 2 times)
@ ./reducedim.jl:406 [inlined]
[39] _pullback
@ ./REPL[6]:1 [inlined]
[40] _pullback(ctx::Zygote.Context{false}, f::var"#11#12", args::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface2.jl:0
[41] pullback(f::Function, cx::Zygote.Context{false}, args::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface.jl:44
[42] pullback
@ ~/.julia/packages/Zygote/g2w9o/src/compiler/interface.jl:42 [inlined]
[43] gradient(f::Function, args::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/g2w9o/src/compiler/interface.jl:96
[44] top-level scope
@ REPL[6]:1
[45] top-level scope
@ ~/.julia/packages/CUDA/ZdCxS/src/initialization.jl:155
My versions:
julia> VERSION
v"1.8.5"
(@v1.8) pkg> status
Status `~/.julia/environments/v1.8/Project.toml`
[052768ef] CUDA v4.0.1
[e88e6eb3] Zygote v0.6.55