Thanks for the input. I'm still getting used to Zygote's internals. I managed to reproduce the error using only CUDA, LinearAlgebra, and Zygote:
using CUDA, LinearAlgebra, Zygote
# Return the first entry of the singular-value vector `svd(X).S`. That entry is the
# largest singular value provided `S` is sorted in descending order, as
# `LinearAlgebra.svd` documents (presumably the GPU method does the same — confirm).
# `CUDA.@allowscalar` is applied to the expression so the scalar read `S[1]` is
# permitted when `X` is a GPU array.
# NOTE(review): the stack trace below places the disallowed `getindex` inside
# ChainRules' `svd_rev`, reached through `task_local_storage_pullback`, so the reverse
# pass appears to run outside the scope covered by this macro — confirm.
function snorm(X)
return CUDA.@allowscalar svd(X).S[1]
end
# Differentiate `snorm` with respect to `W` using Zygote's `gradient`.
# Per Zygote's documentation, `gradient` returns a tuple with one entry per argument.
dL(W) = gradient(X->snorm(X),W)
# Evaluate the gradient at a random 3×2 GPU matrix (a Float32 `CuArray` according to
# the stack trace below); this is the call that raises the error.
dL(CUDA.rand(3,2))
Running this yields:
ERROR: LoadError: Scalar indexing is disallowed.
Invocation of getindex resulted in scalar indexing of a GPU array.
This is typically caused by calling an iterating implementation of a method.
Such implementations *do not* execute on the GPU, but very slowly on the CPU,
and therefore are only permitted from the REPL for prototyping purposes.
If you did intend to index this array, annotate the caller with @allowscalar.
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] assertscalar(op::String)
@ GPUArraysCore ~/.julia/packages/GPUArraysCore/HaQcr/src/GPUArraysCore.jl:103
[3] getindex
@ ~/.julia/packages/GPUArrays/6STCb/src/host/indexing.jl:9 [inlined]
[4] svd_rev(USV::SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Ū::ChainRulesCore.ZeroTangent, s̄::Zygote.OneElement{Float32, 1, Tuple{Int64}, Tuple{Base.OneTo{Int64}}}, V̄::ChainRulesCore.ZeroTangent)
@ ChainRules ~/.julia/packages/ChainRules/bEtjZ/src/rulesets/LinearAlgebra/factorization.jl:256
[5] _svd_pullback
@ ~/.julia/packages/ChainRules/bEtjZ/src/rulesets/LinearAlgebra/factorization.jl:219 [inlined]
[6] svd_pullback
@ ~/.julia/packages/ChainRules/bEtjZ/src/rulesets/LinearAlgebra/factorization.jl:225 [inlined]
[7] (::Zygote.ZBack{ChainRules.var"#svd_pullback#2113"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}})(dy::NamedTuple{(:U, :S, :Vt), Tuple{Nothing, Zygote.OneElement{Float32, 1, Tuple{Int64}, Tuple{Base.OneTo{Int64}}}, Nothing}})
@ Zygote ~/.julia/packages/Zygote/oGI57/src/compiler/chainrules.jl:211
[8] Pullback
@ ~/.julia/packages/GPUArraysCore/HaQcr/src/GPUArraysCore.jl:125 [inlined]
[9] (::Zygote.Pullback{Tuple{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Tuple{Zygote.ZBack{ChainRules.var"#svd_pullback#2113"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Zygote.ZBack{ChainRules.var"#getproperty_svd_pullback#2114"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Symbol}}, Zygote.var"#2077#back#218"{Zygote.var"#back#217"{:X, Zygote.Context{false}, var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.Pullback{Tuple{typeof(Zygote.literal_getindex), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Val{1}}, Tuple{Zygote.var"#2427#back#375"{Zygote.var"#379#381"{1, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Tuple{Int64}}}}}}})(Δ::Float32)
@ Zygote ~/.julia/packages/Zygote/oGI57/src/compiler/interface2.jl:0
[10] (::Zygote.var"#ad_pullback#50"{Tuple{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.Pullback{Tuple{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Tuple{Zygote.ZBack{ChainRules.var"#svd_pullback#2113"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Zygote.ZBack{ChainRules.var"#getproperty_svd_pullback#2114"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Symbol}}, Zygote.var"#2077#back#218"{Zygote.var"#back#217"{:X, Zygote.Context{false}, var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.Pullback{Tuple{typeof(Zygote.literal_getindex), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Val{1}}, Tuple{Zygote.var"#2427#back#375"{Zygote.var"#379#381"{1, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Tuple{Int64}}}}}}}})(Δ::Float32)
@ Zygote ~/.julia/packages/Zygote/oGI57/src/compiler/chainrules.jl:263
[11] (::ChainRules.var"#task_local_storage_pullback#1257"{Zygote.var"#ad_pullback#50"{Tuple{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.Pullback{Tuple{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Tuple{Zygote.ZBack{ChainRules.var"#svd_pullback#2113"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Zygote.ZBack{ChainRules.var"#getproperty_svd_pullback#2114"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Symbol}}, Zygote.var"#2077#back#218"{Zygote.var"#back#217"{:X, Zygote.Context{false}, var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.Pullback{Tuple{typeof(Zygote.literal_getindex), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Val{1}}, Tuple{Zygote.var"#2427#back#375"{Zygote.var"#379#381"{1, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Tuple{Int64}}}}}}}}})(dy::Float32)
@ ChainRules ~/.julia/packages/ChainRules/bEtjZ/src/rulesets/Base/base.jl:261
[12] (::Zygote.ZBack{ChainRules.var"#task_local_storage_pullback#1257"{Zygote.var"#ad_pullback#50"{Tuple{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.Pullback{Tuple{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Tuple{Zygote.ZBack{ChainRules.var"#svd_pullback#2113"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Zygote.ZBack{ChainRules.var"#getproperty_svd_pullback#2114"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Symbol}}, Zygote.var"#2077#back#218"{Zygote.var"#back#217"{:X, Zygote.Context{false}, var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.Pullback{Tuple{typeof(Zygote.literal_getindex), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Val{1}}, Tuple{Zygote.var"#2427#back#375"{Zygote.var"#379#381"{1, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Tuple{Int64}}}}}}}}}})(dy::Float32)
@ Zygote ~/.julia/packages/Zygote/oGI57/src/compiler/chainrules.jl:211
[13] macro expansion
@ ~/.julia/packages/GPUArraysCore/HaQcr/src/GPUArraysCore.jl:124 [inlined]
[14] Pullback
@ ~/NODE_Community_Forecast/test.jl:4 [inlined]
[15] (::Zygote.Pullback{Tuple{typeof(snorm), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Tuple{Zygote.ZBack{ChainRules.var"#task_local_storage_pullback#1257"{Zygote.var"#ad_pullback#50"{Tuple{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.Pullback{Tuple{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Tuple{Zygote.ZBack{ChainRules.var"#svd_pullback#2113"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Zygote.ZBack{ChainRules.var"#getproperty_svd_pullback#2114"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Symbol}}, Zygote.var"#2077#back#218"{Zygote.var"#back#217"{:X, Zygote.Context{false}, var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.Pullback{Tuple{typeof(Zygote.literal_getindex), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Val{1}}, Tuple{Zygote.var"#2427#back#375"{Zygote.var"#379#381"{1, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Tuple{Int64}}}}}}}}}}, Zygote.var"#1923#back#149"{Zygote.var"#147#148"{Zygote.Context{false}, GlobalRef, GPUArraysCore.ScalarIndexing}}, Zygote.var"#2100#back#226"{Zygote.Jnew{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Nothing, false}}}})(Δ::Float32)
@ Zygote ~/.julia/packages/Zygote/oGI57/src/compiler/interface2.jl:0
[16] Pullback
@ ~/NODE_Community_Forecast/test.jl:7 [inlined]
[17] (::Zygote.var"#60#61"{Zygote.Pullback{Tuple{var"#3#4", CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Tuple{Zygote.Pullback{Tuple{typeof(snorm), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Tuple{Zygote.ZBack{ChainRules.var"#task_local_storage_pullback#1257"{Zygote.var"#ad_pullback#50"{Tuple{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.Pullback{Tuple{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Tuple{Zygote.ZBack{ChainRules.var"#svd_pullback#2113"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Zygote.ZBack{ChainRules.var"#getproperty_svd_pullback#2114"{SVD{Float32, Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Symbol}}, Zygote.var"#2077#back#218"{Zygote.var"#back#217"{:X, Zygote.Context{false}, var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.Pullback{Tuple{typeof(Zygote.literal_getindex), CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Val{1}}, Tuple{Zygote.var"#2427#back#375"{Zygote.var"#379#381"{1, Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Tuple{Int64}}}}}}}}}}, Zygote.var"#1923#back#149"{Zygote.var"#147#148"{Zygote.Context{false}, GlobalRef, GPUArraysCore.ScalarIndexing}}, Zygote.var"#2100#back#226"{Zygote.Jnew{var"#1#2"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Nothing, false}}}}}}})(Δ::Float32)
@ Zygote ~/.julia/packages/Zygote/oGI57/src/compiler/interface.jl:45
[18] gradient(f::Function, args::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/oGI57/src/compiler/interface.jl:97
[19] dL(W::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ Main ~/NODE_Community_Forecast/test.jl:7
[20] top-level scope
@ ~/NODE_Community_Forecast/test.jl:8
in expression starting at /home/jarroyoesquivel/NODE_Community_Forecast/test.jl:8
srun: error: vgpu-002: task 0: Exited with exit code 1