Thank you very much for your response. I tried to implement it but am now running into another issue.
I am now processing the data as suggested, as a vector of matrices, so the shape is ((features, batch), timesteps): a length-timesteps vector whose elements are (features, batch) matrices. My chain simply recurs through each matrix and returns the final output matrix. The loss is then computed column-wise against a target matrix.
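For concreteness, here is a minimal sketch of the layout I mean (the sizes are placeholders matching the MWE below):

n, p, m = 5, 3, 4                         # features, batch size, timesteps
x = [randn(Float32, n, p) for _ in 1:m]   # a length-m vector of (features, batch) matrices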
I tried to compute the gradient of this model in order to train it, but I encountered an error that I neither understand nor know how to solve. It occurs when calculating the gradient over all the data together, as seen in the last two lines of my MWE below. Any help solving the error would be very much appreciated. Also, please let me know if there is any other way I should implement your suggestion.
Thank you,
Jack
using Flux
using MLUtils
using CUDA
struct RNNModel
    rnn_model::Flux.Recur
end
function (m::RNNModel)(data)
    cur = nothing
    for i in data  # one (features, batch) matrix per timestep
        println("hi!")
        @info typeof(data)
        println(typeof(i))
        println("typed")
        println(i)
        println("printed")
        cur = m.rnn_model(i::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
    end
    return cur
end
Flux.@functor RNNModel
n = 5  # number of features
m = 4  # number of timesteps
p = 3  # batch size
function printdata(data)
    println(data)
    println(typeof(data))
    println("data printed")
    return data
end
rnn = RNN(n, n) |> gpu
rnns = RNNModel(rnn) |> gpu
c = Chain(printdata, rnns, softmax) |> gpu
loss(x::Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, y_target::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}) =
    Flux.Losses.crossentropy(c(x), y_target)
x_train = [randn(n, p) for i in 1:m] |> gpu
y_train = randn(n, p) |> softmax |> gpu
y_predict = c(x_train)
loss(x_train, y_train)
Flux.Optimise.train!(loss, Flux.params(c), [(x_train, y_train)], Descent(0.001))  # errors here
gradient(() -> loss(x_train, y_train), Flux.params(c))                            # and here
ERROR: Compiling Tuple{RNNModel, Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}: try/catch is not supported.
Refer to the Zygote documentation for fixes.
https://fluxml.ai/Zygote.jl/dev/limitations.html#Try-catch-statements-1
Stacktrace:
[1] error(s::String)
@ Base .\error.jl:33
[2] instrument(ir::IRTools.Inner.IR)
@ Zygote C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\reverse.jl:121
[3] #Primal#23
@ C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\reverse.jl:205 [inlined]
[4] Zygote.Adjoint(ir::IRTools.Inner.IR; varargs::Nothing, normalise::Bool)
@ Zygote C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\reverse.jl:322
[5] _generate_pullback_via_decomposition(T::Type)
@ Zygote C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\emit.jl:101
[6] #s3106#1162
@ C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\interface2.jl:28 [inlined]
[7] var"#s3106#1162"(::Any, ctx::Any, f::Any, args::Any)
@ Zygote .\none:0
[8] (::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any})
@ Core .\boot.jl:580
[9] _pullback
@ C:\Users\jackn\.julia\packages\Flux\BPPNj\src\layers\basic.jl:47 [inlined]
[10] _pullback(::Zygote.Context, ::typeof(Flux.applychain), ::Tuple{RNNModel, typeof(softmax)}, ::Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}})
@ Zygote C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\interface2.jl:0
[11] _pullback
@ C:\Users\jackn\.julia\packages\Flux\BPPNj\src\layers\basic.jl:47 [inlined]
[12] _pullback(::Zygote.Context, ::typeof(Flux.applychain), ::Tuple{typeof(printdata), RNNModel, typeof(softmax)}, ::Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}})
@ Zygote C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\interface2.jl:0
[13] _pullback
@ C:\Users\jackn\.julia\packages\Flux\BPPNj\src\layers\basic.jl:49 [inlined]
[14] _pullback(ctx::Zygote.Context, f::Chain{Tuple{typeof(printdata), RNNModel, typeof(softmax)}}, args::Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}})
@ Zygote C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\interface2.jl:0
[15] _pullback
@ c:\Users\jackn\Desktop\Julia Learning Code\FluxQuestion.jl:46 [inlined]
[16] _pullback(::Zygote.Context, ::typeof(loss), ::Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, ::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ Zygote C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\interface2.jl:0
[17] _pullback
@ c:\Users\jackn\Desktop\Julia Learning Code\FluxQuestion.jl:55 [inlined]
[18] _pullback(::Zygote.Context, ::var"#9#10")
@ Zygote C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\interface2.jl:0
[19] pullback(f::Function, ps::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}})
@ Zygote C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\interface.jl:352
[20] gradient(f::Function, args::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}})
@ Zygote C:\Users\jackn\.julia\packages\Zygote\IoW2g\src\compiler\interface.jl:75
[21] top-level scope
@ c:\Users\jackn\Desktop\Julia Learning Code\FluxQuestion.jl:55
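The error message points at a try/catch, and as far as I understand the @info logging macro expands to code that contains a try/catch block, so my guess is that the debug printing is what Zygote is choking on rather than the RNN itself. For reference, this is roughly the stripped-down forward pass I would try next (just a sketch with all logging removed, not yet tested on my side):

function (m::RNNModel)(data)
    cur = nothing
    for i in data
        # no printing here: Zygote should be able to trace a plain loop
        cur = m.rnn_model(i)
    end
    return cur
end

Alternatively, I believe the logging could be kept if it is hidden from the AD transform, e.g. with Zygote.ignore (again, an untested sketch):

Zygote.ignore() do
    @info typeof(data)  # runs on the forward pass, skipped when differentiating
end

Does that sound like the right diagnosis, or is something else in my setup wrong?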