I have several matrices of several different lengths. I want to feed these matrices through an RNN using Flux, however, I haven’t been able to do so. Everything I’ve tried either uses a lot of memory during the backward pass, or gives an error during the backward pass. The example below works for this small example, but my 16 gigs of RAM fill up fast in my real use case, which has a 100 length input vector and a 30 length output vector.
Am I doing this right?
using Flux
using Zygote
# Embed variable-length sequences with an RNN, batching all sequences of equal
# length together so each time step runs through the RNN as one matrix.
# `encodings` pairs an original index with its sequence of input vectors; the
# result pairs each index with the RNN output after the final time step,
# returned sorted by original index.
# NOTE(review): the author reports this fails in the backward pass with
# DimensionMismatch("cannot broadcast array to have fewer dimensions").
# Suspects: the stateful mutation done by `Flux.reset!`/the RNN call inside the
# differentiated region, and/or the Buffer of tuples — confirm against the
# Flux/Zygote versions in use.
function embed(encodings::Vector{Tuple{Int,Vector{Vector{Float32}}}}, embedder)
# Zygote.Buffer permits indexed writes inside differentiated code.
embeddings = Zygote.Buffer(encodings, Tuple{Int,Vector{Float32}})
# Group by sequence length: one batched RNN run per distinct length.
for encoding_length in 1:maximum(e -> length(e[2]), encodings)
of_length = filter(e -> length(e[2]) == encoding_length, encodings)
if isempty(of_length); continue end
# Clear the recurrent state before feeding a new batch.
Flux.reset!(embedder)
local embedded
for i in 1:encoding_length
# Time step i: hcat the i-th vector of every sequence into one matrix
# (features × batch) and push it through the RNN.
embedded = embedder(reduce(hcat, map(e -> e[2][i], of_length)))
Zygote.@ignore @assert size(embedded, 2) == length(of_length)
end
# Scatter each final hidden column back to its original index slot.
for (i_embedded, i0) in enumerate(map(e -> e[1], of_length))
embeddings[i0] = (i0, embedded[:,i_embedded])
end
end
embeddings_vector = copy(embeddings)
# Sanity check (not differentiated): output is ordered by original index.
Zygote.@ignore @assert embeddings_vector == sort(embeddings_vector)
embeddings_vector
end
# Embed each variable-length sequence one at a time (unbatched), keeping only
# the RNN output of the final time step. Simpler than `embed` but slower and
# more memory-hungry, since every sequence is a separate RNN run.
function embed_basic(encodings::Vector{Tuple{Int,Vector{Vector{Float32}}}}, embedder)
# Zygote.Buffer permits indexed writes inside differentiated code.
embeddings = Zygote.Buffer(encodings, Tuple{Int,Vector{Float32}})
for (i, e) in encodings
# Bug fix: clear the recurrent state before each sequence. Without this
# the hidden state leaks from one sequence into the next, so each
# embedding depended on iteration order — the batched `embed` version
# already resets per batch; this makes the two consistent.
Flux.reset!(embedder)
# Run the whole sequence through the RNN; keep only the last output.
embeddings[i] = (i, embedder.(e)[end])
end
embeddings_vector = copy(embeddings)
# Sanity check (not differentiated): output is ordered by original index.
Zygote.@ignore @assert embeddings_vector == sort(embeddings_vector)
embeddings_vector
end
# Build a tiny LSTM and five toy sequences (two distinct lengths: 3 and 2 time
# steps), print the embeddings, then take a gradient through `embed_basic` to
# exercise the backward pass.
function demo()
# LSTM mapping 2 input features to a 3-dimensional hidden state.
nn = LSTM(2, 3)
# (index, sequence-of-input-vectors) pairs; inner literals promote to Float32.
encodings = [
(1, [[1f0, 2], [3f0, 4], [5f0, 6]]),
(2, [[1f0, 2], [3f0, 4], [5f0, 6]]),
(3, [[1f0, 2], [3f0, 4]]),
(4, [[1f0, 2], [3f0, 4]]),
(5, [[1f0, 2], [3f0, 4]])]
@show embed_basic(encodings, nn)
# Differentiate a scalar loss (sum of all embedding entries) w.r.t. the
# LSTM's trainable parameters.
g = gradient(params(nn)) do
e1 = embed_basic(encodings, nn)
# Stack the per-sequence embedding vectors into one matrix.
e2 = reduce(hcat, map(x -> x[2], e1))
@show e2
sum(e2)
end
end
demo()  # run the demonstration when the script is executed
Note that embed_basic
works, but uses too much RAM. embed
gives an error during the backward pass, but is faster during the forward pass. The error it gives is: ERROR: LoadError: DimensionMismatch("cannot broadcast array to have fewer dimensions")
(Also, I may not understand what “embed” means. I think it’s like an encoding, but one created automatically by a neural network. They’re just variable names, of course. If they’re named poorly, the compiler doesn’t care.)