The problem is in the line `x_batch, y_batch = x_batch |> gpu, y_batch |> gpu`: when I move the batches to the GPU, Zygote's `pullback` fails. If I remove that line everything works (on the CPU) — but how can I make it work on the GPU?
Here is the full code:
using Flux
using Zygote
using CUDA
using CSV
using DataFrames
using Images
using MLDatasets
using BSON: @save, @load
"""
    get_dataloaders(batch_size::Int, shuffle::Bool)

Load the MNIST train/test splits as `Float32`, one-hot encode the labels
over the digits `0:9`, and wrap each split in a `Flux.Data.DataLoader`
with the given batch size and shuffling flag.

Returns `(train_loader, test_loader)`.
"""
function get_dataloaders(batch_size::Int, shuffle::Bool)
    x_train, y_train = MLDatasets.MNIST.traindata(Float32)
    x_test, y_test = MLDatasets.MNIST.testdata(Float32)
    y_train = Flux.onehotbatch(y_train, 0:9)
    y_test = Flux.onehotbatch(y_test, 0:9)
    train_loader = Flux.Data.DataLoader((x_train, y_train); batchsize=batch_size, shuffle=shuffle)
    test_loader = Flux.Data.DataLoader((x_test, y_test); batchsize=batch_size, shuffle=shuffle)
    return train_loader, test_loader
end
"""
    FFNetwork(input_dim, hidden_dim, dropout, num_classes)

Simple feed-forward classifier: `Dense(input_dim → hidden_dim, relu)`,
`Dropout(dropout)`, `Dense(hidden_dim → num_classes)`. The second Dense
produces raw logits (no softmax).
"""
struct FFNetwork
    fc_1
    dropout
    fc_2
    FFNetwork(
        input_dim::Int, hidden_dim::Int, dropout::Float32, num_classes::Int
    ) = new(
        Dense(input_dim, hidden_dim, relu),
        Dropout(dropout),
        Dense(hidden_dim, num_classes),
    )
end

# FIX: register FFNetwork with Flux's functor machinery so that `gpu`/`cpu`
# (and `fmap` in general) recurse into its fields. Without this line,
# `model |> gpu` returns the model UNCHANGED — the Dense weights stay on the
# CPU while the batches are moved to the GPU, which is exactly why Zygote's
# `pullback` fails once `x_batch |> gpu` is added.
Flux.@functor FFNetwork
# Forward pass: flatten each 28×28 image into a column vector, then apply
# fc_1 → dropout → fc_2. The output is raw logits (no softmax applied).
function (net::FFNetwork)(x)
    flat = Flux.flatten(x)
    hidden = net.dropout(net.fc_1(flat))
    return net.fc_2(hidden)
end
"""
    main(num_epochs::Int, batch_size::Int, shuffle::Bool, λ::Float64)

Train the feed-forward MNIST classifier for `num_epochs` epochs with ADAM
(learning rate `λ`) plus weight decay `λ`, logging the average training
loss after each epoch. The model is moved to the GPU, and each batch is
moved to the GPU before the gradient computation.
"""
function main(num_epochs::Int, batch_size::Int, shuffle::Bool, λ::Float64)
    train_loader, test_loader = get_dataloaders(batch_size, shuffle)
    model = FFNetwork(28 * 28, 128, 0.2f0, 10) |> gpu
    # Only the two Dense layers hold trainable parameters.
    trainable_params = Flux.params(model.fc_1, model.fc_2)
    # ADAM wrapped with weight decay (decay coefficient reuses λ).
    optimiser = ADAM(λ)
    optimiser = Flux.Optimise.Optimiser(Flux.Optimise.WeightDecay(λ), optimiser)
    for epoch in 1:num_epochs
        acc_loss = 0.0
        for (x_batch, y_batch) in train_loader
            # Move the current batch to the same device as the model.
            x_batch, y_batch = x_batch |> gpu, y_batch |> gpu
            # Compute the loss together with a pullback for the gradients.
            loss, back = pullback(trainable_params) do
                Flux.Losses.logitcrossentropy(model(x_batch), y_batch)
            end
            # Seed the pullback with 1f0 to get dloss/dparams, then step.
            Flux.Optimise.update!(optimiser, trainable_params, back(1f0))
            acc_loss += loss
        end
        avg_loss = acc_loss / length(train_loader)
        @info "Epoch: $epoch - Average loss: $avg_loss"
    end
end
# Script entry point: 10 epochs, batch size 128, shuffled batches, λ = 0.001.
main(10, 128, true, 0.001)