Hi, I’m a beginner with Julia and ML. I’m attempting to re-use code from the Flux Model Zoo, specifically this, to classify images from this dataset. Below is my version of the code - I modified the data load and the params in the build_model to account for the difference in image size and the number of character types to be classified. The original had 28x28 and 10 digits, the arabic character set had 32x32 images and 28 characters.
function getimages(filename)
filepath = pwd() * "/images/" * filename
mtrx = Matrix(DataFrame(CSV.File(filepath)))
r, _ = size(mtrx)
v = Vector{Matrix{Int64}}()
for i = 1:r
push!(v, reshape(m[i, :], 32, 32))
end
v
end
function getlabels(filename)
filepath = pwd() * "/images/" * filename
vec(Matrix(DataFrame(CSV.File(filepath))))
end
function load_data(args)
train_data_file = "csvTrainImages.csv"
test_data_file = "csvTestImages.csv"
train_label_file = "csvTrainLabel.csv"
test_label_file = "csvTestLabel.csv"
train_data = getimages(train_data_file)
test_data = getimages(test_data_file)
train_labels = getlabels(train_label_file)
test_labels = getlabels(test_label_file)
xtrain = Flux.flatten(train_data)
xtest = Flux.flatten(test_data)
ytrain, ytest = onehotbatch(train_labels, 1:28), onehotbatch(test_labels, 1:28)
train_loader = DataLoader((xtrain, ytrain), batchsize=args.batchsize, shuffle=true)
test_loader = DataLoader((xtest, ytest), batchsize=args.batchsize)
return train_loader, test_loader
end
function build_model(; imgsize=(32,32,1), nclasses=28)
return Chain(
Dense(prod(imgsize), 32, relu),
Dense(32, nclasses))
end
function loss_and_accuracy(data_loader, model, device)
acc = 0
ls = 0.0f0
num = 0
for (x, y) in data_loader
x, y = device(x), device(y)
ŷ = model(x)
ls += logitcrossentropy(model(x), y, agg=sum)
acc += sum(onecold(cpu(model(x))) .== onecold(cpu(y)))
num += size(x, 2)
end
return ls / num, acc / num
end
@kwdef mutable struct Args
η::Float64 = 3e-4 # learning rate
batchsize::Int = 256 # batch size
epochs::Int = 10 # number of epochs
use_cuda::Bool = true # use gpu (if cuda available)
end
function train(; kws...)
args = Args(; kws...) # collect options in a struct for convenience
if CUDA.functional() && args.use_cuda
@info "Training on CUDA GPU"
CUDA.allowscalar(false)
device = gpu
else
@info "Training on CPU"
device = cpu
end
# Create test and train dataloaders
train_loader, test_loader = load_data(args)
# Construct model
model = build_model() |> device
ps = Flux.params(model) # model's trainable parameters
## Optimizer
opt = ADAM(args.η)
## Training
for epoch in 1:args.epochs
for (x, y) in train_loader
x, y = device(x), device(y) # transfer data to device
gs = gradient(() -> logitcrossentropy(model(x), y), ps) # compute gradient
Flux.Optimise.update!(opt, ps, gs) # update parameters
end
# Report on train and test
train_loss, train_acc = loss_and_accuracy(train_loader, model, device)
test_loss, test_acc = loss_and_accuracy(test_loader, model, device)
println("Epoch=$epoch")
println(" train_loss = $train_loss, train_accuracy = $train_acc")
println(" test_loss = $test_loss, test_accuracy = $test_acc")
end
end
I get the following error when I train the model. Specifically, during the gradient computation. Could you help me understand which two matrices the error refers to and point me towards a solution? My guess is that it has to do with the build_model params, but I’m not quite sure what needs to change and how.
DimensionMismatch("matrix A has dimensions (32,1024), matrix B has dimensions (1,256)")
macro expansion@interface2.jl:0[inlined]
_pullback(::Zygote.Context, ::typeof(throw), ::DimensionMismatch)@interface2.jl:9
_pullback@matmul.jl:814[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra._generic_matmatmul!), ::Matrix{Matrix{Float32}}, ::Char, ::Char, ::Matrix{Float32}, ::Matrix{Matrix{Int64}}, ::LinearAlgebra.MulAddMul{true, true, Bool, Bool})@interface2.jl:0
_pullback@matmul.jl:802[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra.generic_matmatmul!), ::Matrix{Matrix{Float32}}, ::Char, ::Char, ::Matrix{Float32}, ::Matrix{Matrix{Int64}}, ::LinearAlgebra.MulAddMul{true, true, Bool, Bool})@interface2.jl:0
_pullback@matmul.jl:302[inlined]
_pullback@matmul.jl:275[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra.mul!), ::Matrix{Matrix{Float32}}, ::Matrix{Float32}, ::Matrix{Matrix{Int64}})@interface2.jl:0
_pullback@matmul.jl:153[inlined]
_pullback(::Zygote.Context, ::typeof(*), ::Matrix{Float32}, ::Matrix{Matrix{Int64}})@interface2.jl:0
_pullback@basic.jl:147[inlined]
_pullback(::Zygote.Context, ::Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, ::Matrix{Matrix{Int64}})@interface2.jl:0
_pullback@basic.jl:36[inlined]
_pullback(::Zygote.Context, ::typeof(Flux.applychain), ::Tuple{Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}, ::Matrix{Matrix{Int64}})@interface2.jl:0
_pullback@basic.jl:38[inlined]
_pullback(::Zygote.Context, ::Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, ::Matrix{Matrix{Int64}})@interface2.jl:0
_pullback@Other: 27[inlined]
_pullback(::Zygote.Context, ::Main.workspace33.var"#2#3"{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}})@interface2.jl:0
pullback(::Function, ::Zygote.Params)@interface.jl:250
gradient(::Function, ::Zygote.Params)@interface.jl:58
var"#train#1"(::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, ::typeof(Main.workspace33.train))@Other: 27
train@Other: 2[inlined]
top-level scope@Local: 1[inlined]