Unable to understand dimension mismatch error

Hi, I’m a beginner with Julia and ML. I’m attempting to reuse code from the Flux Model Zoo, specifically this, to classify images from this dataset. Below is my version of the code: I modified the data loading and the parameters in build_model to account for the difference in image size and in the number of character classes. The original uses 28x28 images and 10 digits; the Arabic character set has 32x32 images and 28 characters.

using CSV, DataFrames
using Flux
using Flux.Data: DataLoader
using Flux: onehotbatch, onecold
using Flux.Losses: logitcrossentropy
using Base: @kwdef
using CUDA

function getimages(filename)
    filepath = pwd() * "/images/" * filename

    mtrx = Matrix(DataFrame(CSV.File(filepath)))
    r, _ = size(mtrx)

    v = Vector{Matrix{Int64}}()
	
    for i = 1:r
        push!(v, reshape(mtrx[i, :], 32, 32))
    end

    v
end

function getlabels(filename)
    filepath = pwd() * "/images/" * filename
    vec(Matrix(DataFrame(CSV.File(filepath))))
end

function load_data(args)
    train_data_file = "csvTrainImages.csv"
    test_data_file = "csvTestImages.csv"
    train_label_file = "csvTrainLabel.csv"
    test_label_file = "csvTestLabel.csv"

    train_data = getimages(train_data_file)
    test_data = getimages(test_data_file)
    train_labels = getlabels(train_label_file)
    test_labels = getlabels(test_label_file)

    xtrain = Flux.flatten(train_data)
    xtest = Flux.flatten(test_data)

    ytrain, ytest = onehotbatch(train_labels, 1:28), onehotbatch(test_labels, 1:28)

    train_loader = DataLoader((xtrain, ytrain), batchsize=args.batchsize, shuffle=true)
    test_loader = DataLoader((xtest, ytest), batchsize=args.batchsize)

    return train_loader, test_loader
end

function build_model(; imgsize=(32,32,1), nclasses=28)
    return Chain(
            Dense(prod(imgsize), 32, relu),
            Dense(32, nclasses))
end

function loss_and_accuracy(data_loader, model, device)
    acc = 0
    ls = 0.0f0
    num = 0
    for (x, y) in data_loader
        x, y = device(x), device(y)
        ŷ = model(x)
        ls += logitcrossentropy(ŷ, y, agg=sum)
        acc += sum(onecold(cpu(ŷ)) .== onecold(cpu(y)))
        num += size(x, 2)
    end
    return ls / num, acc / num
end

@kwdef mutable struct Args
    η::Float64 = 3e-4       # learning rate
    batchsize::Int = 256    # batch size
    epochs::Int = 10        # number of epochs
    use_cuda::Bool = true   # use gpu (if cuda available)
end

function train(; kws...)
    args = Args(; kws...) # collect options in a struct for convenience

    if CUDA.functional() && args.use_cuda
        @info "Training on CUDA GPU"
        CUDA.allowscalar(false)
        device = gpu
    else
        @info "Training on CPU"
        device = cpu
    end

    # Create test and train dataloaders
    train_loader, test_loader = load_data(args)

    # Construct model
    model = build_model() |> device
    ps = Flux.params(model) # model's trainable parameters
    
    ## Optimizer
    opt = ADAM(args.η)
    
    ## Training
    for epoch in 1:args.epochs
        for (x, y) in train_loader
            x, y = device(x), device(y) # transfer data to device
            gs = gradient(() -> logitcrossentropy(model(x), y), ps) # compute gradient
            Flux.Optimise.update!(opt, ps, gs) # update parameters
        end
        
        # Report on train and test
        train_loss, train_acc = loss_and_accuracy(train_loader, model, device)
        test_loss, test_acc = loss_and_accuracy(test_loader, model, device)
        println("Epoch=$epoch")
        println("  train_loss = $train_loss, train_accuracy = $train_acc")
        println("  test_loss = $test_loss, test_accuracy = $test_acc")
    end
end

I get the following error when I train the model, specifically during the gradient computation. Could you help me understand which two matrices the error refers to, and point me towards a solution? My guess is that it has to do with the build_model parameters, but I’m not quite sure what needs to change, or how.

DimensionMismatch("matrix A has dimensions (32,1024), matrix B has dimensions (1,256)")
macro expansion@interface2.jl:0[inlined]
_pullback(::Zygote.Context, ::typeof(throw), ::DimensionMismatch)@interface2.jl:9
_pullback@matmul.jl:814[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra._generic_matmatmul!), ::Matrix{Matrix{Float32}}, ::Char, ::Char, ::Matrix{Float32}, ::Matrix{Matrix{Int64}}, ::LinearAlgebra.MulAddMul{true, true, Bool, Bool})@interface2.jl:0
_pullback@matmul.jl:802[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra.generic_matmatmul!), ::Matrix{Matrix{Float32}}, ::Char, ::Char, ::Matrix{Float32}, ::Matrix{Matrix{Int64}}, ::LinearAlgebra.MulAddMul{true, true, Bool, Bool})@interface2.jl:0
_pullback@matmul.jl:302[inlined]
_pullback@matmul.jl:275[inlined]
_pullback(::Zygote.Context, ::typeof(LinearAlgebra.mul!), ::Matrix{Matrix{Float32}}, ::Matrix{Float32}, ::Matrix{Matrix{Int64}})@interface2.jl:0
_pullback@matmul.jl:153[inlined]
_pullback(::Zygote.Context, ::typeof(*), ::Matrix{Float32}, ::Matrix{Matrix{Int64}})@interface2.jl:0
_pullback@basic.jl:147[inlined]
_pullback(::Zygote.Context, ::Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, ::Matrix{Matrix{Int64}})@interface2.jl:0
_pullback@basic.jl:36[inlined]
_pullback(::Zygote.Context, ::typeof(Flux.applychain), ::Tuple{Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}, ::Matrix{Matrix{Int64}})@interface2.jl:0
_pullback@basic.jl:38[inlined]
_pullback(::Zygote.Context, ::Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, ::Matrix{Matrix{Int64}})@interface2.jl:0
_pullback@Other: 27[inlined]
_pullback(::Zygote.Context, ::Main.workspace33.var"#2#3"{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}})@interface2.jl:0
pullback(::Function, ::Zygote.Params)@interface.jl:250
gradient(::Function, ::Zygote.Params)@interface.jl:58
var"#train#1"(::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, ::typeof(Main.workspace33.train))@Other: 27
train@Other: 2[inlined]
top-level scope@Local: 1[inlined]

The first step is to make sure your batched inputs (x and y) have the correct shape. If that looks right, next run the forward pass of the model (i.e. call model(x)) outside of a gradient context. If that also works, try running logitcrossentropy(model(x), y) outside of gradient. Once all three of those succeed, you can try gradient again and report any errors that pop up (though I expect there won’t be any).
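Concretely, those three checks might look something like this (a rough sketch, assuming the variables from your script are in scope):

x, y = first(train_loader)   # grab a single batch from the DataLoader
@show size(x) size(y)        # expect (1024, 256) and (28, 256)
ŷ = model(x)                 # forward pass, outside of any gradient context
logitcrossentropy(ŷ, y)      # loss, still outside of gradient

Whichever check fails first tells you where to dig.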

Thank you for the response. Does the correct shape for x and y mean that if x is m×n, then y should be n×p (in terms of matrix multiplication)?

n (the batch dimension) is always the last dimension by convention in Flux. This means y should have shape p×n, where p is the number of classes. Note that y is never used in a matrix multiplication at all: it is only used in the loss calculation as the one-hot encoded target labels, and logitcrossentropy doesn’t contain a single matrix multiplication.
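For example, onehotbatch already follows this convention, putting classes on the first axis and the batch on the last. A quick sketch with three labels and your 28 classes:

using Flux: onehotbatch, onecold

y = onehotbatch([3, 1, 28], 1:28)   # one-hot encode three labels
size(y)                             # (28, 3): p×n, classes × batch
onecold(y, 1:28)                    # recovers [3, 1, 28]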

Thank you. That helped me verify the inputs. I can confirm that x and y have shapes 1×13439 and 28×13439, with types Matrix{Matrix{Int64}} and Flux.OneHotArray{28,2,Vector{UInt32}}, respectively. So y correctly has the number of classes as p and the number of labels as n (in the p×n notation we discussed above). I have narrowed the error down to the model(x) call. Could you suggest how I might debug further?
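For reference, I checked the arrays that go into the DataLoader roughly like this:

xtrain = Flux.flatten(getimages("csvTrainImages.csv"))
ytrain = onehotbatch(getlabels("csvTrainLabel.csv"), 1:28)
typeof(xtrain), size(xtrain)   # (Matrix{Matrix{Int64}}, (1, 13439))
typeof(ytrain), size(ytrain)   # (Flux.OneHotArray{28, 2, Vector{UInt32}}, (28, 13439))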

Calling model(x) returns the exact same error.

Great, so the shape and type of y look correct. x looks suspect, though: you should have a 2d array (a Matrix of numbers), not a matrix of matrices. The shape should be 1024×13439 (32*32 pixels × number of samples) to match your network. That also explains the error message: matrix A (32, 1024) is the weight matrix of your first Dense layer, and matrix B (1, 256) is one 256-sample batch of this x, whose elements are themselves 32×32 matrices, so the multiplication cannot go through. I would double-check load_data and what it outputs, as the culprit is likely somewhere in that function’s call stack.
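If it helps, here is a minimal sketch of getimages that keeps everything as a single 1024×N matrix instead of a vector of per-image matrices (assuming each CSV row holds one flattened 1024-pixel image, and scaling the 0–255 pixel values to Float32 in [0, 1]):

function getimages(filename)
    filepath = pwd() * "/images/" * filename
    mtrx = Matrix(DataFrame(CSV.File(filepath)))  # N×1024, one image per row
    permutedims(Float32.(mtrx) ./ 255)            # 1024×N, one image per column
end

With that shape, Flux.flatten becomes a no-op and each column feeds straight into Dense(1024, 32).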

Great! That’s because I was manually parsing my original CSV (which actually gives me a nice 1024×13439 matrix) and then converting it to something else, thinking that was how the data should be formatted. I’ll try this first. Thank you!

Thank you so very much! I can share that I managed to successfully run my very first model and got values for loss and accuracy! And I now understand what data goes into and comes out of a (or at least this) model.
