Metal.jl and Flux.jl on M1 chip

When my loss function is called inside train!(), I keep getting an error that I don’t know how to fix. I can’t use CUDA because I am on an M1 chip, so I am trying Metal.jl instead. Any ideas? Here’s my full code:

include("data_prep.jl")

using Flux
using Flux.Optimise
using Metal
using Plots
using Dates
using Statistics: mean

module CustomAdam
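# Flux's built-in Adam stores its hyper-parameters as Float64, and Apple GPUs (and therefore
# Metal.jl) have no Float64 support, so this is a Float32-only copy of Flux's Adam, presumably
# to keep every value on the GPU in Float32.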
using Flux.Optimise: AbstractOptimiser, IdDict
import Flux.Optimise: apply!   # extend apply! so Flux's train!/update! dispatch to Adam32

export Adam32

mutable struct Adam32 <: AbstractOptimiser
  eta::Float32
  beta::Tuple{Float32,Float32}
  epsilon::Float32
  state::IdDict{Any,Any}
end
Adam32(η::Real=Float32(0.001), β::Tuple=(Float32(0.9), Float32(0.999)), ϵ::Real=Float32(eps(Float32))) = Adam32(Float32(η), (Float32(β[1]), Float32(β[2])), Float32(ϵ), IdDict())
Adam32(η::Real, β::Tuple, state::IdDict) = Adam32(Float32(η), (Float32(β[1]), Float32(β[2])), Float32(eps(Float32)), state)

function apply!(o::Adam32, x, Δ)
  η, β = o.eta, o.beta

  mt, vt, βp = get!(o.state, x) do
    (zero(x), zero(x), Float32[β[1], β[2]])
  end::Tuple{typeof(x),typeof(x),Vector{Float32}}

  @. mt = β[1] * mt + (1 - β[1]) * Δ
  @. vt = β[2] * vt + (1 - β[2]) * Δ * conj(Δ)
  @. Δ = mt / (1 - βp[1]) / (√(vt / (1 - βp[2])) + o.epsilon) * η
  βp .= βp .* β

  return Δ
end
end

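# move a model (or a plain array) onto the Apple GPU by adapting every leaf array to an MtlArray;
# this relies on the internal Flux._isleaf, which may differ between Flux versions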
met(m) = fmap(x -> Flux.Adapt.adapt(MtlArray, x), m; exclude = Flux._isleaf)

image_size = 224
#load and split data
X_train, X_test, y_train, y_test = load_data(image_size)

#move the data to the GPU (keep y_test on the CPU for the final onecold comparison)
X_train = X_train |> met
X_test = X_test |> met

y_train_mtl = MtlMatrix(y_train)
y_test_mtl = MtlMatrix(y_test)

#define model
model = Chain(
  Dense(image_size * image_size, 64, relu),
  Dense(64, 32, relu),
  Dense(32, 2),
  softmax
) |> met



#define loss function, params, optimizer, and live plot
loss(x, y) = crossentropy(model(x), y)
learning_rate = 0.000001f0
ps = params(model) |> met
opt = CustomAdam.Adam32(learning_rate)
loss_plot = Plots.plot([], [], xlabel="Epochs", ylabel="Loss", title="Live Loss Plot", legend=false, color=:blue, linewidth=2)
display(loss_plot)

println("Let the training begin!\n")

#train model
loss_history = []
epochs = 10
start_time = now()
for epoch in 1:epochs
  #train with optimized learning rate  
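  # train! expects an iterable of (input, target) tuples; here the whole training set is one batch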
  train!(loss, ps, [(X_train, y_train_mtl)], opt)
  train_loss = loss(X_train, y_train_mtl)

  #add loss value to the loss_history list
  push!(loss_history, train_loss)
  println("Epoch = $epoch : Training loss = $train_loss")

  #update plot and refresh display
  Plots.plot!(loss_plot, 1:epoch, loss_history, xlabel="Epochs", ylabel="Loss", title="Live Loss Plot", legend=false, color=:blue, linewidth=2)
  display(loss_plot)
end

end_time = now()
elapsed_time = end_time - start_time
elapsed_seconds_total = Dates.value(elapsed_time) ÷ 1000
minutes = elapsed_seconds_total ÷ 60
seconds = elapsed_seconds_total % 60
println("Training time: $minutes minutes and $seconds seconds")

#run the model on the test data
y_hat_raw = model(X_test)

#turn the probabilities into labels with onecold()
y_hat = onecold(y_hat_raw) .- 1

#compare the predictions (y_hat) to the true labels derived from y_test
y = onecold(y_test) .- 1

accuracy = mean(y_hat .== y)

#plot and save the loss curve with respect to epochs
gr(size=(600, 600))
p_1_curve = Plots.plot(1:epochs, loss_history, xlabel="Epochs", ylabel="Loss", title="Test Accuracy: $accuracy | Time: $minutes:$seconds | LR: $learning_rate", legend=false, color=:blue, linewidth=2)

savetime = now()
savefig(p_1_curve, "model_learning_curve_$savetime.png")

Hi. Very interested in this. Is there any workaround?

Please post the actual error you’re running into, plus the versions of Julia, Flux.jl, and Metal.jl you are using.
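
A minimal way to collect that information from the environment the script runs in (using only the standard InteractiveUtils and Pkg tooling) is:

using InteractiveUtils, Pkg

versioninfo()   # Julia version, OS, and CPU (shows the arm64 / Apple silicon target)
Pkg.status()    # versions of Flux, Metal, Plots, etc. in the active project

together with the full stacktrace printed when train! fails.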
