Hello,
I have a toy problem to fit a NN for sin(x)
using the GPU for Lux.jl - LuxCUDA.jl
Here is the code I am running (very similar to the documentation):
using Lux, Optimisers,Zygote, Random
using LuxCUDA
x = rand(50);
y = sin.(x)
rng = MersenneTwister()
model = Chain(Dense(1=>20,tanh),Dense(20=>40),Dense(40=>40),Dense(40=>1,tanh))
opt = Adam()
const loss_function = MSELoss()
const dev_gpu = gpu_device()
const dev_cpu = cpu_device()
ps, st = Lux.setup(rng, model) .|> dev_gpu
tstate = Training.TrainState(model, ps, st, opt)
vjp_rule = AutoZygote()
using ProgressMeter
function main(tstate::Training.TrainState, vjp, data, epochs)
data = data .|> dev_gpu
progressmeter = Progress(epochs; showspeed=true)
for epoch in 1:epochs
_, loss, _, tstate = Training.single_train_step!(vjp, loss_function, data, tstate)
next!(progressmeter; showvalues = [(:Epoch,epoch), (:Loss,loss)])
end
return tstate
end
@info "Training"
tstate = main(tstate, vjp_rule, (x', y'), 10000)
This code works fine for cpu_device()
but for gpu_device()
I get the following error:
ERROR: LoadError: MethodError: no method matching cublasLt_fused_dense!(::CuArray{Float32, 2, CUDA.DeviceMemory}, ::typeof(tanh_fast), ::CuArray{Float32, 2, CUDA.DeviceMemory}, ::LinearAlgebra.Adjoint{Float32, CuArray{…}}, ::CuArray{Float32, 1, CUDA.DeviceMemory})
I am not sure what is my error.
Thank you