CUDA.jl - MethodError: no method matching CUDA.CuArray

I’m training a neural network using NeuralEstimators.jl in Julia, and I’m submitting my code to a Slurm-managed high-performance computing cluster. For some reason, it no longer works on GPUs (it worked before), but it still works on CPUs and on my laptop. This is the error I get:

Sampling the validation set...
Computing the initial validation risk...ERROR: LoadError: MethodError: no method matching CUDA.CuArray(::RecursiveArrayTools.VectorOfArray{Float32, 3, Vector{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}})

Closest candidates are:
  (::Type{<:GPUArraysCore.AnyGPUArray})(!Matched::LinearAlgebra.UniformScaling{U}, !Matched::Tuple{Int64, Int64}) where U
   @ GPUArrays <path>/.julia/packages/GPUArrays/dAUOE/src/host/construction.jl:40
  (::Type{<:GPUArraysCore.AnyGPUArray})(!Matched::LinearAlgebra.UniformScaling, !Matched::Integer, !Matched::Integer)
   @ GPUArrays <path>/.julia/packages/GPUArrays/dAUOE/src/host/construction.jl:42
  CUDA.CuArray(!Matched::Union{LinearAlgebra.QR, LinearAlgebra.QRCompactWY})
   @ CUDA <path>/.julia/packages/CUDA/YIj5X/lib/cusolver/linalg.jl:147
  ...

Stacktrace:
 [1] convert(T::Type{CUDA.CuArray}, VA::RecursiveArrayTools.VectorOfArray{Float32, 3, Vector{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}})
   @ RecursiveArrayTools <path>/.julia/packages/RecursiveArrayTools/1yY5f/src/RecursiveArrayTools.jl:29
 [2] stackarrays(v::Vector{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}; merge::Bool)
   @ NeuralEstimators <path>/.julia/packages/NeuralEstimators/kwwrH/src/utility.jl:426
 [3] stackarrays(v::Vector{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}})
   @ NeuralEstimators <path>/.julia/packages/NeuralEstimators/kwwrH/src/utility.jl:416
 [4] (::DeepSet{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, typeof(NeuralEstimators.elementwise_mean), Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}})(Z::Vector{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}})
   @ NeuralEstimators <path>/.julia/packages/NeuralEstimators/kwwrH/src/Architectures.jl:163
 [5] _lossdataloader(loss::typeof(Flux.Losses.mae), data_loader::MLUtils.DataLoader{Tuple{Vector{Matrix{Float32}}, Matrix{Float32}}, Random._GLOBAL_RNG, Val{nothing}}, θ̂::DeepSet{Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, typeof(NeuralEstimators.elementwise_mean), Chain{Tuple{Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}}, device::Function)
   @ NeuralEstimators <path>/.julia/packages/NeuralEstimators/kwwrH/src/train.jl:694
 [6] train(θ̂::DeepSet{Chain{Tuple{Dense{typeof(relu), Matrix{Float32}, Vector{Float32}}, Dense{typeof(relu), Matrix{Float32}, Vector{Float32}}, Dense{typeof(relu), Matrix{Float32}, Vector{Float32}}}}, typeof(NeuralEstimators.elementwise_mean), Chain{Tuple{Dense{typeof(relu), Matrix{Float32}, Vector{Float32}}, Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}}, sampler::sampler, simulator::typeof(simulate); m::UnitRange{Int64}, ξ::Nothing, xi::Nothing, epochs_per_θ_refresh::Int64, epochs_per_theta_refresh::Int64, epochs_per_Z_refresh::Int64, simulate_just_in_time::Bool, loss::Function, optimiser::Adam, batchsize::Int64, epochs::Int64, savepath::String, stopping_epochs::Int64, use_gpu::Bool, verbose::Bool, K::Int64)
   @ NeuralEstimators <path>/.julia/packages/NeuralEstimators/kwwrH/src/train.jl:138
 [7] top-level scope
   @ <path to file>file.jl:77
in expression starting at <path to file>file.jl:77

This is my bash script (`Julia/test` is the earliest version installed there):

#!/bin/bash
# Slurm batch job: rebuilds a fresh Julia depot, installs the required
# packages into it, and then runs the training script on one GPU.

#SBATCH -p gpu-short
#SBATCH --gres=gpu:1
#SBATCH --cpus-per-task=20
#SBATCH --mem-per-cpu=9G

source /etc/profile

module add Julia/test

# Abort if global_storage is unset or empty; otherwise the rm below would
# expand to "/.Julia". Quoting keeps paths with spaces intact.
export JULIA_DEPOT_PATH="${global_storage:?global_storage must be set}/.Julia"

# Start from an empty depot on every job.
# NOTE(review): together with the version-less Pkg.add calls below, this means
# each job installs whatever versions the resolver picks at that moment, so the
# package set can change between runs without any change to this script.
rm -rf -- "$JULIA_DEPOT_PATH"

julia -e 'import Pkg; Pkg.add("CUDA")'
julia -e 'import Pkg; Pkg.add("Flux")'
julia -e 'import Pkg; Pkg.add("cuDNN")'
julia -e 'import Pkg; Pkg.add(url = "https://github.com/msainsburydale/NeuralEstimators.jl")'
julia -e 'import Pkg; Pkg.add("Distributions")'
julia -e 'import Pkg; Pkg.add("CSV")'
julia -e 'import Pkg; Pkg.add("DataFrames")'
julia -e 'import Pkg; Pkg.add("Tables")'
julia -e 'import Pkg; Pkg.add("JLD2")'

# R is loaded before the R-interfacing packages are added.
module add R

julia -e 'import Pkg; Pkg.add("RData")'
julia -e 'import Pkg; Pkg.add("RCall")'

# Executable names are case-sensitive; use the same lowercase `julia` as the
# install commands above.
srun julia <path>/file.jl

Many thanks