Problem when using gradient descent on two models with a GPU

Sorry, my fault.
I mistakenly used `trainloader` — though I still don't know how to use it correctly.
Anyway, the code runs smoothly without using the trainloader:

using CUDA
using Flux
using Random
# Reproducible initialisation for the randomly-initialised Dense layer.
Random.seed!(333)

# Shared trunk: both model2 and model3 contain model1, so its parameters
# receive gradient contributions from both losses.
model1 = Chain(
    Dense(3 => 3) |> gpu
)

# Two heads on top of the shared trunk.  The explicit weight matrices are
# Float32: Flux and CUDA default to Float32, and Float64 parameters mixed
# with Float32 activations on the GPU cause promotion warnings and slow,
# non-fused kernels.
model2 = Chain(
    model1,
    Dense(ones(Float32, 3, 3), true, relu) |> gpu
)
model3 = Chain(
    model1,
    Dense(ones(Float32, 3, 3), true, relu) |> gpu
)

# Inputs and regression target, typed Float32 to match the model parameters.
A = Float32[1.0, 2.0, 3.0] |> gpu
B = Float32[1.0, 1.0, 1.0] |> gpu
label = Float32[10, 10, 10] |> gpu

opt = Adam(0.1)
# One optimiser-state tree covering both models.  NOTE(review): model1 is
# shared between the two trees; Flux.setup caches shared leaves, but confirm
# the shared parameters are not double-stepped on your Flux version.
opt_stats = Flux.setup(opt, (model2, model3))
# Train both heads jointly for 5 steps.
#
# The losses must be computed and returned *inside* the closure passed to
# withgradient — the original code accumulated them into `global l1`/`l2`,
# but Zygote does not support assignment to global variables inside the
# function being differentiated, so that pattern breaks the gradient
# computation (and `l1 = 0` as an Int followed by `+=` a Float32 loss is
# type-unstable besides).  It also relied on `@show allLoss` being the last
# expression to return the loss, which is fragile.
for i in 1:5
    # withgradient returns both the scalar loss value and one gradient tree
    # per model argument, so we can log the loss without a second forward pass.
    loss, gs = Flux.withgradient(model2, model3) do m2, m3
        l1 = Flux.mse(m2(A), label)
        l2 = Flux.mse(m3(B), label)
        # The closure must return the scalar to differentiate.
        return l1 + l2
    end
    # Log outside the differentiated closure so the closure stays pure.
    @info "step $i" loss
    Flux.update!(opt_stats, (model2, model3), gs)
end