Hi all, I am struggling a bit with a ForwardDiff problem on the GPU. I would like to compute the gradient of a loss function that itself uses Zygote internally to calculate gradients. This works perfectly on the CPU, but when using the GPU (CUDA.jl) I get the following error message:
MethodError: no method matching Float64(::ForwardDiff.Dual{ForwardDiff.Tag{var"#170#171",Float32},Float32,10})
Closest candidates are:
Float64(::Real, ::RoundingMode) where T<:AbstractFloat at rounding.jl:200
Float64(::T) where T<:Number at boot.jl:732
Float64(::Irrational{:mad_constant}) at irrationals.jl:189
Here is the declaration of my parameters:
# Architectural parameters; they are used below as the array dimensions of the
# weights and biases (nₙ: hidden width, nᵢ: input size, nₒ: output size).
nₙ = 20
nᵢ = 2
nₒ = 4

# Initial parameters: each array is drawn on the host with `rand` and then
# converted with `cu`. The draws happen in the order W₁, W₂, W₃, W₄, b₁, …, b₄.
W₁, W₂, W₃, W₄ = map(
    dims -> cu(rand(dims...)),
    ((nₙ, nᵢ), (nₙ, nₙ), (nₙ, nₙ), (nₒ, nₙ)),
)
b₁, b₂, b₃, b₄ = map(len -> cu(rand(len)), (nₙ, nₙ, nₙ, nₒ))
# Push the input data to CUDA arrays: every host array is converted with `cu`
# and bound to the matching `*_cu` name.
x0_t0_cu, xlb_tlb_cu, xub_tub_cu, xf_tf_cu, u0_cu, v0_cu =
    map(cu, (x0_t0, xlb_tlb, xub_tub, xf_tf, u0, v0))
Relevant functions (not that important; nothing special is happening here):
# Layer operations.

"""
    hlayer(W, b, x)

Hidden layer: apply `tanh` element-wise to the affine map `W * x .+ b`.
"""
function hlayer(W, b, x)
    return tanh.(W * x .+ b)
end

"""
    olayer(W, b, x)

Output layer: return the affine map `W * x .+ b` without any activation.
"""
function olayer(W, b, x)
    return W * x .+ b
end
"""
    net(W, b, x)

Forward propagation through a fully connected network.

`W` and `b` are indexable collections holding one weight array and one bias
array per layer; `b` is indexed with the same indices as `W`. Every layer
except the last is applied with `hlayer`, and the last layer is applied with
`olayer`. The result of the last layer is returned.

A single-layer network (`length(W) == 1`) is evaluated as the output layer
alone. An empty `W` throws a `BoundsError`.
"""
function net(W, b, x)
    # All layers but the last are hidden layers. The range is empty when there
    # is only one layer, so no special case for short networks is needed.
    for i in 1:(length(W) - 1)
        x = hlayer(W[i], b[i], x)
    end
    return olayer(W[end], b[end], x)
end
Here is the cost function I wish to differentiate:
"""
    loss(W₁, W₂, W₃, W₄, b₁, b₂, b₃, b₄, x0_t0, xlb_tlb, xub_tub, xf_tf, u0, v0)

Loss function. With `u(x) = net([W₁, W₂, W₃, W₄], [b₁, b₂, b₃, b₄], x)`, return
the sum of six mean-squared-difference terms:

- initial conditions: rows 1 and 2 of `u(x0_t0)` against `u0` and `v0`;
- boundary values: rows 1 and 2 of `u(xlb_tlb)` against the same rows of
  `u(xub_tub)`;
- boundary derivatives: row 1 of the Zygote gradient (with respect to the
  network input) of the summed row-1 and row-2 outputs, evaluated at `xlb_tlb`
  against the same quantity evaluated at `xub_tub`.

`xf_tf` is accepted but not used in the body.
"""
function loss(W₁, W₂, W₃, W₄, b₁, b₂, b₃, b₄,
              x0_t0, xlb_tlb, xub_tub, xf_tf,
              u0, v0)
    weights = [W₁, W₂, W₃, W₄]
    biases = [b₁, b₂, b₃, b₄]
    u(x) = net(weights, biases, x)

    # Mean squared difference between two arrays.
    mse(p, q) = mean((p .- q).^2)
    # Row 1 of the gradient, with respect to the input, of the summed `row`-th
    # output row evaluated at `pts`.
    ∂row∂x(row, pts) = Zygote.gradient(a -> sum(u(a)[row, :]), pts)[1][1, :]

    # initial conditions
    at_t0 = u(x0_t0)
    u₀pred = at_t0[1, :]
    v₀pred = at_t0[2, :]

    # lower boundary condition
    at_lb = u(xlb_tlb)
    uₗpred = at_lb[1, :]
    vₗpred = at_lb[2, :]
    ∂uₗ∂x = ∂row∂x(1, xlb_tlb)
    ∂vₗ∂x = ∂row∂x(2, xlb_tlb)

    # upper boundary condition
    at_ub = u(xub_tub)
    uᵤpred = at_ub[1, :]
    vᵤpred = at_ub[2, :]
    ∂uᵤ∂x = ∂row∂x(1, xub_tub)
    ∂vᵤ∂x = ∂row∂x(2, xub_tub)

    return mse(u₀pred, u0) + mse(v₀pred, v0) +
           mse(uₗpred, uᵤpred) + mse(vₗpred, vᵤpred) +
           mse(∂uₗ∂x, ∂uᵤ∂x) + mse(∂vₗ∂x, ∂vᵤ∂x)
end
Here is the code that computes the derivative of the loss function with respect to W₁ only:
# Gradient of the loss with respect to W₁ only; every other argument is held
# fixed at its current value.
ForwardDiff.gradient(W₁) do W
    loss(W, W₂, W₃, W₄, b₁, b₂, b₃, b₄,
         x0_t0_cu, xlb_tlb_cu, xub_tub_cu, xf_tf_cu,
         u0_cu, v0_cu)
end
The ForwardDiff line works fine if I do not calculate the gradients within the loss function. Any help sorting out this problem would be appreciated. Regards, RL