Dear All,
while working on my project (epidemic model, solved using PINN), I tried to port my code to GPU. While function (loss function of my neural network) works and delivers reasonable speed up, while computing gradient of loss function w.r.t. parameters of the neural network, I am getting the following differentiation error. Could somebody please give me some guidance, how to solve this error? Any guidance will be very welcome.
MethodError: no method matching Float32(::ForwardDiff.Dual{Nothing,Float64,1})
Closest candidates are:
Float32(::Real, !Matched::RoundingMode) where T<:AbstractFloat at rounding.jl:200
Float32(::T) where T<:Number at boot.jl:715
Float32(!Matched::Int8) at float.jl:60
...
...
Minimal working example code is here.
#*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
#*-*-*-*-*-*-*-*-*-*-* Solve for Value and Policy Functions -*-*-*-*-*-*-*-*-*-*
#*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
#(1) Install and initialize packages
using Pkg
Pkg.add("Plots")
Pkg.add("Parameters")
Pkg.add("LinearAlgebra")
Pkg.add("SpecialFunctions")
Pkg.add("CUDA")
Pkg.add("Flux")
Pkg.build("Flux")
Pkg.add("Conda")
Pkg.add("PyCall")
Pkg.add("Random")
Pkg.add("Distributions")
Pkg.add("ForwardDiff")
Pkg.add("KernelAbstractions")
Pkg.add("Tullio")
using Plots
using Parameters
using LinearAlgebra
using SpecialFunctions
using CUDA
using Flux
using Conda
using PyCall
using Random
using Distributions
using ForwardDiff
using Tullio
Random.seed!(100)
Pkg.add("ZygoteRules")
using ZygoteRules
ZygoteRules.@adjoint function ForwardDiff.Dual{T}(x, xΜ::Tuple) where T
@assert length(xΜ) == 1
ForwardDiff.Dual{T}(x, xΜ), dΜ -> (dΜ.partials[1], (dΜ.value,))
end
ZygoteRules.@adjoint ZygoteRules.literal_getproperty(d::ForwardDiff.Dual{T}, ::Val{:partials}) where T =
d.partials, pΜ -> (ForwardDiff.Dual{T}(pΜ[1], 0),)
ZygoteRules.@adjoint ZygoteRules.literal_getproperty(d::ForwardDiff.Dual{T}, ::Val{:value}) where T =
d.value, xΜ -> (ForwardDiff.Dual{T}(0, xΜ),)
#(2) Build model structure
@with_kw struct Model
#(A) Structural parameters
Ξ²::Float64 =0.96^(1/52)
β¬::Float64 = 0.25
Ξ±::Float64 = 39.8352
Ο::Float64 = 0.8
ΞΈ::Float64 = 0.0013
Ο1::Float64 = 6.8838e-08
Ο2::Float64 = 1.0924e-04
Ο3::Float64 = 0.3426
Οr::Float64 = 0.3869
Οd::Float64 = 0.0019
ΞΌ0::Float64 = 0.0
#(B) Networks and ADAM setting
S1::Float64 = 0.0
S2::Float64 = 1.0
Ο°::Int64 = 2500
Ο::Int64 = 150
T::Int64 = 32
Ξ::Float64 = 0.001
Ξ²1::Float64 = 0.9
Ξ²2::Float64 = 0.99
Ο΅::Float64 = 10^(-8)
Ξ½::Float32 = 10^(-5)
π°::Int64 = 500000
π::Float64 = 0.001
π―::Int64 = 1500
π::Float64 = 0.9
end
Mod1 = Model(Ξ=0.001,π=0.99,T=32)
@unpack T,Ο° = Mod1
#(3) Generate grid
function RnGrid(Model)
@unpack S1,S2,Ο°,Οd = Model
eGrid = rand(Uniform(S1,S2),Ο°^2,3)
eGrid = reshape(eGrid,3,Ο°^2)
tGrid = Array{Float64}(undef,Ο°^2)
n,m = size(eGrid)
for i in 1:m
tGrid[i] = sum(eGrid[:,i])
end
Ind = findall(x->x<=1.0,tGrid)
Grid = eGrid[:,Ind]
Grid = Grid[:,1:Ο°]
display(scatter3d(Grid[1,:],Grid[2,:],Grid[3,:],
label="Grid",xlabel="X",ylabel="Y",zlabel="Z",title="RandomGrid"))
return Grid
end
Grid = RnGrid(Mod1)
Grid = Grid |>gpu
#(4) Initialize neural networks
#(4.1) Policy network
bent(x) = (sqrt(x^2+1)-1)/2 + x
Ο = Chain(Dense(3,T,swish),Dense(T,T,swish),
Dense(T,T,swish),Dense(T,1,swish)) |>gpu
Cr = 1104.8296628335625
Nr = 27.73500981126146
W = 8292.589822461223
Ci = 883.8637302668501
Ni = 27.73500981126146
Ξ§ = 8251.57380479189
cr = CUDA.ones(Ο°)*Cr
w = CUDA.ones(Ο°)*W
c(x) = cr' - x[2,:]'.*Ο(x)
#(4.2) Value network
Ο = Chain(Dense(3,T,swish),Dense(T,T,swish),
Dense(T,T,swish),Dense(T,1,swish)) |>gpu
π±(x) = w' - x[2,:]'.*Ο(x)
#(5) Build law of motion
function π(π,Cs,Ns)
π’ = π[1,:]' - Ο1.*Cs.*Ci.*π[1,:]'.*π[2,:]' - Ο2.*Ns.*Ni.*π[1,:]'.*π[2,:]' - Ο3.*π[1,:]'.*π[2,:]'
π = (1-Οr-Οd).*π[2,:]' + Ο1.*Cs.*Ci.*π[1,:]'.*π[2,:]' + Ο2.*Ns.*Ni.*π[1,:]'.*π[2,:]' + Ο3.*π[1,:]'.*π[2,:]'
π‘ = π[3,:]' + Οr.*π[2,:]'
π¨ = vcat(π’,π,π‘)
return π¨
end
#(6) Build residual function
@unpack Ξ±,Ξ²,ΞΈ,Ο1,Ο2,Ο3,Οd,Οr,Ο° = Mod1
function π½(x)
Cs = c(x)
Ns = Cs./Ξ±
V = π±(x)
π¨ = π(x,Cs,Ns)
π₯ = π±(π¨)
Ο = Ο1*Ci.*Cs.*x[2,:]' + Ο2*Ni.*Ns.*x[2,:]' + Ο3*x[2,:]'
π = sum((Ξ²*(π₯.-Ξ§).*(Ο2*Ni*x[2,:]'+Ξ±*Ο1*Ci.*x[2,:]') - (Ξ±./Cs-ΞΈ.*Ns)).^2)
π = sum((V - log.(Cs) + ΞΈ.*(Ns.^2)./2 - Ξ².*(1 .-Ο).*π₯ - Ξ².*Ο.*Ξ§).^2)
π‘ = π + π
return π‘
end
π― = Flux.params(Ο,Ο)
Pkg.add("BenchmarkTools")
using BenchmarkTools
@benchmark π½(Grid)
βπ½ = Flux.gradient(()->π½(Grid),Flux.params(Ο,Ο))
Best,
Honza