I’m trying to solve the dynamics of a massive particle on gravitational force by PINNs. The below code is inspired by an example in the documentation of NeuralPDE package. The problem is that the ADAM optimizer runs smoothly on CPU but on GPU (|>gpu) the loss gets NaN after the first iteration.

```
using NeuralPDE, Flux, ModelingToolkit, GalacticOptim, Optim, DiffEqFlux
using Quadrature, Cuba, CUDA, QuasiMonteCarlo
import ModelingToolkit: Interval, infimum, supremum
@parameters t
@variables x(..), y(..)
Dt = Differential(t)
Dtt = Differential(t)^2
# 2D PDE
M = 60. # mass of the planet
X0 , Y0 = 0., 0. #position of the planet
eq = [Dtt(x(t)) ~ -M/((x(t) - X0)^2 + (y(t) - Y0)^2)^1.5 * (x(t) - X0),
Dtt(y(t)) ~ -M/((x(t) - X0)^2 + (y(t) - Y0)^2)^1.5 * (y(t) - Y0)]
# Initial and boundary conditions
x0, y0 = 10., 10. # position of the test particle
vx0, vy0 = 1., -5. #initial velocity of the test particle
bcs = [x(0.) ~ x0,
y(0.) ~ y0,
Dt(x(0.)) ~ vx0,
Dt(y(0.)) ~ vy0]
# Space and time domains
domains = [t ∈ Interval(0., 15.)]
inner = 25
chain = [FastChain(FastDense(1,inner,Flux.σ),
FastDense(inner,inner,Flux.σ),
FastDense(inner,inner,Flux.σ),
FastDense(inner,inner,Flux.σ),
FastDense(inner,1)) for _ in 1:2]
initθ = map(c -> Float64.(c), DiffEqFlux.initial_params.(chain)) |>gpu
strategy = GridTraining(0.01)
#strategy = QuasiRandomTraining(500)
discretization = PhysicsInformedNN(chain,
strategy;
init_params = initθ)
@named pde_system = PDESystem(eq,bcs,domains,[t],[x(t), y(t)])
prob = discretize(pde_system,discretization)
pde_inner_loss_functions = prob.f.f.loss_function.pde_loss_function.pde_loss_functions.contents
bcs_inner_loss_functions = prob.f.f.loss_function.bcs_loss_function.bc_loss_functions.contents
cb = function (p,l)
println("loss: ", l )
println("pde_losses: ", map(l_ -> l_(p), pde_inner_loss_functions))
println("bcs_losses: ", map(l_ -> l_(p), bcs_inner_loss_functions))
return false
end
opt = ADAM(0.001)
#opt = BFGS()
res = GalacticOptim.solve(prob, opt;cb=cb,maxiters=5000)
```