Flux PINN 1D Burgers

Hi! I am trying to construct a PINN to solve the 1D Burgers equation in Flux.jl without using NeuralPDE.jl. The velocity field u(t,x) is given by net_u([t,x]). Each training batch consists of N_u = 32 points sampled on the initial and boundary conditions and N_f = 32 collocation points inside the domain t ∈ [0,1], x ∈ [-1,1], stacked together. Accordingly, the loss function loss(x,y) is the sum of two terms, one for the initial/boundary points and one for the interior points, where the Burgers residual f(t,x) = u_t(t,x) + u(t,x)*u_x(t,x) - nu*u_xx(t,x) = 0 should be fulfilled. Below is my code.
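For reference, written out explicitly (with the initial and boundary conditions sampled in get_batch below), the problem is

$$u_t + u\,u_x - \nu\,u_{xx} = 0, \qquad \nu = \frac{0.01}{\pi}, \qquad t \in [0,1],\ x \in [-1,1],$$

$$u(0,x) = -\sin(\pi x), \qquad u(t,-1) = u(t,1) = 0,$$

and the loss is the mean squared error on the initial/boundary values plus the mean squared PDE residual at the interior points.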

using Flux, Zygote, Statistics

net_u = Chain(Dense(2 => 20, tanh), Dense(20 => 20, tanh), Dense(20 => 20, tanh), Dense(20 => 20, tanh),
        Dense(20 => 1))

# scalar velocity field and its derivatives w.r.t. the inputs, all taken with Zygote
u(t,x) = net_u([t,x])[1]
u_t(t,x) = Zygote.gradient(p -> u(p[1],p[2]), [t, x])[1][1]
u_x(t,x) = Zygote.gradient(p -> u(p[1],p[2]), [t, x])[1][2]
u_xx(t,x) = Zygote.hessian(p -> u(p[1],p[2]), [t, x])[2,2]

nu = 0.01/pi
# PDE residual that should vanish at the collocation points
f(t,x) = u_t(t,x) + u(t,x)*u_x(t,x) - nu*u_xx(t,x)

# generating data points
N_u = 32
N_f = 32

function get_batch()
    tx_train = zeros(2, N_u+N_f)   # first row: t, second row: x
    u_train = zeros(N_u+N_f)

    # initial condition u(0,x) = -sin(pi*x), left half x in (-1,0]
    for i in 1:trunc(Int, N_u/4)
        tx_train[2,i] = -rand()
        u_train[i] = -sin(pi*tx_train[2,i])
    end

    # initial condition u(0,x) = -sin(pi*x), right half x in [0,1)
    for i in trunc(Int, N_u/4)+1:trunc(Int, N_u/2)
        tx_train[2,i] = rand()
        u_train[i] = -sin(pi*tx_train[2,i])
    end

    # boundary condition u(t, 1) = 0
    for i in trunc(Int, N_u/2)+1:trunc(Int, 3*N_u/4)
        tx_train[1,i] = rand()
        tx_train[2,i] = 1.0
        u_train[i] = 0.0
    end

    # boundary condition u(t, -1) = 0
    for i in trunc(Int, 3*N_u/4)+1:N_u
        tx_train[1,i] = rand()
        tx_train[2,i] = -1.0
        u_train[i] = 0.0
    end

    # interior collocation points for the PDE residual
    for i in N_u+1:N_u+N_f
        tx_train[1,i] = rand()
        tx_train[2,i] = 2.0*rand() - 1.0
    end

    return (tx_train, u_train)
end

function loss(x,y)
    # data loss on the initial and boundary points
    loss_u = Flux.Losses.mse(y[1:N_u]', net_u(x[:,1:N_u]))
    # PDE residual loss on the interior collocation points
    loss_f = mean(f.(x[1,N_u+1:N_u+N_f], x[2,N_u+1:N_u+N_f]).^2)
    return loss_u + loss_f
end

# train and return losses
function run_training(network, epoch)
    losses=[]
    pars = Flux.params(network)  # contains references to arrays in model
    opt = Flux.Adam()    # will store optimiser momentum, etc.
    
    for _ in 1:epoch        
        x,y = get_batch()
        data = Flux.DataLoader((x, y), batchsize=N_u+N_f)   
        Flux.train!(loss, pars, data, opt)        
        push!(losses, loss(x, y))
    end
    return losses
end

losses = run_training(net_u, 1)

which results in the following error:

Mutating arrays is not supported -- called setindex!(Matrix{Float64}, ...)
This error occurs when you ask Zygote to differentiate operations that change
the elements of arrays in place (e.g. setting values with x .= ...)

Possible fixes:
- avoid mutating operations (preferred)
- or read the documentation and solutions for this error
  https://fluxml.ai/Zygote.jl/latest/limitations


Stacktrace:
  [1] error(s::String)
    @ Base .\error.jl:35
  [2] _throw_mutation_error(f::Function, args::Matrix{Float64})
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\array.jl:86
  [3] (::Zygote.var"#391#392"{Matrix{Float64}})(#unused#::Nothing)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\array.jl:98
  [4] (::Zygote.var"#2488#back#393"{Zygote.var"#391#392"{Matrix{Float64}}})(Δ::Nothing)
    @ Zygote C:\Users\parfe\.julia\packages\ZygoteRules\AIbCs\src\adjoint.jl:67
  [5] Pullback
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\forward.jl:31 [inlined]
  [6] (::typeof(∂(forward_jacobian)))(Δ::Tuple{Nothing, Zygote.OneElement{Float64, 2, Tuple{Int64, Int64}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}})
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
  [7] Pullback
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\forward.jl:44 [inlined]
  [8] Pullback
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\forward.jl:42 [inlined]
  [9] Pullback
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\grad.jl:64 [inlined]
 [10] (::typeof(∂(hessian_dual)))(Δ::Zygote.OneElement{Float64, 2, Tuple{Int64, Int64}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}})
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
 [11] Pullback
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\grad.jl:62 [inlined]
 [12] Pullback
    @ .\In[4]:9 [inlined]
 [13] (::typeof(∂(u_xx)))(Δ::Float64)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
 [14] Pullback
    @ .\In[4]:12 [inlined]
 [15] (::typeof(∂(f)))(Δ::Float64)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
 [16] #938
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\broadcast.jl:205 [inlined]
 [17] #4
    @ .\generator.jl:36 [inlined]
 [18] iterate
    @ .\generator.jl:47 [inlined]
 [19] collect(itr::Base.Generator{Base.Iterators.Zip{Tuple{Vector{Tuple{Float64, typeof(∂(f))}}, Vector{Float64}}}, Base.var"#4#5"{Zygote.var"#938#944"}})
    @ Base .\array.jl:787
 [20] map
    @ .\abstractarray.jl:3055 [inlined]
 [21] (::Zygote.var"#∇broadcasted#943"{Tuple{Vector{Float64}, Vector{Float64}}, Vector{Tuple{Float64, typeof(∂(f))}}, Val{3}})(ȳ::Vector{Float64})
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\broadcast.jl:205
 [22] #3885#back
    @ C:\Users\parfe\.julia\packages\ZygoteRules\AIbCs\src\adjoint.jl:67 [inlined]
 [23] #208
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\lib.jl:206 [inlined]
 [24] #2066#back
    @ C:\Users\parfe\.julia\packages\ZygoteRules\AIbCs\src\adjoint.jl:67 [inlined]
 [25] Pullback
    @ .\broadcast.jl:1304 [inlined]
 [26] Pullback
    @ .\In[5]:41 [inlined]
 [27] (::typeof(∂(loss)))(Δ::Float64)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
 [28] #208
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\lib.jl:206 [inlined]
 [29] #2066#back
    @ C:\Users\parfe\.julia\packages\ZygoteRules\AIbCs\src\adjoint.jl:67 [inlined]
 [30] Pullback
    @ C:\Users\parfe\.julia\packages\Flux\v79Am\src\optimise\train.jl:143 [inlined]
 [31] (::typeof(∂(λ)))(Δ::Float64)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
 [32] (::Zygote.var"#99#100"{Params{Zygote.Buffer{Any, Vector{Any}}}, typeof(∂(λ)), Zygote.Context{true}})(Δ::Float64)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface.jl:389
 [33] withgradient(f::Function, args::Params{Zygote.Buffer{Any, Vector{Any}}})
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface.jl:133
 [34] macro expansion
    @ C:\Users\parfe\.julia\packages\Flux\v79Am\src\optimise\train.jl:142 [inlined]
 [35] macro expansion
    @ C:\Users\parfe\.julia\packages\ProgressLogging\6KXlp\src\ProgressLogging.jl:328 [inlined]
 [36] train!(loss::Function, ps::Params{Zygote.Buffer{Any, Vector{Any}}}, data::MLUtils.DataLoader{Tuple{Matrix{Float64}, Vector{Float64}}, Random._GLOBAL_RNG, Val{nothing}}, opt::Adam; cb::Flux.Optimise.var"#38#41")
    @ Flux.Optimise C:\Users\parfe\.julia\packages\Flux\v79Am\src\optimise\train.jl:140
 [37] train!(loss::Function, ps::Params{Zygote.Buffer{Any, Vector{Any}}}, data::MLUtils.DataLoader{Tuple{Matrix{Float64}, Vector{Float64}}, Random._GLOBAL_RNG, Val{nothing}}, opt::Adam)
    @ Flux.Optimise C:\Users\parfe\.julia\packages\Flux\v79Am\src\optimise\train.jl:136
 [38] run_training(network::Chain{Tuple{Dense{typeof(tanh), Matrix{Float32}, Vector{Float32}}, Dense{typeof(tanh), Matrix{Float32}, Vector{Float32}}, Dense{typeof(tanh), Matrix{Float32}, Vector{Float32}}, Dense{typeof(tanh), Matrix{Float32}, Vector{Float32}}, Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, epoch::Int64)
    @ Main .\In[5]:54
 [39] top-level scope
    @ In[6]:1

The training works, however, if the loss function contains only the first term, loss(x,y) = loss_u. What is wrong here and how can I resolve the problem?
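To narrow it down: the stack trace points into the pullback of Zygote.hessian (its internal forward_jacobian mutates a matrix, which Zygote cannot differentiate through). If that is the culprit, the same error should already show up in an untested minimal check, reusing the definitions above, when only u_xx is differentiated with respect to the parameters:

# expected to reproduce the mutation error on its own, independently of loss() and get_batch()
Zygote.gradient(() -> u_xx(0.5, 0.0), Flux.params(net_u))

# while the plain network output differentiates fine, consistent with loss_u alone working
Zygote.gradient(() -> u(0.5, 0.0), Flux.params(net_u))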


Hi! I get the same error when using the Lux.jl package to create a neural network and the Zygote.jl package to differentiate the physics loss function. Do you have a solution now? Thanks!

Hi! No, I have switched to Python+PyTorch in my project.

For a solution using Zygote, see Nested Automatic Differentiation | Lux.jl Docs.

Though in the long term you might want to use Reactant + Enzyme for autodiff; see Compiling Lux Models using Reactant.jl | Lux.jl Docs. That supports nested AD and is significantly faster than any of the other AD tools for neural networks in Julia.

Nested Automatic Differentiation | Lux.jl Docs only shows how to calculate gradients and the Jacobian matrix; it does not help when I compute the Hessian matrix to get the second-order derivative.
Here is my code:

using Lux, Random, Zygote

model = Chain(
    Dense(2 => 32, tanh),
    Dense(32 => 32, tanh),
    Dense(32 => 32, tanh),
    Dense(32 => 1)
)

rng = Random.default_rng()
Random.seed!(rng, 0)

ps, st = Lux.setup(rng, model)

tx = Matrix{Float32}(undef, 2, 10)
tx[1, :] = rand(rng, Float32, 10)
tx[2, :] = 2.0f0 * rand(rng, Float32, 10) .- 1.0f0

function physical_information_loss(model::AbstractLuxLayer, tx::AbstractArray, ps, st)
    net = StatefulLuxLayer{true}(model, ps, st)
    nu = 0.01f0 / π
    u = net(tx)

    ∂u_∂tx = only(Zygote.gradient(sum ∘ net, tx))
    ∂u_∂t, ∂u_∂x = ∂u_∂tx[1:1, :], ∂u_∂tx[2:2, :]
    ∂u_∂xx = only(Zygote.diaghessian(sum ∘ net, tx))[2:2, :]

    return MSELoss()(∂u_∂t + u .* ∂u_∂x, nu * ∂u_∂xx)
end

Zygote.gradient(physical_information_loss, model, tx, ps, st)

If physical_information_loss contains only first-order derivatives, Zygote.gradient(physical_information_loss, model, tx, ps, st) works well. As soon as I add the second-order derivative using the hessian or diaghessian function, it gives me the error ERROR: Mutating arrays is not supported. I also tried taking the gradient of the gradient to get the second-order derivative, with the same error. So what is the right way to calculate the second-order derivative in the loss function?
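One workaround that is sometimes suggested for this kind of error is to compute the input derivatives with nested ForwardDiff.derivative calls instead of Zygote.hessian/diaghessian, so that no reverse-mode Hessian appears inside the loss. The sketch below is only an illustration of that idea: u_scalar, du_dx, d2u_dx2, du_dt and burgers_residual are hypothetical helpers, net is assumed to be the StatefulLuxLayer from the post above, and whether the outer Zygote.gradient over the parameters then goes through this construction has not been verified here.

using ForwardDiff

# scalar wrapper around the stateful layer: u(t, x) at a single point
u_scalar(net, t, x) = only(net(vcat(t, x)))

# input derivatives via nested forward-mode AD; ForwardDiff's tagging keeps
# the two nested derivatives from interfering with each other
du_dx(net, t, x)   = ForwardDiff.derivative(xi -> u_scalar(net, t, xi), x)
d2u_dx2(net, t, x) = ForwardDiff.derivative(xi -> du_dx(net, t, xi), x)
du_dt(net, t, x)   = ForwardDiff.derivative(tau -> u_scalar(net, tau, x), t)

# Burgers residual at a single collocation point, with nu as in the posts above
burgers_residual(net, t, x; nu = 0.01f0 / pi) =
    du_dt(net, t, x) + u_scalar(net, t, x) * du_dx(net, t, x) - nu * d2u_dx2(net, t, x)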