Flux PINN 1D Burgers

Hi! I am trying to construct a PINN to solve the 1D Burgers equation in Flux.jl, without using NeuralPDE.jl. The velocity field u(t,x) is given by net_u([t,x]). Each training batch consists of N_u = 32 points on the initial and boundary conditions and N_f = 32 collocation points inside the domain t \in [0,1], \; x \in [-1,1], stacked together. Accordingly, the loss function loss(x,y) is the sum of two terms: an MSE over the initial/boundary points and a residual term over the interior points, where the Burgers equation f(t,x) = u_t(t,x) + u(t,x)*u_x(t,x) - nu*u_xx(t,x) = 0 should be fulfilled. Below is my code

using Flux, Zygote, Statistics

net_u = Chain(Dense(2 => 20, tanh), Dense(20 => 20, tanh), Dense(20 => 20, tanh), Dense(20 => 20, tanh),
        Dense(20 => 1))

# scalar velocity and its derivatives at a single point (t, x)
u(t, x) = net_u([t, x])[1]
u_t(t, x) = Zygote.gradient(p -> u(p[1], p[2]), [t, x])[1][1]
u_x(t, x) = Zygote.gradient(p -> u(p[1], p[2]), [t, x])[1][2]
u_xx(t, x) = Zygote.hessian(p -> u(p[1], p[2]), [t, x])[2, 2]

nu = 0.01/pi
# Burgers residual, which should vanish at the collocation points
f(t, x) = u_t(t, x) + u(t, x)*u_x(t, x) - nu*u_xx(t, x)

# generating data points
N_u = 32
N_f = 32

function get_batch()
    tx_train = zeros(2, N_u + N_f)
    u_train = zeros(N_u + N_f)

    # initial condition at t = 0: u(0, x) = -sin(pi*x), split over x < 0 and x > 0
    for i in 1:trunc(Int, N_u/4)
        tx_train[2, i] = -rand()
        u_train[i] = -sin(pi*tx_train[2, i])
    end

    for i in trunc(Int, N_u/4)+1:trunc(Int, N_u/2)
        tx_train[2, i] = rand()
        u_train[i] = -sin(pi*tx_train[2, i])
    end

    # boundary conditions: u(t, 1) = u(t, -1) = 0
    for i in trunc(Int, N_u/2)+1:trunc(Int, 3*N_u/4)
        tx_train[1, i] = rand()
        tx_train[2, i] = 1.0
        u_train[i] = 0.0
    end

    for i in trunc(Int, 3*N_u/4)+1:N_u
        tx_train[1, i] = rand()
        tx_train[2, i] = -1.0
        u_train[i] = 0.0
    end

    # collocation points inside the domain t in [0,1], x in [-1,1]
    for i in N_u+1:N_u+N_f
        tx_train[1, i] = rand()
        tx_train[2, i] = 2.0*rand() - 1.0
    end

    return (tx_train, u_train)
end

function loss(x, y)
    # data loss on the initial/boundary points
    loss_u = Flux.Losses.mse(y[1:N_u]', net_u(x[:, 1:N_u]))
    # PDE residual loss on the collocation points
    loss_f = mean(f.(x[1, N_u+1:N_u+N_f], x[2, N_u+1:N_u+N_f]).^2)
    return loss_u + loss_f
end

# train and return losses
function run_training(network, epochs)
    losses = Float64[]
    pars = Flux.params(network)  # contains references to arrays in model
    opt = Flux.Adam()            # will store optimiser momentum, etc.

    for _ in 1:epochs
        x, y = get_batch()
        data = Flux.DataLoader((x, y), batchsize = N_u + N_f)
        Flux.train!(loss, pars, data, opt)
        push!(losses, loss(x, y))
    end
    return losses
end

losses = run_training(net_u, 1)

which results in the following error:

Mutating arrays is not supported -- called setindex!(Matrix{Float64}, ...)
This error occurs when you ask Zygote to differentiate operations that change
the elements of arrays in place (e.g. setting values with x .= ...)

Possible fixes:
- avoid mutating operations (preferred)
- or read the documentation and solutions for this error
  https://fluxml.ai/Zygote.jl/latest/limitations


Stacktrace:
  [1] error(s::String)
    @ Base .\error.jl:35
  [2] _throw_mutation_error(f::Function, args::Matrix{Float64})
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\array.jl:86
  [3] (::Zygote.var"#391#392"{Matrix{Float64}})(#unused#::Nothing)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\array.jl:98
  [4] (::Zygote.var"#2488#back#393"{Zygote.var"#391#392"{Matrix{Float64}}})(Δ::Nothing)
    @ Zygote C:\Users\parfe\.julia\packages\ZygoteRules\AIbCs\src\adjoint.jl:67
  [5] Pullback
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\forward.jl:31 [inlined]
  [6] (::typeof(∂(forward_jacobian)))(Δ::Tuple{Nothing, Zygote.OneElement{Float64, 2, Tuple{Int64, Int64}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}})
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
  [7] Pullback
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\forward.jl:44 [inlined]
  [8] Pullback
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\forward.jl:42 [inlined]
  [9] Pullback
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\grad.jl:64 [inlined]
 [10] (::typeof(∂(hessian_dual)))(Δ::Zygote.OneElement{Float64, 2, Tuple{Int64, Int64}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}})
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
 [11] Pullback
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\grad.jl:62 [inlined]
 [12] Pullback
    @ .\In[4]:9 [inlined]
 [13] (::typeof(∂(u_xx)))(Δ::Float64)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
 [14] Pullback
    @ .\In[4]:12 [inlined]
 [15] (::typeof(∂(f)))(Δ::Float64)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
 [16] #938
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\broadcast.jl:205 [inlined]
 [17] #4
    @ .\generator.jl:36 [inlined]
 [18] iterate
    @ .\generator.jl:47 [inlined]
 [19] collect(itr::Base.Generator{Base.Iterators.Zip{Tuple{Vector{Tuple{Float64, typeof(∂(f))}}, Vector{Float64}}}, Base.var"#4#5"{Zygote.var"#938#944"}})
    @ Base .\array.jl:787
 [20] map
    @ .\abstractarray.jl:3055 [inlined]
 [21] (::Zygote.var"#∇broadcasted#943"{Tuple{Vector{Float64}, Vector{Float64}}, Vector{Tuple{Float64, typeof(∂(f))}}, Val{3}})(ȳ::Vector{Float64})
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\broadcast.jl:205
 [22] #3885#back
    @ C:\Users\parfe\.julia\packages\ZygoteRules\AIbCs\src\adjoint.jl:67 [inlined]
 [23] #208
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\lib.jl:206 [inlined]
 [24] #2066#back
    @ C:\Users\parfe\.julia\packages\ZygoteRules\AIbCs\src\adjoint.jl:67 [inlined]
 [25] Pullback
    @ .\broadcast.jl:1304 [inlined]
 [26] Pullback
    @ .\In[5]:41 [inlined]
 [27] (::typeof(∂(loss)))(Δ::Float64)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
 [28] #208
    @ C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\lib\lib.jl:206 [inlined]
 [29] #2066#back
    @ C:\Users\parfe\.julia\packages\ZygoteRules\AIbCs\src\adjoint.jl:67 [inlined]
 [30] Pullback
    @ C:\Users\parfe\.julia\packages\Flux\v79Am\src\optimise\train.jl:143 [inlined]
 [31] (::typeof(∂(λ)))(Δ::Float64)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface2.jl:0
 [32] (::Zygote.var"#99#100"{Params{Zygote.Buffer{Any, Vector{Any}}}, typeof(∂(λ)), Zygote.Context{true}})(Δ::Float64)
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface.jl:389
 [33] withgradient(f::Function, args::Params{Zygote.Buffer{Any, Vector{Any}}})
    @ Zygote C:\Users\parfe\.julia\packages\Zygote\g2w9o\src\compiler\interface.jl:133
 [34] macro expansion
    @ C:\Users\parfe\.julia\packages\Flux\v79Am\src\optimise\train.jl:142 [inlined]
 [35] macro expansion
    @ C:\Users\parfe\.julia\packages\ProgressLogging\6KXlp\src\ProgressLogging.jl:328 [inlined]
 [36] train!(loss::Function, ps::Params{Zygote.Buffer{Any, Vector{Any}}}, data::MLUtils.DataLoader{Tuple{Matrix{Float64}, Vector{Float64}}, Random._GLOBAL_RNG, Val{nothing}}, opt::Adam; cb::Flux.Optimise.var"#38#41")
    @ Flux.Optimise C:\Users\parfe\.julia\packages\Flux\v79Am\src\optimise\train.jl:140
 [37] train!(loss::Function, ps::Params{Zygote.Buffer{Any, Vector{Any}}}, data::MLUtils.DataLoader{Tuple{Matrix{Float64}, Vector{Float64}}, Random._GLOBAL_RNG, Val{nothing}}, opt::Adam)
    @ Flux.Optimise C:\Users\parfe\.julia\packages\Flux\v79Am\src\optimise\train.jl:136
 [38] run_training(network::Chain{Tuple{Dense{typeof(tanh), Matrix{Float32}, Vector{Float32}}, Dense{typeof(tanh), Matrix{Float32}, Vector{Float32}}, Dense{typeof(tanh), Matrix{Float32}, Vector{Float32}}, Dense{typeof(tanh), Matrix{Float32}, Vector{Float32}}, Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, epoch::Int64)
    @ Main .\In[5]:54
 [39] top-level scope
    @ In[6]:1

but it works if the loss function contains only the first term, loss(x,y) = loss_u. What is wrong here, and how can I resolve the problem?
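
Judging from the stack trace, the failure comes from differentiating u_xx (i.e. Zygote.hessian) with respect to the network parameters. A stripped-down sketch along the same lines (my own, with a throwaway network m) should hit the same error:

using Flux, Zygote

m = Chain(Dense(2 => 4, tanh), Dense(4 => 1))
# second derivative of the scalar output with respect to x, via Zygote.hessian
u_xx_min(v) = Zygote.hessian(z -> m(z)[1], v)[2, 2]
# differentiating this with respect to the parameters is the nested step that fails
Zygote.gradient(() -> u_xx_min([0.5f0, 0.0f0]), Flux.params(m))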

Hi! I get the same error when using the Lux.jl package to create a neural network and the Zygote.jl package to differentiate the physics loss function. Have you found a solution since? Thanks!

Hi! No, I have switched to Python+PyTorch in my project.

For a solution using Zygote see Nested Automatic Differentiation | Lux.jl Docs

Though in the long term you might want to use Reactant + Enzyme for autodiff: see Compiling Lux Models using Reactant.jl | Lux.jl Docs. That supports nested AD and is significantly faster than any of the other AD tools for neural networks in Julia.
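
For the first (Zygote) route, here is a minimal first-order sketch of the pattern those docs describe (the model, loss, and sizes are placeholders of my own): the inner Zygote.gradient over a StatefulLuxLayer is switched to forward mode by Lux, so the outer gradient with respect to the parameters goes through.

using Lux, Random, Zygote

model = Chain(Dense(2 => 16, tanh), Dense(16 => 1))
ps, st = Lux.setup(Random.default_rng(), model)
tx = rand(Float32, 2, 8)

function first_order_loss(model, tx, ps, st)
    net = StatefulLuxLayer{true}(model, ps, st)
    ∂u_∂tx = only(Zygote.gradient(sum ∘ net, tx))  # derivative w.r.t. the inputs
    return sum(abs2, ∂u_∂tx)
end

Zygote.gradient(first_order_loss, model, tx, ps, st)  # derivative w.r.t. the parameters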

Nested Automatic Differentiation | Lux.jl Docs only shows how to compute gradients and Jacobians; it does not help when I compute a Hessian to get the second-order derivative.
Here is my code:

using Lux, Random, Zygote

model = Chain(
    Dense(2 => 32, tanh),
    Dense(32 => 32, tanh),
    Dense(32 => 32, tanh),
    Dense(32 => 1)
)

rng = Random.default_rng()
Random.seed!(rng, 0)

ps, st = Lux.setup(rng, model)

tx = Matrix{Float32}(undef, 2, 10)
tx[1, :] = rand(rng, Float32, 10)
tx[2, :] = 2.0f0 * rand(rng, Float32, 10) .- 1.0f0

function physical_information_loss(model::AbstractLuxLayer, tx::AbstractArray, ps, st)
    net = StatefulLuxLayer{true}(model, ps, st)
    nu = 0.01f0 / π
    u = net(tx)

    # first derivatives with respect to the inputs (t, x)
    ∂u_∂tx = only(Zygote.gradient(sum ∘ net, tx))
    ∂u_∂t, ∂u_∂x = ∂u_∂tx[1:1, :], ∂u_∂tx[2:2, :]
    # second derivative with respect to x, from the diagonal of the Hessian
    ∂u_∂xx = only(Zygote.diaghessian(sum ∘ net, tx))[2:2, :]

    return MSELoss()(∂u_∂t + u .* ∂u_∂x, nu * ∂u_∂xx)
end

Zygote.gradient(physical_information_loss, model, tx, ps, st)

If the physical_information_loss function only contains first-order derivatives, Zygote.gradient(physical_information_loss, model, tx, ps, st) works fine. When I add the second-order derivative using the hessian or diaghessian function, it gives me the error ERROR: Mutating arrays is not supported. I also tried computing the gradient of the gradient to get the second-order derivative, with the same error. So what is the right way to compute a second-order derivative inside the loss function?

Zygote won’t work for 3rd order; check out Lux + Reactant for this. I just made a PR updating a PINNs example: feat: use 3rd order derivatives using Reactant by avik-pal · Pull Request #1315 · LuxDL/Lux.jl · GitHub

Thank you for your help! The new PINNs example works well. But when I just compute the first-order derivative, it gives me the error ERROR: Function argument passed to autodiff cannot be proven readonly. Here is my code:

using Lux, Random, Reactant, Enzyme

model = Chain(
    Dense(2 => 32, tanh),
    Dense(32 => 32, tanh),
    Dense(32 => 32, tanh),
    Dense(32 => 1)
)

rng = Random.default_rng()
Random.seed!(rng, 0)

ps, st = Lux.setup(rng, model)
net = StatefulLuxLayer{true}(model, ps, st)

tx = Matrix{Float32}(undef, 2, 10)
tx[1, :] = rand(rng, Float32, 10)
tx[2, :] = 2.0f0 * rand(rng, Float32, 10) .- 1.0f0

function ∂u_∂t(model::StatefulLuxLayer, tx::AbstractArray)
    # gradient of sum(u) with respect to the inputs; the first row is ∂u/∂t
    return Enzyme.gradient(Enzyme.Reverse, sum ∘ model, tx)[1][1, :]
end

∂u_∂t(net, tx)

The error message:

ERROR: Function argument passed to autodiff cannot be proven readonly.
If the the function argument cannot contain derivative data, instead call autodiff(Mode, Cons
t(f), ...)
See https://enzyme.mit.edu/index.fcgi/julia/stable/faq/#Activity-of-temporary-storage for mor
e information.
The potentially writing call is   store {} addrspace(10)* %.fca.0.extract, {} addrspace(10)* 
addrspace(10)* %.innerparm.sroa.0.0..sroa_cast, align 8, !dbg !22, !alias.scope !24, !noalias
 !28, using   %.fca.0.extract = extractvalue { {} addrspace(10)* } %0, 0, !dbg !8, !enzyme_ty
pe !9, !enzymejl_source_type_StatefulLuxLayer\7BStatic.True\2C\20Chain\7B\40NamedTuple\7Blaye
r_1\3A\3ADense\7Btypeof\28tanh\29\2C\20Int64\2C\20Int64\2C\20Nothing\2C\20Nothing\2C\20Static
.True\7D\2C\20layer_2\3A\3ADense\7Btypeof\28tanh\29\2C\20Int64\2C\20Int64\2C\20Nothing\2C\20N
othing\2C\20Static.True\7D\2C\20layer_3\3A\3ADense\7Btypeof\28tanh\29\2C\20Int64\2C\20Int64\2
C\20Nothing\2C\20Nothing\2C\20Static.True\7D\2C\20layer_4\3A\3ADense\7Btypeof\28identity\29\2
C\20Int64\2C\20Int64\2C\20Nothing\2C\20Nothing\2C\20Static.True\7D\7D\2C\20Nothing\7D\2C\20\4
0NamedTuple\7Blayer_1\3A\3A\40NamedTuple\7Bweight\3A\3AMatrix\7BFloat32\7D\2C\20bias\3A\3AVec
tor\7BFloat32\7D\7D\2C\20layer_2\3A\3A\40NamedTuple\7Bweight\3A\3AMatrix\7BFloat32\7D\2C\20bi
as\3A\3AVector\7BFloat32\7D\7D\2C\20layer_3\3A\3A\40NamedTuple\7Bweight\3A\3AMatrix\7BFloat32
\7D\2C\20bias\3A\3AVector\7BFloat32\7D\7D\2C\20layer_4\3A\3A\40NamedTuple\7Bweight\3A\3AMatri
x\7BFloat32\7D\2C\20bias\3A\3AVector\7BFloat32\7D\7D\7D\2C\20\40NamedTuple\7Blayer_1\3A\3A\40
NamedTuple\7B\7D\2C\20layer_2\3A\3A\40NamedTuple\7B\7D\2C\20layer_3\3A\3A\40NamedTuple\7B\7D\

That error says that the function argument (i.e. sum ∘ model) could contain differentiable data and you didn't specify whether you wanted its derivative or not (and Enzyme couldn't prove it wasn't written to). In this case you can just write Enzyme.gradient(Enzyme.Reverse, Const(sum ∘ model), tx)[1][1, :], as the error indicates.

However, I'll also note that you aren't using Reactant here, which can also lead to non-trivial speedups.
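
Applied to the ∂u_∂t helper above, the suggested change would look like this (a sketch only, using the same net and tx as before):

function ∂u_∂t(model::StatefulLuxLayer, tx::AbstractArray)
    # Const marks the composed function as non-differentiable data for Enzyme,
    # so only the derivative with respect to tx is requested
    return Enzyme.gradient(Enzyme.Reverse, Const(sum ∘ model), tx)[1][1, :]
end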

When I modified it to Enzyme.gradient(Enzyme.Reverse, Const(sum ∘ model), tx)[1][1, :], I get a new error:

ERROR: Constant memory is stored (or returned) to a differentiable variable.
As a result, Enzyme cannot provably ensure correctness and throws this error.
This might be due to the use of a constant variable as temporary storage for active memory (h
ttps://enzyme.mit.edu/julia/stable/faq/#Runtime-Activity).
If Enzyme should be able to prove this use non-differentable, open an issue!
To work around this issue, either:
 a) rewrite this variable to not be conditionally active (fastest, but requires a code change
), or
 b) set the Enzyme mode to turn on runtime activity (e.g. autodiff(set_runtime_activity(Rever
se), ...) ). This will maintain correctness, but may slightly reduce performance.
Mismatched activity for:   store {} addrspace(10)* %.fca.0.extract, {} addrspace(10)* addrspa
ce(10)* %.innerparm.sroa.0.0..sroa_cast, align 8, !dbg !22, !alias.scope !24, !noalias !28 co
nst val:   %.fca.0.extract = extractvalue { {} addrspace(10)* } %0, 0, !dbg !8, !enzyme_type 
!9, !enzymejl_source_type_StatefulLuxLayer\7BStatic.True\2C\20Chain\7B\40NamedTuple\7Blayer_1
\3A\3ADense\7Btypeof\28tanh\29\2C\20Int64\2C\20Int64\2C\20Nothing\2C\20Nothing\2C\20Static.Tr
ue\7D\2C\20layer_2\3A\3ADense\7Btypeof\28tanh\29\2C\20Int64\2C\20Int64\2C\20Nothing\2C\20Noth
ing\2C\20Static.True\7D\2C\20layer_3\3A\3ADense\7Btypeof\28tanh\29\2C\20Int64\2C\20Int64\2C\2
0Nothing\2C\20Nothing\2C\20Static.True\7D\2C\20layer_4\3A\3ADense\7Btypeof\28identity\29\2C\2
0Int64\2C\20Int64\2C\20Nothing\2C\20Nothing\2C\20Static.True\7D\7D\2C\20Nothing\7D\2C\20\40Na
medTuple\7Blayer_1\3A\3A\40NamedTuple\7Bweight\3A\3AMatrix\7BFloat32\7D\2C\20bias\3A\3AVector
\7BFloat32\7D\7D\2C\20layer_2\3A\3A\40NamedTuple\7Bweight\3A\3AMatrix\7BFloat32\7D\2C\20bias\
3A\3AVector\7BFloat32\7D\7D\2C\20layer_3\3A\3A\40NamedTuple\7Bweight\3A\3AMatrix\7BFloat32\7D
\2C\20bias\3A\3AVector\7BFloat32\7D\7D\2C\20layer_4\3A\3A\40NamedTuple\7Bweight\3A\3AMatrix\7
BFloat32\7D\2C\20bias\3A\3AVector\7BFloat32\7D\7D\7D\2C\20\40NamedTuple\7Blayer_1\3A\3A\40Nam
edTuple\7B\7D\2C\20layer_2\3A\3A\40NamedTuple\7B\7D\2C\20layer_3\3A\3A\40NamedTuple\7B\7D\2C\
20layer_4\3A\3A\40NamedTuple\7B\7D\7D\7D !0, !enzymejl_byref_MUT_REF !0
 value=Unknown object of type StatefulLuxLayer{Static.True, Chain{@NamedTuple{layer_1::Dense{
typeof(tanh), Int64, Int64, Nothing, Nothing, Static.True}, layer_2::Dense{typeof(tanh), Int6
4, Int64, Nothing, Nothing, Static.True}, layer_3::Dense{typeof(tanh), Int64, Int64, Nothing,
 Nothing, Static.True}, layer_4::Dense{typeof(identity), Int64, Int64, Nothing, Nothing, Stat
ic.True}}, Nothing}, @NamedTuple{layer_1::@NamedTuple{weight::Matrix{Float32}, bias::Vector{F
loat32}}, layer_2::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, layer_3::@Nam
edTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, layer_4::@NamedTuple{weight::Matrix{
Float32}, bias::Vector{Float32}}}, @NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{}
, layer_3::@NamedTuple{}, layer_4::@NamedTuple{}}}
 llvalue=  %.fca.0.extract = extractvalue { {} addrspace(10)* } %0, 0, !dbg !8, !enzyme_type 
!9, !enzymejl_source_type_StatefulLuxLayer\7BStatic.True\2C\20Chain\7B\40NamedTuple\7Blayer_1
\3A\3ADense\7Btypeof\28tanh\29\2C\20Int64\2C\20Int64\2C\20Nothing\2C\20Nothing\2C\20Static.Tr
ue\7D\2C\20layer_2\3A\3ADense\7Btypeof\28tanh\29\2C\20Int64\2C\20Int64\2C\20Nothing\2C\20Noth
ing\2C\20Static.True\7D\2C\20layer_3\3A\3ADense\7Btypeof\28tanh\29\2C\20Int64\2C\20Int64\2C\2
0Nothing\2C\20Nothing\2C\20Static.True\7D\2C\20layer_4\3A\3ADense\7Btypeof\28identity\29\2C\2
0Int64\2C\20Int64\2C\20Nothing\2C\20Nothing\2C\20Static.True\7D\7D\2C\20Nothing\7D\2C\20\40Na
medTuple\7Blayer_1\3A\3A\40NamedTuple\7Bweight\3A\3AMatrix\7BFloat32\7D\2C\20bias\3A\3AVector
\7BFloat32\7D\7D\2C\20layer_2\3A\3A\40NamedTuple\7Bweight\3A\3AMatrix\7BFloat32\7D\2C\20bias\
3A\3AVector\7BFloat32\7D\7D\2C\20layer_3\3A\3A\40NamedTuple\7Bweight\3A\3AMatrix\7BFloat32\7D
\2C\20bias\3A\3AVector\7BFloat32\7D\7D\2C\20layer_4\3A\3A\40NamedTuple\7Bweight\3A\3AMatrix\7
BFloat32\7D\2C\20bias\3A\3AVector\7BFloat32\7D\7D\7D\2C\20\40NamedTuple\7Blayer_1\3A\3A\40Nam
edTuple\7B\7D\2C\20layer_2\3A\3A\40NamedTuple\7B\7D\2C\20layer_3\3A\3A\40NamedTuple\7B\7D\2C\
20layer_4\3A\3A\40NamedTuple\7B\7D\7D\7D !0, !enzymejl_byref_MUT_REF !0

What if you do

Enzyme.gradient(set_runtime_activity(Enzyme.Reverse), Const(sum ∘ model), tx)[1]

like the error message suggests?

Thank you for your response! After adding set_runtime_activity, I can get a result, but it is different from the result obtained by Zygote.jl. The code is now:

using Lux, Enzyme, Zygote, Random

model = Chain(
    Dense(2 => 32, tanh),
    Dense(32 => 32, tanh),
    Dense(32 => 32, tanh),
    Dense(32 => 1)
)

rng = Random.default_rng()
Random.seed!(rng, 0)

ps, st = Lux.setup(rng, model)
net = StatefulLuxLayer{true}(model, ps, st)

tx = Matrix{Float32}(undef, 2, 10)
tx[1, :] = rand(rng, Float32, 10)
tx[2, :] = 2.0f0 * rand(rng, Float32, 10) .- 1.0f0

∂u_∂tx_enzyme = Enzyme.gradient(set_runtime_activity(Enzyme.Reverse), Const(sum ∘ net), tx)[1]
∂u_∂tx_zygote = only(Zygote.gradient(sum ∘ net, tx))

The result obtained by Enzyme.jl is

2×10 Matrix{Float32}:
  1.86265   1.98957   1.55351   -0.0734652  …  0.685356   0.78236  0.231365   2.5485
 -0.791866  0.760044  0.845475   0.0183713     0.607271  -1.77086  0.116302  -1.20169

The result obtained by Zygote.jl is

2×10 Matrix{Float32}:
 -0.833       0.0107517  -0.263825  …  -0.970446  -0.348711  -3.83504  -0.140441
  0.0863082  -3.76264    -5.08865      -2.73196   -1.04318   -3.8489   -2.01525

So which one is the correct result?

Tested with Reactant, Enzyme is definitely correct here:

julia> @jit(Enzyme.gradient(set_runtime_activity(Enzyme.Reverse), sum ∘ rnet, rtx))[1]
2×10 ConcretePJRTArray{Float32,2}:
  1.86302   1.98831   1.55275   -0.0736206   2.11187  0.24838   0.684956   0.782401  0.231379   2.54811
 -0.792759  0.757957  0.845732   0.0179966  -2.52851  0.312561  0.607287  -1.77052   0.116359  -1.2012
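
As an independent check (my own sketch, not from the posts above, using the net and tx from the comparison snippet), a central finite difference of sum ∘ net in a single entry of tx can be compared with the corresponding entries of the two gradients:

# central finite difference of f in entry (i, j) of x, for comparison with
# ∂u_∂tx_enzyme[i, j] and ∂u_∂tx_zygote[i, j]
function fd_entry(f, x, i, j; h = 1f-3)
    xp = copy(x); xp[i, j] += h
    xm = copy(x); xm[i, j] -= h
    return (f(xp) - f(xm)) / (2h)
end

fd_entry(sum ∘ net, tx, 1, 1)   # compare with the [1, 1] entries of both gradients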