I am trying to implement a basic DeepAR model to serve as a baseline for comparison with other time series forecasting algorithms. However, when I test the following code, it does not seem to learn the example AR(3) autoregressive process that I feed it. I therefore doubt whether my implementation is correct, or whether it contains an error. Could someone help me? I am not very familiar with the DeepAR architecture.

```
# Train the model by minimising the Gaussian negative log-likelihood (NLL) of each
# target value under the predicted mean and scale. One optimiser step is taken per
# (input sequence, target sequence) pair drawn from the two loaders.
losses = Float64[]
optim = Flux.setup(Flux.Adam(1e-2), model)
@showprogress for (batch_Xₜ, batch_Xₜ₊₁) in zip(loaderXtrain, loaderYtrain)
    loss, grads = Flux.withgradient(model) do m
        # Everything inside this closure must go through `m`, not the global `model`:
        # the gradient is taken with respect to the closure argument only.
        nll = 0.0
        Flux.reset!(m)
        # Feed the first observation to initialise the state; its output is discarded.
        m([batch_Xₜ[1]])
        for (x, y) in zip(batch_Xₜ[2:end], batch_Xₜ₊₁[2:end])
            μ, σraw = m([x])
            # softplus keeps the predicted scale strictly positive.
            σ = softplus(σraw)
            # NLL of `y` under Normal(μ, σ). The density is evaluated at the predicted
            # mean μ itself; drawing a sample here would only inject noise into the loss.
            nll = nll + log(sqrt(2 * π)) + log(σ) + (y - μ)^2 / (2 * σ^2)
        end
        # `nll` is already the quantity to minimise, so it is returned without negation.
        nll / length(batch_Xₜ)
    end
    Flux.update!(optim, model, grads[1])
    push!(losses, loss)
end
```