Writing neural-network training code with the ADAM optimizer

I have code for my DSGE model and I want to find the decision functions using neural networks, i.e. deep learning with the ADAM optimizer. I have read the quick-start example in Flux, but unfortunately I don't know how to write the training code for my own problem. Here are all the other parts of the code:

```julia
using Statistics
using Flux, Plots
using Random, Distributions, Flux.Optimise, ProgressMeter

# Parameters
betta = 0.99;
h = 0.7;
varphi = 1.5;
epsilon = 11;
theta = 0.75;
kappa_p = (epsilon - 1)*theta/((1 - theta)*(1 - betta*theta));
rho_r = 0.6;
mu_pie = 1.5;
mu_y = 0.125;
rho_a = 0.7;
rho_z = 0.7;
sigma_a = 0.05;
sigma_z = 0.03;
sigma_r = 0.07;
mc_ss = (epsilon - 1)/epsilon;
wss = mc_ss;
css = (wss*(1 - h))^(varphi - 1);
nss = css;
piess = 1.01;
Rss = piess/betta;

# Standard deviations for the ergodic distributions of the exogenous state variables

sigma_e_a = sigma_a/(1-rho_a^2)^0.5
sigma_e_z = sigma_z/(1-rho_z^2)^0.5
sigma_e_r = sigma_r

# Bounds for the endogenous state variables

Rmin = 1
Rmax = 3*Rss
cmin = 0.1*css;
cmax = 3*css;

# Neural network construction
# no softmax at the end: dr() applies exp.() to the first output
# and leaves the second output unrestricted
model = Chain(
    Dense(5 => 32, relu), # activation function inside layer
    Dense(32 => 32, relu),
    Dense(32 => 2)
) |> gpu # move model to GPU, if available
function dr(a::Vector, z::Vector, Mon_shock::Vector, R::Vector, C::Vector)

    # we normalize the exogenous state variables by 2 standard deviations
    # so that they are typically between -1 and 1
    a = a/sigma_e_a/2
    z = z/sigma_e_z/2
    Mon_shock = Mon_shock/sigma_e_r/2

    # we normalize the interest rate and consumption to be between -1 and 1
    R = (R .- Rmin)./(Rmax - Rmin).*2.0 .- 1.0
    C = (C .- cmin)./(cmax - cmin).*2.0 .- 1.0

    # stack the states into a 5 x n matrix (one column per grid point)
    s = transpose([a z Mon_shock R C])
    B = convert(Matrix{Float32}, s) |> gpu # move data to GPU, if available

    x = model(B) # 2 x n matrix

    # marginal utility of consumption is always positive
    lambda_decision = exp.(x[1, :])

    # there is no restriction on inflation
    pie_decision = x[2, :]

    return (lambda_decision, pie_decision)
end

function Residuals(e_r::Vector, e_z::Vector, e_a::Vector, Mon_shock::Vector, z::Vector, a::Vector, Rinitial::Vector, Cinitial::Vector)

    # all inputs are expected to have the same length n

    # arguments correspond to the values of the states today
    lambda, inflation = dr(a, z, Mon_shock, Rinitial, Cinitial)
    Ccurrent = exp.(z)./lambda + h*Cinitial
    gdp = Ccurrent
    l = gdp./exp.(a)
    w = exp.(z).*l.^varphi./lambda
    mc = w./exp.(a)
    Rshadow = rho_r*Rinitial + (Rss .+ (inflation .- piess)*mu_pie .+ (gdp .- css)*mu_y)*(1 - rho_r) + Mon_shock
    Rcurrent = max.(1, Rshadow) # zero lower bound on the gross policy rate

    # transitions of the exogenous processes
    anext = a*rho_a + e_a
    znext = z*rho_z + e_z
    Mon_shocknext = e_r

    # transition of the endogenous states (next denotes variables at t+1)
    lambdanext, inflationnext = dr(anext, znext, Mon_shocknext, Rcurrent, Ccurrent)
    Cnext = exp.(znext)./lambdanext + h*Ccurrent
    gdpnext = Cnext

    # Euler equation and Phillips curve residuals
    R1 = lambda - betta*Rcurrent.*lambdanext./inflationnext
    R2 = -inflation + betta*lambda./lambdanext.*(inflationnext .- piess).*gdpnext./gdp .+ epsilon/kappa_p*(mc .- (epsilon - 1)/epsilon) .+ piess

    return (R1, R2)
end

function Objective(n) # objective function for DL training

    # randomly draw the current states
    z = sigma_e_z*randn(n)
    a = sigma_e_a*randn(n)
    Mon_shock = sigma_e_r*randn(n)
    Rcurrent = rand(Uniform(Rmin, Rmax), n)
    Ccurrent = rand(Uniform(cmin, cmax), n)

    # randomly draw the 1st realization of the shocks
    e1_a = sigma_e_a*randn(n)
    e1_z = sigma_e_z*randn(n)
    e1_r = sigma_e_r*randn(n)
    # randomly draw the 2nd (independent) realization of the shocks
    e2_a = sigma_e_a*randn(n)
    e2_z = sigma_e_z*randn(n)
    e2_r = sigma_e_r*randn(n)

    # residuals at the n random grid points under the 2 realizations of the shocks
    R1_e1, R2_e1 = Residuals(e1_r, e1_z, e1_a, Mon_shock, z, a, Rcurrent, Ccurrent)
    R1_e2, R2_e2 = Residuals(e2_r, e2_z, e2_a, Mon_shock, z, a, Rcurrent, Ccurrent)

    # all-in-one expectation operator: the product of residuals under two
    # independent shock draws is an unbiased estimate of the squared expectation
    R_squared = R1_e1.*R1_e2 + R2_e1.*R2_e2

    # average across the n random draws
    return mean(R_squared)

end
```
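To spell out what `Objective` computes: it uses the all-in-one expectation operator, i.e. the product of the residuals evaluated under two independent shock draws $e^1$ and $e^2$, since for independent draws

$$\mathbb{E}\!\left[R(e^1)\,R(e^2)\right] = \mathbb{E}\!\left[R(e^1)\right]\mathbb{E}\!\left[R(e^2)\right] = \left(\mathbb{E}[R]\right)^2,$$

so `mean(R_squared)` is an unbiased Monte Carlo estimate of $\left(\mathbb{E}[R_1]\right)^2 + \left(\mathbb{E}[R_2]\right)^2$ averaged over the randomly sampled states.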

Now I need to minimize this objective function with respect to the network parameters using ADAM, and this training loop is the part I don't know how to write.
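Based on the Flux quick start, I think the missing piece is something along these lines. This is only a sketch of what I have in mind: `n_iterations`, `batch_size`, and the learning rate `1e-3` are placeholder values I chose, and I am using the implicit-parameters style (`Flux.params` with `Flux.Optimise.update!`) since the code already loads `Flux.Optimise`:

```julia
n_iterations = 5000 # placeholder: number of ADAM steps
batch_size = 1000   # placeholder: random draws per step (the n in Objective)

opt = ADAM(1e-3)        # ADAM optimizer (called Adam in newer Flux versions)
ps = Flux.params(model) # trainable parameters of the network

losses = Float64[]
@showprogress for it in 1:n_iterations
    # value and gradient of the Monte Carlo objective w.r.t. the network parameters
    loss, gs = Flux.withgradient(ps) do
        Objective(batch_size)
    end
    Flux.Optimise.update!(opt, ps, gs) # one ADAM step
    push!(losses, loss)
end

plot(losses, xlabel = "iteration", ylabel = "objective", label = "loss")
```

Is this the right pattern, or do the random draws inside `Objective` need to be generated outside the `gradient` call for Zygote to differentiate through it?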