I am trying to fit a simple trigonometric function (sin(x) in the code below) with a hand-written three-layer neural network. Despite extensive debugging, I have not been able to find the source of the error. I suspect there is a substantial mistake in my gradient calculation that is keeping the model from fitting. I would appreciate any help, and thank you for your time.
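For reference, here is the chain rule I believe the backpropagation below should implement (a sketch in the same notation as the code, where σ is mysigmoid and every weight acts elementwise on its own sample):

$$
\begin{aligned}
&z_1 = w_1 x + b_1,\; h_1 = \sigma(z_1), \qquad z_2 = w_2 h_1 + b_2,\; h_2 = \sigma(z_2), \qquad z_3 = w_3 h_2 + b_3,\; h_3 = \sigma(z_3),\\
&L = \frac{1}{n}\sum_{i=1}^{n} \bigl(h_{3,i} - y_i\bigr)^2,\\
&\delta_3 = \tfrac{2}{n}(h_3 - y)\,\sigma'(z_3), \qquad \frac{\partial L}{\partial w_3} = \delta_3\, h_2, \qquad \frac{\partial L}{\partial b_3} = \delta_3,\\
&\delta_2 = w_3\,\delta_3\,\sigma'(z_2), \qquad \frac{\partial L}{\partial w_2} = \delta_2\, h_1, \qquad \frac{\partial L}{\partial b_2} = \delta_2,\\
&\delta_1 = w_2\,\delta_2\,\sigma'(z_1), \qquad \frac{\partial L}{\partial w_1} = \delta_1\, x, \qquad \frac{\partial L}{\partial b_1} = \delta_1 .
\end{aligned}
$$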
using LinearAlgebra, Statistics, Plots
# Define the activation functions and their derivatives
function mysigmoid(x)
    1 / (1 + exp(-x))          # applied elementwise via mysigmoid.(z)
end
function mysigmoid_derivative(x)
    s = mysigmoid(x)
    s * (1 - s)
end
# Define the loss function (mean squared error) and its derivative
function loss_fn(y, y_pred)
    mean((y_pred .- y) .^ 2)
end
function loss_derivative(y, y_pred)
    2 .* (y_pred .- y) ./ length(y)
end
# Define the neural network model
function networkmine(h0, w1, b1, w2, b2, w3, b3)
    # Forward pass: three elementwise sigmoid layers
    # (note: the original version reassigned h0 = x, silently replacing the argument with the global x)
    z1 = w1 .* h0 .+ b1
    h1 = mysigmoid.(z1)
    z2 = w2 .* h1 .+ b2
    h2 = mysigmoid.(z2)
    z3 = w3 .* h2 .+ b3
    h3 = mysigmoid.(z3)
    return h3, h2, h1, z3, z2, z1
end
# Training data
n = 50
x = LinRange(0, pi, n)
y = sin.(x)
# Initialize the weights and biases (one weight and bias per data point, since every layer acts elementwise)
w1 = 0.01 * randn(n, 1)
b1 = zeros(n, 1)
w2 = 0.01 * randn(n, 1)
b2 = zeros(n, 1)
w3 = 0.01 * randn(n, 1)
b3 = zeros(n, 1)
learning_rate = 0.01
# Train the neural network
for i in 1:1000
    global w1, b1, w2, b2, w3, b3, y_pred
    # Forward pass: compute the predicted output
    y_pred, h2, h1, z3, z2, z1 = networkmine(x, w1, b1, w2, b2, w3, b3)
    # Compute the loss (printed occasionally to monitor training)
    current_loss = loss_fn(y, y_pred)
    if i % 100 == 0
        println("iteration $i: loss = $current_loss")
    end
    # Backward pass: gradients via the chain rule, layer by layer
    d_y_pred = loss_derivative(y, y_pred)                  # dL/dh3
    delta3 = d_y_pred .* mysigmoid_derivative.(z3)         # dL/dz3
    d_w3 = h2 .* delta3
    d_b3 = delta3
    delta2 = (w3 .* delta3) .* mysigmoid_derivative.(z2)   # dL/dz2
    d_w2 = h1 .* delta2
    d_b2 = delta2
    delta1 = (w2 .* delta2) .* mysigmoid_derivative.(z1)   # dL/dz1
    d_w1 = x .* delta1
    d_b1 = delta1
    # Update the weights and biases
    w3 = w3 .- learning_rate .* d_w3
    b3 = b3 .- learning_rate .* d_b3
    w2 = w2 .- learning_rate .* d_w2
    b2 = b2 .- learning_rate .* d_b2
    w1 = w1 .- learning_rate .* d_w1
    b1 = b1 .- learning_rate .* d_b1
end
# Plot the results
plot(x, y, label = "target")
plot!(x, y_pred, label = "prediction")
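To narrow down whether the hand-derived gradients are the problem, this is a small finite-difference check I intend to run after training (a sketch only: eps_fd, the entry index k, and the d_w3_analytic / d_w3_numeric names are just illustrative; it reuses the functions defined above):

# Finite-difference check of one entry of the w3 gradient (analytic vs. numerical)
eps_fd = 1e-6
k = 1                                      # which entry of w3 to check
y_pred, h2, h1, z3, z2, z1 = networkmine(x, w1, b1, w2, b2, w3, b3)
d_y_pred = loss_derivative(y, y_pred)
d_w3_analytic = h2 .* (d_y_pred .* mysigmoid_derivative.(z3))
w3p = copy(w3); w3p[k] += eps_fd           # perturb one weight up
w3m = copy(w3); w3m[k] -= eps_fd           # and down
loss_p = loss_fn(y, networkmine(x, w1, b1, w2, b2, w3p, b3)[1])
loss_m = loss_fn(y, networkmine(x, w1, b1, w2, b2, w3m, b3)[1])
d_w3_numeric = (loss_p - loss_m) / (2 * eps_fd)
println("analytic d_w3[k] = ", d_w3_analytic[k], ",  numeric = ", d_w3_numeric)

If the two numbers agree, I would take that as evidence the mistake is somewhere other than the gradient formulas.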