# Here is the solution that works for me.
# Synthetic dataset: 100 target values and a 100×100 input matrix,
# drawn uniformly from [0, 1).
AZ = rand(Float64, 100, 1)   # targets, one column per sample set
AX = rand(Float64, 100, 100) # inputs, one row per sample
#Import the required modules:
using Flux
using ChainRulesCore
using LinearAlgebra
using Plots
using CUDA
using MLUtils
# Custom affine layer whose weight matrix is meant to stay lower triangular.
#
# Fields
# - `W`: weight matrix (lower triangular by construction in `build_model`)
# - `b`: bias, stored with the same array type as `W` (a column matrix here)
#
# NOTE(review): on Flux versions where structs are not functors by default
# (Functors.jl < 0.5), this type must be registered with
# `Flux.@functor LowerTriangularLayer` for `Flux.setup` to track `W` and `b`
# as trainable parameters — confirm against the Flux version in use.
struct LowerTriangularLayer{T}
    W::T
    b::T
end

# Forward pass: affine transform `W * input .+ b`.
function (layer::LowerTriangularLayer)(input)
    return layer.W * input .+ layer.b
end
# Custom reverse rule for calling the layer itself.
#
# Fix: the original rule was defined for `*(::typeof(*), ltl, x)`, but the
# forward pass computes `ltl.W * x`, which dispatches `*` on the plain weight
# matrix — so that rule never fired and the weight gradient was never
# projected onto the lower triangle. Hooking the functor call guarantees the
# gradient of `W` is masked with `tril`, so optimizer updates keep the zero
# entries at zero and `W` stays lower triangular through training.
function ChainRulesCore.rrule(ltl::LowerTriangularLayer, x)
    y = ltl.W * x .+ ltl.b
    function lower_triangular_pullback(Δy)
        # Keep only the lower-triangular part of the weight gradient.
        ΔW = tril(Δy * x')
        # The bias broadcasts over the batch dimension, so sum it back.
        Δb = sum(Δy; dims=2)
        Δx = ltl.W' * Δy
        return (Tangent{typeof(ltl)}(W=ΔW, b=Δb), Δx)
    end
    return y, lower_triangular_pullback
end
# Assemble the 2-layer network: lower-triangular affine → sigmoid → Dense.
#
# Arguments
# - `input_size`: number of input features
# - `hidden_size`: width of the triangular hidden layer
# - `output_size`: number of outputs
#
# Returns a `Chain(LowerTriangularLayer, σ, Dense)` with all parameters moved
# to GPU memory (`gpu` is the identity on CPU-only machines).
function build_model(input_size, hidden_size, output_size)
    # Hidden layer: lower-triangular weights; the bias is reshaped to a
    # column matrix so both struct fields share the same array type.
    hidden_W = tril(randn(hidden_size, input_size)) |> gpu
    hidden_b = zeros(hidden_size) |> gpu
    triangular = LowerTriangularLayer(hidden_W, reshape(hidden_b, (hidden_size, 1)))
    # Output layer: plain dense map with identity activation.
    out_W = randn(output_size, hidden_size) |> gpu
    out_b = zeros(output_size) |> gpu
    return Chain(triangular, σ, Dense(out_W, out_b, identity))
end
# Train `model` for `epochs` full passes over `train_loader`, recording the
# loss after each epoch.
#
# Arguments
# - `model`: the Flux model to optimize (updated in place)
# - `loss`: callable `loss(model, x, y)` returning a scalar
# - `opt_stat`: optimizer state from `Flux.setup`
# - `train_loader`: iterable of `(x, y)` batches
# - `epochs`: number of epochs to run
#
# Returns the per-epoch loss history as a `Vector{Float64}`.
#
# Fix: the original computed the epoch loss from the globals `X` and `Z`
# (defined later in the script), silently coupling this function to script
# state; the loss is now averaged over the loader's own batches.
function train_model(model, loss, opt_stat, train_loader, epochs)
    loss_history = Float64[]
    for epoch in 1:epochs
        Flux.train!(loss, model, train_loader, opt_stat)
        # Mean batch loss over the training set (equals the full-data MSE
        # when all batches have equal size).
        epoch_loss = sum(loss(model, x, y) for (x, y) in train_loader) / length(train_loader)
        push!(loss_history, epoch_loss)
    end
    return loss_history
end
# Run `model` on `data` and copy the result back to host (CPU) memory.
predict(model, data) = cpu(model(data))
# Wrap the `(X, Z)` pair in a `DataLoader`; shuffling is off unless requested.
create_dataloader(X, Z, batch_size, shuffle=false) =
    DataLoader((X, Z); batchsize=batch_size, shuffle=shuffle)
# Targets and inputs, transposed to (features, samples) layout and moved to
# GPU memory (`gpu` is the identity on CPU-only machines).
Z = permutedims(AZ) |> gpu
X = permutedims(AX) |> gpu
# Model sized from the data, a batch-size-1 loader, MSE loss, and Adam state.
model = build_model(size(X, 1), size(X, 2), size(Z, 1))
batch_size = 1
train_loader = create_dataloader(X, Z, batch_size)
# Mean-squared-error between the model output and the targets.
function loss(model, x, y)
    return Flux.mse(model(x), y)
end
opt_stat = Flux.setup(Adam(), model)
# Train, then verify the triangular structure survived optimization.
epochs = 3
loss_history = train_model(model, loss, opt_stat, train_loader, epochs)
# Report whether the first layer's weights are still lower triangular.
status = istril(model.layers[1].W) ?
    "W1 is still lower triangular" :
    "W1 is no longer lower triangular"
println(status)
# Visualize the trained triangular weight matrix as a heatmap.
weight_matrix = cpu(model.layers[1].W)
heat = heatmap(weight_matrix, background_color = RGB(0, 0, 0), c = :plasma);
plot(heat, color="powderblue", background_color = RGB(0,0,0), title="heat map",titlefontsize=10, xtickfontsize=5, primary=false)

# Run the trained model on the training inputs and plot the predictions.
predictions = predict(model, X)
plot(predictions', color="powderblue", background_color = RGB(0,0,0), title="Q",titlefontsize=10, xtickfontsize=5, primary=false)
