The weights of the first layer are real parameters, and I need to fix their values to zero for the rising arrows.
Not sure what you mean by “rising arrows”.
If what you want to do is to keep an entire layer static, then just don’t include it when creating your params. Or you can remove specific arrays from your parameters using Flux.delete!. It is explained in this section of the docs: Custom Layers · Flux
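For the whole-array case, one pattern from that docs section is to tell Flux which fields of a custom layer are trainable. A rough sketch, assuming a recent Flux version (the Affine layer here is only for illustration):

struct Affine
    W
    b
end
Flux.@functor Affine
(a::Affine)(x) = a.W * x .+ a.b
# Report only W to the optimiser; b is kept static during training
Flux.trainable(a::Affine) = (; W = a.W)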
On the other hand, if what you want is to keep certain entries within an array static while keeping others learnable, then I would suggest that you write a custom rule for the backward pass, which applies a boolean mask to keep the desired parameters static. This section of the ChainRules docs should be helpful for that: Deriving array rules · ChainRules
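In sketch form, that approach looks something like the following; masked_dense and mask are illustrative names, not an existing API, and the mask pattern is just an example:

using ChainRulesCore

# 1 = entry may change during training, 0 = entry stays frozen
const mask = [1 0; 1 1]

masked_dense(W, x) = W * x

function ChainRulesCore.rrule(::typeof(masked_dense), W, x)
    y = masked_dense(W, x)
    function masked_dense_pullback(Δy)
        ΔW = (Δy * x') .* mask    # the boolean mask zeroes the frozen entries' gradient
        Δx = W' * Δy
        return (NoTangent(), ΔW, Δx)
    end
    return y, masked_dense_pullback
end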
For freezing an entire array you can follow this section of the docs Custom Layers · Flux
For individual entries instead, probably the most convenient thing is to apply a mask on the gradient.
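A minimal sketch of that idea with a plain Dense layer and explicit-style training; the lower-triangular mask and the small sizes are just for illustration:

using Flux

model = Dense(3 => 3)
mask = [1 0 0; 1 1 0; 1 1 1]      # entries marked 0 are kept static
opt_state = Flux.setup(Adam(), model)
x, y = rand(Float32, 3, 8), rand(Float32, 3, 8)

grads = Flux.gradient(m -> Flux.mse(m(x), y), model)[1]
grads.weight .*= mask              # zero the gradient of the frozen entries
Flux.update!(opt_state, model, grads)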
Thanks. The rising arrows for the first layer are W(2,1), W(3,1), W(3,2).
The mask is a lower triangular matrix that has zeros in the upper triangle.
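For example, tril(ones(3, 3)) produces exactly that kind of mask:

julia> using LinearAlgebra

julia> tril(ones(3, 3))
3×3 Matrix{Float64}:
 1.0  0.0  0.0
 1.0  1.0  0.0
 1.0  1.0  1.0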
Here is the solution that works for me.
# Dataset: 100 samples with 100 features each (AX) and a scalar target per sample (AZ)
AZ = rand(100, 1)
AX = rand(100, 100)
# Import the required packages:
using Flux
using ChainRulesCore
using LinearAlgebra
using Plots
using CUDA
using MLUtils
# Define the custom lower triangular layer
struct LowerTriangularLayer{T}
    W::T
    b::T
end

# Register the fields with Flux so that gpu/Flux.setup see W and b as trainable parameters
Flux.@functor LowerTriangularLayer

# Forward pass: an affine map, like Dense but holding our own weight matrix
function (ltl::LowerTriangularLayer)(x)
    return ltl.W * x .+ ltl.b
end
# Define the custom gradient rule for the layer: the pullback masks the weight
# gradient with tril, so entries above the diagonal never receive updates
function ChainRulesCore.rrule(ltl::LowerTriangularLayer, x)
    y = ltl.W * x .+ ltl.b
    function lower_triangular_pullback(Δy)
        ΔW = tril(Δy * x')        # zero the gradient above the diagonal
        Δb = sum(Δy, dims=2)
        Δx = ltl.W' * Δy
        return (Tangent{typeof(ltl)}(W=ΔW, b=Δb), Δx)
    end
    return y, lower_triangular_pullback
end
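# Quick sanity check of the rule above (small CPU example; sizes are arbitrary):
# the W-gradient coming out of the pullback is itself lower triangular, so the
# entries above the diagonal of W never move during training.
ltl_test = LowerTriangularLayer(tril(randn(3, 3)), zeros(3, 1))
g = Flux.gradient(m -> sum(m(randn(3, 5))), ltl_test)[1]
@assert istril(g.W)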
# Create the 2-layer neural network
function build_model(input_size, hidden_size, output_size)
    W1 = tril(randn(hidden_size, input_size)) |> gpu   # lower-triangular init, moved to GPU memory
    b1 = zeros(hidden_size) |> gpu
    b1_matrix = reshape(b1, (hidden_size, 1))           # same array type as W1, so both fit the struct
    ltl = LowerTriangularLayer(W1, b1_matrix)
    W2 = randn(output_size, hidden_size) |> gpu
    b2 = zeros(output_size) |> gpu
    layer2 = Dense(W2, b2, identity)
    return Chain(ltl, x -> σ.(x), layer2)               # σ must be broadcast over the array
end
# Train a Flux model and record the loss after each epoch
function train_model(model, loss, opt_stat, train_loader, epochs)
    loss_history = Float64[]
    for epoch in 1:epochs
        Flux.train!(loss, model, train_loader, opt_stat)
        # Training loss over the full dataset (X and Z are the globals defined below)
        train_loss = loss(model, X, Z)
        push!(loss_history, train_loss)
    end
    return loss_history
end
# Make predictions with a Flux model, moving the result back to the CPU
function predict(model, data)
    return model(data) |> cpu
end
# Wrap the data in a DataLoader
function create_dataloader(X, Z, batch_size, shuffle=false)
    return DataLoader((X, Z), batchsize=batch_size, shuffle=shuffle)
end
# Define the target and input data (Flux expects features × samples, hence the transposes)
Z = Matrix(AZ') |> gpu
X = Matrix(AX') |> gpu
# Build the model and create a data loader (the hidden size is a free choice; here it happens to equal size(X, 2) = 100)
model = build_model(size(X, 1), size(X, 2), size(Z, 1))
batch_size = 1
train_loader = create_dataloader(X, Z, batch_size)
# Define a loss function and an optimizer
loss(model, x, y) = Flux.mse(model(x), y)
opt_stat = Flux.setup(Adam(), model)
# Train the model
epochs = 3
loss_history = train_model(model, loss, opt_stat, train_loader, epochs)
# Check whether W1 is still lower triangular (bring it back to the CPU first)
if istril(model.layers[1].W |> cpu)
println("W1 is still lower triangular")
else
println("W1 is no longer lower triangular")
end
# Create a matrix of values
matrix = model.layers[1].W |> cpu
# Create a color map
cmap = :plasma
# Create the heatmap
img = heatmap(matrix, background_color = RGB(0,0,0), c=cmap);
plot(img, color="powderblue", background_color = RGB(0,0,0), title="heat map",titlefontsize=10, xtickfontsize=5, primary=false)
# Make predictions on the training data
predictions = predict(model, X)
plot(predictions', color="powderblue", background_color = RGB(0,0,0), title="Q",titlefontsize=10, xtickfontsize=5, primary=false)