How to freeze a single weight of a layer?

Is there a way to freeze a single weight instead of an entire layer?
The Flux delete! function can only freeze a whole parameter array (i.e. an entire layer's weights or biases), not a single entry.
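For reference, this is roughly what array-level freezing with delete! looks like (a minimal sketch, assuming an older Flux version where a Dense layer stores its weights in a W field, as in the code below):

using Flux

model = Chain(Dense(2, 2, relu), Dense(2, 1))
ps = Flux.params(model)   # implicit parameter collection

# delete! removes a whole parameter array from ps, so the entire weight
# matrix of the first layer is frozen at once; it cannot target W[2] alone.
delete!(ps, model[1].W)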

using Flux
using Flux: throttle

function micronet()
    simplenet = gpu(Chain(
        Dense(2, 2, relu),
        Dense(2, 1),
    ))
    return simplenet
end

function dummy_data_micronet(train::Int64, test::Int64)
    xtrain = rand(train, 2) |> cpu
    ytrain = rand(train) |> cpu
    xtest = rand(test, 2) |> cpu
    ytest = rand(test) |> cpu
    data = [(xtrain', ytrain')] |> cpu

    return data, xtest', ytest'
end

model = SimpleNet.micronet()
data, xtest, ytest = DataCostumizer.dummy_data_micronet(1000, 100)
loss(x, y) = Flux.mse(model(x), y)
ps = params(model)
opt = ADAM()
evalcb = () -> println("removed edges : ", MagnitudePruning.compute_zero_entries(model), "\n  loss : ", loss(xtest, ytest))
@Flux.epochs 10 Flux.train!(loss, ps, data, opt, cb = throttle(evalcb, 10))
# attempt to freeze just the single weight model[1].W[2]
delete!(ps, model[1].W[2])
@Flux.epochs 10 Flux.train!(loss, ps, data, opt, cb = throttle(evalcb, 10))

The same goes for Flux.trainable.
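Flux.trainable has the same granularity: you override it to return whole fields of a layer, so you can freeze, say, the bias in one go, but not a single entry of the weight matrix. A minimal sketch, using an illustrative Affine layer modelled on the custom-layer example in the Flux docs:

using Flux

# Illustrative custom layer (not the code above), just to show trainable's granularity.
struct Affine
    W
    b
end
Affine(in::Integer, out::Integer) = Affine(randn(out, in), zeros(out))
(m::Affine)(x) = m.W * x .+ m.b

Flux.@functor Affine

# trainable selects whole fields: only W is trained, b is frozen entirely.
# There is no way to express "train W except W[2]" at this level.
Flux.trainable(m::Affine) = (m.W,)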


Thank you for your answer!
I have tried your method, but it seems that none of the parameters are updated this way.

model = Chain(
    Flux.Dense(2, 2, relu),
    Flux.Dense(2, 1),
    softmax,
)

# %%
data,xtest,ytest = DataCostumizer.dummy_data_micronet(10000,1000)

loss(x, y) = Flux.mse(model(x),y)
p = params(model)
opt = ADAM()

epochs = 1

indices_to_update = [i for i in 1:length(p[1]) if i ≠ 4]  # indices of p[1] you want to update
p |> println

for epoch in 1:epochs
    for (x, y) in data
        gr = gradient(p) do 
            loss(x, y)
        end 
        for i in 1:length(p)
            if i ≠ 1
                Flux.update!(opt, p[i], gr[p[i]])
            else
                Flux.update!(opt, 
                             p[i][indices_to_update], 
                             gr[p[i]][indices_to_update])
            end
        end
    end
end

p |> println  

The output before the loop is:

Params([Float32[-0.8721409 1.1937331; -0.71039486 1.121799], Float32[0.0, 0.0], Float32[-0.14334783 0.29841754], Float32[0.0]])

and after the loop, unchanged:

Params([Float32[-0.8721409 1.1937331; -0.71039486 1.121799], Float32[0.0, 0.0], Float32[-0.14334783 0.29841754], Float32[0.0]])

The code above still removes an entire parameter. The easiest way to freeze a single connection/weight is to zero or mask out the corresponding index in the gradient array:

...

for epoch in 1:epochs
    for (x, y) in data
        gr = gradient(p) do 
            loss(x, y)
        end

        # zero the gradient entry for the single weight we want to freeze
        gr[model[1].W][2] = 0
        Flux.update!(opt, p, gr)
    end
end
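If several scattered weights need freezing, the same trick generalizes by zeroing a list of indices in the gradient before the update. A sketch reusing the variables above; frozen is just an illustrative name, not a Flux API:

frozen = [2]   # linear indices into model[1].W of the weights to freeze

for epoch in 1:epochs
    for (x, y) in data
        gr = gradient(() -> loss(x, y), p)
        gr[model[1].W][frozen] .= 0   # zero the gradients of all frozen entries
        Flux.update!(opt, p, gr)
    end
end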

My answer is wrong, ToucheSir is right.
I had used that method to update other parameters and didn’t realize it was incorrect there as well.
My mistake, I apologize.

Thanks for your answer!
But I was wondering: does this method only work if the update depends solely on the gradient?
For example, with the ADAM optimizer, couldn’t a weight still change even when its gradient is set to zero, since the update doesn’t depend only on the current gradient value?

Not so. If you look at the definition of ADAM in Flux, you’ll notice that the parameter update will be 0 if the gradient, mt and vt are all 0 as well. Since we know the gradient for the frozen weight is 0 and the optimizer params are initialized with zeros, we can guarantee that weight will never be updated because all operations are applied element-wise.
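To make that concrete, here is the element-wise ADAM recursion for a single frozen weight, written out as a standalone sketch (standard ADAM formulas without bias correction; the variable names are illustrative, not Flux internals):

let β1 = 0.9, β2 = 0.999, η = 0.001, ϵ = 1e-8
    m, v, g = 0.0, 0.0, 0.0   # moments start at zero; the frozen weight's gradient stays zero
    for t in 1:1000
        m = β1 * m + (1 - β1) * g     # first moment stays exactly 0
        v = β2 * v + (1 - β2) * g^2   # second moment stays exactly 0
        Δ = η * m / (sqrt(v) + ϵ)     # 0 / ϵ == 0, so the step for this weight is 0
        @assert Δ == 0
    end
end

Every quantity stays zero at every step, so the update applied to the frozen entry is always zero.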
