# Train `model` with ADAM while keeping zero-valued weights frozen:
# any parameter entry that is exactly 0f0 gets its gradient masked to
# zero before the optimiser step, so pruned weights stay pruned.
# Assumes `model`, `epochs`, `data` (iterable of (x, y) batches) and
# `loss(x, y)` are defined elsewhere in the file.
p = params(model)  # implicit-parameter collection (Zygote.Params)
ott = ADAM()
for epoch in 1:epochs
    for (x, y) in data
        # Gradients of the loss w.r.t. every tracked parameter.
        gr = gradient(p) do
            loss(x, y)
        end
        # Mask gradients of frozen (zero) entries so the update leaves them at 0.
        for θ in p
            g = gr[θ]
            # Zygote returns `nothing` for parameters the loss never touched;
            # indexing that would throw, so skip them.
            g === nothing && continue
            g[θ .== 0f0] .= 0f0
        end
        Flux.update!(ott, p, gr)
    end
end
# This approach is adapted from the JuliaLang Discourse thread
# "How to freeze a single weight of a layer?" (Flux usage category).