Hi everyone,
I am new to the Julia ML ecosystem and would like some help with a specific problem. I am using Julia v1.8 and Flux v0.13.16.
I am minimizing a function; in fact, this function is the ground-state energy.
The method I am developing requires the evaluation of a custom gradient. It looks like this (the following code is only an example and is pretty sloppy):
using Flux, Zygote, LinearAlgebra

# Number of sample points and the randomly drawn sample positions.
N = 5
𝐫 = rand(Float32, N)

"""
    NeuralAnsatz(chain)

Trial function backed by a Flux `Chain`. Calling it on an input `x` returns
`exp.(chain(x)) .^ 2`.
"""
struct NeuralAnsatz
    chain::Chain
end

function (m::NeuralAnsatz)(x)
    return exp.(m.chain(x)) .^ 2
end

# Register the struct with Flux so `Flux.params` can collect the chain's parameters.
Flux.@functor NeuralAnsatz

"""
    ∇²(g::NeuralAnsatz, 𝐱::Vector)

Laplacian of `𝐱 -> sum(g(𝐱))` at `𝐱`, computed as the sum of the diagonal of
its Hessian.
"""
∇²(g::NeuralAnsatz, 𝐱::Vector) = sum(Diagonal(hessian(𝐱 -> sum(g(𝐱)), 𝐱)))

## example
chain = Chain(Dense(1, 1, relu))
Ψ = NeuralAnsatz(chain)

# Potential term: half the square of the first coordinate.
φ(𝐱::Vector) = 1 / 2 * 𝐱[1]^2

# Ĥ applied to ψ at 𝐱: -∇²ψ/2 plus the potential term times ψ(𝐱).
Ĥ(𝐱::Vector, ψ::NeuralAnsatz) = -∇²(ψ, 𝐱) / 2 .+ φ(𝐱) * ψ(𝐱)

# ψ(𝐱)⁻¹ · Ĥψ(𝐱)
ε₀(𝐱::Vector, ψ::NeuralAnsatz) = ψ(𝐱).^-1 .* Ĥ(𝐱, ψ)  ## loss function

##
# Average the parameter gradient of the loss over all sample points.
# NOTE: differentiating through `hessian` here is what raises Zygote's
# "Mutating arrays is not supported" error.
g = gradient(() -> sum(ε₀([𝐫[1]], Ψ)), Flux.params(Ψ))
for i in 2:N
    g .+= gradient(() -> sum(ε₀([𝐫[i]], Ψ)), Flux.params(Ψ))  ## somehow need to accumulate this gradient
end
g = g ./ N
# ##
The problem is that the code throws the typical error about mutating arrays. I know that the mutation comes from:
Ĥ(𝐱::Vector, ψ::NeuralAnsatz)=-∇²(ψ,𝐱)/2 .+φ(𝐱)*ψ(𝐱)
I am guessing that, even though there is no explicit mutation, a mutation of the vector 𝐱 occurs somewhere under the hood inside the `hessian` function. How can I deal with this? Will I have to do some crazy stuff like defining my own chain rule?
There is another way of evaluating the gradient of the loss; if there is no good solution here, I can make a separate post about that approach.
Here is the stack trace for:
gradient(()->sum(∇²(ψ,[1.0])/2),Flux.params(Ψ))
ERROR: Mutating arrays is not supported -- called setindex!(Matrix{Float64}, ...)
This error occurs when you ask Zygote to differentiate operations that change
the elements of arrays in place (e.g. setting values with x .= ...)
Possible fixes:
- avoid mutating operations (preferred)
- or read the documentation and solutions for this error
https://fluxml.ai/Zygote.jl/latest/limitations
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] _throw_mutation_error(f::Function, args::Matrix{Float64})
@ Zygote ~/.julia/packages/Zygote/SuKWp/src/lib/array.jl:88
[3] (::Zygote.var"#550#551"{Matrix{Float64}})(#unused#::Nothing)
@ Zygote ~/.julia/packages/Zygote/SuKWp/src/lib/array.jl:100
[4] (::Zygote.var"#2620#back#552"{Zygote.var"#550#551"{Matrix{Float64}}})(Δ::Nothing)
@ Zygote ~/.julia/packages/ZygoteRules/OgCVT/src/adjoint.jl:71
[5] Pullback
@ ~/.julia/packages/Zygote/SuKWp/src/lib/forward.jl:31 [inlined]
[6] (::Zygote.Pullback{Tuple{typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}, Val{1}}, Any})(Δ::Tuple{Nothing, Diagonal{Float64, FillArrays.Fill{Float64, 1, Tuple{Base.OneTo{Int64}}}}})
@ Zygote ~/.julia/packages/Zygote/SuKWp/src/compiler/interface2.jl:0
[7] Pullback
@ ~/.julia/packages/Zygote/SuKWp/src/lib/forward.jl:44 [inlined]
[8] Pullback
@ ~/.julia/packages/Zygote/SuKWp/src/lib/forward.jl:42 [inlined]
[9] Pullback
@ ~/.julia/packages/Zygote/SuKWp/src/lib/grad.jl:64 [inlined]
[10] (::Zygote.Pullback{Tuple{typeof(Zygote.hessian_dual), var"#17#18"{NeuralAnsatz}, Vector{Float64}}, Tuple{Zygote.Pullback{Tuple{typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}}, Tuple{Zygote.var"#1955#back#190"{Zygote.var"#186#189"{Zygote.Context{true}, GlobalRef, Int64}}, Zygote.Pullback{Tuple{Zygote.var"##forward_jacobian#1473", Int64, typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}}, Tuple{Zygote.ZBack{ChainRules.var"#min_pullback#1342"{Bool, Bool, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}}}, Zygote.Pullback{Tuple{typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}, Val{1}}, Any}, Zygote.var"#1891#back#157"{Zygote.var"#153#156"}, Zygote.ZBack{ChainRules.var"#length_pullback#747"}}}}}, Zygote.var"#2176#back#309"{Zygote.Jnew{Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Nothing, false}}, Zygote.var"#1998#back#209"{Zygote.var"#back#207"{2, 2, Zygote.Context{true}, Matrix{Float64}}}}})(Ξ::Diagonal{Float64, FillArrays.Fill{Float64, 1, Tuple{Base.OneTo{Int64}}}})
@ Zygote ~/.julia/packages/Zygote/SuKWp/src/compiler/interface2.jl:0
[11] Pullback
@ ~/.julia/packages/Zygote/SuKWp/src/lib/grad.jl:62 [inlined]
[12] Pullback
@ ./REPL[8]:1 [inlined]
[13] (::Zygote.Pullback{Tuple{typeof(βΒ²), NeuralAnsatz, Vector{Float64}}, Tuple{Zygote.Pullback{Tuple{typeof(hessian), var"#17#18"{NeuralAnsatz}, Vector{Float64}}, Tuple{Zygote.Pullback{Tuple{typeof(Zygote.hessian_dual), var"#17#18"{NeuralAnsatz}, Vector{Float64}}, Tuple{Zygote.Pullback{Tuple{typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}}, Tuple{Zygote.var"#1955#back#190"{Zygote.var"#186#189"{Zygote.Context{true}, GlobalRef, Int64}}, Zygote.Pullback{Tuple{Zygote.var"##forward_jacobian#1473", Int64, typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}}, Tuple{Zygote.ZBack{ChainRules.var"#min_pullback#1342"{Bool, Bool, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}}}, Zygote.Pullback{Tuple{typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}, Val{1}}, Any}, Zygote.var"#1891#back#157"{Zygote.var"#153#156"}, Zygote.ZBack{ChainRules.var"#length_pullback#747"}}}}}, Zygote.var"#2176#back#309"{Zygote.Jnew{Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Nothing, false}}, Zygote.var"#1998#back#209"{Zygote.var"#back#207"{2, 2, Zygote.Context{true}, Matrix{Float64}}}}}}}, Zygote.var"#2176#back#309"{Zygote.Jnew{var"#17#18"{NeuralAnsatz}, Nothing, false}}, Zygote.var"#2987#back#777"{Zygote.var"#771#775"{Diagonal{Float64, Vector{Float64}}}}, Zygote.Pullback{Tuple{Type{Diagonal}, Matrix{Float64}}, Tuple{Zygote.ZBack{ChainRules.var"#diag_pullback#2050"}, Zygote.ZBack{typeof(ChainRules._Diagonal_pullback)}}}}})(Ξ::Float64)
@ Zygote ~/.julia/packages/Zygote/SuKWp/src/compiler/interface2.jl:0
[14] Pullback
@ ./REPL[863]:1 [inlined]
[15] (::Zygote.Pullback{Tuple{var"#389#390"}, Tuple{Zygote.Pullback{Tuple{typeof(sum), Float64}, Tuple{Zygote.Pullback{Tuple{Base.var"##sum#267", Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, typeof(sum), Float64}, Any}, Zygote.Pullback{Tuple{Type{NamedTuple}}, Tuple{}}, Zygote.var"#2331#back#414"{Zygote.var"#pairs_namedtuple_pullback#413"{(), NamedTuple{(), Tuple{}}}}}}, Zygote.ZBack{ChainRules.var"#/_pullback#1336"{Float64, Float64, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}}}, Zygote.var"#1955#back#190"{Zygote.var"#186#189"{Zygote.Context{true}, GlobalRef, NeuralAnsatz}}, Zygote.ZBack{ChainRules.var"#vect_pullback#1369"{1, Tuple{ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}}}}, Zygote.Pullback{Tuple{typeof(βΒ²), NeuralAnsatz, Vector{Float64}}, Tuple{Zygote.Pullback{Tuple{typeof(hessian), var"#17#18"{NeuralAnsatz}, Vector{Float64}}, Tuple{Zygote.Pullback{Tuple{typeof(Zygote.hessian_dual), var"#17#18"{NeuralAnsatz}, Vector{Float64}}, Tuple{Zygote.Pullback{Tuple{typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}}, Tuple{Zygote.var"#1955#back#190"{Zygote.var"#186#189"{Zygote.Context{true}, GlobalRef, Int64}}, Zygote.Pullback{Tuple{Zygote.var"##forward_jacobian#1473", Int64, typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}}, Tuple{Zygote.ZBack{ChainRules.var"#min_pullback#1342"{Bool, Bool, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}}}, Zygote.Pullback{Tuple{typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}, Val{1}}, Any}, Zygote.var"#1891#back#157"{Zygote.var"#153#156"}, Zygote.ZBack{ChainRules.var"#length_pullback#747"}}}}}, Zygote.var"#2176#back#309"{Zygote.Jnew{Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Nothing, false}}, 
Zygote.var"#1998#back#209"{Zygote.var"#back#207"{2, 2, Zygote.Context{true}, Matrix{Float64}}}}}}}, Zygote.var"#2176#back#309"{Zygote.Jnew{var"#17#18"{NeuralAnsatz}, Nothing, false}}, Zygote.var"#2987#back#777"{Zygote.var"#771#775"{Diagonal{Float64, Vector{Float64}}}}, Zygote.Pullback{Tuple{Type{Diagonal}, Matrix{Float64}}, Tuple{Zygote.ZBack{ChainRules.var"#diag_pullback#2050"}, Zygote.ZBack{typeof(ChainRules._Diagonal_pullback)}}}}}}})(Ξ::Float64)
@ Zygote ~/.julia/packages/Zygote/SuKWp/src/compiler/interface2.jl:0
[16] (::Zygote.var"#118#119"{Params{Zygote.Buffer{Any, Vector{Any}}}, Zygote.Pullback{Tuple{var"#389#390"}, Tuple{Zygote.Pullback{Tuple{typeof(sum), Float64}, Tuple{Zygote.Pullback{Tuple{Base.var"##sum#267", Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, typeof(sum), Float64}, Any}, Zygote.Pullback{Tuple{Type{NamedTuple}}, Tuple{}}, Zygote.var"#2331#back#414"{Zygote.var"#pairs_namedtuple_pullback#413"{(), NamedTuple{(), Tuple{}}}}}}, Zygote.ZBack{ChainRules.var"#/_pullback#1336"{Float64, Float64, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}}}, Zygote.var"#1955#back#190"{Zygote.var"#186#189"{Zygote.Context{true}, GlobalRef, NeuralAnsatz}}, Zygote.ZBack{ChainRules.var"#vect_pullback#1369"{1, Tuple{ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}}}}, Zygote.Pullback{Tuple{typeof(βΒ²), NeuralAnsatz, Vector{Float64}}, Tuple{Zygote.Pullback{Tuple{typeof(hessian), var"#17#18"{NeuralAnsatz}, Vector{Float64}}, Tuple{Zygote.Pullback{Tuple{typeof(Zygote.hessian_dual), var"#17#18"{NeuralAnsatz}, Vector{Float64}}, Tuple{Zygote.Pullback{Tuple{typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}}, Tuple{Zygote.var"#1955#back#190"{Zygote.var"#186#189"{Zygote.Context{true}, GlobalRef, Int64}}, Zygote.Pullback{Tuple{Zygote.var"##forward_jacobian#1473", Int64, typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}}, Tuple{Zygote.ZBack{ChainRules.var"#min_pullback#1342"{Bool, Bool, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}}}, Zygote.Pullback{Tuple{typeof(Zygote.forward_jacobian), Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Vector{Float64}, Val{1}}, Any}, Zygote.var"#1891#back#157"{Zygote.var"#153#156"}, Zygote.ZBack{ChainRules.var"#length_pullback#747"}}}}}, 
Zygote.var"#2176#back#309"{Zygote.Jnew{Zygote.var"#121#122"{var"#17#18"{NeuralAnsatz}}, Nothing, false}}, Zygote.var"#1998#back#209"{Zygote.var"#back#207"{2, 2, Zygote.Context{true}, Matrix{Float64}}}}}}}, Zygote.var"#2176#back#309"{Zygote.Jnew{var"#17#18"{NeuralAnsatz}, Nothing, false}}, Zygote.var"#2987#back#777"{Zygote.var"#771#775"{Diagonal{Float64, Vector{Float64}}}}, Zygote.Pullback{Tuple{Type{Diagonal}, Matrix{Float64}}, Tuple{Zygote.ZBack{ChainRules.var"#diag_pullback#2050"}, Zygote.ZBack{typeof(ChainRules._Diagonal_pullback)}}}}}}}, Zygote.Context{true}})(Ξ::Float64)
@ Zygote ~/.julia/packages/Zygote/SuKWp/src/compiler/interface.jl:389
[17] gradient(f::Function, args::Params{Zygote.Buffer{Any, Vector{Any}}})
@ Zygote ~/.julia/packages/Zygote/SuKWp/src/compiler/interface.jl:97
[18] top-level scope
@ REPL[863]:1
I edited my comment per the response in order to provide all necessary information.