While trying to replicate a paper that uses a GAN with unconventional loss calculations, I'm running into null (`nothing`) gradients. The basic steps are the following:
- genOutput = gen(genInput)
- mse = MSE(genOutput, label)
- mae = MAE(genOutput |> volFrac, label |> volFrac)
- discOutFake = cat( genInput, condition, genOutput ) |> disc
- genLoss = logit_binary_crossentropy(discOutFake, ones) + 1e4 * mse + mae
- discOutReal = cat( genInput, condition, label ) |> disc
- discLoss = logit_binary_crossentropy(discOutReal, ones) + logit_binary_crossentropy(discOutFake, zeros)
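In equation form (my notation; vf(·) is the per-sample volume fraction, i.e. the mean over each sample of the batch), the losses I'm trying to reproduce are:

$$L_G = \mathrm{BCE}_{logit}(D(x_{fake}), 1) + 10^4 \cdot \mathrm{MSE}(G(x), y) + \mathrm{MAE}(vf(G(x)), vf(y))$$

$$L_D = \mathrm{BCE}_{logit}(D(x_{real}), 1) + \mathrm{BCE}_{logit}(D(x_{fake}), 0)$$

where $x_{fake} = \mathrm{cat}(x, c, G(x))$ and $x_{real} = \mathrm{cat}(x, c, y)$ are channel-wise concatenations of the generator input $x$, the condition $c$, and the fake/real samples.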
And the MWE:
using Statistics, Flux, MLUtils, Zygote, LinearAlgebra
const genInput_ = rand(Float32, (10, 10, 3, 5))
const condition_ = rand(Float32, (10, 10, 3, 5))
const label_ = rand(Float32, (10, 10, 1, 5))
function models()
    gen = Chain(
        Conv((5, 5), 3 => 1, pad = SamePad()),
        ConvTranspose((5, 5), 1 => 1, pad = SamePad()),
    )
    disc = Chain(
        Conv((5, 5), 7 => 1, pad = SamePad()),
        flatten,
        Dense(100 => 1, leakyrelu),
    )
    return gen |> gpu, disc |> gpu
end
volFrac(x) = [mean(x[:, :, :, sample]) for sample in axes(x, 4)]
reshapeDiscOut(x) = dropdims(x |> transpose |> Array; dims = 2)
function GANgradsMWE(gen, disc, genInput, condition, label)
    discOutFake, discInputFake = 0.0, 0.0 # initialize for scope purposes
    function genLoss(genOutput) # generator loss. Defined here for scope purposes
        mse = (genOutput .- label) .^ 2 |> mean
        absError = abs.(volFrac(genOutput) .- volFrac(label)) |> mean
        discInputFake = cat(genInput, condition, genOutput; dims = 3) |> gpu
        discOutFake = discInputFake |> disc |> cpu |> reshapeDiscOut
        return Flux.Losses.logitbinarycrossentropy(
            discOutFake, ones(size(discOutFake))
        ) + 10_000 * mse + 1 * absError
    end
    function discLoss(discOutReal, discOutFake) # discriminator loss
        return Flux.Losses.logitbinarycrossentropy(
            discOutReal, ones(discOutReal |> size)
        ) + Flux.Losses.logitbinarycrossentropy(
            discOutFake, zeros(discOutFake |> size)
        )
    end
    genInputGPU = genInput |> gpu
    discInputReal = cat(genInput, condition, label; dims = 3) |> gpu
    genLossVal_, genGrads_ = withgradient(
        gen -> genLoss(gen(genInputGPU) |> cpu), gen
    )
    discLossVal_, discGrads_ = withgradient(
        disc -> discLoss(
            disc(discInputReal) |> cpu |> reshapeDiscOut,
            disc(discInputFake) |> cpu |> reshapeDiscOut
        ),
        disc
    )
    return genGrads_, genLossVal_, discGrads_, discLossVal_
end
genGrads, genLossVal, discGrads, discLossVal = GANgradsMWE(
    models()..., genInput_, condition_, label_
)
@show norm(genGrads); @show norm(discGrads);
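A note on those `norm` calls: `withgradient` returns the gradients as nested (Named)Tuples mirroring the model structure, with `nothing` in the non-trainable fields (`σ`, `stride`, `pad`, ...), and `LinearAlgebra.norm` chokes on those `nothing` entries, which is what the stacktrace below shows. For reference, this is the helper I use to at least reduce the gradient tree to a finite number (helper names are mine; it just skips `nothing` leaves, it doesn't explain why they appear):

# Reduce a Flux/Zygote gradient tree to a single 2-norm, treating `nothing`
# leaves (non-trainable fields like σ, stride, pad) as zero contribution.
gradSqNorm(x::AbstractArray) = sum(abs2, x) # leaf: an actual gradient array
gradSqNorm(::Nothing) = 0.0 # leaf: no gradient was produced for this field
gradSqNorm(x::Union{Tuple, NamedTuple}) = sum(gradSqNorm, x; init = 0.0)
gradNorm(grads) = sqrt(gradSqNorm(grads))
@show gradNorm(genGrads); @show gradNorm(discGrads);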
And the stacktrace:
ERROR: MethodError: no method matching iterate(::Nothing)
Closest candidates are:
iterate(::Union{LinRange, StepRangeLen}) at range.jl:872
iterate(::Union{LinRange, StepRangeLen}, ::Integer) at range.jl:872
iterate(::T) where T<:Union{Base.KeySet{<:Any, <:Dict}, Base.ValueIterator{<:Dict}} at dict.jl:712
...
Stacktrace:
[1] isempty(itr::Nothing)
@ Base .\essentials.jl:788
[2] norm(itr::Nothing, p::Int64) (repeats 2 times)
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:591
[3] (::Base.MappingRF{typeof(norm), Base.BottomRF{typeof(max)}})(acc::Base._InitialValue, x::Nothing)
@ Base .\reduce.jl:95
[4] _foldl_impl(op::Base.MappingRF{typeof(norm), Base.BottomRF{typeof(max)}}, init::Base._InitialValue, itr::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}})
@ Base .\reduce.jl:58
[5] foldl_impl(op::Base.MappingRF{typeof(norm), Base.BottomRF{typeof(max)}}, nt::Base._InitialValue, itr::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}})
@ Base .\reduce.jl:48
[6] mapfoldl_impl(f::typeof(norm), op::typeof(max), nt::Base._InitialValue, itr::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}})
@ Base .\reduce.jl:44
[7] mapfoldl(f::Function, op::Function, itr::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}; init::Base._InitialValue)
@ Base .\reduce.jl:162
[8] mapfoldl
@ .\reduce.jl:162 [inlined]
[9] #mapreduce#262
@ .\reduce.jl:294 [inlined]
[10] mapreduce(f::Function, op::Function, itr::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}})
@ Base .\reduce.jl:294
[11] generic_normInf(x::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:453
[12] normInf(x::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:522
[13] generic_norm2(x::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:463
[14] norm2(x::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:524
[15] norm(itr::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, p::Int64)
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:593
[16] norm(itr::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:591
[17] MappingRF
@ .\reduce.jl:95 [inlined]
[18] afoldl(::Base.MappingRF{typeof(norm), Base.BottomRF{typeof(max)}}, ::Base._InitialValue, ::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, ::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}})
@ Base .\operators.jl:548
[19] _foldl_impl(op::Base.MappingRF{typeof(norm), Base.BottomRF{typeof(max)}}, init::Base._InitialValue, itr::Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}})
@ Base .\tuple.jl:277
[20] foldl_impl(op::Base.MappingRF{typeof(norm), Base.BottomRF{typeof(max)}}, nt::Base._InitialValue, itr::Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}})
@ Base .\reduce.jl:48
[21] mapfoldl_impl(f::typeof(norm), op::typeof(max), nt::Base._InitialValue, itr::Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}})
@ Base .\reduce.jl:44
[22] mapfoldl(f::Function, op::Function, itr::Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}; init::Base._InitialValue)
@ Base .\reduce.jl:162
[23] mapfoldl
@ .\reduce.jl:162 [inlined]
[24] mapreduce(f::Function, op::Function, itr::Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base .\reduce.jl:294
[25] mapreduce
@ .\reduce.jl:294 [inlined]
[26] generic_normInf(x::Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:453
[27] normInf
@ C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:522 [inlined]
[28] generic_norm2(x::Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:463
[29] norm2
@ C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:524 [inlined]
[30] norm
@ C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:593 [inlined]
[31] norm
@ C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:591 [inlined]
[32] MappingRF
@ .\reduce.jl:95 [inlined]
[33] _foldl_impl
@ .\reduce.jl:58 [inlined]
[34] foldl_impl
@ .\reduce.jl:48 [inlined]
[35] mapfoldl_impl(f::typeof(norm), op::typeof(max), nt::Base._InitialValue, itr::NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}})
@ Base .\reduce.jl:44
[36] mapfoldl(f::Function, op::Function, itr::NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}}; init::Base._InitialValue)
@ Base .\reduce.jl:162
[37] mapfoldl
@ .\reduce.jl:162 [inlined]
[38] mapreduce(f::Function, op::Function, itr::NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base .\reduce.jl:294
[39] mapreduce
@ .\reduce.jl:294 [inlined]
[40] generic_normInf(x::NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:453
[41] normInf
@ C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:522 [inlined]
[42] generic_norm2(x::NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:463
[43] norm2
@ C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:524 [inlined]
[44] norm(itr::NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}}, p::Int64)
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:593
[45] norm
@ C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:591 [inlined]
[46] MappingRF
@ .\reduce.jl:95 [inlined]
[47] afoldl
@ .\operators.jl:548 [inlined]
[48] _foldl_impl
@ .\tuple.jl:277 [inlined]
[49] foldl_impl
@ .\reduce.jl:48 [inlined]
[50] mapfoldl_impl(f::typeof(norm), op::typeof(max), nt::Base._InitialValue, itr::Tuple{NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}}})
@ Base .\reduce.jl:44
[51] mapfoldl(f::Function, op::Function, itr::Tuple{NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}}}; init::Base._InitialValue)
@ Base .\reduce.jl:162
[52] mapfoldl
@ .\reduce.jl:162 [inlined]
[53] mapreduce(f::Function, op::Function, itr::Tuple{NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base .\reduce.jl:294
[54] mapreduce
@ .\reduce.jl:294 [inlined]
[55] generic_normInf(x::Tuple{NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:453
[56] normInf
@ C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:522 [inlined]
[57] generic_norm2(x::Tuple{NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:463
[58] norm2
@ C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:524 [inlined]
[59] norm(itr::Tuple{NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}}}, p::Int64)
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:593
[60] norm(itr::Tuple{NamedTuple{(:layers,), Tuple{Tuple{NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}, NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{Nothing, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Nothing, Nothing, Nothing, Nothing}}}}}})
@ LinearAlgebra C:\Users\LucasKaoid\AppData\Local\Programs\Julia-1.8.0\share\julia\stdlib\v1.8\LinearAlgebra\src\generic.jl:591
From this preview docs page, I understand that the basic rule for correct (explicit) gradients in Flux is that both the loss calculation and the model execution must happen inside the function passed to `gradient`/`withgradient`. I've already tried a handful of variations of these loss calculations, but the gradients are never right: even when the model manages to "train", closer inspection reveals that some intermediate values are being ignored in the gradient calculation. How should I implement this in Julia?
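For completeness, this is the direction I've been experimenting with, following that rule: compute everything the loss depends on inside the closure passed to `withgradient`, and avoid `cpu`/`Array` conversions mid-graph. It's only a sketch under my own assumptions (function names are mine, and it assumes the models and all arrays already live on the same device):

using Statistics, Flux, Zygote # same imports as the MWE above
# Restructured training steps (sketch). Each closure does the full forward pass,
# so Zygote sees every intermediate value it needs to differentiate.
function genStep(gen, disc, genInput, condition, label)
    genLossVal, genGrads = withgradient(gen) do g
        genOutput = g(genInput)
        mse = mean((genOutput .- label) .^ 2)
        mae = mean(abs.(volFrac(genOutput) .- volFrac(label)))
        discOutFake = disc(cat(genInput, condition, genOutput; dims = 3))
        Flux.Losses.logitbinarycrossentropy(discOutFake, 1) + 1f4 * mse + mae
    end
    return genLossVal, genGrads[1] # grads is a 1-tuple: one entry per differentiated argument
end
function discStep(disc, genOutput, genInput, condition, label)
    discLossVal, discGrads = withgradient(disc) do d
        discOutReal = d(cat(genInput, condition, label; dims = 3))
        # genOutput comes from outside the closure, so it is a constant here:
        # the discriminator update must not backpropagate into the generator.
        discOutFake = d(cat(genInput, condition, genOutput; dims = 3))
        Flux.Losses.logitbinarycrossentropy(discOutReal, 1) +
            Flux.Losses.logitbinarycrossentropy(discOutFake, 0)
    end
    return discLossVal, discGrads[1]
end

Is this the right overall structure, and if so, what is my MWE above doing wrong?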