Gradient error in Flux model inputs

Thanks for all the help! I updated my code and it now computes the “gradients”, but they all end up being zero… Here is the current version:

using Flux, ForwardDiff, Zygote, ZygoteRules

# adjoints needed so Zygote can differentiate through ForwardDiff's Duals
ZygoteRules.@adjoint function ForwardDiff.Dual{T}(x, ẋ::Tuple) where T
  @assert length(ẋ) == 1
  ForwardDiff.Dual{T}(x, ẋ), ḋ -> (ḋ.partials[1], (ḋ.value,))
end

ZygoteRules.@adjoint ZygoteRules.literal_getproperty(d::ForwardDiff.Dual{T}, ::Val{:partials}) where T =
    d.partials, ṗ -> (ForwardDiff.Dual{T}(ṗ[1], 0),)
ZygoteRules.@adjoint ZygoteRules.literal_getproperty(d::ForwardDiff.Dual{T}, ::Val{:value}) where T =
    d.value, ẋ -> (ForwardDiff.Dual{T}(0, ẋ),)
Zygote.refresh()
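
# sanity check on a scalar function (assuming the adjoints above are wired up
# correctly): analytically d/dt (a*t^2) = 2a*t, so at t = 1 the outer gradient
# with respect to a should be 2
Zygote.gradient(a -> ForwardDiff.derivative(t -> a * t^2, 1.0), 3.0) # expect (2.0,)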

m = Chain(Dense(3, 10, relu), Dense(10, 10, relu), Dense(10, 1)) # [u0, k, t] -> u(t)
ps = Flux.params(m)

function get_time_function(x) # forced to do this; ForwardDiff.gradient doesn't work...
    mt(t) = m([x[1:2]; t])[1] # the network as a function of t alone, with u0 and k fixed
    return mt
end

function loss(x, y) # x and y are arrays (y is unused so far)
    derivativeloss = 0.0f0
    for i = 1:size(x, 2)
        f = get_time_function(x[:, i]) # this feels very clunky...
        dmt(t) = ForwardDiff.derivative(f, t) # dNN/dt @ x[:, i]
        derivativeloss += dmt(x[3, i]) # evaluate at the i-th sample's time point
    end
    return derivativeloss
end

xts = rand(Float32, 3, 10) # Float32 data to match the Float32 model
yts = rand(Float32, 1, 10)

gs = gradient(ps) do
    loss(xts, yts)
end # all these gradients end up being zero... 

Every other scheme I have tried makes Zygote/Flux throw errors… However, there seem to be a number of issues on Flux’s GitHub where people have run into similar problems, see here and here. I will ask there as well.
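
In the meantime, a workaround I am considering is to replace the inner ForwardDiff call with a central finite difference, so the whole loss stays inside Zygote and no Dual adjoints are needed. A rough sketch (the step size h is an arbitrary choice):

# approximate dNN/dt with a central difference instead of nested AD
function loss_fd(x, y; h = 1.0f-3) # y still unused; h is an arbitrary step size
    derivativeloss = 0.0f0
    for i = 1:size(x, 2)
        u0k = x[1:2, i]
        t = x[3, i]
        derivativeloss += (m([u0k; t + h])[1] - m([u0k; t - h])[1]) / (2h)
    end
    return derivativeloss
end

gs_fd = gradient(() -> loss_fd(xts, yts), ps)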