Different results between Zygote, ForwardDiff, and ReverseDiff

I mentioned last time that there was an issue with the uniqueness of the save times in the way your model was originally posed, and the same issue appears again here. If you fix that, it’s fine:

using DifferentialEquations, Flux, LinearAlgebra, DiffEqFlux, DiffEqSensitivity
using ForwardDiff, FiniteDiff
using Zygote
using ReverseDiff

function bimolecular!(du,u,p,t)
    # Single state variable of the system.
    bound = u[1]
    # `p` is unpacked positionally into five values; the first two are
    # presumably the forward/reverse rate constants and the last three the
    # per-run constants appended by the caller -- confirm against the caller.
    k_fwd, k_rev, m_r, m_l, scale = p
    # Production term minus a loss term proportional to the current state.
    rate = scale*k_fwd*m_r*m_l - k_rev*bound
    # Written in place; `t` is not used by this right-hand side.
    du[1] = rate
    return rate
end

function run_model(p,data,densities,t) # version of run function with multiple models
  # Solve one ODE per row of `data`/`densities` and return a matrix with one
  # row per solve and one column per save time in `t`.
  # Relies on the file-level globals `u₀` and `tspan`.

  # Placeholder first row so there is something to vcat onto; it is dropped
  # again on return. Rows are accumulated with vcat rather than by writing
  # into a preallocated array.
  stacked = zeros(1, size(data,2))

  for row in 1:size(data,1)
      # `p` holds base-10 exponents; the row's densities are appended to form
      # the full parameter vector handed to the ODE right-hand side.
      params = [10 .^ p;densities[row,:]]
      prob = ODEProblem(bimolecular!,u₀,tspan,params)
      sol = solve(prob,Vern7(),saveat=t, abstol=1e-14,reltol=1e-14)
      # Collapse the state dimension (sum over species) to a 1×length(t) row
      # and append it to the stack.
      stacked = vcat(stacked,sum(Array(sol),dims=1))
  end

  # Strip the placeholder row.
  return stacked[2:end,:]
end


function loss(p,data,dens,t)
  # Sum of squared differences between the simulated output of `run_model`
  # and `data` (both must have the same shape).
  residual = run_model(p,data,dens,t) - data
  return sum(abs2, residual)
end

# Column 1 is the observation time, column 2 the observed value.
# Every time point appears more than once (replicate observations).
dataset = [  0.25  0.0618754
  0.25  0.040822
  0.5   0.127833
  0.5   0.198451
  1.0   0.274437
  1.0   0.223144
  2.0   0.579818
  2.0   0.653926
  4.0   0.693147
  4.0   0.776529
  6.0   0.820981
  6.0   0.776529
  8.0   0.653926
  8.0   0.776529
 16.0   0.820981
 16.0   0.733969]

# Save times must be unique, so collapse the replicated time column.
t = unique(dataset[:,1])
# One aggregated observation per unique time, aligned with `t`.
# The earlier loop only visited the first length(n) rows and indexed with
# i÷2 + 1, which mixed values from different time points and left the tail of
# `n` at zero; grouping on the time column uses every row and does not assume
# a fixed number of replicates per time.
# NOTE(review): replicates are summed (as the original `+=` did), not
# averaged -- confirm that this is the intended aggregation.
n = [sum(dataset[dataset[:,1] .== tᵢ, 2]) for tᵢ in t]

densities = [25.0, 38.0, 1.0]
# Float start time so both ends of the span have the same type as `t`.
tspan = (0.0,maximum(t)+1)
u₀ = [0.0]
rates = [ -3.367837470456765, -0.2863777340019116]


# Close over the data, densities and save times so the loss depends on the
# rate exponents only; `n'` and `densities'` are passed as single-row matrices.
loss_new = (p) -> loss(p,n',densities',t)
loss_new(rates)

# Works
grad_zyg = Zygote.gradient(loss_new,rates)
grad_for = ForwardDiff.gradient(loss_new,rates)
grad_rev = ReverseDiff.gradient(loss_new,rates)
# Finite-difference reference, stored under its own name so it no longer
# overwrites the ReverseDiff gradient above.
grad_fd = FiniteDiff.finite_difference_gradient(loss_new,rates)

# Also works
hes_for = ForwardDiff.hessian(loss_new,rates)
hes_zyg = Zygote.hessian(loss_new,rates)

I will make an issue to handle the first formulation better, but there’s essentially no reason to use non-unique save times: that path is always going to be less tested (I don’t think I’ve ever seen non-unique saveat in the thousands of codes I’ve seen in the last 5 years), and it’s going to be less efficient (since it’s hitting callbacks and saving multiple times in a way that’s unnecessary). So I’ll try to make that safer, but even then… don’t do that haha.

1 Like