Help with nested ForwardDiff.jl calculation

i’m trying to solve a non linear system of equations via newton’s method. the problem is along the lines of:

f(model::Nothing,a,x) = sum(x)*a + log(a/sum(x))
function g(model::Nothing,a,x) = ForwardDiff.gradient(z-> f(model,a,z),x)

function neq(model,output,input,a1,a2,a3,nc::Int)
  x1 = view(input,1:nc)
  x2 = view(input,(nc+1):2*nc)
  x3 = view(input,(2*nc+1):3*nc)
  g1 = g(model,a1,x1)
  output[1:nc] .= g1 .- a1
  output[(nc+1):2*nc] .= g1
  output[(2*nc+1):3*nc] .= g1
  output[(nc+1):2*nc] .-= g(model,a2,x2)
  output[(2*nc+1):3*nc] .- g(model,a3,x3)
  return output
end

i can calculate the jacobian of this function via forwarddiff:

nc = 5
x = rand(3*nc)
F = copy(x)
f!(y,x) = neq(nothing,y,x,1.0,2.0,3.0,nc)
config  = ForwardDiff.JacobianConfig(f!,F,x)
J = similar(x,(3*nc,3*nc))
ForwardDiff.jacobian!(J,f!,F,x,config)

I can also calculate the gradient config:

gconfig = ForwardDiff.GradientConfig(f,zeros(3))

my question is, how can i use a gradient config inside the neq function, while also providing a config for the jacobian calculation?

Ideally, something like this:

inner_config = ForwardDiff.GradientConfig(...) #need to figure this out
f!(y,x) = neq(nothing,y,x,1.0,2.0,3.0,nc,inner_config)
outer_config = ForwardDiff.JacobianConfig(f,F,x)
ForwardDiff.jacobian!(J,f!,F,x,outer_config)
1 Like

Hi @longemen3000!

This is a very tricky question, I struggled with it too when I wrote ForwardDiff.jl bindings for DifferentiationInterface.jl. The things you need to pay attention to are:

  • the inner gradient needs to be configured based on Dual numbers, not plain Float64, because during the outer Jacobian the input to g will be Dual
  • these Dual numbers need to have the proper tag and number of partials

Here is a working implementation:

using ForwardDiff

f(model::Nothing, a, x) = sum(x) * a + log(a / sum(x))

function g(model::Nothing, a, x, gradient_config)
    return ForwardDiff.gradient(z -> f(model, a, z), x, gradient_config)
end

function neq(model, y, x, a1, a2, a3, nc::Int, gradient_config)
    x1 = view(x, 1:nc)
    x2 = view(x, (nc + 1):(2 * nc))
    x3 = view(x, (2 * nc + 1):(3 * nc))
    # same gradient config for x1 x2 x3 because same size and type
    g1 = g(model, a1, x1, gradient_config)
    g2 = g(model, a2, x2, gradient_config)
    g3 = g(model, a3, x3, gradient_config)
    y[1:nc] .= g1 .- a1
    y[(nc + 1):(2 * nc)] .= g1
    y[(2 * nc + 1):(3 * nc)] .= g1
    y[(nc + 1):(2 * nc)] .-= g2
    y[(2 * nc + 1):(3 * nc)] .-= g3
    return y
end

chunksize(::ForwardDiff.Chunk{C}) where {C} = C
chunksize(_x::AbstractArray) = chunksize(ForwardDiff.Chunk(x))

nc = 5
x = rand(3 * nc)
y = copy(x)
J = similar(x, (3 * nc, 3 * nc));

# prepare inner gradient on an example input like x1 *but with Dual elements*
x1 = view(x, 1:nc)
T = Nothing
V = eltype(x1)
N = chunksize(x1)
D = ForwardDiff.Dual{T,V,N}
x1_dual = similar(x1, D)

# create configs without function tag to avoid figuring out the type of the inner closure
gradient_config = ForwardDiff.GradientConfig(nothing, x1_dual);
jacobian_config = ForwardDiff.JacobianConfig(nothing, y, x);

# enjoy
f!(y, x) = neq(nothing, y, x, 1.0, 2.0, 3.0, nc, gradient_config)
ForwardDiff.jacobian!(J, f!, y, x, jacobian_config, Val(false))

Does it make sense for you or do you need more guidance?

2 Likes