Autodiff of vector inputs with Enzyme.jl (and possibly Optimization.jl)

So we started a huge new project for which we aim to use Optimization.jl with Enzyme.jl as the AD backend.
We came up with some early prototypes and plugged them into Optimization.jl, unfortunately not very successfully (it seems the closures we need to provide to Optimization.jl introduce type instabilities Enzyme does not like, but that is another issue).

Now I started to play around a little bit with Enzyme itself to understand the Optimization.jl wrappers better, and I just hit an issue with vectors as differentiable arguments that I don’t understand (according to the docs this should work). I reduced a lot of code to the most minimal bit that still reproduces the error. Consider the following module and variables:

module Nodes

abstract type Node end

# A source node just holds a fixed output vector.
struct Source{T<:Real} <: Node
    output::Vector{T}
end

# Multiplies the upstream node's output elementwise by `factors`, in place.
struct Multiplicator{T<:Real} <: Node
    factors::Vector{T}
    output::Vector{T}
    upstream_node::Node
end


# Relates the upstream output to a target value, storing the fraction.
struct Sink{T<:Real} <: Node
    target_value::Vector{T}
    target_fraction::Vector{T}
    upstream_node::Node
end


# Update `node.output` in place from the factors and the upstream output.
function calculate(node::Multiplicator)
    @. node.output = node.factors * node.upstream_node.output
end

# Update `node.target_fraction` in place from the upstream output and the target value.
function calculate(node::Sink)
    @. node.target_fraction = node.upstream_node.output / node.target_value
end

end

begin
    using Enzyme
    using .Nodes
    using Statistics
    Enzyme.API.runtimeActivity!(true)

    const system_size = 1000

    node_1 = Nodes.Source(fill(10.0,system_size))
    node_2 = Nodes.Multiplicator(randn(system_size).+1,zeros(system_size),node_1)
    node_3 = Nodes.Sink(fill(11.0,system_size),zeros(system_size), node_2)

    factor_matrix = [fill(1.2, system_size) fill(0.9, system_size) fill(1.0, system_size) fill(1.1, system_size)]
    factor_matrix .+= randn(system_size, 4) .* 0.1
end
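
As a quick sanity check, driving the chain forward once works (the exact numbers depend on the random factors):

# Forward check: push values through the node chain once
Nodes.calculate(node_2)
Nodes.calculate(node_3)
mean(node_3.target_fraction)   # ≈ 10/11 ≈ 0.91 on average for this setup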

If we now define

function target(a, b, c, d)
    weights = [a, b, c, d]
    factors = factor_matrix * weights
    node_2.factors .= factors

    Nodes.calculate(node_2)
    Nodes.calculate(node_3)

    y = abs(1 - mean(node_3.target_fraction))

    return y
end


x = [0.2, 0.2, 0.2, 0.4]

We can autodiff that via Enzyme without a problem:

autodiff(Reverse, target, Active, Active(x[1]), Active(x[2]), Active(x[3]), Active(x[4])) # gives ((1274.8375256354548, 958.0874431949107, 1062.6665657969513, 1172.9304957963361),)
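
For comparison, a central finite difference in the first weight should agree with the first entry of that tuple (same factor_matrix; step size chosen ad hoc):

# Sanity check: central finite difference in `a`
h = 1e-6
fd_a = (target(x[1] + h, x[2], x[3], x[4]) - target(x[1] - h, x[2], x[3], x[4])) / (2h)
# fd_a ≈ first entry of the autodiff tuple above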

If I use a vector as an input

function vec_input_target(weights)
    factors = factor_matrix * weights
    node_2.factors .= factors

    Nodes.calculate(node_2)
    Nodes.calculate(node_3)

    y = abs(1 - mean(node_3.target_fraction))

    return y
end


autodiff(Reverse, vec_input_target, Active, Active(x))

This throws an assertion error:

ERROR: AssertionError: !is_split
Stacktrace:
  [1] (::Enzyme.Compiler.var"#397#401"{LLVM.Function, DataType, UnionAll, Enzyme.API.CDerivativeMode, Int64, Bool, Bool, UInt64, Enzyme.Compiler.Interpreter.EnzymeInterpreter, Vector{LLVM.Argument}, LLVM.DataLayout, LLVM.Function, LLVM.StructType, LLVM.StructType, Vector{UInt8}, Vector{Int64}, Vector{Type}, LLVM.PointerType, LLVM.VoidType, LLVM.Context, LLVM.Module, Bool, Bool})(builder::LLVM.IRBuilder)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/RiUxJ/src/compiler.jl:7734
  [2] LLVM.IRBuilder(f::Enzyme.Compiler.var"#397#401"{LLVM.Function, DataType, UnionAll, Enzyme.API.CDerivativeMode, Int64, Bool, Bool, UInt64, Enzyme.Compiler.Interpreter.EnzymeInterpreter, Vector{LLVM.Argument}, LLVM.DataLayout, LLVM.Function, LLVM.StructType, LLVM.StructType, Vector{UInt8}, Vector{Int64}, Vector{Type}, LLVM.PointerType, LLVM.VoidType, LLVM.Context, LLVM.Module, Bool, Bool}, args::LLVM.Context; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ LLVM ~/.julia/packages/LLVM/5aiiG/src/irbuilder.jl:23
  [3] LLVM.IRBuilder(f::Function, args::LLVM.Context)
    @ LLVM ~/.julia/packages/LLVM/5aiiG/src/irbuilder.jl:20
  [4] create_abi_wrapper(enzymefn::LLVM.Function, TT::Type, rettype::Type, actualRetType::Type, Mode::Enzyme.API.CDerivativeMode, augmented::Ptr{Nothing}, width::Int64, returnPrimal::Bool, shadow_init::Bool, world::UInt64, interp::Enzyme.Compiler.Interpreter.EnzymeInterpreter)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/RiUxJ/src/compiler.jl:7692
  [5] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::Tuple{Bool, Bool}, returnPrimal::Bool, jlrules::Vector{String}, expectedTapeType::Type)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/RiUxJ/src/compiler.jl:7433
  [6] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, ctx::LLVM.ThreadSafeContext, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/RiUxJ/src/compiler.jl:8984
  [7] codegen
    @ ~/.julia/packages/Enzyme/RiUxJ/src/compiler.jl:8592 [inlined]
  [8] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, ctx::Nothing, postopt::Bool)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/RiUxJ/src/compiler.jl:9518
  [9] _thunk
    @ ~/.julia/packages/Enzyme/RiUxJ/src/compiler.jl:9515 [inlined]
 [10] cached_compilation
    @ ~/.julia/packages/Enzyme/RiUxJ/src/compiler.jl:9553 [inlined]
 [11] #s291#456
    @ ~/.julia/packages/Enzyme/RiUxJ/src/compiler.jl:9615 [inlined]
 [12] var"#s291#456"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ABI::Any, ::Any, #unused#::Type, #unused#::Type, #unused#::Type, tt::Any, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Any)
    @ Enzyme.Compiler ./none:0
 [13] (::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any})
    @ Core ./boot.jl:602
 [14] autodiff(#unused#::ReverseMode{false, FFIABI}, f::Const{typeof(vec_input_target)}, #unused#::Type{Active}, args::Active{Vector{Float64}})
    @ Enzyme ~/.julia/packages/Enzyme/RiUxJ/src/Enzyme.jl:195
 [15] autodiff(::ReverseMode{false, FFIABI}, ::typeof(vec_input_target), ::Type, ::Active{Vector{Float64}})
    @ Enzyme ~/.julia/packages/Enzyme/RiUxJ/src/Enzyme.jl:222

This is on Enzyme 0.11.4 and Julia 1.9. Any help is greatly appreciated!

PS: Maybe as an additional question: does anybody have experience with constrained nonlinear optimization via Optimization.jl with Enzyme.jl as the AD backend? Is it mature enough? We would really love to make it fly (and avoid Zygote.jl due to the mutation issues, ReverseDiff.jl due to the recompilation of tapes, and ForwardDiff.jl due to its inflexible type layout).

For arrays in reverse mode, you should not use Active(x), but rather create a zero shadow of memory for it to store the derivatives into.

In other words, something like:

dx = zeros(4)
autodiff(Reverse, vec_input_target, Active, Duplicated(x, dx))
# result is in dx
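
Side note: if your Enzyme version provides it, the Enzyme.gradient convenience wrapper allocates the shadow for you and should give the same values:

# Convenience form (assuming it is available in your 0.11.x release)
g = Enzyme.gradient(Reverse, vec_input_target, x)   # same values as dx above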

Thank you, that works! So I guess with

begin
    using Optimization, OptimizationMOI, OptimizationOptimJL, Ipopt
    using ForwardDiff, ModelingToolkit, Enzyme
end

cons(res, x, p) = (res .= [sum(x), x[1], x[2], x[3], x[4]])

lb = [1.0, 0.0, 0.0, 0.0, 0.0]
ub = [1.0, 1.0, 1.0, 1.0, 1.0]

optprob = OptimizationFunction((x,p) -> vec_input_target(x), Optimization.AutoEnzyme(), cons = cons)
prob = OptimizationProblem(optprob, x, lcons = lb, ucons = ub)
sol = solve(prob, Ipopt.Optimizer()) 
# ERROR: Duplicated Returns not yet handled

I’m hitting some type stability issues similar to #741, due to the closure required to satisfy the signature expected by Optimization.OptimizationFunction?
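
For reference, the closure-free variant I have in mind would look roughly like this (param_target is just a hypothetical name, untested):

# Sketch: pass the data through the parameter slot `p`
# instead of capturing the non-const globals in a closure.
p = (factor_matrix = factor_matrix, node_2 = node_2, node_3 = node_3)

function param_target(weights, p)
    factors = p.factor_matrix * weights
    p.node_2.factors .= factors
    Nodes.calculate(p.node_2)
    Nodes.calculate(p.node_3)
    return abs(1 - mean(p.node_3.target_fraction))
end

optprob = OptimizationFunction(param_target, Optimization.AutoEnzyme(), cons = cons)
prob = OptimizationProblem(optprob, x, p, lcons = lb, ucons = ub)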

Where is your autodiff call here? In the absence of type stability issues, that usually means you’re returning an array rather than a float. See here for an example: Implementing pullbacks · Enzyme.jl
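
Roughly the pattern from that page (sketched from memory, not quoted): for a function that writes into an output array, pass both the input and the output as Duplicated and seed the output shadow with the cotangent:

# Minimal sketch: reverse mode through a mutating, vector-valued function
function square2!(out, v)
    out .= 2 .* v .^ 2
    return nothing
end

v    = [1.0, 2.0, 3.0]
dv   = zero(v)            # receives the pullback result
out  = zeros(3)
dout = [1.0, 0.0, 0.0]    # cotangent seed: select the first output

autodiff(Reverse, square2!, Const, Duplicated(out, dout), Duplicated(v, dv))
dv                        # ≈ [4.0, 0.0, 0.0], i.e. the derivative of out[1] w.r.t. v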

It is hit through Optimization.AutoEnzyme() (relevant bits). So I think the issue(s) (Xref) lie with Optimization.jl and not with Enzyme.jl itself.

@Vaibhavdixit02 are you aware of this? Is it that issue?

So first of all, huge thanks to @wsmoses and @Vaibhavdixit02 for resolving Optimization.jl/issues/564. Although that was indeed an issue, fixing it did not resolve the MWE here.

  • If vec_input_target(weights) is used without a type annotation/check, one still gets ERROR: Duplicated Returns not yet handled
  • If the function is defined with a type annotation, vec_input_target(weights)::Float64, one hits:
ERROR: Enzyme execution failed.
Enzyme: Not yet implemented, mixed activity for jl_new_struct constants=Bool[1, 1, 1]   %4 = call noalias {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)*, {} addrspace(10)*, ...) @julia.call({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32)* noundef nonnull @jl_f_tuple, {} addrspace(10)* noundef null, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140590368042048 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140590368042048 to {}*) to {} addrspace(10)*), {} addrspace(10)* %3) #14, !dbg !10
Stacktrace:
 [1] vec_input_target
   @ ~/repos/Capybara.jl/src/scratch.jl:57

Skimming through the (closed) issues over at Enzyme.jl, this looks like something inside Optimization.jl introduces type instabilities which Enzyme does not like (that is at least my guess).
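
One way to narrow this down (just a sketch): check whether the objective itself infers before blaming the wrapper; the non-const globals it closes over typically show up as Any here.

# Objective-level inference check; factor_matrix / node_2 / node_3 are non-const globals
using InteractiveUtils
@code_warntype vec_input_target(x)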

Quickly checking @code_warntype solve(prob, Solver()) shows some red, too, independent of the AD backend used, so it does not seem to be inferable in general.

Status `~/repos/Capybara.jl/Project.toml`
  [7da242da] Enzyme v0.11.7
  [f6369f11] ForwardDiff v0.10.36
  [b6b21f68] Ipopt v1.4.1
  [961ee093] ModelingToolkit v8.64.0
  [429524aa] Optim v1.7.6
  [7f7a1694] Optimization v3.16.0 `https://github.com/SciML/Optimization.jl.git#master`
  [fd9f6733] OptimizationMOI v0.1.15
  [36348300] OptimizationOptimJL v0.1.9
  [d236fae5] PreallocationTools v0.4.12

That’s the same error message as here, so I’ll link my answer here too: Understanding an Enzyme Warning Message - #2 by wsmoses

Falling forward :upside_down_face:! I indeed also found a type instability in my own code, which I have now fixed, sorry for overlooking that. I now hit the following error before the Julia process crashes:

┌ Warning: TypeAnalysisDepthLimit
│ LLVM.StoreInst(store {} addrspace(10)* %subcache, {} addrspace(10)* addrspace(10)* %9, align 8, !dbg !140, !noalias !26)
│ {[]:Pointer, [0]:Pointer, [0,0]:Pointer, [0,0,0]:Pointer, [0,0,0,0]:Pointer, [0,0,0,0,0]:Pointer, [0,0,0,0,0,0]:Pointer, [0,0,0,0,0,8]:Pointer, [0,0,0,0,8]:Pointer, [0,0,0,0,8,0]:Pointer, [0,0,0,0,8,8]:Pointer, [0,0,0,8]:Pointer, [0,0,0,8,0]:Pointer, [0,0,0,8,0,0]:Pointer, [0,0,0,8,0,8]:Pointer, [0,0,0,8,8]:Pointer, [0,0,0,8,8,0]:Pointer, [0,0,0,8,8,8]:Pointer, [0,0,8]:Pointer, [0,0,8,0]:Pointer, [0,0,8,0,0]:Pointer, [0,0,8,0,0,0]:Pointer, [0,0,8,0,0,8]:Pointer, [0,0,8,0,8]:Pointer, [0,0,8,0,8,0]:Pointer, [0,0,8,0,8,8]:Pointer, [0,0,8,8]:Pointer, [0,0,8,8,0]:Pointer, [0,0,8,8,0,0]:Pointer, [0,0,8,8,0,8]:Pointer, [0,0,8,8,8]:Pointer, [0,0,8,8,8,0]:Pointer, [0,0,8,8,8,8]:Pointer, [0,8]:Pointer, [0,8,0]:Pointer, [0,8,0,0]:Pointer, [0,8,0,0,0]:Pointer, [0,8,0,0,0,0]:Pointer, [0,8,0,0,0,8]:Pointer, [0,8,0,0,8]:Pointer, [0,8,0,0,8,0]:Pointer, [0,8,0,0,8,8]:Pointer, [0,8,0,8]:Pointer, [0,8,0,8,0]:Pointer, [0,8,0,8,0,0]:Pointer, [0,8,0,8,0,8]:Pointer, [0,8,0,8,8]:Pointer, [0,8,0,8,8,0]:Pointer, [0,8,0,8,8,8]:Pointer, [0,8,8]:Pointer, [0,8,8,0]:Pointer, [0,8,8,0,0]:Pointer, [0,8,8,0,0,0]:Pointer, [0,8,8,0,0,8]:Pointer, [0,8,8,0,8]:Pointer, [0,8,8,0,8,0]:Pointer, [0,8,8,0,8,8]:Pointer, [0,8,8,8]:Pointer, [0,8,8,8,0]:Pointer, [0,8,8,8,0,0]:Pointer, [0,8,8,8,0,8]:Pointer, [0,8,8,8,8]:Pointer, [0,8,8,8,8,0]:Pointer, [0,8,8,8,8,8]:Pointer}
│ 
│ Stacktrace:
│  [1] mapreduce_impl
│    @ ./reduce.jl:267
│  [2] multiple call sites
│    @ unknown:0
└ @ Enzyme.Compiler ~/.julia/packages/GPUCompiler/YO8Uj/src/utils.jl:56
julia: /workspace/srcdir/Enzyme/enzyme/Enzyme/AdjointGenerator.h:492: void AdjointGenerator<AugmentedReturnType>::visitLoadLike(llvm::Instruction&, llvm::MaybeAlign, bool, llvm::Value*, llvm::Value*) [with AugmentedReturnType = const AugmentedReturn*]: Assertion `!mask' failed.

Edit: I managed to pipe the actual Linux logs into a file; it looks a lot like Enzyme.jl#993:

[10372] signal (6.-6): Aborted
in expression starting at /home/thisisme/repos/Capybara.jl/src/scratch.jl:93
gsignal at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
abort at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x7fdbb9dc2728)
__assert_fail at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
visitLoadLike at /workspace/srcdir/Enzyme/enzyme/Enzyme/AdjointGenerator.h:492
handleAdjointForIntrinsic at /workspace/srcdir/Enzyme/enzyme/Enzyme/AdjointGenerator.h:3430
visitIntrinsicInst at /workspace/srcdir/Enzyme/enzyme/Enzyme/AdjointGenerator.h:3374
visitDbgInfoIntrinsic at /opt/x86_64-linux-gnu/x86_64-linux-gnu/sys-root/usr/local/include/llvm/IR/InstVisitor.h:209 [inlined]

[…]

__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x401098)
Allocations: 118786424 (Pool: 118700647; Big: 85777); GC: 158

Should I post the whole stack trace over there?

Yeah, post a bug report with an MWE if you can.