Error using Enzyme to autodiff wrt Flux neural net parameters

I’m trying to use Enzyme to autodiff a function with respect to a neural network’s parameters. I’m running into this error:

No augmented forward pass found for jl_genericmemory_slice
 at context:   %93 = call nonnull "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* @jl_genericmemory_slice({} addrspace(10)* nonnull %76, i64 %92, i64 %40) #25, !dbg !234

Stacktrace:
 [1] reshape
   @ ./reshapedarray.jl:55
 [2] reshape
   @ ./reshapedarray.jl:121
 [3] _getat
   @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:102


Stacktrace:
  [1] reshape
    @ ./reshapedarray.jl:55 [inlined]
  [2] reshape
    @ ./reshapedarray.jl:121 [inlined]
  [3] _getat
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:102
  [4] #57
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:97 [inlined]
  [5] ExcludeWalk
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:144 [inlined]
  [6] CachedWalk
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:195 [inlined]
  [7] CachedWalk
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:0 [inlined]
  [8] augmented_julia_CachedWalk_44270_inner_9wrap
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:0
  [9] macro expansion
    @ ~/.julia/packages/Enzyme/QVjE5/src/compiler.jl:5218 [inlined]
 [10] enzyme_call
    @ ~/.julia/packages/Enzyme/QVjE5/src/compiler.jl:4764 [inlined]
 [11] AugmentedForwardThunk
    @ ~/.julia/packages/Enzyme/QVjE5/src/compiler.jl:4700 [inlined]
 [12] runtime_generic_augfwd(activity::Type{…}, runtimeActivity::Val{…}, width::Val{…}, ModifiedBetween::Val{…}, RT::Val{…}, f::Functors.CachedWalk{…}, df::Functors.CachedWalk{…}, primal_1::Functors.var"#recurse#26"{…}, shadow_1_1::Functors.var"#recurse#26"{…}, primal_2::Matrix{…}, shadow_2_1::Nothing, primal_3::Int64, shadow_3_1::Nothing)
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/QVjE5/src/rules/jitrules.jl:480
 [13] recurse
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:52 [inlined]
 [14] #59
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:115 [inlined]
 [15] map
    @ ./tuple.jl:406 [inlined]
 [16] map
    @ ./namedtuple.jl:266 [inlined]
 [17] _trainmap
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:114 [inlined]
 [18] _Trainable_biwalk
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:110 [inlined]
 [19] ExcludeWalk
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:144 [inlined]
 [20] CachedWalk
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:195 [inlined]
 [21] recurse
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:52 [inlined]
 [22] #59
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:115 [inlined]
 [23] map
    @ ./tuple.jl:406 [inlined]
 [24] _trainmap
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:114
 [25] _Trainable_biwalk
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:110 [inlined]
 [26] ExcludeWalk
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:144 [inlined]
 [27] CachedWalk
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:195 [inlined]
 [28] recurse
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:52 [inlined]
 [29] #59
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:115 [inlined]
 [30] map
    @ ./tuple.jl:406 [inlined]
 [31] map
    @ ./namedtuple.jl:266 [inlined]
 [32] _trainmap
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:114 [inlined]
 [33] _Trainable_biwalk
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:110 [inlined]
 [34] ExcludeWalk
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:144 [inlined]
 [35] CachedWalk
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:195 [inlined]
 [36] execute
    @ ~/.julia/packages/Functors/wOtRi/src/walks.jl:53 [inlined]
 [37] #fmap#40
    @ ~/.julia/packages/Functors/wOtRi/src/maps.jl:11 [inlined]
 [38] fmap
    @ ~/.julia/packages/Functors/wOtRi/src/maps.jl:3 [inlined]
 [39] #_rebuild#56
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:96 [inlined]
 [40] _rebuild
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:94 [inlined]
 [41] Restructure
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:59 [inlined]
 [42] Restructure
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:0 [inlined]
 [43] augmented_julia_Restructure_43766_inner_1wrap
    @ ~/.julia/packages/Optimisers/V8kHf/src/destructure.jl:0
 [44] macro expansion
    @ ~/.julia/packages/Enzyme/QVjE5/src/compiler.jl:5218 [inlined]
 [45] enzyme_call
    @ ~/.julia/packages/Enzyme/QVjE5/src/compiler.jl:4764 [inlined]
 [46] AugmentedForwardThunk
    @ ~/.julia/packages/Enzyme/QVjE5/src/compiler.jl:4700 [inlined]
 [47] runtime_generic_augfwd(activity::Type{Val{…}}, runtimeActivity::Val{false}, width::Val{1}, ModifiedBetween::Val{(true, true)}, RT::Val{@NamedTuple{…}}, f::Optimisers.Restructure{Chain{…}, @NamedTuple{…}}, df::Nothing, primal_1::Vector{Float64}, shadow_1_1::Vector{Float64})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/QVjE5/src/rules/jitrules.jl:480
 [48] #73
    @ ~/nuclear-diffprog/MWEs/coreloop_ez_nn.jl:137 [inlined]
 [49] augmented_julia__73_42667wrap
    @ ~/nuclear-diffprog/MWEs/coreloop_ez_nn.jl:0
 [50] macro expansion
    @ ~/.julia/packages/Enzyme/QVjE5/src/compiler.jl:5218 [inlined]
 [51] enzyme_call
    @ ~/.julia/packages/Enzyme/QVjE5/src/compiler.jl:4764 [inlined]
 [52] (::Enzyme.Compiler.AugmentedForwardThunk{Ptr{Nothing}, Const{var"#73#74"}, DuplicatedNoNeed{Any}, Tuple{Duplicated{Vector{Float64}}}, 1, false, @NamedTuple{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}})(fn::Const{var"#73#74"}, args::Duplicated{Vector{Float64}})
    @ Enzyme.Compiler ~/.julia/packages/Enzyme/QVjE5/src/compiler.jl:4700
 [53] #130
    @ ~/.julia/packages/Enzyme/QVjE5/src/sugar.jl:928 [inlined]
 [54] ntuple
    @ ./ntuple.jl:49 [inlined]
 [55] jacobian(mode::ReverseMode{false, false, FFIABI, false, false}, f::var"#73#74", x::Vector{Float64}; n_outs::Val{(2,)}, chunk::Nothing)
    @ Enzyme ~/.julia/packages/Enzyme/QVjE5/src/sugar.jl:924
 [56] jacobian
    @ ~/.julia/packages/Enzyme/QVjE5/src/sugar.jl:841 [inlined]
 [57] #jacobian#129
    @ ~/.julia/packages/Enzyme/QVjE5/src/sugar.jl:856 [inlined]
 [58] jacobian(mode::ReverseMode{false, false, FFIABI, false, false}, f::var"#73#74", x::Vector{Float64})
    @ Enzyme ~/.julia/packages/Enzyme/QVjE5/src/sugar.jl:841
 [59] top-level scope
    @ ~/nuclear-diffprog/MWEs/coreloop_ez_nn.jl:137
 [60] include(fname::String)
    @ Main ./sysimg.jl:38
in expression starting at /vast/home/daningburg/nuclear-diffprog/MWEs/coreloop_ez_nn.jl:137
Some type information was truncated. Use `show(err)` to see complete types.

I was able to recreate this error in my MWE:

# Test coreloop diff with Enzyme

begin # Packages
    using SpecialFunctions
    using BenchmarkTools
    using SphericalHarmonics
    using Enzyme
    using Flux
end

begin # Functions
    # Coulomb funcs
    function GL(k, r, L)
        return -k*r*sphericalbessely(L, k*r)
    end
    function FL(k, r, L)
        return k*r*sphericalbesselj(L, k*r)
    end 

    # Spherical Hankel functions
    function Hminus(k, r, L)
        return complex(GL(k, r, L), -FL(k, r, L))
    end
    function Hplus(k, r, L)
        return complex(GL(k, r, L), FL(k, r, L))
    end

    # Derivatives
    enzR_Hminusprime(k, r, L) =
        complex(Enzyme.gradient(Reverse, x -> GL(k, x, L), r)[1], -Enzyme.gradient(Reverse, x -> FL(k, x, L), r)[1])
    enzR_Hplusprime(k, r, L) =
        complex(Enzyme.gradient(Reverse, x -> GL(k, x, L), r)[1], Enzyme.gradient(Reverse, x -> FL(k, x, L), r)[1])

    enzF_Hminusprime(k, r, L) =
        complex(Enzyme.gradient(Forward, x -> GL(k, x, L), r)[1], -Enzyme.gradient(Forward, x -> FL(k, x, L), r)[1])
    enzF_Hplusprime(k, r, L) =
        complex(Enzyme.gradient(Forward, x -> GL(k, x, L), r)[1], Enzyme.gradient(Forward, x -> FL(k, x, L), r)[1])

    function enzSL_0f0(U, L, μ, k, r, Ecm)
        dr = r[2] - r[1]
        len = size(r)[1]-1
        ur1, ur2, ur3 = 0.0, 0.0, 0.0
        ui1, ui2, ui3 = 0.0, 0.0, 0.0
        dur1, dur2, dur3 = 0.0, 0.0, 0.0
        dui1, dui2, dui3 = 0.0, 0.0, 0.0
        a = r[end-2]
        ur2 = 1e-6
        ui1 = 1e-12  # ideally these are all always Float32, or all always Float64
        ui2 = 1e-6
        for i in 3:len
            vreal = Ecm - U[i,1]
            vimag = -U[i,2]
            w = 2*μ/ħ^2*complex(vreal, vimag) - L*(L+1)/r[i]^2
            vreal = Ecm -U[i-1,1]
            vimag = -U[i-1,2]
            wmo = 2*μ/ħ^2*complex(vreal, vimag) - L*(L+1)/r[i]^2
            vreal = Ecm - U[i+1,1]
            vimag = -U[i+1,2]
            wpo = 2*μ/ħ^2*complex(vreal, vimag) - L*(L+1)/r[i]^2
            uval = (2*complex(ur2,ui2)-complex(ur1,ui1)-(dr^2/12)*(10*w*complex(ur2,ui2)+wmo*complex(ur1,ui1)))/(1+(dr^2/12)*wpo)
            
            ur3 = real.(uval)
            dur3 = 0.5*(ur3-ur1)/dr
            ui3 = imag.(uval)
            dui3 = 0.5*(ui3-ui1)/dr
    
            ur1, ur2 = ur2, ur3
            dur1, dur2 = dur2, dur3
            ui1, ui2 = ui2, ui3
            dui1, dui2 = dui2, dui3
        end
        ua = complex(ur2,ui2)
        dua = complex(dur3,dui3)
        
        RL = ua / dua
        # SLtop = Hminus(k, a, L) - RL*enzR_Hminusprime(k, a, L)
        # SLbot = Hplus(k, a, L) - RL*enzR_Hplusprime(k, a, L)
        SLtop = Hminus(k, a, L) - RL*enzF_Hminusprime(k, a, L)
        SLbot = Hplus(k, a, L) - RL*enzF_Hplusprime(k, a, L)
    
        SL = SLtop/SLbot
        return [real(SL), imag(SL)]
    end

    function build_model(n_in, n_out, n_layers, n_nodes, act_fun=relu, last_fun=relu)
        first_layer = Flux.Dense(n_in, n_nodes, act_fun)
        # hidden_layers = [Flux.Dense(n_in => n_nodes, act_fun) for _ in 1:n_layers-1]
        last_layer = Flux.Dense(n_nodes => n_out)
        m = Chain(first_layer, Flux.Dense(n_nodes => n_nodes, act_fun), Flux.Dense(n_nodes => n_nodes, act_fun),
            Flux.Dense(n_nodes => n_nodes, act_fun), Flux.Dense(n_nodes => n_nodes, act_fun), last_layer) |> f64
        return m
    end

    function eval_model(m, x)
        # x_eval = convert(Array{Float32}, normalize_to_existing(x, tx))
        # x_eval = normalize_to_existing(x, tx)
        # println("x_eval: " * string(x_eval))
        X = m(x)
        # Z = denormalize_data(M, ty)
        return X
    end

    # Combine x with r for use by neural network
    function combex(x, r)
        xlen = size(x)[1]
        rlen = size(r)[1]
        X = zeros(eltype(x), xlen*rlen, size(x)[2]+1)
        for i in 1:1:xlen
            X[(i-1)*rlen+1:i*rlen, 1] = r
            for j in (i-1)*rlen+1:1:i*rlen
                X[j, 2:end] = x[i,:]
            end
        end
        return X'
    end
end

# Set up particular scattering problem
A = 65.
Z = 29.
N = A - Z
E = 10.
L = 30
Ecm = 9.848393154293218
μ = 925.3211722114523
k = 0.6841596644044445
r = Vector(LinRange(0, 20, 2000))
dr = r[2] - r[1]
const global ħ = 197.3269804

# Model
x = [A Z E]
X = combex(x, r)
m = build_model(4, 2, 4, 16)
params, re = Flux.destructure(m)

Enzyme.jacobian(Reverse, p -> enzSL_0f0(eval_model(re(p), X)', L, μ, k, r, Ecm), params)

Note: this is on Julia 1.11.1 and Enzyme v0.13.23.

I get a different error; below is on 1.10 (which I believe is still better tested), but I see the same on 1.11.0:

julia> Enzyme.jacobian(Reverse, p -> enzSL_0f0(eval_model(re(p), X)', L, μ, k, r, Ecm), params)
ERROR: Constant memory is stored (or returned) to a differentiable variable.
As a result, Enzyme cannot provably ensure correctness and throws this error.
This might be due to the use of a constant variable as temporary storage for active memory (https://enzyme.mit.edu/julia/stable/faq/#Runtime-Activity).
If Enzyme should be able to prove this use non-differentable, open an issue!
To work around this issue, either:
 a) rewrite this variable to not be conditionally active (fastest, but requires a code change), or
 b) set the Enzyme mode to turn on runtime activity (e.g. autodiff(set_runtime_activity(Reverse), ...) ). This will maintain correctness, but may slightly reduce performance.
Mismatched activity for:   store {} addrspace(10)* %.fca.0.0.0.0.extract, {} addrspace(10)* addrspace(10)* %.repack.repack, align 8, !dbg !69, !tbaa !43, !alias.scope !47, !noalias !48 const val:   %.fca.0.0.0.0.extract = extractvalue { [1 x [6 x { {} addrspace(10)*, {} addrspace(10)* }]], [1 x [6 x { i64, i64 }]], i64 } %0, 0, 0, 0, 0, !dbg !8, !enzyme_type !51, !enzymejl_byref_MUT_REF !0, !enzymejl_source_type_Matrix\7BFloat64\7D !0
 value=Unknown object of type Matrix{Float64}
 llvalue=  %.fca.0.0.0.0.extract = extractvalue { [1 x [6 x { {} addrspace(10)*, {} addrspace(10)* }]], [1 x [6 x { i64, i64 }]], i64 } %0, 0, 0, 0, 0, !dbg !8, !enzyme_type !51, !enzymejl_byref_MUT_REF !0, !enzymejl_source_type_Matrix\7BFloat64\7D !0

Stacktrace:
  [1] _trainmap
    @ ~/.julia/packages/Optimisers/a4OnF/src/destructure.jl:114
  [2] _Trainable_biwalk
    @ ~/.julia/packages/Optimisers/a4OnF/src/destructure.jl:110
  [3] ExcludeWalk
    @ ~/.julia/packages/Functors/LbNAu/src/walks.jl:126

(jl_vmDBZd) pkg> st
Status `/private/var/folders/yq/4p2zwd614y59gszh7y9ypyhh0000gn/T/jl_vmDBZd/Project.toml`
  [7da242da] Enzyme v0.13.24
  [587475ba] Flux v0.16.0
  [276daf66] SpecialFunctions v2.5.0
⌃ [c489a379] SphericalHarmonics v0.1.14
Info Packages marked with ⌃ have new versions available and may be upgradable.

julia> versioninfo()
Julia Version 1.10.4
Commit 48d4fd48430 (2024-06-04 10:41 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: macOS (arm64-apple-darwin22.4.0)
  CPU: 11 × Apple M3 Pro
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-15.0.7 (ORCJIT, apple-m1)

As suggested, set_runtime_activity(Reverse) does avoid it, at least on 1.10:

julia> Enzyme.jacobian(set_runtime_activity(Reverse), p -> enzSL_0f0(eval_model(re(p), X)', L, μ, k, r, Ecm), params)[1]
2×1202 transpose(::Matrix{Float64}) with eltype Float64:
 -2.7278e-17   -4.22341e-18  -3.93169e-18  0.0  …  0.0  0.0  0.0  0.0   1.44062e-18  1.86651e-17
  2.95072e-17   2.51867e-17  -7.68234e-18  0.0     0.0  0.0  0.0  0.0  -1.86646e-17  1.34197e-18

julia> VERSION
v"1.10.4"

But on 1.11 there’s another error about reshape:

julia> Enzyme.jacobian(set_runtime_activity(Reverse), p -> enzSL_0f0(eval_model(re(p), X)', L, μ, k, r, Ecm), params)[1]
ERROR: 
No augmented forward pass found for jl_genericmemory_slice
 at context:   %93 = call nonnull "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* @jl_genericmemory_slice({} addrspace(10)* nonnull %76, i64 %92, i64 %40) #28, !dbg !234

Stacktrace:
 [1] reshape
   @ ./reshapedarray.jl:55
 [2] reshape
   @ ./reshapedarray.jl:121
 [3] _getat
   @ ~/.julia/packages/Optimisers/a4OnF/src/destructure.jl:102


Stacktrace:
  [1] reshape
    @ ./reshapedarray.jl:55 [inlined]
  [2] reshape
    @ ./reshapedarray.jl:121 [inlined]
  [3] _getat
    @ ~/.julia/packages/Optimisers/a4OnF/src/destructure.jl:102
  [4] #57
    @ ~/.julia/packages/Optimisers/a4OnF/src/destructure.jl:97 [inlined]
  [5] ExcludeWalk
    @ ~/.julia/packages/Functors/LbNAu/src/walks.jl:126 [inlined]
  [6] CachedWalk
    @ ~/.julia/packages/Functors/LbNAu/src/walks.jl:177 [inlined]

julia> VERSION
v"1.11.0"

Some more minimal cases which hit bugs, for @wsmoses … possibly even the same bugs?

julia> VERSION
v"1.10.4"

julia> v2, re2 = Flux.destructure((a=[1 2; 3 4.], b=2.0))  # easy case, involves reshape
([1.0, 3.0, 2.0, 4.0], Restructure(NamedTuple, ..., 4))

julia> Enzyme.jacobian(set_runtime_activity(Reverse), v -> re2(v).a[1,:], v2)[1]
2×4 transpose(::Matrix{Float64}) with eltype Float64:
 1.0  0.0  0.0  0.0
 0.0  0.0  1.0  0.0

julia> Enzyme.jacobian(Reverse, v -> re2(v).a[1,:], v2)[1]
ERROR: Constant memory is stored (or returned) to a differentiable variable.

julia> v3, re3 = Flux.destructure((a=[1 2; 3 4.], b=[5, 6.]))  # harder case
([1.0, 3.0, 2.0, 4.0, 5.0, 6.0], Restructure(NamedTuple, ..., 6))

julia> Enzyme.jacobian(set_runtime_activity(Reverse), v -> begin nt = re3(v); nt.a[1,:] .^ nt.b[1] end, v3)[1]
{[0]:Pointer, [0,-1]:Float@double, [8]:Integer, [9]:Integer, [10]:Integer, [11]:Integer, [12]:Integer, [13]:Integer,
...
 canonicalizing 8
ERROR: LLVM error: Canonicalization failed
Stacktrace:
 [1] handle_error(reason::Cstring)
   @ LLVM ~/.julia/packages/LLVM/wMjUU/src/core/context.jl:194

On 1.11, all give this error:

julia> VERSION
v"1.11.0"

julia> v2, re2 = Flux.destructure((a=[1 2; 3 4.], b=2.0))  # easy case
([1.0, 3.0, 2.0, 4.0], Restructure(NamedTuple, ..., 4))

julia> Enzyme.jacobian(set_runtime_activity(Reverse), v -> re2(v).a[1,:], v2)[1]
ERROR: 
No augmented forward pass found for jl_genericmemory_slice
 at context:   %93 = call nonnull "enzyme_type"="{[-1]:Pointer}" {} addrspace(10)* @jl_genericmemory_slice({} addrspace(10)* nonnull %76, i64 %92, i64 %40) #28, !dbg !234

Stacktrace:
 [1] reshape
   @ ./reshapedarray.jl:55
 [2] reshape
   @ ./reshapedarray.jl:121
 [3] _getat
   @ ~/.julia/packages/Optimisers/a4OnF/src/destructure.jl:102


Stacktrace:
  [1] reshape
    @ ./reshapedarray.jl:55 [inlined]
  [2] reshape
    @ ./reshapedarray.jl:121 [inlined]
  [3] _getat
    @ ~/.julia/packages/Optimisers/a4OnF/src/destructure.jl:102
  [4] #57
    @ ~/.julia/packages/Optimisers/a4OnF/src/destructure.jl:97 [inlined]
  [5] ExcludeWalk
    @ ~/.julia/packages/Functors/LbNAu/src/walks.jl:126 [inlined]

This is just saying we haven’t added support for that 1.11 intrinsic yet (and should).

The canonicalization-failed one is weird; can you file an issue with that? That shouldn’t ever happen.


I noticed that when I use set_runtime_activity(Reverse) as you do, the runtime is about 20x longer than in my earlier attempts without Flux. My previous post has this (you both helped, you may recall); I’m reposting the code here for clarity:

# Test coreloop diff with Enzyme

begin # Packages
    using SpecialFunctions
    using BenchmarkTools
    using SphericalHarmonics
    using Enzyme
end

begin # Functions
    # Coulomb funcs
    function GL(k, r, L)
        return -k*r*sphericalbessely(L, k*r)
    end
    function FL(k, r, L)
        return k*r*sphericalbesselj(L, k*r)
    end 

    # Spherical Hankel functions
    function Hminus(k, r, L)
        return complex(GL(k, r, L), -FL(k, r, L))
    end
    function Hplus(k, r, L)
        return complex(GL(k, r, L), FL(k, r, L))
    end

    # Derivatives
    enzR_Hminusprime(k, r, L) =
        complex(Enzyme.gradient(Reverse, x -> GL(k, x, L), r)[1], -Enzyme.gradient(Reverse, x -> FL(k, x, L), r)[1])
    enzR_Hplusprime(k, r, L) =
        complex(Enzyme.gradient(Reverse, x -> GL(k, x, L), r)[1], Enzyme.gradient(Reverse, x -> FL(k, x, L), r)[1])

    enzF_Hminusprime(k, r, L) =
        complex(Enzyme.gradient(Forward, x -> GL(k, x, L), r)[1], -Enzyme.gradient(Forward, x -> FL(k, x, L), r)[1])
    enzF_Hplusprime(k, r, L) =
        complex(Enzyme.gradient(Forward, x -> GL(k, x, L), r)[1], Enzyme.gradient(Forward, x -> FL(k, x, L), r)[1])

    function enzSL_0f0(U, L, μ, k, r, Ecm)
        dr = r[2] - r[1]
        len = size(r)[1]-1
        ur1, ur2, ur3 = 0.0, 0.0, 0.0
        ui1, ui2, ui3 = 0.0, 0.0, 0.0
        dur1, dur2, dur3 = 0.0, 0.0, 0.0
        dui1, dui2, dui3 = 0.0, 0.0, 0.0
        a = r[end-2]
        ur2 = 1e-6
        ui1 = 1e-12  # ideally these are all always Float32, or all always Float64
        ui2 = 1e-6
        for i in 3:len
            vreal = Ecm - U[i,1]
            vimag = -U[i,2]
            w = 2*μ/ħ^2*complex(vreal, vimag) - L*(L+1)/r[i]^2
            vreal = Ecm -U[i-1,1]
            vimag = -U[i-1,2]
            wmo = 2*μ/ħ^2*complex(vreal, vimag) - L*(L+1)/r[i]^2
            vreal = Ecm - U[i+1,1]
            vimag = -U[i+1,2]
            wpo = 2*μ/ħ^2*complex(vreal, vimag) - L*(L+1)/r[i]^2
            uval = (2*complex(ur2,ui2)-complex(ur1,ui1)-(dr^2/12)*(10*w*complex(ur2,ui2)+wmo*complex(ur1,ui1)))/(1+(dr^2/12)*wpo)
            
            ur3 = real.(uval)
            dur3 = 0.5*(ur3-ur1)/dr
            ui3 = imag.(uval)
            dui3 = 0.5*(ui3-ui1)/dr
    
            ur1, ur2 = ur2, ur3
            dur1, dur2 = dur2, dur3
            ui1, ui2 = ui2, ui3
            dui1, dui2 = dui2, dui3
        end
        ua = complex(ur2,ui2)
        dua = complex(dur3,dui3)
        
        RL = ua / dua
        # SLtop = Hminus(k, a, L) - RL*enzR_Hminusprime(k, a, L)
        # SLbot = Hplus(k, a, L) - RL*enzR_Hplusprime(k, a, L)
        SLtop = Hminus(k, a, L) - RL*enzF_Hminusprime(k, a, L)
        SLbot = Hplus(k, a, L) - RL*enzF_Hplusprime(k, a, L)
    
        SL = SLtop/SLbot
        return [real(SL), imag(SL)]
    end

    # Koning-Delaroche Potential
    function kd_params(A, Z, E)
        N = A - Z
        Ef = -11.23814 + 0.02646*A
        v1 = 59.3 - 21.0*(N-Z)/A - 0.024*A
        v2 = 0.007228 - (1.48e-6)*A
        v3 = 1.994e-5 - (2.e-8)*A
        v4 = 7.e-9
        Vo = v1*(1-v2*(E-Ef)+v3*(E-Ef)^2-v4*(E-Ef)^3)
        ro = (1.3039 - 0.4054/A^(1/3))*A^(1/3)
        ao = 0.6778 - (1.487e-4)*A
        w1 = 12.195 + 0.0167*A
        w2 = 73.55 + 0.0795*A
        Wro = w1*(E-Ef)^2/((E-Ef)^2+w2^2)
        d1 = 16 - 16*(N-Z)/A
        d2 = 0.0180 + 0.003802/(1+exp((A-156.)/8.))
        d3 = 11.5
        rw = (1.3424 - 0.01585*A^(1/3))*A^(1/3)
        aw = 0.5446 - (1.656e-4)*A
        Wso = d1*(E-Ef)^2*exp(-d2*(E-Ef))/((E-Ef)^2+d3^2)
        return Vo, ro, ao, Wro, Wso, rw, aw
    end
    function kd_pots(A, Z, E, r)
        Vo, ro, ao, Wro, Wso, rw, aw = kd_params(A, Z, E)
        Vr = -Vo ./(1 .+ exp.(-(ro.-r)./ao))
        W = -Wro ./(1 .+ exp.(-(ro.-r)./ao))
        Ws = -4 .* Wso .* exp.(-(rw.-r)./aw) ./(1 .+exp.(-(rw.-r)./aw)).^2
        return Vr, W, Ws
    end
end

# Set up particular scattering problem
A = 65
Z = 29
N = A - Z
E = 10
L = 30
Ecm = 9.848393154293218
μ = 925.3211722114523
k = 0.6841596644044445
r = Vector(LinRange(0, 20, 2000))
dr = r[2] - r[1]
const global ħ = 197.3269804

# Generate a potential from K-D
Vreal, Wv, Ws = kd_pots(A, Z, E, r);
U = Float32.(hcat(Vreal, Wv + Ws));

@btime reshape(Enzyme.jacobian(Reverse, U -> enzSL_0f0(U, L, μ, k, r, Ecm), U)[1], 2, :)
@btime reshape(Enzyme.jacobian(Reverse, U -> enzSL_0f0(U, L, μ, k, r, Ecm), Float64.(U))[1], 2, :)  # all the rest is Float64

This code is very fast:

288.222 μs (65 allocations: 65.91 KiB)
308.659 μs (70 allocations: 159.58 KiB)

@mcabbott’s version with set_runtime_activity(Reverse) applied to the neural network version is slower:

6.788 ms (599 allocations: 6.38 MiB)

The new NN version is the derivative with respect to 1202 parameters, while the older “U” version is with respect to 8000 constant values. It’s not apples and oranges, but I’d like to know if there’s a better way to do this with Flux than invoking set_runtime_activity, one that might be faster.

Effectively, I’m asking how to apply a) below to this problem:

ERROR: LoadError: Constant memory is stored (or returned) to a differentiable variable.
As a result, Enzyme cannot provably ensure correctness and throws this error.
This might be due to the use of a constant variable as temporary storage for active memory (https://enzyme.mit.edu/julia/stable/faq/#Runtime-Activity).
If Enzyme should be able to prove this use non-differentable, open an issue!
To work around this issue, either:
 a) rewrite this variable to not be conditionally active (fastest, but requires a code change), or
 b) set the Enzyme mode to turn on runtime activity (e.g. autodiff(set_runtime_activity(Reverse), ...) ). This will maintain correctness, but may slightly reduce performance.

If that’s the case, then I really think one should heed the advice of option a) (rewriting the code to not be conditionally active). It turns out that Enzyme’s activity analysis proved that a lot of the code didn’t need to be differentiated (and was hindered, seemingly, by this?):

Stacktrace:
  [1] _trainmap
    @ ~/.julia/packages/Optimisers/a4OnF/src/destructure.jl:114
  [2] _Trainable_biwalk
    @ ~/.julia/packages/Optimisers/a4OnF/src/destructure.jl:110
  [3] ExcludeWalk
    @ ~/.julia/packages/Functors/LbNAu/src/walks.jl:126

In particular it believes it is storing a non-differentiable matrix into a larger data structure which it thinks is differentiable (perhaps conservatively).

@mcabbott does this code ring any bells or give hints as to where something is going awry?

re is rebuilding a nested structure from a Vector v by doing ynew = reshape(v[o .+ (1:length(y))], axes(y)) repeatedly, with known offsets o. Here it seems y is const, while v and hence ynew are active. That part seems like it ought to be fine?
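
For anyone who wants to poke at just that piece, here it is roughly in isolation, as a toy (my own names and numbers, not the Optimisers code; I haven’t checked whether it reproduces the 1.11 jl_genericmemory_slice error):

using Enzyme

y = [1.0 2.0; 3.0 4.0]   # plays the role of the const template; only its length/axes are used
o = 0                    # offset into the flat vector, known ahead of time

# roughly what _getat does: slice the active flat vector, reshape to the template's axes
rebuild(v) = reshape(v[o .+ (1:length(y))], axes(y))

Enzyme.gradient(Reverse, v -> sum(abs2, rebuild(v)), [1.0, 3.0, 2.0, 4.0])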

The “repeatedly” is some recursive walk by Functors.jl, which keeps an IdDict of what’s seen, and I believe here this will be indexed by y and some ynew may be taken from this. That sounds like it could be awful?
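
(To spell out the caching I mean, a toy with Functors alone, no Enzyme involved; if I remember right, the walk keys its IdDict on the input leaves, so shared inputs come back as one shared output:)

using Functors

shared = [1.0, 2.0]
nt = (x = shared, y = shared)   # the same array appears twice

out = fmap(v -> v .+ 10, nt)
out.x === out.y                 # expect true: the cache hands back one new array for both fields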

I’m surprised you say it this way around. In low-tech terms, you can’t store a Dual(1.0,1) in [1.0, 2.0], but you can store 4.0 in [Dual(5.0, 1), Dual(6.0, 0)]. So I’m surprised that this fails:

julia> Enzyme.gradient(Reverse, (x,y) -> sum(vcat(x,y)), [1.0, 2.0], Const([3.0]))
ERROR: Constant memory is stored (or returned) to a differentiable variable.

And as a result, I’m not surprised that the following fmap example fails; it’s not testing the IdDict or anything, just vcat…

julia> using Enzyme, Functors

julia> alpha = (a=[1,2.], b=[3.]); beta = (a=[4,5.], b=[6.]);

julia> fmap(vcat, alpha, beta)  # this is what fmap does
(a = [1.0, 2.0, 4.0, 5.0], b = [3.0, 6.0])

julia> Enzyme.gradient(Reverse, (α,β) -> sum(fmap(vcat,α,β).a), alpha, beta)
((a = [1.0, 1.0], b = [0.0]), (a = [1.0, 1.0], b = [0.0]))

julia> Enzyme.gradient(Reverse, (α,β) -> sum(fmap(vcat,α,β).a), alpha, Const(beta))
ERROR: Constant memory is stored (or returned) to a differentiable variable.

When you say “non-differentiable matrix into a larger data structure”, for what kinds of structure is this a concern? I can happily make some objects which seem to be partly const:

julia> Enzyme.gradient(Reverse, (a,b) -> begin nt = (; a, b, c=sum(a.*b)); sum(nt.a) / nt.c end, [1.0, 20], Const([3.0, 4.0]))
([0.0029031789809841786, -0.0001451589490492084], nothing)

That’s what the function _trainmap does… it’s taking all the children of something (a NamedTuple), and a subset (represented as a similar NamedTuple with some nothing entries, the others like y above) and some aux info (the offsets above) and building another NamedTuple with either ynew, or the original child. And this seems to be no problem:

julia> function _trainmap(f, ch, tr, aux)
         map(ch, tr, aux) do c, t, a  # isnothing(t) indicates non-trainable field
           isnothing(t) ? c : f(t, a)
         end
       end

julia> _trainmap(getindex, ([1.0], [2.0, 3.0]), (nothing, [2.0, 3.0]), (99:101, 2:2))
([1.0], [3.0])

julia> Enzyme.gradient(Reverse, (ch, tr, aux) -> sum(sum, _trainmap(getindex, ch, tr, aux)), Const(([1.0], [2.0, 3.0])), (nothing, [2.0, 3.0]), Const((99:101, 2:2)))
(nothing, (nothing, [0.0, 1.0]), nothing)

Yeah, so storing an immutable value like 4.0 into a differentiable structure is fine. Storing a mutable, constant value into a differentiable structure, however, presents challenges. I think the Runtime Activity FAQ linked in the error message is particularly helpful here, so I’ll copy it below. In the case of a float array I think we also ought to be able to resolve it (but we clearly haven’t fully generalized the mechanisms for avoiding runtime activity yet). It will, however, come with a performance hit here, either from computing unnecessary derivatives or from doing an extra check before doing so.
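
For concreteness, a toy of the immutable case being fine (made-up values; the mutable analogue is just the vcat example above):

using Enzyme

# storing the immutable constant 4.0 into differentiable memory is fine;
# the overwritten slot just ends up with a zero derivative
Enzyme.gradient(Reverse, x -> (x[2] = 4.0; sum(abs2, x)), [1.0, 2.0])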

Runtime Activity

When computing the derivative of mutable variables, Enzyme also needs additional temporary storage space for the corresponding derivative variables. If an argument is marked as Const, Enzyme does not have any temporary storage space for its derivative!

Enzyme will error when it detects these latter types of situations, which we will refer to as activity unstable. This term is chosen to mirror the Julia notion of type-unstable code (e.g. where a type is not known at compile time). If an expression is activity unstable, it could either be constant or active, depending on data not known at compile time. For example, consider the following:

function g(cond, active_var, constant_var)
  if cond
    return active_var
  else
    return constant_var
  end
end

Enzyme.autodiff(Forward, g, Const(condition), Duplicated(x, dx), Const(y))

The returned value here could either be constant or duplicated, depending on the runtime-defined value of cond. If cond is true, Enzyme simply returns the shadow of active_var as the derivative. However, if cond is false, there is no derivative shadow for constant_var and Enzyme will throw an EnzymeRuntimeActivityError. For some simple types, e.g. a float, Enzyme can circumvent this issue, for example by returning the float 0. Similarly, for some types like the Symbol type, which are never differentiable, such a shadow value will never be used, and Enzyme can return the original “primal” value as its derivative. However, for arbitrary data structures, Enzyme presently has no generic mechanism to resolve this.

For example consider a third function:

function h(cond, active_var, constant_var)
  return [g(cond, active_var, constant_var), g(cond, active_var, constant_var)]
end

Enzyme.autodiff(Forward, h, Const(condition), Duplicated(x, dx), Const(y))

Enzyme provides a nice utility Enzyme.make_zero which takes a data structure and constructs a deepcopy of the data structure with all of the floats set to zero and non-differentiable types like Symbols set to their primal value. If Enzyme gets into such a “Mismatched activity” situation where it needs to return a differentiable data structure from a constant variable, it could try to resolve this situation by constructing a new shadow data structure, such as with Enzyme.make_zero. However, this still can lead to incorrect results. In the case of h above, suppose that active_var and constant_var are both arrays, which are mutable (aka in-place) data types. This means that the return of h is going to either be result = [active_var, active_var] or result = [constant_var, constant_var]. Thus an update to result[1][1] would also change result[2][1], since result[1] and result[2] are the same array.

If one created a new zeroed copy of each return from g, this would mean that the derivative dresult would have one copy made for the first element, and a second copy made for the second element. This could lead to incorrect results, and is unfortunately not a general resolution. However, for non-mutable variables (e.g. floats) or non-differentiable types (e.g. Symbols) this problem can never arise.

Instead, Enzyme has a special mode known as “Runtime Activity” which can handle these types of situations. It can come with a minor performance reduction, and is therefore off by default. It can be enabled by setting runtime activity to true in the desired differentiation mode.

The way Enzyme’s runtime activity resolves this issue is to return the original primal variable as the derivative whenever it needs to denote the fact that a variable is a constant. As this issue can only arise with mutable variables, they must be represented in memory via a pointer. All additional loads and stores will now be modified to first check if the primal pointer is the same as the shadow pointer, and if so, treat it as a constant. Note that this check is not saying that the same arrays contain the same values, but rather that the same backing memory represents both the primal and the shadow (e.g. a === b or equivalently pointer(a) == pointer(b)).

Enabling runtime activity does, therefore, come with a sharp edge: if the computed derivative of a function is mutable, one must also check whether the primal and shadow represent the same pointer, and if so, the true derivative of the function is actually zero.

Generally, the preferred solution to these types of activity-unstable code is to make your variables all activity-stable (e.g. always containing differentiable memory or always containing non-differentiable memory). However, with care, Enzyme does support “Runtime Activity” as a way to differentiate these programs without having to modify your code. One can enable runtime activity for your code by changing the mode, such as

Enzyme.autodiff(set_runtime_activity(Forward), h, Const(condition), Duplicated(x, dx), Const(y))
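
(A sketch of the check that last paragraph describes; the helper name here is made up, not an Enzyme API:)

using Enzyme

# under runtime activity, a mutable "derivative" that aliases its primal is
# really Enzyme's way of marking that value as constant
true_derivative(primal, shadow) = shadow === primal ? Enzyme.make_zero(primal) : shadow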

I can see how that g is bad.

But why is vcat(x,y) in any way uncertain? (Although this might be orthogonal to OP’s problem.)

More importantly, what does “structure” mean here? My NamedTuple examples wrap Duplicated and Const things into the same struct, but probably don’t “store” anything; it’s not an array and has no pointer. Is that always OK?