Hello,
I started to investigate the Julia internals a bit and toyed around with some code that tries to invalidate the generator of a `@generated` function. I came up with the following code:
import Core.Compiler.MethodInstance
import Core.Compiler.method_instance
import Core.Compiler.method_instances
import Core.Compiler.@_gc_preserve_begin
import Core.Compiler.@_gc_preserve_end
import Core.Compiler.pointer_from_objref
import Core.Compiler.CodeInstance
import Core.Compiler.RefValue
import Core.Compiler.CodeInfo
#Helper functions to deal with types
# Split a `NamedTuple` type into its two type parameters and return them as
# `(names, types)`: the tuple of field-name symbols and the `Tuple` type of the values.
named_tuple_types(::Type{NamedTuple{Names, Types}}) where {Names, Types} = (Names, Types)
# Return the four type parameters of a `Base.Pairs` type as a tuple, in declaration order.
pairs_types(::Type{Base.Pairs{K, V, I, A}}) where {K, V, I, A} = (K, V, I, A)
# Return the element types of a fixed-length `Tuple` type as a plain tuple of types.
# The `Vararg{Any, N}` bound restricts the method to tuple types of a definite length `N`.
tuple_types(::Type{T}) where {N, T <: Tuple{Vararg{Any, N}}} = (T.parameters...,)
#Helper macros that wrap a function body in Core._apply_pure (the second one also marks the function as @generated)
# Rewrite the function definition `f` so that its original body runs inside a
# zero-argument closure invoked through `Core._apply_pure`, then return the modified
# definition escaped into the caller's scope.
macro set_pure(f)
    body = f.args[2]
    # The definition expression is updated in place: slot 2 holds the function body.
    f.args[2] = quote
        return Core._apply_pure(() -> ($body), ())
    end
    return esc(f)
end
# Rewrite the function definition `f` so that its original body runs inside a
# zero-argument closure invoked through `Core._apply_pure`, and additionally prefix the
# definition with `@generated`. The result is escaped into the caller's scope.
macro set_pure_generated(f)
    body = f.args[2]
    # The definition expression is updated in place: slot 2 holds the function body.
    f.args[2] = quote
        return Core._apply_pure(() -> ($body), ())
    end
    return esc(:(@generated $f))
end
#Helper functions to get the staged code for a method instance (the ccall below declares its result as a CodeInfo)
# Call the runtime function `jl_code_for_staged` for `minstance` at world age `world`
# and return its result, which the `ccall` declares as a `CodeInfo`.
# `cinstance` is handed over as a raw pointer to the `RefValue` object itself.
# NOTE(review): presumably the runtime treats it as a slot for a `CodeInstance`; the C
# signature of `jl_code_for_staged` differs between Julia versions — confirm for the
# version in use.
function call_get_staged(minstance::MethodInstance, world::UInt, cinstance::RefValue{CodeInstance})
# Keep `cinstance` preserved for the GC while only its raw address is in use.
token = @_gc_preserve_begin cinstance
cinstance_ptr = pointer_from_objref(cinstance)
src = ccall(:jl_code_for_staged, Ref{CodeInfo}, (Any, UInt, Ptr{CodeInstance}), minstance, world, cinstance_ptr)
# The raw pointer is no longer needed once the call has returned.
@_gc_preserve_end token
return src
end
# Variant used when no `RefValue{CodeInstance}` is supplied: calls `jl_code_for_staged`
# for `minstance` at world age `world` with `C_NULL` as the third argument and returns
# the result, which the `ccall` declares as a `CodeInfo`.
function call_get_staged(minstance::MethodInstance, world::UInt, ::Nothing)
return ccall(:jl_code_for_staged, Ref{CodeInfo}, (Any, UInt, Ptr{Cvoid}), minstance, world, C_NULL)
end
# Obtain the staged code for `minstance` at world age `world` by delegating to
# `call_get_staged` with a fresh, unassigned `RefValue{CodeInstance}`.
# Despite the name, the value returned is whatever `call_get_staged` returns.
code_instance(minstance::MethodInstance, world::UInt) =
    call_get_staged(minstance, world, RefValue{CodeInstance}())
#Generator 2
@set_pure_generated function g__p__f(pargs, kargs)
    # Inside the generator `pargs` and `kargs` are the argument *types*. The keyword
    # names are read from the `NamedTuple` type stored as the fourth parameter of the
    # `Base.Pairs` type `kargs`.
    #println("GENERATE BODY")
    #println(pargs, " ", kargs)
    _1, _2, _3, ntt = pairs_types(kargs)
    st, t = named_tuple_types(ntt)
    tt = tuple_types(t)  # keyword value types; computed but not used by the body below

    # One extra statement is collected per keyword name for which `do_stuff` says `true`.
    et = Expr[]
    for name in st
        do_stuff(g, Val(name)) == true || continue
        push!(et, quote
            println("EXECUTE EXPRESSION")
            #println($(QuoteNode(name)))
        end)
    end

    # The generated body: a marker line, then the collected statements, then a bare return.
    return quote
        println("EXECUTE BODY")
        #println(pargs, " ", kargs)
        $(et...)
        return
    end
end
#Generator 1
@set_pure function g__p(world::UInt, source, self, pargs, kargs)
    # Generator entry point: look up the method instance of `g__p__f` for the given
    # argument types, fetch its staged code, attach edges to every method instance of
    # `do_stuff` matching `(typeof(g), Any)`, and return that code.
    #println("GENERATE CODE")
    target = method_instance(g__p__f, Tuple{pargs, kargs}; world=world)
    staged = code_instance(target, world)

    # The edge list starts empty and is filled in place after being attached.
    edges = MethodInstance[]
    staged.edges = edges
    for dep in method_instances(do_stuff, Tuple{typeof(g), Any}, world)
        #println("ADD EDGE")
        #println(dep)
        push!(edges, dep)
    end
    return staged
end
# Define the 2-argument method `g__p(pargs, kargs)`. Its body consists solely of two
# `:meta` expressions that are built at eval time and spliced in.
@eval function g__p(pargs, kargs)
# NOTE(review): presumably marks the method as having no non-generated fallback body —
# confirm against how `@generated` is lowered in the Julia version in use.
$(Expr(:meta, :generated_only))
# Splices the function object `g__p` itself (which at this point carries the
# 5-argument `(world, source, self, pargs, kargs)` method) in as the `:generated` entry.
$(Expr(:meta, :generated, g__p))
end
#Proxy
# Public entry point: collects all positional arguments into the tuple `pargs` and all
# keyword arguments into `kargs`, and forwards both, un-splatted, to `g__p`.
g(pargs...; kargs...) = g__p(pargs, kargs)
#Function that invalidates later
# Fallback method: for `g` and any `Val`, answer `false`.
do_stuff(::typeof(g), ::Val) = false
#Call the function
# Baseline run: only the fallback `do_stuff(::typeof(g), ::Val)` exists and it returns
# `false`, so the generated body is expected to print just "EXECUTE BODY".
# Every call is issued twice with the same argument types, and two different
# combinations of argument types are used.
g(1; karg_1=2.0)
g(1; karg_1=2.0)
g(1.0; karg_1=3)
g(1.0; karg_1=3)
# Marker printed right before a new `do_stuff` method is added.
println("########## INVALIDATE ##########")
# More specific method: for the keyword name `:karg_1`, answer `true`.
do_stuff(::typeof(g), ::Val{:karg_1}) = true
# After adding the `Val{:karg_1}` method: if the body is regenerated, calls that pass
# `karg_1` are expected to print "EXECUTE BODY" followed by "EXECUTE EXPRESSION".
g(1; karg_1=2.0)
g(1; karg_1=2.0)
g(1.0; karg_1=3)
g(1.0; karg_1=3)
# `karg_2` still hits the fallback `do_stuff` method (returns `false`), so these calls
# are expected to print only "EXECUTE BODY".
g(1; karg_2=2.0)
g(1; karg_2=2.0)
g(1.0; karg_2=3)
g(1.0; karg_2=3)
# Marker printed right before a new `do_stuff` method is added.
println("########## INVALIDATE ##########")
# More specific method: for the keyword name `:karg_2`, answer `true`.
do_stuff(::typeof(g), ::Val{:karg_2}) = true
# After adding the `Val{:karg_2}` method: `do_stuff` now returns `true` for both keyword
# names, so (if the body is regenerated) each single-keyword call is expected to print
# "EXECUTE BODY" followed by one "EXECUTE EXPRESSION".
g(1; karg_1=2.0)
g(1; karg_1=2.0)
g(1.0; karg_1=3)
g(1.0; karg_1=3)
g(1; karg_2=2.0)
g(1; karg_2=2.0)
g(1.0; karg_2=3)
g(1.0; karg_2=3)
# Calls passing both keywords: the generator adds one statement per keyword name for
# which `do_stuff` returns `true`, so two "EXECUTE EXPRESSION" lines are expected.
g(1; karg_1=2.0, karg_2=2.0)
g(1; karg_1=2.0, karg_2=2.0)
g(1.0; karg_1=3, karg_2=3)
g(1.0; karg_1=3, karg_2=3)
It works by setting up two generators (g__p and g__p__f) and a proxy (g) that ultimately generates a body that is executed when g is called.
The proxy g only forwards the positional and keyword arguments it collected via splatting to the first generator g__p, which then retrieves a method instance and a code instance for the second generator g__p__f. The code instance is manually extended with edges to do_stuff and then returned by the first generator g__p.
The second generator g__p__f is then called via the newly created code instance, which generates the body that is executed afterwards.
The body is generated based on the return value of do_stuff, which is a standard Julia function with multiple methods, whereby the function is subsequently extended with methods that trigger the invalidation.
The code can be directly executed in the REPL (tested with 1.11.1) and seems to output the correct result.
New bodies are generated after the function do_stuff is extended.
To observe the generators, the code has to be executed in the test environment of a package, because this allows us to print from the generators (at least that is the only way I know of to see the output).
So in the end I am asking: is this a somewhat reasonable way of invalidating the generator, or are there pitfalls that I might have missed?