Julia 1.9 brings several new compiler optimizations. Here is one example.
julia> function bar(x)  # demo: box a value, update it, attach a do-nothing finalizer, return the value
r = Ref(5)  # mutable reference initialised to 5
r[] += x  # add the argument to the stored value in place
finalizer(x->nothing, r)  # register a finalizer on r whose body does nothing
return r[]  # read the updated value back out
end
bar (generic function with 1 method)
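Under Julia 1.8.x, this function allocates memory: the `Ref` is allocated on the heap (note the call to `ijl_gc_pool_alloc` in the LLVM IR below), and `@time` reports one 16-byte allocation.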
julia> @code_llvm bar(5)
; @ REPL[5]:1 within `bar`
define i64 @julia_bar_320(i64 signext %0) #0 {
top:
%gcframe4 = alloca [3 x {}*], align 16
%gcframe4.sub = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe4, i64 0, i64 0
%1 = bitcast [3 x {}*]* %gcframe4 to i8*
call void @llvm.memset.p0i8.i32(i8* noundef nonnull align 16 dereferenceable(24) %1, i8 0, i32 24, i1 false)
%thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #4
%ppgcstack_i8 = getelementptr i8, i8* %thread_ptr, i64 -8
%ppgcstack = bitcast i8* %ppgcstack_i8 to {}****
%pgcstack = load {}***, {}**** %ppgcstack, align 8
; @ REPL[5]:2 within `bar`
; ┌ @ refpointer.jl:134 within `Ref`
; │┌ @ refvalue.jl:10 within `RefValue` @ refvalue.jl:8
%2 = bitcast [3 x {}*]* %gcframe4 to i64*
store i64 4, i64* %2, align 16
%3 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe4, i64 0, i64 1
%4 = bitcast {}** %3 to {}***
%5 = load {}**, {}*** %pgcstack, align 8
store {}** %5, {}*** %4, align 8
%6 = bitcast {}*** %pgcstack to {}***
store {}** %gcframe4.sub, {}*** %6, align 8
%ptls_field5 = getelementptr inbounds {}**, {}*** %pgcstack, i64 2
%7 = bitcast {}*** %ptls_field5 to i8**
%ptls_load67 = load i8*, i8** %7, align 8
%8 = call noalias nonnull {}* @ijl_gc_pool_alloc(i8* %ptls_load67, i32 1392, i32 16) #5
%9 = bitcast {}* %8 to i64*
%10 = getelementptr inbounds i64, i64* %9, i64 -1
store atomic i64 140139148322928, i64* %10 unordered, align 8
; └└
; @ REPL[5]:3 within `bar`
; ┌ @ int.jl:87 within `+`
%11 = add i64 %0, 5
; └
; ┌ @ refvalue.jl:57 within `setindex!`
; │┌ @ Base.jl:39 within `setproperty!`
store i64 %11, i64* %9, align 8
%12 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe4, i64 0, i64 2
store {}* %8, {}** %12, align 16
; └└
; @ REPL[5]:4 within `bar`
; ┌ @ gcutils.jl:48 within `finalizer`
; │┌ @ boot.jl:364 within `getptls`
%13 = call i64 inttoptr (i64 140139484240096 to i64 ()*)()
; │└
call void inttoptr (i64 140139484263040 to void (i64, {}*, {}*)*)(i64 %13, {}* nonnull %8, {}* inttoptr (i64 140139382781504 to {}*))
; └
; @ REPL[5]:5 within `bar`
; ┌ @ refvalue.jl:56 within `getindex`
; │┌ @ Base.jl:38 within `getproperty`
%14 = load i64, i64* %9, align 8
%15 = load {}*, {}** %3, align 8
%16 = bitcast {}*** %pgcstack to {}**
store {}* %15, {}** %16, align 8
; └└
ret i64 %14
}
julia> @time bar(5)
0.000002 seconds (1 allocation: 16 bytes)
10
Under Julia 1.9.0-beta2, the same function no longer allocates memory, and the generated code is now very simple: a single addition. In effect, the do-nothing finalizer is optimized away entirely.
julia> @code_llvm bar(5)
; @ REPL[1]:1 within `bar`
define i64 @julia_bar_109(i64 signext %0) #0 {
top:
; @ REPL[1]:3 within `bar`
; ┌ @ int.jl:87 within `+`
%1 = add i64 %0, 5
; └
; @ REPL[1]:5 within `bar`
ret i64 %1
}
julia> @time bar(5)
0.000001 seconds
10