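In the following minimal example, the function `f1` is optimized, but the function `f2` is not: `f1` compiles down to a single `ret`, whereas `f2` still allocates the `A` object and makes a dynamic call to `jl_f_setfield`.
Is there any fundamental difficulty that prevents the compiler from optimizing `f2` as well? Can I make the `setfield!(a, a.i, v)` pattern faster?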
And here is my actual code, which needs that pattern.
The minimal example is as follows:
"""
    A(val, i)

Mutable two-field container used by the minimal example.

# Fields
- `val::Int`: the value that `f1` and `f2` read back after calling `setfield!`.
- `i::Int`: an integer that `f2` loads and passes to `setfield!` as the field index.
"""
mutable struct A
    val::Int
    i::Int
end
"""
    f1(v)

Construct `A(v, 1)`, write `v` into the field at index `1` via `setfield!`,
and return the object's `val` field.

The field index is bound to the literal `1` inside the function. This is the
variant whose `@code_llvm` listing consists of a single `ret` of `v`.
"""
function f1(v)
    a = A(v, 1)
    i = 1  # <- field index written as a literal
    setfield!(a, i, v)
    return a.val
end
"""
    f2(v)

Construct `A(v, 1)`, load the field index from `a.i`, write `v` into that
field via `setfield!`, and return the object's `val` field.

The only difference from `f1` is that the index passed to `setfield!` is read
back from the object instead of being a literal. This is the variant whose
`@code_llvm` listing still allocates `A` and calls `jl_f_setfield`.
"""
function f2(v)
    a = A(v, 1)
    # Optional: `a.i = 1` may also be assigned explicitly at this point.
    i = a.i  # <- field index loaded from the freshly constructed object
    setfield!(a, i, v)
    return a.val
end
@code_llvm f1(1)
; Function Signature: f1(Int64)
; @ path within `f1`
; Function Attrs: uwtable
define i64 @julia_f1_28558(i64 signext %"v::Int64") #0 {
top:
ret i64 %"v::Int64"
}
@code_llvm f2(1)
; Function Signature: f2(Int64)
; @ path within `f2`
; Function Attrs: uwtable
define i64 @julia_f2_28289(i64 signext %"v::Int64") #0 {
top:
%jlcallframe1 = alloca [3 x ptr], align 8
%gcframe2 = alloca [5 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe2, i8 0, i64 40, i1 true)
%pgcstack = call ptr inttoptr (i64 140735906416704 to ptr)() #9
store i64 12, ptr %gcframe2, align 16
%frame.prev = getelementptr inbounds ptr, ptr %gcframe2, i64 1
%task.gcstack = load ptr, ptr %pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe2, ptr %pgcstack, align 8
; @ path within `f2`
; ┌ @ path within `A`
%ptls_field = getelementptr inbounds ptr, ptr %pgcstack, i64 2
%ptls_load = load ptr, ptr %ptls_field, align 8
%"new::A" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load, i32 800, i32 32, i64 1468019289872) #7
%"new::A.tag_addr" = getelementptr inbounds i64, ptr %"new::A", i64 -1
store atomic i64 1468019289872, ptr %"new::A.tag_addr" unordered, align 8
store i64 %"v::Int64", ptr %"new::A", align 8
%"new::A.i_ptr" = getelementptr inbounds i8, ptr %"new::A", i64 8
store i64 1, ptr %"new::A.i_ptr", align 8
%gc_slot_addr_2 = getelementptr inbounds ptr, ptr %gcframe2, i64 4
store ptr %"new::A", ptr %gc_slot_addr_2, align 16
; └
; @ path within `f2`
%box_Int64 = call nonnull align 8 dereferenceable(8) ptr @ijl_box_int64(i64 signext 1) #2
%gc_slot_addr_1 = getelementptr inbounds ptr, ptr %gcframe2, i64 3
store ptr %box_Int64, ptr %gc_slot_addr_1, align 8
%box_Int642 = call nonnull align 8 dereferenceable(8) ptr @ijl_box_int64(i64 signext %“v::Int64”) #2
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe2, i64 2
store ptr %box_Int642, ptr %gc_slot_addr_0, align 16
store ptr %"new::A", ptr %jlcallframe1, align 8
%0 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 1
store ptr %box_Int64, ptr %0, align 8
%1 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 2
store ptr %box_Int642, ptr %1, align 8
%jl_f_setfield_ret = call nonnull ptr @jl_f_setfield(ptr null, ptr nonnull %jlcallframe1, i32 3)
; @ path within `f2`
; ┌ @ Base.jl:49 within `getproperty`
%"new::A.val" = load i64, ptr %"new::A", align 8
%frame.prev11 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev11, ptr %pgcstack, align 8
ret i64 %"new::A.val"
; └
}