In the following minimal example, the function `f1` is optimized, but the function `f2` is not. Is there any fundamental difficulty that prevents the compiler from optimizing it? Can I make the `setfield!(a, a.i, v)` pattern faster?
Here is my actual code, which needs that pattern.
The minimal example is as follows:
"""
    A(val, i)

Mutable record with two `Int` fields, `val` and `i`.

Because the type is mutable, its fields can be overwritten in place, either by
name (`a.val = x`) or by position (`setfield!(a, 1, x)`).
"""
mutable struct A
    val::Int
    i::Int
end
"""
    f1(v)

Construct an `A` from `v`, overwrite the field at positional index `1` with `v`
via `setfield!`, and return the object's `val` field.

The field index is a literal constant in this variant.
"""
function f1(v)
    obj = A(v, 1)
    fieldidx = 1  # <- index given as a literal
    setfield!(obj, fieldidx, v)
    return obj.val
end
"""
    f2(v)

Construct an `A` from `v`, read the positional index back from the object's own
`i` field, overwrite that field with `v` via `setfield!`, and return the
object's `val` field.

Unlike a literal index, the index here is loaded from the object itself.
"""
function f2(v)
    obj = A(v, 1)
    # obj.i = 1  # optional
    fieldidx = obj.i  # <- index read from a field of the object
    setfield!(obj, fieldidx, v)
    return obj.val
end
# Print the LLVM IR generated for `f1` when called with an `Int` argument.
@code_llvm f1(1)
; Function Signature: f1(Int64)
; @ path within `f1`
; Function Attrs: uwtable
define i64 @julia_f1_28558(i64 signext %"v::Int64") #0 {
top:
ret i64 %"v::Int64"
}
# Print the LLVM IR generated for `f2` when called with an `Int` argument.
@code_llvm f2(1)
; Function Signature: f2(Int64)
; @ path within `f2`
; Function Attrs: uwtable
define i64 @julia_f2_28289(i64 signext %"v::Int64") #0 {
top:
%jlcallframe1 = alloca [3 x ptr], align 8
%gcframe2 = alloca [5 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe2, i8 0, i64 40, i1 true)
%pgcstack = call ptr inttoptr (i64 140735906416704 to ptr)() #9
store i64 12, ptr %gcframe2, align 16
%frame.prev = getelementptr inbounds ptr, ptr %gcframe2, i64 1
%task.gcstack = load ptr, ptr %pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe2, ptr %pgcstack, align 8
; @ path within `f2`
; ┌ @ path within `A`
%ptls_field = getelementptr inbounds ptr, ptr %pgcstack, i64 2
%ptls_load = load ptr, ptr %ptls_field, align 8
%"new::A" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load, i32 800, i32 32, i64 1468019289872) #7
%"new::A.tag_addr" = getelementptr inbounds i64, ptr %"new::A", i64 -1
store atomic i64 1468019289872, ptr %"new::A.tag_addr" unordered, align 8
store i64 %"v::Int64", ptr %"new::A", align 8
%"new::A.i_ptr" = getelementptr inbounds i8, ptr %"new::A", i64 8
store i64 1, ptr %"new::A.i_ptr", align 8
%gc_slot_addr_2 = getelementptr inbounds ptr, ptr %gcframe2, i64 4
store ptr %"new::A", ptr %gc_slot_addr_2, align 16
; └
; @ path within `f2`
%box_Int64 = call nonnull align 8 dereferenceable(8) ptr @ijl_box_int64(i64 signext 1) #2
%gc_slot_addr_1 = getelementptr inbounds ptr, ptr %gcframe2, i64 3
store ptr %box_Int64, ptr %gc_slot_addr_1, align 8
%box_Int642 = call nonnull align 8 dereferenceable(8) ptr @ijl_box_int64(i64 signext %"v::Int64") #2
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe2, i64 2
store ptr %box_Int642, ptr %gc_slot_addr_0, align 16
store ptr %"new::A", ptr %jlcallframe1, align 8
%0 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 1
store ptr %box_Int64, ptr %0, align 8
%1 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 2
store ptr %box_Int642, ptr %1, align 8
%jl_f_setfield_ret = call nonnull ptr @jl_f_setfield(ptr null, ptr nonnull %jlcallframe1, i32 3)
; @ path within `f2`
; ┌ @ Base.jl:49 within `getproperty`
%"new::A.val" = load i64, ptr %"new::A", align 8
%frame.prev11 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev11, ptr %pgcstack, align 8
ret i64 %"new::A.val"
; └
}