While looking at an old issue I found a case where `FixedSizeVector` results in more efficient generated code than `Memory`:
```julia
julia> using FixedSizeArrays

julia> function f(v)
           for idx in eachindex(v)
               v[idx] = 1
           end
       end
f (generic function with 1 method)

julia> code_llvm(f, (Memory{Int64},); debuginfo=:none)
; Function Signature: f(Memory{Int64})
define void @julia_f_2112(ptr noundef nonnull align 8 dereferenceable(16) %"v::GenericMemory") local_unnamed_addr #0 {
top:
%pgcstack = call ptr inttoptr (i64 4303044364 to ptr)(i64 4303044400) #10
%.unbox = load i64, ptr %"v::GenericMemory", align 8
%0 = icmp slt i64 %.unbox, 1
br i1 %0, label %L29, label %preloop.pseudo.exit
L11: ; preds = %vector.body, %L11.preheader33, %load
%value_phi3 = phi i64 [ %1, %load ], [ 1, %L11.preheader33 ], [ %13, %vector.body ]
%exitcond.not.not = icmp eq i64 %value_phi3, %7
br i1 %exitcond.not.not, label %oob, label %load
L29: ; preds = %load.postloop, %main.exit.selector, %top
ret void
oob: ; preds = %L11.postloop, %L11
%value_phi3.lcssa = phi i64 [ %value_phi3.postloop, %L11.postloop ], [ %7, %L11 ]
%ptls_field = getelementptr inbounds nuw i8, ptr %pgcstack, i64 16
%ptls_load = load ptr, ptr %ptls_field, align 8
%"box::GenericMemoryRef" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_small_alloc(ptr %ptls_load, i32 472, i32 32, i64 4630575984) #7
%"box::GenericMemoryRef.tag_addr" = getelementptr inbounds i8, ptr %"box::GenericMemoryRef", i64 -8
store atomic i64 4630575984, ptr %"box::GenericMemoryRef.tag_addr" unordered, align 8
store ptr %memoryref_data, ptr %"box::GenericMemoryRef", align 8
%.repack16 = getelementptr inbounds nuw i8, ptr %"box::GenericMemoryRef", i64 8
store ptr %"v::GenericMemory", ptr %.repack16, align 8
call void @ijl_bounds_error_int(ptr nonnull %"box::GenericMemoryRef", i64 %value_phi3.lcssa)
unreachable
load: ; preds = %L11
%gep = getelementptr i64, ptr %invariant.gep, i64 %value_phi3
store i64 1, ptr %gep, align 8
%1 = add nuw nsw i64 %value_phi3, 1
%exitcond39.not = icmp eq i64 %value_phi3, %5
br i1 %exitcond39.not, label %main.exit.selector, label %L11
main.exit.selector: ; preds = %load
%2 = icmp ult i64 %4, %.unbox
br i1 %2, label %L11.postloop, label %L29
preloop.pseudo.exit: ; preds = %top
%memory_data_ptr = getelementptr inbounds nuw i8, ptr %"v::GenericMemory", i64 8
%memoryref_data = load ptr, ptr %memory_data_ptr, align 8
%3 = shl nuw i64 %.unbox, 1
%memoryref_bytelen = shl i64 %.unbox, 3
%smin21 = call i64 @llvm.smin.i64(i64 %.unbox, i64 %3)
%4 = sub i64 %3, %smin21
%isnotneg.inv = icmp slt i64 %3, 0
%5 = call i64 @llvm.umin.i64(i64 %.unbox, i64 %4)
%.not45 = icmp eq i64 %3, %smin21
%.not = or i1 %isnotneg.inv, %.not45
br i1 %.not, label %L11.postloop, label %L11.preheader33
L11.preheader33: ; preds = %preloop.pseudo.exit
%6 = and i64 %.unbox, 2305843009213693951
%7 = add nuw nsw i64 %6, 1
%8 = add nuw i64 %5, 1
%invariant.gep = getelementptr i8, ptr %memoryref_data, i64 -8
%9 = add nsw i64 %5, -1
%umin = call i64 @llvm.umin.i64(i64 %9, i64 %6)
%min.iters.check = icmp samesign ult i64 %umin, 8
br i1 %min.iters.check, label %L11, label %vector.ph
vector.ph: ; preds = %L11.preheader33
%10 = add nuw nsw i64 %umin, 1
%n.mod.vf = and i64 %10, 7
%11 = icmp eq i64 %n.mod.vf, 0
%12 = select i1 %11, i64 8, i64 %n.mod.vf
%n.vec = sub nuw nsw i64 %10, %12
%13 = add nuw nsw i64 %n.vec, 1
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%14 = getelementptr i64, ptr %memoryref_data, i64 %index
%15 = getelementptr i8, ptr %14, i64 16
%16 = getelementptr i8, ptr %14, i64 32
%17 = getelementptr i8, ptr %14, i64 48
store <2 x i64> splat (i64 1), ptr %14, align 8
store <2 x i64> splat (i64 1), ptr %15, align 8
store <2 x i64> splat (i64 1), ptr %16, align 8
store <2 x i64> splat (i64 1), ptr %17, align 8
%index.next = add nuw i64 %index, 8
%18 = icmp eq i64 %index.next, %n.vec
br i1 %18, label %L11, label %vector.body
L11.postloop: ; preds = %load.postloop, %preloop.pseudo.exit, %main.exit.selector
%value_phi3.postloop = phi i64 [ %20, %load.postloop ], [ %8, %main.exit.selector ], [ 1, %preloop.pseudo.exit ]
%memoryref_offset.postloop = add nsw i64 %value_phi3.postloop, -1
%19 = add i64 %memoryref_offset.postloop, %.unbox
%memoryref_ovflw.not.postloop = icmp ult i64 %19, %3
%memoryref_data_offset.idx.postloop = shl i64 %memoryref_offset.postloop, 3
%memoryref_isinbounds.postloop = icmp ult i64 %memoryref_data_offset.idx.postloop, %memoryref_bytelen
%"memoryref_isinbounds¬ovflw.postloop" = and i1 %memoryref_ovflw.not.postloop, %memoryref_isinbounds.postloop
br i1 %"memoryref_isinbounds¬ovflw.postloop", label %load.postloop, label %oob
load.postloop: ; preds = %L11.postloop
%memoryref_data10.postloop = getelementptr inbounds i64, ptr %memoryref_data, i64 %memoryref_offset.postloop
store i64 1, ptr %memoryref_data10.postloop, align 8
%.not.postloop = icmp eq i64 %value_phi3.postloop, %.unbox
%20 = add nuw nsw i64 %value_phi3.postloop, 1
br i1 %.not.postloop, label %L29, label %L11.postloop
}

julia> code_llvm(f, (FixedSizeVectorDefault{Int64},); debuginfo=:none)
; Function Signature: f(FixedSizeArrays.FixedSizeArray{Int64, 1, Memory{Int64}})
define void @julia_f_2115(ptr nocapture noundef nonnull readonly align 8 dereferenceable(16) %"v::FixedSizeArray", ptr nocapture readonly %.roots.v) local_unnamed_addr #0 {
top:
%0 = getelementptr inbounds nuw i8, ptr %"v::FixedSizeArray", i64 8
%.unbox = load i64, ptr %0, align 8
%1 = icmp slt i64 %.unbox, 1
br i1 %1, label %L49, label %L13.preheader19
L13.preheader19: ; preds = %top
%memoryref_mem = load ptr, ptr %.roots.v, align 8
%memory_data_ptr = getelementptr inbounds nuw i8, ptr %memoryref_mem, i64 8
%memoryref_data.pre = load ptr, ptr %memory_data_ptr, align 8
%invariant.gep = getelementptr i8, ptr %memoryref_data.pre, i64 -8
%min.iters.check = icmp samesign ult i64 %.unbox, 8
br i1 %min.iters.check, label %L13, label %vector.ph
vector.ph: ; preds = %L13.preheader19
%n.vec = and i64 %.unbox, 9223372036854775800
%2 = or disjoint i64 %n.vec, 1
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%3 = getelementptr i64, ptr %memoryref_data.pre, i64 %index
%4 = getelementptr i8, ptr %3, i64 16
%5 = getelementptr i8, ptr %3, i64 32
%6 = getelementptr i8, ptr %3, i64 48
store <2 x i64> splat (i64 1), ptr %3, align 8
store <2 x i64> splat (i64 1), ptr %4, align 8
store <2 x i64> splat (i64 1), ptr %5, align 8
store <2 x i64> splat (i64 1), ptr %6, align 8
%index.next = add nuw i64 %index, 8
%7 = icmp eq i64 %index.next, %n.vec
br i1 %7, label %middle.block, label %vector.body
middle.block: ; preds = %vector.body
%cmp.n = icmp eq i64 %.unbox, %n.vec
br i1 %cmp.n, label %L49, label %L13
L13: ; preds = %L13, %middle.block, %L13.preheader19
%value_phi3 = phi i64 [ %8, %L13 ], [ 1, %L13.preheader19 ], [ %2, %middle.block ]
%gep = getelementptr i64, ptr %invariant.gep, i64 %value_phi3
store i64 1, ptr %gep, align 8
%8 = add i64 %value_phi3, 1
%exitcond.not = icmp eq i64 %value_phi3, %.unbox
br i1 %exitcond.not, label %L49, label %L13
L49: ; preds = %L13, %middle.block, %top
ret void
}
```
Note that the version with `Memory{Int64}` has the out-of-bounds error block (`oob`), while that's removed entirely for `FixedSizeVectorDefault{Int64}`. Honestly, right now I can't tell why that is: it feels like the compiler should have enough information to elide the error path for `Memory{Int64}` as well, but that's what we get. In general the generated code for `FixedSizeVectorDefault{Int64}` looks a lot simpler for this function than for `Memory{Int64}`: the `Memory` version gets split into pre/main/post loops (see the `preloop.pseudo.exit`, `main.exit.selector`, and `L11.postloop` blocks, which look like the output of LLVM's inductive range check elimination), while the `FixedSizeVectorDefault` version is just one vectorized loop with a scalar remainder.
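
As a sanity check I haven't actually run (so take it as an untested sketch): marking the loop `@inbounds` should drop the bounds check, and with it the `oob` block, from the `Memory{Int64}` code too, since `eachindex(v)` only yields valid indices for `v`:

```julia
# Untested sketch: @inbounds skips the bounds check, which should remove
# the oob error block from the Memory{Int64} version as well. This is
# safe here because eachindex(v) only produces valid indices for v.
function f_inbounds(v)
    @inbounds for idx in eachindex(v)
        v[idx] = 1
    end
end

code_llvm(f_inbounds, (Memory{Int64},); debuginfo=:none)
```

If that removes the error path, the open question is only why the compiler can prove in-boundsness on its own for `FixedSizeVectorDefault{Int64}` but not for `Memory{Int64}`.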
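
Whether the simpler codegen actually translates into a measurable runtime difference is a separate question. A minimal benchmark sketch to check (also not run; I'm assuming the `undef` constructor works for the `FixedSizeVectorDefault` alias):

```julia
using BenchmarkTools, FixedSizeArrays

# Hypothetical comparison, not run: fill a Memory and a
# FixedSizeVectorDefault of the same length using f from above.
mem = Memory{Int64}(undef, 1024)
fsv = FixedSizeVectorDefault{Int64}(undef, 1024)  # assumed constructor

@btime f($mem)
@btime f($fsv)
```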