Recently weβve seen one strange performance regression report, and the following is a MWE we can get:
using BenchmarkTools
function rand_n_1(a,n)
r = Vector{UInt}(undef,n)
@inbounds for i in 1:n
p = a[1] - a[2]
a[1] = a[2]
a[2] = a[3]
a[3] = p
r[i] = p
end
return r
end
function rand_n_2(a,n)
r = Vector{UInt}(undef,n)
@inbounds for i in 1:n
p = a[1] - a[2]
a[1] = a[2] * 2
a[2] = a[3] * 3
a[3] = p * 4
r[i] = p
end
return r
end
a = UInt[1, 2, 3]
# 43.626 ms (3 allocations: 76.30 MiB)
@btime rand_n_1(a, 10^7);
# 20.388 ms (3 allocations: 76.30 MiB)
@btime rand_n_2(a, 10^7);
The rand_n_2
is a baseline version, and I expect that rand_n_1
is at least as fast as rand_n_2
. But surprisingly, it isnβt.
By checking the LLVM IR, the slow version introduces an instead overly smart optimization pass (unroll and vectorize the loop), while the fast version is more or less scalar.
Although weβve found ways to rewrite the function structure so that Julia/llvm fails to recognize the pattern and thus works around the performance regression, Iβd like to know the suggested fix for such issues since both versions look good.
@code_llvm rand_n_1(a, 10^7)
; Function Signature: rand_n_1(Array{UInt64, 1}, Int64)
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:3 within `rand_n_1`
define nonnull ptr @julia_rand_n_1_2870(ptr noundef nonnull align 8 dereferenceable(24) %"a::Array", i64 signext %"n::Int64") #0 {
top:
%gcframe1 = alloca [3 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 24, i1 true)
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #12
%tls_ppgcstack = getelementptr inbounds i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 4, ptr %gcframe1, align 8
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:4 within `rand_n_1`
; β @ boot.jl:647 within `Array`
; ββ @ boot.jl:588 within `GenericMemory`
%memorynew_empty = icmp eq i64 %"n::Int64", 0
br i1 %memorynew_empty, label %retval, label %nonemptymem
L21: ; preds = %L21.preheader.new, %L21
%value_phi5 = phi i64 [ 1, %L21.preheader.new ], [ %16, %L21 ]
%niter = phi i64 [ 0, %L21.preheader.new ], [ %niter.next.3, %L21 ]
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:6 within `rand_n_1`
; β @ essentials.jl:918 within `getindex`
%0 = load i64, ptr %memoryref_data, align 8
%1 = load <2 x i64>, ptr %memoryref_data19, align 8
; β
; β @ int.jl:86 within `-`
%2 = extractelement <2 x i64> %1, i64 0
%3 = sub i64 %0, %2
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:7 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store <2 x i64> %1, ptr %memoryref_data, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:9 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %3, ptr %memoryref_data52, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:10 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
%gep = getelementptr i64, ptr %invariant.gep, i64 %value_phi5
store i64 %3, ptr %gep, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:6 within `rand_n_1`
; β @ essentials.jl:918 within `getindex`
%4 = load i64, ptr %memoryref_data, align 8
%5 = load <2 x i64>, ptr %memoryref_data19, align 8
; β
; β @ int.jl:86 within `-`
%6 = extractelement <2 x i64> %5, i64 0
%7 = sub i64 %4, %6
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:7 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store <2 x i64> %5, ptr %memoryref_data, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:9 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %7, ptr %memoryref_data52, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:10 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
%gep.1 = getelementptr i64, ptr %memory_data, i64 %value_phi5
store i64 %7, ptr %gep.1, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:6 within `rand_n_1`
; β @ essentials.jl:918 within `getindex`
%8 = load i64, ptr %memoryref_data, align 8
%9 = load <2 x i64>, ptr %memoryref_data19, align 8
; β
; β @ int.jl:86 within `-`
%10 = extractelement <2 x i64> %9, i64 0
%11 = sub i64 %8, %10
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:7 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store <2 x i64> %9, ptr %memoryref_data, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:9 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %11, ptr %memoryref_data52, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:10 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
%gep.2 = getelementptr i64, ptr %gep, i64 2
store i64 %11, ptr %gep.2, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:6 within `rand_n_1`
; β @ essentials.jl:918 within `getindex`
%12 = load i64, ptr %memoryref_data, align 8
%13 = load <2 x i64>, ptr %memoryref_data19, align 8
; β
; β @ int.jl:86 within `-`
%14 = extractelement <2 x i64> %13, i64 0
%15 = sub i64 %12, %14
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:7 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store <2 x i64> %13, ptr %memoryref_data, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:9 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %15, ptr %memoryref_data52, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:10 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
%gep.3 = getelementptr i64, ptr %gep, i64 3
store i64 %15, ptr %gep.3, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:11 within `rand_n_1`
; β @ range.jl:921 within `iterate`
%16 = add i64 %value_phi5, 4
; β
%niter.next.3 = add i64 %niter, 4
%niter.ncmp.3 = icmp eq i64 %niter.next.3, %unroll_iter
br i1 %niter.ncmp.3, label %L183.loopexit.unr-lcssa, label %L21
L183.loopexit.unr-lcssa: ; preds = %L21.preheader, %L21
%value_phi5.unr = phi i64 [ 1, %L21.preheader ], [ %16, %L21 ]
%lcmp.mod.not = icmp eq i64 %xtraiter, 0
br i1 %lcmp.mod.not, label %L183, label %L21.epil
L21.epil: ; preds = %L21.epil, %L183.loopexit.unr-lcssa
%value_phi5.epil = phi i64 [ %21, %L21.epil ], [ %value_phi5.unr, %L183.loopexit.unr-lcssa ]
%epil.iter = phi i64 [ %epil.iter.next, %L21.epil ], [ 0, %L183.loopexit.unr-lcssa ]
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:6 within `rand_n_1`
; β @ essentials.jl:918 within `getindex`
%17 = load i64, ptr %memoryref_data, align 8
%18 = load <2 x i64>, ptr %memoryref_data19, align 8
; β
; β @ int.jl:86 within `-`
%19 = extractelement <2 x i64> %18, i64 0
%20 = sub i64 %17, %19
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:7 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store <2 x i64> %18, ptr %memoryref_data, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:9 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %20, ptr %memoryref_data52, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:10 within `rand_n_1`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
%gep.epil = getelementptr i64, ptr %invariant.gep, i64 %value_phi5.epil
store i64 %20, ptr %gep.epil, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:11 within `rand_n_1`
; β @ range.jl:921 within `iterate`
%21 = add i64 %value_phi5.epil, 1
; β
%epil.iter.next = add i64 %epil.iter, 1
%epil.iter.cmp.not = icmp eq i64 %epil.iter.next, %xtraiter
br i1 %epil.iter.cmp.not, label %L183, label %L21.epil
L183: ; preds = %retval, %L21.epil, %L183.loopexit.unr-lcssa
%frame.prev110 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev110, ptr %tls_pgcstack, align 8
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:12 within `rand_n_1`
ret ptr %"new::Array"
nonemptymem: ; preds = %top
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:4 within `rand_n_1`
; β @ boot.jl:647 within `Array`
; ββ @ boot.jl:588 within `GenericMemory`
%22 = icmp ult i64 %"n::Int64", 1152921504606846976
br i1 %22, label %pass, label %fail
fail: ; preds = %nonemptymem
call void @jl_argument_error(ptr nonnull @"_j_str_invalid GenericMemory siz...#1")
unreachable
pass: ; preds = %nonemptymem
%23 = shl nuw nsw i64 %"n::Int64", 3
%ptls_field = getelementptr inbounds i8, ptr %tls_pgcstack, i64 16
%ptls_load = load ptr, ptr %ptls_field, align 8
%"Memory{UInt64}[]" = call noalias nonnull align 16 ptr @jl_alloc_genericmemory_unchecked(ptr %ptls_load, i64 %23, ptr nonnull @"+Core.GenericMemory#2873.jit") #13
store i64 %"n::Int64", ptr %"Memory{UInt64}[]", align 8
br label %retval
retval: ; preds = %pass, %top
%memoryref_mem81 = phi ptr [ %"Memory{UInt64}[]", %pass ], [ @"jl_global#2872.jit", %top ]
; ββ
; β @ boot.jl:648 within `Array`
; ββ @ boot.jl:593 within `memoryref`
%memory_data_ptr = getelementptr inbounds { i64, ptr }, ptr %memoryref_mem81, i64 0, i32 1
%memory_data = load ptr, ptr %memory_data_ptr, align 8
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
store ptr %memoryref_mem81, ptr %gc_slot_addr_0, align 8
; ββ
%ptls_field108 = getelementptr inbounds i8, ptr %tls_pgcstack, i64 16
%ptls_load109 = load ptr, ptr %ptls_field108, align 8
%"new::Array" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_small_alloc(ptr %ptls_load109, i32 408, i32 32, i64 140686563959504) #8
%"new::Array.tag_addr" = getelementptr inbounds i64, ptr %"new::Array", i64 -1
store atomic i64 140686563959504, ptr %"new::Array.tag_addr" unordered, align 8
%24 = getelementptr inbounds i8, ptr %"new::Array", i64 8
store ptr %memory_data, ptr %"new::Array", align 8
store ptr %memoryref_mem81, ptr %24, align 8
%"new::Array.size_ptr" = getelementptr inbounds i8, ptr %"new::Array", i64 16
store i64 %"n::Int64", ptr %"new::Array.size_ptr", align 8
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:5 within `rand_n_1`
br i1 %memorynew_empty, label %L183, label %L21.preheader
L21.preheader: ; preds = %retval
%memoryref_data = load ptr, ptr %"a::Array", align 8
%memoryref_data19 = getelementptr inbounds i64, ptr %memoryref_data, i64 1
%memoryref_data52 = getelementptr inbounds i64, ptr %memoryref_data, i64 2
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:11 within `rand_n_1`
%invariant.gep = getelementptr i64, ptr %memory_data, i64 -1
%xtraiter = and i64 %"n::Int64", 3
%25 = icmp ult i64 %"n::Int64", 4
br i1 %25, label %L183.loopexit.unr-lcssa, label %L21.preheader.new
L21.preheader.new: ; preds = %L21.preheader
%unroll_iter = and i64 %"n::Int64", -4
br label %L21
}
@code_llvm rand_n_2(a, 10^7)
; Function Signature: rand_n_2(Array{UInt64, 1}, Int64)
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:15 within `rand_n_2`
define nonnull ptr @julia_rand_n_2_2903(ptr noundef nonnull align 8 dereferenceable(24) %"a::Array", i64 signext %"n::Int64") #0 {
top:
%gcframe1 = alloca [3 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 24, i1 true)
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #12
%tls_ppgcstack = getelementptr inbounds i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 4, ptr %gcframe1, align 8
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:16 within `rand_n_2`
; β @ boot.jl:647 within `Array`
; ββ @ boot.jl:588 within `GenericMemory`
%memorynew_empty = icmp eq i64 %"n::Int64", 0
br i1 %memorynew_empty, label %retval, label %nonemptymem
L21: ; preds = %L21.preheader.new, %L21
%value_phi5 = phi i64 [ 1, %L21.preheader.new ], [ %28, %L21 ]
%niter = phi i64 [ 0, %L21.preheader.new ], [ %niter.next.3, %L21 ]
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:18 within `rand_n_2`
; β @ essentials.jl:918 within `getindex`
%0 = load i64, ptr %memoryref_data, align 8
%1 = load i64, ptr %memoryref_data19, align 8
; β
; β @ int.jl:86 within `-`
%2 = sub i64 %0, %1
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:19 within `rand_n_2`
; β @ int.jl:863 within `*` @ int.jl:88
%3 = shl i64 %1, 1
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %3, ptr %memoryref_data, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:20 within `rand_n_2`
; β @ essentials.jl:918 within `getindex`
%4 = load i64, ptr %memoryref_data52, align 8
; β
; β @ int.jl:863 within `*` @ int.jl:88
%5 = mul i64 %4, 3
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %5, ptr %memoryref_data19, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:21 within `rand_n_2`
; β @ int.jl:863 within `*` @ int.jl:88
%6 = shl i64 %2, 2
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %6, ptr %memoryref_data52, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:22 within `rand_n_2`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
%gep = getelementptr i64, ptr %invariant.gep, i64 %value_phi5
store i64 %2, ptr %gep, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:18 within `rand_n_2`
; β @ essentials.jl:918 within `getindex`
%7 = load i64, ptr %memoryref_data, align 8
%8 = load i64, ptr %memoryref_data19, align 8
; β
; β @ int.jl:86 within `-`
%9 = sub i64 %7, %8
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:19 within `rand_n_2`
; β @ int.jl:863 within `*` @ int.jl:88
%10 = shl i64 %8, 1
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %10, ptr %memoryref_data, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:20 within `rand_n_2`
; β @ essentials.jl:918 within `getindex`
%11 = load i64, ptr %memoryref_data52, align 8
; β
; β @ int.jl:863 within `*` @ int.jl:88
%12 = mul i64 %11, 3
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %12, ptr %memoryref_data19, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:21 within `rand_n_2`
; β @ int.jl:863 within `*` @ int.jl:88
%13 = shl i64 %9, 2
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %13, ptr %memoryref_data52, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:22 within `rand_n_2`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
%gep.1 = getelementptr i64, ptr %memory_data, i64 %value_phi5
store i64 %9, ptr %gep.1, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:18 within `rand_n_2`
; β @ essentials.jl:918 within `getindex`
%14 = load i64, ptr %memoryref_data, align 8
%15 = load i64, ptr %memoryref_data19, align 8
; β
; β @ int.jl:86 within `-`
%16 = sub i64 %14, %15
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:19 within `rand_n_2`
; β @ int.jl:863 within `*` @ int.jl:88
%17 = shl i64 %15, 1
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %17, ptr %memoryref_data, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:20 within `rand_n_2`
; β @ essentials.jl:918 within `getindex`
%18 = load i64, ptr %memoryref_data52, align 8
; β
; β @ int.jl:863 within `*` @ int.jl:88
%19 = mul i64 %18, 3
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %19, ptr %memoryref_data19, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:21 within `rand_n_2`
; β @ int.jl:863 within `*` @ int.jl:88
%20 = shl i64 %16, 2
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %20, ptr %memoryref_data52, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:22 within `rand_n_2`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
%gep.2 = getelementptr i64, ptr %gep, i64 2
store i64 %16, ptr %gep.2, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:18 within `rand_n_2`
; β @ essentials.jl:918 within `getindex`
%21 = load i64, ptr %memoryref_data, align 8
%22 = load i64, ptr %memoryref_data19, align 8
; β
; β @ int.jl:86 within `-`
%23 = sub i64 %21, %22
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:19 within `rand_n_2`
; β @ int.jl:863 within `*` @ int.jl:88
%24 = shl i64 %22, 1
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %24, ptr %memoryref_data, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:20 within `rand_n_2`
; β @ essentials.jl:918 within `getindex`
%25 = load i64, ptr %memoryref_data52, align 8
; β
; β @ int.jl:863 within `*` @ int.jl:88
%26 = mul i64 %25, 3
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %26, ptr %memoryref_data19, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:21 within `rand_n_2`
; β @ int.jl:863 within `*` @ int.jl:88
%27 = shl i64 %23, 2
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %27, ptr %memoryref_data52, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:22 within `rand_n_2`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
%gep.3 = getelementptr i64, ptr %gep, i64 3
store i64 %23, ptr %gep.3, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:23 within `rand_n_2`
; β @ range.jl:921 within `iterate`
%28 = add i64 %value_phi5, 4
; β
%niter.next.3 = add i64 %niter, 4
%niter.ncmp.3 = icmp eq i64 %niter.next.3, %unroll_iter
br i1 %niter.ncmp.3, label %L186.loopexit.unr-lcssa, label %L21
L186.loopexit.unr-lcssa: ; preds = %L21.preheader, %L21
%value_phi5.unr = phi i64 [ 1, %L21.preheader ], [ %28, %L21 ]
%lcmp.mod.not = icmp eq i64 %xtraiter, 0
br i1 %lcmp.mod.not, label %L186, label %L21.epil
L21.epil: ; preds = %L21.epil, %L186.loopexit.unr-lcssa
%value_phi5.epil = phi i64 [ %36, %L21.epil ], [ %value_phi5.unr, %L186.loopexit.unr-lcssa ]
%epil.iter = phi i64 [ %epil.iter.next, %L21.epil ], [ 0, %L186.loopexit.unr-lcssa ]
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:18 within `rand_n_2`
; β @ essentials.jl:918 within `getindex`
%29 = load i64, ptr %memoryref_data, align 8
%30 = load i64, ptr %memoryref_data19, align 8
; β
; β @ int.jl:86 within `-`
%31 = sub i64 %29, %30
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:19 within `rand_n_2`
; β @ int.jl:863 within `*` @ int.jl:88
%32 = shl i64 %30, 1
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %32, ptr %memoryref_data, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:20 within `rand_n_2`
; β @ essentials.jl:918 within `getindex`
%33 = load i64, ptr %memoryref_data52, align 8
; β
; β @ int.jl:863 within `*` @ int.jl:88
%34 = mul i64 %33, 3
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %34, ptr %memoryref_data19, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:21 within `rand_n_2`
; β @ int.jl:863 within `*` @ int.jl:88
%35 = shl i64 %31, 2
; β
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
store i64 %35, ptr %memoryref_data52, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:22 within `rand_n_2`
; β @ array.jl:986 within `setindex!`
; ββ @ array.jl:991 within `_setindex!`
%gep.epil = getelementptr i64, ptr %invariant.gep, i64 %value_phi5.epil
store i64 %31, ptr %gep.epil, align 8
; ββ
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:23 within `rand_n_2`
; β @ range.jl:921 within `iterate`
%36 = add i64 %value_phi5.epil, 1
; β
%epil.iter.next = add i64 %epil.iter, 1
%epil.iter.cmp.not = icmp eq i64 %epil.iter.next, %xtraiter
br i1 %epil.iter.cmp.not, label %L186, label %L21.epil
L186: ; preds = %retval, %L21.epil, %L186.loopexit.unr-lcssa
%frame.prev110 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev110, ptr %tls_pgcstack, align 8
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:24 within `rand_n_2`
ret ptr %"new::Array"
nonemptymem: ; preds = %top
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:16 within `rand_n_2`
; β @ boot.jl:647 within `Array`
; ββ @ boot.jl:588 within `GenericMemory`
%37 = icmp ult i64 %"n::Int64", 1152921504606846976
br i1 %37, label %pass, label %fail
fail: ; preds = %nonemptymem
call void @jl_argument_error(ptr nonnull @"_j_str_invalid GenericMemory siz...#1")
unreachable
pass: ; preds = %nonemptymem
%38 = shl nuw nsw i64 %"n::Int64", 3
%ptls_field = getelementptr inbounds i8, ptr %tls_pgcstack, i64 16
%ptls_load = load ptr, ptr %ptls_field, align 8
%"Memory{UInt64}[]" = call noalias nonnull align 16 ptr @jl_alloc_genericmemory_unchecked(ptr %ptls_load, i64 %38, ptr nonnull @"+Core.GenericMemory#2906.jit") #13
store i64 %"n::Int64", ptr %"Memory{UInt64}[]", align 8
br label %retval
retval: ; preds = %pass, %top
%memoryref_mem81 = phi ptr [ %"Memory{UInt64}[]", %pass ], [ @"jl_global#2905.jit", %top ]
; ββ
; β @ boot.jl:648 within `Array`
; ββ @ boot.jl:593 within `memoryref`
%memory_data_ptr = getelementptr inbounds { i64, ptr }, ptr %memoryref_mem81, i64 0, i32 1
%memory_data = load ptr, ptr %memory_data_ptr, align 8
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
store ptr %memoryref_mem81, ptr %gc_slot_addr_0, align 8
; ββ
%ptls_field108 = getelementptr inbounds i8, ptr %tls_pgcstack, i64 16
%ptls_load109 = load ptr, ptr %ptls_field108, align 8
%"new::Array" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_small_alloc(ptr %ptls_load109, i32 408, i32 32, i64 140686563959504) #8
%"new::Array.tag_addr" = getelementptr inbounds i64, ptr %"new::Array", i64 -1
store atomic i64 140686563959504, ptr %"new::Array.tag_addr" unordered, align 8
%39 = getelementptr inbounds i8, ptr %"new::Array", i64 8
store ptr %memory_data, ptr %"new::Array", align 8
store ptr %memoryref_mem81, ptr %39, align 8
%"new::Array.size_ptr" = getelementptr inbounds i8, ptr %"new::Array", i64 16
store i64 %"n::Int64", ptr %"new::Array.size_ptr", align 8
; β
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:17 within `rand_n_2`
br i1 %memorynew_empty, label %L186, label %L21.preheader
L21.preheader: ; preds = %retval
%memoryref_data = load ptr, ptr %"a::Array", align 8
%memoryref_data19 = getelementptr inbounds i64, ptr %memoryref_data, i64 1
%memoryref_data52 = getelementptr inbounds i64, ptr %memoryref_data, i64 2
; @ /home/chenjiuning/Documents/ai/LazyFFTW/example3.jl:23 within `rand_n_2`
%invariant.gep = getelementptr i64, ptr %memory_data, i64 -1
%xtraiter = and i64 %"n::Int64", 3
%40 = icmp ult i64 %"n::Int64", 4
br i1 %40, label %L186.loopexit.unr-lcssa, label %L21.preheader.new
L21.preheader.new: ; preds = %L21.preheader
%unroll_iter = and i64 %"n::Int64", -4
br label %L21
}