Apologies if this is answered somewhere else, but I couldn’t find it.
I have the following two functions; testvt
is intended to do some sort of dispatch, while remaining type-stable:
@noinline f(x, a, b, c, ::Val{N}) where N = muladd.(x, 0.1a + 0.5b + 2c, N*c)
testvt(x, a, b, c, k) = Base.@nif 10 d -> (k == d) d -> f(x, a, b, c, Val(d))
With these arguments:
x, a, b, c = (rand(1000) for _ in 1:4)
It looks like there is a lot of duplicated llvm code.
all vector args
julia> @code_llvm testvt(x, a, b, c, 10)
; @ REPL[2]:1 within `testvt`
; Function Attrs: uwtable
define nonnull {}* @julia_testvt_226({}* noundef nonnull align 16 dereferenceable(40) %0, {}* noundef nonnull align 16 dereferenceable(40) %1, {}* noundef nonnull align 16 dereferenceable(40) %2, {}* noundef nonnull align 16 dereferenceable(40) %3, i64 signext %4) #0 {
top:
%5 = alloca [5 x {}*], align 8
%.sub = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 0
switch i64 %4, label %L37 [
i64 1, label %L3
i64 2, label %L7
i64 3, label %L11
i64 4, label %L15
i64 5, label %L19
i64 6, label %L23
i64 7, label %L27
i64 8, label %L31
i64 9, label %L35
]
common.ret: ; preds = %L37, %L35, %L31, %L27, %L23, %L19, %L15, %L11, %L7, %L3
%common.ret.op = phi {}* [ %10, %L3 ], [ %15, %L7 ], [ %20, %L11 ], [ %25, %L15 ], [ %30, %L19 ], [ %35, %L23 ], [ %40, %L27 ], [ %45, %L31 ], [ %50, %L35 ], [ %55, %L37 ]
ret {}* %common.ret.op
L3: ; preds = %top
store {}* %0, {}** %.sub, align 8
%6 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 1
store {}* %1, {}** %6, align 8
%7 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 2
store {}* %2, {}** %7, align 8
%8 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 3
store {}* %3, {}** %8, align 8
%9 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 4
store {}* inttoptr (i64 140735412136144 to {}*), {}** %9, align 8
%10 = call nonnull {}* @j1_f_228({}* inttoptr (i64 2425037407000 to {}*), {}** nonnull %.sub, i32 5)
br label %common.ret
L7: ; preds = %top
store {}* %0, {}** %.sub, align 8
%11 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 1
store {}* %1, {}** %11, align 8
%12 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 2
store {}* %2, {}** %12, align 8
%13 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 3
store {}* %3, {}** %13, align 8
%14 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 4
store {}* inttoptr (i64 140735412139296 to {}*), {}** %14, align 8
%15 = call nonnull {}* @j1_f_229({}* inttoptr (i64 2425037407000 to {}*), {}** nonnull %.sub, i32 5)
br label %common.ret
L11: ; preds = %top
store {}* %0, {}** %.sub, align 8
%16 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 1
store {}* %1, {}** %16, align 8
%17 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 2
store {}* %2, {}** %17, align 8
%18 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 3
store {}* %3, {}** %18, align 8
%19 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 4
store {}* inttoptr (i64 140735427216096 to {}*), {}** %19, align 8
%20 = call nonnull {}* @j1_f_230({}* inttoptr (i64 2425037407000 to {}*), {}** nonnull %.sub, i32 5)
br label %common.ret
L15: ; preds = %top
store {}* %0, {}** %.sub, align 8
%21 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 1
store {}* %1, {}** %21, align 8
%22 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 2
store {}* %2, {}** %22, align 8
%23 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 3
store {}* %3, {}** %23, align 8
%24 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 4
store {}* inttoptr (i64 140735425748336 to {}*), {}** %24, align 8
%25 = call nonnull {}* @j1_f_231({}* inttoptr (i64 2425037407000 to {}*), {}** nonnull %.sub, i32 5)
br label %common.ret
L19: ; preds = %top
store {}* %0, {}** %.sub, align 8
%26 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 1
store {}* %1, {}** %26, align 8
%27 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 2
store {}* %2, {}** %27, align 8
%28 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 3
store {}* %3, {}** %28, align 8
%29 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 4
store {}* inttoptr (i64 140735427217952 to {}*), {}** %29, align 8
%30 = call nonnull {}* @j1_f_232({}* inttoptr (i64 2425037407000 to {}*), {}** nonnull %.sub, i32 5)
br label %common.ret
L23: ; preds = %top
store {}* %0, {}** %.sub, align 8
%31 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 1
store {}* %1, {}** %31, align 8
%32 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 2
store {}* %2, {}** %32, align 8
%33 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 3
store {}* %3, {}** %33, align 8
%34 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 4
store {}* inttoptr (i64 140735427219520 to {}*), {}** %34, align 8
%35 = call nonnull {}* @j1_f_233({}* inttoptr (i64 2425037407000 to {}*), {}** nonnull %.sub, i32 5)
br label %common.ret
L27: ; preds = %top
store {}* %0, {}** %.sub, align 8
%36 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 1
store {}* %1, {}** %36, align 8
%37 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 2
store {}* %2, {}** %37, align 8
%38 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 3
store {}* %3, {}** %38, align 8
%39 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 4
store {}* inttoptr (i64 2425037422688 to {}*), {}** %39, align 8
%40 = call nonnull {}* @j1_f_234({}* inttoptr (i64 2425037407000 to {}*), {}** nonnull %.sub, i32 5)
br label %common.ret
L31: ; preds = %top
store {}* %0, {}** %.sub, align 8
%41 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 1
store {}* %1, {}** %41, align 8
%42 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 2
store {}* %2, {}** %42, align 8
%43 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 3
store {}* %3, {}** %43, align 8
%44 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 4
store {}* inttoptr (i64 2425037408776 to {}*), {}** %44, align 8
%45 = call nonnull {}* @j1_f_235({}* inttoptr (i64 2425037407000 to {}*), {}** nonnull %.sub, i32 5)
br label %common.ret
L35: ; preds = %top
store {}* %0, {}** %.sub, align 8
%46 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 1
store {}* %1, {}** %46, align 8
%47 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 2
store {}* %2, {}** %47, align 8
%48 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 3
store {}* %3, {}** %48, align 8
%49 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 4
store {}* inttoptr (i64 140735423996592 to {}*), {}** %49, align 8
%50 = call nonnull {}* @j1_f_236({}* inttoptr (i64 2425037407000 to {}*), {}** nonnull %.sub, i32 5)
br label %common.ret
L37: ; preds = %top
store {}* %0, {}** %.sub, align 8
%51 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 1
store {}* %1, {}** %51, align 8
%52 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 2
store {}* %2, {}** %52, align 8
%53 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 3
store {}* %3, {}** %53, align 8
%54 = getelementptr inbounds [5 x {}*], [5 x {}*]* %5, i64 0, i64 4
store {}* inttoptr (i64 140735432190096 to {}*), {}** %54, align 8
%55 = call nonnull {}* @j1_f_237({}* inttoptr (i64 2425037407000 to {}*), {}** nonnull %.sub, i32 5)
br label %common.ret
}
But with just one less vector argument, it looks a whole lot cleaner:
c = 1.
julia> @code_llvm testvt(x, a, b, 1., 10)
; @ REPL[2]:1 within `testvt`
; Function Attrs: noreturn uwtable
define void @julia_testvt_277({}* noundef nonnull align 16 dereferenceable(40) %0, {}* noundef nonnull align 16 dereferenceable(40) %1, {}* noundef nonnull align 16 dereferenceable(40) %2, double %3, i64 signext %4) #0 {
top:
switch i64 %4, label %L37 [
i64 1, label %L3
i64 2, label %L7
i64 3, label %L11
i64 4, label %L15
i64 5, label %L19
i64 6, label %L23
i64 7, label %L27
i64 8, label %L31
i64 9, label %L35
]
L3: ; preds = %top
call void @j_f_279({}* nonnull %0, {}* nonnull %1, {}* nonnull %2, double %3) #6
unreachable
L7: ; preds = %top
call void @j_f_280({}* nonnull %0, {}* nonnull %1, {}* nonnull %2, double %3) #6
unreachable
L11: ; preds = %top
call void @j_f_281({}* nonnull %0, {}* nonnull %1, {}* nonnull %2, double %3) #6
unreachable
L15: ; preds = %top
call void @j_f_282({}* nonnull %0, {}* nonnull %1, {}* nonnull %2, double %3) #6
unreachable
L19: ; preds = %top
call void @j_f_283({}* nonnull %0, {}* nonnull %1, {}* nonnull %2, double %3) #6
unreachable
L23: ; preds = %top
call void @j_f_284({}* nonnull %0, {}* nonnull %1, {}* nonnull %2, double %3) #6
unreachable
L27: ; preds = %top
call void @j_f_285({}* nonnull %0, {}* nonnull %1, {}* nonnull %2, double %3) #6
unreachable
L31: ; preds = %top
call void @j_f_286({}* nonnull %0, {}* nonnull %1, {}* nonnull %2, double %3) #6
unreachable
L35: ; preds = %top
call void @j_f_287({}* nonnull %0, {}* nonnull %1, {}* nonnull %2, double %3) #6
unreachable
L37: ; preds = %top
call void @j_f_288({}* nonnull %0, {}* nonnull %1, {}* nonnull %2, double %3) #6
unreachable
}
Is there some reason for this? I don’t have too much knowledge about processors/codegen in general, so I would appreciate some enlightenment.