Hm, unfortunately, that seems to generate a bunch more inlined code, and is slower.
julia> macro outline(x)  # expands `@outline expr` into a zero-argument closure wrapping `expr` that is called on the spot
return esc(:((() -> $x)()))  # esc: the generated closure and `x` resolve in the caller's scope, not the macro's module
end
@outline (macro with 1 method)
julia> function f(x)  # returns x + 1 when x >= 0; otherwise throws a DomainError
if x >= 0  # NOTE(review): 0 is accepted here, while the error message below says "greater than 0"
x + 1
else
#throw(DomainError("f requires inputs greater than 0, you gave $x which is less than 0"))
throw(@outline DomainError(lazy"f requires inputs greater than 0, you gave $x which is less than 0"))  # lazy"..." yields a LazyString; @outline wraps the DomainError construction in an immediately-called closure
end
end;
julia> @btime f($(Ref(1.0))[])
2.809 ns (0 allocations: 0 bytes)
2.0
julia> code_llvm(f, Tuple{Int})
; @ REPL[24]:1 within `f`
define i64 @julia_f_624(i64 signext %0) #0 {
top:
%gcframe16 = alloca [5 x {}*], align 16
%gcframe16.sub = getelementptr inbounds [5 x {}*], [5 x {}*]* %gcframe16, i64 0, i64 0
%1 = bitcast [5 x {}*]* %gcframe16 to i8*
call void @llvm.memset.p0i8.i32(i8* noundef nonnull align 16 dereferenceable(40) %1, i8 0, i32 40, i1 false)
%2 = getelementptr inbounds [5 x {}*], [5 x {}*]* %gcframe16, i64 0, i64 2
%thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #6
%ppgcstack_i8 = getelementptr i8, i8* %thread_ptr, i64 -8
%ppgcstack = bitcast i8* %ppgcstack_i8 to {}****
%pgcstack = load {}***, {}**** %ppgcstack, align 8
; @ REPL[24]:2 within `f`
; ┌ @ operators.jl:429 within `>=`
; │┌ @ int.jl:481 within `<=`
%3 = bitcast [5 x {}*]* %gcframe16 to i64*
store i64 12, i64* %3, align 16
%4 = getelementptr inbounds [5 x {}*], [5 x {}*]* %gcframe16, i64 0, i64 1
%5 = bitcast {}** %4 to {}***
%6 = load {}**, {}*** %pgcstack, align 8
store {}** %6, {}*** %5, align 8
%7 = bitcast {}*** %pgcstack to {}***
store {}** %gcframe16.sub, {}*** %7, align 8
%8 = icmp slt i64 %0, 0
; └└
br i1 %8, label %L5, label %L3
L3: ; preds = %top
; @ REPL[24]:3 within `f`
; ┌ @ int.jl:87 within `+`
%9 = add nuw i64 %0, 1
%10 = load {}*, {}** %4, align 8
%11 = bitcast {}*** %pgcstack to {}**
store {}* %10, {}** %11, align 8
; └
ret i64 %9
L5: ; preds = %top
; @ REPL[24]:6 within `f`
; ┌ @ REPL[23]:2 within `#13`
; │┌ @ strings/lazy.jl:19 within `LazyString`
%ptls_field17 = getelementptr inbounds {}**, {}*** %pgcstack, i64 2
%12 = bitcast {}*** %ptls_field17 to i8**
%ptls_load1819 = load i8*, i8** %12, align 8
%13 = call noalias nonnull {}* @ijl_gc_pool_alloc(i8* %ptls_load1819, i32 1440, i32 32) #7
%14 = bitcast {}* %13 to i64*
%15 = getelementptr inbounds i64, i64* %14, i64 -1
store atomic i64 139939984728400, i64* %15 unordered, align 8
%16 = bitcast {}* %13 to {}**
%17 = bitcast {}* %13 to <2 x {}*>*
store <2 x {}*> zeroinitializer, <2 x {}*>* %17, align 8
%18 = getelementptr inbounds [5 x {}*], [5 x {}*]* %gcframe16, i64 0, i64 4
store {}* %13, {}** %18, align 16
%ptls_load132021 = load i8*, i8** %12, align 8
%19 = call noalias nonnull {}* @ijl_gc_pool_alloc(i8* %ptls_load132021, i32 1440, i32 32) #7
%20 = bitcast {}* %19 to i64*
%21 = getelementptr inbounds i64, i64* %20, i64 -1
store atomic i64 139939991032544, i64* %21 unordered, align 8
%22 = bitcast {}* %19 to { {}*, i64, {}* }*
%.repack = bitcast {}* %19 to {}**
store {}* inttoptr (i64 139940196512528 to {}*), {}** %.repack, align 8
%.repack6 = getelementptr inbounds { {}*, i64, {}* }, { {}*, i64, {}* }* %22, i64 0, i32 1
store i64 %0, i64* %.repack6, align 8
%.repack8 = getelementptr inbounds { {}*, i64, {}* }, { {}*, i64, {}* }* %22, i64 0, i32 2
store {}* inttoptr (i64 139937774046432 to {}*), {}** %.repack8, align 8
store {}* %19, {}** %16, align 8
%23 = load atomic i64, i64* %15 unordered, align 8
%24 = and i64 %23, 3
%25 = icmp eq i64 %24, 3
br i1 %25, label %26, label %27
26: ; preds = %L5
call void @ijl_gc_queue_root({}* nonnull %13)
br label %27
27: ; preds = %26, %L5
%28 = bitcast {}** %2 to [2 x {}*]*
; │└
call void @j_DomainError_626([2 x {}*]* noalias nocapture nonnull sret([2 x {}*]) %28, {}* nonnull readonly %13) #0
; └
%ptls_load152223 = load i8*, i8** %12, align 8
%29 = call noalias nonnull {}* @ijl_gc_pool_alloc(i8* %ptls_load152223, i32 1440, i32 32) #7
%30 = bitcast {}* %29 to i64*
%31 = getelementptr inbounds i64, i64* %30, i64 -1
store atomic i64 139939981380608, i64* %31 unordered, align 8
%32 = bitcast {}* %29 to i8*
%33 = bitcast {}** %2 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(16) %32, i8* noundef nonnull align 16 dereferenceable(16) %33, i64 16, i1 false)
call void @ijl_throw({}* %29)
unreachable
}
Marking the function inside `@outline` as `@noinline` and avoiding capturing variables didn’t seem to help.