That is my intent. I’m trying to illustrate this capability of Julia with something like the following
function iterator(g, N)
    # construct gᴺ, the Nth iterate of g
    function gᴺ(x)
        for i ∈ 1:N
            x = g(x)
        end
        return x
    end
    return gᴺ
end
f(x) = 4*x*(1-x)
fᴺ = iterator(f, 10^6);
With julia-0.6.4, @code_llvm fᴺ(0.3) returns the fairly comprehensible
define double @"julia_g\E1\B4\BA_62655"(%"#g\E1\B4\BA#1"* nocapture readonly dereferenceable(8), double) #0 !dbg !5 {
top:
%2 = getelementptr inbounds %"#g\E1\B4\BA#1", %"#g\E1\B4\BA#1"* %0, i64 0, i32 1
%3 = load i64, i64* %2, align 8
%4 = icmp slt i64 %3, 1
br i1 %4, label %L14, label %if.preheader
if.preheader: ; preds = %top
br label %if
if: ; preds = %if.preheader, %if
%x.03 = phi double [ %8, %if ], [ %1, %if.preheader ]
%"#temp#.02" = phi i64 [ %5, %if ], [ 1, %if.preheader ]
%5 = add i64 %"#temp#.02", 1
%6 = fmul double %x.03, 4.000000e+00
%7 = fsub double 1.000000e+00, %x.03
%8 = fmul double %6, %7
%9 = icmp eq i64 %"#temp#.02", %3
br i1 %9, label %L14.loopexit, label %if
L14.loopexit: ; preds = %if
br label %L14
L14: ; preds = %L14.loopexit, %top
%x.0.lcssa = phi double [ %1, %top ], [ %8, %L14.loopexit ]
ret double %x.0.lcssa
}
showing that Julia has inlined the f function into the iterator and optimized them down to a simple for loop.
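In effect, that IR is what you would get by inlining f into the loop by hand. A rough Julia-level equivalent for the slide (fᴺ_inlined is a hypothetical comparison function I wrote, not something the compiler emits) would be:
# hand-inlined version of fᴺ, for comparison with the optimized IR
function fᴺ_inlined(x, N)
    for i ∈ 1:N
        x = 4*x*(1-x)   # body of f substituted directly into the loop
    end
    return x
end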
However, with julia-1.0.0, the output of @code_llvm is so laden with comments that it's hard for a talk audience to see that this optimization has occurred:
; Function gᴺ
; Location: REPL[22]:5
define double @"julia_g\E1\B4\BA_36152"({ i64 } addrspace(11)* nocapture nonnull readonly dereferenceable(8), double) {
top:
  %2 = getelementptr inbounds { i64 }, { i64 } addrspace(11)* %0, i64 0, i32 0
; Function Colon; {
; Location: range.jl:5
; Function Type; {
; Location: range.jl:255
; Function unitrange_last; {
; Location: range.jl:260
; Function >=; {
; Location: operators.jl:333
; Function <=; {
; Location: int.jl:428
  %3 = load i64, i64 addrspace(11)* %2, align 8
  %4 = icmp sgt i64 %3, 0
;}}}}}
  br i1 %4, label %L9.L13_crit_edge, label %L28

L9.L13_crit_edge:                                 ; preds = %top
  br label %L13

L13:                                              ; preds = %L13, %L9.L13_crit_edge
  %value_phi2 = phi i64 [ 1, %L9.L13_crit_edge ], [ %9, %L13 ]
  %value_phi3 = phi double [ %1, %L9.L13_crit_edge ], [ %7, %L13 ]
; Location: REPL[22]:6
; Function f; {
; Location: REPL[16]:1
; Function -; {
; Location: promotion.jl:315
; Function -; {
; Location: float.jl:397
  %5 = fsub double 1.000000e+00, %value_phi3
;}}
; Function *; {
; Location: operators.jl:502
; Function *; {
; Location: promotion.jl:314
; Function *; {
; Location: float.jl:399
  %6 = fmul double %value_phi3, 4.000000e+00
;}}
; Function *; {
; Location: float.jl:399
  %7 = fmul double %6, %5
;}}}
; Function iterate; {
; Location: range.jl:575
; Function ==; {
; Location: promotion.jl:425
  %8 = icmp eq i64 %value_phi2, %3
;}
; Location: range.jl:576
; Function +; {
; Location: int.jl:53
  %9 = add nuw i64 %value_phi2, 1
;}}
  br i1 %8, label %L28, label %L13

L28:                                              ; preds = %L13, %top
  %value_phi6 = phi double [ %1, %top ], [ %7, %L13 ]
; Location: REPL[22]:8
  ret double %value_phi6
}
This example is slightly artificial. I would like to show the same thing with an ODE integrator (e.g. rungekutta4) and a user-defined dx/dt = f(x), but the above suffices to make the point.
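For reference, the rungekutta4 I have in mind is just the classical fixed-step method; a minimal sketch (the name and signature are placeholders of mine, not from any package):
# classical 4th-order Runge-Kutta with fixed step dt, for autonomous dx/dt = f(x)
function rungekutta4(f, x, dt, nsteps)
    for n ∈ 1:nsteps
        k1 = f(x)
        k2 = f(x + dt/2*k1)
        k3 = f(x + dt/2*k2)
        k4 = f(x + dt*k3)
        x += dt/6*(k1 + 2k2 + 2k3 + k4)
    end
    return x
end
The demonstration would be the same: wrap a single step as g(x) = rungekutta4(f, x, dt, 1) for some step size dt, build the Nth iterate with iterator(g, N), and check that @code_llvm shows f inlined into the Runge-Kutta stages.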
Trust me, I’m not equating the length of the @code_llvm output or the LLVM IR with the efficiency of its execution. This is all about pedagogy and clarity.