It does:
julia> function unroll(::Val{N}) where N  # N is read from the Val type parameter, so each N gets its own method specialization
t = 0.0  # Float64 accumulator
for i = 1:N  # trip count is fixed by N for a given specialization
t += exp(i)  # i is an integer here; exp is called on it and the result is added to t
end
return t  # sum of exp(i) for i in 1:N; 0.0 when N < 1
end
unroll (generic function with 1 method)
julia> @code_native unroll(Val(0))
## Native code for unroll(Val(0)): no call and no loop appear in this listing;
## the body only zeroes %xmm0 and returns.
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 12, 0
.globl _julia_unroll_198 ## -- Begin function julia_unroll_198
.p2align 4, 0x90
_julia_unroll_198: ## @julia_unroll_198
; ┌ @ REPL[16]:1 within `unroll`
.cfi_startproc
## %bb.0: ## %top
; │ @ REPL[16]:6 within `unroll`
## Return value in %xmm0 is +0.0, i.e. the initial value of t.
vxorps %xmm0, %xmm0, %xmm0
retq
.cfi_endproc
; └
## -- End function
.subsections_via_symbols
julia> @code_native unroll(Val(1))
## Native code for unroll(Val(1)): no call and no loop appear in this listing;
## the result is loaded from an 8-byte literal and returned.
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 12, 0
.section __TEXT,__literal8,8byte_literals
.p2align 3 ## -- Begin function julia_unroll_200
## The literal is 2.718281828..., which matches exp(1): the single iteration
## appears to have been evaluated at compile time.
LCPI0_0:
.quad 0x4005bf0a8b145769 ## double 2.7182818284590451
.section __TEXT,__text,regular,pure_instructions
.globl _julia_unroll_200
.p2align 4, 0x90
_julia_unroll_200: ## @julia_unroll_200
; ┌ @ REPL[16]:1 within `unroll`
.cfi_startproc
## %bb.0: ## %top
## Load the precomputed constant into %xmm0, the return register.
movabsq $LCPI0_0, %rax
vmovsd (%rax), %xmm0 ## xmm0 = mem[0],zero
; │ @ REPL[16]:6 within `unroll`
retq
.cfi_endproc
; └
## -- End function
.subsections_via_symbols
julia> @code_native unroll(Val(2))
## Native code for unroll(Val(2)): straight-line code with two indirect calls
## through %rbx and no loop or branch. The running sum t lives in a stack slot
## between calls.
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 12, 0
.section __TEXT,__literal8,8byte_literals
.p2align 3 ## -- Begin function julia_unroll_202
## Call arguments: the loop indices 1 and 2, already stored as doubles.
LCPI0_0:
.quad 0x3ff0000000000000 ## double 1
LCPI0_1:
.quad 0x4000000000000000 ## double 2
.section __TEXT,__text,regular,pure_instructions
.globl _julia_unroll_202
.p2align 4, 0x90
_julia_unroll_202: ## @julia_unroll_202
; ┌ @ REPL[16]:1 within `unroll`
.cfi_startproc
## %bb.0: ## %top
; │ @ REPL[16]:4 within `unroll`
; │┌ @ math.jl:1356 within `exp`
## Prologue: save %rbx (reused below to hold the call target) and reserve
## 16 bytes of stack, which provides the spill slot at 8(%rsp).
pushq %rbx
.cfi_def_cfa_offset 16
subq $16, %rsp
.cfi_def_cfa_offset 32
.cfi_offset %rbx, -16
## %rbx = address of _j_exp_204 (presumably the compiled Float64 exp method),
## loaded once and used for every call.
movabsq $_j_exp_204, %rbx
## Iteration i = 1: argument 1.0 in %xmm0, result returned in %xmm0.
movabsq $LCPI0_0, %rax
vmovsd (%rax), %xmm0 ## xmm0 = mem[0],zero
callq *%rbx
vxorpd %xmm1, %xmm1, %xmm1
; │└
; │┌ @ float.jl:383 within `+`
## t = 0.0 + first result; the addition with the zero in %xmm1 is still emitted.
vaddsd %xmm1, %xmm0, %xmm0
## Spill t to the stack: %xmm0 is needed for the next argument and result.
vmovsd %xmm0, 8(%rsp) ## 8-byte Spill
## Iteration i = 2: argument 2.0 in %xmm0.
movabsq $LCPI0_1, %rax
vmovsd (%rax), %xmm0 ## xmm0 = mem[0],zero
; │└
; │┌ @ math.jl:1356 within `exp`
callq *%rbx
; │└
; │┌ @ float.jl:383 within `+`
## t = spilled t + second result, with the reload folded into the add.
vaddsd 8(%rsp), %xmm0, %xmm0 ## 8-byte Folded Reload
; │└
; │ @ REPL[16]:6 within `unroll`
## Epilogue: release the stack slot, restore %rbx, return t in %xmm0.
addq $16, %rsp
popq %rbx
retq
.cfi_endproc
; └
## -- End function
.subsections_via_symbols
julia> @code_native unroll(Val(3))
## Native code for unroll(Val(3)): straight-line code with three indirect calls
## through %rbx and no loop or branch. The running sum t lives in a stack slot
## between calls.
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 12, 0
.section __TEXT,__literal8,8byte_literals
.p2align 3 ## -- Begin function julia_unroll_205
## Call arguments: the loop indices 1, 2 and 3, already stored as doubles.
LCPI0_0:
.quad 0x3ff0000000000000 ## double 1
LCPI0_1:
.quad 0x4000000000000000 ## double 2
LCPI0_2:
.quad 0x4008000000000000 ## double 3
.section __TEXT,__text,regular,pure_instructions
.globl _julia_unroll_205
.p2align 4, 0x90
_julia_unroll_205: ## @julia_unroll_205
; ┌ @ REPL[16]:1 within `unroll`
.cfi_startproc
## %bb.0: ## %top
; │ @ REPL[16]:4 within `unroll`
; │┌ @ math.jl:1356 within `exp`
## Prologue: save %rbx (reused below to hold the call target) and reserve
## 16 bytes of stack, which provides the spill slot at 8(%rsp).
pushq %rbx
.cfi_def_cfa_offset 16
subq $16, %rsp
.cfi_def_cfa_offset 32
.cfi_offset %rbx, -16
## %rbx = address of _j_exp_207 (presumably the compiled Float64 exp method),
## loaded once and used for every call.
movabsq $_j_exp_207, %rbx
## Iteration i = 1: argument 1.0 in %xmm0, result returned in %xmm0.
movabsq $LCPI0_0, %rax
vmovsd (%rax), %xmm0 ## xmm0 = mem[0],zero
callq *%rbx
vxorpd %xmm1, %xmm1, %xmm1
; │└
; │┌ @ float.jl:383 within `+`
## t = 0.0 + first result; the addition with the zero in %xmm1 is still emitted.
vaddsd %xmm1, %xmm0, %xmm0
## Spill t to the stack: %xmm0 is needed for the next argument and result.
vmovsd %xmm0, 8(%rsp) ## 8-byte Spill
## Iteration i = 2: argument 2.0 in %xmm0.
movabsq $LCPI0_1, %rax
vmovsd (%rax), %xmm0 ## xmm0 = mem[0],zero
; │└
; │┌ @ math.jl:1356 within `exp`
callq *%rbx
; │└
; │┌ @ float.jl:383 within `+`
## t = spilled t + second result, then spill the new t to the same slot.
vaddsd 8(%rsp), %xmm0, %xmm0 ## 8-byte Folded Reload
vmovsd %xmm0, 8(%rsp) ## 8-byte Spill
## Iteration i = 3: argument 3.0 in %xmm0.
movabsq $LCPI0_2, %rax
vmovsd (%rax), %xmm0 ## xmm0 = mem[0],zero
; │└
; │┌ @ math.jl:1356 within `exp`
callq *%rbx
; │└
; │┌ @ float.jl:383 within `+`
## t = spilled t + third result; this is the final sum.
vaddsd 8(%rsp), %xmm0, %xmm0 ## 8-byte Folded Reload
; │└
; │ @ REPL[16]:6 within `unroll`
## Epilogue: release the stack slot, restore %rbx, return t in %xmm0.
addq $16, %rsp
popq %rbx
retq
.cfi_endproc
; └
## -- End function
.subsections_via_symbols
julia> @code_native unroll(Val(4))
## Native code for unroll(Val(4)): straight-line code with four indirect calls
## through %rbx and no loop or branch. The running sum t lives in a stack slot
## between calls.
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 12, 0
.section __TEXT,__literal8,8byte_literals
.p2align 3 ## -- Begin function julia_unroll_208
## Call arguments: the loop indices 1 through 4, already stored as doubles.
LCPI0_0:
.quad 0x3ff0000000000000 ## double 1
LCPI0_1:
.quad 0x4000000000000000 ## double 2
LCPI0_2:
.quad 0x4008000000000000 ## double 3
LCPI0_3:
.quad 0x4010000000000000 ## double 4
.section __TEXT,__text,regular,pure_instructions
.globl _julia_unroll_208
.p2align 4, 0x90
_julia_unroll_208: ## @julia_unroll_208
; ┌ @ REPL[16]:1 within `unroll`
.cfi_startproc
## %bb.0: ## %top
; │ @ REPL[16]:4 within `unroll`
; │┌ @ math.jl:1356 within `exp`
## Prologue: save %rbx (reused below to hold the call target) and reserve
## 16 bytes of stack, which provides the spill slot at 8(%rsp).
pushq %rbx
.cfi_def_cfa_offset 16
subq $16, %rsp
.cfi_def_cfa_offset 32
.cfi_offset %rbx, -16
## %rbx = address of _j_exp_210 (presumably the compiled Float64 exp method),
## loaded once and used for every call.
movabsq $_j_exp_210, %rbx
## Iteration i = 1: argument 1.0 in %xmm0, result returned in %xmm0.
movabsq $LCPI0_0, %rax
vmovsd (%rax), %xmm0 ## xmm0 = mem[0],zero
callq *%rbx
vxorpd %xmm1, %xmm1, %xmm1
; │└
; │┌ @ float.jl:383 within `+`
## t = 0.0 + first result; the addition with the zero in %xmm1 is still emitted.
vaddsd %xmm1, %xmm0, %xmm0
## Spill t to the stack: %xmm0 is needed for the next argument and result.
vmovsd %xmm0, 8(%rsp) ## 8-byte Spill
## Iteration i = 2: argument 2.0 in %xmm0.
movabsq $LCPI0_1, %rax
vmovsd (%rax), %xmm0 ## xmm0 = mem[0],zero
; │└
; │┌ @ math.jl:1356 within `exp`
callq *%rbx
; │└
; │┌ @ float.jl:383 within `+`
## t = spilled t + second result, then spill the new t to the same slot.
vaddsd 8(%rsp), %xmm0, %xmm0 ## 8-byte Folded Reload
vmovsd %xmm0, 8(%rsp) ## 8-byte Spill
## Iteration i = 3: argument 3.0 in %xmm0.
movabsq $LCPI0_2, %rax
vmovsd (%rax), %xmm0 ## xmm0 = mem[0],zero
; │└
; │┌ @ math.jl:1356 within `exp`
callq *%rbx
; │└
; │┌ @ float.jl:383 within `+`
## t = spilled t + third result, then spill the new t to the same slot.
vaddsd 8(%rsp), %xmm0, %xmm0 ## 8-byte Folded Reload
vmovsd %xmm0, 8(%rsp) ## 8-byte Spill
## Iteration i = 4: argument 4.0 in %xmm0.
movabsq $LCPI0_3, %rax
vmovsd (%rax), %xmm0 ## xmm0 = mem[0],zero
; │└
; │┌ @ math.jl:1356 within `exp`
callq *%rbx
; │└
; │┌ @ float.jl:383 within `+`
## t = spilled t + fourth result; this is the final sum.
vaddsd 8(%rsp), %xmm0, %xmm0 ## 8-byte Folded Reload
; │└
; │ @ REPL[16]:6 within `unroll`
## Epilogue: release the stack slot, restore %rbx, return t in %xmm0.
addq $16, %rsp
popq %rbx
retq
.cfi_endproc
; └
## -- End function
.subsections_via_symbols
Your example doesn't exhibit unrolling because the compiler replaces the entire function with a closed-form solution. Specialized code does get generated for each N,
though that's not very useful in this example.