I think this is the issue. Memoize.jl
is a great package and would be the clear choice if the point was to do memoization (with more instances, multiple arguments, etc). However, in this case the goal seems to be much simpler: to delay the initialization of a variable until its first use. Achieving this through @memoize
feels like overkill, as it appears to build an entire (global) dictionary and perform a search every time you want to access the “lazy variable”. While this should still be “fast enough”, this juggling is sure to confuse the compiler, preventing the use of the “lazy variable” in critical loops (I also suspect it would be a nightmare for compilation to GPUs). More concretely, the @memoize
approach is very likely to prevent autovectorization.
I believe the best solution would be to have static variables, but, until then, I can see simple cases benefitting from the redefinition trick I proposed. It’s effectively a single extra line and subsequent accesses to the variable should be zero-cost.
In this simple benchmark, the redefinition is something like 7 orders of magnitude faster than memoization:
using Memoize
using BenchmarkTools
# Lazily initialised value: the first call does the expensive work and then
# replaces this very method with one that simply returns the stored result.
function f()
    println("... heavy calculations ...")
    cached = 12
    # Overwrite `f()` in the defining module with a method returning the constant.
    Core.eval(@__MODULE__, :(f() = $cached))
    return cached
end
# First call runs the heavy calculation and redefines `f`.
@show f()
# Second call is a separate top-level statement and uses the redefined method,
# so the "heavy calculations" message is not printed again.
@show f()
# Memoized counterpart of `f`: the body runs once and the result is cached by Memoize.
@memoize function g()
    println("... heavy calculations ...")
    return 13
end
# First call evaluates the body and stores the result.
@show g()
# Second call returns the memoized value; the message is not printed again.
@show g()
# Hot loop that reads the lazily initialised value through `f()` twice per iteration.
function usef()
    total = 0
    for k in 1:1_000_000
        total = total + (k * f() - f())
    end
    return total
end
# Same hot loop as `usef`, but reading the value through the memoized `g()`.
function useg()
    total = 0
    for k in 1:1_000_000
        total = total + (k * g() - g())
    end
    return total
end
# Benchmark the two loops: redefinition-based `f` versus memoized `g`.
@btime usef()
@btime useg()
Output:
... heavy calculations ...
f() = 12
f() = 12
... heavy calculations ...
g() = 13
g() = 13
1.490 ns (0 allocations: 0 bytes)
16.334 ms (0 allocations: 0 bytes)
This is due to the compiler confusion I mentioned. The compiler understands what’s going on with usef()
and optimizes everything away. See the native code:
.text
endbr64
movabsq $5999994000000, %rax # imm = 0x574FB82D280
retq
nop
While for useg()
we get the monstrosity below.
.text
endbr64
pushq %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $72, %rsp
movabsq $139940655471280, %rcx # imm = 0x7F46790F1AB0
vxorps %xmm0, %xmm0, %xmm0
vmovaps %xmm0, 32(%rsp)
movq $0, 48(%rsp)
movq %fs:0, %rax
movq -8(%rax), %rdx
movq $4, 32(%rsp)
movq (%rdx), %rax
movq %rax, 40(%rsp)
leaq 32(%rsp), %rax
movq %rdx, 64(%rsp)
movq %rax, (%rdx)
movl $1, %ebp
xorl %ebx, %ebx
leaq 1787265262(%rcx), %r13
movabsq $jl_system_image_data, %r14
movabsq $139942131169320, %r15 # imm = 0x7F46D1047828
nopl (%rax)
L128:
movabsq $139940655471280, %rax # imm = 0x7F46790F1AB0
movq (%rax), %rdi
movq %rdi, 48(%rsp)
movq %r14, %rsi
movq %r15, %rdx
callq *%r13
cmpq %r15, %rax
jne L277
movabsq $139940626210368, %rax # imm = 0x7F4677509E40
movq %rax, 8(%rsp)
movabsq $jl_system_image_data, %rdi
leaq 8(%rsp), %rsi
movl $1, %edx
movabsq $139941898452352, %rax # imm = 0x7F46C3257D80
callq *%rax
movabsq $139940655471280, %rax # imm = 0x7F46790F1AB0
movq %rax, 8(%rsp)
movabsq $139942130905952, %r12 # imm = 0x7F46D1007360
movq %r12, 16(%rsp)
movq %r14, 24(%rsp)
movabsq $jl_system_image_data, %rdi
leaq 8(%rsp), %rsi
movl $3, %edx
movabsq $139941899165808, %rax # imm = 0x7F46C3306070
callq *%rax
movq %r12, %rax
L277:
movq -8(%rax), %rcx
shrq $4, %rcx
movabsq $8746369302984, %rdx # imm = 0x7F46C3C41C8
cmpq %rdx, %rcx
jne L570
movq (%rax), %r12
imulq %rbp, %r12
movabsq $139940655471280, %rax # imm = 0x7F46790F1AB0
movq (%rax), %rdi
movq %rdi, 48(%rsp)
movq %r14, %rsi
movq %r15, %rdx
callq *%r13
cmpq %r15, %rax
jne L494
movabsq $139940626210368, %rax # imm = 0x7F4677509E40
movq %rax, 8(%rsp)
movabsq $jl_system_image_data, %rdi
movq %r13, %r15
movq %r14, %r13
leaq 8(%rsp), %r14
movq %r14, %rsi
movl $1, %edx
movabsq $139941898452352, %rax # imm = 0x7F46C3257D80
callq *%rax
movabsq $139940655471280, %rax # imm = 0x7F46790F1AB0
movq %rax, 8(%rsp)
movabsq $139942130905952, %rax # imm = 0x7F46D1007360
movq %rax, 16(%rsp)
movq %r13, 24(%rsp)
movabsq $jl_system_image_data, %rdi
movq %r14, %rsi
movq %r13, %r14
movq %r15, %r13
movabsq $139942131169320, %r15 # imm = 0x7F46D1047828
movl $3, %edx
movabsq $139941899165808, %rax # imm = 0x7F46C3306070
callq *%rax
movabsq $139942130905952, %rax # imm = 0x7F46D1007360
L494:
movq -8(%rax), %rcx
shrq $4, %rcx
movabsq $8746369302984, %rdx # imm = 0x7F46C3C41C8
cmpq %rdx, %rcx
jne L605
addq %r12, %rbx
subq (%rax), %rbx
incq %rbp
cmpq $1000001, %rbp # imm = 0xF4241
jne L128
movq 40(%rsp), %rax
movq 64(%rsp), %rcx
movq %rax, (%rcx)
movq %rbx, %rax
addq $72, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
retq
L570:
movabsq $.rodata.str1.1, %rdi
movabsq $jl_type_error, %rcx
movabsq $jl_system_image_data, %rsi
movq %rax, %rdx
callq *%rcx
L605:
movabsq $.rodata.str1.1, %rdi
movabsq $jl_type_error, %rcx
movabsq $jl_system_image_data, %rsi
movq %rax, %rdx
callq *%rcx