I was surprised that something like the following is slow, and not type-stable:
# Demonstration case: build a copy of `A` using two helper functions nested inside `g`.
# This is the version reported as slow and not type-stable.
function g(A)
# Allocate an array like `A` and fill it through `act!`.
# Note: `act!` is referenced here before its definition further down inside `g`.
# NOTE(review): presumably that capture-before-definition is why `act!` shows up as a
# Core.Box in the reported @code_warntype output -- confirm.
function make(A)
B = similar(A)
act!(B, A, axes(A, 1))
B
end
# Copy `A[i]` into `B[i]` for every index `i` obtained by iterating `ax`.
function act!(B, A, ax)
for i in ax
B[i] = A[i]
end
end
# `g` assigns to `B` here, the same name `make` assigns to above; according to the
# accompanying discussion this makes `make`'s `B` not a distinct variable (reported
# as B::Core.Box).
B = make(A)
end
# Small random test vector; the same `A` is passed to every measurement in this post.
A = rand(3)
# Inspect inferred types of `g` for this argument; the trailing comment records the output.
@code_warntype g(A) # Body::Any, act!@_4::Core.Box, B::Core.Box
# Time `g` (`@btime` is BenchmarkTools' macro; `$A` interpolates the global into the
# benchmark). The trailing comment records the measured result.
@btime g($A) # 78.043 ns (5 allocations: 192 bytes)
This can be improved by not re-using variable names. I just found this thread from last year, which explains that assigning to `B` at the end of `g` causes the inner function's `B` not to be a distinct variable. That explains why this version is a bit better:
# Variant of the nested-function demonstration in which every variable name is distinct,
# so no inner local shares a name with a variable assigned in the outer function.
function g2(A2) # all names distinct, although B is sufficient
# Allocate an array like `A3` and fill it through `act2!`.
# Note: `act2!` is referenced here before its definition further down inside `g2`.
# NOTE(review): presumably that is why `act2!` is still reported as a Core.Box -- confirm.
function make2(A3)
B3 = similar(A3)
act2!(B3, A3, axes(A3, 1))
B3
end
# Copy `A4[i]` into `B4[i]` for every index `i` obtained by iterating `ax4`;
# explicitly evaluates to `nothing`.
function act2!(B4, A4, ax4)
for i in ax4
B4[i] = A4[i]
end
nothing
end
# `B2` is a name used nowhere else in `g2`; its value is also the value of `g2`.
B2 = make2(A2)
end
# Inspect inferred types of `g2`; the trailing comment records what was observed.
@code_warntype g2(A) # Better, but still has act2!@_4::Core.Box
# Time `g2` with the argument interpolated via `$A`; the trailing comment records the result.
@btime g2($A) # 70.867 ns (4 allocations: 160 bytes)
However, this still has a `Core.Box` around the inner function. Is this some variant of the closure bug/problem (although nothing is closed over)?
For comparison, the full-speed version is this:
"""
    h(A)

Return a new array allocated with `similar(A)` and filled by `_act!` over `axes(A, 1)`.

Baseline version for comparison: the helper `_act!` is a separate top-level function
rather than being nested inside `h`.
"""
function h(A) # with nothing nested
    out = similar(A)
    _act!(out, A, axes(A, 1))
    return out
end
"""
    _act!(B, A, ax)

Copy `A[i]` into `B[i]` for each index `i` obtained by iterating `ax`, mutating `B`.
Returns `nothing`.
"""
function _act!(B, A, ax)
    for idx in ax
        B[idx] = A[idx]
    end
    return nothing
end
# Inspect inferred types of the non-nested version; reported as having no problems.
@code_warntype h(A) # fine
# Time `h` with the argument interpolated via `$A`; the trailing comment records the result.
@btime h($A) # 36.986 ns (1 allocation: 112 bytes)