Note the code example below:
"""
    Radix(n::Integer)

Field-less tag type that carries the radix `n` as its type parameter `N`, so that
methods can dispatch on the radix (for example `::Radix{2}`).

Only this inner constructor is defined, so instances are created from an integer
value: `Radix(5)` yields an instance of `Radix{5}`.
"""
struct Radix{N}
    function Radix(n::Integer)
        # Lift the runtime integer into the type domain.
        return new{n}()
    end
end
# Variant 1. Nesting: testfunc! -> _add, _add_positions
# (`_add` and `_add_positions` are sibling local functions defined directly in `testfunc!`).
#
# Runs `num` iterations; each iteration draws two random indices `a` and `b` into
# `array` and overwrites `array[b]` with `_add(array[a], array[b], radix)`.
# `radix` is forwarded as the third argument of `_add`, whose methods dispatch on `Radix{N}`.
# The `for` loop is the last expression, so the call itself evaluates to `nothing`.
function testfunc!(array, num, radix)
# Radix-2 method: combines the two values with xor.
_add(a, b, ::Radix{2}) = a ⊻ b
# Generic method: inside this method `radix` is the static type parameter bound by
# `where radix` (it shadows the outer `radix` argument), so `mod` receives that parameter.
_add(a, b, ::Radix{radix}) where radix = mod(a+b, radix)
# Uses `_add` and `radix` from the enclosing `testfunc!` scope.
function _add_positions(array, a, b)
array[b] = _add(array[a], array[b], radix)
end
for _ in 1:num
# The two indices are drawn independently, so `a == b` can occur.
a,b = rand(1:length(array)), rand(1:length(array))
_add_positions(array, a, b)
end
end
# Variant 2. Nesting: testfunc2! -> _add_positions -> _add
# (both `_add` methods are defined inside `_add_positions` rather than next to it).
#
# Runs `num` iterations; each iteration draws two random indices `a` and `b` into
# `array` and overwrites `array[b]` with `_add(array[a], array[b], radix)`.
# The `for` loop is the last expression, so the call itself evaluates to `nothing`.
function testfunc2!(array, num, radix)
# Uses `radix` from the enclosing `testfunc2!` scope; `_add` is local to this function.
function _add_positions(array, a, b)
# Radix-2 method: combines the two values with xor.
_add(a, b, ::Radix{2}) = a ⊻ b
# Generic method: inside this method `radix` is the static type parameter bound by
# `where radix` (it shadows the outer `radix` argument), so `mod` receives that parameter.
_add(a, b, ::Radix{radix}) where radix = mod(a+b, radix)
array[b] = _add(array[a], array[b], radix)
end
for _ in 1:num
# The two indices are drawn independently, so `a == b` can occur.
a,b = rand(1:length(array)), rand(1:length(array))
_add_positions(array, a, b)
end
end
# Variant 3. Nesting: testfunc3! -> _add, _add_positions
# (same layout as `testfunc!`, but `_add` has only the generic method).
#
# Runs `num` iterations; each iteration draws two random indices `a` and `b` into
# `array` and overwrites `array[b]` with `_add(array[a], array[b], radix)`.
# The `for` loop is the last expression, so the call itself evaluates to `nothing`.
function testfunc3!(array, num, radix)
# The `Radix{2}` method is deliberately left disabled here so that `_add` has exactly
# one method definition:
#_add(a, b, ::Radix{2}) = a ⊻ b
# Generic method: inside this method `radix` is the static type parameter bound by
# `where radix` (it shadows the outer `radix` argument), so `mod` receives that parameter.
_add(a, b, ::Radix{radix}) where radix = mod(a+b, radix)
# Uses `_add` and `radix` from the enclosing `testfunc3!` scope.
function _add_positions(array, a, b)
array[b] = _add(array[a], array[b], radix)
end
for _ in 1:num
# The two indices are drawn independently, so `a == b` can occur.
a,b = rand(1:length(array)), rand(1:length(array))
_add_positions(array, a, b)
end
end
These three functions benchmark as follows:
# Benchmark the three variants on one shared 1024-element vector of 0/1 values.
# `@btime` is provided by BenchmarkTools.jl (its `using` line is not shown here).
# Each call mutates `arr` in place, so later runs start from an already-modified array.
# NOTE(review): `arr` and `Radix(5)` are not `$`-interpolated, so the timings may include
# global-variable access and the construction of `Radix(5)`; the figures below were
# reported for the calls exactly as written.
arr = rand(0:1, 1024);
@btime testfunc!(arr, 1000, Radix(5))
@btime testfunc2!(arr, 1000, Radix(5))
@btime testfunc3!(arr, 1000, Radix(5))
# Reported output (presumably in the same order as the calls above):
# testfunc!:  117.200 μs (439 allocations: 6.86 KiB)
# testfunc2!: 13.300 μs (0 allocations: 0 bytes)
# testfunc3!: 17.200 μs (0 allocations: 0 bytes)
Note in particular the extra allocations in `testfunc!`. A `@code_warntype` check of `testfunc!` shows that `_add` itself gets boxed. Why does this happen, and why does it not happen in `testfunc2!` (which can receive the value of `radix` without boxing)? Once only one definition of `_add` exists (see `testfunc3!`), the problem is resolved.