Improper function nesting causes boxing/runtime dispatch?

Note the code example below:

"""
    Radix(n::Integer)

Singleton tag type that lifts the radix `n` into the type domain as `Radix{n}`, so
that methods can dispatch on the radix (e.g. a dedicated `::Radix{2}` method).

The value is normalised to `Int` before it becomes the type parameter. Type
parameters are compared by identity, so without this `Radix(Int8(2))` would be
`Radix{Int8(2)}`, a different type from `Radix{2}`, and would miss `::Radix{2}`
methods. Non-bits integers such as `BigInt` could not be used as a parameter at all.

Throws `InexactError` if `n` does not fit in an `Int`.
"""
struct Radix{N}
    # Only this inner constructor exists, so `N` is always an `Int`.
    Radix(n::Integer) = new{Int(n)}()
end

# Nesting: testfunc! -> _add, _add_positions (siblings in the same scope)
# Variant 1: `_add` (two methods) and `_add_positions` are siblings in the body of
# `testfunc!`; `_add_positions` refers to both `_add` and `radix` from that scope.
#
# Performs `num` random updates of `array` in place: each iteration draws two indices
# from 1:length(array) and overwrites `array[b]` with `_add(array[a], array[b], radix)`.
# `radix` must be a `Radix{N}` instance, since `_add` only has `Radix` methods.
# Returns `nothing` (the value of the trailing `for` loop).
#
# This is the variant reported as allocating in the benchmark; the accompanying
# discussion attributes that to `_add` being boxed.
function testfunc!(array, num, radix)
    
    # Radix-2 method: XOR of the two values.
    _add(a, b, ::Radix{2}) = a ⊻ b
    # General method: modular addition. The static parameter `radix` introduced by
    # `where` shadows the outer `radix` argument inside this method, so `mod`
    # receives the type parameter, not the `Radix` instance.
    _add(a, b, ::Radix{radix}) where radix = mod(a+b, radix)
    
    # Closure over `_add` and the outer `radix`; writes the combined value to `array[b]`.
    function _add_positions(array, a, b) 
        array[b] =  _add(array[a], array[b], radix)
    end
    
    for _ in 1:num
        # `a` and `b` are drawn independently, so they may coincide.
        a,b = rand(1:length(array)), rand(1:length(array))
        _add_positions(array, a, b)
    end
end

# Nesting: testfunc2! -> _add_positions -> _add
# Variant 2: both `_add` methods are defined inside `_add_positions`, so `_add` is
# local to the function that calls it rather than a sibling it has to reach from
# the enclosing scope. Only `radix` comes from the scope of `testfunc2!`.
#
# Performs `num` random updates of `array` in place: each iteration draws two indices
# from 1:length(array) and overwrites `array[b]` with `_add(array[a], array[b], radix)`.
# `radix` must be a `Radix{N}` instance, since `_add` only has `Radix` methods.
# Returns `nothing` (the value of the trailing `for` loop).
#
# Reported as allocation-free in the benchmark.
function testfunc2!(array, num, radix)
    
    function _add_positions(array, a, b) 
        # Radix-2 method: XOR of the two values.
        _add(a, b, ::Radix{2}) = a ⊻ b
        # General method: modular addition. The `where radix` static parameter shadows
        # the outer `radix` inside this method only.
        _add(a, b, ::Radix{radix}) where radix = mod(a+b, radix)
        # Here `radix` is the argument of `testfunc2!` (the `Radix` instance).
        array[b] =  _add(array[a], array[b], radix)
    end
    
    for _ in 1:num
        # `a` and `b` are drawn independently, so they may coincide.
        a,b = rand(1:length(array)), rand(1:length(array))
        _add_positions(array, a, b)
    end
end

# Nesting: testfunc3! -> _add, _add_positions (siblings; _add has a single method)
# Variant 3: same layout as `testfunc!` (`_add` and `_add_positions` are siblings),
# but `_add` has only the general method.
#
# Performs `num` random updates of `array` in place: each iteration draws two indices
# from 1:length(array) and overwrites `array[b]` with `_add(array[a], array[b], radix)`.
# With the XOR method removed, `Radix{2}` also goes through `mod(a+b, 2)`.
# Returns `nothing` (the value of the trailing `for` loop).
#
# Reported as allocation-free in the benchmark.
function testfunc3!(array, num, radix)
    
    # Radix{2} specialisation deliberately disabled so that `_add` is defined exactly
    # once in this scope (kept for comparison with `testfunc!`):
    #_add(a, b, ::Radix{2}) = a ⊻ b
    # General method: modular addition. The `where radix` static parameter shadows
    # the outer `radix` argument inside this method.
    _add(a, b, ::Radix{radix}) where radix = mod(a+b, radix)
    
    # Closure over `_add` and the outer `radix`; writes the combined value to `array[b]`.
    function _add_positions(array, a, b) 
        array[b] =  _add(array[a], array[b], radix)
    end
    
    for _ in 1:num
        # `a` and `b` are drawn independently, so they may coincide.
        a,b = rand(1:length(array)), rand(1:length(array))
        _add_positions(array, a, b)
    end
end

which benchmarks as follows:

# Benchmark driver for the three variants: 1000 updates each on a shared 1024-element
# vector of zeros and ones, with radix 5.
# `@btime` is not defined in this snippet — presumably BenchmarkTools is loaded; confirm.
# NOTE(review): `arr` is a non-const global passed without `$` interpolation, and all
# three calls mutate the same vector, so later runs start from already-modified data.
arr = rand(0:1, 1024);
@btime testfunc!(arr, 1000, Radix(5))
@btime testfunc2!(arr, 1000, Radix(5))
@btime testfunc3!(arr, 1000, Radix(5))

# Recorded output, in call order:
#  117.200 μs (439 allocations: 6.86 KiB)   <- testfunc!
#  13.300 μs (0 allocations: 0 bytes)       <- testfunc2!
#  17.200 μs (0 allocations: 0 bytes)       <- testfunc3!

Note specifically the extra allocations in `testfunc!`. A `@code_warntype` check of `testfunc!` shows that `_add` itself gets boxed. Why does this happen, and why does it not happen in `testfunc2!` (which can receive the value of `radix` without boxing)? Once only one definition of `_add` exists (see `testfunc3!`), the problem disappears.

See Performance Tips · The Julia Language

1 Like

@Oscar_Smith so, if I understand correctly, in the first implementation `_add` is implicitly passed as an argument to a lifted realization of `_add_positions`? Then, since its type is `Function`, `_add_positions` doesn't specialize on it, and since it's passed implicitly I cannot add the explicit specialization directive the manual suggests?

The documentation mentions that this problem won't show up with `@code_warntype` — this fact indeed stumped my search for where these allocations were coming from! (I had guessed they arose from the capturing of `radix`.)

I think @Oscar_Smith just linked the wrong section of the Performance Tips. This should be the right one.