# Manually specialized function is faster than the generic version

The code:

``````julia
using BenchmarkTools

"""
    adc(a::T, b::T, carry::T) where T

Add `a`, `b` and `carry` in the wider type `widen(T)` and return the sum split into
two words of type `T` as `(low, high)`.

`low` is the sum truncated to `T`; `high` is the part of the sum above the bit width
of `T` (the carry out). Intended for fixed-width integer types whose width is
`8 * sizeof(T)` bits, e.g. `UInt32` or `UInt64`.
"""
function adc(a::T, b::T, carry::T) where T
    T2 = widen(T)
    # The high word starts at the bit width of `T`; a fixed shift of 64 is only
    # correct for 64-bit words.
    shift = 8 * sizeof(T)
    ret = (a % T2) + (b % T2) + (carry % T2)
    return ret % T, (ret >> shift) % T
end

"""
    mac(a::T, b::T, c::T, carry::T) where T

Compute `a + b * c + carry` in the wider type `widen(T)` and return the result split
into two words of type `T` as `(low, high)`.

`low` is the result truncated to `T`; `high` is the part of the result above the bit
width of `T`. Intended for fixed-width integer types whose width is `8 * sizeof(T)`
bits, e.g. `UInt32` or `UInt64`.
"""
function mac(a::T, b::T, c::T, carry::T) where T
    T2 = widen(T)
    # The high word starts at the bit width of `T`; a fixed shift of 64 is only
    # correct for 64-bit words.
    shift = 8 * sizeof(T)
    ret = (a % T2) + ((b % T2) * (c % T2)) + (carry % T2)
    return ret % T, (ret >> shift) % T
end

"""
    generic(x::NTuple{4, T}, y::NTuple{4, T}, m::NTuple{4, T}, m_p::T) where T

Run four rounds of a word-by-word multiply-and-reduce over the 4-word operands `x`
and `y` with the 4-word value `m`, and return the five accumulator words
`(a1, a2, a3, a4, a5)`.

Each round `i`:
1. computes the factor `u1 = (a1 + x[i] * y[1]) * m_p` in wrapping `T` arithmetic,
2. adds `u1 * m` to the accumulator word by word via `mac`,
3. adds `x[i] * y` to the accumulator word by word via `mac`,
4. shifts the accumulator down by one word.

NOTE(review): this has the shape of a Montgomery multiplication with modulus `m`
and `m_p` presumably equal to `-m^-1` modulo the word base — confirm with the author.
"""
function generic(x::NTuple{4, T}, y::NTuple{4, T}, m::NTuple{4, T}, m_p::T) where T
    a1 = zero(T)
    a2 = zero(T)
    a3 = zero(T)
    a4 = zero(T)
    a5 = zero(T)
    a6 = zero(T)

    for i in 1:4
        # Only the lowest words are needed for the per-round factor.
        u1 = (a1 + x[i] * y[1]) * m_p

        # Accumulate u1 * m, one word of `m` at a time.
        (a1, carry) = mac(a1, u1, m[1], zero(T))
        (a2, carry) = mac(a2, u1, m[2], carry)
        (a3, carry) = mac(a3, u1, m[3], carry)
        (a4, carry) = mac(a4, u1, m[4], carry)
        (a5, carry2) = adc(a5, zero(T), carry)

        # Accumulate x[i] * y, one word of `y` at a time.
        (a1, carry) = mac(a1, x[i], y[1], zero(T))
        (a2, carry) = mac(a2, x[i], y[2], carry)
        (a3, carry) = mac(a3, x[i], y[3], carry)
        (a4, carry) = mac(a4, x[i], y[4], carry)
        (a5, a6) = adc(a5, carry2, carry)

        # Shift the accumulator down by one word.
        a1 = a2
        a2 = a3
        a3 = a4
        a4 = a5
        a5 = a6
    end

    return (a1, a2, a3, a4, a5)
end

"""
    specialized(x, y, m, m_p)

`UInt64`-only entry point: forward the 4-word tuples `x`, `y`, `m` and the word `m_p`
unchanged to `generic` and return its result.
"""
function specialized(
    x::NTuple{4, UInt64},
    y::NTuple{4, UInt64},
    m::NTuple{4, UInt64},
    m_p::UInt64,
)
    return generic(x, y, m, m_p)
end

# Random 4-word operands and a random word; all four are non-constant globals.
x = tuple(rand(UInt64, 4)...)
y = tuple(rand(UInt64, 4)...)
m = tuple(rand(UInt64, 4)...)
m_p = rand(UInt64)

# Benchmark the generic method; the globals are passed without `$` interpolation.
display(@benchmark generic(x, y, m, m_p))
println()

# Benchmark the UInt64-only wrapper with the same un-interpolated globals.
display(@benchmark specialized(x, y, m, m_p))
println()
``````

When I run this, I get 78 ns with one 48-byte allocation for the first (generic) call, and 52 ns with 0 allocations for the second (specialized) call. I was under the impression that the results should be identical, because the JIT compiler specializes the function on the first call. What am I missing here?

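You have to interpolate variables with `$` when using `BenchmarkTools`; otherwise the benchmark also measures the cost of accessing the non-constant global variables.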

``````
julia> @benchmark generic($x, $y, $m, $m_p)
BenchmarkTools.Trial:
memory estimate:  0 bytes
allocs estimate:  0
--------------
minimum time:     39.406 ns (0.00% GC)
median time:      39.665 ns (0.00% GC)
mean time:        42.901 ns (0.00% GC)
maximum time:     216.663 ns (0.00% GC)
--------------
samples:          10000
evals/sample:     991

julia> @benchmark specialized($x, $y, $m, $m_p)
BenchmarkTools.Trial:
memory estimate:  0 bytes
allocs estimate:  0
--------------
minimum time:     39.321 ns (0.00% GC)
median time:      39.838 ns (0.00% GC)
mean time:        42.475 ns (0.00% GC)
maximum time:     541.635 ns (0.00% GC)
--------------
samples:          10000
evals/sample:     991
``````

Thank you! Usually I have these benchmarks inside functions, and `@benchmark` complains if variables are not quoted.