Why does explicit declaration of `::Int` save 40% runtime?

Here’s a pretty big speedup:

"""
    createCountMatrix2(dataMatrix, resolution, minMatrix, maxMatrix, ::Val{dim}) where dim

Bin the rows of `dataMatrix` into a `dim`-dimensional grid of counts.

Each row of `dataMatrix` is one sample and each of its `dim` columns is one
coordinate. Along axis `i` the interval `minMatrix[i]` to `maxMatrix[i]` is split
into `resolution` equally wide bins. The result is an `Int` array with
`resolution` entries along each of the `dim` axes, where each entry holds the
number of samples that fell into that cell.

The upper edge is inclusive: a sample equal to `maxMatrix[i]` is counted in the
last bin of axis `i` rather than indexing one past the end of the array.

`dim` is passed as `Val(dim)` so that the tuples built inside have a length known
at compile time.

# Throws
- `ErrorException` if `length(minMatrix)`, `length(maxMatrix)`,
  `size(dataMatrix, 2)` and `dim` are not all equal.
- `BoundsError` if a coordinate lies above `maxMatrix[i]`, or far enough below
  `minMatrix[i]` that its bin index is less than 1. Because the bin index is
  truncated toward zero, a coordinate less than one bin width below
  `minMatrix[i]` is counted in the first bin.
"""
function createCountMatrix2(dataMatrix, resolution, minMatrix, maxMatrix, ::Val{dim}) where dim
    if !(length(minMatrix) == length(maxMatrix) == size(dataMatrix, 2) == dim)
        error("unexpected set of sizes")
    end
    # Width of one bin along each axis.
    sz = ntuple(i -> (maxMatrix[i] - minMatrix[i]) / resolution, Val(dim))
    countMatrix = zeros(Int, ntuple(i -> resolution, Val(dim)))
    for k in axes(dataMatrix, 1)
        coord = ntuple(Val(dim)) do i
            x = dataMatrix[k, i]
            bin = trunc(Int, (x - minMatrix[i]) / sz[i]) + 1
            # A sample on (or rounded onto) the upper edge would land in bin
            # `resolution + 1`; fold it into the last bin. Samples truly above
            # the maximum are left alone so they still raise a BoundsError.
            (bin > resolution && x <= maxMatrix[i]) ? resolution : bin
        end
        countMatrix[coord...] += 1
    end
    return countMatrix
end
julia> dataMat = rand(1000, 2);

julia> @btime createCountMatrix($dataMat, $10, $(zeros(2)), $(ones(2)), 2)
  121.242 μs (2010 allocations: 55.88 KiB)
10×10 Matrix{Int64}:
 13  10  13  15   9  10   6   9  13   7
  8  11   6   5   8  10  15   6  10  14
 17  13   7   6   7  19  10  12  11   5
  7   7   9  12  12  10  10   9  10  11
 13  13   8  15   7  15   6  10  10   8
  6  12  12   8   8   6  16   7   5  11
  9  12   7  12  10  15   6   6  11  12
  9  14   9   9  11  10  11   7  12   7
 15  16  15  11  11   7   5  10   8   7
 10   5  13   9  11  13  13  12   8   4

julia> @btime createCountMatrix2($dataMat, $10, $(zeros(2)), $(ones(2)), Val(2))
  1.737 μs (2 allocations: 944 bytes)
10×10 Matrix{Int64}:
 13  10  13  15   9  10   6   9  13   7
  8  11   6   5   8  10  15   6  10  14
 17  13   7   6   7  19  10  12  11   5
  7   7   9  12  12  10  10   9  10  11
 13  13   8  15   7  15   6  10  10   8
  6  12  12   8   8   6  16   7   5  11
  9  12   7  12  10  15   6   6  11  12
  9  14   9   9  11  10  11   7  12   7
 15  16  15  11  11   7   5  10   8   7
 10   5  13   9  11  13  13  12   8   4

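I had to use `Val` to move the dimension into the type domain: with `dim` as a type parameter, `ntuple(f, Val(dim))` returns a tuple whose length the compiler knows, so `sz`, `coord` and the dimensionality of `countMatrix` are all inferred and the loop stays type-stable. That pays off here, but it isn’t always a win: if `dim` is only known at run time, constructing `Val(dim)` itself requires a dynamic dispatch, and every distinct value of `dim` compiles its own specialization of the function.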

4 Likes