What are the best options for a dict/map with integer keys and the fastest insert/update? For background, I wish to aggregate those position-weight pairs (p,w) where the position is the same, I’m tinkering with the idea of having a sparse matrix be implemented as a list of dict’s, one for each column, and the matrix product needs this aggregation. So far, my benchmarks are:
function f0(pp::Vector{tp}, ww::Vector{tw}, Z::Vector{tw}) where {tp,tw}
# aggregating with a dense vector Z
for (p,w) ∈ zip(pp,ww) Z[p] += w end;
_pp = findall(Z .≠ 0)
_ww = Z[_pp]
Z .= 0 # clean up
return _pp, _ww end;
function _add!(D::Dict{tp,tw}, p::tp, w::tw) where {tp, tw}
idx = Base.ht_keyindex(D,p)
if idx<0 D[p] = w
else D.vals[idx] += w end end;
function f1(pp::Vector{tp}, ww::Vector{tw}, D::Dict{tp,tw}) where {tp,tw}
# aggregating with a dict
for (p,w) ∈ zip(pp,ww)
@inline _add!(D,p,w) end
filter!(pw -> !iszero(pw[2]), D) end; # clean up
n, N =10^5, 10^7;
pp, ww = UInt32.(rand(1:n,N)), rand(-1:0.001:1,N);
Z, D = fill(0.0, n), Dict{UInt32,Float64}();
@time f0(pp, ww, Z);
@time f1(pp ,ww, D);
0.034545 seconds (11 allocations: 1.538 MiB)
0.212361 seconds (39 allocations: 4.334 MiB)