As part of the code I’m working on I need to find the unique elements of a collection of small integer arrays and assign a unique identifier to each one of them. To do this I could do something like the following:
arrays = [[1,2], [3,4,5], [3,1,2], [1,2]]
U = Dict{Vector{Int64}, Int64}()
for arr in arrays
get!(U, arr, length(U)+1)
end
So that I would have [1,2] => 1
, [3,4,5] => 2
, [3,1,2] => 3
. In order to avoid the allocation of a large number of small arrays, what I want to do instead is to write all the data into one large buffer and then represent the small arrays as pairs of numbers indicating the position in the buffer. For this I have a version of the following code:
struct CustomView
data::Vector{UInt32}
rb::UInt32
re::UInt32
end
function Base.hash(x::CustomView, h::UInt64)
h = hash(x.re - x.rb, h)
for i=x.re:-1:x.rb
h = hash(x.data[i], h)
end
return h
end
function Base.isequal(x::CustomView, y::CustomView)
if (x.re - x.rb) != (y.re - y.rb)
return false
end
for (v1, v2) in zip(x.data[x.rb:x.re], y.data[y.rb:y.re])
if v1 != v2
return false
end
end
return true
end
Now I can create the array data = [1,2,3,4,5,3,1,2,1,2]
and replace the small arrays above with CustomView(data, 1, 2)
, CustomView(data, 3, 5)
, CustomView(data, 6, 8)
, CustomView(data, 9, 10)
. The dictionaries for CustomView
work correctly. However, they seem to perform a lot of memory allocations that I would think should not be necessary, and perform slower than comparable methods. Consider the following test:
using Random
function test_dicts_tuple(n::Integer, data)
U = Dict{NTuple{3,UInt32}, UInt32}()
sizehint!(U, n)
for i=1:n
ind = 3 * (i-1) + 1
tp = (data[ind], data[ind+1], data[ind+2])
get!(U, tp, length(U)+1)
end
return U
end
function test_dicts_view(n::Integer, data)
U = Dict{CustomView, UInt32}()
sizehint!(U, n)
for i=1:n
ind = 3 * (i-1) + 1
vw = CustomView(data, ind, ind+2)
get!(U, vw, length(U)+1)
end
return U
end
function test_dicts(n::Integer, m::Integer)
data = Vector{UInt32}(rand(1:m, n*3))
U1 = @time test_dicts_tuple(n, data)
U2 = @time test_dicts_view(n, data)
@show length(U1), length(U2)
end
let
test_dicts(1000, 100)
for n in [1000, 10000, 100000, 1000000, 10000000]
test_dicts(n, 1000)
test_dicts(n, 5000)
test_dicts(n, 10000)
end
end
I won’t post the entire output here, but the very last test case produces the following on my machine:
0.858638 seconds (7 allocations: 272.008 MiB, 0.84% gc time)
1.197626 seconds (172.67 k allocations: 343.033 MiB, 4.10% gc time)
(length(U1), length(U2)) = (9999948, 9999948)
The pattern in allocation numbers is consistent. Using tuples, only 7 allocations are performed, while using my CustomView
requires hundreds of thousands. I cannot reconcile this with my mental picture of how the code above should work, so I would appreciate some help here.