Get vector of vectors of indice of appearances for every unique element of an array

Hey, I am trying to get a vector of vectors containing the indices of the appearance of every unique element of an array. So far i have tried

y = [1,2,2,3,5,5,6,1]

unique_elements = unique(y)
unique_elements = [1,2,3,5,6]

index_array = Vector{Vector}(undef,length(unique_elements))

for (index,unique) in enumerate(unique_elements)
all_index = findall(x->x==unique,y)
index_array[index] = all_index
end

index_array = [[1,8],[2,3],[4],[5,6],[7]]

Is there any way that is faster than this ? My problem is that the y array im actually working with has 3 000 000 elements and this therefore takes over a minute to do… So I was wondering if anyone had an idea on how to optimize this.
Thanks !!

1 Like

See: Is there a function similar to numpy unique with inverse? - #6 by stevengj

1 Like

Your implementation is O(n^2) where n is the length of the array, so you can definitely do better by making a single pass over the array and using e.g. a dictionary (hash table) as in some of the posts I linked above.

2 Likes

Is this a good function?

function dict_unique_indx(y)
    index_dict = Dict{eltype(y), Vector{Int64}}()

    for (indx, element) in pairs(y)
        if haskey(index_dict, element)
            push!(index_dict[element], indx)
        else
            index_dict[element] = [indx]
        end
    end
    index_dict    
end

I would use Int rather than Int64, so it is the correct index type on 32-bit machines. Otherwise looks good.

1 Like

just to rewrite the same function in another way

function dict_unique_indx(y)
    d = Dict{eltype(y), Vector{Int}}()
    for (i, e) in pairs(y)
       push!(get!(()->Vector{Int}(), d,e),i)
    end
    d    
end