Get indices of unique rows in 2D array

MWE:

A = [1 2 ; 3 4 ; 1 2 ]

I want as output the row indices 1, 2.

Using unique, one can access the indices of unique values by using i = unique(i -> A[i], eachindex(A)), however I don’t know how to modify this for 2D arrays.

Appreciate any guidance. Context is I have a list of (x,y,z) coordinates (Mx3 array) where there are duplicate (x,y) values, and I’m trying to get rid of them. There may be a better way to do this. This issue is preventing me from gridding mesh coordinates using GMT, LinearInterpolations, and other packages.

To get rid of the repeated rows:
# Drop repeated rows of A (with dims=1, whole rows are compared).
unique(A; dims=1)

To get the indices of the first unique rows, perhaps:

# Row indices of the first occurrence of each distinct row of A.
# `view` lets rows be compared in place; `A[i, :]` would allocate a copy of every row.
ix = unique(i -> view(A, i, :), axes(A, 1))

This works! axes(A, 1) was the missing piece.

Thank you!

a handmade one, but much more efficient

"""
    distindrow(m)

Return one row index per distinct row of the matrix `m`.

Rows are used as keys of a `Dict` and every row overwrites the entry of an equal
earlier row, so for repeated rows the index of the *last* occurrence is reported.
The result is the dictionary's value iterator; its iteration order is unspecified.

The key type is taken from `eltype(eachrow(m))` rather than being written out for
one particular matrix type, so matrices of any element type are accepted.
"""
function distindrow(m)
    # Keys are the row views themselves, so no row is copied.
    d = Dict{eltype(eachrow(m)), Int}()
    # Pair each row with its index along the first axis of `m`.
    for (i, e) in zip(axes(m, 1), eachrow(m))
        d[e] = i
    end
    return values(d)
end

PS
unlike unique, this returns the last index among the rows that have the same value.
Use this version if that matters:

"""
    distind(m)

Return one row index per distinct row of the matrix `m`, reporting the *first*
occurrence of every repeated row.

The rows are visited from the last to the first and stored as keys of a `Dict`, so
the final write for any group of equal rows comes from its earliest row. The result
is the dictionary's value iterator; its iteration order is unspecified.
"""
function distind(m)
    d = Dict{eltype(eachrow(m)), Int}()
    # Iterate over the real row indices in reverse. Enumerating a reversed row
    # iterator would instead number the rows 1, 2, ... from the bottom and store
    # positions in the reversed sequence rather than row indices.
    for i in reverse(axes(m, 1))
        d[view(m, i, :)] = i
    end
    return values(d)
end
distind (generic function with 1 method)
1 Like

OMG!

Perhaps just:

Dict{Vector{Int}, Int}()

I had tried what you suggest, but it gave me an error which (in my haste) I attributed to the type returned by the eachrow() function not being replaceable by Array{Int}.
I take this opportunity to add a different way of obtaining the same result
without using the OMG type :grin:

Although, sometimes, the measurements say that the OMG type is better

"""
    distindOMG2(m)

Return one row index per distinct row of the matrix `m`, reporting the *first*
occurrence of every repeated row.

Row views are used as `Dict` keys and `get!` only inserts a key that is not present
yet, so later duplicates never replace the index already stored. The result is the
dictionary's value iterator; its iteration order is unspecified.

The key type comes from `eltype(eachrow(m))`, so the function is not tied to
`Matrix{Int64}` input.
"""
function distindOMG2(m)
    d = Dict{eltype(eachrow(m)), Int}()
    for (i, e) in zip(axes(m, 1), eachrow(m))
        # Insert only when the row is new; an existing entry is left untouched.
        get!(d, e, i)
    end
    return values(d)
end
distindOMG2 (generic function with 1 method)

"""
    distindJustV(m)

Return one row index per distinct row of the matrix `m`, reporting the *first*
occurrence of every repeated row.

This variant keys the dictionary by plain arrays instead of row views: each row is
converted to an `Array` when it is looked up, which costs an allocation per row.
`get!` only inserts a key that is not present yet, so the first index wins. The
result is the dictionary's value iterator; its iteration order is unspecified.

The element type of the keys follows `eltype(m)`, so non-`Int` matrices work too.
"""
function distindJustV(m)
    d = Dict{Array{eltype(m)}, Int}()
    for (i, e) in zip(axes(m, 1), eachrow(m))
        # `e` is a view; `get!` converts it to the dictionary's array key type.
        get!(d, e, i)
    end
    return values(d)
end
distindJustV (generic function with 1 method)

julia> @btime distindOMG2($m)
  786.667 ns (7 allocations: 4.38 KiB)
ValueIterator for a Dict{SubArray{Int64, 1, Matrix{Int64}, Tuple{Int64, Base.Slice{Base.OneTo{Int64}}}, true}, Int64} with 11 entries. Values:    
  2
  4
  8
  1
  14
  20
  16
  7
  10
  5
  3

julia> @btime distindJustV($m)
  2.000 μs (58 allocations: 3.98 KiB)
ValueIterator for a Dict{Array{Int64}, Int64} with 11 entries. Values:   
  2
  4
  8
  1
  14
  20
  16
  7
  10
  5
  3
1 Like

Instead of writing out the view type, you could use the first element to define it. This should be as efficient:

"""
    distind2(mat::AbstractMatrix)

Return one row index per distinct row of `mat`; for repeated rows the index of the
*last* occurrence is reported.

The dictionary's key and value types are taken from the first `(index, row)` pair,
so no view type has to be written out. The result is the dictionary's value
iterator; its iteration order is unspecified. A matrix without rows yields an empty
result.
"""
function distind2(mat::AbstractMatrix)
    # There is no first pair to peel off when `mat` has no rows, so build the empty
    # dictionary from the declared element types instead.
    if isempty(axes(mat, 1))
        return values(Dict{eltype(eachrow(mat)), eltype(axes(mat, 1))}())
    end
    (i, e), rest = Iterators.peel(zip(axes(mat, 1), eachrow(mat)))
    d = Dict(e => i)
    for (j, row) in rest
        # Later equal rows overwrite the stored index.
        d[row] = j
    end
    return values(d)
end
distind2 (generic function with 2 methods)

julia> distind2([1 2; 3 4; 3 4] ./ pi)  # any eltype
ValueIterator for a Dict{SubArray{Float64, 1, Matrix{Float64}, Tuple{Int64, Base.Slice{Base.OneTo{Int64}}}, true}, Int64} with 2 entries. Values:
  1
  3
1 Like

This seems to be just as efficient, but simpler:

"""
    distind3(m)

Return one row index per distinct row of `m`; for repeated rows the index of the
*last* occurrence is reported.

The dictionary is keyed by the element type of `eachrow(m)`, so it can be created
before any row is seen and an input without rows gives an empty result. The result
is the dictionary's value iterator; its iteration order is unspecified.
"""
function distind3(m)
    rows = eachrow(m)
    lastseen = Dict{eltype(rows), Int}()
    for entry in pairs(rows)
        idx, row = entry
        # Assignment replaces the index stored for an equal earlier row.
        lastseen[row] = idx
    end
    return values(lastseen)
end
1 Like

True, and this allows the empty case distind3(rand(0,3)) which mine does not.

1 Like

to squeeze the instructions a little more, a fully functional solution (no for loop)

"""
    distind4(m)

Return one row index per distinct row of `m`, built without an explicit loop.

Each row is zipped with its index along the first axis and the pairs are handed to
the `Dict` constructor with the row as key. The result is the dictionary's value
iterator; its iteration order is unspecified.
"""
function distind4(m)
    row_index_pairs = zip(eachrow(m), axes(m, 1))
    return values(Dict(row_index_pairs))
end
1 Like

Another cool way:

"""
    distind5(M)

Return the indices of the first occurrence of each distinct row of `M`, as a vector.

`pairs(eachrow(M))` yields `index => row` pairs; `unique(last, ...)` keeps one pair
per distinct row, and the index is then read from the front of each kept pair.
"""
function distind5(M)
    kept = unique(last, pairs(eachrow(M)))
    return map(first, kept)
end

Often it’s more convenient to use a vector of static vectors to represent such data. In this case, it lets you use unique in the way you were already familiar with:

julia> using StaticArrays

julia> A = [1 2; 3 4; 1 2];

julia> B = SVector{2}.(eachrow(A))
3-element Vector{SVector{2, Int64}}:
 [1, 2]
 [3, 4]
 [1, 2]

julia> unique(i -> B[i], eachindex(B))
2-element Vector{Int64}:
 1
 2