Delete rows in matrix A that are in matrix B

I am looking for a Julia function that is equivalent to MATLAB's setdiff with the 'rows' option:

% C holds the rows of A that are not rows of B; ia indexes those rows in A.
[C,ia] = setdiff(A,B,'rows');

Thanks for any help!

I think there is no built-in counterpart to setdiff(A, B, 'rows').

You could do it yourself, e.g.

# Collect each row of `A` as its own (copied) vector.
function rows(A)
    return map(i -> A[i, :], axes(A, 1))
end

# setdiff treats its arguments as sets, so a row that occurs several times in A
# appears at most once in C.
C = setdiff(rows(A),rows(B))

If you want a matrix as output

# Stack the row vectors as columns, then swap the two dimensions. `permutedims`
# is used instead of `'` because the adjoint would conjugate complex entries and
# only yields a lazy wrapper rather than a plain matrix.
C = permutedims(reduce(hcat, C))   # see https://discourse.julialang.org/t/finding-indices-using-setdiff/26930

If you also need the indices (and performance does not matter too much):

# Indices of the rows of A that occur in C. C must still be the vector of rows
# returned by setdiff; once C has been turned into a matrix, `in` compares each
# row vector against scalar entries and never matches.
ia = [ i for i in axes(A,1) if A[i, :] in C ]
# Variant 1: stack the remaining row views back into a matrix.
mapreduce(transpose, vcat, setdiff(eachrow(m1), eachrow(m2)))

# Variant 2: recover the index in m1 of every remaining row and index m1 with
# them. `parentindices` is the public accessor for a view's indices, used here
# instead of reaching into the internal `indices` field.
sd = setdiff(eachrow(m1), eachrow(m2))
m1[[first(parentindices(vsd)) for vsd in sd], :]

# Variant 3: convert the remaining rows to tuples and reinterpret them as a
# matrix. The element type is taken from m1 instead of assuming Int64, `sd` is
# broadcast over directly instead of being splatted into a new array, and
# `transpose` is used so that no conjugation is applied.
sd = setdiff(eachrow(m1), eachrow(m2))
transpose(reinterpret(reshape, eltype(m1), Tuple.(sd)))

this seems to be the fastest

# Build a lookup set from the rows of m2 once, then keep the rows of m1 that
# are not contained in it.
let lookup = Set(eachrow(m2))
    keep = map(row -> !(row in lookup), eachrow(m1))
    m1[keep, :]
end
3 Likes

One more approach (edited with thanks to @rafael.guerra and @bertschi):

"""
    setdiffrows(a::AbstractMatrix, b::AbstractMatrix)

Return a tuple `(rows, ia)`: the rows of `a` that do not occur as a row of `b`,
and the indices `ia` of those rows in `a`. Row order and duplicate rows of `a`
are kept.
"""
function setdiffrows(a::AbstractMatrix, b::AbstractMatrix)
    # Hash the rows of `b` once so every membership test is a set lookup.
    excluded = Set(eachrow(b))
    ia = Int[i for i in axes(a, 1) if !(view(a, i, :) in excluded)]
    return a[ia, :], ia
end
1 Like

A compact one:

# Keep the rows of A that do not occur among the rows of B.
A = A[.!in.(eachrow(A), Ref(eachrow(B))), :]
2 Likes

I get different results depending on whether I use the solution from SteffanPL or the one from rafael.guerra.
I think the reason is that setdiff compares the differences between the rows, and the solution from rafael is the right one. Sadly, its performance with large matrices is not good.

Using setdiff(rows(A), rows(B)) considers rows(A) as a set, i.e., removes duplicate rows when these are not in rows(B).
The solution which does not do that can be made quite a bit faster via

# Keep the rows of A that are not rows of B; the rows of B are collected into a
# Set first so that each membership test is a hash lookup.
A = A[.!in.(eachrow(A), Ref(Set(eachrow(B)))), :]

as lookup in a set is (obviously) much faster than in an iterable.

2 Likes

@PeterSimon’s code performs much better for large matrices by using Set too.

edited code
"""
    setdiffrows(a::AbstractMatrix, b::AbstractMatrix)

Return a tuple `(rows, ia)`: the rows of `a` that do not occur as a row of `b`,
and the indices `ia` of those rows in `a`. Row order and duplicate rows of `a`
are kept.
"""
function setdiffrows(a::AbstractMatrix, b::AbstractMatrix)
    # Hash the rows of `b` once so every membership test is a set lookup.
    excluded = Set(eachrow(b))
    ia = Int[i for i in axes(a, 1) if !(view(a, i, :) in excluded)]
    return a[ia, :], ia
end
1 Like

I will let Peter edit his post and I have marked it as the solution, with special credit to bertschi.

a comparison between the correct and fast solutions

# Benchmark inputs: two 10^6×10 matrices with entries drawn at random from 1:5.
m1=rand(1:5, 10^6,10);
m2=rand(1:5, 10^6,10);
# Return the rows of `a` that do not occur as a row of `b`, keeping the row
# order and any duplicate rows of `a`. The kept indices `ia` are collected but
# not returned in this variant (the `, ia` part of the return is commented out).
function setdiffrows(a::AbstractMatrix, b::AbstractMatrix)
    ia = Int[]
    # Set of the rows of `b`, used for the membership test below.
    Sb = Set(eachrow(b))
    # `@views` makes `a[i, :]` a view instead of a copy of the row.
    @views for i in axes(a,1)
        a[i, :] ∉ Sb && push!(ia, i)
    end
    return a[ia, :] #, ia
end
julia> function dr(m1,m2)
           # Return a view of the rows of m1 that do not occur as a row of m2.
           #S=Set(eachrow(m2))
           # Lookup set holding one view per row of m2.
           S=Set(@view m2[r,:] for r in axes(m2,1))
           # Boolean mask over the rows of m1; the selection is returned as a
           # view rather than a copy.
           return @view m1[[∉(s,S) for s in eachrow(m1)],:]
       end 
dr (generic function with 1 method)

julia> @btime dr($m1,$m2);
  479.037 ms (11 allocations: 89.84 MiB)

julia> @btime setdiffrows($m1,$m2); 
  521.005 ms (23 allocations: 160.63 MiB)