I am looking for a Julia function that is equivalent to Matlab's setdiff with the 'rows' option:
[C,ia] = setdiff(A,B,'rows');
Thanks for any help!
I am looking for a Julia function that is equivalent to Matlab's setdiff with the 'rows' option:
[C,ia] = setdiff(A,B,'rows');
Thanks for any help!
I think there is no built-in counterpart to setdiff(A, B, 'rows').
You could do it yourself, e.g.
# Collect every row of `A` as its own (copied) vector.
rows(A) = map(i -> A[i, :], axes(A, 1))
C = setdiff(rows(A),rows(B))
If you want a matrix as output
# Stack the row vectors in `C` as columns, then swap the dimensions so each vector
# becomes a row. `permutedims` is used instead of the adjoint `'` because the adjoint
# conjugates complex entries and is not defined for non-numeric elements.
C = permutedims(reduce(hcat, C)) # see https://discourse.julialang.org/t/finding-indices-using-setdiff/26930
If you also need the indices (and performance does not matter too much), compute them from the vector of rows C returned by setdiff, i.e. before converting C to a matrix:
ia = [ i for i in axes(A,1) if A[i, :] in C ]
# Variant 1: take the distinct rows of m1 that are not rows of m2 and stack them
# vertically into a matrix.
mapreduce(transpose, vcat,setdiff(eachrow(m1),eachrow(m2)))
# Variant 2: read each surviving row's position from the `indices` field of its row
# view, then index m1 with those row numbers.
# NOTE(review): assumes the elements of `sd` are SubArray views with an `indices` field.
sd=setdiff(eachrow(m1),eachrow(m2))
m1[[first(vsd.indices) for vsd in sd],:]
# Variant 3: convert each surviving row to a tuple and reinterpret the tuples as an
# Int64 matrix, then flip it so rows are rows again.
# NOTE(review): presumably requires m1 to hold Int64 values — confirm.
sd=setdiff(eachrow(m1),eachrow(m2))
reinterpret(reshape, Int64, Tuple.([sd...]))'
this seems to be the fastest
# Hash the rows of m2 once, then keep every row of m1 that is not among them.
let m2rows = Set(eachrow(m2))
    keep = [!(r in m2rows) for r in eachrow(m1)]
    m1[keep, :]
end
One more approach (edited with thanks to @rafael.guerra and @bertschi):
"""
    setdiffrows(a::AbstractMatrix, b::AbstractMatrix)

Return `(rows, ia)`: the rows of `a` that do not occur as a row of `b`, in their
original order and with duplicates kept, together with their row indices `ia` in `a`.
"""
function setdiffrows(a::AbstractMatrix, b::AbstractMatrix)
    # Hash the rows of `b` once so that each membership test is a set lookup.
    brows = Set(eachrow(b))
    ia = Int[i for i in axes(a, 1) if view(a, i, :) ∉ brows]
    return a[ia, :], ia
end
A compact one:
# Keep the rows of A that are not rows of B (duplicate rows of A are preserved).
# `Ref` makes the broadcast treat `eachrow(B)` as one collection to search in.
A = A[eachrow(A) .∉ Ref(eachrow(B)), :]
I get different results depending on whether I use the solution from SteffanPL or the one from rafael.guerra?!
I think the reason is that setdiff compares the rows differently, and the solution from rafael is the right one. But sadly, the performance with large matrices is not good.
Using setdiff(rows(A), rows(B)) treats rows(A) as a set, i.e., duplicate rows of A that are not in rows(B) appear only once in the result.
The solution which does not do that can be made quite a bit faster via
# Same row filter as before, but the rows of B are first collected into a Set so
# that each membership test is a set lookup.
A = A[eachrow(A) .∉ Ref(Set(eachrow(B))), :]
as lookup in a set is (obviously) much faster than in an iterable.
@PeterSimon’s code performs much better for large matrices by using a Set, too.
"""
    setdiffrows(a::AbstractMatrix, b::AbstractMatrix)

Select the rows of `a` that are not rows of `b`. Returns the selected rows as a
matrix (original order, duplicates kept) and the vector of their row indices in `a`.
"""
function setdiffrows(a::AbstractMatrix, b::AbstractMatrix)
    seen = Set(eachrow(b))
    ia = Int[]
    for i in axes(a, 1)
        # Compare a view of the row so no copy is made per iteration.
        row = view(a, i, :)
        if !(row in seen)
            push!(ia, i)
        end
    end
    return a[ia, :], ia
end
I will let Peter edit his post and I have marked it as the solution, with special credit to bertschi.
a comparison between the correct and fast solutions
# Benchmark inputs: two 10^6×10 matrices with random integer entries drawn from 1:5.
m1=rand(1:5, 10^6,10);
m2=rand(1:5, 10^6,10);
# Return the rows of `a` that do not occur as a row of `b`, in their original order.
# This benchmark variant returns only the matrix; the index vector `ia` is left out
# of the return value (see the commented-out `, ia`).
function setdiffrows(a::AbstractMatrix, b::AbstractMatrix)
# Row indices of `a` that will be kept.
ia = Int[]
# Rows of `b` collected into a Set for membership tests.
Sb = Set(eachrow(b))
# `@views` turns `a[i, :]` into a view instead of a copied row.
@views for i in axes(a,1)
a[i, :] ∉ Sb && push!(ia, i)
end
return a[ia, :] #, ia
end
julia> function dr(m1,m2)
# Return a view of the rows of m1 that are not rows of m2.
#S=Set(eachrow(m2))
# Set of row views of m2, built with an explicit generator instead of `eachrow`.
S=Set(@view m2[r,:] for r in axes(m2,1))
# Boolean mask over the rows of m1 (true = row absent from S), applied as a view.
return @view m1[[∉(s,S) for s in eachrow(m1)],:]
end
dr (generic function with 1 method)
julia> @btime dr($m1,$m2);
479.037 ms (11 allocations: 89.84 MiB)
julia> @btime setdiffrows($m1,$m2);
521.005 ms (23 allocations: 160.63 MiB)