Indices of intersection of two arrays

You might be using an earlier version of the function, which I posted in a since-deleted post. Please check again.

Here is another way that handles values occurring multiple times and keeps, for each common value, the correspondence between its positions in the two vectors:

"""
    intersectalajulia3(a, b)

Return a `Dict` mapping each value common to `a` and `b` to a tuple
`(positions_in_a, positions_in_b)`, where each component lists every index at
which that value occurs in the corresponding input.
"""
function intersectalajulia3(a, b)
    common = intersect(a, b)
    # One full scan per common value, so repeated occurrences are all reported.
    positions_a = map(v -> findall(==(v), a), common)
    positions_b = map(v -> findall(==(v), b), common)
    return Dict(zip(common, zip(positions_a, positions_b)))
end

The data can be organized in different ways according to the use to be made of it.


# Sample inputs: 20 random integers drawn from 1:10 and 30 drawn from 1:15,
# so repeated values are expected in both vectors.
a = rand(1:10, 20)
b = rand(1:15, 30)

"""
    intersectalajulia4(a, b)

Return a three-column matrix with one row per value common to `a` and `b`:
column 1 holds the value, column 2 the vector of its indices in `a`, and
column 3 the vector of its indices in `b`.
"""
function intersectalajulia4(a, b)
    common = intersect(a, b)
    # All positions of each common value within `xs`.
    occurrences(xs) = [findall(==(v), xs) for v in common]
    return [common occurrences(a) occurrences(b)]
end
# Collect the common values and their positions in `a` and `b` into one matrix.
m=intersectalajulia4(a,b)

using IndexedTables

"""
    intersectalajulia5(a, b)

Pass the values common to `a` and `b` to `ndsparse` as the key column `comval`,
with two data columns: `whereina` (all indices of the value in `a`) and
`whereinb` (all indices of the value in `b`).
"""
function intersectalajulia5(a,b)
    common = intersect(a, b)
    # All positions of each common value within `xs`.
    locate(xs) = [findall(==(v), xs) for v in common]
    return ndsparse((comval=common,), (whereina=locate(a), whereinb=locate(b)))
end

# Build the table, then index it with the second element of its keys.
it=intersectalajulia5(a,b)
it[keys(it)[2]]
julia> m=intersectalajulia4(a,b)
8×3 Matrix{Any}:
  4  [1, 12]         [6, 9]
  6  [3, 8, 16]      [1, 10, 28]
  9  [4, 13]         [14, 24]
  8  [5, 9, 14, 15]  [12, 16]
  7  [6, 20]         [11]
 10  [7, 19]         [2, 8]
  5  [10]            [3, 22]
  1  [11, 18]        [4, 17, 30]

julia> it=intersectalajulia5(a,b)
1-d NDSparse with 8 values (2 field named tuples):
comval │ whereina        whereinb
───────┼────────────────────────────
1      │ [11, 18]        [4, 17, 30]
4      │ [1, 12]         [6, 9]
5      │ [10]            [3, 22]
6      │ [3, 8, 16]      [1, 10, 28]
7      │ [6, 20]         [11]
8      │ [5, 9, 14, 15]  [12, 16]
9      │ [4, 13]         [14, 24]
10     │ [7, 19]         [2, 8]
2 Likes

The following algorithm is more efficient for large vectors whose intersection is also large.


"""
    intersectalajulia6(a, b)

Return a `Dict` mapping each value that occurs in both `a` and `b` to a tuple
`(positions_in_a, positions_in_b)` listing every index at which it occurs.

Each input is traversed once, so the cost does not grow with the number of
common values. The key type is derived from the element types of the inputs,
so non-integer elements (for example strings) are accepted.
"""
function intersectalajulia6(a, b)
    K = Union{eltype(a), eltype(b)}
    cv = Dict{K, NTuple{2, Vector{Int}}}()
    # Pass 1: record every position of every value of `a`.
    for (i, e) in enumerate(a)
        push!(first(get!(() -> (Int[], Int[]), cv, e)), i)
    end
    # Pass 2: only values already seen in `a` can be common, so values that
    # occur solely in `b` are never inserted.
    for (i, e) in enumerate(b)
        entry = get(cv, e, nothing)
        entry === nothing || push!(last(entry), i)
    end
    # Every entry has positions in `a`; keep those that were also found in `b`.
    return filter(kv -> !isempty(kv.second[2]), cv)
end

(For numeric vectors; this is just a direct translation of the MATLAB code.)

First, let there be an ismember():

"""
    ismember(a, b) -> (lia, locb)

For each element of `a`, report whether it occurs in `b` and where.

# Returns
- `lia::Vector{Bool}`: `lia[i]` is `true` when `a[i]` is found in `b`.
- `locb::Vector{Int}`: `locb[i]` is the index in `b` reported by `indexin` for
  `a[i]`, or `0` when `a[i]` is not found.
"""
function ismember(a::Vector{T}, b::Vector{T}) where T <: Real
    hits = indexin(a, b)
    lia = Bool[!isnothing(h) for h in hits]
    # `0` is an index sentinel, so it must stay an `Int` whatever `eltype(b)` is.
    locb = Int[something(h, 0) for h in hits]
    return (lia, locb)
end

Then we can do:

"""
    intersectidx(a, b) -> (c, ia, ib)

Return the values `c` common to `a` and `b` together with index vectors `ia`
and `ib` such that `c == a[ia]` and `c == b[ib]`.

The longer vector is the one scanned against the other (ties scan `a`).
Duplicates among the matches are reduced with `uniqueidx`.
"""
function intersectidx(a::Vector{T}, b::Vector{T}) where T <: Real
    if length(a) >= length(b)
        c, ia, ib = _intersectidx_scan(a, b)
    else
        c, ib, ia = _intersectidx_scan(b, a)
    end
    return (c, ia, ib)
end

# Look every element of `x` up in `y`, keep the matches selected by `uniqueidx`,
# and return `(values, indices_into_x, indices_into_y)`.
# NOTE(review): `uniqueidx` is not defined in this snippet; presumably it
# returns the indices of the first occurrence of each distinct value — confirm.
function _intersectidx_scan(x, y)
    found, where_in_y = ismember(x, y)
    matched = findall(found)
    keep = matched[uniqueidx(x[matched])]
    return (x[keep], keep, where_in_y[keep])
end

indexin() is less well known than it deserves to be:

# Example inputs: the values 6, 7, 10 and 11 occur in both vectors.
a = [2, 4, 6, 7, 10, 11]
b = [6, 7, 10, 11, 13, 15, 17, 19, 21, 23]

julia> indexin(b,a)
10-element Vector{Union{Nothing, Int64}}:
 3
 4
 5
 6
  nothing
  nothing
  nothing
  nothing
  nothing
  nothing


julia> indexin(a,b)
6-element Vector{Union{Nothing, Int64}}:
  nothing
  nothing
 1
 2
 3
 4

That is why Julia is so elegant.