Counting the number of appearances of Tuples in a Vector

Let vec = [("A", "B"), ("A", "C"), ("A", "D"), ("B", "C"), ("B", "D")] be a vector.
I want to count, at each step of the for loop, how many times each element of the current tuple has appeared so far.

"""
    count_occurrences(vec)

Walk through `vec`, a collection of 2-element tuples, while keeping a running tally
of how often each of the labels `"A"`, `"B"`, `"C"` and `"D"` has been seen so far
(in either slot of a tuple).

For every tuple that is one of `("A","B")`, `("A","C")`, `("A","D")`, `("B","C")`,
`("B","D")` or `("C","D")`, the current tallies of its two labels are recorded as
`(count_first, count_second)`. Any other tuple still updates the tallies but records
nothing.

Returns the recorded tuples as a `Vector{Any}`.
"""
function count_occurrences(vec)
    seen_a = seen_b = seen_c = seen_d = 0
    recorded = []

    for (left, right) in vec
        pair = (left, right)

        # Update the running tallies; a label sitting in both slots counts twice.
        seen_a += count(==("A"), pair)
        seen_b += count(==("B"), pair)
        seen_c += count(==("C"), pair)
        seen_d += count(==("D"), pair)

        # Only the six ordered pairs over A..D produce an entry; every other
        # combination leaves `entry` as `nothing`.
        entry = if left == "A"
            if right == "B"
                (seen_a, seen_b)
            elseif right == "C"
                (seen_a, seen_c)
            elseif right == "D"
                (seen_a, seen_d)
            end
        elseif left == "B"
            if right == "C"
                (seen_b, seen_c)
            elseif right == "D"
                (seen_b, seen_d)
            end
        elseif left == "C" && right == "D"
            (seen_c, seen_d)
        end

        entry === nothing || push!(recorded, entry)
    end

    return recorded
end
 count_occurrences(vec)
5-element Vector{Any}:
 (1, 1)
 (2, 1)
 (3, 1)
 (2, 2)
 (3, 2)

This code works, but I would like to apply it to a different vector with arbitrary labels, say vec = [("A", "B"), ("A", "C"), ("A", "D"), ("B", "C"), ("R", "E"),...] .

Just replace your multitude of variables with a dictionary:

"""
    count_occurrences(vec)

For every 2-element tuple `(a, b)` in `vec`, in order, return `(na, nb)` where `na`
and `nb` are the number of times `a` and `b` have appeared so far (in either slot,
counting the current tuple).

Labels may be of any hashable type. The result is a `Vector{Tuple{Int,Int}}`.

# Examples
```julia
julia> count_occurrences([("A", "B"), ("A", "C"), ("B", "C")])
3-element Vector{Tuple{Int64, Int64}}:
 (1, 1)
 (2, 1)
 (2, 2)
```
"""
function count_occurrences(vec)
    # Concretely typed containers keep the counts and the result type-stable.
    vect = Tuple{Int,Int}[]
    nb_appear = Dict{Any,Int}()

    for (v1, v2) in vec
        nb_appear[v1] = get(nb_appear, v1, 0) + 1
        nb_appear[v2] = get(nb_appear, v2, 0) + 1
        # Read the counts back only after both updates, so that a tuple with two
        # equal labels reports the same, fully updated count in both slots.
        push!(vect, (nb_appear[v1], nb_appear[v2]))
    end

    return vect
end
1 Like

An alternative way without pushing, as the output vector length is known:

"""
    count_occurrences2(v)

For every 2-element tuple `(a, b)` in `v`, in iteration order, return `(na, nb)`
where `na` and `nb` are the number of times `a` and `b` have appeared so far (in
either slot, counting the current tuple).

`v` must support `length`. The output is preallocated as a
`Vector{Tuple{Int,Int}}` of that length.
"""
function count_occurrences2(v)
    vn = Vector{Tuple{Int,Int}}(undef, length(v))
    # Seed every label with a zero count so the loop can use `+=` directly.
    d = Dict(x => 0 for x in Iterators.flatten(v))
    # `enumerate` counts from 1 regardless of how `v` is indexed, which matches the
    # 1-based `vn`; `pairs(v)` would hand back `v`'s own indices instead.
    for (i, (a, b)) in enumerate(v)
        d[a] += 1
        d[b] += 1
        vn[i] = (d[a], d[b])
    end
    return vn
end
2 Likes

Another solution:

# Fold over `v` with `accumulate`, threading one `Dict{String,Int}` of running
# counts through the state; each state is `((count_x, count_y), counts)` and only
# its first component is kept in the result.
function count_occurrences(v)
    function advance((_prev, counts), (x, y))
        # `x` is bumped before `y`, so for `x == y` the second slot is one higher.
        nx = counts[x] = get(counts, x, 0) + 1
        ny = counts[y] = get(counts, y, 0) + 1
        return ((nx, ny), counts)
    end
    states = accumulate(advance, v; init=((0, 0), Dict{String,Int}()))
    return first.(states)
end

From the same question on StackOverflow:

1 Like

I don’t know what OP would like as result when the input vector has tuples with equal elements such as ("A","A"). For example for the input vector:

v = [("A","B"),("A","C"),("A","D"),("B","C"),("B","D"),("A","A")] 

The code above may need to be adjusted slightly, depending on the desired result for such tuples.

1 Like

StatsBase.countmap does this for you. (Apply it to Iterators.flatten to count individual elements in the tuples.)

That was my first thought too, but is not what the original code does.

For those who don't know, or really can't stand, dictionaries:

# Dictionary-free variant: the growing result vector doubles as the lookup table.
vc = collect(Iterators.flatten(vec))  # every label, in order of appearance

# Return `(c, n)`, where `n` is one more than the count stored in the most recent
# `(c, _)` entry of `s`, or 1 when `c` has not been recorded yet.
function cumcount(s, c)
    # `findlast` accepts no `by` keyword, so the label is extracted in the predicate.
    fc = findlast(entry -> first(entry) == c, s)
    return isnothing(fc) ? (c, 1) : (c, s[fc][2] + 1)
end

res = Tuple{eltype(vc),Int}[]
for c in vc
    push!(res, cumcount(res, c))
end

# Consecutive entries of `res` come from the same input tuple; pair them up again.
tuple.(res[1:2:end], res[2:2:end])


using StatsBase

# Every label of every tuple, in order of appearance.
vc = collect(Iterators.flatten(vec))

# For a label occurring n times in total, collect label => 1, ..., label => n.
res = []
foreach(countmap(vc)) do (k, n)
    append!(res, k .=> 1:n)
end

# Sorting `res` groups the pairs by label with ascending counts; indexing with the
# inverse of the sort permutation of `vc` puts them back in order of appearance.
sort(res)[invperm(sortperm(vc))]