Any speed improvements to the implementation of `BiRank`?

Hi All,

I implemented the paper BiRank: Towards Ranking on Bipartite Graphs in Julia (R and Python implementations already exist here). I was wondering if there are any obvious performance problems I missed? I noticed large differences when the eltype of my Sparse W is UInt32 (much slower) compared to Float64. Also should I try to integrate it with Graphs.jl given that it is specifically for bipartite graphs? Or create a separate package?

Thanks!


module BiRank
using LinearAlgebra
using SparseArrays
using ProgressMeter


export birank

"""
    birank(W; method="BiRank",
    α=0.85, β=0.85, u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10)
Implements Algorithm 1 in 
BiRank: Towards Ranking on Bipartite Graphs by
Xiangnan He, Ming Gao Member, Min-Yen Kan Member, and Dingxian Wang

- W: Weighted adjacency matrix (dim: |U| x |P|)
- method: one of "HITS", "CoHITS", "BGER", "BGRM", "BiRank"
- α: damping factor for 'p'
- β: damping factor for 'u'
- u⁰: query vector for 'u'. Defaults to 1/|U| for all elements.
- p⁰: query vector for 'p'. Defaults to 1/|P| for all elements.
- max_iter: maximum number of iterations
- tol: tolerance for convergence 
"""
function birank(W; method="BiRank",
    α=0.85, β=0.85,
    u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10)
    method = lowercase(method)
    Wᵀ = transpose(W)
    ## Weighted degrees
    Dₚ_vec = sum(W; dims=1) |> vec
    Dᵤ_vec = sum(W; dims=2) |> vec
    ## Avoid division by 0  
    Dₚ_vec[Dₚ_vec.==zero(eltype(Dₚ_vec))] .= one(eltype(Dₚ_vec))
    Dᵤ_vec[Dᵤ_vec.==zero(eltype(Dᵤ_vec))] .= one(eltype(Dᵤ_vec))
    Dₚ⁻¹ = Diagonal(one(eltype(Dₚ_vec)) ./ Dₚ_vec)
    Dᵤ⁻¹ = Diagonal(one(eltype(Dᵤ_vec)) ./ Dᵤ_vec)
    ## Table 1 in "BiRank: Towards Ranking on Bipartite Graphs"

    is_hits = method == "hits"
    if is_hits
        S = W
        Sᵀ = Wᵀ
    elseif method == "cohits"
        ## S = W Dₚ⁻¹
        ## Sᵀ = Wᵀ Dᵤ⁻¹
        S = W * Dₚ⁻¹
        Sᵀ = Wᵀ * Dᵤ⁻¹
    elseif method == "bger"
        ## S = Dᵤ⁻¹ W
        ## Sᵀ = Dₚ⁻¹ Wᵀ
        S = Dᵤ⁻¹ * W
        Sᵀ = Dₚ⁻¹ * Wᵀ
    elseif method == "bgrm"
        ## S = Dᵤ⁻¹ W Dₚ⁻¹
        ## Sᵀ = Dₚ⁻¹ Wᵀ Dᵤ⁻¹
        S = (Dᵤ⁻¹ * W) * Dₚ⁻¹
        Sᵀ = transpose(S)
    elseif method == "birank"
        ## S = sqrt(Dᵤ)⁻¹ W sqrt(Dₚ)⁻¹
        ## Sᵀ = sqrt(Dₚ)⁻¹ Wᵀ sqrt(Dᵤ)⁻¹
        sqrtDᵤ⁻¹ = Diagonal(one(eltype(Dᵤ_vec)) ./ sqrt.(abs.(Dᵤ_vec)))
        sqrtDₚ⁻¹ = Diagonal(one(eltype(Dₚ_vec)) ./ sqrt.(abs.(Dₚ_vec)))
        S = (sqrtDᵤ⁻¹ * W) * sqrtDₚ⁻¹
        Sᵀ = transpose(S)
    else
        error("""method must be one of "HITS", "CoHITS", "BGER", "BGRM", "BiRank\"""")
    end
    isnothing(u⁰) && (u⁰ = fill(one(eltype(Sᵀ)) / size(W, 1), size(W, 1)))
    uₗ = copy(u⁰)
    isnothing(p⁰) && (p⁰ = fill(one(eltype(S)) / size(W, 2), size(W, 2)))
    pₗ = copy(p⁰)
    normalizer_names = Dict("hits" => "HITS", "cohits" => "CoHITS", "bger" => "BGER", "bgrm" => "BGRM", "birank" => "BiRank")
    progress = ProgressUnknown("Running $(normalizer_names[method])...")
    let u = copy(uₗ), p = copy(pₗ)
        for i in 1:max_iter
            p .= α * (Sᵀ * u) + (one(α) - α) * p⁰
            is_hits && (p ./= sum(p))
            u .= β * (S * p) + (one(β) - β) * u⁰
            is_hits && (u ./= sum(u))

            εₚ = sum(abs.(p - pₗ))
            εᵤ = sum(abs.(u - uₗ))
            if εₚ < tol && εᵤ < tol
                ProgressMeter.next!(progress)
                ProgressMeter.finish!(progress)
                @info "Converged after $i iterations"
                break
            end
            ProgressMeter.next!(progress)
            uₗ .= u
            pₗ .= p
            if i == max_iter
                ProgressMeter.finish!(progress)
                @warn "Not converged after $max_iter iterations. Consider increasing max_iter."
            end
        end
        return u, p
    end
end

end

One thing is that rather than passing method as a string, you should use a Symbol (or possibly dispatch). String comparisons happen letter by letter. Symbol comparisons are a single integer compare, and if you used multiple dispatch, it would all get done at compile time.

2 Likes

Also, instead of (p ./= sum(p)) you can do p .*= inv(sum(p)) which will replace O(n) divisions with 1 division and a bunch of multiplications.
You’re also missing a number of dots which are leading to a large number of unnecessary allocations.

2 Likes

Thank you! For multiple dispatch would you use Val{method} or is there another way?

That is very cool thanks! Should I also calculate inv.(sqrt.(abs.(Dᵤ_vec))) instead of one(eltype(Dᵤ_vec)) ./ sqrt.(abs.(Dᵤ_vec))

One commonly used interface for this is with structs. Specifically, you would define something like

abstract type BirankMethod
struct Hits end
struct Cohits end

(etc). and then call it as biranck(W, BiRank())

2 Likes

Alright, thank you!

yes although the ones in the loop matter more.

1 Like

I did a significant refactor based on @Oscar_Smith’s comments, thank you! Is it worth registering this as a package?

module BiRanks
using LinearAlgebra
using SparseArrays
using ProgressMeter

export birank
export BiRank, HITS, CoHITS, BGER, BGRM
abstract type BiRankMethod end
struct BiRank <: BiRankMethod end
struct HITS <: BiRankMethod end
struct CoHITS <: BiRankMethod end
struct BGER <: BiRankMethod end
struct BGRM <: BiRankMethod end

"""
birank(W, method;
α=0.85, β=0.85, u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10)
Implements Algorithm 1 in 
BiRank: Towards Ranking on Bipartite Graphs by
Xiangnan He, Ming Gao Member, Min-Yen Kan Member, and Dingxian Wang

- W: Weighted adjacency matrix (dim: |U| x |P|)
- method: one of HITS(), CoHITS(), BGER(), BGRM(), BiRank(). Default: BiRank()
- α: damping factor for 'p'
- β: damping factor for 'u'
- u⁰: query vector for 'u'. Defaults to 1/|U| for all elements.
- p⁰: query vector for 'p'. Defaults to 1/|P| for all elements.
- max_iter: maximum number of iterations
- tol: tolerance for convergence 
"""
birank(W) = birank(W, BiRank())
birank(W, α::Real, β::Real) = birank(W, BiRank(); α=α, β=β)
birank(W, method::BiRankMethod, α::Real, β::Real) = birank(W, method; α=α, β=β)

birank(W, method::HITS;
    α=0.85, β=0.85, u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10) = birank_inner(W, W, transpose(W), α, β, u⁰, p⁰, max_iter, tol, method)

function birank(W, method::CoHITS;
    α=0.85, β=0.85,
    u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10)
    @assert 0 <= α <= 1
    @assert 0 <= β <= 1
    Wᵀ, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
    ## S = W Dₚ⁻¹
    ## Sᵀ = Wᵀ Dᵤ⁻¹
    S = W * Dₚ⁻¹
    Sᵀ = Wᵀ * Dᵤ⁻¹
    return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BGER;
    α=0.85, β=0.85,
    u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10)
    @assert 0 <= α <= 1
    @assert 0 <= β <= 1
    Wᵀ, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
    ## S = Dᵤ⁻¹ W
    ## Sᵀ = Dₚ⁻¹ Wᵀ
    S = Dᵤ⁻¹ * W
    Sᵀ = Dₚ⁻¹ * Wᵀ
    return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BGRM;
    α=0.85, β=0.85,
    u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10)
    @assert 0 <= α <= 1
    @assert 0 <= β <= 1
    _, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
    ## S = Dᵤ⁻¹ W Dₚ⁻¹
    ## Sᵀ = Dₚ⁻¹ Wᵀ Dᵤ⁻¹
    S = (Dᵤ⁻¹ * W) * Dₚ⁻¹
    Sᵀ = transpose(S)
    return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BiRank;
    α=0.85, β=0.85,
    u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10)
    @assert 0 <= α <= 1
    @assert 0 <= β <= 1
    sqrtDᵤ⁻¹, sqrtDₚ⁻¹ = birank_inits(W, method)
    ## S = sqrt(Dᵤ)⁻¹ W sqrt(Dₚ)⁻¹
    ## Sᵀ = sqrt(Dₚ)⁻¹ Wᵀ sqrt(Dᵤ)⁻¹
    S = (sqrtDᵤ⁻¹ * W) * sqrtDₚ⁻¹
    Sᵀ = transpose(S)
    return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end


function birank_inits_common(W)
    ## Weighted degrees
    Dₚ_vec = sum(W; dims=1) |> vec
    Dᵤ_vec = sum(W; dims=2) |> vec
    ## Avoid division by 0  
    Dₚ_vec[Dₚ_vec.==zero(eltype(Dₚ_vec))] .= one(eltype(Dₚ_vec))
    Dᵤ_vec[Dᵤ_vec.==zero(eltype(Dᵤ_vec))] .= one(eltype(Dᵤ_vec))
    return Dₚ_vec, Dᵤ_vec
end

transpose_ifneeded(W, method::BiRankMethod) = transpose(W)
transpose_ifneeded(W, method::BGRM) = nothing

function birank_inits(W, method::BiRankMethod)
    Wᵀ = transpose_ifneeded(W, method)
    Dₚ_vec, Dᵤ_vec = birank_inits_common(W)
    Dₚ⁻¹ = Diagonal(inv.(Dₚ_vec))
    Dᵤ⁻¹ = Diagonal(inv.(Dᵤ_vec))
    return Wᵀ, Dₚ⁻¹, Dᵤ⁻¹
end

function birank_inits(W, method::BiRank)
    Dₚ_vec, Dᵤ_vec = birank_inits_common(W)
    sqrtDᵤ⁻¹ = Diagonal(inv.(sqrt.(abs.(Dᵤ_vec))))
    sqrtDₚ⁻¹ = Diagonal(inv.(sqrt.(abs.(Dₚ_vec))))
    return sqrtDᵤ⁻¹, sqrtDₚ⁻¹
end

normalize_!(v, m::BiRankMethod) = nothing
normalize_!(v, m::HITS) = v .*= inv(sum(v))

function birank_inner(W, S, Sᵀ,
    α, β,
    u⁰, p⁰,
    max_iter, tol, method::BiRankMethod)
    isnothing(u⁰) && (u⁰ = fill(one(eltype(Sᵀ)) / size(W, 1), size(W, 1)))
    uₗ = copy(u⁰)
    isnothing(p⁰) && (p⁰ = fill(one(eltype(S)) / size(W, 2), size(W, 2)))
    pₗ = copy(p⁰)
    progress = ProgressUnknown("Running $method...")
    let u = copy(uₗ), p = copy(pₗ)
        for i in 1:max_iter
            p .= α * (Sᵀ * u) + (one(α) - α) * p⁰
            normalize_!(p, method)
            u .= β * (S * p) + (one(β) - β) * u⁰
            normalize_!(u, method)

            εₚ = sum(abs.(p - pₗ))
            εᵤ = sum(abs.(u - uₗ))
            if εₚ < tol && εᵤ < tol
                ProgressMeter.next!(progress)
                ProgressMeter.finish!(progress)
                @info "Converged after $i iterations"
                break
            end
            ProgressMeter.next!(progress)
            uₗ .= u
            pₗ .= p
            if i == max_iter
                ProgressMeter.finish!(progress)
                @warn "Not converged after $max_iter iterations. Consider increasing max_iter."
            end
        end
        return u, p
    end
end

end
1 Like

Sure! No idea what this is useful for, but it looks good to me!

You could also contribute it to Graphs.jl?

I would prefer that. Are there any facilities for bipartite graphs in Graphs.jl? Right now the adjacency matrix is |U| x |P| so each have their own index rather than a global one for all vertices.

I am not aware of anything specific for bipartite graphs, but if you find an implementation you could contribute it to my list: The graphs ecosystem

Pinging @etienne_dg cause he knows better

1 Like

We currently have some algorithms that supports only bipartite graphs, but we do not have a specific type for bipartite graphs. Since it is not that costly to check, it is currently checked at the start, and error if not bipartite (see for example GraphsMatching.jl/src/hungarian.jl at master · JuliaGraphs/GraphsMatching.jl · GitHub).
Maybe we could consider adding a Trait in the Interface for bipartite graphs in a later version.

2 Likes

I’ll have a look. I’ll have to figure out what changes for a square adjacency matrix but that shouldn’t be a problem.

I optimized the code quite a bit more, especially regarding allocations. The user can now set overwrite=true to consume the matrix W to save RAM. E.g. for a matrix
776338×864116 SparseMatrixCSC{Float64, UInt32} with 46389429 stored entries it allocates 53.14 MiB with overwrite=true vs. 587.325 MiB with overwrite=false

module BiRanks
using LinearAlgebra
using SparseArrays
using ProgressMeter
import Profile
export birank
export BiRank, HITS, CoHITS, BGER, BGRM
abstract type BiRankMethod end
struct BiRank <: BiRankMethod end
struct HITS <: BiRankMethod end
struct CoHITS <: BiRankMethod end
struct BGER <: BiRankMethod end
struct BGRM <: BiRankMethod end

"""
birank(W, method;
α=0.85, β=0.85, u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10)
Implements Algorithm 1 in 
BiRank: Towards Ranking on Bipartite Graphs by
Xiangnan He, Ming Gao Member, Min-Yen Kan Member, and Dingxian Wang

- W: Weighted adjacency matrix (dim: |U| x |P|)
- method: one of HITS(), CoHITS(), BGER(), BGRM(), BiRank(). Default: BiRank()
- α: damping factor for 'p'
- β: damping factor for 'u'
- u⁰: query vector for 'u'. Defaults to 1/|U| for all elements.
- p⁰: query vector for 'p'. Defaults to 1/|P| for all elements.
- max_iter: maximum number of iterations
- tol: tolerance for convergence 
"""
birank(W) = birank(W, BiRank())
birank(W, α::Real, β::Real) = birank(W, BiRank(); α=α, β=β)
birank(W, method::BiRankMethod, α::Real, β::Real) = birank(W, method; α=α, β=β)

birank(W, method::HITS;
    α=0.85, β=0.85, u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10) = birank_inner(W, W, transpose(W), α, β, u⁰, p⁰, max_iter, tol, method)

function birank(W, method::CoHITS;
    α=0.85, β=0.85,
    u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10, overwrite=false)
    Wᵀ, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
    ## S = W Dₚ⁻¹
    ## Sᵀ = Wᵀ Dᵤ⁻¹
    if !overwrite
        W = copy(W)
    end
    S = rmul!(W, Dₚ⁻¹)
    Sᵀ = rmul!(Wᵀ, Dᵤ⁻¹)
    return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BGER;
    α=0.85, β=0.85,
    u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10, overwrite=false)
    Wᵀ, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
    ## S = Dᵤ⁻¹ W
    ## Sᵀ = Dₚ⁻¹ Wᵀ
    if !overwrite
        W = copy(W)
    end
    S = lmul!(Dᵤ⁻¹, W)
    Sᵀ = lmul!(Dₚ⁻¹, Wᵀ)
    return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BGRM;
    α=0.85, β=0.85,
    u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10, overwrite=false)
    _, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
    ## S = Dᵤ⁻¹ W Dₚ⁻¹
    ## Sᵀ = Dₚ⁻¹ Wᵀ Dᵤ⁻¹
    if !overwrite
        W = copy(W)
    end
    S = rmul!(lmul!(Dᵤ⁻¹, W), Dₚ⁻¹)
    Sᵀ = transpose(S)
    return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BiRank;
    α=0.85, β=0.85,
    u⁰=nothing, p⁰=nothing,
    max_iter=200, tol=1.0e-10, overwrite=false)
    sqrtDᵤ⁻¹, sqrtDₚ⁻¹ = birank_inits(W, method)
    ## S = sqrt(Dᵤ)⁻¹ W sqrt(Dₚ)⁻¹
    ## Sᵀ = sqrt(Dₚ)⁻¹ Wᵀ sqrt(Dᵤ)⁻¹
    if !overwrite
        W = copy(W)
    end
    #S = (sqrtDᵤ⁻¹ * W) * sqrtDₚ⁻¹
    S = rmul!(lmul!(sqrtDᵤ⁻¹, W), sqrtDₚ⁻¹)
    Sᵀ = transpose(S)
    return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end


function birank_inits_common(W)
    @assert eltype(W) <: AbstractFloat "W must be a matrix of floats"
    ## Weighted degrees
    Dₚ_vec = sum(W; dims=1) |> vec
    Dᵤ_vec = sum(W; dims=2) |> vec
    ## Avoid division by 0  
    replace_zeros!(Dₚ_vec)
    replace_zeros!(Dᵤ_vec)
    return Dₚ_vec, Dᵤ_vec
end

transpose_ifneeded(W, method::BiRankMethod) = transpose(W)
transpose_ifneeded(W, method::BGRM) = nothing

function birank_inits(W, method::BiRankMethod)
    Wᵀ = transpose_ifneeded(W, method)
    Dₚ_vec, Dᵤ_vec = birank_inits_common(W)
    Dₚ⁻¹ = Diagonal(elem_inv!(Dₚ_vec))
    Dᵤ⁻¹ = Diagonal(elem_inv!(Dᵤ_vec))
    return Wᵀ, Dₚ⁻¹, Dᵤ⁻¹
end

function birank_inits(W, method::BiRank)
    Dₚ_vec, Dᵤ_vec = birank_inits_common(W)
    sqrtDᵤ⁻¹ = Diagonal(elem_sqrt_inv!(Dᵤ_vec))
    sqrtDₚ⁻¹ = Diagonal(elem_sqrt_inv!(Dₚ_vec))
    return sqrtDᵤ⁻¹, sqrtDₚ⁻¹
end

normalize_!(v, m::BiRankMethod) = nothing
normalize_!(v, m::HITS) = v .*= inv(sum(v))

function birank_inner(W, S, Sᵀ,
    α, β,
    u⁰, p⁰,
    max_iter, tol, method::BiRankMethod)
    @assert 0 <= α <= 1
    @assert 0 <= β <= 1
    isnothing(u⁰) && (u⁰ = fill(one(eltype(Sᵀ)) / size(W, 1), size(W, 1)))
    uₗ = copy(u⁰)
    isnothing(p⁰) && (p⁰ = fill(one(eltype(S)) / size(W, 2), size(W, 2)))
    pₗ = copy(p⁰)
    progress = ProgressUnknown("Running $method...")
    let u = copy(uₗ), p = copy(pₗ)
        for i in 1:max_iter
            p .= α .* mul!(p, Sᵀ, u) .+ (one(α) - α) .* p⁰
            normalize_!(p, method)
            u .= β .* mul!(u, S, p) .+ (one(β) - β) .* u⁰
            normalize_!(u, method)

            εₚ = sum_absdiff(p, pₗ)
            εᵤ = sum_absdiff(u, uₗ)

            if εₚ < tol && εᵤ < tol
                ProgressMeter.next!(progress)
                ProgressMeter.finish!(progress)
                @info "Converged after $i iterations"
                break
            end

            ProgressMeter.next!(progress; showvalues=[("εₚ", εₚ), ("εᵤ", εᵤ)])
            uₗ .= u
            pₗ .= p
            if i == max_iter
                ProgressMeter.finish!(progress)
                @warn "Not converged after $max_iter iterations. Consider increasing max_iter."
            end
        end
        return u, p
    end
end

function sum_absdiff(x::Vector{T}, y::Vector{T}) where {T}
    s = zero(T)
    @simd for i in eachindex(x, y)
        s += abs(x[i] - y[i])
    end
    return s
end

function replace_zeros!(x::Vector{T}) where {T}
    @simd for i in eachindex(x)
        if x[i] == zero(T)
            x[i] = one(T)
        end
    end
    return x
end

function elem_inv!(x::Vector{T}) where {T}
    @simd for i in eachindex(x)
        x[i] = inv(x[i])
    end
    return x
end

function elem_sqrt_inv!(x::Vector{T}) where {T}
    @simd for i in eachindex(x)
        x[i] = inv(sqrt(abs(x[i])))
    end
    return x
end

end