# Any speed improvements to the implementation of `BiRank`?

Hi All,

I implemented the paper BiRank: Towards Ranking on Bipartite Graphs in Julia (R and Python implementations already exist here). I was wondering if there are any obvious performance problems I missed? I noticed large differences when the `eltype` of my Sparse `W` is `UInt32` (much slower) compared to `Float64`. Also should I try to integrate it with Graphs.jl given that it is specifically for bipartite graphs? Or create a separate package?

Thanks!

``````
module BiRank
using LinearAlgebra
using SparseArrays
using ProgressMeter

export birank

"""
birank(W; method="BiRank",
α=0.85, β=0.85, u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10)
Implements Algorithm 1 in
BiRank: Towards Ranking on Bipartite Graphs by
Xiangnan He, Ming Gao Member, Min-Yen Kan Member, and Dingxian Wang

- W: Weighted adjacency matrix (dim: |U| x |P|)
- method: one of "HITS", "CoHITS", "BGER", "BGRM", "BiRank"
- α: damping factor for 'p'
- β: damping factor for 'u'
- u⁰: query vector for 'u'. Defaults to 1/|U| for all elements.
- p⁰: query vector for 'p'. Defaults to 1/|P| for all elements.
- max_iter: maximum number of iterations
- tol: tolerance for convergence
"""
function birank(W; method="BiRank",
α=0.85, β=0.85,
u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10)
method = lowercase(method)
Wᵀ = transpose(W)
## Weighted degrees
Dₚ_vec = sum(W; dims=1) |> vec
Dᵤ_vec = sum(W; dims=2) |> vec
## Avoid division by 0
Dₚ_vec[Dₚ_vec.==zero(eltype(Dₚ_vec))] .= one(eltype(Dₚ_vec))
Dᵤ_vec[Dᵤ_vec.==zero(eltype(Dᵤ_vec))] .= one(eltype(Dᵤ_vec))
Dₚ⁻¹ = Diagonal(one(eltype(Dₚ_vec)) ./ Dₚ_vec)
Dᵤ⁻¹ = Diagonal(one(eltype(Dᵤ_vec)) ./ Dᵤ_vec)
## Table 1 in "BiRank: Towards Ranking on Bipartite Graphs"

is_hits = method == "hits"
if is_hits
S = W
Sᵀ = Wᵀ
elseif method == "cohits"
## S = W Dₚ⁻¹
## Sᵀ = Wᵀ Dᵤ⁻¹
S = W * Dₚ⁻¹
Sᵀ = Wᵀ * Dᵤ⁻¹
elseif method == "bger"
## S = Dᵤ⁻¹ W
## Sᵀ = Dₚ⁻¹ Wᵀ
S = Dᵤ⁻¹ * W
Sᵀ = Dₚ⁻¹ * Wᵀ
elseif method == "bgrm"
## S = Dᵤ⁻¹ W Dₚ⁻¹
## Sᵀ = Dₚ⁻¹ Wᵀ Dᵤ⁻¹
S = (Dᵤ⁻¹ * W) * Dₚ⁻¹
Sᵀ = transpose(S)
elseif method == "birank"
## S = sqrt(Dᵤ)⁻¹ W sqrt(Dₚ)⁻¹
## Sᵀ = sqrt(Dₚ)⁻¹ Wᵀ sqrt(Dᵤ)⁻¹
sqrtDᵤ⁻¹ = Diagonal(one(eltype(Dᵤ_vec)) ./ sqrt.(abs.(Dᵤ_vec)))
sqrtDₚ⁻¹ = Diagonal(one(eltype(Dₚ_vec)) ./ sqrt.(abs.(Dₚ_vec)))
S = (sqrtDᵤ⁻¹ * W) * sqrtDₚ⁻¹
Sᵀ = transpose(S)
else
error("""method must be one of "HITS", "CoHITS", "BGER", "BGRM", "BiRank\"""")
end
isnothing(u⁰) && (u⁰ = fill(one(eltype(Sᵀ)) / size(W, 1), size(W, 1)))
uₗ = copy(u⁰)
isnothing(p⁰) && (p⁰ = fill(one(eltype(S)) / size(W, 2), size(W, 2)))
pₗ = copy(p⁰)
normalizer_names = Dict("hits" => "HITS", "cohits" => "CoHITS", "bger" => "BGER", "bgrm" => "BGRM", "birank" => "BiRank")
progress = ProgressUnknown("Running \$(normalizer_names[method])...")
let u = copy(uₗ), p = copy(pₗ)
for i in 1:max_iter
p .= α * (Sᵀ * u) + (one(α) - α) * p⁰
is_hits && (p ./= sum(p))
u .= β * (S * p) + (one(β) - β) * u⁰
is_hits && (u ./= sum(u))

εₚ = sum(abs.(p - pₗ))
εᵤ = sum(abs.(u - uₗ))
if εₚ < tol && εᵤ < tol
ProgressMeter.next!(progress)
ProgressMeter.finish!(progress)
@info "Converged after \$i iterations"
break
end
ProgressMeter.next!(progress)
uₗ .= u
pₗ .= p
if i == max_iter
ProgressMeter.finish!(progress)
@warn "Not converged after \$max_iter iterations. Consider increasing max_iter."
end
end
return u, p
end
end

end
``````

One thing is that rather than passing `method` as a string, you should use a `Symbol` (or possibly dispatch). String comparisons happen letter by letter. Symbol comparisons are a single integer compare, and if you used multiple dispatch, it would all get done at compile time.

2 Likes

Also, instead of `(p ./= sum(p))` you can do `p .*= inv(sum(p))` which will replace O(n) divisions with 1 division and a bunch of multiplications.
You’re also missing a number of dots which are leading to a large number of unnecessary allocations.

2 Likes

Thank you! For multiple dispatch would you use `Val{method}` or is there another way?

That is very cool thanks! Should I also calculate `inv.(sqrt.(abs.(Dᵤ_vec)))` instead of `one(eltype(Dᵤ_vec)) ./ sqrt.(abs.(Dᵤ_vec))`

One commonly used interface for this is with structs. Specifically, you would define something like

``````abstract type BirankMethod
struct Hits end
struct Cohits end
``````

(etc). and then call it as `biranck(W, BiRank())`

2 Likes

Alright, thank you!

yes although the ones in the loop matter more.

1 Like

I did a significant refactor based on @Oscar_Smith’s comments, thank you! Is it worth registering this as a package?

``````module BiRanks
using LinearAlgebra
using SparseArrays
using ProgressMeter

export birank
export BiRank, HITS, CoHITS, BGER, BGRM
abstract type BiRankMethod end
struct BiRank <: BiRankMethod end
struct HITS <: BiRankMethod end
struct CoHITS <: BiRankMethod end
struct BGER <: BiRankMethod end
struct BGRM <: BiRankMethod end

"""
birank(W, method;
α=0.85, β=0.85, u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10)
Implements Algorithm 1 in
BiRank: Towards Ranking on Bipartite Graphs by
Xiangnan He, Ming Gao Member, Min-Yen Kan Member, and Dingxian Wang

- W: Weighted adjacency matrix (dim: |U| x |P|)
- method: one of HITS(), CoHITS(), BGER(), BGRM(), BiRank(). Default: BiRank()
- α: damping factor for 'p'
- β: damping factor for 'u'
- u⁰: query vector for 'u'. Defaults to 1/|U| for all elements.
- p⁰: query vector for 'p'. Defaults to 1/|P| for all elements.
- max_iter: maximum number of iterations
- tol: tolerance for convergence
"""
birank(W) = birank(W, BiRank())
birank(W, α::Real, β::Real) = birank(W, BiRank(); α=α, β=β)
birank(W, method::BiRankMethod, α::Real, β::Real) = birank(W, method; α=α, β=β)

birank(W, method::HITS;
α=0.85, β=0.85, u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10) = birank_inner(W, W, transpose(W), α, β, u⁰, p⁰, max_iter, tol, method)

function birank(W, method::CoHITS;
α=0.85, β=0.85,
u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10)
@assert 0 <= α <= 1
@assert 0 <= β <= 1
Wᵀ, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
## S = W Dₚ⁻¹
## Sᵀ = Wᵀ Dᵤ⁻¹
S = W * Dₚ⁻¹
Sᵀ = Wᵀ * Dᵤ⁻¹
return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BGER;
α=0.85, β=0.85,
u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10)
@assert 0 <= α <= 1
@assert 0 <= β <= 1
Wᵀ, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
## S = Dᵤ⁻¹ W
## Sᵀ = Dₚ⁻¹ Wᵀ
S = Dᵤ⁻¹ * W
Sᵀ = Dₚ⁻¹ * Wᵀ
return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BGRM;
α=0.85, β=0.85,
u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10)
@assert 0 <= α <= 1
@assert 0 <= β <= 1
_, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
## S = Dᵤ⁻¹ W Dₚ⁻¹
## Sᵀ = Dₚ⁻¹ Wᵀ Dᵤ⁻¹
S = (Dᵤ⁻¹ * W) * Dₚ⁻¹
Sᵀ = transpose(S)
return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BiRank;
α=0.85, β=0.85,
u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10)
@assert 0 <= α <= 1
@assert 0 <= β <= 1
sqrtDᵤ⁻¹, sqrtDₚ⁻¹ = birank_inits(W, method)
## S = sqrt(Dᵤ)⁻¹ W sqrt(Dₚ)⁻¹
## Sᵀ = sqrt(Dₚ)⁻¹ Wᵀ sqrt(Dᵤ)⁻¹
S = (sqrtDᵤ⁻¹ * W) * sqrtDₚ⁻¹
Sᵀ = transpose(S)
return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank_inits_common(W)
## Weighted degrees
Dₚ_vec = sum(W; dims=1) |> vec
Dᵤ_vec = sum(W; dims=2) |> vec
## Avoid division by 0
Dₚ_vec[Dₚ_vec.==zero(eltype(Dₚ_vec))] .= one(eltype(Dₚ_vec))
Dᵤ_vec[Dᵤ_vec.==zero(eltype(Dᵤ_vec))] .= one(eltype(Dᵤ_vec))
return Dₚ_vec, Dᵤ_vec
end

transpose_ifneeded(W, method::BiRankMethod) = transpose(W)
transpose_ifneeded(W, method::BGRM) = nothing

function birank_inits(W, method::BiRankMethod)
Wᵀ = transpose_ifneeded(W, method)
Dₚ_vec, Dᵤ_vec = birank_inits_common(W)
Dₚ⁻¹ = Diagonal(inv.(Dₚ_vec))
Dᵤ⁻¹ = Diagonal(inv.(Dᵤ_vec))
return Wᵀ, Dₚ⁻¹, Dᵤ⁻¹
end

function birank_inits(W, method::BiRank)
Dₚ_vec, Dᵤ_vec = birank_inits_common(W)
sqrtDᵤ⁻¹ = Diagonal(inv.(sqrt.(abs.(Dᵤ_vec))))
sqrtDₚ⁻¹ = Diagonal(inv.(sqrt.(abs.(Dₚ_vec))))
return sqrtDᵤ⁻¹, sqrtDₚ⁻¹
end

normalize_!(v, m::BiRankMethod) = nothing
normalize_!(v, m::HITS) = v .*= inv(sum(v))

function birank_inner(W, S, Sᵀ,
α, β,
u⁰, p⁰,
max_iter, tol, method::BiRankMethod)
isnothing(u⁰) && (u⁰ = fill(one(eltype(Sᵀ)) / size(W, 1), size(W, 1)))
uₗ = copy(u⁰)
isnothing(p⁰) && (p⁰ = fill(one(eltype(S)) / size(W, 2), size(W, 2)))
pₗ = copy(p⁰)
progress = ProgressUnknown("Running \$method...")
let u = copy(uₗ), p = copy(pₗ)
for i in 1:max_iter
p .= α * (Sᵀ * u) + (one(α) - α) * p⁰
normalize_!(p, method)
u .= β * (S * p) + (one(β) - β) * u⁰
normalize_!(u, method)

εₚ = sum(abs.(p - pₗ))
εᵤ = sum(abs.(u - uₗ))
if εₚ < tol && εᵤ < tol
ProgressMeter.next!(progress)
ProgressMeter.finish!(progress)
@info "Converged after \$i iterations"
break
end
ProgressMeter.next!(progress)
uₗ .= u
pₗ .= p
if i == max_iter
ProgressMeter.finish!(progress)
@warn "Not converged after \$max_iter iterations. Consider increasing max_iter."
end
end
return u, p
end
end

end
``````
1 Like

Sure! No idea what this is useful for, but it looks good to me!

You could also contribute it to Graphs.jl?

I would prefer that. Are there any facilities for bipartite graphs in Graphs.jl? Right now the adjacency matrix is |U| x |P| so each have their own index rather than a global one for all vertices.

I am not aware of anything specific for bipartite graphs, but if you find an implementation you could contribute it to my list: The graphs ecosystem

Pinging @etienne_dg cause he knows better

1 Like

We currently have some algorithms that supports only bipartite graphs, but we do not have a specific type for bipartite graphs. Since it is not that costly to check, it is currently checked at the start, and error if not bipartite (see for example GraphsMatching.jl/src/hungarian.jl at master · JuliaGraphs/GraphsMatching.jl · GitHub).
Maybe we could consider adding a Trait in the Interface for bipartite graphs in a later version.

2 Likes

I’ll have a look. I’ll have to figure out what changes for a square adjacency matrix but that shouldn’t be a problem.

I optimized the code quite a bit more, especially regarding allocations. The user can now set `overwrite=true` to consume the matrix `W` to save RAM. E.g. for a matrix
`776338×864116 SparseMatrixCSC{Float64, UInt32} with 46389429 stored entries` it allocates 53.14 MiB with `overwrite=true` vs. 587.325 MiB with `overwrite=false`

``````module BiRanks
using LinearAlgebra
using SparseArrays
using ProgressMeter
import Profile
export birank
export BiRank, HITS, CoHITS, BGER, BGRM
abstract type BiRankMethod end
struct BiRank <: BiRankMethod end
struct HITS <: BiRankMethod end
struct CoHITS <: BiRankMethod end
struct BGER <: BiRankMethod end
struct BGRM <: BiRankMethod end

"""
birank(W, method;
α=0.85, β=0.85, u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10)
Implements Algorithm 1 in
BiRank: Towards Ranking on Bipartite Graphs by
Xiangnan He, Ming Gao Member, Min-Yen Kan Member, and Dingxian Wang

- W: Weighted adjacency matrix (dim: |U| x |P|)
- method: one of HITS(), CoHITS(), BGER(), BGRM(), BiRank(). Default: BiRank()
- α: damping factor for 'p'
- β: damping factor for 'u'
- u⁰: query vector for 'u'. Defaults to 1/|U| for all elements.
- p⁰: query vector for 'p'. Defaults to 1/|P| for all elements.
- max_iter: maximum number of iterations
- tol: tolerance for convergence
"""
birank(W) = birank(W, BiRank())
birank(W, α::Real, β::Real) = birank(W, BiRank(); α=α, β=β)
birank(W, method::BiRankMethod, α::Real, β::Real) = birank(W, method; α=α, β=β)

birank(W, method::HITS;
α=0.85, β=0.85, u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10) = birank_inner(W, W, transpose(W), α, β, u⁰, p⁰, max_iter, tol, method)

function birank(W, method::CoHITS;
α=0.85, β=0.85,
u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10, overwrite=false)
Wᵀ, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
## S = W Dₚ⁻¹
## Sᵀ = Wᵀ Dᵤ⁻¹
if !overwrite
W = copy(W)
end
S = rmul!(W, Dₚ⁻¹)
Sᵀ = rmul!(Wᵀ, Dᵤ⁻¹)
return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BGER;
α=0.85, β=0.85,
u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10, overwrite=false)
Wᵀ, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
## S = Dᵤ⁻¹ W
## Sᵀ = Dₚ⁻¹ Wᵀ
if !overwrite
W = copy(W)
end
S = lmul!(Dᵤ⁻¹, W)
Sᵀ = lmul!(Dₚ⁻¹, Wᵀ)
return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BGRM;
α=0.85, β=0.85,
u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10, overwrite=false)
_, Dₚ⁻¹, Dᵤ⁻¹ = birank_inits(W, method)
## S = Dᵤ⁻¹ W Dₚ⁻¹
## Sᵀ = Dₚ⁻¹ Wᵀ Dᵤ⁻¹
if !overwrite
W = copy(W)
end
S = rmul!(lmul!(Dᵤ⁻¹, W), Dₚ⁻¹)
Sᵀ = transpose(S)
return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank(W, method::BiRank;
α=0.85, β=0.85,
u⁰=nothing, p⁰=nothing,
max_iter=200, tol=1.0e-10, overwrite=false)
sqrtDᵤ⁻¹, sqrtDₚ⁻¹ = birank_inits(W, method)
## S = sqrt(Dᵤ)⁻¹ W sqrt(Dₚ)⁻¹
## Sᵀ = sqrt(Dₚ)⁻¹ Wᵀ sqrt(Dᵤ)⁻¹
if !overwrite
W = copy(W)
end
#S = (sqrtDᵤ⁻¹ * W) * sqrtDₚ⁻¹
S = rmul!(lmul!(sqrtDᵤ⁻¹, W), sqrtDₚ⁻¹)
Sᵀ = transpose(S)
return birank_inner(W, S, Sᵀ, α, β, u⁰, p⁰, max_iter, tol, method)
end

function birank_inits_common(W)
@assert eltype(W) <: AbstractFloat "W must be a matrix of floats"
## Weighted degrees
Dₚ_vec = sum(W; dims=1) |> vec
Dᵤ_vec = sum(W; dims=2) |> vec
## Avoid division by 0
replace_zeros!(Dₚ_vec)
replace_zeros!(Dᵤ_vec)
return Dₚ_vec, Dᵤ_vec
end

transpose_ifneeded(W, method::BiRankMethod) = transpose(W)
transpose_ifneeded(W, method::BGRM) = nothing

function birank_inits(W, method::BiRankMethod)
Wᵀ = transpose_ifneeded(W, method)
Dₚ_vec, Dᵤ_vec = birank_inits_common(W)
Dₚ⁻¹ = Diagonal(elem_inv!(Dₚ_vec))
Dᵤ⁻¹ = Diagonal(elem_inv!(Dᵤ_vec))
return Wᵀ, Dₚ⁻¹, Dᵤ⁻¹
end

function birank_inits(W, method::BiRank)
Dₚ_vec, Dᵤ_vec = birank_inits_common(W)
sqrtDᵤ⁻¹ = Diagonal(elem_sqrt_inv!(Dᵤ_vec))
sqrtDₚ⁻¹ = Diagonal(elem_sqrt_inv!(Dₚ_vec))
return sqrtDᵤ⁻¹, sqrtDₚ⁻¹
end

normalize_!(v, m::BiRankMethod) = nothing
normalize_!(v, m::HITS) = v .*= inv(sum(v))

function birank_inner(W, S, Sᵀ,
α, β,
u⁰, p⁰,
max_iter, tol, method::BiRankMethod)
@assert 0 <= α <= 1
@assert 0 <= β <= 1
isnothing(u⁰) && (u⁰ = fill(one(eltype(Sᵀ)) / size(W, 1), size(W, 1)))
uₗ = copy(u⁰)
isnothing(p⁰) && (p⁰ = fill(one(eltype(S)) / size(W, 2), size(W, 2)))
pₗ = copy(p⁰)
progress = ProgressUnknown("Running \$method...")
let u = copy(uₗ), p = copy(pₗ)
for i in 1:max_iter
p .= α .* mul!(p, Sᵀ, u) .+ (one(α) - α) .* p⁰
normalize_!(p, method)
u .= β .* mul!(u, S, p) .+ (one(β) - β) .* u⁰
normalize_!(u, method)

εₚ = sum_absdiff(p, pₗ)
εᵤ = sum_absdiff(u, uₗ)

if εₚ < tol && εᵤ < tol
ProgressMeter.next!(progress)
ProgressMeter.finish!(progress)
@info "Converged after \$i iterations"
break
end

ProgressMeter.next!(progress; showvalues=[("εₚ", εₚ), ("εᵤ", εᵤ)])
uₗ .= u
pₗ .= p
if i == max_iter
ProgressMeter.finish!(progress)
@warn "Not converged after \$max_iter iterations. Consider increasing max_iter."
end
end
return u, p
end
end

function sum_absdiff(x::Vector{T}, y::Vector{T}) where {T}
s = zero(T)
@simd for i in eachindex(x, y)
s += abs(x[i] - y[i])
end
return s
end

function replace_zeros!(x::Vector{T}) where {T}
@simd for i in eachindex(x)
if x[i] == zero(T)
x[i] = one(T)
end
end
return x
end

function elem_inv!(x::Vector{T}) where {T}
@simd for i in eachindex(x)
x[i] = inv(x[i])
end
return x
end

function elem_sqrt_inv!(x::Vector{T}) where {T}
@simd for i in eachindex(x)
x[i] = inv(sqrt(abs(x[i])))
end
return x
end

end
``````