KrylovKit eigsolve of LinearMap giving different values on CPU and GPU

I am trying to compare the eigsolve function on the CPU and on the GPU, using the following code:

using CUDA
using CUDA.CUSPARSE
using LinearAlgebra
using LinearMaps
using SparseArrays
using Random
using KrylovKit

function eign_cpu(M::Union{AbstractMatrix,LinearMap{T}}, v0::Vector{T}) where {T<:Number}
    tol = 1e-10
    (E, V, info) = eigsolve(M, v0, 1, :SR; ishermitian=true, tol=tol)
    return (E[1], V[1], info)
end

function eign_gpu(M::Union{CuArray,LinearMap{T}}, v0::CuVector{T}) where {T<:Number}
    tol = 1e-10
    (E, V, info) = eigsolve(M, v0, 1, :SR; ishermitian=true, tol=tol)
    return (E[1], V[1], info)
end

Random.seed!(0)

n = 4
m = 4
Es = [1, 1, 2, 3]
Ed = [2, 3, 4, 4]

w = ones(4)
sw = sqrt.(w)
sw_d = CuArray(sw)

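# C implements the rank-one map y -> -sw * (sw' * y), i.e. C = -sw * sw',
# once for CPU arrays and once for CuArrays.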
C = LinearMap{Float64}((out::AbstractArray, y::AbstractArray) -> (out .= sw; out .*= -dot(sw, y)), n, ismutating=true, issymmetric=true)
C_d = LinearMap{Float64}((out::CuArray, y::CuArray) -> (out .= sw_d; out .*= -dot(sw_d, y)), n, ismutating=true, issymmetric=true)

M = sparse(Es, Ed, ones(Float64, m), n, n)
M_d = CuSparseMatrixCSC(M)

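# adj overwrites the stored non-zeros of M with p/2 and returns the symmetrized
# matrix; M itself holds only the upper triangle, so S = C + adj(p) below is symmetric.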
adj_cpu = p::AbstractArray -> (M.nzval .= p ./ 2; Symmetric(M))
adj_gpu = p::CuArray -> (M_d.nzVal .= p ./ 2; Symmetric(M_d))

p = rand(m, 1)
p_d = CuArray(p)

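# Starting vector: eig_grad perturbed in place by small Gaussian noise (scale 1e-4).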
aux = Vector{Float64}(undef, n)
eig_grad = randn(Float64, n)
v0 = ((randn!(aux) .*= 1e-4) .+= eig_grad)
v0_d = CuArray(v0)

if isapprox(v0, Array(v0_d))
    println("v0 and v0_d are approximately equal")
end
if isapprox(sw, Array(sw_d))
    println("sw and sw_d are approximately equal")
end
if isapprox(M, SparseMatrixCSC(M_d))
    println("M and M_d are approximately equal")
end
S = C + adj_cpu(p)
(E_cpu, V_cpu, info_cpu) = eign_cpu(S, v0)
println("info cpu = ", info_cpu)
S_d = C_d + adj_gpu(p_d)
(E_gpu, V_gpu, info_gpu) = eign_gpu(S_d, v0_d)
println("info gpu = ", info_gpu)

if !isapprox(V_cpu, Array(V_gpu))
    println("WARN: diverging eigenvalues between cpu and gpu:\n",
        "E_cpu = ", E_cpu, " V_cpu = ", V_cpu, "\nE_gpu = ", E_gpu, " V_gpu = ", V_gpu)
    readline()
end

The isapprox checks for the input data all return true. However, the output is:

info cpu = ConvergenceInfo: 4 converged values after 1 iterations and 4 applications of the linear map;
norms of residuals are given by (1.3683491030815672e-36, 6.545806401840613e-35, 6.344902430327517e-34, 8.827885896605726e-33).

info gpu = ConvergenceInfo: 4 converged values after 1 iterations and 4 applications of the linear map;
norms of residuals are given by (7.776628566574466e-20, 1.087879714604506e-17, 4.976482876540383e-17, 3.382716406082749e-17).

WARN: diverging eigenvalues between cpu and gpu:
E_cpu = -3.6566857184706016 V_cpu = [-0.5154937434198816, -0.46021338805521755, -0.5355038728938648, -0.485495046386023]
E_gpu = -3.7700145739756743 V_gpu = [0.48275625184457943, 0.47793647243109605, 0.52265534023327, 0.5151257370300364]

I expected the results to be approximately equal.
Am I missing something in this experiment, or making a wrong assumption somewhere?

Just to be sure, did you check that S and S_d are the same?
Is there a guarantee that M.nzval and M_d.nzVal refer to the same non-zero elements, in the same order?
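
For small n you can materialize both operators by applying them to the standard basis vectors and compare the resulting dense matrices. A minimal sketch using the names from your code:

e(j) = Float64[i == j for i in 1:n]
S_mat   = hcat([S * e(j) for j in 1:n]...)                    # CPU operator as a dense matrix
S_d_mat = hcat([Array(S_d * CuVector(e(j))) for j in 1:n]...) # GPU operator, copied back to host
println("S ≈ S_d: ", isapprox(S_mat, S_d_mat))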

When running:

println("M = ", M)
println("M_d = ", SparseMatrixCSC(M_d))

I get

M = sparse([1, 1, 2, 3], [2, 3, 4, 4], [1.0, 1.0, 1.0, 1.0], 4, 4)
M_d = sparse(Int32[1, 1, 2, 3], Int32[2, 3, 4, 4], [1.0, 1.0, 1.0, 1.0], 4, 4)

So the only difference is the type of the indices; do you think that has any influence?
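
To rule that out, I suppose I could rebuild the CPU matrix with Int32 indices and rerun the CPU path. A sketch:

M32 = convert(SparseMatrixCSC{Float64,Int32}, M)  # mirror the GPU index type on the CPU
adj_cpu32 = p::AbstractArray -> (M32.nzval .= p ./ 2; Symmetric(M32))
(E_cpu32, V_cpu32, _) = eign_cpu(C + adj_cpu32(p), v0)
println("Int32 indices change the CPU result: ", !isapprox(V_cpu, V_cpu32))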

I think you have encountered a bug in the CUDA sparse array implementation: multiplication by the Symmetric matrix returned by adj_gpu is incomplete.
There is an open issue for this in CUDA.jl
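
You can isolate it with a single matrix-vector product. A minimal sketch with the names from your code; if the GPU Symmetric matvec is the culprit, these two results should disagree:

y_cpu = Symmetric(M) * v0             # reference product on the CPU
y_gpu = Array(Symmetric(M_d) * v0_d)  # same product on the GPU, copied back to host
println("Symmetric matvec agrees: ", isapprox(y_cpu, y_gpu))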

Thank you. That’s unfortunate.

I tried removing the Symmetric call from the adjs:

adj_cpu = p::AbstractArray -> (M.nzval .= p ./ 2; M)
adj_gpu = p::CuArray -> (M_d.nzVal .= p ./ 2; M_d)

Now the GPU and the CPU return the same minimum eigenvalue, but with different eigenvectors:

WARN: diverging eigenvalues between cpu and gpu:
E_cpu = -3.770014573975674 V_cpu = [-0.4827562518445795, -0.47793647243109577, -0.5226553402332699, -0.5151257370300364]
E_gpu = -3.7700145739756743 V_gpu = [0.48275625184457943, 0.47793647243109605, 0.52265534023327, 0.5151257370300364]

However, the eigenvalue equation S * V ≈ E * V does not hold on either the CPU or the GPU:

if !isapprox(V_cpu, Array(V_gpu))
    println("WARN: diverging eigenvalues between cpu and gpu:\n",
        "E_cpu = ", E_cpu, " V_cpu = ", V_cpu, "\nE_gpu = ", E_gpu, " V_gpu = ", V_gpu)
    lhs_gpu = S_d * V_gpu
    rhs_gpu = E_gpu * V_gpu
    println("lhs gpu = ", lhs_gpu)
    println("rhs gpu = ", rhs_gpu)

    lhs_cpu = S * V_cpu
    rhs_cpu = E_cpu * V_cpu
    println("lhs cpu = ", lhs_cpu)
    println("rhs cpu = ", rhs_cpu)
    readline()
end

Output:

WARN: diverging eigenvalues between cpu and gpu:
E_cpu = -3.770014573975674 V_cpu = [-0.4827562518445795, -0.47793647243109577, -0.5226553402332699, -0.5151257370300364]
E_gpu = -3.7700145739756743 V_gpu = [0.48275625184457943, 0.47793647243109605, 0.52265534023327, 0.5151257370300364]
lhs gpu = [-1.8836115939631806, -1.7764183292957163, -1.976330898719767, -1.998473801538982]
rhs gpu = [-1.8199981051319354, -1.8018274664997551, -1.9704182498456424, -1.942031536033198]
lhs cpu = [1.8836115939631803, 1.7764183292957159, 1.9763308987197665, 1.9984738015389816]
rhs cpu = [1.8199981051319356, 1.8018274664997542, 1.970418249845642, 1.942031536033198]

Do you think this is also a bug, or am I doing something wrong in my test?

It is the same eigenvector. Just note that V_cpu ≈ -V_gpu, which is totally allowed (eigenvectors are generally only unique up to a factor, and since KrylovKit normalizes the vector they are unique up to a global sign).
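
If you want the comparison in the script to be robust to that sign, a sketch along these lines (using the names from your code) should do:

V_gpu_h = Array(V_gpu)
agree = isapprox(V_cpu, V_gpu_h) || isapprox(V_cpu, -V_gpu_h)  # equal up to a global sign
println("eigenvectors agree up to sign: ", agree)
println("overlap = ", abs(dot(V_cpu, V_gpu_h)))  # ≈ 1 for matching unit-norm vectors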

I cannot tell you why the eigenvalue/eigenvector pair seems to be off, though. But again, you can see that the values are almost identical between GPU and CPU (neglecting the global sign).

Now the operator is nonsymmetric, but it seems that you told eigsolve otherwise.
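
You can check this directly by densifying the operator (Matrix applies a LinearMap to the standard basis vectors):

A = Matrix(S)
println("issymmetric(S) = ", issymmetric(A))  # expected: false, since M now holds only the upper triangle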

That’s true, I forgot about that! By passing ishermitian=false to eigsolve, both the returned eigenvalues and eigenvectors are equal.
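
For reference, a sketch of the corrected calls with the hermiticity hint removed (same setup as above):

# Without the symmetry/hermiticity hint, eigsolve treats the map as a
# general operator and handles the nonsymmetric S correctly.
tol = 1e-10
(E_cpu, V_cpu, info_cpu) = eigsolve(S, v0, 1, :SR; ishermitian=false, tol=tol)
(E_gpu, V_gpu, info_gpu) = eigsolve(S_d, v0_d, 1, :SR; ishermitian=false, tol=tol)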