I don’t think you’ll benefit from using a GPU on a matrix of size (100,100).
These are the timings on my GTX 970 (the `yscale` argument messes with the alignment of the bars at the bottom, sorry):
Perhaps you would rather look at the relative performance-gain:
Try increasing the size of the matrix.
You may also want to consider calling `LinearAlgebra.LAPACK.syev!` for the CPU version for a more direct comparison, since `eigen` uses `LinearAlgebra.LAPACK.geevx!`.
My benchmark:
using CuArrays, CUDAnative, LinearAlgebra
using Plots, StatsPlots
# cpu version
"""
    cpu_eigen(mat)

Compute the eigendecomposition of `mat` on the CPU with `LinearAlgebra.eigen`.
"""
# NOTE(review): this goes through the generic `eigen` entry point rather than
# calling `LAPACK.syev!` directly; the general-matrix driver is `geevx!`.
# `eigen` may select a symmetric solver for exactly symmetric input -- confirm
# for the Julia version in use.
cpu_eigen(mat) = eigen(mat)
# CUSOLVER
"""
    gpu_eigen(mat)

Run `CuArrays.CUSOLVER.syevd!` on `mat` with job flag `'V'` and triangle flag
`'U'`, and return whatever that call returns.
"""
# NOTE(review): the trailing `!` indicates that `mat` is modified in place;
# pass a copy if the original device matrix is needed afterwards.
gpu_eigen(mat) = CuArrays.CUSOLVER.syevd!('V', 'U', mat)
"""
    timing(mat)

Time one eigendecomposition on the CPU and one on the GPU.

`mat` is first symmetrised as `mat + mat'`. A device copy of the symmetrised
matrix is created before either timer starts, so the host-to-device transfer
is not part of the measured GPU time.

Returns the tuple `(t_cpu, t_gpu)` of elapsed wall-clock times in seconds.
"""
function timing(mat)
    matrix = mat + mat'
    # `CuArray` keeps the element type of `matrix`. `cu` converts floating-point
    # data to Float32, which would make the GPU solve a different (cheaper)
    # problem than the CPU whenever `mat` is Float64.
    matrix_d = CuArray(matrix)
    t1 = @elapsed cpu_eigen(matrix)
    t2 = @elapsed gpu_eigen(matrix_d)
    return t1, t2
end
"""
    main(datatype=Float32, N=100)

Draw a random `N`-by-`N` matrix with element type `datatype` and return the
`(cpu, gpu)` pair of timings that `timing` reports for it.
"""
function main(datatype=Float32, N=100)
    return timing(rand(datatype, N, N))
end
# Convenience method: benchmark an N-by-N Float32 matrix.
# The argument is restricted to integers so that this definition does not
# overwrite the one-argument `main(datatype)` method that the default
# arguments of `main(datatype=Float32, N=100)` already generate; without the
# annotation `main(Float64)` would end up calling `rand(Float32, Float64, Float64)`.
main(N::Integer) = main(Float32, N)
# Matrix sizes to benchmark: powers of two from 2^0 to 2^12
Nrange = 2 .^ (0:12)
Nl = length(Nrange)
# Warm-up run whose result is discarded: the first call otherwise includes
# JIT compilation and library initialisation in the `@elapsed` measurements,
# which would inflate the timings recorded for the smallest matrix.
main(first(Nrange))
data = main.(Nrange) # Collect benchmarks: one (cpu, gpu) tuple per size
# Rearrange into an Nl-by-2 array for plotting (column 1: CPU, column 2: GPU)
arraydata = hcat(first.(data), last.(data))
# Plot timings: grouped bars (CPU vs. GPU) per matrix size on a log10 y-axis
pyplot()
# One x-position per benchmarked size, repeated once for each of the two groups;
# derived from Nl so it stays in step with the collected data.
exponents = repeat(0:(Nl - 1), outer=2)
group = repeat(["CPU", "GPU"], inner=Nl)
timingplot = StatsPlots.groupedbar(
    exponents,
    arraydata;
    group=group,
    title="Timing eigen on CPU and GPU",
    xlabel="log₂(N)",  # repaired mis-encoded subscript two
    ylabel="Elapsed time (s)",
    yscale=:log10,
    ylims=(2.0e-6, 100.0),
    bar_width=0.5,
)
savefig(timingplot, "timing.png")
# Plot speedups: CPU time divided by GPU time for each matrix size
# Horizontal reference line at 1 (break-even), extending one unit past the
# bars on both sides.
speedupplot = plot(-1:Nl, fill(1, Nl + 2))
plot!(
    speedupplot,
    0:(Nl - 1),
    arraydata[:, 1] ./ arraydata[:, 2];
    linetype=:bar,
    title="GPU-speedup over CPU",
    xlabel="log₂(N)",  # repaired mis-encoded subscript two
    xlims=(-0.5, Nl - 0.5),
    ylabel="Speedup",
    yscale=:log10,
    bar_width=0.25,
    legend=false,
)
savefig(speedupplot, "speedup.png")