No, I haven't set anything…
But I had loaded the wrong version of the script… and now it works!
Here is the working version:
# Test MKL solve linear systems
# Benchmark setup: a random dense N-by-N system with a single right-hand side.
N = 10000;
A = rand(N,N);
# AA is the copy handed to the MKL factorization below, so A itself stays available
# for the built-in solve.
AA = copy(A);
# b is created as an N-by-1 matrix (not a vector) because linSOLVE only accepts a
# StridedMatrix right-hand side.
b = rand(N,1);
# c is the copy of the right-hand side passed to linSOLVE.
c = copy(b);
"""
    juliaSol(A, b)

Solve the linear system `A * x = b` with Julia's built-in left-division
operator and return the solution `x`.
"""
function juliaSol(A, b)
    x = A \ b
    return x
end
# Baseline: time Julia's built-in `\` solve on A and b.
# Note that the first call of a function also includes its compilation time.
@time sol = juliaSol(A,b);
# Test MKL
# Locate MKL's runtime library. `find_library` returns an empty string when the
# library cannot be found, so fail here with a clear message instead of letting
# `dlopen`/`ccall` fail later on an empty library name.
const global librt = Libdl.find_library(["libmkl_rt"], ["/opt/intel/mkl/lib"])
isempty(librt) && error("libmkl_rt not found in /opt/intel/mkl/lib or the system library paths")
# Open librt
Libdl.dlopen(librt)
# LU FACTORIZATION
"""
    luFactMKL(A::StridedMatrix{Float64}) -> (A, ipiv)

Compute the LU factorization with partial pivoting of `A` by calling
`LAPACKE_dgetrf` from the library named by `librt`, using column-major layout.

`A` is passed to the routine directly (no copy is made) and is returned
together with the pivot vector `ipiv` of length `min(m, n)`.

Throws an `ErrorException` when the routine reports a non-zero `info`:
negative values mean an illegal argument, positive values mean that the
factor `U` is exactly singular.
"""
function luFactMKL(A::StridedMatrix{Float64})
    m, n = size(A)
    lda = max(1, stride(A, 2))
    ipiv = similar(A, Int64, min(m, n))
    # lapack_int LAPACKE_dgetrf(int matrix_layout, lapack_int m, lapack_int n,
    #                           double* a, lapack_int lda, lapack_int* ipiv);
    # matrix_layout is a plain C `int`, hence Cint.
    # NOTE(review): passing lapack_int as Int64 assumes the ILP64 interface of
    # MKL is the one in use -- confirm against the MKL interface layer setting.
    info = ccall(("LAPACKE_dgetrf", librt), Cint,
                 (Cint, Int64, Int64,
                  Ptr{Float64}, Int64, Ptr{Int64}),
                 102, m, n,          # 102 = LAPACK_COL_MAJOR
                 A, lda, ipiv)
    # Do not silently drop the status code returned by LAPACKE.
    if info < 0
        error("LAPACKE_dgetrf: argument $(-info) had an illegal value")
    elseif info > 0
        error("LAPACKE_dgetrf: U($info,$info) is exactly zero; the matrix is singular")
    end
    return A, ipiv
end
# Time the MKL factorization; AA and pivot are rebound to the matrix and the
# pivot vector returned by luFactMKL.
@time AA,pivot = luFactMKL(AA)
# LAPACKE matrix_layout constants (the first argument of the LAPACKE calls):
# 101 = LAPACK_ROW_MAJOR
# 102 = LAPACK_COL_MAJOR
"""
    linSOLVE(A, b, ipiv) -> (info, b, ipiv)

Solve `A * X = B` by calling `LAPACKE_dgetrs` (no transpose, column-major
layout) from the library named by `librt`.

# Arguments
- `A::StridedMatrix{Float64}`: square matrix holding the LU factors, as
  returned by `luFactMKL`.
- `b::StridedMatrix{Float64}`: right-hand sides, one per column; passed to the
  routine directly, which writes the solution into it.
- `ipiv::StridedVector{Int64}`: pivot indices, as returned by `luFactMKL`.

# Returns
The LAPACKE status code (`0` on success), `b`, and `ipiv`.

# Throws
- `DimensionMismatch` if `A` is not square, or if the row count of `b` or the
  length of `ipiv` does not match the order of `A`.
"""
function linSOLVE(A::StridedMatrix{Float64}, b::StridedMatrix{Float64}, ipiv::StridedVector{Int64})
    m, n = size(A)
    # Validate shapes up front: the C routine trusts the sizes it is given.
    m == n || throw(DimensionMismatch("A must be square, got size ($m, $n)"))
    size(b, 1) == n ||
        throw(DimensionMismatch("b has $(size(b, 1)) rows, but A is of order $n"))
    length(ipiv) == n ||
        throw(DimensionMismatch("ipiv has length $(length(ipiv)), but A is of order $n"))
    nrhs = size(b, 2)              # solve every column of b, not only the first
    lda = max(1, stride(A, 2))
    ldb = max(1, stride(b, 2))     # actual column stride of b (also right for views)
    # lapack_int LAPACKE_dgetrs(int matrix_layout, char trans, lapack_int n,
    #                           lapack_int nrhs, const double* a, lapack_int lda,
    #                           const lapack_int* ipiv, double* b, lapack_int ldb);
    # matrix_layout is a plain C `int`, hence Cint.
    # NOTE(review): passing lapack_int as Int64 assumes the ILP64 interface of
    # MKL is the one in use -- confirm against the MKL interface layer setting.
    dd = ccall(("LAPACKE_dgetrs", librt),
               Cint, # Return type
               (Cint, Cuchar, Int64,
                Int64, Ptr{Float64}, Int64,
                Ptr{Int64}, Ptr{Float64}, Int64),
               102, 'N', n,        # 102 = LAPACK_COL_MAJOR
               nrhs, A, lda,
               ipiv, b, ldb)
    return dd, b, ipiv # dd = 0 if calculation is done, b = output
end
# Time the MKL solve using the factors (AA) and pivots (pivot) from luFactMKL.
# c is the right-hand side (a copy of b); sol2 is the same array, returned by
# linSOLVE, and dd is the LAPACKE status code.
@time dd, sol2, piv = linSOLVE(AA,c, pivot)
Using Intel MKL's dgetrf + dgetrs, Julia is as fast as MATLAB!
On my PC, with A = rand(10000,10000):
Julia Version 0.5.0
Commit 3c9d753 (2016-09-19 18:14 UTC)
Platform Info:
System: NT (x86_64-w64-mingw32)
CPU: Intel(R) Core(TM) i7-7500U CPU @ 2.70GHz
WORD_SIZE: 64
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Prescott)
LAPACK: libopenblas64_
LIBM: libopenlibm
LLVM: libLLVM-3.7.1 (ORCJIT, broadwell)
MKL "dgetrf" factorization time
12.180575 seconds (4.16 k allocations: 261.296 KB)
JULIA LAPACK factorization time
38.910369 seconds (10 allocations: 78.469 KB)
Julia's bundled LAPACK (OpenBLAS) is very memory efficient… but MKL is about 3 times faster (and uses about 3 times more memory).