Call Intel MKL as external library

No, I haven’t set anything…

But I had loaded the wrong version of the script… and now it works!
Here is the working version:

# Test: solve a dense linear system with MKL and compare against Julia's built-in solver

# Problem size: a dense N-by-N system with a single right-hand-side column
N = 10000;
A = rand(N,N);
# Working copy of A handed to luFactMKL, which passes it to LAPACKE_dgetrf and
# returns that same array as the factorization result
AA = copy(A);
b = rand(N,1);
# Working copy of b handed to linSOLVE, which returns it as the solver output
c = copy(b);

"""
    juliaSol(A, b)

Return `A \\ b`, i.e. the result of Julia's built-in left-division operator
applied to `A` and `b`. Used as the reference solver for the timing comparison.
"""
juliaSol(A, b) = A \ b
# Reference timing: solve the system with Julia's built-in `\` operator
sol = @time juliaSol(A, b);

# Test MKL
# Locate the MKL runtime library (libmkl_rt). The listed directory is searched
# first; `find_library` returns an empty string when no candidate can be found.
const global librt = Libdl.find_library(["libmkl_rt"], ["/opt/intel/mkl/lib"])

# Fail early with a clear message instead of letting `dlopen` or a later `ccall`
# fail on an empty library name.
isempty(librt) && error("libmkl_rt not found: add the MKL library directory to the search path")

# Open librt so the `ccall`s below can resolve their symbols from it
Libdl.dlopen(librt)
# LU FACTORIZATION
"""
    luFactMKL(A::StridedMatrix{Float64})

Compute the LU factorization of `A` by calling `LAPACKE_dgetrf` from the library
named by `librt`, using column-major layout (`matrix_layout = 102`).

`A` is handed to the routine directly, so it is overwritten in place. Returns the
tuple `(A, ipiv)`, where `ipiv` is a vector of `min(m, n)` pivot indices filled by
the routine.

Throws an `ArgumentError` when `A` does not have unit stride in its first
dimension or when the routine reports an illegal argument (negative return code),
and an `ErrorException` when it reports an exactly singular factor (positive
return code).
"""
function luFactMKL(A::StridedMatrix{Float64})
  # LAPACK only receives the column stride (lda); elements within a column must be contiguous
  stride(A, 1) == 1 ||
    throw(ArgumentError("matrix A must have unit stride in its first dimension"))

  m, n = size(A)
  lda  = max(1, stride(A, 2))
  ipiv = similar(A, Int64, min(m, n))

  # lapack_int LAPACKE_dgetrf (int matrix_layout , lapack_int m , lapack_int n ,
  # double * a , lapack_int lda , lapack_int * ipiv );
  # matrix_layout is a plain C `int`, hence Cint.
  # NOTE(review): lapack_int is passed as Int64, which presumes MKL's 64-bit
  # integer (ILP64) interface -- confirm against the interface layer in use.
  info = ccall(("LAPACKE_dgetrf",  librt), Cint,
            (Cint, Int64, Int64,
            Ptr{Float64}, Int64, Ptr{Int64}),
            102, m, n,
            A, lda, ipiv
  )

  # Do not silently discard the status code reported by the routine
  if info < 0
    throw(ArgumentError("LAPACKE_dgetrf: invalid value for argument #$(-info)"))
  elseif info > 0
    error("LAPACKE_dgetrf: U[$info,$info] is exactly zero; the matrix is singular")
  end

  return A, ipiv
end

# Time the MKL factorization; AA is rebound to the returned factor matrix and
# pivot to the returned pivot vector
AA, pivot = @time luFactMKL(AA)

# 101 = LAPACK_ROW_MAJOR
# 102 = LAPACK_COL_MAJOR
"""
    linSOLVE(A::StridedMatrix{Float64}, b::StridedMatrix{Float64}, ipiv::StridedVector{Int64})

Solve `A * x = b` from an existing LU factorization by calling `LAPACKE_dgetrs`
from the library named by `librt`, using column-major layout and no transposition
(`trans = 'N'`).

`A` and `ipiv` are the factor matrix and pivot vector as returned by `luFactMKL`.
Every column of `b` is treated as a right-hand side, and `b` is overwritten with
the result.

Returns `(dd, b, ipiv)`, where `dd` is the status code of the routine (`0` when the
calculation is done) and `b` is the output.

Throws a `DimensionMismatch` when `A` is not square or when `b` or `ipiv` do not
match its order, and an `ArgumentError` when any argument does not have unit
stride in its first dimension.
"""
function linSOLVE(A::StridedMatrix{Float64}, b::StridedMatrix{Float64}, ipiv::StridedVector{Int64})
  m, n = size(A)
  m == n ||
    throw(DimensionMismatch("matrix A is not square: dimensions are ($m, $n)"))
  size(b, 1) == n ||
    throw(DimensionMismatch("b has $(size(b, 1)) rows, but A has order $n"))
  length(ipiv) == n ||
    throw(DimensionMismatch("ipiv has length $(length(ipiv)), but A has order $n"))
  # LAPACK only receives the column strides (lda, ldb); elements within a column
  # and within ipiv must be contiguous
  (stride(A, 1) == 1 && stride(b, 1) == 1 && stride(ipiv, 1) == 1) ||
    throw(ArgumentError("A, b and ipiv must have unit stride in their first dimension"))

  nrhs = size(b, 2)             # one right-hand side per column of b
  lda  = max(1, stride(A, 2))
  ldb  = max(1, stride(b, 2))   # taken from b itself rather than assumed equal to n

  #lapack_int LAPACKE_dgetrs (int matrix_layout , char trans , lapack_int n ,
  # lapack_int nrhs , const double * a , lapack_int lda ,
  # const lapack_int * ipiv , double * b , lapack_int ldb );
  # matrix_layout is a plain C `int`, hence Cint.
  # NOTE(review): lapack_int is passed as Int64, which presumes MKL's 64-bit
  # integer (ILP64) interface -- confirm against the interface layer in use.
  dd = ccall(("LAPACKE_dgetrs", librt),
              Cint, # Return type
                (Cint, Cuchar, Int64,
                Int64,  Ptr{Float64}, Int64,
                Ptr{Int64},  Ptr{Float64}, Int64),
                102, 'N', n,
                nrhs, A, lda,
                ipiv, b, ldb
                )
  return dd, b, ipiv # dd = 0 if calculation is done, b = output
end

# Time the MKL triangular solve using the factors and pivots computed above
dd, sol2, piv = @time linSOLVE(AA, c, pivot)

Using Intel MKL dgetrf + dgetrs, Julia is as fast as Matlab! :wink:

On my PC: with A = rand(10000,10000);

Julia Version 0.5.0
Commit 3c9d753 (2016-09-19 18:14 UTC)
Platform Info:
  System: NT (x86_64-w64-mingw32)
  CPU: Intel(R) Core(TM) i7-7500U CPU @ 2.70GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Prescott)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.7.1 (ORCJIT, broadwell)

Mkl "dgetrf" Factorization time 
 12.180575 seconds (4.16 k allocations: 261.296 KB)

JULIA LAPACK factorization time 
 38.910369 seconds (10 allocations: 78.469 KB)

Julia's bundled LAPACK (OpenBLAS) is very memory efficient… but MKL is about 3 times faster (and allocates about 3 times more memory)

2 Likes