Hi everyone, I have a very simple program that performs parallel vector summation on the GPU. The code is written in CUDA C and compiled into a shared library. When I try to call the library function via ccall in Julia, Julia cannot find the function. The code is attached below.
#include "../common/book.h"
// Element-wise integer vector addition: c[i] = a[i] + b[i] for every i in [0, N).
//
// Each thread starts at its flat global index and then advances by the total
// number of launched threads (blockDim.x * gridDim.x), so every element is
// covered no matter how N compares to the launch size.
//
// a, b : input arrays, read on the device (at least N elements each)
// c    : output array, written on the device (at least N elements)
// N    : number of elements to process
__global__ void add(int *a, int *b, int *c, int N){
    // Distance between two consecutive elements handled by the same thread.
    const unsigned int stride = blockDim.x * gridDim.x;
    for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < N; idx = idx + stride){
        c[idx] = a[idx] + b[idx];
    }
}
// C-linkage entry point: computes c[i] = a[i] + b[i] for i in [0, N) on the GPU.
//
// a, b : host input arrays with at least N ints each
// c    : host output array with room for at least N ints
// N    : number of elements; nothing is done when N <= 0
//
// The three arrays are staged in temporary device buffers that are released
// before returning. Every CUDA runtime call is routed through HANDLE_ERROR
// (declared in ../common/book.h, not shown here), so failures are handled by
// that macro rather than reported through a return value.
extern "C" void vecAdd(int *a, int *b, int *c, int N){
    // Nothing to add; this also keeps a negative N from wrapping around to a
    // huge byte count in the size computation below.
    if (N <= 0) {
        return;
    }

    const size_t bytes = sizeof(int) * (size_t)N;

    int *d_a, *d_b, *d_c;
    HANDLE_ERROR( cudaMalloc( (void **)&d_a, bytes ) );
    HANDLE_ERROR( cudaMalloc( (void **)&d_b, bytes ) );
    HANDLE_ERROR( cudaMalloc( (void **)&d_c, bytes ) );

    HANDLE_ERROR( cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice) );
    HANDLE_ERROR( cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice) );

    // Fixed 128 x 128 launch; the kernel loops over the data, so N may be
    // larger or smaller than the number of launched threads.
    add<<<128, 128>>>(d_a, d_b, d_c, N);
    // A kernel launch returns no status itself; launch failures have to be
    // queried explicitly.
    HANDLE_ERROR( cudaGetLastError() );

    // Blocking copy back to the host; errors raised while the kernel was
    // running surface here.
    HANDLE_ERROR( cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost) );

    HANDLE_ERROR( cudaFree(d_a) );
    HANDLE_ERROR( cudaFree(d_b) );
    HANDLE_ERROR( cudaFree(d_c) );
}
I compiled the source code with the following command:
nvcc --ptxas-options=-v --compiler-options '-fPIC' -o libtest.so --shared vecAdd.cu
Then I tried to call vecAdd from Julia with ccall. The Julia code is:
# Problem size and input vectors; Int32 elements to match the C `int` arrays
# that vecAdd expects.
N = 123321
a = ones(Int32, N)
b = ones(Int32, N)
# Output buffer that vecAdd fills in (the original `ones(zeros, N)` is not a
# valid call and fails before ccall is ever reached).
c = zeros(Int32, N)
# NOTE(review): a bare "libtest.so" is resolved through the dynamic loader's
# search path, which usually does not include the current directory. If the
# library is not found, try "./libtest.so" or an absolute path. TODO confirm
# that this is the cause of the reported error.
ccall((:vecAdd, "libtest.so"), Void, (Ptr{Int32}, Ptr{Int32}, Ptr{Int32}, Int32), a, b, c, N)
The error message says that Julia
cannot find vecAdd in the library libtest.so.
Any suggestions on how to solve this problem? Thanks.
Wenlei