A few days ago the IT-department of my company added a virtual GPU to my virtual machine which I would like to use in Julia.
I use Julia 1.10.3 (which I can load as a module through `module load julia/1.10.3`)
and have added CUDA v5.4.2 to my environment.
I could locate the CUDA driver library through
$ ldconfig -p | grep libcuda.so
libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1
libcuda.so (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so
and also
$ nvidia-smi |grep NVIDIA
| NVIDIA-SMI 535.161.08 Driver Version: 535.161.08 CUDA Version: 12.2 |
looks ok to me.
However the CUDA driver cannot be found
$ JULIA_DEBUG=CUDA_Driver_jll julia --project -e "using CUDA; CUDA.versioninfo()"
┌ Debug: No system CUDA driver found
└ @ CUDA_Driver_jll ~/.julia/packages/CUDA_Driver_jll/PZjqb/src/wrappers/x86_64-linux-gnu.jl:104
ERROR: CUDA driver not found
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] functional
@ ~/.julia/packages/CUDA/75aiI/src/initialization.jl:24 [inlined]
[3] versioninfo(io::Base.TTY)
@ CUDA ~/.julia/packages/CUDA/75aiI/src/utilities.jl:42
[4] top-level scope
@ none:1
When investigating further, I found that I could not load the shared CUDA driver library:
julia> using Base.Libc.Libdl
julia> Libdl.dlopen("/lib/x86_64-linux-gnu/libcuda.so")
ERROR: could not load library "/lib/x86_64-linux-gnu/libcuda.so"
/lib/x86_64-linux-gnu/libcuda.so: cannot open shared object file: No such file or directory
Stacktrace:
[1] dlopen(s::String, flags::UInt32; throw_error::Bool)
@ Base.Libc.Libdl ./libdl.jl:117
[2] dlopen
@ ./libdl.jl:116 [inlined]
[3] dlopen(s::String)
@ Base.Libc.Libdl ./libdl.jl:116
[4] top-level scope
@ REPL[1]:1
So I wondered what the dependencies of the CUDA driver library are
$ ldd "/lib/x86_64-linux-gnu/libcuda.so"
linux-vdso.so.1 (0x00007fff7f1de000)
libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007fd2e0d92000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fd2e0b6a000)
libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x00007fd2e0b65000)
libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007fd2e0b60000)
librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 (0x00007fd2e0b5b000)
/lib64/ld-linux-x86-64.so.2 (0x00007fd2e2afe000)
and checked whether all of them are already loaded. They are all loaded!
julia> required_libraries = [
"linux-vdso.so.1"
"/lib/x86_64-linux-gnu/libm.so.6"
"/lib/x86_64-linux-gnu/libc.so.6"
"/lib/x86_64-linux-gnu/libdl.so.2"
"/lib/x86_64-linux-gnu/libpthread.so.0"
"/lib/x86_64-linux-gnu/librt.so.1"
"/lib64/ld-linux-x86-64.so.2"
];
julia> loaded_libraries = Libdl.dllist();
julia> for lib in required_libraries
@assert lib in loaded_libraries
end
julia>
So as a last resort I ran strace -o log.txt ./test.jl
on the following file:
#!/usr/bin/env -S julia --color=yes --startup-file=no --project
using Base.Libc.Libdl
Libdl.dlopen("/lib/x86_64-linux-gnu/libcuda.so")
However, I can’t really make sense of the output. Can somebody help me with that?
execve("./test.jl", ["./test.jl"], 0x7fff94997190 /* 64 vars */) = 0
brk(NULL) = 0x5628211e8000
arch_prctl(0x3001 /* ARCH_??? */, 0x7ffc84173580) = -1 EINVAL (Invalid argument)
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f14f4063000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=86471, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 86471, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f14f404d000
close(3) = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\237\2\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
pread64(3, "\4\0\0\0 \0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0"..., 48, 848) = 48
pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\244;\374\204(\337f#\315I\214\234\f\256\271\32"..., 68, 896) = 68
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=2216304, ...}, AT_EMPTY_PATH) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
mmap(NULL, 2260560, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f14f3e25000
mmap(0x7f14f3e4d000, 1658880, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x7f14f3e4d000
mmap(0x7f14f3fe2000, 360448, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bd000) = 0x7f14f3fe2000
mmap(0x7f14f403a000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x214000) = 0x7f14f403a000
mmap(0x7f14f4040000, 52816, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f14f4040000
close(3) = 0
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f14f3e22000
arch_prctl(ARCH_SET_FS, 0x7f14f3e22740) = 0
set_tid_address(0x7f14f3e22a10) = 602416
set_robust_list(0x7f14f3e22a20, 24) = 0
rseq(0x7f14f3e230e0, 0x20, 0, 0x53053053) = 0
mprotect(0x7f14f403a000, 16384, PROT_READ) = 0
mprotect(0x562820d1e000, 4096, PROT_READ) = 0
mprotect(0x7f14f409d000, 8192, PROT_READ) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
munmap(0x7f14f404d000, 86471) = 0
getrandom("\xd0\xf5\x41\x73\x7d\x58\x1a\xaa", 8, GRND_NONBLOCK) = 8
brk(NULL) = 0x5628211e8000
brk(0x562821209000) = 0x562821209000
openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=3048928, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 3048928, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f14f3b39000
close(3) = 0
execve("/mycompany/apps/bin/julia_1.10.3/julia", ["julia", "--color=yes", "--startup-file=no", "--project", "./test.jl"], 0x7ffc84173768 /* 64 vars */) = -1 ENOEXEC (Exec format error)
execve("/bin/sh", ["/bin/sh", "/mycompany/apps/bin/julia_1.10.3/julia", "--color=yes", "--startup-file=no", "--project", "./test.jl"], 0x7ffc84173768 /* 64 vars */) = 0
brk(NULL) = 0x56328345d000
arch_prctl(0x3001 /* ARCH_??? */, 0x7ffd46a0e670) = -1 EINVAL (Invalid argument)
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb9a9860000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=86471, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 86471, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fb9a984a000
close(3) = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\237\2\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
pread64(3, "\4\0\0\0 \0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0"..., 48, 848) = 48
pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\244;\374\204(\337f#\315I\214\234\f\256\271\32"..., 68, 896) = 68
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=2216304, ...}, AT_EMPTY_PATH) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
mmap(NULL, 2260560, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb9a9622000
mmap(0x7fb9a964a000, 1658880, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x7fb9a964a000
mmap(0x7fb9a97df000, 360448, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bd000) = 0x7fb9a97df000
mmap(0x7fb9a9837000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x214000) = 0x7fb9a9837000
mmap(0x7fb9a983d000, 52816, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fb9a983d000
close(3) = 0
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb9a961f000
arch_prctl(ARCH_SET_FS, 0x7fb9a961f740) = 0
set_tid_address(0x7fb9a961fa10) = 602416
set_robust_list(0x7fb9a961fa20, 24) = 0
rseq(0x7fb9a96200e0, 0x20, 0, 0x53053053) = 0
mprotect(0x7fb9a9837000, 16384, PROT_READ) = 0
mprotect(0x5632824d2000, 8192, PROT_READ) = 0
mprotect(0x7fb9a989a000, 8192, PROT_READ) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
munmap(0x7fb9a984a000, 86471) = 0
getuid() = 166922
getgid() = 100513
getpid() = 602416
rt_sigaction(SIGCHLD, {sa_handler=0x5632824c8aa0, sa_mask=~[RTMIN RT_1], sa_flags=SA_RESTORER, sa_restorer=0x7fb9a9664520}, NULL, 8) = 0
geteuid() = 166922
getrandom("\x4b\x70\x24\xfc\xa3\xe7\x32\x9e", 8, GRND_NONBLOCK) = 8
brk(NULL) = 0x56328345d000
brk(0x56328347e000) = 0x56328347e000
getppid() = 602413
newfstatat(AT_FDCWD, "/path/to/the/working/directory", {st_mode=S_IFDIR|0755, st_size=9, ...}, 0) = 0
newfstatat(AT_FDCWD, ".", {st_mode=S_IFDIR|0755, st_size=9, ...}, 0) = 0
openat(AT_FDCWD, "/mycompany/apps/bin/julia_1.10.3/julia", O_RDONLY) = 3
fcntl(3, F_DUPFD, 10) = 10
close(3) = 0
fcntl(10, F_SETFD, FD_CLOEXEC) = 0
geteuid() = 166922
getegid() = 100513
rt_sigaction(SIGINT, NULL, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGINT, {sa_handler=0x5632824c8aa0, sa_mask=~[RTMIN RT_1], sa_flags=SA_RESTORER, sa_restorer=0x7fb9a9664520}, NULL, 8) = 0
rt_sigaction(SIGQUIT, NULL, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGQUIT, {sa_handler=SIG_DFL, sa_mask=~[RTMIN RT_1], sa_flags=SA_RESTORER, sa_restorer=0x7fb9a9664520}, NULL, 8) = 0
rt_sigaction(SIGTERM, NULL, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=~[RTMIN RT_1], sa_flags=SA_RESTORER, sa_restorer=0x7fb9a9664520}, NULL, 8) = 0
read(10, "singularity exec /mycompany/apps/contai"..., 8192) = 69
newfstatat(AT_FDCWD, "/mycompany/apps/bin/julia_1.10.3/singularity", 0x7ffd46a0e360, 0) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/opt/anaconda3/bin/singularity", 0x7ffd46a0e360, 0) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/mycompany/home/xyz/micromamba/condabin/singularity", 0x7ffd46a0e360, 0) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/usr/local/sbin/singularity", 0x7ffd46a0e360, 0) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/usr/local/bin/singularity", 0x7ffd46a0e360, 0) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/usr/sbin/singularity", 0x7ffd46a0e360, 0) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/usr/bin/singularity", {st_mode=S_IFREG|0755, st_size=42468904, ...}, 0) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], NULL, 8) = 0
vfork() = 602417
rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP RTMIN RT_1], 8) = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 1}], 0, NULL) = 602417
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=602417, si_uid=166922, si_status=1, si_utime=4, si_stime=6} ---
rt_sigreturn({mask=[]}) = 602417
wait4(-1, 0x7ffd46a0e2bc, WNOHANG, NULL) = -1 ECHILD (No child processes)
read(10, "", 8192) = 0
exit_group(1) = ?
+++ exited with 1 +++
Any help or ideas where to look would be much appreciated!