I am getting an issue with CuArray and tried to explain it with simple example.
I have created a device pointer through cudaMalloc in C function “get_device_address()”. The “get_device_address()” is called in Julia and tried to convert into CuArray to call the CUDA function defined in Julia.
C function file: c_function.c
#include <cuda_runtime.h>
float* get_device_address() {
float* d_ptr;
cudaMalloc((void**)&d_ptr, 10 * sizeof(float));
return d_ptr;
}
Julia code: main.jl
using CUDA
using CUDA
function kernel_function(a)
i = threadIdx().x
a[i] += 1.0f0
return
end
# Define the C function signature in Julia
function get_device_address()::Ptr{Float32}
return ccall((:get_device_address, "libcfunction"), Ptr{Float32}, ())
end
# Call the C function to get the device address
device_address = get_device_address()
device_address_float = Ptr{Float32}(device_address)
# Convert the device address to CuPtr{Float32}
#cu_ptr = CuPtr{Float32}(device_address)
# Define the size of the array
size = 10
# Wrap this pointer in a CuArray for further operations
#cu_array = unsafe_wrap(CuArray, device_address_float, size; own=false)
#cu_array = unsafe_wrap(CuArray, device_address_float, size)
#cu_array = unsafe_wrap(CuArray{Float32, 1}, device_address, size)
cu_array = unsafe_wrap(CuArray{Float32, 1}, device_address, (size, ))
# Print the initial content of the array
println("Initial array on GPU: ", cu_array)
# Launch the CUDA kernel
@cuda threads=size kernel_function(cu_array)
# Synchronize to ensure the kernel execution is complete
CUDA.@sync
# Copy the result back to the host and print it
result = Array(cu_array)
println("Modified array on GPU: ", result)
# Free the CUDA memory after usage
CUDA.@sync CUDA.cuFree(cu_ptr)
Here are compilation steps:
$ gcc -g -O0 -fPIC -shared -o libcfunction.so c_malloc.c -I$JULIA/include/julia -L$JULIA/lib -ljulia -lpthread -I$NVHPC_ROOT/cuda/include -L$NVHPC_ROOT/cuda/lib64 -lcuda -lcudart
$ julia main.jl
Error message:
ERROR: LoadError: CUDA error: invalid argument (code 1, ERROR_INVALID_VALUE)
Stacktrace:
[1] throw_api_error(res::CUDA.cudaError_enum)
@ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/libcuda.jl:30
[2] check
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/libcuda.jl:37 [inlined]
[3] cuMemHostRegister_v2
@ ~/.julia/packages/CUDA/75aiI/lib/utils/call.jl:34 [inlined]
[4] register(::Type{CUDA.HostMemory}, ptr::Ptr{Float32}, bytesize::Int64, flags::UInt8)
@ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/memory.jl:170
[5] #1116
@ ~/.julia/packages/CUDA/75aiI/src/array.jl:303 [inlined]
[6] #context!#990
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/state.jl:168 [inlined]
[7] context!
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/state.jl:163 [inlined]
[8] unsafe_wrap(::Type{CuArray{Float32, 1, CUDA.HostMemory}}, p::Ptr{Float32}, dims::Tuple{Int64}; ctx::CuContext)
@ CUDA ~/.julia/packages/CUDA/75aiI/src/array.jl:302
[9] unsafe_wrap
@ ~/.julia/packages/CUDA/75aiI/src/array.jl:289 [inlined]
[10] unsafe_wrap(::Type{CuArray{Float32, 1}}, p::Ptr{Float32}, dims::Tuple{Int64}; ctx::CuContext)
@ CUDA ~/.julia/packages/CUDA/75aiI/src/array.jl:321
[11] unsafe_wrap(::Type{CuArray{Float32, 1}}, p::Ptr{Float32}, dims::Tuple{Int64})
@ CUDA ~/.julia/packages/CUDA/75aiI/src/array.jl:316