I am trying to translate the following function into the GPU version. But faced with the following error:
unsupported call to an unknown function
Reason: unsupported dynamic function invocation (call to libcuda)
begin
using Pkg
Pkg.activate(".")
using CUDA
using Test
using BenchmarkTools
using Cthulhu
using Random
# CUDA.versioninfo()
end
function func!(y::AbstractVector{T}, w::AbstractVector{T2}, lambda::Float64,
z::AbstractVector{T}) where {T<:Real,T2<:Real}
m = length(y)
c = zeros(Float32, m) |> cu
d = zeros(Float32, m) |> cu
e = zeros(Float32, m) |> cu
d[1] = w[1] + lambda
c[1] = -2 * lambda / d[1]
e[1] = lambda / d[1]
z[1] = w[1] * y[1]
d[2] = w[2] + 5 * lambda - d[1] * c[1] * c[1]
c[2] = (-4 * lambda - d[1] * c[1] * e[1]) / d[2]
e[2] = lambda / d[2]
z[2] = w[2] * y[2] - c[1] * z[1]
# for (i = 2; i < m - 1; i++)
m = length(y)
@inbounds for i = 3:(m-1)
i1 = i - 1
i2 = i - 2
d[i] = w[i] + 6 * lambda - c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2]
c[i] = (-4 * lambda - d[i1] * c[i1] * e[i1]) / d[i]
e[i] = lambda / d[i]
z[i] = w[i] * y[i] - c[i1] * z[i1] - e[i2] * z[i2]
end
i = m - 1
i1 = i - 1
i2 = i - 2
d[m-1] = w[m-1] + 5 * lambda - c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2]
c[m-1] = (-2 * lambda - d[i1] * c[i1] * e[i1]) / d[m-1]
z[m-1] = w[m-1] * y[m-1] - c[i1] * z[i1] - e[i2] * z[i2]
i = m
i1 = i - 1
i2 = i - 2
d[m] = w[m] + lambda - c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2]
z[m] = (w[m] * y[m] - c[i1] * z[i1] - e[i2] * z[i2]) / d[m]
z[m-1] = z[m-1] / d[m-1] - c[m-1] * z[m]
@inbounds for i in (m-2):-1:1
z[i] = z[i] / d[i] - c[i] * z[i+1] - e[i] * z[i+2]
end
end
begin
Random.seed!(1)
n = Int(1e4)
y = rand(100, n)
w = rand(100, n)
# gpu version
function kernel(y, w, z)
n = size(y, 2)
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
@cuprintln("thread $(threadIdx().x), block $(blockIdx().x)")
if i <= n
func!(y[:, i], w[:, i], 2.0, z[:, i])
end
return
end
function main_gpu()
y2 = cu(y)
w2 = cu(w)
res = cu(zeros(size(y)))
N = size(y, 2)
nthreads = 256 * 2
nblocks = ceil(Int, N / nthreads)
@show N nthreads nblocks
CUDA.@sync begin
# @device_code_warntype
@cuda threads = nthreads blocks = nblocks kernel(y2, w2, res)
end
return
end
@time main_gpu()
end
The error message:
Stacktrace:
[1] unsafe_cuCtxGetDevice
@ C:\Users\kong\.julia\packages\CUDA\tTK8Y\lib\cudadrv\libcuda.jl:150
[2] current_device
@ C:\Users\kong\.julia\packages\CUDA\tTK8Y\lib\cudadrv\devices.jl:23
[3] device
@ C:\Users\kong\.julia\packages\CUDA\tTK8Y\lib\cudadrv\context.jl:287
[4] check_exceptions
@ C:\Users\kong\.julia\packages\CUDA\tTK8Y\src\compiler\exceptions.jl:33
[5] #synchronize#134
@ C:\Users\kong\.julia\packages\CUDA\tTK8Y\lib\cudadrv\stream.jl:134
[6] multiple call sites
@ unknown:0
Reason: unsupported call to an unknown function (call to ijl_lazy_load_and_lookup)
Stacktrace:
[1] unsafe_cuCtxGetDevice
@ C:\Users\kong\.julia\packages\CUDA\tTK8Y\lib\cudadrv\libcuda.jl:150
[2] current_device
@ C:\Users\kong\.julia\packages\CUDA\tTK8Y\lib\cudadrv\devices.jl:23
[3] device
@ C:\Users\kong\.julia\packages\CUDA\tTK8Y\lib\cudadrv\context.jl:287
[4] check_exceptions
@ C:\Users\kong\.julia\packages\CUDA\tTK8Y\src\compiler\exceptions.jl:33
[5] #synchronize#134
@ C:\Users\kong\.julia\packages\CUDA\tTK8Y\lib\cudadrv\stream.jl:134
[6] multiple call sites
@ unknown:0
Reason: unsupported dynamic function invocation (call to libcuda)