Hi,
I am trying to reproduce the Juliacon21-gpu workshop and I am stuck at the last part of kernel abstractions.
I wrote:
# One explicit-Euler step of 2-D heat diffusion, staging each workgroup's
# tile of `data` in local (shared) memory with a one-cell halo.
#   out, data : 2-D arrays covering the global ndrange (data is read-only)
#   a         : diffusivity
#   dt        : time step
#   dx, dy    : grid spacings in the two directions
@kernel function diffuse_kabs_lmem!(out, @Const(data), a, dt, dx, dy)
    i, j = @index(Global, NTuple)
    li, lj = @index(Local, NTuple)
    # Shared-memory tile sized workgroup + a one-cell halo on every side.
    lmem = @localmem eltype(data) (@groupsize()[1] + 2, @groupsize()[2] + 2)
    # NOTE(review): the original wrapped `lmem` in an OffsetArray built inside
    # the kernel. Two problems: (1) a typo — both axes used @groupsize()[1],
    # so non-square workgroups got wrong offsets; (2) constructing the wrapper
    # in-kernel is the likely source of the reported
    # "unsupported call to the Julia runtime (jl_f_tuple)" on GPU.
    # Indexing `lmem` directly with a +1 shift avoids both while keeping the
    # arithmetic identical (lmem[li+1, lj+1] == ldata[li, lj]).
    @inbounds begin
        lmem[li + 1, lj + 1] = data[i, j]
        # Boundary threads of the workgroup also load their halo neighbor.
        if li == 1
            lmem[li, lj + 1] = data[i - 1, j]
        end
        if li == @groupsize()[1]
            lmem[li + 2, lj + 1] = data[i + 1, j]
        end
        if lj == 1
            lmem[li + 1, lj] = data[i, j - 1]
        end
        if lj == @groupsize()[2]
            lmem[li + 1, lj + 2] = data[i, j + 1]
        end
    end
    # All loads (including halos) must complete before any thread reads.
    @synchronize()
    @inbounds begin
        dij   = lmem[li + 1, lj + 1]
        dim1j = lmem[li,     lj + 1]
        dijm1 = lmem[li + 1, lj]
        dip1j = lmem[li + 2, lj + 1]
        dijp1 = lmem[li + 1, lj + 2]
        # 5-point Laplacian stencil, explicit Euler update.
        dij += a * dt * (
            (dim1j - 2 * dij + dip1j) / dx^2 +
            (dijm1 - 2 * dij + dijp1) / dy^2)
        out[i, j] = dij
    end
end
# Instantiate the kernel for the CUDA backend with a static 16×16 workgroup.
diffusion_kernel_lmem = diffuse_kabs_lmem!(CUDADevice(), (16, 16))
It is essentially a copy-paste of the workshop code (the notebook is not available).
And when I run wait(diffusion_kernel_lmem(out, domain, a, dt, dx, dy; ndrange=(N, N)))
I get an error. If I try with diffusion_kernel_lmem = diffuse_kabs_lmem!(CPU(), (16, 16))
, and define arrays as Array
everything works well.
The error seems to be similar to the one obtained in the workshop:
InvalidIRError: compiling kernel gpu_diffuse_kabs_lmem!(Cassette.Context{nametype(CUDACtx), Nothing, Nothing, KernelAbstractions.var"##PassType#257", Nothing, Cassette.DisableHooks}, typeof(gpu_diffuse_kabs_lmem!), KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(16, 16)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, OffsetMatrix{Float64, CuDeviceMatrix{Float64, 1}}, OffsetMatrix{Float64, CuDeviceMatrix{Float64, 1}}, Float64, Float64, Float64, Float64) resulted in invalid LLVM IR
Reason: unsupported call to the Julia runtime (call to jl_f_tuple)
Has anyone tried to redo the workshop who could help me, please?
Thank you,
Ludovic