Juliacon21-gpu_workshop kernel abstractions heat diffusion

Could you post a full MWE?

One issue I found is that `@Const` doesn't work if `data` is an `OffsetArray`.

using KernelAbstractions
using AMDGPU
using ROCKernels
using OffsetArrays

# diffuse_kabs_lmem!(out, data, a, dt, dx, dy)
#
# Perform one explicit 5-point-stencil diffusion step, staging each workgroup's
# tile of `data` (plus a one-cell halo) in workgroup-local memory.
#
# Arguments
#   out    - destination array, written at the global index `[i, j]`.
#   data   - source array, read at `[i, j]` and at its four direct neighbours
#            `[i±1, j]` / `[i, j±1]`.
#   a      - diffusion coefficient.
#   dt     - time-step size.
#   dx, dy - grid spacing along the first / second dimension.
#
# NOTE(review): all accesses are wrapped in `@inbounds` and there is no guard on
# the global index, so the caller must supply a `data` whose axes include a
# one-cell halo around the `ndrange` (e.g. an OffsetArray indexed from 0) and,
# presumably, an `ndrange` that is a multiple of the workgroup size - confirm
# against the launch site.
@kernel function diffuse_kabs_lmem!(out, data, a, dt, dx, dy)
    i, j = @index(Global, NTuple)
    li, lj = @index(Local, NTuple)

    # Local tile: the workgroup's cells plus one halo cell on every side.
    lmem = @localmem eltype(data) (@groupsize()[1] + 2, @groupsize()[2] + 2)
    # Re-index the tile from 0 so that local indices 1:groupsize address the
    # interior and 0 / groupsize+1 address the halo. Each axis must use its own
    # workgroup extent; using extent [1] for both only works for square groups.
    @uniform ldata = OffsetArray(lmem, 0:(@groupsize()[1]+1), 0:(@groupsize()[2]+1))

    @inbounds begin
        # Every work-item stages its own cell ...
        ldata[li,lj] = data[i,j]
        # ... and work-items on the tile edge additionally stage the adjacent
        # halo cell. The four tile corners are never loaded; the 5-point
        # stencil below does not read them.
        if li == 1
            ldata[li-1,lj] = data[i-1,j]
        end
        if li == @groupsize()[1]
            ldata[li+1,lj] = data[i+1,j]
        end
        if lj == 1
            ldata[li,lj-1] = data[i,j-1]
        end
        if lj == @groupsize()[2]
            ldata[li,lj+1] = data[i,j+1]
        end
    end

    # All loads into the tile must be complete before neighbours are read.
    @synchronize()

    @inbounds begin
        dij   = ldata[li,lj]
        dim1j = ldata[li-1,lj]
        dijm1 = ldata[li,lj-1]
        dip1j = ldata[li+1,lj]
        dijp1 = ldata[li,lj+1]

        # Forward-Euler update with second-order central differences.
        dij += a * dt * (
            (dim1j - 2 * dij + dip1j)/dx^2 +
            (dijm1 - 2 * dij + dijp1)/dy^2)

        out[i,j] = dij
    end
end

# Problem size: the kernel is launched over an N×N index range.
N = 64

# Instantiate the kernel for the ROCm device with a (16, 16) workgroup size.
diffusion_kernel_lmem = diffuse_kabs_lmem!(ROCDevice(), (16, 16))

# The input has one extra cell on every side, so it is re-indexed to 0:N+1
# along both axes; indices 1:N then line up with `out`.
halo_axis = 0:(N + 1)
domain = OffsetArray(AMDGPU.zeros(N + 2, N + 2), halo_axis, halo_axis)
out = AMDGPU.zeros(N, N)

# Launch, then block on the value returned by the launch.
event = diffusion_kernel_lmem(out, domain, 0.01, 0.01, 0.01, 0.01; ndrange=(N, N))
wait(event)

Works for me using the AMD backend.