CUDA | custom structs

Edit and update: 2025-June-4

Hello,

I’m working on converting the following custom struct

# test_struct_CUDA.jl

using FFTW

# Shifted frequency axes and FFTW plans for a 3-D complex volume whose size
# (n_rows, n_cols, n_slices) is carried in the type parameters. Every field has a
# default, so `FourierTransformPlan3D{n_rows, n_cols, n_slices}()` builds a complete
# instance; any field can still be overridden by keyword.
Base.@kwdef struct FourierTransformPlan3D{n_rows, n_cols, n_slices}

    # FT frequency shift vector arrays.
    # The length is passed as the sampling rate so the bin spacing is n / n == 1.0
    # exactly; `fftfreq(n) * n` evaluates (1 / n) * n instead, which rounds to
    # 0.9999999999999999 for some lengths (e.g. n = 49).
    shifted_freq_vec_row = fftshift(fftfreq(n_rows, n_rows))
    shifted_freq_vec_col = fftshift(fftfreq(n_cols, n_cols))
    shifted_freq_vec_slice = fftshift(fftfreq(n_slices, n_slices))

    # Fourier transform plan, created from a zero-filled ComplexF64 array of the
    # full volume size
    A_plan_array = plan_fft(
        zeros(Complex{Float64}, (n_rows, n_cols, n_slices));
        flags=FFTW.ESTIMATE,
        timelimit=Inf,
    )

    # Inverse Fourier transform plan, created the same way
    A_inv_plan_array = plan_ifft(
        zeros(Complex{Float64}, (n_rows, n_cols, n_slices));
        flags=FFTW.ESTIMATE,
        timelimit=Inf,
    )
end

# Build a FourierTransformPlan3D for a 256 x 256 x 192 volume and return it.
function main()
    # Volume dimensions; they become the struct's type parameters
    n_rows = Int64(256)
    n_cols = Int64(256)
    n_slices = Int64(192)

    # Instantiate the Fourier transform struct using only its field defaults
    plan_type = FourierTransformPlan3D{n_rows, n_cols, n_slices}
    return plan_type()
end

# Script entry point: run the example when this file is included.
main()

I’m attempting to use

Adapt.@adapt_structure and CUDA.CUFFT but am currently stuck on a CUDA.CUFFT.plan_fft() syntax issue:

# test_struct_CUDA.jl

using Adapt
using CUDA
# using FFTW
using CUDA.CUFFT

# Shifted frequency axes and CUFFT plans for a 3-D complex volume whose size
# (n_rows, n_cols, n_slices) is carried in the type parameters. Every field has a
# default, so `FourierTransformPlan3D{n_rows, n_cols, n_slices}()` builds a complete
# instance; any field can still be overridden by keyword.
Base.@kwdef struct FourierTransformPlan3D{n_rows, n_cols, n_slices}

    # FT frequency shift vector arrays.
    # The length is passed as the sampling rate so the bin spacing is n / n == 1.0
    # exactly; `fftfreq(n) * n` evaluates (1 / n) * n instead, which rounds to
    # 0.9999999999999999 for some lengths (e.g. n = 49).
    shifted_freq_vec_row = fftshift(fftfreq(n_rows, n_rows))
    shifted_freq_vec_col = fftshift(fftfreq(n_cols, n_cols))
    shifted_freq_vec_slice = fftshift(fftfreq(n_slices, n_slices))

    # Fourier transform plan, created from an uninitialised ComplexF32 CuArray of the
    # full volume size
    A_plan_CuArray = plan_fft(CuArray{Complex{Float32}, 3}(undef, (n_rows, n_cols, n_slices)))

    # Inverse Fourier transform plan, created the same way
    A_inv_plan_CuArray = plan_ifft(CuArray{Complex{Float32}, 3}(undef, (n_rows, n_cols, n_slices)))
end

# Hand-written Adapt rule. `Adapt.@adapt_structure` rebuilds the object by calling
# `FourierTransformPlan3D(fields...)` without type parameters; the size parameters
# cannot be inferred from the field values, so that call has no matching method.
# Forward the parameters explicitly and adapt every field in declaration order.
function Adapt.adapt_structure(
    to, ftp::FourierTransformPlan3D{n_rows, n_cols, n_slices}
) where {n_rows, n_cols, n_slices}
    adapted_fields = map(fieldnames(typeof(ftp))) do name
        Adapt.adapt(to, getfield(ftp, name))
    end
    return FourierTransformPlan3D{n_rows, n_cols, n_slices}(adapted_fields...)
end

# Build a FourierTransformPlan3D for a 256 x 256 x 192 volume and return it.
function main()
    # Volume dimensions; they become the struct's type parameters
    n_rows = Int64(256)
    n_cols = Int64(256)
    n_slices = Int64(192)

    # Instantiate the Fourier transform struct using only its field defaults
    plan_type = FourierTransformPlan3D{n_rows, n_cols, n_slices}
    return plan_type()
end

which runs

julia> include("test_struct_CUDA.jl")
FourierTransformPlan3D{256, 256, 192}(-128.0:1.0:127.0, -128.0:1.0:127.0, -96.0:1.0:95.0, CUFFT ComplexF32 forward plan for 256×256×192 CuArray of ComplexF32, 7.947286e-8 * CUFFT ComplexF32 backward plan for 256×256×192 CuArray of ComplexF32)

but the following lines

. . .
A_plan_array = plan_fft(CuArray{Complex{Float32}, 3}(undef, (n_rows, n_cols, n_slices)))
. . .
A_inv_plan_array = plan_ifft(CuArray{Complex{Float32}, 3}(undef, (n_rows, n_cols, n_slices)))
. . .

are underlined with an error message:

Possible method call error.Julia(IncorrectCallArgs)

fft_CuArray::var"CUDA.CuArray{T,N,M}" = CuArray{Complex{Float32}, 3}(undef, (n_rows, n_cols, n_slices))

CUDA.CUFFT lists 8 possible methods for plan_fft():

Possible method call error.Julia(IncorrectCallArgs)
`CUDA.CUFFT.plan_fft` is a function with **8** methods

Possible method call error.Julia(IncorrectCallArgs)
CUDA.CUFFT.plan_fft is a function with 8 methods

1. plan_fft(X::CUDA.CuArray where M, region::Core.Tuple{Vararg{Core.Int64,R}}) in CUFFT at fft.jl:192
2. plan_fft(x::CUDA.CuArray where M where N where #s8909<:Base.Complex where #s8908<:Union{Core.Integer,Base.Rational where T<:Core.Integer}, region::Core.Any) in CUFFT at fft.jl:123
3. plan_fft(X::CUDA.CuArray where M, region::Core.Any) in CUFFT at fft.jl:160
4. plan_fft(x::CUDA.CuArray where M where N where #s8914<:Core.Real, region::Core.Any) in CUFFT at fft.jl:121
5. plan_fft(X::CUDA.CuArray where M, region::Core.Tuple{Vararg{Core.Int64,R}}) in CUFFT at fft.jl:192
6. plan_fft(x::CUDA.CuArray where M where N where #s8909<:Base.Complex where #s8908<:Union{Core.Integer,Base.Rational where T<:Core.Integer}, region::Core.Any) in CUFFT at fft.jl:123
7. plan_fft(X::CUDA.CuArray where M, region::Core.Any) in CUFFT at fft.jl:160
8. plan_fft(x::CUDA.CuArray where M where N where #s8914<:Core.Real, region::Core.Any) in CUFFT at fft.jl:121

I’d appreciate an explanation of the syntax required for, for example, fft.jl:192:

# out-of-place complex

function plan_fft(X::DenseCuArray{T,N}, region::NTuple{R,Int}) where {T<:cufftComplexes,N,R}

and any other modifications that may be required.

Well, after a bit of experimenting, I think I may understand the syntax:

# out-of-place complex

function plan_fft(X::DenseCuArray{T,N}, region::NTuple{R,Int}) where {T<:cufftComplexes,N,R}

The MWE below

# test_struct_CUDA.jl

using Adapt
using CUDA
# using FFTW
using CUDA.CUFFT

# Shifted frequency axes, transform region and CUFFT plans for a 3-D complex volume
# whose size (n_rows, n_cols, n_slices) is carried in the type parameters. Every field
# has a default, so `FourierTransformPlan3D{n_rows, n_cols, n_slices}()` builds a
# complete instance; any field can still be overridden by keyword.
Base.@kwdef struct FourierTransformPlan3D{n_rows, n_cols, n_slices}

    # FT frequency shift vector arrays.
    # The length is passed as the sampling rate so the bin spacing is n / n == 1.0
    # exactly; `fftfreq(n) * n` evaluates (1 / n) * n instead, which rounds to
    # 0.9999999999999999 for some lengths (e.g. n = 49).
    shifted_freq_vec_row = fftshift(fftfreq(n_rows, n_rows))
    shifted_freq_vec_col = fftshift(fftfreq(n_cols, n_cols))
    shifted_freq_vec_slice = fftshift(fftfreq(n_slices, n_slices))

    # FFT and iFFT region: the dimensions to transform (all three by default).
    # Declared before the plans because their defaults refer to it.
    region::NTuple{3, Int64} = (1, 2, 3)

    # Fourier transform plan, created from a zero-filled ComplexF32 CuArray of the
    # full volume size
    A_plan_CuArray = plan_fft(
        CUDA.zeros(Complex{Float32}, (n_rows, n_cols, n_slices)),
        region,
    )

    # Inverse Fourier transform plan, created the same way
    A_inv_plan_CuArray = plan_ifft(
        CUDA.zeros(Complex{Float32}, (n_rows, n_cols, n_slices)),
        region,
    )
end

# Hand-written Adapt rule. `Adapt.@adapt_structure` rebuilds the object by calling
# `FourierTransformPlan3D(fields...)` without type parameters; the size parameters
# cannot be inferred from the field values, so that call has no matching method.
# Forward the parameters explicitly and adapt every field in declaration order.
function Adapt.adapt_structure(
    to, ftp::FourierTransformPlan3D{n_rows, n_cols, n_slices}
) where {n_rows, n_cols, n_slices}
    adapted_fields = map(fieldnames(typeof(ftp))) do name
        Adapt.adapt(to, getfield(ftp, name))
    end
    return FourierTransformPlan3D{n_rows, n_cols, n_slices}(adapted_fields...)
end

# Build a FourierTransformPlan3D for a 256 x 256 x 192 volume, print its region and
# both plans, and return the inverse plan (the value of the last `@show`).
function main()
    # Volume dimensions; they become the struct's type parameters
    n_rows = Int64(256)
    n_cols = Int64(256)
    n_slices = Int64(192)

    # Instantiate the Fourier transform struct using only its field defaults
    Ftp = FourierTransformPlan3D{n_rows, n_cols, n_slices}()

    @show Ftp.region
    @show Ftp.A_plan_CuArray
    inverse_plan = @show Ftp.A_inv_plan_CuArray

    return inverse_plan
end

# Script entry point: run the example when this file is included.
main()

now runs and returns

julia> include("test_struct_CUDA.jl")
Ftp.region = (1, 2, 3)
Ftp.A_plan_CuArray = CUFFT ComplexF32 forward plan for 256×256×192 CuArray of ComplexF32
Ftp.A_inv_plan_CuArray = 7.947286e-8 * CUFFT ComplexF32 backward plan for 256×256×192 CuArray of ComplexF32
7.947286e-8 * CUFFT ComplexF32 backward plan for 256×256×192 CuArray of ComplexF32

Would still appreciate a comment from one of the resident experts: am I using struct with CUDA correctly?

It’s a bit strange to have a single struct contain both hard-coded Array- and CuArray-based fields – I guess I’d normally do this by parameterizing the field so that it can be both used for CPU and GPU storage – but this may be fine for your use case.

Thank you for your comment.

I see.
My intent is to have all CuArray-based fields. I’d assumed that by
using CUDA.CUFFT
fftshift would return a CuArray.

Now

# FT frequency shift vector arrays, each converted to a one-dimensional Float32
# CuArray (the result of fftshift is passed to the CuArray{Float32, 1} constructor)
    shifted_freq_vec_row = CuArray{Float32, 1}(fftshift(fftfreq(n_rows) * n_rows))
    shifted_freq_vec_col = CuArray{Float32, 1}(fftshift(fftfreq(n_cols) * n_cols))
    shifted_freq_vec_slice = CuArray{Float32, 1}(fftshift(fftfreq(n_slices) * n_slices))

Will probably parametrize once I’m more familiar with CUDA in Julia.