Arrays of arrays and arrays of structures in CUDA kernels cause random errors

Here’s a hacky adaptor that performs all of the conversions (which are expensive, as explained above) ahead of time:

using CUDA, Adapt

"""
    Nested{T}

Dummy wrapper around a single value `x` of type `T`, used to build complex
nestings of arrays and structs.

`Nested(x)` infers `T` from its argument through the outer constructor that
Julia generates automatically for parametric structs, so no hand-written
`Nested(x::T) where T` method is needed (defining one would merely overwrite
the generated method with an identical one).
"""
struct Nested{T}
	x::T
end
# adapting a `Nested` adapts the wrapped value and re-wraps the result
function Adapt.adapt_structure(to, nested::Nested)
	inner = adapt(to, nested.x)
	return Nested(inner)
end

"""
    DeepAdaptor()

A hacky adaptor that eagerly converts all arrays to GPU device arrays.

For ownership purposes, the adaptor has a list of `roots`. This means you
should keep the adaptor alive until you're done with the adapted values.
"""
struct DeepAdaptor
	# `Vector{CuArray}` rather than `Array{CuArray}`: the latter leaves the number
	# of dimensions unspecified, while the list is always one-dimensional (it is
	# created as `CuArray[]` and grown with `push!`).
	roots::Vector{CuArray}
	DeepAdaptor() = new(CuArray[])
end
"""
    Adapt.adapt_storage(to::DeepAdaptor, xs::Array)

Adapt every element of `xs` with `to`, upload the resulting array to the GPU,
and return the result of `cudaconvert` on the uploaded array. The uploaded
`CuArray` itself is appended to `to.roots`.
"""
function Adapt.adapt_storage(to::DeepAdaptor, xs::Array)
	# adapt the elements first, so nested arrays are handled before their container
	converted = map(x -> adapt(to, x), xs)

	# upload to the GPU and register the upload with the adaptor
	uploaded = CuArray(converted)
	push!(to.roots, uploaded)

	# immediately convert to a CuDeviceArray, since we can't have CuArray{CuArray}
	return cudaconvert(uploaded)
end

# Demo: deep-adapt a host vector of `Nested` values and show the adapted type.
function main()
	to = DeepAdaptor()
	y = adapt(to, [Nested([42])])

	# the adaptor holds the roots, so keep it alive for as long as `y` is in use
	return GC.@preserve to begin
		# use y here
		@show typeof(y)
	end
end

This successfully converts a Vector{Nested{Vector{Int64}}} into a CuDeviceVector{Nested{CuDeviceVector{Int64, 1}}, 1} for use on the GPU. Because the adaptor holds the list of roots for the adapted values, you’re responsible for keeping it alive (e.g. with `GC.@preserve`) for as long as you use its output values.