Hi again,
(sorry for all the questions!)
I am a bit puzzled that this does not work. I want to use the power function on values in a Matrix. if I uncomment x=1.0, it does work.
function kernel(ydepK::CuDeviceMatrix{Float32})
x = ydepK[1,1]
# x = 1.0
y = CUDAnative.pow(x,2.0)
return nothing
end
function cutest()
y = rand(Float32,3,4)
cuy = CuArray(y)
@cuda blocks=2 threads=2 kernel(cuy)
end
output in case 1:
julia> cudaVFI.cutest()
┌ Debug: (Re)compiling function
│ ctx = CUDAnative.CompilerContext(CUDAnative.KernelWrapper{typeof(Main.cudaVFI.kernel)}(Main.cudaVFI.kernel), Tuple{CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}}, v"6.1.0", true, nothing, nothing, nothing, nothing, nothing, Main.cudaVFI.kernel)
└ @ CUDAnative compiler.jl:494
ERROR: could not compile kernel(CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}) for GPU; kernel returning a value
- return_type = Union{}
Stacktrace:
[1] #compiler_error#43 at /home/floswald/.julia/packages/CUDAnative/mXUk/src/compiler.jl:33 [inlined]
[2] (::getfield(CUDAnative, Symbol("#kw##compiler_error")))(::NamedTuple{(:return_type,),Tuple{Core.TypeofBottom}}, ::typeof(CUDAnative.compiler_error), ::CUDAnative.CompilerContext, ::String) at ./<missing>:0
[3] validate_invocation(::CUDAnative.CompilerContext) at /home/floswald/.julia/packages/CUDAnative/mXUk/src/validation.jl:15
[4] compile_function(::CUDAnative.CompilerContext) at /home/floswald/.julia/packages/CUDAnative/mXUk/src/compiler.jl:496
[5] #cufunction#78(::Base.Iterators.Pairs{Symbol,typeof(Main.cudaVFI.kernel),Tuple{Symbol},NamedTuple{(:inner_f,),Tuple{typeof(Main.cudaVFI.kernel)}}}, ::Function, ::CUDAdrv.CuDevice, ::Function, ::Type) at /home/floswald/.julia/packages/CUDAnative/mXUk/src/compiler.jl:572
[6] (::getfield(CUDAnative, Symbol("#kw##cufunction")))(::NamedTuple{(:inner_f,),Tuple{typeof(Main.cudaVFI.kernel)}}, ::typeof(CUDAnative.cufunction), ::CUDAdrv.CuDevice, ::Function, ::Type) at ./<missing>:0
[7] @generated body at /home/floswald/.julia/packages/CUDAnative/mXUk/src/execution.jl:214 [inlined]
[8] _cuda at /home/floswald/.julia/packages/CUDAnative/mXUk/src/execution.jl:171 [inlined]
[9] macro expansion at ./gcutils.jl:87 [inlined]
[10] cutest() at /home/floswald/git/VFI/Julia/cudaVFI/src/aldrich.jl:214
[11] top-level scope
julia> include("cudaVFI.jl")
WARNING: replacing module cudaVFI.
Main.cudaVFI
case 2
julia> cudaVFI.cutest()
┌ Debug: (Re)compiling function
│ ctx = CUDAnative.CompilerContext(CUDAnative.KernelWrapper{typeof(Main.cudaVFI.kernel)}(Main.cudaVFI.kernel), Tuple{CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}}, v"6.1.0", true, nothing, nothing, nothing, nothing, nothing, Main.cudaVFI.kernel)
└ @ CUDAnative compiler.jl:494
┌ Debug: Module entry point:
│ LLVM.name(entry) = "ptxcall_kernel_28"
└ @ CUDAnative utils.jl:7
┌ Debug: Compiled CUDAnative.KernelWrapper{typeof(Main.cudaVFI.kernel)}(Main.cudaVFI.kernel) to PTX 6.1.0 for SM 6.1.0 using 2 registers.
│ Memory usage: 0 B local, 0 B shared, 0 B constant
└ @ CUDAnative compiler.jl:584