Ok, sorry. I will try to be more concise.
I have two problems:
- Routines that were dispatching based on the type returned by threadIdx() are now broken. MWE:
# Activate the project in the current directory and print its package status,
# so the CUDA.jl version used for this run is visible in the output.
import Pkg
Pkg.activate(".")
using CUDA
Pkg.status()
# Helper with a single method, defined only for `Int64` arguments.
# Returns a `Float64` zero; any other argument type has no matching method.
function idx(i::Int64)
    return 1.0 * zero(i)
end
# Kernel: every thread stores `idx(b)` into one element of `ac`, addressed by
# the thread's x-index (first dimension) and the block's x-index (second one).
function krnl_foo!(ac)
    # b is the x component of the thread index, r the x component of the block index.
    b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
    # Which `idx` method is called depends on the type of `b`.
    ac[b,r] = idx(b)
 return nothing
end
# Host-side driver: build a 10×10 random matrix and copy it into a CuArray.
A = randn(10,10)
ac = CuArray(A)
# Launch the kernel with 10 threads per block on 10 blocks (one thread per
# matrix element) under @device_code_warntype, inside a CUDA.@sync block.
CUDA.@sync begin
  @device_code_warntype    CUDA.@cuda threads=10 blocks=10 krnl_foo!(ac)
end
# Copy the result back into a host Array and compare each element with zero;
# the kernel writes idx(b), a zero, into every element it touches.
A = Array(ac)
A .== zero(A)
This works in CUDA v3.3.3 but fails in CUDA v3.5.0, where `threadIdx().x` is no longer an `Int64`, so no method of `idx` matches.
- Even after correcting this by changing the argument type expected by idx() (so that the code runs again), @device_code_warntype does not report the correct types. MWE:
# Activate the project in the current directory and print its package status,
# so the CUDA.jl version used for this run is visible in the output.
import Pkg
Pkg.activate(".")
using CUDA
Pkg.status()
# Helper with a single method, defined only for `Int32` arguments; the
# argument type was changed to `Int32` for CUDA v3.5.0.
# Returns a `Float64` zero; any other argument type has no matching method.
function idx(i::Int32)
    return 1.0 * zero(i)
end
# Kernel: every thread stores `idx(b)` into one element of `ac`, addressed by
# the thread's x-index (first dimension) and the block's x-index (second one).
function krnl_foo!(ac)
    # b is the x component of the thread index, r the x component of the block index.
    b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
    # Which `idx` method is called depends on the type of `b`.
    ac[b,r] = idx(b)
 return nothing
end
# Host-side driver: build a 10×10 random matrix and copy it into a CuArray.
A = randn(10,10)
ac = CuArray(A)
# Launch the kernel with 10 threads per block on 10 blocks (one thread per
# matrix element) under @device_code_warntype, inside a CUDA.@sync block.
CUDA.@sync begin
  @device_code_warntype    CUDA.@cuda threads=10 blocks=10 krnl_foo!(ac)
end
# Copy the result back into a host Array and compare each element with zero;
# the kernel writes idx(b), a zero, into every element it touches.
A = Array(ac)
A .== zero(A)
This works in CUDA v3.5.0, but @device_code_warntype claims that b and r are of type Union{}:
 Activating environment at `~/code/CUDAv3.5.0/Project.toml`
      Status `~/code/CUDAv3.5.0/Project.toml`
  [052768ef] CUDA v3.5.0
PTX CompilerJob of kernel krnl_foo!(CuDeviceMatrix{Float64, 1}) for sm_70
Variables
  #self#::Core.Const(krnl_foo!)
  ac::CuDeviceMatrix{Float64, 1}
  r::Union{}
  b::Union{}
Body::Union{}
1 ─ %1 = CUDA.threadIdx::Core.Const(CUDA.threadIdx)
│        (%1)()
│        Core.Const(:(Base.getproperty(%2, :x)))
│        Core.Const(:(CUDA.blockIdx))
│        Core.Const(:((%4)()))
│        Core.Const(:(Base.getproperty(%5, :x)))
│        Core.Const(:(b = %3))
│        Core.Const(:(r = %6))
│        Core.Const(:(Main.idx(b)))
│        Core.Const(:(Base.setindex!(ac, %9, b, r)))
└──      Core.Const(:(return Main.nothing))
10×10 BitMatrix:
 1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1
 1  1  1  1  1  1  1  1  1  1
In my particular case, because of the first problem, a substantial part of my code is broken in CUDA v3.5.0.
Many thanks!