Error when computing on a view of a CuArray with discrete (vector) indices

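It seems that I can’t compute on a `view` of a CuArray that was created with discrete indices (e.g. `[1,2,1]`): creating the view works, but applying an operation to it fails with a GPU compilation error. I tested the following code on both master and v0.9.0. Is there any workaround?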

julia> xs = randn(3,3)
3×3 Array{Float64,2}:
  1.37316   -0.378109   0.907578
  0.345409  -0.616303  -0.616897
 -0.153726  -0.162556  -0.604177

julia> x = view(cu(xs), 2:3, [1,2,1])
2×3 view(::CuArray{Float32,2}, 2:3, [1, 2, 1]) with eltype Float32:
  0.345409  -0.616303   0.345409
 -0.153726  -0.162556  -0.153726

julia> -x # make computation with x
ERROR: GPU compilation of #23(CuArrays.CuKernelState, CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}, Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},typeof(-),Tuple{Base.Broadcast.Extruded{SubArray{Float32,2,CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global},Tuple{UnitRange{Int64},Array{Int64,1}},false},Tuple{Bool,Bool},Tuple{Int64,Int64}}}}) failed
KernelError: passing and using non-bitstype argument

Argument 4 to your kernel function is of type Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},typeof(-),Tuple{Base.Broadcast.Extruded{SubArray{Float32,2,CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global},Tuple{UnitRange{Int64},Array{Int64,1}},false},Tuple{Bool,Bool},Tuple{Int64,Int64}}}}.
That type is not isbits, and such arguments are only allowed when they are unused by the kernel.

Stacktrace:
 [1] check_invocation(::CUDAnative.CompilerContext, ::LLVM.Function) at C:\Users\juti\.julia\packages\CUDAnative\l7sDn\src\compiler\validation.jl:35
 [2] compile(::CUDAnative.CompilerContext) at C:\Users\juti\.julia\packages\CUDAnative\l7sDn\src\compiler\driver.jl:85
 [3] #compile#95 at C:\Users\juti\.julia\packages\CUDAnative\l7sDn\src\compiler\driver.jl:38 [inlined]
 [4] compile at C:\Users\juti\.julia\packages\CUDAnative\l7sDn\src\compiler\driver.jl:36 [inlined]
 [5] #compile#94 at C:\Users\juti\.julia\packages\CUDAnative\l7sDn\src\compiler\driver.jl:18 [inlined]
 [6] compile at C:\Users\juti\.julia\packages\CUDAnative\l7sDn\src\compiler\driver.jl:16 [inlined]
 [7] macro expansion at C:\Users\juti\.julia\packages\CUDAnative\l7sDn\src\execution.jl:266 [inlined]
 [8] #cufunction#109(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(CUDAnative.cufunction), ::getfield(GPUArrays, Symbol("##23#24")), ::Type{Tuple{CuArrays.CuKernelState,CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},typeof(-),Tuple{Base.Broadcast.Extruded{SubArray{Float32,2,CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global},Tuple{UnitRange{Int64},Array{Int64,1}},false},Tuple{Bool,Bool},Tuple{Int64,Int64}}}}}}) at C:\Users\juti\.julia\packages\CUDAnative\l7sDn\src\execution.jl:237
 [9] cufunction(::Function, ::Type) at C:\Users\juti\.julia\packages\CUDAnative\l7sDn\src\execution.jl:237
 [10] macro expansion at C:\Users\juti\.julia\packages\CUDAnative\l7sDn\src\execution.jl:205 [inlined]
 [11] macro expansion at .\gcutils.jl:87 [inlined]
 [12] macro expansion at C:\Users\juti\.julia\packages\CUDAnative\l7sDn\src\execution.jl:202 [inlined]
 [13] _gpu_call(::CuArrays.CuArrayBackend, ::Function, ::CuArray{Float32,2}, ::Tuple{CuArray{Float32,2},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},typeof(-),Tuple{Base.Broadcast.Extruded{SubArray{Float32,2,CuArray{Float32,2},Tuple{UnitRange{Int64},Array{Int64,1}},false},Tuple{Bool,Bool},Tuple{Int64,Int64}}}}}, ::Tuple{Tuple{Int64},Tuple{Int64}}) at C:\Users\juti\.julia\packages\CuArrays\SoBWz\src\gpuarray_interface.jl:59
 [14] gpu_call at C:\Users\juti\.julia\packages\GPUArrays\t8tJB\src\abstract_gpu_interface.jl:151 [inlined]
 [15] gpu_call(::Function, ::CuArray{Float32,2}, ::Tuple{CuArray{Float32,2},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},typeof(-),Tuple{Base.Broadcast.Extruded{SubArray{Float32,2,CuArray{Float32,2},Tuple{UnitRange{Int64},Array{Int64,1}},false},Tuple{Bool,Bool},Tuple{Int64,Int64}}}}}) at C:\Users\juti\.julia\packages\GPUArrays\t8tJB\src\abstract_gpu_interface.jl:128
 [16] copyto! at C:\Users\juti\.julia\packages\GPUArrays\t8tJB\src\broadcast.jl:48 [inlined]
 [17] copyto! at .\broadcast.jl:797 [inlined]
 [18] copy(::Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArray},Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},typeof(-),Tuple{SubArray{Float32,2,CuArray{Float32,2},Tuple{UnitRange{Int64},Array{Int64,1}},false}}}) at .\broadcast.jl:773
 [19] materialize at .\broadcast.jl:753 [inlined]
 [20] broadcast at .\broadcast.jl:707 [inlined]
 [21] -(::SubArray{Float32,2,CuArray{Float32,2},Tuple{UnitRange{Int64},Array{Int64,1}},false}) at .\arraymath.jl:30
 [22] top-level scope at none:0