Hi,
I'm trying to train a denoising autoencoder for data imputation. I got it working on the CPU, but I get a KernelError when moving to the GPU, even though I checked that my loss function itself runs without error on the GPU (see below).
Here is an MWE and the full error I get:
### MWE: denoising autoencoder
# load necessary packages
using LinearAlgebra, Random
using Flux
using Flux: @epochs, mse, throttle
using Base.Iterators: partition
using CUDA
### utility functions
# rewrite of the median function to be used in Flux loss
function median_(x)
    n = length(x)
    p = sortperm(x)
    if n % 2 == 1
        return 1.0 * x[p[div(n, 2) + 1]]
    else
        return 0.5 * (x[p[div(n, 2)]] + x[p[div(n, 2) + 1]])
    end
end
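# quick sanity check (my addition, not needed for the MWE): median_ agrees
# with Statistics.median on both odd- and even-length vectors
using Statistics: median
@assert median_([3.0, 1.0, 2.0]) == median([3.0, 1.0, 2.0])           # odd length
@assert median_([4.0, 1.0, 3.0, 2.0]) == median([4.0, 1.0, 3.0, 2.0]) # even length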
# Loss for the denoising autoencoder
function myloss(x, y, m, args)
    σ, cuda = args.σ, args.cuda
    tmp = deepcopy(x)
    if cuda
        tmp2 = m(tmp + gpu(σ .* randn(size(x))))[y]
    else
        tmp2 = m(tmp + σ .* randn(size(x)))[y]
    end
    tmp = tmp[y]
    return mse(tmp2, tmp)
end
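# For reference, a masked-mse variant I am considering as a workaround
# (untested sketch, not the code that produces the error below): multiplying
# by the mask instead of indexing with it keeps everything as dense GPU
# broadcasts, assuming entries zeroed out by the mask contribute nothing.
function myloss_masked(x, y, m, args)
    σ, cuda = args.σ, args.cuda
    noise = σ .* randn(Float32, size(x))
    x̂ = m(x + (cuda ? gpu(noise) : noise))
    return sum(abs2.((x̂ .- x) .* y)) / sum(y) # mse over observed entries only
end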
# Process the data so the denoising autoencoder can impute missing values
function get_processed_data(X, args)
    # location of the non-missing (finite) values
    Ω = .!isinf.(X) # X .!= 0
    # replace -Inf with 0.0
    X[.!Ω] .= 0.0
    # partition into batches of size batchsize
    perm = randperm(size(X, 1))
    train_data = [float(permutedims(X[x, :])) for x in partition(perm, args.batchsize)]
    train_nz = [permutedims(Ω[x, :]) for x in partition(perm, args.batchsize)]
    if args.cuda
        train_data = gpu(train_data)
        train_nz = gpu(train_nz)
    end
    return train_data, train_nz
end
###
# create some random test data; it is only there to check that the model trains
X = 10 .+ 10 .* randn(100, 34)
X[X .< 0.0] .= 0.0
# train the Autoencoder for data imputation
struct Args
    lr::Float64        # learning rate
    epochs::Int        # number of epochs
    N::Int             # size of the encoding
    batchsize::Int     # batch size for training
    sample_len::Int    # length of one sample (number of features)
    throttle::Int      # throttle timeout
    σ::Float64         # noise standard deviation for the denoising autoencoder
    cuda::Bool         # is CUDA on
end
args = Args(1e-3, 10, 150, 10, 34, 1, 0.1, has_cuda())
# log-transform: the zeros become -Inf and play the role of missing values
Y = log.(X)
# construct the Autoencoder model
if args.cuda # use GPU if available
    CUDA.allowscalar(false)
    encoder = Dense(args.sample_len, args.N, leakyrelu) |> gpu
    decoder = Dense(args.N, args.sample_len, leakyrelu) |> gpu
else
    encoder = Dense(args.sample_len, args.N, leakyrelu)
    decoder = Dense(args.N, args.sample_len, leakyrelu)
end
mae = Chain(encoder, decoder)
train_data, train_nz = get_processed_data(Y, args)
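# shape sanity check (my addition): Dense expects features × batch,
# i.e. (sample_len, batchsize) here
@assert size(first(train_data)) == (args.sample_len, args.batchsize)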
loss = (x, y) -> myloss(x, y, mae, args)
evalcb = throttle(() -> @show(median_([loss(train_data[i], train_nz[i]) for i in 1:length(train_data)])), args.throttle)
# test the loss function: it runs without error, even on the GPU
myloss(train_data[1], train_nz[1], mae, args) # returns 3.956487f0
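# Narrowing it down (my addition): I suspect the same KernelError would be
# reproducible without train!, by taking the gradient of the loss directly:
# Flux.gradient(() -> myloss(train_data[1], train_nz[1], mae, args), Flux.params(mae))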
# train with the designed learning rate for the given number of epochs
opt = RADAM(args.lr)
@epochs args.epochs Flux.train!(loss, Flux.params(mae), zip(train_data, train_nz), opt; cb = evalcb)
[ Info: Epoch 1
ERROR: LoadError: GPU compilation of kernel broadcast_kernel(CUDA.CuKernelContext, SubArray{Float32,1,CuDeviceArray{Float32,2,1},Tuple{Array{CartesianIndex{2},1}},false}, Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(Zygote.accum),Tuple{Base.Broadcast.Extruded{SubArray{Float32,1,CuDeviceArray{Float32,2,1},Tuple{Array{CartesianIndex{2},1}},false},Tuple{Bool},Tuple{Int64}},Base.Broadcast.Extruded{CuDeviceArray{Float32,1,1},Tuple{Bool},Tuple{Int64}}}}, Int64) failed
KernelError: passing and using non-bitstype argument
Argument 3 to your kernel function is of type SubArray{Float32,1,CuDeviceArray{Float32,2,1},Tuple{Array{CartesianIndex{2},1}},false}, which is not isbits:
.indices is of type Tuple{Array{CartesianIndex{2},1}} which is not isbits.
.1 is of type Array{CartesianIndex{2},1} which is not isbits.
Stacktrace:
[1] check_invocation(::GPUCompiler.CompilerJob, ::LLVM.Function) at C:\Users\geoff\.julia\packages\GPUCompiler\uTpNx\src\validation.jl:68
[2] macro expansion at C:\Users\geoff\.julia\packages\GPUCompiler\uTpNx\src\driver.jl:238 [inlined]
[3] macro expansion at C:\Users\geoff\.julia\packages\TimerOutputs\ZmKD7\src\TimerOutput.jl:206 [inlined]
[4] codegen(::Symbol, ::GPUCompiler.CompilerJob; libraries::Bool, deferred_codegen::Bool, optimize::Bool, strip::Bool, validate::Bool, only_entry::Bool) at C:\Users\geoff\.julia\packages\GPUCompiler\uTpNx\src\driver.jl:237
[5] compile(::Symbol, ::GPUCompiler.CompilerJob; libraries::Bool, deferred_codegen::Bool, optimize::Bool, strip::Bool, validate::Bool, only_entry::Bool) at C:\Users\geoff\.julia\packages\GPUCompiler\uTpNx\src\driver.jl:39
[6] compile at C:\Users\geoff\.julia\packages\GPUCompiler\uTpNx\src\driver.jl:35 [inlined]
[7] cufunction_compile(::GPUCompiler.FunctionSpec; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at C:\Users\geoff\.julia\packages\CUDA\YeS8q\src\compiler\execution.jl:310
[8] cufunction_compile(::GPUCompiler.FunctionSpec) at C:\Users\geoff\.julia\packages\CUDA\YeS8q\src\compiler\execution.jl:305
[9] check_cache(::Dict{UInt64,Any}, ::Any, ::Any, ::GPUCompiler.FunctionSpec{GPUArrays.var"#broadcast_kernel#12",Tuple{CUDA.CuKernelContext,SubArray{Float32,1,CuDeviceArray{Float32,2,1},Tuple{Array{CartesianIndex{2},1}},false},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(Zygote.accum),Tuple{Base.Broadcast.Extruded{SubArray{Float32,1,CuDeviceArray{Float32,2,1},Tuple{Array{CartesianIndex{2},1}},false},Tuple{Bool},Tuple{Int64}},Base.Broadcast.Extruded{CuDeviceArray{Float32,1,1},Tuple{Bool},Tuple{Int64}}}},Int64}}, ::UInt64; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at C:\Users\geoff\.julia\packages\GPUCompiler\uTpNx\src\cache.jl:40
[10] broadcast_kernel at C:\Users\geoff\.julia\packages\GPUArrays\WV76E\src\host\broadcast.jl:60 [inlined]
[11] cached_compilation at C:\Users\geoff\.julia\packages\GPUCompiler\uTpNx\src\cache.jl:65 [inlined]
[12] cufunction(::GPUArrays.var"#broadcast_kernel#12", ::Type{Tuple{CUDA.CuKernelContext,SubArray{Float32,1,CuDeviceArray{Float32,2,1},Tuple{Array{CartesianIndex{2},1}},false},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(Zygote.accum),Tuple{Base.Broadcast.Extruded{SubArray{Float32,1,CuDeviceArray{Float32,2,1},Tuple{Array{CartesianIndex{2},1}},false},Tuple{Bool},Tuple{Int64}},Base.Broadcast.Extruded{CuDeviceArray{Float32,1,1},Tuple{Bool},Tuple{Int64}}}},Int64}}; name::Nothing, kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at C:\Users\geoff\.julia\packages\CUDA\YeS8q\src\compiler\execution.jl:297
[13] cufunction at C:\Users\geoff\.julia\packages\CUDA\YeS8q\src\compiler\execution.jl:294 [inlined]
[14] #launch_heuristic#853 at C:\Users\geoff\.julia\packages\CUDA\YeS8q\src\gpuarrays.jl:19 [inlined]
[15] launch_heuristic at C:\Users\geoff\.julia\packages\CUDA\YeS8q\src\gpuarrays.jl:17 [inlined]
[16] copyto! at C:\Users\geoff\.julia\packages\GPUArrays\WV76E\src\host\broadcast.jl:66 [inlined]
[17] copyto! at .\broadcast.jl:886 [inlined]
[18] materialize! at .\broadcast.jl:848 [inlined]
[19] materialize! at .\broadcast.jl:845 [inlined]
[20] (::Zygote.var"#356#358"{CuArray{Float32,2},Tuple{CuArray{Bool,2}}})(::CuArray{Float32,1}) at C:\Users\geoff\.julia\packages\Zygote\ggM8Z\src\lib\array.jl:42
[21] (::Zygote.var"#2209#back#352"{Zygote.var"#356#358"{CuArray{Float32,2},Tuple{CuArray{Bool,2}}}})(::CuArray{Float32,1}) at C:\Users\geoff\.julia\packages\ZygoteRules\OjfTt\src\adjoint.jl:59
[22] myloss at d:\Julia\jl\mweDAE.jl:31 [inlined]
[23] (::typeof(∂(myloss)))(::Float32) at C:\Users\geoff\.julia\packages\Zygote\ggM8Z\src\compiler\interface2.jl:0
[24] #33 at d:\Julia\jl\mweDAE.jl:84 [inlined]
[25] (::typeof(∂(#33)))(::Float32) at C:\Users\geoff\.julia\packages\Zygote\ggM8Z\src\compiler\interface2.jl:0
[26] #150 at C:\Users\geoff\.julia\packages\Zygote\ggM8Z\src\lib\lib.jl:191 [inlined]
[27] #1693#back at C:\Users\geoff\.julia\packages\ZygoteRules\OjfTt\src\adjoint.jl:59 [inlined]
[28] #15 at C:\Users\geoff\.julia\packages\Flux\sY3yx\src\optimise\train.jl:103 [inlined]
[29] (::typeof(∂(λ)))(::Float32) at C:\Users\geoff\.julia\packages\Zygote\ggM8Z\src\compiler\interface2.jl:0
[30] (::Zygote.var"#54#55"{Zygote.Params,Zygote.Context,typeof(∂(λ))})(::Float32) at C:\Users\geoff\.julia\packages\Zygote\ggM8Z\src\compiler\interface.jl:172
[31] gradient(::Function, ::Zygote.Params) at C:\Users\geoff\.julia\packages\Zygote\ggM8Z\src\compiler\interface.jl:49
[32] macro expansion at C:\Users\geoff\.julia\packages\Flux\sY3yx\src\optimise\train.jl:102 [inlined]
[33] macro expansion at C:\Users\geoff\.julia\packages\Juno\n6wyj\src\progress.jl:134 [inlined]
[34] train!(::Function, ::Zygote.Params, ::Base.Iterators.Zip{Tuple{Array{CuArray{Float32,2},1},Array{CuArray{Bool,2},1}}}, ::RADAM; cb::Flux.var"#throttled#42"{Flux.var"#throttled#38#43"{Bool,Bool,var"#35#37",Int64}}) at C:\Users\geoff\.julia\packages\Flux\sY3yx\src\optimise\train.jl:100
[35] top-level scope at C:\Users\geoff\.julia\packages\Flux\sY3yx\src\optimise\train.jl:137
[36] top-level scope at C:\Users\geoff\.julia\packages\Juno\n6wyj\src\progress.jl:134
[37] include_string(::Function, ::Module, ::String, ::String) at .\loading.jl:1091
[38] invokelatest(::Any, ::Any, ::Vararg{Any,N} where N; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at .\essentials.jl:710
[39] invokelatest(::Any, ::Any, ::Vararg{Any,N} where N) at .\essentials.jl:709
[40] inlineeval(::Module, ::String, ::Int64, ::Int64, ::String; softscope::Bool) at c:\Users\geoff\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\eval.jl:185
[41] (::VSCodeServer.var"#61#65"{String,Int64,Int64,String,Module,Bool,VSCodeServer.ReplRunCodeRequestParams})() at c:\Users\geoff\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\eval.jl:144
[42] withpath(::VSCodeServer.var"#61#65"{String,Int64,Int64,String,Module,Bool,VSCodeServer.ReplRunCodeRequestParams}, ::String) at c:\Users\geoff\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\repl.jl:124
[43] (::VSCodeServer.var"#60#64"{String,Int64,Int64,String,Module,Bool,Bool,VSCodeServer.ReplRunCodeRequestParams})() at c:\Users\geoff\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\eval.jl:142
[44] hideprompt(::VSCodeServer.var"#60#64"{String,Int64,Int64,String,Module,Bool,Bool,VSCodeServer.ReplRunCodeRequestParams}) at c:\Users\geoff\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\repl.jl:36
[45] (::VSCodeServer.var"#59#63"{String,Int64,Int64,String,Module,Bool,Bool,VSCodeServer.ReplRunCodeRequestParams})() at c:\Users\geoff\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\eval.jl:110
[46] with_logstate(::Function, ::Any) at .\logging.jl:408
[47] with_logger at .\logging.jl:514 [inlined]
[48] (::VSCodeServer.var"#58#62"{VSCodeServer.ReplRunCodeRequestParams})() at c:\Users\geoff\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\eval.jl:109
[49] #invokelatest#1 at .\essentials.jl:710 [inlined]
[50] invokelatest(::Any) at .\essentials.jl:709
[51] macro expansion at c:\Users\geoff\.vscode\extensions\julialang.language-julia-1.0.10\scripts\packages\VSCodeServer\src\eval.jl:27 [inlined]
in expression starting at d:\Julia\jl\mweDAE.jl:90
If I read the stack trace correctly, the failure happens in the adjoint of the boolean indexing [y] inside myloss (frame [20], Zygote lib/array.jl:42): the gradient is accumulated into a view of the CuArray indexed by a plain Array{CartesianIndex{2},1}, which is not isbits and so cannot be passed to a GPU kernel. I am not sure how to work around this, though.
Any help solving this issue would be much appreciated!