I’m running the following Flux code, taken directly from the Flux “60 Minute Blitz” tutorial. I get a strange error when moving things to the GPU with the |> gpu construct, but the same code runs fine on the CPU. I’ve filed this as a bug against the Flux project, but the maintainers believe it’s an issue with CUDA.jl. I’m new to both CUDA and Flux, so I’m at a loss here.
EDIT: For what it’s worth, this code works when I revert to CUDA@3.5, but fails on both CUDA@3.6.0 and CUDA@3.6.1.
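In case it helps anyone reproduce the downgrade, this is roughly how I pinned the older release (a sketch; the exact Pkg invocation may differ in your environment):

using Pkg
Pkg.add(name="CUDA", version="3.5")  # install the last release that works for me
Pkg.pin("CUDA")                      # stop the resolver from upgrading it again
Pkg.status("CUDA")                   # confirm the active version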
using Statistics
using Flux, Flux.Optimise
using MLDatasets: CIFAR10
using Images.ImageCore
using Flux: onehotbatch, onecold
using Base.Iterators: partition
using CUDA
train_x, train_y = CIFAR10.traindata(Float32)
labels = onehotbatch(train_y, 0:9)

using Plots
image(x) = colorview(RGB, permutedims(x, (3, 2, 1)))

# Batches of 1000 images; the last 1000 are held back for validation.
train = [(train_x[:, :, :, i], labels[:, i]) for i in partition(1:49000, 1000)] |> gpu
valset = 49001:50000
valX = train_x[:, :, :, valset] |> gpu
valY = labels[:, valset] |> gpu
m = Chain(
    Conv((5, 5), 3 => 16, relu),
    MaxPool((2, 2)),
    Conv((5, 5), 16 => 8, relu),
    MaxPool((2, 2)),
    x -> reshape(x, :, size(x, 4)),
    Dense(200, 120),
    Dense(120, 84),
    Dense(84, 10),
    softmax) |> gpu
using Flux: crossentropy, Momentum
loss(x, y) = sum(crossentropy(m(x), y))
opt = Momentum(0.01)
accuracy(x, y) = mean(onecold(m(x), 0:9) .== onecold(y, 0:9))
epochs = 10
for epoch in 1:epochs
    for d in train
        gs = gradient(params(m)) do
            l = loss(d...)
        end
        update!(opt, params(m), gs)
    end
    @show accuracy(valX, valY)
end
Running this, I get the following error:
ERROR: LoadError: TypeError: in typeassert, expected Integer, got a value of type Missing
Stacktrace:
[1] get_size
@ ~/.julia/packages/CUDA/DFAea/lib/utils/call.jl:58 [inlined]
[2] with_workspace(f::CUDA.CUDNN.var"#1150#1153"{Vector{CUDA.CUDNN.cudnnConvolutionFwdAlgoPerfStruct}, Vector{Int32}, Int64, CUDA.CUDNN.cudnnTensorDescriptor, CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CUDNN.cudnnFilterDescriptor, CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CUDNN.cudnnConvolutionDescriptor, CUDA.CUDNN.cudnnTensorDescriptor, CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}}, eltyp::Type{UInt8}, size::CUDA.CUDNN.var"#workspaceSize#1152"{CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}}, fallback::Nothing; keep::Bool)
@ CUDA.APIUtils ~/.julia/packages/CUDA/DFAea/lib/utils/call.jl:61
[3] with_workspace
@ ~/.julia/packages/CUDA/DFAea/lib/utils/call.jl:58 [inlined]
[4] #with_workspace#1
@ ~/.julia/packages/CUDA/DFAea/lib/utils/call.jl:53 [inlined]
[5] with_workspace (repeats 2 times)
@ ~/.julia/packages/CUDA/DFAea/lib/utils/call.jl:53 [inlined]
[6] #1149
@ ~/.julia/packages/CUDA/DFAea/lib/cudnn/convolution.jl:179 [inlined]
[7] get!(default::CUDA.CUDNN.var"#1149#1151"{CUDA.CUDNN.cudnnTensorDescriptor, CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CUDNN.cudnnFilterDescriptor, CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CUDNN.cudnnConvolutionDescriptor, CUDA.CUDNN.cudnnTensorDescriptor, CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}}, h::Dict{Tuple, CUDA.CUDNN.cudnnConvolutionFwdAlgoPerfStruct}, key::Tuple{CUDA.CUDNN.cudnnTensorDescriptor, CUDA.CUDNN.cudnnFilterDescriptor, CUDA.CUDNN.cudnnConvolutionDescriptor, Nothing, CUDA.CUDNN.cudnnActivationMode_t})
@ Base ./dict.jl:464
[8] cudnnConvolutionFwdAlgoPerf(xDesc::CUDA.CUDNN.cudnnTensorDescriptor, x::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, wDesc::CUDA.CUDNN.cudnnFilterDescriptor, w::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, convDesc::CUDA.CUDNN.cudnnConvolutionDescriptor, yDesc::CUDA.CUDNN.cudnnTensorDescriptor, y::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, biasDesc::Nothing, activation::CUDA.CUDNN.cudnnActivationMode_t)
@ CUDA.CUDNN ~/.julia/packages/CUDA/DFAea/lib/cudnn/convolution.jl:174
[9] cudnnConvolutionForwardAD(w::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, x::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, bias::Nothing, z::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}; y::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, activation::CUDA.CUDNN.cudnnActivationMode_t, convDesc::CUDA.CUDNN.cudnnConvolutionDescriptor, wDesc::CUDA.CUDNN.cudnnFilterDescriptor, xDesc::CUDA.CUDNN.cudnnTensorDescriptor, yDesc::CUDA.CUDNN.cudnnTensorDescriptor, zDesc::CUDA.CUDNN.cudnnTensorDescriptor, biasDesc::Nothing, alpha::Base.RefValue{Float32}, beta::Base.RefValue{Float32}, dw::Base.RefValue{Any}, dx::Base.RefValue{Any}, dz::Base.RefValue{Any}, dbias::Base.RefValue{Any}, dready::Base.RefValue{Bool})
@ CUDA.CUDNN ~/.julia/packages/CUDA/DFAea/lib/cudnn/convolution.jl:102
[10] #cudnnConvolutionForwardWithDefaults#1143
@ ~/.julia/packages/CUDA/DFAea/lib/cudnn/convolution.jl:96 [inlined]
[11] #cudnnConvolutionForward!#1142
@ ~/.julia/packages/CUDA/DFAea/lib/cudnn/convolution.jl:53 [inlined]
[12] conv!(y::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, x::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, w::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, cdims::DenseConvDims{2, (5, 5), 3, 16, 1, (1, 1), (0, 0, 0, 0), (1, 1), false}; alpha::Int64, beta::Int64, algo::Int64)
@ NNlibCUDA ~/.julia/packages/NNlibCUDA/IeeBk/src/cudnn/conv.jl:34
[13] conv!
@ ~/.julia/packages/NNlibCUDA/IeeBk/src/cudnn/conv.jl:27 [inlined]
[14] conv(x::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, w::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, cdims::DenseConvDims{2, (5, 5), 3, 16, 1, (1, 1), (0, 0, 0, 0), (1, 1), false}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ NNlib ~/.julia/packages/NNlib/tvMmZ/src/conv.jl:91
[15] conv
@ ~/.julia/packages/NNlib/tvMmZ/src/conv.jl:89 [inlined]
[16] #rrule#241
@ ~/.julia/packages/NNlib/tvMmZ/src/conv.jl:318 [inlined]
[17] rrule
@ ~/.julia/packages/NNlib/tvMmZ/src/conv.jl:309 [inlined]
[18] rrule
@ ~/.julia/packages/ChainRulesCore/sHMAp/src/rules.jl:134 [inlined]
[19] chain_rrule
@ ~/.julia/packages/Zygote/umM0L/src/compiler/chainrules.jl:216 [inlined]
[20] macro expansion
@ ~/.julia/packages/Zygote/umM0L/src/compiler/interface2.jl:0 [inlined]
[21] _pullback(::Zygote.Context, ::typeof(conv), ::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, ::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, ::DenseConvDims{2, (5, 5), 3, 16, 1, (1, 1), (0, 0, 0, 0), (1, 1), false})
@ Zygote ~/.julia/packages/Zygote/umM0L/src/compiler/interface2.jl:9
[22] _pullback
@ ~/.julia/packages/Flux/BPPNj/src/layers/conv.jl:166 [inlined]
[23] _pullback(ctx::Zygote.Context, f::Conv{2, 4, typeof(relu), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, args::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/umM0L/src/compiler/interface2.jl:0
[24] _pullback
@ ~/.julia/packages/Flux/BPPNj/src/layers/basic.jl:47 [inlined]
[25] _pullback(::Zygote.Context, ::typeof(Flux.applychain), ::Tuple{Conv{2, 4, typeof(relu), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, MaxPool{2, 4}, Conv{2, 4, typeof(relu), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, MaxPool{2, 4}, var"#9#10", Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, typeof(softmax)}, ::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/umM0L/src/compiler/interface2.jl:0
[26] _pullback
@ ~/.julia/packages/Flux/BPPNj/src/layers/basic.jl:49 [inlined]
[27] _pullback(ctx::Zygote.Context, f::Chain{Tuple{Conv{2, 4, typeof(relu), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, MaxPool{2, 4}, Conv{2, 4, typeof(relu), CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, MaxPool{2, 4}, var"#9#10", Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, typeof(softmax)}}, args::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/umM0L/src/compiler/interface2.jl:0
[28] _pullback
@ ~/Code/ahrm/DeepKoopmen/CNNTutorial.jl:110 [inlined]
[29] _pullback(::Zygote.Context, ::typeof(loss), ::CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, ::Flux.OneHotArray{UInt32, 10, 1, 2, CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}})
@ Zygote ~/.julia/packages/Zygote/umM0L/src/compiler/interface2.jl:0
[30] _apply
@ ./boot.jl:814 [inlined]
[31] adjoint
@ ~/.julia/packages/Zygote/umM0L/src/lib/lib.jl:200 [inlined]
[32] _pullback
@ ~/.julia/packages/ZygoteRules/AIbCs/src/adjoint.jl:65 [inlined]
[33] _pullback
@ ~/Code/ahrm/DeepKoopmen/CNNTutorial.jl:120 [inlined]
[34] _pullback(::Zygote.Context, ::var"#11#12"{Tuple{CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, Flux.OneHotArray{UInt32, 10, 1, 2, CuArray{UInt32, 1, CUDA.Mem.DeviceBuffer}}}})
@ Zygote ~/.julia/packages/Zygote/umM0L/src/compiler/interface2.jl:0
[35] pullback(f::Function, ps::Zygote.Params)
@ Zygote ~/.julia/packages/Zygote/umM0L/src/compiler/interface.jl:352
[36] gradient(f::Function, args::Zygote.Params)
@ Zygote ~/.julia/packages/Zygote/umM0L/src/compiler/interface.jl:75
[37] top-level scope
@ ~/Code/ahrm/DeepKoopmen/CNNTutorial.jl:119
[38] include(fname::String)
@ Base.MainInclude ./client.jl:451
[39] top-level scope
@ REPL[1]:1
in expression starting at /home/ghenshaw/Code/ahrm/DeepKoopmen/CNNTutorial.jl:117
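Based on the stack trace, the failure seems to come from the cuDNN convolution algorithm lookup (cudnnConvolutionFwdAlgoPerf) rather than anything specific to the tutorial, so I suspect a single Conv layer is enough to trigger it. Here is a minimal sketch of what I’d try (my assumption, not verified beyond my setup; the layer and batch sizes are arbitrary):

using Flux, CUDA
CUDA.versioninfo()                         # report the CUDA / cuDNN versions in use
m = Conv((5, 5), 3 => 16, relu) |> gpu     # a single conv layer on the GPU
x = rand(Float32, 32, 32, 3, 4) |> gpu     # dummy batch of four 32x32 RGB images
y = m(x)                                   # the forward pass alone goes through conv! / cuDNN
gs = gradient(() -> sum(m(x)), params(m))  # same path the training loop takes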