Flux - Batch data loop in callback causing GPU Memory Error

I’m getting an error that seems similar to the one in the original post:

┌ Warning: Performing scalar operations on GPU arrays: This is very slow, consider disallowing these operations with `allowscalar(false)`
└ @ GPUArrays ~/.julia/packages/GPUArrays/JqOUg/src/host/indexing.jl:43
ERROR: LoadError: CUDA error: out of memory (code 2, ERROR_OUT_OF_MEMORY)
Stacktrace:
 [1] throw_api_error(::CUDAdrv.cudaError_enum) at /home/natale/.julia/packages/CUDAdrv/Uc14X/src/error.jl:105
 [2] CUDAdrv.CuModule(::String, ::Dict{CUDAdrv.CUjit_option_enum,Any}) at /home/natale/.julia/packages/CUDAdrv/Uc14X/src/module.jl:42
 [3] cufunction_slow(::Function, ::Type{T} where T, ::Int64; name::Nothing, kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:356
 [4] #219 at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:393 [inlined]
 [5] get!(::CUDAnative.var"#219#220"{Nothing,Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},typeof(CuArrays.partial_mapreduce_grid),DataType,Int64}, ::Dict{UInt64,CUDAnative.HostKernel}, ::UInt64) at ./dict.jl:452
 [6] macro expansion at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:392 [inlined]
 [7] macro expansion at ./lock.jl:183 [inlined]
 [8] cufunction_fast(::Function, ::Type{T} where T, ::Int64; name::Nothing, kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:391
 [9] cufunction(::typeof(CuArrays.partial_mapreduce_grid), ::Type{Tuple{typeof(identity),typeof(|),Int64,CartesianIndices{1,Tuple{Base.OneTo{Int64}}},CartesianIndices{1,Tuple{Base.OneTo{Int64}}},Val{true},CUDAnative.CuDeviceArray{Int64,2,CUDAnative.AS.Global},Base.Broadcast.Broadcasted{CuArrays.CuArrayStyle{1},Tuple{Base.OneTo{Int64}},Flux.var"#33#34"{Tuple{UnitRange{Int64}}},Tuple{CUDAnative.CuDeviceArray{Flux.OneHotVector,1,CUDAnative.AS.Global}}}}}; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:422
 [10] cufunction(::Function, ::Type{T} where T) at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:422
 [11] macro expansion at /home/natale/.julia/packages/CuArrays/YFdj7/src/mapreduce.jl:197 [inlined]
 [12] mapreducedim!(::Function, ::Function, ::CuArrays.CuArray{Int64,1,Nothing}, ::Base.Broadcast.Broadcasted{CuArrays.CuArrayStyle{1},Tuple{Base.OneTo{Int64}},Flux.var"#33#34"{Tuple{UnitRange{Int64}}},Tuple{CuArrays.CuArray{Flux.OneHotVector,1,Nothing}}}; init::Int64) at /home/natale/.julia/packages/CUDAnative/ierw8/src/nvtx/highlevel.jl:83
 [13] #_mapreduce#27 at /home/natale/.julia/packages/GPUArrays/JqOUg/src/host/mapreduce.jl:62 [inlined]
 [14] #mapreduce#25 at /home/natale/.julia/packages/GPUArrays/JqOUg/src/host/mapreduce.jl:28 [inlined]
 [15] onecold at /home/natale/.julia/packages/Flux/Fj3bt/src/onehot.jl:121 [inlined]
 [16] accuracy(::CuArrays.CuArray{Float32,4,Nothing}, ::Flux.OneHotMatrix{CuArrays.CuArray{Flux.OneHotVector,1,Nothing}}) at /home/natale/brainside/transflearn/yiyu-test.jl:26
 [17] top-level scope at show.jl:613
 [18] top-level scope at /home/natale/brainside/transflearn/yiyu-test.jl:44
 [19] include(::String) at ./client.jl:439
 [20] top-level scope at REPL[1]:1
in expression starting at /home/natale/brainside/transflearn/yiyu-test.jl:34
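
As a first step, I can at least make the scalar-indexing warning fatal so the offending call shows up with its own stack trace (this assumes the CuArrays version shown in the trace above, which provides allowscalar):

using CuArrays
CuArrays.allowscalar(false)   # scalar getindex/setindex! on a CuArray now errors instead of just warning

That does not fix the out-of-memory error, but judging from frames [15]–[16] of the trace, the slow element-wise indexing seems to come from the onecold call inside my accuracy function.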

Here is my code:

@info "Loading libraries"
using Flux
using Statistics
using Flux: onehotbatch, crossentropy, Momentum, update!, onecold
using MLDatasets: CIFAR10
using Base.Iterators: partition

batchsize = 1000
trainsize = 50000 - batchsize   # hold out the last batch of the 50000 CIFAR-10 images for validation

@info "Loading training data"
trainimgs = CIFAR10.traintensor(Float32);
trainlabels = onehotbatch(CIFAR10.trainlabels(Float32) .+ 1, 1:10); 

@info "Building the trainset"
trainset = [(trainimgs[:,:,:,i], trainlabels[:,i]) for i in partition(1:trainsize, batchsize)];
batchnum = size(trainset)[1]

@info "Loading validation data"
valset = (trainsize+1):(trainsize+batchsize)
valX = trainimgs[:,:,:,valset] |> gpu;
valY = trainlabels[:, valset] |> gpu;

loss(x, y) = sum(crossentropy(m(x), y))
opt = Momentum(0.01)
accuracy(x, y) = mean(onecold(m(x), 1:10) .== onecold(y, 1:10))

@info "Loading the model"
include("yiyu-resnet.jl")
m = ResNet([2,2,2,2], 10) |> gpu; #ResNet18

epochs = 10

for epoch = 1:epochs
	@info "epoch" epoch
	for i in 1:batchnum
		batch = trainset[i] |> gpu
		gs = gradient(params(m)) do
			l = loss(batch...)
		end
		@info "batch fraction" i/batchnum
		update!(opt, params(m), gs)
	end
	@show accuracy(valX, valY)
end

where yiyu-resnet.jl is this code by yiyuezhuo.
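
I also wondered whether GPU copies of old batches pile up, since the allocator frees them lazily. Here is a sketch of the same inner loop with an explicit cleanup after each update (assuming CuArrays.reclaim() is available in the CuArrays version from the trace; Flux already loads CuArrays, the using line only brings the name into scope):

using CuArrays

for i in 1:batchnum
	batch = trainset[i] |> gpu
	gs = gradient(params(m)) do
		loss(batch...)
	end
	update!(opt, params(m), gs)
	batch = nothing      # drop the reference to this batch's GPU copy
	GC.gc()              # collect the now-unreferenced CuArrays
	CuArrays.reclaim()   # ask the memory pool to hand freed blocks back to CUDA
end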

Since the OP of this thread suggested avoiding the onecold function, I rewrote the accuracy function as

max_pred(x) = [findmax(m(x[:,:,:,i:i]))[2][1] for i in 1:(size(x)[4])] |> gpu
max_lab(y) = [findmax(y[:,i])[2] for i in 1:(size(y)[2])] |> gpu
accuracy(x, y) = mean(max_pred(x) .== max_lab(y)) |> gpu

but I still get the same error.
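
Something else I’m considering is evaluating the validation accuracy in smaller slices and doing the onecold comparison on the CPU, so neither the forward pass over all 1000 validation images nor the reduction has to run on the GPU at once. A rough, untested sketch (accuracy_chunks is just a name I made up; partition and cpu are already available from the imports above):

function accuracy_chunks(x, y; chunk = 100)
	n = size(x, 4)
	correct = 0
	for r in partition(1:n, chunk)
		preds  = cpu(m(x[:, :, :, r]))   # forward pass on a small slice, predictions copied back to the CPU
		labels = cpu(y[:, r])            # matching one-hot columns brought back as well
		correct += sum(onecold(preds, 1:10) .== onecold(labels, 1:10))
	end
	return correct / n
end

The idea is that only one chunk-sized activation buffer lives on the GPU at a time, and onecold never touches a CuArray.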
