I’m getting a similar error that seems related to that of the original post:
┌ Warning: Performing scalar operations on GPU arrays: This is very slow, consider disallowing these operations with `allowscalar(false)`
└ @ GPUArrays ~/.julia/packages/GPUArrays/JqOUg/src/host/indexing.jl:43
ERROR: LoadError: CUDA error: out of memory (code 2, ERROR_OUT_OF_MEMORY)
Stacktrace:
[1] throw_api_error(::CUDAdrv.cudaError_enum) at /home/natale/.julia/packages/CUDAdrv/Uc14X/src/error.jl:105
[2] CUDAdrv.CuModule(::String, ::Dict{CUDAdrv.CUjit_option_enum,Any}) at /home/natale/.julia/packages/CUDAdrv/Uc14X/src/module.jl:42
[3] cufunction_slow(::Function, ::Type{T} where T, ::Int64; name::Nothing, kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:356
[4] #219 at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:393 [inlined]
[5] get!(::CUDAnative.var"#219#220"{Nothing,Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},typeof(CuArrays.partial_mapreduce_grid),DataType,Int64}, ::Dict{UInt64,CUDAnative.HostKernel}, ::UInt64) at ./dict.jl:452
[6] macro expansion at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:392 [inlined]
[7] macro expansion at ./lock.jl:183 [inlined]
[8] cufunction_fast(::Function, ::Type{T} where T, ::Int64; name::Nothing, kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:391
[9] cufunction(::typeof(CuArrays.partial_mapreduce_grid), ::Type{Tuple{typeof(identity),typeof(|),Int64,CartesianIndices{1,Tuple{Base.OneTo{Int64}}},CartesianIndices{1,Tuple{Base.OneTo{Int64}}},Val{true},CUDAnative.CuDeviceArray{Int64,2,CUDAnative.AS.Global},Base.Broadcast.Broadcasted{CuArrays.CuArrayStyle{1},Tuple{Base.OneTo{Int64}},Flux.var"#33#34"{Tuple{UnitRange{Int64}}},Tuple{CUDAnative.CuDeviceArray{Flux.OneHotVector,1,CUDAnative.AS.Global}}}}}; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:422
[10] cufunction(::Function, ::Type{T} where T) at /home/natale/.julia/packages/CUDAnative/ierw8/src/execution.jl:422
[11] macro expansion at /home/natale/.julia/packages/CuArrays/YFdj7/src/mapreduce.jl:197 [inlined]
[12] mapreducedim!(::Function, ::Function, ::CuArrays.CuArray{Int64,1,Nothing}, ::Base.Broadcast.Broadcasted{CuArrays.CuArrayStyle{1},Tuple{Base.OneTo{Int64}},Flux.var"#33#34"{Tuple{UnitRange{Int64}}},Tuple{CuArrays.CuArray{Flux.OneHotVector,1,Nothing}}}; init::Int64) at /home/natale/.julia/packages/CUDAnative/ierw8/src/nvtx/highlevel.jl:83
[13] #_mapreduce#27 at /home/natale/.julia/packages/GPUArrays/JqOUg/src/host/mapreduce.jl:62 [inlined]
[14] #mapreduce#25 at /home/natale/.julia/packages/GPUArrays/JqOUg/src/host/mapreduce.jl:28 [inlined]
[15] onecold at /home/natale/.julia/packages/Flux/Fj3bt/src/onehot.jl:121 [inlined]
[16] accuracy(::CuArrays.CuArray{Float32,4,Nothing}, ::Flux.OneHotMatrix{CuArrays.CuArray{Flux.OneHotVector,1,Nothing}}) at /home/natale/brainside/transflearn/yiyu-test.jl:26
[17] top-level scope at show.jl:613
[18] top-level scope at /home/natale/brainside/transflearn/yiyu-test.jl:44
[19] include(::String) at ./client.jl:439
[20] top-level scope at REPL[1]:1
in expression starting at /home/natale/brainside/transflearn/yiyu-test.jl:34
Here is my code:
@info "Loading libraries"
using Flux
using Statistics
using Flux: onehotbatch, crossentropy, Momentum, update!, onecold
using MLDatasets: CIFAR10
using Base.Iterators: partition
batchsize = 1000
trainsize = 50000 - batchsize
@info "Loading training data"
trainimgs = CIFAR10.traintensor(Float32);
trainlabels = onehotbatch(CIFAR10.trainlabels(Float32) .+ 1, 1:10);
@info "Building the trainset"
trainset = [(trainimgs[:,:,:,i], trainlabels[:,i]) for i in partition(1:trainsize, batchsize)];
batchnum = size(trainset)[1]
@info "Loading validation data"
valset = (trainsize+1):(trainsize+batchsize)
valX = trainimgs[:,:,:,valset] |> gpu;
valY = trainlabels[:, valset] |> gpu;
loss(x, y) = sum(crossentropy(m(x), y))
opt = Momentum(0.01)
accuracy(x, y) = mean(onecold(m(x), 1:10) .== onecold(y, 1:10))
@info "Loading the model"
include("yiyu-resnet.jl")
m = ResNet([2,2,2,2], 10) |> gpu; #ResNet18
epochs = 10
for epoch = 1:epochs
@info "epoch" epoch
for i in 1:batchnum
batch = trainset[i] |> gpu
gs = gradient(params(m)) do
l = loss(batch...)
end
@info "batch fraction" i/batchnum
update!(opt, params(m), gs)
end
@show accuracy(valX, valY)
end
where yiyu-resnet.jl
is this code by yiyuezhuo.
Since the OP of this thread suggested to avoid using the onecold
function, I rewrote the accuracy
function as
max_pred(x) = [findmax(m(x[:,:,:,i:i]))[2][1] for i in 1:(size(x)[4])] |> gpu
max_lab(y) = [findmax(y[:,i])[2] for i in 1:(size(y)[2])] |> gpu
accuracy(x, y) = mean(max_pred(x) .== max_lab(y)) |> gpu
but I still get the same error.