Hi,
I’m training a model on GPU and trying to save intermediate states to disk as described in the Flux docs. Unfortunately, the information about saving GPU models (in the blue box) seems to be wrong: although I can save and load GPU weights to disk within the same session, loading them in a new session fails, as described in this BSON issue from 2019.
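Concretely, this is the failure I mean (a minimal sketch; the filename is arbitrary):

using Flux
using BSON: @save, @load
m = Flux.gpu(Conv((3,3), 1=>1))
@save "gpu-weights.bson" m   # works
@load "gpu-weights.bson" m   # also works, but only within this session
# in a fresh Julia session, the same @load fails to reconstruct the CuArrays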
I figured I could just transfer the model to the CPU for saving and then move it back to the GPU for further training, but that also gives me errors:
using Flux
using Dates: now
using BSON: @save, @load

# small model, moved to the GPU
model = Conv((3,3), 1=>1)
model = Flux.gpu(model)

loss(x, y) = Flux.mse(model(x), y)

# one dummy (input, target) pair, also on the GPU
xs = rand(Float32, 5, 5, 1, 1)
ys = ones(Float32, 3, 3, 1, 1)
train = [(xs, ys)]
train = Flux.gpu(train)

opt = ADAM(1e-3)

# periodically copy the weights to the CPU and checkpoint them
evalcb = Flux.throttle(10) do
    savem = Flux.cpu(model)
    @save "model-$(now()).bson" savem
end

epochs = 10
for i in 1:epochs
    Flux.Optimise.train!(loss, Flux.params(model), train, opt, cb=evalcb)
end
gives a long and opaque (to me) error (see below). I’ve tried a bunch of other permutations, like moving the model to the CPU with model = Flux.cpu(model) just before the @save and back with model = Flux.gpu(model) right after, but nothing seems to work. Any help would be really appreciated!
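For concreteness, the callback in that permutation looked roughly like this (a sketch; note the globals, since a plain assignment inside the do-block would make model local to it):

evalcb = Flux.throttle(10) do
    global model = Flux.cpu(model)      # move the live model to the CPU...
    @save "model-$(now()).bson" model   # ...so BSON sees plain Arrays
    global model = Flux.gpu(model)      # move it back for further training
end

As far as I can tell, cpu and gpu each build fresh arrays, so after this callback runs the Flux.params(model) captured by train! no longer aliases the live weights, but I don't know whether that is related to the error below.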
┌ Warning: Performing scalar indexing on task Task (runnable) @0x00002b9d961a3c70.
│ Invocation of getindex resulted in scalar indexing of a GPU array.
│ This is typically caused by calling an iterating implementation of a method.
│ Such implementations *do not* execute on the GPU, but very slowly on the CPU,
│ and therefore are only permitted from the REPL for prototyping purposes.
│ If you did intend to index this array, annotate the caller with @allowscalar.
└ @ GPUArrays /global/home/users/sguns/.julia/packages/GPUArrays/8dzSJ/src/host/indexing.jl:56
TaskFailedException
nested task error: MethodError: no method matching gemm!(::Val{false}, ::Val{false}, ::Int64, ::Int64, ::Int64, ::Float32, ::CUDA.CuPtr{Float32}, ::Ptr{Float32}, ::Float32, ::CUDA.CuPtr{Float32})
Closest candidates are:
gemm!(::Val, ::Val, ::Int64, ::Int64, ::Int64, ::Float32, ::Ptr{Float32}, ::Ptr{Float32}, ::Float32, ::Ptr{Float32}) at /global/home/users/sguns/.julia/packages/NNlib/yzagZ/src/gemm.jl:32
gemm!(::Val, ::Val, ::Int64, ::Int64, ::Int64, ::Float64, ::Ptr{Float64}, ::Ptr{Float64}, ::Float64, ::Ptr{Float64}) at /global/home/users/sguns/.julia/packages/NNlib/yzagZ/src/gemm.jl:32
gemm!(::Val, ::Val, ::Int64, ::Int64, ::Int64, ::ComplexF64, ::Ptr{ComplexF64}, ::Ptr{ComplexF64}, ::ComplexF64, ::Ptr{ComplexF64}) at /global/home/users/sguns/.julia/packages/NNlib/yzagZ/src/gemm.jl:32
...
Stacktrace:
[1] macro expansion
@ ~/.julia/packages/NNlib/yzagZ/src/impl/conv_im2col.jl:58 [inlined]
[2] (::NNlib.var"#724#threadsfor_fun#367"{CUDA.CuArray{Float32, 3}, Float32, Float32, CUDA.CuArray{Float32, 5}, CUDA.CuArray{Float32, 5}, Array{Float32, 5}, DenseConvDims{3, (3, 3, 1), 1, 1, (1, 1, 1), (0, 0, 0, 0, 0, 0), (1, 1, 1), false}, Int64, Int64, Int64, UnitRange{Int64}})(onethread::Bool)
@ NNlib ./threadingconstructs.jl:81
[3] (::NNlib.var"#724#threadsfor_fun#367"{CUDA.CuArray{Float32, 3}, Float32, Float32, CUDA.CuArray{Float32, 5}, CUDA.CuArray{Float32, 5}, Array{Float32, 5}, DenseConvDims{3, (3, 3, 1), 1, 1, (1, 1, 1), (0, 0, 0, 0, 0, 0), (1, 1, 1), false}, Int64, Int64, Int64, UnitRange{Int64}})()
@ NNlib ./threadingconstructs.jl:48
Stacktrace:
[1] wait
@ ./task.jl:317 [inlined]
[2] threading_run(func::Function)
@ Base.Threads ./threadingconstructs.jl:34
[3] macro expansion
@ ./threadingconstructs.jl:93 [inlined]
[4] conv_im2col!(y::CUDA.CuArray{Float32, 5}, x::CUDA.CuArray{Float32, 5}, w::Array{Float32, 5}, cdims::DenseConvDims{3, (3, 3, 1), 1, 1, (1, 1, 1), (0, 0, 0, 0, 0, 0), (1, 1, 1), false}; col::CUDA.CuArray{Float32, 3}, alpha::Float32, beta::Float32)
@ NNlib ~/.julia/packages/NNlib/yzagZ/src/impl/conv_im2col.jl:49
[5] conv_im2col!
@ ~/.julia/packages/NNlib/yzagZ/src/impl/conv_im2col.jl:30 [inlined]
[6] #conv!#149
@ ~/.julia/packages/NNlib/yzagZ/src/conv.jl:191 [inlined]
[7] conv!(out::CUDA.CuArray{Float32, 5}, in1::CUDA.CuArray{Float32, 5}, in2::Array{Float32, 5}, cdims::DenseConvDims{3, (3, 3, 1), 1, 1, (1, 1, 1), (0, 0, 0, 0, 0, 0), (1, 1, 1), false})
@ NNlib ~/.julia/packages/NNlib/yzagZ/src/conv.jl:191
[8] conv!(y::CUDA.CuArray{Float32, 4}, x::CUDA.CuArray{Float32, 4}, w::Array{Float32, 4}, cdims::DenseConvDims{2, (3, 3), 1, 1, (1, 1), (0, 0, 0, 0), (1, 1), false}; kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ NNlib ~/.julia/packages/NNlib/yzagZ/src/conv.jl:148
[9] conv!
@ ~/.julia/packages/NNlib/yzagZ/src/conv.jl:148 [inlined]
[10] conv(x::CUDA.CuArray{Float32, 4}, w::Array{Float32, 4}, cdims::DenseConvDims{2, (3, 3), 1, 1, (1, 1), (0, 0, 0, 0), (1, 1), false}; kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ NNlib ~/.julia/packages/NNlib/yzagZ/src/conv.jl:91
[11] conv
@ ~/.julia/packages/NNlib/yzagZ/src/conv.jl:89 [inlined]
[12] #rrule#182
@ ~/.julia/packages/NNlib/yzagZ/src/conv.jl:233 [inlined]
[13] rrule
@ ~/.julia/packages/NNlib/yzagZ/src/conv.jl:224 [inlined]
[14] chain_rrule
@ ~/.julia/packages/Zygote/i1R8y/src/compiler/chainrules.jl:89 [inlined]
[15] macro expansion
@ ~/.julia/packages/Zygote/i1R8y/src/compiler/interface2.jl:0 [inlined]
[16] _pullback(::Zygote.Context, ::typeof(conv), ::CUDA.CuArray{Float32, 4}, ::Array{Float32, 4}, ::DenseConvDims{2, (3, 3), 1, 1, (1, 1), (0, 0, 0, 0), (1, 1), false})
@ Zygote ~/.julia/packages/Zygote/i1R8y/src/compiler/interface2.jl:9
[17] _pullback
@ ~/.julia/packages/Flux/0c9kI/src/layers/conv.jl:157 [inlined]
[18] _pullback(ctx::Zygote.Context, f::Conv{2, 4, typeof(identity), Array{Float32, 4}, Vector{Float32}}, args::CUDA.CuArray{Float32, 4})
@ Zygote ~/.julia/packages/Zygote/i1R8y/src/compiler/interface2.jl:0
[19] _pullback
@ ~/.julia/packages/Flux/0c9kI/src/layers/basic.jl:36 [inlined]
[20] _pullback(::Zygote.Context, ::typeof(Flux.applychain), ::Tuple{Conv{2, 4, typeof(identity), Array{Float32, 4}, Vector{Float32}}}, ::CUDA.CuArray{Float32, 4})
@ Zygote ~/.julia/packages/Zygote/i1R8y/src/compiler/interface2.jl:0
[21] _pullback
@ ~/.julia/packages/Flux/0c9kI/src/layers/basic.jl:38 [inlined]
[22] _pullback(ctx::Zygote.Context, f::Chain{Tuple{Conv{2, 4, typeof(identity), Array{Float32, 4}, Vector{Float32}}}}, args::CUDA.CuArray{Float32, 4})
@ Zygote ~/.julia/packages/Zygote/i1R8y/src/compiler/interface2.jl:0
[23] _pullback
@ ./In[1]:5 [inlined]
[24] _pullback(::Zygote.Context, ::typeof(closs), ::CUDA.CuArray{Float32, 4}, ::CUDA.CuArray{Float32, 4})
@ Zygote ~/.julia/packages/Zygote/i1R8y/src/compiler/interface2.jl:0
[25] _apply
@ ./boot.jl:804 [inlined]
[26] adjoint
@ ~/.julia/packages/Zygote/i1R8y/src/lib/lib.jl:191 [inlined]
[27] _pullback
@ ~/.julia/packages/ZygoteRules/OjfTt/src/adjoint.jl:57 [inlined]
[28] _pullback
@ ~/.julia/packages/Flux/0c9kI/src/optimise/train.jl:102 [inlined]
[29] _pullback(::Zygote.Context, ::Flux.Optimise.var"#39#45"{typeof(closs), Tuple{CUDA.CuArray{Float32, 4}, CUDA.CuArray{Float32, 4}}})
@ Zygote ~/.julia/packages/Zygote/i1R8y/src/compiler/interface2.jl:0
[30] pullback(f::Function, ps::Zygote.Params)
@ Zygote ~/.julia/packages/Zygote/i1R8y/src/compiler/interface.jl:250
[31] gradient(f::Function, args::Zygote.Params)
@ Zygote ~/.julia/packages/Zygote/i1R8y/src/compiler/interface.jl:58
[32] macro expansion
@ ~/.julia/packages/Flux/0c9kI/src/optimise/train.jl:101 [inlined]
[33] macro expansion
@ ~/.julia/packages/Juno/n6wyj/src/progress.jl:134 [inlined]
[34] train!(loss::Function, ps::Zygote.Params, data::Vector{Tuple{CUDA.CuArray{Float32, 4}, CUDA.CuArray{Float32, 4}}}, opt::ADAM; cb::Flux.var"#throttled#71"{Flux.var"#throttled#67#72"{Bool, Bool, var"#1#2", Int64}})
@ Flux.Optimise ~/.julia/packages/Flux/0c9kI/src/optimise/train.jl:99
[35] top-level scope
@ ./In[2]:10
[36] eval
@ ./boot.jl:360 [inlined]
[37] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
@ Base ./loading.jl:1094
Versions:
BSON v0.3.3
CUDA v3.3.3
Flux v0.12.4
CUDA.versioninfo():
CUDA toolkit 10.2.89, artifact installation
CUDA driver 10.2.0
NVIDIA driver 440.44.0
Libraries:
- CUBLAS: 10.2.2
- CURAND: 10.1.2
- CUFFT: 10.1.2
- CUSOLVER: 10.3.0
- CUSPARSE: 10.3.1
- CUPTI: 12.0.0
- NVML: 10.0.0+440.44
- CUDNN: 8.20.0 (for CUDA 10.2.0)
- CUTENSOR: 1.3.0 (for CUDA 10.2.0)
Toolchain:
- Julia: 1.6.0
- LLVM: 11.0.1
- PTX ISA support: 3.2, 4.0, 4.1, 4.2, 4.3, 5.0, 6.0, 6.1, 6.3, 6.4, 6.5
- Device capability support: sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75
Environment:
- JULIA_CUDA_USE_BINARYBUILDER: true
1 device:
0: Tesla K80 (sm_37, 11.046 GiB / 11.173 GiB available)