Summary
julia> m₂ = gpu(decoder.layers[1][7:12])
Transformer<6>( PreNormTransformerBlock(
DropoutLayer<nothing>(
SelfAttention(
CausalMultiheadQKVAttenOp(head = 12, p = nothing),
NSplit<3>(Dense(W = (768, 2304), b = true)), # 1_771_776 parameters
Error showing value of type Transformer{NTuple{6, Transformers.Layers.PreNormTransformerBlock{Transformers.Layers.DropoutLayer{Transformers.Layers.SelfAttention{NeuralAttentionlib.CausalMultiheadQKVAttenOp{Nothing}, Transformers.Layers.NSplit{Static.StaticInt{3}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, Nothing}, Transformers.Layers.LayerNorm{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32}, Transformers.Layers.DropoutLayer{Transformers.Layers.Chain{Tuple{Transformers.Layers.Dense{typeof(gelu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Nothing}, Transformers.Layers.LayerNorm{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32}}}, Nothing}:
ERROR: CUDA error: an illegal memory access was encountered (code 700, ERROR_ILLEGAL_ADDRESS)
Stacktrace:
[1] throw_api_error(res::CUDA.cudaError_enum)
@ CUDA ~/.julia/packages/CUDA/tVtYo/lib/cudadrv/libcuda.jl:27
[2] check
@ ~/.julia/packages/CUDA/tVtYo/lib/cudadrv/libcuda.jl:34 [inlined]
[3] cuMemcpyDtoHAsync_v2
@ ~/.julia/packages/CUDA/tVtYo/lib/utils/call.jl:26 [inlined]
[4] #unsafe_copyto!#8
@ ~/.julia/packages/CUDA/tVtYo/lib/cudadrv/memory.jl:397 [inlined]
[5] (::CUDA.var"#1014#1015"{Bool, Vector{Bool}, Int64, CuArray{Bool, 2, CUDA.Mem.DeviceBuffer}, Int64, Int64})()
@ CUDA ~/.julia/packages/CUDA/tVtYo/src/array.jl:482
[6] #context!#887
@ ~/.julia/packages/CUDA/tVtYo/lib/cudadrv/state.jl:170 [inlined]
[7] context!
@ ~/.julia/packages/CUDA/tVtYo/lib/cudadrv/state.jl:165 [inlined]
[8] unsafe_copyto!(dest::Vector{Bool}, doffs::Int64, src::CuArray{Bool, 2, CUDA.Mem.DeviceBuffer}, soffs::Int64, n::Int64)
@ CUDA ~/.julia/packages/CUDA/tVtYo/src/array.jl:475
[9] copyto!
@ ~/.julia/packages/CUDA/tVtYo/src/array.jl:429 [inlined]
[10] getindex
@ ~/.julia/packages/GPUArrays/5XhED/src/host/indexing.jl:12 [inlined]
[11] macro expansion
@ ~/.julia/packages/GPUArraysCore/uOYfN/src/GPUArraysCore.jl:136 [inlined]
[12] _mapreduce(f::ComposedFunction{typeof(!), typeof(iszero)}, op::typeof(|), As::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}; dims::Colon, init::Nothing)
@ GPUArrays ~/.julia/packages/GPUArrays/5XhED/src/host/mapreduce.jl:73
[13] _mapreduce
@ ~/.julia/packages/GPUArrays/5XhED/src/host/mapreduce.jl:35 [inlined]
[14] #mapreduce#29
@ ~/.julia/packages/GPUArrays/5XhED/src/host/mapreduce.jl:31 [inlined]
[15] mapreduce
@ ~/.julia/packages/GPUArrays/5XhED/src/host/mapreduce.jl:31 [inlined]
[16] any
@ ~/.julia/packages/GPUArrays/5XhED/src/host/mapreduce.jl:82 [inlined]
[17] _any
@ ~/.julia/packages/Flux/n3cOc/src/layers/show.jl:129 [inlined]
[18] (::Flux.var"#337#338"{ComposedFunction{typeof(!), typeof(iszero)}})(x::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ Flux ~/.julia/packages/Flux/n3cOc/src/layers/show.jl:131
[19] _any(f::Flux.var"#337#338"{ComposedFunction{typeof(!), typeof(iszero)}}, itr::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}}, #unused#::Colon)
@ Base ./reduce.jl:1215
[20] any
@ ./reduce.jl:1210 [inlined]
[21] _any
@ ~/.julia/packages/Flux/n3cOc/src/layers/show.jl:131 [inlined]
[22] _all
@ ~/.julia/packages/Flux/n3cOc/src/layers/show.jl:135 [inlined]
[23] _nan_show(io::IOContext{Base.TTY}, x::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}})
@ Flux ~/.julia/packages/Flux/n3cOc/src/layers/show.jl:120
[24] _layer_show(io::IOContext{Base.TTY}, layer::Transformers.Layers.NSplit{Static.StaticInt{3}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, indent::Int64, name::Nothing)
@ Flux ~/.julia/packages/Flux/n3cOc/src/layers/show.jl:86
[25] _big_show (repeats 2 times)
@ ~/.julia/packages/Transformers/694He/src/layers/utils.jl:117 [inlined]
[26] _big_show(io::IOContext{Base.TTY}, layer::Transformers.Layers.SelfAttention{NeuralAttentionlib.CausalMultiheadQKVAttenOp{Nothing}, Transformers.Layers.NSplit{Static.StaticInt{3}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, indent::Int64, name::Nothing)
@ Transformers.Layers ~/.julia/packages/Transformers/694He/src/layers/architecture.jl:363
[27] _big_show
@ ~/.julia/packages/Transformers/694He/src/layers/architecture.jl:361 [inlined]
--- the last 2 lines are repeated 2 more times ---
[32] _big_show(io::IOContext{Base.TTY}, t::Transformer{NTuple{6, Transformers.Layers.PreNormTransformerBlock{Transformers.Layers.DropoutLayer{Transformers.Layers.SelfAttention{NeuralAttentionlib.CausalMultiheadQKVAttenOp{Nothing}, Transformers.Layers.NSplit{Static.StaticInt{3}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, Nothing}, Transformers.Layers.LayerNorm{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32}, Transformers.Layers.DropoutLayer{Transformers.Layers.Chain{Tuple{Transformers.Layers.Dense{typeof(gelu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Nothing}, Transformers.Layers.LayerNorm{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32}}}, Nothing}, indent::Int64, name::Nothing)
@ Transformers.Layers ~/.julia/packages/Transformers/694He/src/layers/layer.jl:254
[33] _big_show
@ ~/.julia/packages/Transformers/694He/src/layers/layer.jl:252 [inlined]
[34] show(io::IOContext{Base.TTY}, m::MIME{Symbol("text/plain")}, x::Transformer{NTuple{6, Transformers.Layers.PreNormTransformerBlock{Transformers.Layers.DropoutLayer{Transformers.Layers.SelfAttention{NeuralAttentionlib.CausalMultiheadQKVAttenOp{Nothing}, Transformers.Layers.NSplit{Static.StaticInt{3}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, Nothing}, Transformers.Layers.LayerNorm{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32}, Transformers.Layers.DropoutLayer{Transformers.Layers.Chain{Tuple{Transformers.Layers.Dense{typeof(gelu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Transformers.Layers.Dense{Nothing, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Nothing}, Transformers.Layers.LayerNorm{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32}}}, Nothing})
@ Transformers.Layers ~/.julia/packages/Transformers/694He/src/layers/utils.jl:97
[35] (::REPL.var"#55#56"{REPL.REPLDisplay{REPL.LineEditREPL}, MIME{Symbol("text/plain")}, Base.RefValue{Any}})(io::Any)
@ REPL /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/REPL.jl:276
[36] with_repl_linfo(f::Any, repl::REPL.LineEditREPL)
@ REPL /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/REPL.jl:557
[37] display(d::REPL.REPLDisplay, mime::MIME{Symbol("text/plain")}, x::Any)
@ REPL /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/REPL.jl:262
[38] display
@ /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/REPL.jl:281 [inlined]
[39] display(x::Any)
@ Base.Multimedia ./multimedia.jl:340
[40] #invokelatest#2
@ ./essentials.jl:816 [inlined]
[41] invokelatest
@ ./essentials.jl:813 [inlined]
[42] print_response(errio::IO, response::Any, show_value::Bool, have_color::Bool, specialdisplay::Union{Nothing, AbstractDisplay})
@ REPL /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/REPL.jl:305
[43] (::REPL.var"#57#58"{REPL.LineEditREPL, Pair{Any, Bool}, Bool, Bool})(io::Any)
@ REPL /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/REPL.jl:287
[44] with_repl_linfo(f::Any, repl::REPL.LineEditREPL)
@ REPL /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/REPL.jl:557
[45] print_response(repl::REPL.AbstractREPL, response::Any, show_value::Bool, have_color::Bool)
@ REPL /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/REPL.jl:285
[46] (::REPL.var"#do_respond#80"{Bool, Bool, REPL.var"#93#103"{REPL.LineEditREPL, REPL.REPLHistoryProvider}, REPL.LineEditREPL, REPL.LineEdit.Prompt})(s::REPL.LineEdit.MIState, buf::Any, ok::Bool)
@ REPL /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/REPL.jl:899
[47] (::REPL.var"#98#108"{Regex, Regex, Int64, Int64, REPL.LineEdit.Prompt, REPL.LineEdit.Prompt, REPL.LineEdit.Prompt})(::REPL.LineEdit.MIState, ::Any, ::Vararg{Any})
@ REPL /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/REPL.jl:1236
[48] #invokelatest#2
@ ./essentials.jl:816 [inlined]
[49] invokelatest
@ ./essentials.jl:813 [inlined]
[50] (::REPL.LineEdit.var"#27#28"{REPL.var"#98#108"{Regex, Regex, Int64, Int64, REPL.LineEdit.Prompt, REPL.LineEdit.Prompt, REPL.LineEdit.Prompt}, String})(s::Any, p::Any)
@ REPL.LineEdit /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/LineEdit.jl:1603
[51] prompt!(term::REPL.Terminals.TextTerminal, prompt::REPL.LineEdit.ModalInterface, s::REPL.LineEdit.MIState)
@ REPL.LineEdit /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/LineEdit.jl:2740
[52] run_interface(terminal::REPL.Terminals.TextTerminal, m::REPL.LineEdit.ModalInterface, s::REPL.LineEdit.MIState)
@ REPL.LineEdit /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/LineEdit.jl:2642
[53] run_frontend(repl::REPL.LineEditREPL, backend::REPL.REPLBackendRef)
@ REPL /opt/julia-1.9.2/share/julia/stdlib/v1.9/REPL/src/REPL.jl:1300