Optimize splitting vector of strings

I am learning how to optimize julia code and came up with the following MWE:

using BenchmarkTools, Profile

strvec = ["AA entryA", "BBC entryB", "CCM entryC"]

function split_vec(strvec)
    mat  = stack(split.(strvec), dims = 1)
    col1 = first.(mat[:, 1], 2)
    col2 = Vector{String}(undef, length(col1))

    idx_nAA = col1 .!= "AA"
    col2[idx_nAA, :] = last.(mat[idx_nAA, 1], 1)
    col2[idx_nAA .== 0, :] .= ""
    return stack([col1, col2, mat[:, 2]])
end

split_vec(strvec)
@benchmark split_vec(strvec)
@Profile.profile for i in 1:Int(1e4) split_vec(strvec) end

Profile.print()

Desired Output:

3Γ—3 Matrix{AbstractString}:
 "AA"  ""   "entryA"
 "BB"  "C"  "entryB"
 "CC"  "M"  "entryC"

Benchmark:

BenchmarkTools.Trial: 10000 samples with 5 evaluations.
 Range (min … max):  6.470 ΞΌs …  3.040 ms  β”Š GC (min … max): 0.00% … 99.17%
 Time  (median):     8.014 ΞΌs              β”Š GC (median):    0.00%
 Time  (mean Β± Οƒ):   9.368 ΞΌs Β± 30.440 ΞΌs  β”Š GC (mean Β± Οƒ):  3.22% Β±  0.99%

  β–„β–ˆβ–‡β–…β–„β–…β–†β–…β–…β–†β–…β–…β–„β–„β–ƒβ–ƒβ–‚β–β–            ▁ ▅▃▂▃▃▂▃▂▂▂▁▁▁▁            β–‚
  β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‡β–‡β–ˆβ–‡β–‡β–‡β–ˆβ–‡β–‡β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‡β–‡β–‡β–‡β–‡β–†β–†β–…β–… β–ˆ
  6.47 ΞΌs      Histogram: log(frequency) by time     17.4 ΞΌs <

 Memory estimate: 2.62 KiB, allocs estimate: 34.

Profile:

Overhead β•Ž [+additional indent] Count File:Line; Function
=========================================================
  β•Ž105 @Base/client.jl:552; _start()
  β•Ž 105 @Base/client.jl:333; exec_options(opts::Base.JLOptions)
  β•Ž  105 @Base/client.jl:416; run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_file::Bool, color_set::Bool)
  β•Ž   105 @Base/essentials.jl:889; invokelatest
  β•Ž    105 @Base/essentials.jl:892; #invokelatest#2
  β•Ž     105 @Base/client.jl:432; (::Base.var"#1013#1015"{Bool, Bool, Bool})(REPL::Module)
  β•Ž    β•Ž 105 …ot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:375; run_repl(repl::REPL.AbstractREPL, consumer::Any)
  β•Ž    β•Ž  105 …ot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:389; run_repl(repl::REPL.AbstractREPL, consumer::Any; backend_on_current_task::Bool, backe…
  β•Ž    β•Ž   105 …ot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:228; kwcall(::NamedTuple, ::typeof(REPL.start_repl_backend), backend::REPL.REPLBackend, c…
  β•Ž    β•Ž    105 …t-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:231; start_repl_backend(backend::REPL.REPLBackend, consumer::Any; get_module::Function)
  β•Ž    β•Ž     105 …t-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:246; repl_backend_loop(backend::REPL.REPLBackend, get_module::Function)
  β•Ž    β•Ž    β•Ž 105 …-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:150; eval_user_input(ast::Any, backend::REPL.REPLBackend, mod::Module)
  β•Ž    β•Ž    β•Ž  105 @Base/boot.jl:385; eval
  β•Ž    β•Ž    β•Ž   105 REPL[6]:1; top-level scope
  β•Ž    β•Ž    β•Ž    105 …r/share/julia/stdlib/v1.10/Profile/src/Profile.jl:27; macro expansion
 2β•Ž    β•Ž    β•Ž     105 REPL[6]:1; macro expansion
  β•Ž    β•Ž    β•Ž    β•Ž 12  REPL[3]:2; split_vec(strvec::Vector{String})
  β•Ž    β•Ž    β•Ž    β•Ž  2   @Base/abstractarray.jl:2767; stack
  β•Ž    β•Ž    β•Ž    β•Ž   2   @Base/abstractarray.jl:2767; #stack#184
  β•Ž    β•Ž    β•Ž    β•Ž    2   @Base/abstractarray.jl:2799; _stack
  β•Ž    β•Ž    β•Ž    β•Ž     2   @Base/abstractarray.jl:2807; _stack
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 2   @Base/abstractarray.jl:2839; _typed_stack
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  2   @Base/abstractarray.jl:2847; _typed_stack
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/abstractarray.jl:2856; _dim_stack
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/iterators.jl:652; peel
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/array.jl:945; iterate
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/array.jl:945; iterate
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/essentials.jl:13; getindex
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/abstractarray.jl:2868; _dim_stack
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/abstractarray.jl:2877; _dim_stack!(::Val{1}, B::Matrix{SubString{String}}, x1::Vector{SubString{String}}, xrest::Base.It…
  β•Ž    β•Ž    β•Ž    β•Ž  10  @Base/broadcast.jl:903; materialize
  β•Ž    β•Ž    β•Ž    β•Ž   10  @Base/broadcast.jl:928; copy
  β•Ž    β•Ž    β•Ž    β•Ž    10  @Base/broadcast.jl:956; copyto!
  β•Ž    β•Ž    β•Ž    β•Ž     10  @Base/broadcast.jl:1003; copyto!
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 10  @Base/simdloop.jl:77; macro expansion
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  10  @Base/broadcast.jl:1004; macro expansion
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   10  @Base/broadcast.jl:636; getindex
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    10  @Base/broadcast.jl:682; _broadcast_getindex
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     10  @Base/broadcast.jl:709; _broadcast_getindex_evalf
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 10  @Base/strings/util.jl:633; split
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  10  @Base/strings/util.jl:633; #split#488
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   10  @Base/strings/util.jl:626; split
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    10  @Base/strings/util.jl:628; #split#487
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     10  @Base/array.jl:759; collect
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 2   @Base/array.jl:769; _collect(cont::UnitRange{Int64}, itr::Base.SplitIterator{String, typeof(isspace)}, ::Base.HasEltype…
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  2   @Base/array.jl:713; _similar_for
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   2   @Base/abstractarray.jl:833; similar
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    2   @Base/abstractarray.jl:842; similar
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     2   @Base/boot.jl:486; Array
 2β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 2   @Base/boot.jl:477; Array
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/array.jl:770; _collect(cont::UnitRange{Int64}, itr::Base.SplitIterator{String, typeof(isspace)}, ::Base.HasEltype…
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/strings/util.jl:555; iterate
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/strings/util.jl:556; iterate
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/strings/search.jl:170; findnext(testf::typeof(isspace), s::String, i::Int64)
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/strings/string.jl:171; nextind
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/strings/string.jl:175; _nextind_str(s::String, i::Int64)
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/promotion.jl:521; ==
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 2   @Base/array.jl:771; _collect(cont::UnitRange{Int64}, itr::Base.SplitIterator{String, typeof(isspace)}, ::Base.HasEltype…
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  2   @Base/array.jl:1119; push!
 2β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   2   @Base/array.jl:1072; _growend!
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 3   @Base/array.jl:772; _collect(cont::UnitRange{Int64}, itr::Base.SplitIterator{String, typeof(isspace)}, ::Base.HasEltype…
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  2   @Base/strings/util.jl:556; iterate
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/strings/search.jl:166; findnext(testf::typeof(isspace), s::String, i::Int64)
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/strings/string.jl:521; isvalid(s::String, i::Int64)
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/strings/basic.jl:208; checkbounds
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/int.jl:514; <=
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/strings/search.jl:170; findnext(testf::typeof(isspace), s::String, i::Int64)
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/strings/string.jl:171; nextind
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/strings/string.jl:175; _nextind_str(s::String, i::Int64)
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/promotion.jl:521; ==
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/strings/util.jl:569; iterate
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/strings/substring.jl:42; SubString
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/strings/substring.jl:41; SubString
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/strings/substring.jl:30; SubString{String}(s::String, i::Int64, j::Int64)
 2β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 2   @Base/int.jl:0; _collect(cont::UnitRange{Int64}, itr::Base.SplitIterator{String, typeof(isspace)}, ::Base.HasEltype, is…
  β•Ž    β•Ž    β•Ž    β•Ž 3   REPL[3]:3; split_vec(strvec::Vector{String})
  β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/abstractarray.jl:1291; getindex
  β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/multidimensional.jl:889; _getindex
  β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/multidimensional.jl:901; _unsafe_getindex
  β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/abstractarray.jl:831; similar
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/array.jl:420; similar
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/boot.jl:486; Array
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/boot.jl:477; Array
  β•Ž    β•Ž    β•Ž    β•Ž  2   @Base/broadcast.jl:903; materialize
  β•Ž    β•Ž    β•Ž    β•Ž   2   @Base/broadcast.jl:928; copy
  β•Ž    β•Ž    β•Ž    β•Ž    2   @Base/broadcast.jl:956; copyto!
  β•Ž    β•Ž    β•Ž    β•Ž     2   @Base/broadcast.jl:1003; copyto!
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 2   @Base/simdloop.jl:77; macro expansion
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  2   @Base/broadcast.jl:1004; macro expansion
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   2   @Base/broadcast.jl:636; getindex
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/broadcast.jl:681; _broadcast_getindex
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/broadcast.jl:705; _getindex
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/broadcast.jl:675; _broadcast_getindex
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/essentials.jl:13; getindex
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/broadcast.jl:682; _broadcast_getindex
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/broadcast.jl:709; _broadcast_getindex_evalf
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/strings/basic.jl:693; first(s::SubString{String}, n::Int64)
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/strings/substring.jl:292; getindex
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/strings/substring.jl:43; SubString
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/strings/substring.jl:47; SubString
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/strings/substring.jl:41; SubString
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/strings/string.jl:174; _nextind_str(s::String, i::Int64)
  β•Ž    β•Ž    β•Ž    β•Ž 27  REPL[3]:4; split_vec(strvec::Vector{String})
27β•Ž    β•Ž    β•Ž    β•Ž  27  @Base/boot.jl:477; Array
  β•Ž    β•Ž    β•Ž    β•Ž 1   REPL[3]:6; split_vec(strvec::Vector{String})
  β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/broadcast.jl:903; materialize
  β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/broadcast.jl:928; copy
  β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/broadcast.jl:223; similar
  β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/broadcast.jl:226; similar
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/abstractarray.jl:876; similar
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/abstractarray.jl:877; similar
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/bitarray.jl:71; BitArray
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/bitarray.jl:39; BitArray
  β•Ž    β•Ž    β•Ž    β•Ž 1   REPL[3]:7; split_vec(strvec::Vector{String})
  β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/abstractarray.jl:1396; setindex!
  β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/multidimensional.jl:944; _setindex!
  β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/multidimensional.jl:955; _unsafe_setindex!(::IndexLinear, ::Vector{String}, ::Vector{SubString{String}}, ::Base.LogicalIndex{…
  β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/cartesian.jl:66; macro expansion
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/multidimensional.jl:842; iterate
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/bitarray.jl:121; _blsr
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/int.jl:347; &
  β•Ž    β•Ž    β•Ž    β•Ž 2   REPL[3]:8; split_vec(strvec::Vector{String})
  β•Ž    β•Ž    β•Ž    β•Ž  2   @Base/broadcast.jl:1244; dotview
  β•Ž    β•Ž    β•Ž    β•Ž   2   @Base/views.jl:148; maybeview
  β•Ž    β•Ž    β•Ž    β•Ž    2   @Base/subarray.jl:186; view
  β•Ž    β•Ž    β•Ž    β•Ž     2   @Base/subarray.jl:223; unsafe_view
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 2   @Base/subarray.jl:28; SubArray
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  2   @Base/multidimensional.jl:855; ensure_indexable
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   2   @Base/multidimensional.jl:792; collect
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    2   @Base/array.jl:839; collect(itr::Base.Generator{Base.LogicalIndex{Int64, BitVector}, typeof(identity)})
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     2   @Base/array.jl:723; _array_for
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 2   @Base/abstractarray.jl:876; similar
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  2   @Base/abstractarray.jl:877; similar
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   2   @Base/boot.jl:486; Array
 2β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    2   @Base/boot.jl:477; Array
  β•Ž    β•Ž    β•Ž    β•Ž 57  REPL[3]:9; split_vec(strvec::Vector{String})
  β•Ž    β•Ž    β•Ž    β•Ž  57  @Base/abstractarray.jl:2767; stack
  β•Ž    β•Ž    β•Ž    β•Ž   57  @Base/abstractarray.jl:2767; #stack#184
  β•Ž    β•Ž    β•Ž    β•Ž    57  @Base/abstractarray.jl:2799; _stack
  β•Ž    β•Ž    β•Ž    β•Ž     51  @Base/abstractarray.jl:2811; _stack(dims::Function, ::Base.HasShape{1}, iter::Vector{Vector})
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 51  @Base/reducedim.jl:357; mapreduce
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  51  @Base/reducedim.jl:357; #mapreduce#821
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   51  @Base/reducedim.jl:365; _mapreduce_dim
 2β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    31  @Base/reduce.jl:440; _mapreduce(f::typeof(eltype), op::typeof(promote_type), ::IndexLinear, A::Vector{Vector})
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     29  @Base/abstractarray.jl:241; eltype
29β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 29  @Base/abstractarray.jl:242; eltype
 5β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    20  @Base/reduce.jl:443; _mapreduce(f::typeof(eltype), op::typeof(promote_type), ::IndexLinear, A::Vector{Vector})
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     15  @Base/abstractarray.jl:241; eltype
15β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 15  @Base/abstractarray.jl:242; eltype
 3β•Ž    β•Ž    β•Ž    β•Ž     6   @Base/abstractarray.jl:2812; _stack(dims::Function, ::Base.HasShape{1}, iter::Vector{Vector})
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 3   @Base/abstractarray.jl:2817; _typed_stack(::Colon, ::Type{AbstractString}, ::Type{Vector}, A::Vector{Vector})
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/abstractarray.jl:2821; _typed_stack(::Colon, ::Type{AbstractString}, ::Type{Vector}, A::Vector{Vector}, Aax::Tuple{Base.On…
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/abstractarray.jl:833; similar
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/array.jl:420; similar
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/boot.jl:487; Array
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/boot.jl:479; Array
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  2   @Base/abstractarray.jl:2827; _typed_stack(::Colon, ::Type{AbstractString}, ::Type{Vector}, A::Vector{Vector}, Aax::Tuple{Base.On…
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/abstractarray.jl:1118; copyto!(dest::Matrix{AbstractString}, dstart::Int64, src::Vector{SubString{String}})
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    1   @Base/array.jl:363; copyto!
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž     1   @Base/array.jl:376; _copyto_impl!
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž 1   @Base/array.jl:353; unsafe_copyto!
  β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž  1   @Base/array.jl:299; _unsafe_copyto!(dest::Matrix{AbstractString}, doffs::Int64, src::Vector{SubString{String}}, soffs::Int6…
 1β•Ž    β•Ž    β•Ž    β•Ž    β•Ž    β•Ž   1   @Base/array.jl:1021; setindex!
Total snapshots: 106. Utilization: 100% across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.

What can I do to make this as fast as possible? Can this be done with zero allocations? It seems one of the bottlenecks is calling stack(), which I had assumed is already pretty optimized.

One other thing is that the AA strings will always be two characters (others are always three) and almost always be only the first entry, but this is not guaranteed.

Thanks for any help!

idk what constraint you have and what you can assume to be the property of the inputs, but here’s one possibility:

julia> @b split_vec($strvec)
6.181 ΞΌs (35.75 allocs: 2.668 KiB)

julia> function split_vec2(strvec)
           mat  = Matrix{String}(undef, length(strvec), 3)
           for i in eachindex(strvec)
               str = strvec[i]
               space_idx = findfirst(' ', str)
               mat[i, 1] = SubString(str, 1:2)
               mat[i, 2] = ifelse(space_idx==3, "", SubString(str, 3:3))
               mat[i, 3] = @view str[space_idx+1:end]
           end
           return mat
       end
split_vec2 (generic function with 1 method)

julia> split_vec2(strvec)
3Γ—3 Matrix{String}:
 "AA"  ""  "entryA"
 "BB"  "C"  "entryB"
 "CC"  "M"  "entryC"

julia> @b split_vec2($strvec)
356.739 ns (12 allocs: 416 bytes)
1 Like

Yes, 10x faster. This seems much more reasonable. Thanks.

This requires the substrings to be copied into Strings when they are stored in mat. If you instead use Matrix{SubString{String}}, then it won’t have to copy the strings at all.

Might also be easier to just put @views in front of the function, or in front of the for loop, and then you can just do:

mat[i, 1] = str[1:2]
mat[i, 2] = str[ifelse(space_idx==3, 1:0, 3:3)]
mat[i, 3] = str[space_idx+1:end]

instead of the explicit SubString calls. Notice that I also used the empty range str[1:0] instead of "" so that everything is a substring.

These two changes speed things up by almost a factor of 2 for me, as well as cutting the number of allocations down to 1.

8 Likes

Ah I tried SubString for matrix but forgot it is parametric… Thanks for the fix;!

1 Like

Just two cents, because it seems nicer to write it like this:

for (i, str) in pairs(strvec)
1 Like

Technically, I would recommend enumerate rather than pairs here, since i is used as an index for a Matrix, to allow strvec to be an arbitrary collection (possibly not 1-based, or possibly an iterator).

1 Like

We can also for variety use :

split_vec(s) = stack([[SubString(x, 1:2),
            SubString(x, 3:3) ,
            SubString(x, 4)] for x in s], dims=1)
julia> @benchmark split_vec(strvec)
BenchmarkTools.Trial: 10000 samples with 10 evaluations.
 Range (min … max):  1.710 ΞΌs …  30.801 ΞΌs  β”Š GC (min … max): 0.00% … 0.00%
 Time  (median):     1.947 ΞΌs               β”Š GC (median):    0.00%
 Time  (mean Β± Οƒ):   1.994 ΞΌs Β± 610.776 ns  β”Š GC (mean Β± Οƒ):  0.00% Β± 0.00%

     β–†  β–β–ƒβ–ˆβ–                                                   
  β–ƒβ–ƒβ–„β–ˆβ–ˆβ–‡β–ˆβ–ˆβ–ˆβ–ˆβ–…β–ƒβ–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–β–‚β–β–‚β–‚β–‚β–‚β–β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚ β–ƒ
  1.71 ΞΌs         Histogram: frequency by time        3.68 ΞΌs <

 Memory estimate: 736 bytes, allocs estimate: 5.

2 Likes

FYI, this variant seems to be twice as fast if we can avoid applying permutedims() to the result:

split_vec3(s) = stack((SubString(x, 1:2), SubString(x, 3:3), SubString(x, 4)) for x in s)

That is true

this variant seems to be twice as fast

But the the required matrix will be like this

julia> split_vec3(strvec)
3Γ—3 Matrix{SubString{String}}:
 "AA"      "BB"       "CC"
 " "       "C"        "M"
 "entryA"  " entryB"  " entryC"

Which is the transpose of the answer .

I think using generators was behind that speed , see this

julia> split_vec(s) = stack(((SubString(x, 1:2),
                   SubString(x, 3:3) ,
                   SubString(x, 4)) for x in s), dims=1)
split_vec (generic function with 1 method)

julia> @benchmark split_vec(strvec)
BenchmarkTools.Trial: 10000 samples with 84 evaluations.
 Range (min … max):  834.476 ns …   4.861 ΞΌs  β”Š GC (min … max): 0.00% … 0.00%
 Time  (median):     913.095 ns               β”Š GC (median):    0.00%
 Time  (mean Β± Οƒ):   922.723 ns Β± 106.815 ns  β”Š GC (mean Β± Οƒ):  0.00% Β± 0.00%

                  β–β–ˆβ–…β–„β–ƒ                                          
  β–‚β–‚β–‚β–‚β–‚β–β–β–β–β–β–β–β–β–β–β–‚β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‡β–ƒβ–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–β–‚β–‚β–‚β–‚β–‚β–β–‚β–β–‚β–‚β–β–‚β–‚β–‚β–β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚ β–ƒ
  834 ns           Histogram: frequency by time         1.09 ΞΌs <

 Memory estimate: 272 bytes, allocs estimate: 1.

And here is a better version using map

split_vec4(s) = map(x -> SubString.(x, (1:2, 3:3, 4)), s)

julia> @benchmark split_vec4(strvec)
BenchmarkTools.Trial: 10000 samples with 196 evaluations.
 Range (min … max):  472.806 ns …  2.811 ΞΌs  β”Š GC (min … max): 0.00% … 0.00%
 Time  (median):     518.617 ns              β”Š GC (median):    0.00%
 Time  (mean Β± Οƒ):   521.972 ns Β± 48.326 ns  β”Š GC (mean Β± Οƒ):  0.00% Β± 0.00%

                         β–ƒβ–ˆβ–ƒ                                    
  β–‚β–‚β–‚β–β–‚β–β–β–β–β–β–β–β–β–β–‚β–β–β–β–β–β–‚β–‚β–…β–ˆβ–ˆβ–ˆβ–…β–„β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–β–β–β–β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚ β–‚
  473 ns          Histogram: frequency by time          584 ns <

 Memory estimate: 272 bytes, allocs estimate: 1.
1 Like