I am learning how to optimize julia code and came up with the following MWE:
using BenchmarkTools, Profile
strvec = ["AA entryA", "BBC entryB", "CCM entryC"]
function split_vec(strvec)
mat = stack(split.(strvec), dims = 1)
col1 = first.(mat[:, 1], 2)
col2 = Vector{String}(undef, length(col1))
idx_nAA = col1 .!= "AA"
col2[idx_nAA, :] = last.(mat[idx_nAA, 1], 1)
col2[idx_nAA .== 0, :] .= ""
return stack([col1, col2, mat[:, 2]])
end
split_vec(strvec)
@benchmark split_vec(strvec)
@Profile.profile for i in 1:Int(1e4) split_vec(strvec) end
Profile.print()
Desired Output:
3Γ3 Matrix{AbstractString}:
"AA" "" "entryA"
"BB" "C" "entryB"
"CC" "M" "entryC"
Benchmark:
BenchmarkTools.Trial: 10000 samples with 5 evaluations.
Range (min β¦ max): 6.470 ΞΌs β¦ 3.040 ms β GC (min β¦ max): 0.00% β¦ 99.17%
Time (median): 8.014 ΞΌs β GC (median): 0.00%
Time (mean Β± Ο): 9.368 ΞΌs Β± 30.440 ΞΌs β GC (mean Β± Ο): 3.22% Β± 0.99%
ββββ
ββ
ββ
β
ββ
β
βββββββ β β
βββββββββββββ β
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
β
β
6.47 ΞΌs Histogram: log(frequency) by time 17.4 ΞΌs <
Memory estimate: 2.62 KiB, allocs estimate: 34.
Profile:
Overhead β [+additional indent] Count File:Line; Function
=========================================================
β105 @Base/client.jl:552; _start()
β 105 @Base/client.jl:333; exec_options(opts::Base.JLOptions)
β 105 @Base/client.jl:416; run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_file::Bool, color_set::Bool)
β 105 @Base/essentials.jl:889; invokelatest
β 105 @Base/essentials.jl:892; #invokelatest#2
β 105 @Base/client.jl:432; (::Base.var"#1013#1015"{Bool, Bool, Bool})(REPL::Module)
β β 105 β¦ot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:375; run_repl(repl::REPL.AbstractREPL, consumer::Any)
β β 105 β¦ot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:389; run_repl(repl::REPL.AbstractREPL, consumer::Any; backend_on_current_task::Bool, backeβ¦
β β 105 β¦ot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:228; kwcall(::NamedTuple, ::typeof(REPL.start_repl_backend), backend::REPL.REPLBackend, cβ¦
β β 105 β¦t-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:231; start_repl_backend(backend::REPL.REPLBackend, consumer::Any; get_module::Function)
β β 105 β¦t-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:246; repl_backend_loop(backend::REPL.REPLBackend, get_module::Function)
β β β 105 β¦-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:150; eval_user_input(ast::Any, backend::REPL.REPLBackend, mod::Module)
β β β 105 @Base/boot.jl:385; eval
β β β 105 REPL[6]:1; top-level scope
β β β 105 β¦r/share/julia/stdlib/v1.10/Profile/src/Profile.jl:27; macro expansion
2β β β 105 REPL[6]:1; macro expansion
β β β β 12 REPL[3]:2; split_vec(strvec::Vector{String})
β β β β 2 @Base/abstractarray.jl:2767; stack
β β β β 2 @Base/abstractarray.jl:2767; #stack#184
β β β β 2 @Base/abstractarray.jl:2799; _stack
β β β β 2 @Base/abstractarray.jl:2807; _stack
β β β β β 2 @Base/abstractarray.jl:2839; _typed_stack
β β β β β 2 @Base/abstractarray.jl:2847; _typed_stack
β β β β β 1 @Base/abstractarray.jl:2856; _dim_stack
β β β β β 1 @Base/iterators.jl:652; peel
β β β β β 1 @Base/array.jl:945; iterate
β β β β β β 1 @Base/array.jl:945; iterate
1β β β β β β 1 @Base/essentials.jl:13; getindex
β β β β β 1 @Base/abstractarray.jl:2868; _dim_stack
1β β β β β 1 @Base/abstractarray.jl:2877; _dim_stack!(::Val{1}, B::Matrix{SubString{String}}, x1::Vector{SubString{String}}, xrest::Base.Itβ¦
β β β β 10 @Base/broadcast.jl:903; materialize
β β β β 10 @Base/broadcast.jl:928; copy
β β β β 10 @Base/broadcast.jl:956; copyto!
β β β β 10 @Base/broadcast.jl:1003; copyto!
β β β β β 10 @Base/simdloop.jl:77; macro expansion
β β β β β 10 @Base/broadcast.jl:1004; macro expansion
β β β β β 10 @Base/broadcast.jl:636; getindex
β β β β β 10 @Base/broadcast.jl:682; _broadcast_getindex
β β β β β 10 @Base/broadcast.jl:709; _broadcast_getindex_evalf
β β β β β β 10 @Base/strings/util.jl:633; split
β β β β β β 10 @Base/strings/util.jl:633; #split#488
β β β β β β 10 @Base/strings/util.jl:626; split
β β β β β β 10 @Base/strings/util.jl:628; #split#487
β β β β β β 10 @Base/array.jl:759; collect
β β β β β β β 2 @Base/array.jl:769; _collect(cont::UnitRange{Int64}, itr::Base.SplitIterator{String, typeof(isspace)}, ::Base.HasEltypeβ¦
β β β β β β β 2 @Base/array.jl:713; _similar_for
β β β β β β β 2 @Base/abstractarray.jl:833; similar
β β β β β β β 2 @Base/abstractarray.jl:842; similar
β β β β β β β 2 @Base/boot.jl:486; Array
2β β β β β β β β 2 @Base/boot.jl:477; Array
β β β β β β β 1 @Base/array.jl:770; _collect(cont::UnitRange{Int64}, itr::Base.SplitIterator{String, typeof(isspace)}, ::Base.HasEltypeβ¦
β β β β β β β 1 @Base/strings/util.jl:555; iterate
β β β β β β β 1 @Base/strings/util.jl:556; iterate
β β β β β β β 1 @Base/strings/search.jl:170; findnext(testf::typeof(isspace), s::String, i::Int64)
β β β β β β β 1 @Base/strings/string.jl:171; nextind
β β β β β β β β 1 @Base/strings/string.jl:175; _nextind_str(s::String, i::Int64)
1β β β β β β β β 1 @Base/promotion.jl:521; ==
β β β β β β β 2 @Base/array.jl:771; _collect(cont::UnitRange{Int64}, itr::Base.SplitIterator{String, typeof(isspace)}, ::Base.HasEltypeβ¦
β β β β β β β 2 @Base/array.jl:1119; push!
2β β β β β β β 2 @Base/array.jl:1072; _growend!
β β β β β β β 3 @Base/array.jl:772; _collect(cont::UnitRange{Int64}, itr::Base.SplitIterator{String, typeof(isspace)}, ::Base.HasEltypeβ¦
β β β β β β β 2 @Base/strings/util.jl:556; iterate
β β β β β β β 1 @Base/strings/search.jl:166; findnext(testf::typeof(isspace), s::String, i::Int64)
β β β β β β β 1 @Base/strings/string.jl:521; isvalid(s::String, i::Int64)
β β β β β β β 1 @Base/strings/basic.jl:208; checkbounds
1β β β β β β β β 1 @Base/int.jl:514; <=
β β β β β β β 1 @Base/strings/search.jl:170; findnext(testf::typeof(isspace), s::String, i::Int64)
β β β β β β β 1 @Base/strings/string.jl:171; nextind
β β β β β β β 1 @Base/strings/string.jl:175; _nextind_str(s::String, i::Int64)
1β β β β β β β β 1 @Base/promotion.jl:521; ==
β β β β β β β 1 @Base/strings/util.jl:569; iterate
β β β β β β β 1 @Base/strings/substring.jl:42; SubString
β β β β β β β 1 @Base/strings/substring.jl:41; SubString
1β β β β β β β 1 @Base/strings/substring.jl:30; SubString{String}(s::String, i::Int64, j::Int64)
2β β β β β β β 2 @Base/int.jl:0; _collect(cont::UnitRange{Int64}, itr::Base.SplitIterator{String, typeof(isspace)}, ::Base.HasEltype, isβ¦
β β β β 3 REPL[3]:3; split_vec(strvec::Vector{String})
β β β β 1 @Base/abstractarray.jl:1291; getindex
β β β β 1 @Base/multidimensional.jl:889; _getindex
β β β β 1 @Base/multidimensional.jl:901; _unsafe_getindex
β β β β 1 @Base/abstractarray.jl:831; similar
β β β β β 1 @Base/array.jl:420; similar
β β β β β 1 @Base/boot.jl:486; Array
1β β β β β 1 @Base/boot.jl:477; Array
β β β β 2 @Base/broadcast.jl:903; materialize
β β β β 2 @Base/broadcast.jl:928; copy
β β β β 2 @Base/broadcast.jl:956; copyto!
β β β β 2 @Base/broadcast.jl:1003; copyto!
β β β β β 2 @Base/simdloop.jl:77; macro expansion
β β β β β 2 @Base/broadcast.jl:1004; macro expansion
β β β β β 2 @Base/broadcast.jl:636; getindex
β β β β β 1 @Base/broadcast.jl:681; _broadcast_getindex
β β β β β 1 @Base/broadcast.jl:705; _getindex
β β β β β β 1 @Base/broadcast.jl:675; _broadcast_getindex
1β β β β β β 1 @Base/essentials.jl:13; getindex
β β β β β 1 @Base/broadcast.jl:682; _broadcast_getindex
β β β β β 1 @Base/broadcast.jl:709; _broadcast_getindex_evalf
β β β β β β 1 @Base/strings/basic.jl:693; first(s::SubString{String}, n::Int64)
β β β β β β 1 @Base/strings/substring.jl:292; getindex
β β β β β β 1 @Base/strings/substring.jl:43; SubString
β β β β β β 1 @Base/strings/substring.jl:47; SubString
β β β β β β 1 @Base/strings/substring.jl:41; SubString
1β β β β β β β 1 @Base/strings/string.jl:174; _nextind_str(s::String, i::Int64)
β β β β 27 REPL[3]:4; split_vec(strvec::Vector{String})
27β β β β 27 @Base/boot.jl:477; Array
β β β β 1 REPL[3]:6; split_vec(strvec::Vector{String})
β β β β 1 @Base/broadcast.jl:903; materialize
β β β β 1 @Base/broadcast.jl:928; copy
β β β β 1 @Base/broadcast.jl:223; similar
β β β β 1 @Base/broadcast.jl:226; similar
β β β β β 1 @Base/abstractarray.jl:876; similar
β β β β β 1 @Base/abstractarray.jl:877; similar
β β β β β 1 @Base/bitarray.jl:71; BitArray
1β β β β β 1 @Base/bitarray.jl:39; BitArray
β β β β 1 REPL[3]:7; split_vec(strvec::Vector{String})
β β β β 1 @Base/abstractarray.jl:1396; setindex!
β β β β 1 @Base/multidimensional.jl:944; _setindex!
β β β β 1 @Base/multidimensional.jl:955; _unsafe_setindex!(::IndexLinear, ::Vector{String}, ::Vector{SubString{String}}, ::Base.LogicalIndex{β¦
β β β β 1 @Base/cartesian.jl:66; macro expansion
β β β β β 1 @Base/multidimensional.jl:842; iterate
β β β β β 1 @Base/bitarray.jl:121; _blsr
β β β β β 1 @Base/int.jl:347; &
β β β β 2 REPL[3]:8; split_vec(strvec::Vector{String})
β β β β 2 @Base/broadcast.jl:1244; dotview
β β β β 2 @Base/views.jl:148; maybeview
β β β β 2 @Base/subarray.jl:186; view
β β β β 2 @Base/subarray.jl:223; unsafe_view
β β β β β 2 @Base/subarray.jl:28; SubArray
β β β β β 2 @Base/multidimensional.jl:855; ensure_indexable
β β β β β 2 @Base/multidimensional.jl:792; collect
β β β β β 2 @Base/array.jl:839; collect(itr::Base.Generator{Base.LogicalIndex{Int64, BitVector}, typeof(identity)})
β β β β β 2 @Base/array.jl:723; _array_for
β β β β β β 2 @Base/abstractarray.jl:876; similar
β β β β β β 2 @Base/abstractarray.jl:877; similar
β β β β β β 2 @Base/boot.jl:486; Array
2β β β β β β 2 @Base/boot.jl:477; Array
β β β β 57 REPL[3]:9; split_vec(strvec::Vector{String})
β β β β 57 @Base/abstractarray.jl:2767; stack
β β β β 57 @Base/abstractarray.jl:2767; #stack#184
β β β β 57 @Base/abstractarray.jl:2799; _stack
β β β β 51 @Base/abstractarray.jl:2811; _stack(dims::Function, ::Base.HasShape{1}, iter::Vector{Vector})
β β β β β 51 @Base/reducedim.jl:357; mapreduce
β β β β β 51 @Base/reducedim.jl:357; #mapreduce#821
β β β β β 51 @Base/reducedim.jl:365; _mapreduce_dim
2β β β β β 31 @Base/reduce.jl:440; _mapreduce(f::typeof(eltype), op::typeof(promote_type), ::IndexLinear, A::Vector{Vector})
β β β β β 29 @Base/abstractarray.jl:241; eltype
29β β β β β β 29 @Base/abstractarray.jl:242; eltype
5β β β β β 20 @Base/reduce.jl:443; _mapreduce(f::typeof(eltype), op::typeof(promote_type), ::IndexLinear, A::Vector{Vector})
β β β β β 15 @Base/abstractarray.jl:241; eltype
15β β β β β β 15 @Base/abstractarray.jl:242; eltype
3β β β β 6 @Base/abstractarray.jl:2812; _stack(dims::Function, ::Base.HasShape{1}, iter::Vector{Vector})
β β β β β 3 @Base/abstractarray.jl:2817; _typed_stack(::Colon, ::Type{AbstractString}, ::Type{Vector}, A::Vector{Vector})
β β β β β 1 @Base/abstractarray.jl:2821; _typed_stack(::Colon, ::Type{AbstractString}, ::Type{Vector}, A::Vector{Vector}, Aax::Tuple{Base.Onβ¦
β β β β β 1 @Base/abstractarray.jl:833; similar
β β β β β 1 @Base/array.jl:420; similar
β β β β β 1 @Base/boot.jl:487; Array
1β β β β β β 1 @Base/boot.jl:479; Array
1β β β β β 2 @Base/abstractarray.jl:2827; _typed_stack(::Colon, ::Type{AbstractString}, ::Type{Vector}, A::Vector{Vector}, Aax::Tuple{Base.Onβ¦
β β β β β 1 @Base/abstractarray.jl:1118; copyto!(dest::Matrix{AbstractString}, dstart::Int64, src::Vector{SubString{String}})
β β β β β 1 @Base/array.jl:363; copyto!
β β β β β 1 @Base/array.jl:376; _copyto_impl!
β β β β β β 1 @Base/array.jl:353; unsafe_copyto!
β β β β β β 1 @Base/array.jl:299; _unsafe_copyto!(dest::Matrix{AbstractString}, doffs::Int64, src::Vector{SubString{String}}, soffs::Int6β¦
1β β β β β β 1 @Base/array.jl:1021; setindex!
Total snapshots: 106. Utilization: 100% across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.
What can I do to make this as fast as possible? Can this be done with zero allocations? It seems one of the bottlenecks is calling stack()
, which I had assumed is already pretty optimized.