Plots borking on SentinelArrays produced by CSV.read

using CSV, DataFrames, DataFramesMeta, Downloads, StatsPlots



Downloads.download("https://data.cdc.gov/api/views/g653-rqe2/rows.csv?accessType=DOWNLOAD","NWSSSarsCov2WastewaterConc.csv")
Downloads.download("https://data.cdc.gov/api/views/2ew6-ywp6/rows.csv?accessType=DOWNLOAD","NWSSSarsCov2WastewaterMetr.csv")


wwdat = CSV.read("NWSSSarsCov2WastewaterConc.csv",DataFrame)

@subset!(wwdat,.!ismissing.(:pcr_conc_smoothed) .&& .!ismissing.(:date))

wwdat.pcr_conc_smoothed = Float64.(wwdat.pcr_conc_smoothed)
scatter(wwdat.date,wwdat.pcr_conc_smoothed) ## fails.

This gives a weird error (when run in VSCode):

ERROR: MethodError: no method matching _cycle(::SentinelArrays.ChainedVector{Float64, Vector{Float64}}, ::SentinelArrays.ChainedVectorIndex{Vector{Float64}})
Closest candidates are:
  _cycle(::AbstractVector, ::Int64) at ~/.julia/packages/Plots/4UTBj/src/utils.jl:179
  _cycle(::AbstractVector, ::AbstractVector{Int64}) at ~/.julia/packages/Plots/4UTBj/src/utils.jl:183
  _cycle(::Any, ::AbstractVector{Int64}) at ~/.julia/packages/Plots/4UTBj/src/utils.jl:185
  ...
Stacktrace:
  [1] gr_draw_markers(series::Plots.Series, x::SentinelArrays.ChainedVector{Float64, Vector{Float64}}, y::SentinelArrays.ChainedVector{Float64, Vector{Float64}}, z::Nothing, clims::Tuple{Float64, Float64}, msize::Int64, strokewidth::Int64)
    @ Plots ~/.julia/packages/Plots/4UTBj/src/backends/gr.jl:1983
  [2] gr_draw_markers(series::Plots.Series, x::SentinelArrays.ChainedVector{Float64, Vector{Float64}}, y::SentinelArrays.ChainedVector{Float64, Vector{Float64}}, z::Nothing, clims::Tuple{Float64, Float64})
    @ Plots ~/.julia/packages/Plots/4UTBj/src/backends/gr.jl:1971
  [3] gr_add_series(sp::Plots.Subplot{Plots.GRBackend}, series::Plots.Series)
    @ Plots ~/.julia/packages/Plots/4UTBj/src/backends/gr.jl:1874
  [4] gr_display(sp::Plots.Subplot{Plots.GRBackend}, w::Measures.AbsoluteLength, h::Measures.AbsoluteLength, viewport_canvas::Vector{Float64})
    @ Plots ~/.julia/packages/Plots/4UTBj/src/backends/gr.jl:1033
  [5] (::Plots.var"#465#466"{Int64, Int64, Vector{Float64}})(sp::Plots.Subplot{Plots.GRBackend})
    @ Plots ~/.julia/packages/Plots/4UTBj/src/backends/gr.jl:699
  [6] foreach(f::Plots.var"#465#466"{Int64, Int64, Vector{Float64}}, itr::Vector{Plots.Subplot})
    @ Base ./abstractarray.jl:2774
  [7] gr_display(plt::Plots.Plot{Plots.GRBackend}, dpi_factor::Int64)
    @ Plots ~/.julia/packages/Plots/4UTBj/src/backends/gr.jl:699
  [8] #502
    @ ~/.julia/packages/Plots/4UTBj/src/backends/gr.jl:2177 [inlined]
  [9] withenv(::Plots.var"#502#503"{Plots.Plot{Plots.GRBackend}, Int64}, ::Pair{String, String}, ::Vararg{Pair{String, String}})
    @ Base ./env.jl:172
 [10] _show(io::IOBuffer, #unused#::MIME{Symbol("image/svg+xml")}, plt::Plots.Plot{Plots.GRBackend})
    @ Plots ~/.julia/packages/Plots/4UTBj/src/backends/gr.jl:2172
 [11] #invokelatest#2
    @ ./essentials.jl:729 [inlined]
 [12] invokelatest
    @ ./essentials.jl:726 [inlined]
 [13] show
    @ ~/.julia/packages/Plots/4UTBj/src/output.jl:237 [inlined]
 [14] __binrepr(m::MIME{Symbol("image/svg+xml")}, x::Plots.Plot{Plots.GRBackend}, context::Nothing)
    @ Base.Multimedia ./multimedia.jl:159
 [15] display(d::VSCodeServer.InlineDisplay, m::MIME{Symbol("image/svg+xml")}, x::Plots.Plot{Plots.GRBackend})
    @ VSCodeServer ./strings/string.jl:0
 [16] display(d::VSCodeServer.InlineDisplay, mime::String, x::Any)
    @ Base.Multimedia ./multimedia.jl:216
 [17] display(d::VSCodeServer.InlineDisplay, x::Plots.Plot{Plots.GRBackend})
    @ VSCodeServer ~/.vscode-oss/extensions/julialang.language-julia-1.38.2-universal/scripts/packages/VSCodeServer/src/display.jl:165
 [18] display(x::Any)
    @ Base.Multimedia ./multimedia.jl:328
 [19] #invokelatest#2
    @ ./essentials.jl:729 [inlined]
 [20] invokelatest
    @ ./essentials.jl:726 [inlined]
 [21] (::VSCodeServer.var"#66#70"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
    @ VSCodeServer ~/.vscode-oss/extensions/julialang.language-julia-1.38.2-universal/scripts/packages/VSCodeServer/src/eval.jl:199
 [22] withpath(f::VSCodeServer.var"#66#70"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams}, path::String)
    @ VSCodeServer ~/.vscode-oss/extensions/julialang.language-julia-1.38.2-universal/scripts/packages/VSCodeServer/src/repl.jl:249
 [23] (::VSCodeServer.var"#65#69"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
    @ VSCodeServer ~/.vscode-oss/extensions/julialang.language-julia-1.38.2-universal/scripts/packages/VSCodeServer/src/eval.jl:155
 [24] hideprompt(f::VSCodeServer.var"#65#69"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})
    @ VSCodeServer ~/.vscode-oss/extensions/julialang.language-julia-1.38.2-universal/scripts/packages/VSCodeServer/src/repl.jl:38
 [25] (::VSCodeServer.var"#64#68"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
    @ VSCodeServer ~/.vscode-oss/extensions/julialang.language-julia-1.38.2-universal/scripts/packages/VSCodeServer/src/eval.jl:126
 [26] with_logstate(f::Function, logstate::Any)
    @ Base.CoreLogging ./logging.jl:511
 [27] with_logger
    @ ./logging.jl:623 [inlined]
 [28] (::VSCodeServer.var"#63#67"{VSCodeServer.ReplRunCodeRequestParams})()
    @ VSCodeServer ~/.vscode-oss/extensions/julialang.language-julia-1.38.2-universal/scripts/packages/VSCodeServer/src/eval.jl:225
 [29] #invokelatest#2
    @ ./essentials.jl:729 [inlined]
 [30] invokelatest(::Any)
    @ Base ./essentials.jl:726
 [31] macro expansion
    @ ~/.vscode-oss/extensions/julialang.language-julia-1.38.2-universal/scripts/packages/VSCodeServer/src/eval.jl:34 [inlined]
 [32] (::VSCodeServer.var"#61#62")()
    @ VSCodeServer ./task.jl:484

I came across this post

which recommends reading the CSV single-threaded. That works:

julia> wwdat1 = CSV.read("NWSSSarsCov2WastewaterConc.csv",DataFrame; ntasks = 1)
335417×4 DataFrame
...

julia> wwdat1.key_plot_id |> typeof
       # now it's a normal Vector, not a SentinelArray type
Vector{String} (alias for Array{String, 1})

julia> scatter(wwdat1.date,wwdat1.pcr_conc_smoothed) ## works.

Screenshot_20221030_113317

The same thread also suggests collecting the column before plotting, as another way of managing this, and that works too:

julia> scatter(collect(wwdat.date), collect(wwdat.pcr_conc_smoothed))  # works

But reading the CSV with ntasks = 1 is a fixed, one-time (per session) cost, so that’s probably better than collecting for every plot call.

1 Like

This is a workaround for now. However, the problem reported here is on the Plots.jl side.

x-ref eachindex call incorrectly assumes that the returned index is IndexLinear by bkamins · Pull Request #4479 · JuliaPlots/Plots.jl · GitHub

1 Like

Would it be possible to reduce the failing example to an offline one, with as few dependencies as possible, so that we can add it to the Plots test suite as a regression test (in @bkamins's PR)?

@t-bltg:
Probably it is best to use OffsetArrays.jl (for non 1-based indexing case) and use:

using SentinelArrays
x = ChainedVector([[1, 2], [3, 4]])

(for the case mentioned in this thread).

2 Likes

Awesome that you guys figured this out and pushed a fix overnight. In the meantime I’m just calling dropmissing! on the data and then constructing a vector to replace the columns in the data frame.

ie.

mydata.mycol = Vector{Float64}(mydata.mycol)

(Do it once after reading rather than on every plot call.) Then I can still get fast threaded reading.

You can replace all columns to be Vector in a data frame as follows:

df = copy(df)

or e.g.

mapcols!(copy, df)

or

select!(df, All() => copy, renamecols=false)

That works well. I tried select!(df,All() => copy, renamecols=false) and it fails with a MethodError on the SentinelArrays columns:

ERROR: MethodError: no method matching copy(::SentinelArrays.ChainedVector{String, Vector{String}}, ::SentinelArrays.ChainedVector{Date, Vector{Date}}, ::SentinelArrays.ChainedVector{Float64, Vector{Float64}}, ::PooledArrays.PooledVector{String, UInt32, Vector{UInt32}})
Closest candidates are:
  copy(::SentinelArrays.ChainedVector{T, A} where A<:AbstractVector{T}) where T at ~/.julia/packages/SentinelArrays/XvFr7/src/chainedvector.jl:464
  copy(::AbstractArray) at abstractarray.jl:1093
Stacktrace:
 [1] _transformation_helper(df::DataFrame, col_idx::UnitRange{Int64}, ::Base.RefValue{Any})
   @ DataFrames ~/.julia/packages/DataFrames/Lrd7K/src/abstractdataframe/selection.jl:606
 [2] select_transform!(::Base.RefValue{Any}, df::DataFrame, newdf::DataFrame, transformed_cols::Set{Symbol}, copycols::Bool, allow_resizing_newdf
...

Sorry, I made a typo because I was writing from memory.
It should be written All() .=> copy (with a . in front of =>), so that copy is applied to each column separately rather than to all columns at once.