`findmax` and `argmax` are still broken with CSV.jl, please help
Test code
using CSV
using DataFrames
using StableRNGs
using Test
"""
    write_data(dims::Dims{3} = (16, 2, 8))

Write reproducible fixture files for the tests.

An array of size `dims` is filled with integers drawn from `0:9` using
`StableRNG(1230)`. Each slice along the third dimension is written to
`test_data/test_<k>.csv` as a table with columns `x` and `y`; the `test_data`
directory is created if needed. Returns `nothing`.

NOTE(review): exactly two column names are supplied, so `dims[2]` is
presumably expected to be 2 — confirm before calling with other shapes.
"""
function write_data(dims::Dims{3} = (16, 2, 8))
    generator = StableRNG(1230)
    samples = rand(generator, 0:9, dims)
    mkpath("test_data")
    # One CSV file per slice along the third dimension.
    foreach(axes(samples, 3)) do k
        table = DataFrame(samples[:, :, k], [:x, :y])
        CSV.write("test_data/test_$k.csv", table)
    end
    return nothing
end
"""
    read_data(n_files = 8)

Read `test_data/test_1.csv` through `test_data/test_<n_files>.csv` with a
single `CSV.read` call that receives the whole list of paths, and return the
resulting `DataFrame`.
"""
function read_data(n_files = 8)
    # Hand CSV.read the full vector of paths rather than reading file by file.
    paths = map(k -> "test_data/test_$k.csv", 1:n_files)
    return CSV.read(paths, DataFrame)
end
# Create the CSV fixtures first, then load them back; `df` is the global table
# whose columns `df.x` and `df.y` are exercised by the tests below.
write_data()
df = read_data()
"""
    sq(x)

Return `x` raised to the second power. Used as the transform argument in the
two-argument `findmax`/`argmax`/`findmin`/`argmin` tests.
"""
function sq(x)
    return x^2
end
# Every test applies the same reduction twice: once to a column of `df` exactly
# as returned by `read_data`, and once to `collect` of that column. The two
# results are expected to be equal.
# NOTE(review): the columns are presumably SentinelArrays `ChainedVector`s,
# since `read_data` passes several files to one `CSV.read` call — confirm.
@testset "Is CSV.jl broken?" begin
# `findmax(v)` returns `(maximum, index)`; `findmax(f, v)` returns
# `(f(x), index)` for the maximizing element, so an index is compared in
# all four cases.
@testset "findmax" begin
@test findmax(df.x) == findmax(collect(df.x))
@test findmax(df.y) == findmax(collect(df.y))
@test findmax(sq, df.x) == findmax(sq, collect(df.x))
@test findmax(sq, df.y) == findmax(sq, collect(df.y))
end
# `argmax(v)` returns an index, but `argmax(f, v)` returns the maximizing
# element itself rather than its index, so only the one-argument tests
# compare indices.
@testset "argmax" begin
@test argmax(df.x) == argmax(collect(df.x))
@test argmax(df.y) == argmax(collect(df.y))
@test argmax(sq, df.x) == argmax(sq, collect(df.x))
@test argmax(sq, df.y) == argmax(sq, collect(df.y))
end
# Minimum counterparts of the `findmax` tests.
@testset "findmin" begin
@test findmin(df.x) == findmin(collect(df.x))
@test findmin(df.y) == findmin(collect(df.y))
@test findmin(sq, df.x) == findmin(sq, collect(df.x))
@test findmin(sq, df.y) == findmin(sq, collect(df.y))
end
# Minimum counterparts of the `argmax` tests.
@testset "argmin" begin
@test argmin(df.x) == argmin(collect(df.x))
@test argmin(df.y) == argmin(collect(df.y))
@test argmin(sq, df.x) == argmin(sq, collect(df.x))
@test argmin(sq, df.y) == argmin(sq, collect(df.y))
end
end
When I run the above test code I get the following results.
Test Summary:     | Pass  Fail  Total  Time
Is CSV.jl broken? |   10     6     16  1.5s
  findmax         |          4      4  1.2s
  argmax          |    2     2      4  0.1s
  findmin         |    4            4  0.1s
  argmin          |    4            4  0.1s
Here are the failing tests.
findmax: Test Failed at /home/mkitti/blah/csv_is_broken/test_csv.jl:31
Expression: findmax(df.x) == findmax(collect(df.x))
Evaluated: (9, 105) == (9, 9)
findmax: Test Failed at /home/mkitti/blah/csv_is_broken/test_csv.jl:32
Expression: findmax(df.y) == findmax(collect(df.y))
Evaluated: (9, 120) == (9, 8)
findmax: Test Failed at /home/mkitti/blah/csv_is_broken/test_csv.jl:33
Expression: findmax(sq, df.x) == findmax(sq, collect(df.x))
Evaluated: (81, 105) == (81, 9)
findmax: Test Failed at /home/mkitti/blah/csv_is_broken/test_csv.jl:34
Expression: findmax(sq, df.y) == findmax(sq, collect(df.y))
Evaluated: (81, 120) == (81, 8)
argmax: Test Failed at /home/mkitti/blah/csv_is_broken/test_csv.jl:37
Expression: argmax(df.x) == argmax(collect(df.x))
Evaluated: 105 == 9
argmax: Test Failed at /home/mkitti/blah/csv_is_broken/test_csv.jl:38
Expression: argmax(df.y) == argmax(collect(df.y))
Evaluated: 120 == 8
The problem was previously posted here:
I know the issue is in SentinelArrays.jl:
I think I have a fix:
However, it has been a month, and it is still broken. Please help me fix this.