using Query
using DataFrames
# Small example table: two ids, a repeating `trip` label, and random `values`.
ex_df = DataFrame(
    id = [1, 1, 1, 2, 2, 2, 2],
    trip = ["a", "a", "b", "a", "a", "b", "b"],
    values = rand(7),
)
"""
    consect(d)

Compare each element of `d` with the one before it and return an `Int` vector
with one entry per adjacent pair: `0` when the two neighbours are identical
(`===`), `1` otherwise. The result has one element fewer than `d`.
"""
function consect(d)
    n = length(d)
    later = view(d, 2:n)        # elements 2..n
    earlier = view(d, 1:n-1)    # elements 1..n-1, aligned with `later`
    changed = later .!== earlier
    return Int.(changed)
end
# Group the rows by `id`, build a per-group run counter from changes in `trip`,
# then expand every group back into its rows. Each output row receives its
# group's entire `run` vector, as shown in the output below.
ex_df |>
    @groupby(_.id) |>
    @map({id = key(_), run = cumsum(vcat(1, consect(_.trip))), rows = _}) |>
    @mapmany(g -> g.rows, (g, r) -> {r..., g.run}) |>
    DataFrame
7 rows × 4 columns
id trip values run
Int64 String Float64 Array…
1 1 a 0.430749 [1, 1, 2]
2 1 a 0.806467 [1, 1, 2]
3 1 b 0.744969 [1, 1, 2]
4 2 a 0.652944 [1, 1, 2, 2]
5 2 a 0.326956 [1, 1, 2, 2]
6 2 b 0.717417 [1, 1, 2, 2]
7 2 b 0.448909 [1, 1, 2, 2]
I want the `run` values to be unraveled so that each row holds a single integer instead of its group's full array. That is, the `run` column should read [1, 1, 2, 1, 1, 2, 2] from top to bottom, instead of the current column of arrays.
How can I do this with Query? Thanks in advance!