Say that I have a dataframe:
"""
    exampl()

Build a small demo `DataFrame` and return the distinct `values` found in each group.

The demo table has 35 rows: the `grp` column holds the labels 1 to 5 (seven consecutive
rows each) and the `values` column holds random integers drawn from `1:15`. Both columns
are stored as `Int8`.

The table is grouped by `:grp` and `unique` is applied to `:values` within every group,
giving one row per (group, distinct value) pair in the columns `grp` and
`values_unique`. Because the values are random, the number of rows differs between calls.
"""
function exampl()
    # Seven consecutive rows for each of the five group labels.
    grp = repeat(Int8.(1:5); inner=7)

    # One random draw per row; sized from `grp` so both columns always have equal length.
    vals = Int8.(rand(1:15, length(grp)))

    df = DataFrame(grp = grp, values = vals)
    gdf = groupby(df, :grp)
    return combine(gdf, :values => unique)
end
julia> exampl()
25×2 DataFrame
Row │ grp values_unique
│ Int8 Int8
─────┼─────────────────────
1 │ 1 11
2 │ 1 3
3 │ 1 9
4 │ 1 6
5 │ 1 1
6 │ 1 15
7 │ 2 8
8 │ 2 2
9 │ 2 10
10 │ 2 11
11 │ 2 3
12 │ 3 5
13 │ 3 15
14 │ 3 1
15 │ 3 4
16 │ 3 14
17 │ 4 12
18 │ 4 15
19 │ 4 10
20 │ 4 4
21 │ 4 14
22 │ 5 3
23 │ 5 8
24 │ 5 1
25 │ 5 4
How would I create a third column, `unique_to_group`, a Boolean value that shows whether the value in each row occurs only in that group and in no other group? For example, row 3, with value 9, would be flagged as `true` in the new column because 9 appears only in grp 1. I have been trying `setdiff!` approaches but haven’t been able to make them work.