I have a grouped DataFrame
with dates and values but only observe dates in which a value was observed. I’d like to create the rows that are implicitly missing and have them as missing e.g. for interpolation later. I came up with the following code but was wondering if there is a faster way to do this?
df = DataFrame(
g = ['a','a', 'b', 'b', 'c', 'c', 'c'],
date = [Date(2021,1,1), Date(2021,1,2), Date(2021,1,2), Date(2021,1,4), Date(2021,1,1),Date(2021,1,3) ,Date(2021,1,7)],
v = rand(7)
)
alldates = DataFrame(date = minimum(df.date):Day(1):maximum(df.date))
gdf = groupby(df, :g)
combdf = DataFrame()
for g in gdf
gout = leftjoin(alldates, g, on = :date)
gout.g .= g.g[1]
disallowmissing!(gout, :g)
append!(combdf, gout, cols = :union)
end