Okay, I think I figured it out.
I ended up modifying the original script – and all I did was get rid of `valid_values = [x for x in window if if !ismissing(x)]`; the function below uses `skipmissing(window)` instead.
That’s what I get for using Gemini to help me code.
Here is the complete function. It takes the windowed average and median, and it replaces any missing values either with data it can copy from other rows or with NaN, since all I really need after this point are the average and median:
# Nominal width, in rows, of the moving window used by `process_file`; the window
# reaches `div(WIN_SIZE - 1, 2)` rows to either side of the current row.
const WIN_SIZE = 365
# Centered moving mean and median of `values`, ignoring `missing` entries.
# The window around row `i` spans `i - halfwidth` to `i + halfwidth`, clipped to the
# ends of the vector. Rows whose window holds no non-missing value stay `missing`.
# Results are rounded to 5 digits.
function _rolling_mean_median(values::AbstractVector, halfwidth::Integer)
    n = length(values)
    averaged = Vector{Union{Missing, Float64}}(missing, n)
    medianed = Vector{Union{Missing, Float64}}(missing, n)
    for i in eachindex(values)
        lo = max(1, i - halfwidth)
        hi = min(n, i + halfwidth)
        # A view avoids copying the window on every row.
        window = skipmissing(@view values[lo:hi])
        if !isempty(window)
            averaged[i] = round(mean(window), digits = 5)
            medianed[i] = round(median(window), digits = 5)
        end
    end
    return averaged, medianed
end

"""
    process_file(file, output_path, all_dates_df)

Process a single file: read it, align it to the full list of dates, add a moving
average and moving median of the `value` column, and write the result as CSV.

# Arguments
- `file`: path of the CSV file to read. The first four characters of its base name
  are used as the station code.
- `output_path`: directory in which `<stat>_medave.csv` is written.
- `all_dates_df`: `DataFrame` with a `decyr` column; the file's rows are left-joined
  onto it, so dates absent from the file become rows of `missing`.

# Details
- Rows of the input whose `decyr` is missing are dropped before the join.
- The moving statistics use a window reaching `div(WIN_SIZE - 1, 2)` rows to either
  side, skip `missing` values, and are computed *before* any gap filling.
- If the columns `lon`, `lat`, `value` and `sigma` all exist and any of them contains
  a `missing`, then missing `value`/`sigma` entries become `NaN`, and `lon`/`lat` are
  each overwritten in full with their first non-missing entry (when one exists).
- The columns `average`, `median` and `stat` are added to the output.

# Returns
Whatever `CSV.write` returns for the written file.
"""
function process_file(file, output_path, all_dates_df)
    # `first` counts characters, so short or non-ASCII file names do not throw the
    # way byte-range indexing (`[1:4]`) would.
    stat = first(basename(file), 4)

    df = CSV.read(file, DataFrame)
    dropmissing!(df, :decyr)

    # Merge with all dates.
    df_merged = sort(leftjoin(all_dates_df, df, on = :decyr), [:decyr])

    # Must run before the NaN replacement below: gaps are skipped as `missing`,
    # whereas NaN would propagate into the mean and median.
    averaged, medianed = _rolling_mean_median(df_merged.value, div(WIN_SIZE - 1, 2))

    present = names(df_merged)
    if all(in(present), ("lon", "lat", "value", "sigma"))
        has_gaps =
            any(ismissing, df_merged.lon) || any(ismissing, df_merged.lat) ||
            any(ismissing, df_merged.value) || any(ismissing, df_merged.sigma)
        if has_gaps
            first_valid_lon = findfirst(!ismissing, df_merged.lon)
            first_valid_lat = findfirst(!ismissing, df_merged.lat)

            df_merged.value = coalesce.(df_merged.value, NaN)
            df_merged.sigma = coalesce.(df_merged.sigma, NaN)

            # Copy the first known coordinate into every row.
            if first_valid_lon !== nothing
                df_merged.lon = fill(df_merged.lon[first_valid_lon], nrow(df_merged))
            end
            if first_valid_lat !== nothing
                df_merged.lat = fill(df_merged.lat[first_valid_lat], nrow(df_merged))
            end
        end
    end

    df_merged[!, :average] = averaged
    df_merged[!, :median] = medianed
    df_merged[!, :stat] .= stat

    return CSV.write(joinpath(output_path, "$(stat)_medave.csv"), df_merged)
end
Then I plotted the results to confirm that the median and averages populated correctly:
Here, especially at the beginning, the plots are a little wonky because of the limited amount of data within the window at the start, but the median and averages appear to have interpolated nicely across the smaller gap in the middle.
Thank you, everyone, for your help!
I really appreciate it!
