Two-sided moving average with intermittent missing data:

Okay, I think I figured it out.

I ended up modifying the original script – all I did was get rid of `valid_values = [x for x in window if if !ismissing(x)]`.

That’s what I get for using Gemini to help me code.

Here is the complete function. It computes the windowed average and median, and fills in any missing values either with data copied from other rows (`lon`, `lat`) or with `NaN` (`value`, `sigma`), since all I really need after this point are the average and median:

# Nominal width, in rows, of the centered moving window used by `process_file`:
# each output row looks at up to `div(WIN_SIZE - 1, 2)` rows on either side of itself.
# NOTE(review): 365 presumably means one year of daily samples — confirm.
const WIN_SIZE = 365

# Centered moving mean and median of `values`, ignoring `missing` entries.
# Each window covers up to `half_width` rows on either side of row `i` and is
# truncated at the ends of the vector. Rows whose window holds no data stay `missing`.
function _windowed_mean_median(values::AbstractVector, half_width::Integer)
    n = length(values)
    averaged = Vector{Union{Missing, Float64}}(missing, n)
    medianed = Vector{Union{Missing, Float64}}(missing, n)

    for i in eachindex(values)
        start_idx = max(1, i - half_width)
        end_idx = min(n, i + half_width)
        # A view avoids copying the window on every iteration.
        present = skipmissing(@view values[start_idx:end_idx])

        if !isempty(present)
            averaged[i] = round(mean(present), digits = 5)
            medianed[i] = round(median(present), digits = 5)
        end
    end

    return averaged, medianed
end

# Fill gaps left by the join, in place. Does nothing unless `lon`, `lat`, `value`
# and `sigma` all exist and at least one of them contains a `missing`.
# `value`/`sigma` gaps become NaN; `lon`/`lat` are overwritten in every row with
# their first non-missing entry (left untouched if the column is entirely missing).
function _fill_station_gaps!(df)
    required = ("lon", "lat", "value", "sigma")
    cols = names(df)
    all(in(cols), required) || return df
    any(name -> any(ismissing, df[!, name]), required) || return df

    df.value = coalesce.(df.value, NaN)
    df.sigma = coalesce.(df.sigma, NaN)

    for name in ("lon", "lat")
        idx = findfirst(!ismissing, df[!, name])
        if idx !== nothing
            df[!, name] = fill(df[idx, name], nrow(df))
        end
    end

    return df
end

"""
    process_file(file, output_path, all_dates_df; win_size = WIN_SIZE)

Read the CSV `file`, align it to the dates in `all_dates_df`, compute a centered
moving mean and median of the `value` column, and write the result to
`<stat>_medave.csv` inside `output_path`, where `<stat>` is the first four
characters of the file's base name.

# Arguments
- `file`: path of the input CSV; it must provide `decyr` and `value` columns.
- `output_path`: directory the output CSV is written into.
- `all_dates_df`: `DataFrame` with a `decyr` column; the input is left-joined onto
  it, so dates absent from `file` show up as rows with `missing` data.

# Keywords
- `win_size`: nominal window width in rows. The window spans `div(win_size - 1, 2)`
  rows on each side of the current row, so an even `win_size` behaves like
  `win_size - 1`. Windows are truncated at the ends of the series.

# Details
- Rows of `file` with a missing `decyr` are dropped before the join.
- `missing` entries are ignored inside each window; if a window contains no data
  at all, `average` and `median` stay `missing` for that row.
- Results are rounded to 5 digits.
- When `lon`, `lat`, `value` and `sigma` are all present and any of them has gaps,
  `value`/`sigma` gaps are replaced by `NaN` and `lon`/`lat` are set to their
  first non-missing entry in every row.
- Columns `average`, `median` and `stat` are added to the output.

# Returns
Whatever `CSV.write` returns for the written file.

# Throws
- `ArgumentError` if `win_size < 1`.
"""
function process_file(file, output_path, all_dates_df; win_size::Integer = WIN_SIZE)
    win_size >= 1 || throw(ArgumentError("win_size must be positive, got $win_size"))

    # `first(str, 4)` is character-based and does not throw on names shorter than 4.
    stat = first(basename(file), 4)

    df = CSV.read(file, DataFrame)
    dropmissing!(df, :decyr)

    # merge with all dates
    df_merged = sort(leftjoin(all_dates_df, df, on = :decyr), [:decyr])

    # Statistics are computed before gap filling so NaN placeholders never enter a window.
    averaged, medianed = _windowed_mean_median(df_merged.value, div(win_size - 1, 2))

    _fill_station_gaps!(df_merged)

    df_merged[!, :average] = averaged
    df_merged[!, :median] = medianed
    df_merged[!, :stat] .= stat

    return CSV.write(joinpath(output_path, "$(stat)_medave.csv"), df_merged)
end

Then I plotted the results to confirm that the median and average columns were populated correctly:

Here, especially at the beginning, the plots are a little wonky due to the limited amount of data within the window at the start, but the median and average look to have interpolated nicely across the smaller gap in the middle.

Thank you everyone for your help :slight_smile: I really appreciate it!