Here is my script. By the way, do grid authorities in other countries also publish their historical data (production by source, consumption by sector)?
Script
using Pkg
Pkg.activate(@__DIR__)
using Statistics, CSV, DataFrames, StringEncodings, StatsPlots, Dates, ZipFile, Pipe
### Script parameters...
# Folder holding the yearly zip archives (the folder of this script).
data_folder = @__DIR__
# Archives are named "<base_filename><year>.zip".
base_filename = "RealisationDonneesProduction_"
# Years to load (inclusive).
years = 2015:2023
# Final column layout of the assembled table: the three time keys added by the
# script, followed by the columns expected from the source files.
allnames = [
    "year", "day", "hour",
    "Heures",
    "Biomasse",
    "Gaz", "Charbon", "Fioul",
    "Hydraulique STEP", "Hydraulique fil de l'eau / éclusée", "Hydraulique lacs",
    "Nucléaire",
    "Solaire", "Éolien terrestre", "Éolien en mer",
    "Déchets",
    "Total",
]
### Data container and main loop to fill it...
# Each yearly archive holds one tab-separated text file (ISO-8859-1) made of
# consecutive per-day tables: the header of day k (0-based) sits on line 27k + 2
# and is followed by 24 hourly rows. "*" marks a missing value.
# The result `df` has one row per (year, day, hour) and exactly the columns
# listed in `allnames`, in that order.
#
# Per-day frames are pushed onto a vector and concatenated once at the end.
# This avoids reassigning the global `df` inside a top-level loop (ambiguous
# scope when the script is run non-interactively) and avoids re-copying the
# growing table on every year.
dayframes = DataFrame[]
for yr in years
    println("** processing year $yr ... ")
    zippath = joinpath(data_folder, "$(base_filename)$(yr).zip")
    # Make sure the archive handle is released even if decoding fails.
    archive = ZipFile.Reader(zippath)
    rawtext = try
        read(archive.files[1], String, enc"ISO-8859-1")
    finally
        close(archive)
    end
    ndays = isleapyear(Date(yr)) ? 366 : 365
    header_rows = [27 * d + 2 for d in 0:(ndays - 1)]
    # Number the days from the actual list of tables so that day 366 of a
    # leap year is labelled correctly instead of being left without a day.
    for (daynum, header_row) in enumerate(header_rows)
        dayframe = CSV.read(
            IOBuffer(rawtext), DataFrame;
            header=header_row, limit=24, delim="\t", missingstring="*",
            ntasks=1, buffer_in_memory=true, ignorerepeated=true,
        )
        insertcols!(dayframe, 1, :hour => 0:23)
        insertcols!(dayframe, 1, :day => daynum)
        insertcols!(dayframe, 1, :year => yr)
        # Columns absent from this file are filled with 0 so that every
        # frame can be stacked with the same layout.
        for colname in allnames
            if !(colname in names(dayframe))
                insertcols!(dayframe, 1, Symbol(colname) => 0)
            end
        end
        push!(dayframes, select(dayframe, allnames))
    end
end
df = reduce(vcat, dayframes)
# Some further aggregate columns...
# Each aggregate is the row-wise sum of the source columns listed for it.
fossil_fuel = ["Gaz", "Charbon", "Fioul"]
ren_controllable = ["Biomasse", "Hydraulique STEP", "Hydraulique lacs", "Déchets"]
ren_variable = [
    "Hydraulique fil de l'eau / éclusée", "Solaire", "Éolien terrestre", "Éolien en mer",
]
df[!, :fossil_fuel] = mapreduce(src -> df[!, src], +, fossil_fuel)
df[!, :ren_controllable] = mapreduce(src -> df[!, src], +, ren_controllable)
df[!, :ren_variable] = mapreduce(src -> df[!, src], +, ren_variable)
# Some checks
# Gap between the reported total and the sum of nuclear plus the three aggregates.
balance = @. df.Total - (df.Nucléaire + df.fossil_fuel + df.ren_controllable + df.ren_variable)
minimum(skipmissing(balance))
maximum(skipmissing(balance))
# Some plotting...
# Both weekly plots use the hourly rows of 2023.
# Winter week: days 10 to 16 of the year.
winterweek = filter(row -> row.year == 2023 && row.day in 10:16, df)
plot(
    [winterweek.Total winterweek.fossil_fuel winterweek.ren_controllable winterweek.ren_variable winterweek.Nucléaire];
    labels=["Total" "FF" "RC" "RV" "Nuclear"], ylabel="MW", title="Winter week",
)
#savefig("winterweek.svg")
# Summer week: days 190 to 196 of the year.
summerweek = filter(row -> row.year == 2023 && row.day in 190:196, df)
plot(
    [summerweek.Total summerweek.fossil_fuel summerweek.ren_controllable summerweek.ren_variable summerweek.Nucléaire];
    labels=["Total" "FF" "RC" "RV" "Nuclear"], ylabel="MW", title="Summer week",
)
#savefig("summerweek.svg")
# Mean production by source for each day of the year (over all years and hours)
# Groups are sorted by day so that the rows follow the calendar order.
avgyear = combine(groupby(df, "day"; sort=true)) do subdf
    # Skip missing readings, as the yearly trend does; otherwise a single
    # missing hour in any year would turn the whole day's mean into `missing`.
    avg(col) = mean(skipmissing(col))
    return (
        total = avg(subdf.Total),
        n = avg(subdf.Nucléaire),
        ff = avg(subdf.fossil_fuel),
        rc = avg(subdf.ren_controllable),
        rv = avg(subdf.ren_variable),
    )
end
# Plot against the actual day number rather than the row index, to match the x label.
plot(avgyear.day, hcat(avgyear.total,avgyear.ff,avgyear.rc,avgyear.rv,avgyear.n), labels=["Total" "FF" "RC" "RV" "Nuclear"], ylabel="MW", title="Seasonal variations in electricity production",legend=:top, xlabel="Day of the year")
#savefig("avgyear.svg")
# Mean production by source for each year (missing readings skipped)
trend = combine(groupby(df, :year)) do yearly
    avg(col) = mean(skipmissing(col))
    return (
        total = avg(yearly.Total),
        n = avg(yearly.Nucléaire),
        ff = avg(yearly.fossil_fuel),
        rc = avg(yearly.ren_controllable),
        rv = avg(yearly.ren_variable),
    )
end
plot(
    trend.year,
    [trend.total trend.ff trend.rc trend.rv trend.n];
    labels=["Total" "FF" "RC" "RV" "Nuclear"], ylabel="MW", title="Trend by years",
    legend=:top,
)
#savefig("trend.svg")
# Some stats.. last year vs first year...
# Ratio of the last year's mean to the first year's mean, per series.
tratio = last(trend.total) / first(trend.total)
nratio = last(trend.n) / first(trend.n)
ffratio = last(trend.ff) / first(trend.ff)
rcratio = last(trend.rc) / first(trend.rc)
rvratio = last(trend.rv) / first(trend.rv)
# Relative change from the first to the last year, in percent, per series.
tdiff = 100 * (last(trend.total) - first(trend.total)) / first(trend.total)
ndiff = 100 * (last(trend.n) - first(trend.n)) / first(trend.n)
ffdiff = 100 * (last(trend.ff) - first(trend.ff)) / first(trend.ff)
rcdiff = 100 * (last(trend.rc) - first(trend.rc)) / first(trend.rc)
rvdiff = 100 * (last(trend.rv) - first(trend.rv)) / first(trend.rv)