Thank you @Tamas_Papp and @dawbarton …I used both your approaches… I created a structure that wrap the main data and the ancillary informations in the query and created an anonymous function to get rid of the main data storage on each call, at least in the “get” version…
using OdsIO, DataFrames, DataFramesMeta # LAJuliaUtils
include("lib.jl")
iFile = "input_data.ods"
################ SETS #####################################
sets = ods_read(iFile;sheetName="sets",retType="DataFrame")
const priProducts = toArray(sets[:priProducts],arrayT=String)
const secProducts = toArray(sets[:secProducts],arrayT=String)
const dClasses = toArray(sets[:dClasses],arrayT=String) # converting first to int.. # ..and then to string (ortherwise "25.0")
const forTypeId = toArray(sets[:forTypeId],arrayT=String)
const regId = toArray(sets[:regId],arrayT=String)
const fr2 = regId[1:end-2] # set of french regions ids
const products = vcat(priProducts,secProducts) # all products
yraw = toArray(sets[:years],arrayT=Int64) # getting ymin and ymax
const years = collect(yraw[1]:yraw[2]) # from ymin and ymax creating the set of years
############### LOADING DATA ##############################
# Loading data and standardizing it to parname|region|d1 (forType or product)|d2(region,diameter class, product)|year|value format
forData = ods_read(iFile;sheetName="forData",retType="DataFrame")
forData[:freeDim] = map(string, forData[:freeDim])
rename!(forData, Dict(:forType => :d1, :freeDim => :d2))
prodData = ods_read(iFile;sheetName="prodData",retType="DataFrame")
rename!(prodData, Dict(:prod => :d1, :freeDim => :d2))
rawd = vcat(forData,prodData)
regionMap = ods_read(iFile;sheetName="regionMap",retType="DataFrame") # loading the association of regions l2 -> l1
d = DataStructure(rawd,years[1],regionMap)
test = gd("pl","IF","hardWSawnW","",2005)
lib.jl:
using DataFrames, DataFramesMeta
function toArray(DA;arrayT=Any)
DA = dropna(DA)
if arrayT == String
return [string(i) for i in DA]
else
return Array{arrayT,1}(DA)
end
end
type DataStructure
rd # rawdata
y::Int64 # current year
regMap # map regions level1 -> level2
end
"""
gdd(d, parName,r,d1,d2="",y=-1,op=sum)
Get data specifying the required dimensions.
# Arguments
* `d`: the DataStructure to work on
* `parName`: The wanted parameter
* `r`: The wanted region
* `d1`: The wanted first dimension (either fortype or product)
* `d2` (def=""): The required second dimension (either region, diameter class or product)
* `y` (def=-1): The required year (if different than the current one)
* `op` (def=sum): The operation to perform if multiple values
# Logics of the function:
* `r`: if data ir requested at regional level and only national one is available, return the national one
* `d1`: if d2 is not provided, don't filter for it
* `d2`: if d3 is not provided, don't filter for it
* `y`: (a) if y not provided, use current years; (b) if y don't exists, use the highest y available
* `op`: operation can be any of `sum`, `mean`, `length`, `countnz`, `maximum`, `minimum`, `var`, `std`, `prod`
# Examples
* gd(d,"vol","LO","broadL_highF","35")
"""
function gdd(d, parName::String,r::String,d1::String,d2::String,y::Int64 =-1,op=sum)
# Notes: lookup for a NA is difficult (@where gives error)
if(y == -1)
y = d.y # current year
end
l1 = @where(d.rd, :parName.== parName) # filtering by parName
#return l1
l2 = @where(l1, :region .== r) # filterning by region
if size(l2)[1] == 0 # region not found
country = @where(regionMap, :l2 .== r)[1,:l1]
l2 = @where(l1, :region .== country)
end
#return l2
l3 = l2 # filtering by d1 (if requested)
if d1 != ""
l3 = @where(l2, :d1 .== d1)
end
#return l3
l4 = l3 # filtering by d2 (if requested)
if d2 != ""
l3 = @where(l2, :d2 .== d2)
end
#return l4
maxYear = maximum(l4[:year]) # filtering by year
y = y > maxYear ? maxYear: y
l5 = @where(l4, :year .== y)
#return l5
return op(l5[:value])
end
const gd = (parName,r,d1,d2="",y=-1,op=sum) -> gdd(d, parName,r,d1,d2,y,op)