Arbitrary table join conditions

Here’s my deliberately verbose solution using DataFrames.jl and SplitApplyCombine.jl that produces the same result as my original SQL query:

julia> using DataFrames

julia> using SplitApplyCombine: innerjoin

julia> exchange_rates = DataFrame(time = [1,4,5,7,9,10,11], rate = [1.0,1.1,1.5,2.0,1.8,1.7,1.9])
7×2 DataFrame
│ Row │ time  │ rate    │
│     │ Int64 │ Float64 │
├─────┼───────┼─────────┤
│ 1   │ 1     │ 1.0     │
│ 2   │ 4     │ 1.1     │
│ 3   │ 5     │ 1.5     │
│ 4   │ 7     │ 2.0     │
│ 5   │ 9     │ 1.8     │
│ 6   │ 10    │ 1.7     │
│ 7   │ 11    │ 1.9     │

julia> trades = DataFrame(time = 1:12)
12×1 DataFrame
│ Row │ time  │
│     │ Int64 │
├─────┼───────┤
│ 1   │ 1     │
│ 2   │ 2     │
│ 3   │ 3     │
│ 4   │ 4     │
│ 5   │ 5     │
│ 6   │ 6     │
│ 7   │ 7     │
│ 8   │ 8     │
│ 9   │ 9     │
│ 10  │ 10    │
│ 11  │ 11    │
│ 12  │ 12    │

julia> trade_time(trade::DataFrameRow)::Int64 = trade.time
trade_time (generic function with 1 method)

julia> exchange_rate_time(exchange_rate::DataFrameRow)::Int64 = exchange_rate.time
exchange_rate_time (generic function with 1 method)
julia> trade_and_exchange_rate(trade::DataFrameRow, exchange_rate::DataFrameRow)::NamedTuple{(:trade, :rate),Tuple{Int64,Float64}} = (trade = trade.time, rate = exchange_rate.rate)
trade_and_exchange_rate (generic function with 1 method)
julia> exchange_rate_time_search(exchange_rate_times::Array{Int64,1}, trade_time::Int64)::Int64 = maximum(filter(time -> time <= trade_time, exchange_rate_times))
exchange_rate_time_search (generic function with 1 method)
julia> trade_exchange_rate_join_condition(trade_time::Int64, exchange_rate_time::Int64)::Bool = exchange_rate_time == exchange_rate_time_search(exchange_rates.time, trade_time)
trade_exchange_rate_join_condition (generic function with 1 method)

julia> trades_and_exchange_rates = DataFrame(
         innerjoin(
           trade_time,
           exchange_rate_time,
           trade_and_exchange_rate,
           trade_exchange_rate_join_condition,
           eachrow(trades),
           eachrow(exchange_rates)
         )
       )
12×2 DataFrame
│ Row │ trade │ rate    │
│     │ Int64 │ Float64 │
├─────┼───────┼─────────┤
│ 1   │ 1     │ 1.0     │
│ 2   │ 2     │ 1.0     │
│ 3   │ 3     │ 1.0     │
│ 4   │ 4     │ 1.1     │
│ 5   │ 5     │ 1.5     │
│ 6   │ 6     │ 1.5     │
│ 7   │ 7     │ 2.0     │
│ 8   │ 8     │ 2.0     │
│ 9   │ 9     │ 1.8     │
│ 10  │ 10    │ 1.7     │
│ 11  │ 11    │ 1.9     │
│ 12  │ 12    │ 1.9     │

Just the source code:

using DataFrames
using SplitApplyCombine: innerjoin

exchange_rates = DataFrame(time = [1,4,5,7,9,10,11], rate = [1.0,1.1,1.5,2.0,1.8,1.7,1.9])
trades = DataFrame(time = 1:12)

trade_time(trade::DataFrameRow)::Int64 = trade.time

exchange_rate_time(exchange_rate::DataFrameRow)::Int64 = exchange_rate.time

trade_and_exchange_rate(trade::DataFrameRow, exchange_rate::DataFrameRow)::NamedTuple{(:trade, :rate),Tuple{Int64,Float64}} = (trade = trade.time, rate = exchange_rate.rate)

exchange_rate_time_search(exchange_rate_times::Array{Int64,1}, trade_time::Int64)::Int64 = maximum(filter(time -> time <= trade_time, exchange_rate_times))

trade_exchange_rate_join_condition(trade_time::Int64, exchange_rate_time::Int64)::Bool = exchange_rate_time == exchange_rate_time_search(exchange_rates.time, trade_time)

trades_and_exchange_rates = DataFrame(
  innerjoin(
    trade_time,
    exchange_rate_time,
    trade_and_exchange_rate,
    trade_exchange_rate_join_condition,
    eachrow(trades),
    eachrow(exchange_rates)
  )
)
5 Likes