The SortMerge.jl package is designed exactly for this purpose:
using DataFrames, CSV, SortMerge, Dates
url="""https://gist.githubusercontent.com/kafisatz/ed1f42fb4b5fd1ce1cf3a8a12b8e80b6/raw/70df7040c550bbdedd168c90a78caf4ee2808881/hadata.csv"""
fi = download(url)
df0 = CSV.File(fi) |> DataFrame
df1 = filter(x->x.entity_id == "kitchen_temperature",df0)
df2 = filter(x->x.entity_id == "buero_temperature",df0)
j = sortmerge(df1.datetime, df2.datetime, Minute(5),
sd=(v1, v2, i1, i2, threshold) -> begin
diff = v1[i1] - v2[i2]
(abs(diff) > threshold) && (return sign(diff))
return 0
end)
# Rows common to both dataframes (i.e., inner join)
tmp = hcat(df1[j[1], :], df2[j[2], :], makeunique=true)
# Print maximum clock difference between matched rows
println(Minute(maximum(abs.(df1[j[1], :datetime] .- df2[j[2], :datetime]))))
# Rows from first dataframe without matching rows in the second one:
tmp = df1[countmatch(j, 1) .== 0, :]
# Rows from second dataframe without matching rows in the first one:
tmp = df2[countmatch(j, 2) .== 0, :]