Hello all,
I’m starting to use geo data in Julia with ArcGDAL, after using the R sf package for a while. My use case is simple : I have WGS 84 positions and I want to guess the timezone they belong to.
I have the worldwide map of the timezone from: https://github.com/evansiroky/timezone-boundary-builder/releases/download/2020a/timezones.geojson.zip.
The code works, it delivers the right positions, but memory consumption is very high. It seems proportional to the number of points_without_timezone to tag. I often run out of memory (RAM). I used do-block for ArchGDAL feature functions to reduce the memory footprint, but it seems ineffective.
Any advice to improve it ? Should I release features in some way to avoid memory leaks ? Thx
# SET UP ###################################################################
using Revise, ArchGDAL, HTTP, DataFrames, ZipFile
# Points that need timezones
points_without_timezone = [
(longitude = 1.834, latitude = 50.136),
(longitude = 3.0975, latitude = 50.57),
(longitude = -1.939833, latitude = 49.725167),
(longitude = -0.456167, latitude = 49.18),
(longitude = 1.181667, latitude = 49.383),
(longitude = 4.155333, latitude = 49.209667),
(longitude = -4.412, latitude = 48.444167),
(longitude = -3.473167, latitude = 48.825833),
(longitude = -1.734, latitude = 48.068833),
(longitude = 0.110167, latitude = 48.4455),
(longitude = 2.384333, latitude = 48.716833),
(longitude = 4.02, latitude = 48.324667),
(longitude = 5.959833, latitude = 48.581),
(longitude = 7.640333, latitude = 48.5495),
(longitude = -3.218333, latitude = 47.294333),
(longitude = -1.608833, latitude = 47.15),
(longitude = 0.727333, latitude = 47.4445),
(longitude = 2.359833, latitude = 47.059167),
(longitude = 5.088333, latitude = 47.267833),
(longitude = 7.51, latitude = 47.614333),
(longitude = -1.4115, latitude = 46.046833),
(longitude = 0.314333, latitude = 46.593833),
(longitude = 1.175, latitude = 45.861167),
(longitude = 3.149333, latitude = 45.786833),
(longitude = 3.764, latitude = 45.0745),
(longitude = 5.077833, latitude = 45.7265),
(longitude = -0.691333, latitude = 44.830667),
(longitude = 1.396667, latitude = 44.745),
(longitude = 3.0195, latitude = 44.1185),
(longitude = 4.733, latitude = 44.581167),
(longitude = 6.502333, latitude = 44.565667),
(longitude = -0.500167, latitude = 43.909833),
(longitude = 0.0, latitude = 43.188),
(longitude = 1.106833, latitude = 43.005333),
(longitude = 1.378833, latitude = 43.621),
(longitude = 3.963167, latitude = 43.577),
(longitude = 5.216, latitude = 43.437667),
(longitude = 5.940833, latitude = 43.079333),
(longitude = 7.209, latitude = 43.648833),
(longitude = 2.872833, latitude = 42.737167),
(longitude = 8.792667, latitude = 41.918),
(longitude = 9.485167, latitude = 42.540667),
(longitude = 47.289667, latitude = -11.582667),
(longitude = 42.712, latitude = -17.054667),
(longitude = 40.340667, latitude = -22.344167),
(longitude = 54.520667, latitude = -15.887667),
(longitude = 55.528667, latitude = -20.8925),
(longitude = 77.569167, latitude = -37.795167),
(longitude = 51.856667, latitude = -46.4325),
(longitude = 70.243333, latitude = -49.352333),
(longitude = 45.282833, latitude = -12.8055),
(longitude = -56.179167, latitude = 46.766333),
(longitude = -61.004, latitude = 16.335),
(longitude = -62.852167, latitude = 17.9015),
(longitude = -61.516333, latitude = 16.264),
(longitude = -60.875333, latitude = 14.7745),
(longitude = -60.995667, latitude = 14.595333),
(longitude = -54.031667, latitude = 5.4855),
(longitude = -52.365333, latitude = 4.822333),
(longitude = -51.804667, latitude = 3.890667),
(longitude = -54.028333, latitude = 3.640167),
(longitude = 140.001, latitude = -66.663167),
]
# RETRIEVE TIMEZONE ##################################################################
function get_timezone(points = Array{Tuple{Real,Real},1})
# Define the source to fetch the shapefile as geojson and fetch
source_geojson = "https://github.com/evansiroky/timezone-boundary-builder/releases/download/2020a/timezones.geojson.zip"
geojson_content = HTTP.get(source_geojson)
# In a temp directory with auto closing, unzip
mktempdir() do path
# Fetch online
open(joinpath(path, "tz_map.geojson"), "w") do io
write(
io,
# The first file in the archve contaons the geojson shapefile
read(ZipFile.Reader(IOBuffer(geojson_content.body)).files[1]),
)
end
# Open vector data archive
ArchGDAL.read(joinpath(path, "tz_map.geojson")) do dataset
# Access the first layer
ArchGDAL.getlayer(dataset, 0) do layer
# Container for the results (beyond the do-blocks)
global results_df
results = []
# Iterate over the features
for feat_number = 0:(ArchGDAL.nfeature(layer)-1)
# Open feature to access the geom
ArchGDAL.getfeature(layer, feat_number) do feature
# For a geom (time zone), search if every point belong to it
for point in points
if ArchGDAL.within(
ArchGDAL.createpoint(point[1], point[2]),
ArchGDAL.getgeom(feature, 0),
)
# Record the timezone of the point
push!(
results,
(
point[1],
point[2],
ArchGDAL.getfield(feature, "tzid"),
),
)
end
end
end
end
# Return in a dataframe, even the points that where time zon is not retrieved
results_df = DataFrame(results)
rename!(results_df, ["longitude", "latitude", "timezone"])
# Merge results into
results_df = leftjoin(
DataFrame(points),
results_df,
on = ["longitude", "latitude"],
)
end
end
end
return results_df
end
# Resuts
points_with_timezone = get_timezone(points_without_timezone)