Thank you all for the suggestions and feedback! As suggested, I tried the different packages: Rasters.jl, Meshes.jl, ArchGDAL.jl and GMT.jl. For me the fastest was to use GMT.jl. However, as a disclamer I would like to point out that I am reshaping the results inside the functions so that the function returns a dataframe of true/false and that, I guess, my code could be more efficient. If you have more suggestions, please let me know!
Here is the code:
using DataFrames
using Shapefile
using GeoInterface
using Tables
using GBIF
using Meshes
using GeoTables
using Rasters
using GMT
using BenchmarkTools
using ArchGDAL
# Download some occurrence data
obs = GBIF.occurrences("scientificName" => "Burramys parvus","hasCoordinate" => "true", "limit" => 100)
points = map(o -> (o.longitude, o.latitude), obs)
######################## Rasters ############################
# Read shapefile
geoms = Shapefile.shapes(Shapefile.Table("wwf_terr_ecos.shp"))
# Function using Rasters.jl
function overlay_rasters(points :: Array{Tuple{Float64, Float64},1}, geoms)
# Initiate dataframe to hold results
# How to efficiently create a dataframe of n rows and m cols filled with false?
overlay_df = DataFrame([Vector{Bool}(undef, length(points)) for i in 1:length(geoms)], :auto)
for j in 1:nrow(overlay_df)
for i in 1:ncol(overlay_df)
if overlay_df[j,i] == true
overlay_df[j,i] = false
end
end
end
for i in 1:length(geoms)
# Overlay point with polygons
overlay = inpolygon(points, geoms[i])
# Transform result to table, then populate dataframe
# A bitmatrix = 1 0 , means the point is in the polygon or not? What would be a good way to gather the results in the dataframe?
overlay_tb = Tables.table(overlay)
res = overlay_tb.Column1 + overlay_tb.Column2
overlay_df[!, i] = res
end
return overlay_df
end
df_rasters = overlay_rasters(points, geoms)
################## Meshes #####################
# Read shapefile
geo_table = GeoTables.load("wwf_terr_ecos.shp")
function overlay_meshes(points :: Array{Tuple{Float64, Float64},1}, geo_table)
# Initiate dataframe to hold results
overlay_df = DataFrame([Vector{Bool}(undef, length(points)) for i in 1:length(geo_table.geometry)], :auto)
for j in 1:nrow(overlay_df)
for i in 1:ncol(overlay_df)
if overlay_df[j,i] == true
overlay_df[j,i] = false
end
end
end
# Overlay points with polygons
for j in 1:length(points)
for i in 1:length(geo_table.geometry)
if points[j] in geo_table.geometry[i]
print("point ", j, " in ecoregion", i)
overlay_df[j, i] = true
break
end
end
end
return overlay_df
end
df_meshes = overlay_meshes(points, geo_table)
############################# GMT ##############################
# Read shapefile
# Why the gmt.jl object has a different length than the objects read with rasters.jl, meshes.jl and ArchGDAL.jl?
geo_gmt = gmtread("wwf_terr_ecos.shp")
lon = map(o -> (o.longitude), obs)
lat = map(o -> (o.latitude), obs)
# Function using GMT.jl
function overlay_gmt(geo_gmt, lon, lat)
# Initiate dataframe to hol results
overlay_df = DataFrame([Vector{Bool}(undef, length(lon)) for i in 1:length(geo_gmt)], :auto)
# Replace true by false
for j in 1:nrow(overlay_df)
for i in 1:ncol(overlay_df)
if overlay_df[j,i] == true
overlay_df[j,i] = false
end
end
end
iswithin(bbox, lon, lat) = (lon >= bbox[1] && lon <= bbox[2] && lat >= bbox[3] && lat <= bbox[4])
for j in 1:length(points)
for i = 1:length(geo_gmt)
!iswithin(geo_gmt[i].bbox, lon[j], lat[j]) && continue
r = gmtselect([lon lat], polygon = geo_gmt[i])
if (!isempty(r))
# Populate dataframe with results
overlay_df[j, i] = true
#println("Point falls inside polygon $(i)")
break
end
end
end
return overlay_df
end
df_gmt = overlay_gmt(geo_gmt, lon, lat)
############################### ArchGDAL ######################################
# Read shapefile with ArchGDAL
eco_sf = ArchGDAL.getlayer(ArchGDAL.read("wwf_terr_ecos.shp"), 0)
lon = map(o -> (o.longitude), obs)
lat = map(o -> (o.latitude), obs)
function overlay_archgdal(eco_sf, lon, lat)
# Transform GBIF occurrences in ArchGDAL point geometries
points = ArchGDAL.createpoint.(lon, lat)
# Initiate dataframe to hold results
overlay_df = DataFrame([Vector{Bool}(undef, length(points)) for i in 1:length(eco_sf)], :auto)
# Replace true by false
for j in 1:nrow(overlay_df)
for i in 1:ncol(overlay_df)
if overlay_df[j,i] == true
overlay_df[j,i] = false
end
end
end
for j in 1:length(points)
for i in 0:ArchGDAL.nfeature(eco_sf)-1
ArchGDAL.getfeature(eco_sf, i) do feature
ArchGDAL.getgeom(feature) do geom
# Check if point is contained in the ecoregion polygon
overlay_df[j, i+1] = ArchGDAL.contains(geom, points[j])
end
end
end
end
return overlay_df
end
df_archgdal = overlay_archgdal(eco_sf, lon, lat)
# Benchmark using Benchmark.jl
@btime overlay_archgdal(eco_sf, lon, lat)
@btime overlay_gmt(geo_gmt, lon, lat)
@btime overlay_meshes(points, geo_table)
@btime overlay_rasters(points, geoms)