Skip to content

Commit 9e20700

Browse files
authored
Support GeoPandas Polygons and MultiPolygons (#1285)
* Add rendering of GeoPandas polygons and multipolygons via ragged arrays * Add example notebooks * Correct scope of shapely function call * Calculate geopandas bounds once only * Support dask-geopandas and use lazy imports * Add optional dependencies so can "pip install datashader[geopandas]" * Explicit Shapely version check * Add tests comparing geopandas, dask-geopandas and spatialpandas * Remove demo notebooks * Use sindex.query instead of cx for GeoPandas.GeoDataFrame * Better error reporting * Move geopandas check and spatial filtering to separate function for reuse
1 parent c9f9283 commit 9e20700

File tree

6 files changed

+264
-18
lines changed

6 files changed

+264
-18
lines changed

datashader/core.py

+48-5
Original file line numberDiff line numberDiff line change
@@ -733,22 +733,27 @@ def polygons(self, source, geometry, agg=None):
733733
"""
734734
from .glyphs import PolygonGeom
735735
from .reductions import any as any_rdn
736+
736737
if spatialpandas and isinstance(source, spatialpandas.dask.DaskGeoDataFrame):
737738
# Downselect partitions to those that may contain polygons in viewport
738739
x_range = self.x_range if self.x_range is not None else (None, None)
739740
y_range = self.y_range if self.y_range is not None else (None, None)
740741
source = source.cx_partitions[slice(*x_range), slice(*y_range)]
742+
glyph = PolygonGeom(geometry)
741743
elif spatialpandas and isinstance(source, spatialpandas.GeoDataFrame):
742-
pass
744+
glyph = PolygonGeom(geometry)
745+
elif (geopandas_source := self._source_from_geopandas(source)) is not None:
746+
source = geopandas_source
747+
from .glyphs.polygon import GeopandasPolygonGeom
748+
glyph = GeopandasPolygonGeom(geometry)
743749
else:
744750
raise ValueError(
745-
"source must be an instance of spatialpandas.GeoDataFrame or \n"
746-
"spatialpandas.dask.DaskGeoDataFrame.\n"
747-
" Received value of type {typ}".format(typ=type(source)))
751+
"source must be an instance of spatialpandas.GeoDataFrame, "
752+
"spatialpandas.dask.DaskGeoDataFrame, geopandas.GeoDataFrame or "
753+
f"dask_geopandas.GeoDataFrame, not {type(source)}")
748754

749755
if agg is None:
750756
agg = any_rdn()
751-
glyph = PolygonGeom(geometry)
752757
return bypixel(source, self, glyph, agg)
753758

754759
def quadmesh(self, source, x=None, y=None, agg=None):
@@ -1228,6 +1233,44 @@ def validate(self):
12281233
self.validate_ranges(self.x_range, self.y_range)
12291234
self.validate_size(self.plot_width, self.plot_height)
12301235

1236+
def _source_from_geopandas(self, source):
1237+
"""
1238+
Check if the specified source is a geopandas or dask-geopandas GeoDataFrame.
1239+
If so, spatially filter the source and return it.
1240+
If not, return None.
1241+
"""
1242+
try:
1243+
import geopandas
1244+
except ImportError:
1245+
geopandas = None
1246+
1247+
try:
1248+
import dask_geopandas
1249+
except ImportError:
1250+
dask_geopandas = None
1251+
1252+
if ((geopandas and isinstance(source, geopandas.GeoDataFrame)) or
1253+
(dask_geopandas and isinstance(source, dask_geopandas.GeoDataFrame))):
1254+
# Explicit shapely version check as cannot continue unless shapely >= 2
1255+
from packaging.version import Version
1256+
from shapely import __version__ as shapely_version
1257+
if Version(shapely_version) < Version('2.0.0'):
1258+
raise ImportError("Use of GeoPandas in Datashader requires Shapely >= 2.0.0")
1259+
1260+
if isinstance(source, geopandas.GeoDataFrame):
1261+
x_range = self.x_range if self.x_range is not None else (-np.inf, np.inf)
1262+
y_range = self.y_range if self.y_range is not None else (-np.inf, np.inf)
1263+
from shapely import box
1264+
query = source.sindex.query(box(x_range[0], y_range[0], x_range[1], y_range[1]))
1265+
source = source.iloc[query]
1266+
else:
1267+
x_range = self.x_range if self.x_range is not None else (None, None)
1268+
y_range = self.y_range if self.y_range is not None else (None, None)
1269+
source = source.cx[slice(*x_range), slice(*y_range)]
1270+
return source
1271+
else:
1272+
return None
1273+
12311274

12321275
def bypixel(source, canvas, glyph, agg, *, antialias=False):
12331276
"""Compute an aggregate grouped by pixel sized bins.

datashader/glyphs/points.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
cudf = None
1616
cuda_args = None
1717

18+
try:
19+
from geopandas.array import GeometryDtype as gpd_GeometryDtype
20+
except ImportError:
21+
gpd_GeometryDtype = type(None)
22+
1823
try:
1924
import spatialpandas
2025
except Exception:
@@ -35,6 +40,7 @@ def values(s):
3540
class _GeometryLike(Glyph):
3641
def __init__(self, geometry):
3742
self.geometry = geometry
43+
self._cached_bounds = None
3844

3945
@property
4046
def ndims(self):
@@ -72,11 +78,27 @@ def required_columns(self):
7278
return [self.geometry]
7379

7480
def compute_x_bounds(self, df):
75-
bounds = df[self.geometry].array.total_bounds_x
81+
col = df[self.geometry]
82+
if isinstance(col.dtype, gpd_GeometryDtype):
83+
# geopandas
84+
if self._cached_bounds is None:
85+
self._cached_bounds = col.total_bounds
86+
bounds = self._cached_bounds[::2]
87+
else:
88+
# spatialpandas
89+
bounds = col.array.total_bounds_x
7690
return self.maybe_expand_bounds(bounds)
7791

7892
def compute_y_bounds(self, df):
79-
bounds = df[self.geometry].array.total_bounds_y
93+
col = df[self.geometry]
94+
if isinstance(col.dtype, gpd_GeometryDtype):
95+
# geopandas
96+
if self._cached_bounds is None:
97+
self._cached_bounds = col.total_bounds
98+
bounds = self._cached_bounds[1::2]
99+
else:
100+
# spatialpandas
101+
bounds = col.array.total_bounds_y
80102
return self.maybe_expand_bounds(bounds)
81103

82104
@memoize

datashader/glyphs/polygon.py

+101-6
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,40 @@
1111
spatialpandas = None
1212

1313

14+
class GeopandasPolygonGeom(_GeometryLike):
15+
@property
16+
def geom_dtypes(self):
17+
from geopandas.array import GeometryDtype
18+
return (GeometryDtype,)
19+
20+
@memoize
21+
def _build_extend(self, x_mapper, y_mapper, info, append, _antialias_stage_2, _antialias_stage_2_funcs):
22+
expand_aggs_and_cols = self.expand_aggs_and_cols(append)
23+
map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper)
24+
draw_polygon = _build_draw_polygon(
25+
append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_and_cols
26+
)
27+
28+
perform_extend_cpu = _build_extend_geopandas_polygon_geometry(
29+
draw_polygon, expand_aggs_and_cols
30+
)
31+
geom_name = self.geometry
32+
33+
def extend(aggs, df, vt, bounds, plot_start=True):
34+
sx, tx, sy, ty = vt
35+
xmin, xmax, ymin, ymax = bounds
36+
aggs_and_cols = aggs + info(df, aggs[0].shape[:2])
37+
geom_array = df[geom_name].array
38+
39+
perform_extend_cpu(
40+
sx, tx, sy, ty,
41+
xmin, xmax, ymin, ymax,
42+
geom_array, *aggs_and_cols
43+
)
44+
45+
return extend
46+
47+
1448
class PolygonGeom(_GeometryLike):
1549
# spatialpandas must be available if a PolygonGeom object is created.
1650

@@ -52,7 +86,7 @@ def _build_draw_polygon(append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_
5286
@expand_aggs_and_cols
5387
def draw_polygon(
5488
i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
55-
offsets, values, xs, ys, yincreasing, eligible,
89+
offsets, offset_multiplier, values, xs, ys, yincreasing, eligible,
5690
*aggs_and_cols
5791
):
5892
"""Draw a polygon using a winding-number scan-line algorithm
@@ -64,8 +98,8 @@ def draw_polygon(
6498
eligible.fill(1)
6599

66100
# First pass, compute bounding box of polygon vertices in data coordinates
67-
start_index = offsets[0]
68-
stop_index = offsets[-1]
101+
start_index = offset_multiplier*offsets[0]
102+
stop_index = offset_multiplier*offsets[-1]
69103
# num_edges = stop_index - start_index - 2
70104
poly_xmin = np.min(values[start_index:stop_index:2])
71105
poly_ymin = np.min(values[start_index + 1:stop_index:2])
@@ -105,8 +139,8 @@ def draw_polygon(
105139
# Build arrays of edges in canvas coordinates
106140
ei = 0
107141
for j in range(len(offsets) - 1):
108-
start = offsets[j]
109-
stop = offsets[j + 1]
142+
start = offset_multiplier*offsets[j]
143+
stop = offset_multiplier*offsets[j + 1]
110144
for k in range(start, stop - 2, 2):
111145
x0 = values[k]
112146
y0 = values[k + 1]
@@ -201,6 +235,67 @@ def draw_polygon(
201235
return draw_polygon
202236

203237

238+
def _build_extend_geopandas_polygon_geometry(
239+
draw_polygon, expand_aggs_and_cols
240+
):
241+
# Lazy import shapely. Cannot get here if geopandas and shapely are not available.
242+
import shapely
243+
244+
def extend_cpu(
245+
sx, tx, sy, ty, xmin, xmax, ymin, ymax, geometry, *aggs_and_cols
246+
):
247+
ragged = shapely.to_ragged_array(geometry)
248+
geometry_type = ragged[0]
249+
if geometry_type not in (shapely.GeometryType.POLYGON, shapely.GeometryType.MULTIPOLYGON):
250+
raise ValueError(
251+
"Canvas.polygons supports GeoPandas geometry types of POLYGON and MULTIPOLYGON, "
252+
f"not {repr(geometry_type)}")
253+
254+
coords = ragged[1].ravel()
255+
if geometry_type == shapely.GeometryType.MULTIPOLYGON:
256+
offsets0, offsets1, offsets2 = ragged[2]
257+
else: # POLYGON
258+
offsets0, offsets1 = ragged[2]
259+
offsets2 = np.arange(len(offsets1))
260+
261+
extend_cpu_numba(
262+
sx, tx, sy, ty, xmin, xmax, ymin, ymax,
263+
coords, offsets0, offsets1, offsets2, *aggs_and_cols
264+
)
265+
266+
@ngjit
267+
@expand_aggs_and_cols
268+
def extend_cpu_numba(sx, tx, sy, ty, xmin, xmax, ymin, ymax,
269+
coords, offsets0, offsets1, offsets2, *aggs_and_cols):
270+
# Pre-allocate temp arrays
271+
max_edges = 0
272+
n_multipolygons = len(offsets2) - 1
273+
for i in range(n_multipolygons):
274+
polygon_inds = offsets1[offsets2[i]:offsets2[i + 1] + 1]
275+
for j in range(len(polygon_inds) - 1):
276+
start = offsets0[polygon_inds[j]]
277+
stop = offsets0[polygon_inds[j + 1]]
278+
max_edges = max(max_edges, stop - start - 1)
279+
280+
xs = np.full((max_edges, 2), np.nan, dtype=np.float32)
281+
ys = np.full((max_edges, 2), np.nan, dtype=np.float32)
282+
yincreasing = np.zeros(max_edges, dtype=np.int8)
283+
284+
# Initialize array indicating which edges are still eligible for processing
285+
eligible = np.ones(max_edges, dtype=np.int8)
286+
287+
for i in range(n_multipolygons):
288+
polygon_inds = offsets1[offsets2[i]:offsets2[i + 1] + 1]
289+
for j in range(len(polygon_inds) - 1):
290+
start = polygon_inds[j]
291+
stop = polygon_inds[j + 1]
292+
draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
293+
offsets0[start:stop + 1], 2,
294+
coords, xs, ys, yincreasing, eligible, *aggs_and_cols)
295+
296+
return extend_cpu
297+
298+
204299
def _build_extend_polygon_geometry(
205300
draw_polygon, expand_aggs_and_cols
206301
):
@@ -269,7 +364,7 @@ def extend_cpu_numba(
269364
stop = polygon_inds[j + 1]
270365

271366
draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
272-
offsets2[start:stop + 1], values,
367+
offsets2[start:stop + 1], 1, values,
273368
xs, ys, yincreasing, eligible, *aggs_and_cols)
274369

275370
return extend_cpu

datashader/tests/test_polygons.py

+72-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import pytest
22
import pandas as pd
33
import numpy as np
4+
from numpy import nan
45
import xarray as xr
56
import datashader as ds
67
from datashader.tests.test_pandas import assert_eq_ndarray, assert_eq_xr
@@ -16,6 +17,18 @@
1617
GeoDataFrame = None
1718
MultiPolygonArray = None
1819

20+
try:
21+
from geodatasets import get_path
22+
import geopandas
23+
except ImportError:
24+
get_path = None
25+
geopandas = None
26+
27+
try:
28+
import dask_geopandas
29+
except ImportError:
30+
dask_geopandas = None
31+
1932

2033
def dask_GeoDataFrame(*args, **kwargs):
2134
return dd.from_pandas(GeoDataFrame(*args, **kwargs), npartitions=3)
@@ -122,7 +135,7 @@ def test_multiple_polygons_auto_range(DataFrame):
122135
out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x'])
123136

124137
assert_eq_xr(agg, out)
125-
138+
126139
assert_eq_ndarray(agg.x_range, (-1, 3.5), close=True)
127140
assert_eq_ndarray(agg.y_range, (0.1, 2), close=True)
128141

@@ -319,3 +332,61 @@ def test_spatial_index_not_dropped():
319332

320333
assert df2.columns == ['some_geom']
321334
assert df2.some_geom.array._sindex == df.some_geom.array._sindex
335+
336+
337+
natural_earth_sol = np.array([
338+
[nan, 7, 7, 7, 7, 7, 0, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, nan],
339+
[nan, nan, nan, nan, 5, nan, 6, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
340+
[nan, nan, nan, nan, nan, nan, 9, nan, nan, nan, nan, nan, nan, 10, nan, nan, nan, nan, 11, 12],
341+
[nan, nan, nan, nan, nan, nan, 95, nan, nan, nan, nan, 112, nan, nan, nan, nan, 21, 21, 21, 13],
342+
[ 17, nan, nan, nan, nan, nan, 95, 95, nan, nan, nan, 112, 20, nan, nan, nan, 31, 32, 34, 22],
343+
[nan, nan, nan, nan, nan, nan, 95, nan, nan, 112, 112, 112, 112, nan, 44, 41, 50, 43, 37, nan],
344+
[nan, 60, nan, nan, 95, 65, 54, nan, nan, 112, 112, 112, 112, nan, 112, 112, 63, nan, nan, nan],
345+
[nan, nan, nan, 95, 95, 95, 74, nan, nan, nan, 72, 68, 112, 112, 112, 112, 112, 71, 73, nan],
346+
[ 87, 82, 78, 95, 95, 88, 95, nan, nan, 80, 83, 112, 112, 112, 112, 112, 112, 112, nan, nan],
347+
[ 94, nan, nan, 116, 118, 125, 125, 126, 126, nan, nan, 121, 122, 109, nan, 123, nan, 101, 106, 93],
348+
])
349+
350+
351+
@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
352+
def test_natural_earth_geopandas():
353+
df = geopandas.read_file(get_path("naturalearth.land"))
354+
df["col"] = np.arange(len(df))
355+
356+
canvas = ds.Canvas(plot_height=10, plot_width=20)
357+
agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col"))
358+
359+
assert_eq_ndarray(agg.data, natural_earth_sol)
360+
361+
362+
@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
363+
@pytest.mark.skipif(not dask_geopandas, reason="dask_geopandas not installed")
364+
@pytest.mark.parametrize('npartitions', [1, 2, 5])
365+
def test_natural_earth_dask_geopandas(npartitions):
366+
df = geopandas.read_file(get_path("naturalearth.land"))
367+
df["col"] = np.arange(len(df))
368+
df = dd.from_pandas(df, npartitions=npartitions)
369+
assert df.npartitions == npartitions
370+
df.calculate_spatial_partitions()
371+
372+
canvas = ds.Canvas(plot_height=10, plot_width=20)
373+
agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col"))
374+
375+
assert_eq_ndarray(agg.data, natural_earth_sol)
376+
377+
378+
@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
379+
@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed")
380+
@pytest.mark.parametrize('npartitions', [0, 1, 2, 5])
381+
def test_natural_earth_spatialpandas(npartitions):
382+
df = geopandas.read_file(get_path("naturalearth.land"))
383+
df["col"] = np.arange(len(df))
384+
df = spatialpandas.GeoDataFrame(df)
385+
if npartitions > 0:
386+
df = dd.from_pandas(df, npartitions=npartitions)
387+
assert df.npartitions == npartitions
388+
389+
canvas = ds.Canvas(plot_height=10, plot_width=20)
390+
agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col"))
391+
392+
assert_eq_ndarray(agg.data, natural_earth_sol)

datashader/utils.py

+7
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@
2525
except Exception:
2626
cudf = None
2727

28+
try:
29+
from geopandas.array import GeometryDtype as gpd_GeometryDtype
30+
except ImportError:
31+
gpd_GeometryDtype = type(None)
32+
2833
try:
2934
from spatialpandas.geometry import GeometryDtype
3035
except ImportError:
@@ -430,6 +435,8 @@ def dshape_from_pandas_helper(col):
430435
return datashape.Option(datashape.DateTime(tz=tz))
431436
elif isinstance(col.dtype, (RaggedDtype, GeometryDtype)):
432437
return col.dtype
438+
elif gpd_GeometryDtype and isinstance(col.dtype, gpd_GeometryDtype):
439+
return col.dtype
433440
dshape = datashape.CType.from_numpy_dtype(col.dtype)
434441
dshape = datashape.string if dshape == datashape.object_ else dshape
435442
if dshape in (datashape.string, datashape.datetime_):

0 commit comments

Comments
 (0)