diff --git a/datashader/core.py b/datashader/core.py index 4e8ad3f8d..d98109dd2 100644 --- a/datashader/core.py +++ b/datashader/core.py @@ -733,22 +733,27 @@ def polygons(self, source, geometry, agg=None): """ from .glyphs import PolygonGeom from .reductions import any as any_rdn + if spatialpandas and isinstance(source, spatialpandas.dask.DaskGeoDataFrame): # Downselect partitions to those that may contain polygons in viewport x_range = self.x_range if self.x_range is not None else (None, None) y_range = self.y_range if self.y_range is not None else (None, None) source = source.cx_partitions[slice(*x_range), slice(*y_range)] + glyph = PolygonGeom(geometry) elif spatialpandas and isinstance(source, spatialpandas.GeoDataFrame): - pass + glyph = PolygonGeom(geometry) + elif (geopandas_source := self._source_from_geopandas(source)) is not None: + source = geopandas_source + from .glyphs.polygon import GeopandasPolygonGeom + glyph = GeopandasPolygonGeom(geometry) else: raise ValueError( - "source must be an instance of spatialpandas.GeoDataFrame or \n" - "spatialpandas.dask.DaskGeoDataFrame.\n" - " Received value of type {typ}".format(typ=type(source))) + "source must be an instance of spatialpandas.GeoDataFrame, " + "spatialpandas.dask.DaskGeoDataFrame, geopandas.GeoDataFrame or " + f"dask_geopandas.GeoDataFrame, not {type(source)}") if agg is None: agg = any_rdn() - glyph = PolygonGeom(geometry) return bypixel(source, self, glyph, agg) def quadmesh(self, source, x=None, y=None, agg=None): @@ -1228,6 +1233,44 @@ def validate(self): self.validate_ranges(self.x_range, self.y_range) self.validate_size(self.plot_width, self.plot_height) + def _source_from_geopandas(self, source): + """ + Check if the specified source is a geopandas or dask-geopandas GeoDataFrame. + If so, spatially filter the source and return it. + If not, return None. + """ + try: + import geopandas + except ImportError: + geopandas = None + + try: + import dask_geopandas + except ImportError: + dask_geopandas = None + + if ((geopandas and isinstance(source, geopandas.GeoDataFrame)) or + (dask_geopandas and isinstance(source, dask_geopandas.GeoDataFrame))): + # Explicit shapely version check as cannot continue unless shapely >= 2 + from packaging.version import Version + from shapely import __version__ as shapely_version + if Version(shapely_version) < Version('2.0.0'): + raise ImportError("Use of GeoPandas in Datashader requires Shapely >= 2.0.0") + + if isinstance(source, geopandas.GeoDataFrame): + x_range = self.x_range if self.x_range is not None else (-np.inf, np.inf) + y_range = self.y_range if self.y_range is not None else (-np.inf, np.inf) + from shapely import box + query = source.sindex.query(box(x_range[0], y_range[0], x_range[1], y_range[1])) + source = source.iloc[query] + else: + x_range = self.x_range if self.x_range is not None else (None, None) + y_range = self.y_range if self.y_range is not None else (None, None) + source = source.cx[slice(*x_range), slice(*y_range)] + return source + else: + return None + def bypixel(source, canvas, glyph, agg, *, antialias=False): """Compute an aggregate grouped by pixel sized bins. diff --git a/datashader/glyphs/points.py b/datashader/glyphs/points.py index ef69d5ee7..172394d0c 100644 --- a/datashader/glyphs/points.py +++ b/datashader/glyphs/points.py @@ -15,6 +15,11 @@ cudf = None cuda_args = None +try: + from geopandas.array import GeometryDtype as gpd_GeometryDtype +except ImportError: + gpd_GeometryDtype = type(None) + try: import spatialpandas except Exception: @@ -35,6 +40,7 @@ def values(s): class _GeometryLike(Glyph): def __init__(self, geometry): self.geometry = geometry + self._cached_bounds = None @property def ndims(self): @@ -72,11 +78,27 @@ def required_columns(self): return [self.geometry] def compute_x_bounds(self, df): - bounds = df[self.geometry].array.total_bounds_x + col = df[self.geometry] + if isinstance(col.dtype, gpd_GeometryDtype): + # geopandas + if self._cached_bounds is None: + self._cached_bounds = col.total_bounds + bounds = self._cached_bounds[::2] + else: + # spatialpandas + bounds = col.array.total_bounds_x return self.maybe_expand_bounds(bounds) def compute_y_bounds(self, df): - bounds = df[self.geometry].array.total_bounds_y + col = df[self.geometry] + if isinstance(col.dtype, gpd_GeometryDtype): + # geopandas + if self._cached_bounds is None: + self._cached_bounds = col.total_bounds + bounds = self._cached_bounds[1::2] + else: + # spatialpandas + bounds = col.array.total_bounds_y return self.maybe_expand_bounds(bounds) @memoize diff --git a/datashader/glyphs/polygon.py b/datashader/glyphs/polygon.py index a9d1c7089..abbebc7ad 100644 --- a/datashader/glyphs/polygon.py +++ b/datashader/glyphs/polygon.py @@ -11,6 +11,40 @@ spatialpandas = None +class GeopandasPolygonGeom(_GeometryLike): + @property + def geom_dtypes(self): + from geopandas.array import GeometryDtype + return (GeometryDtype,) + + @memoize + def _build_extend(self, x_mapper, y_mapper, info, append, _antialias_stage_2, _antialias_stage_2_funcs): + expand_aggs_and_cols = self.expand_aggs_and_cols(append) + map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper) + draw_polygon = _build_draw_polygon( + append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_and_cols + ) + + perform_extend_cpu = _build_extend_geopandas_polygon_geometry( + draw_polygon, expand_aggs_and_cols + ) + geom_name = self.geometry + + def extend(aggs, df, vt, bounds, plot_start=True): + sx, tx, sy, ty = vt + xmin, xmax, ymin, ymax = bounds + aggs_and_cols = aggs + info(df, aggs[0].shape[:2]) + geom_array = df[geom_name].array + + perform_extend_cpu( + sx, tx, sy, ty, + xmin, xmax, ymin, ymax, + geom_array, *aggs_and_cols + ) + + return extend + + class PolygonGeom(_GeometryLike): # spatialpandas must be available if a PolygonGeom object is created. @@ -52,7 +86,7 @@ def _build_draw_polygon(append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_ @expand_aggs_and_cols def draw_polygon( i, sx, tx, sy, ty, xmin, xmax, ymin, ymax, - offsets, values, xs, ys, yincreasing, eligible, + offsets, offset_multiplier, values, xs, ys, yincreasing, eligible, *aggs_and_cols ): """Draw a polygon using a winding-number scan-line algorithm @@ -64,8 +98,8 @@ def draw_polygon( eligible.fill(1) # First pass, compute bounding box of polygon vertices in data coordinates - start_index = offsets[0] - stop_index = offsets[-1] + start_index = offset_multiplier*offsets[0] + stop_index = offset_multiplier*offsets[-1] # num_edges = stop_index - start_index - 2 poly_xmin = np.min(values[start_index:stop_index:2]) poly_ymin = np.min(values[start_index + 1:stop_index:2]) @@ -105,8 +139,8 @@ def draw_polygon( # Build arrays of edges in canvas coordinates ei = 0 for j in range(len(offsets) - 1): - start = offsets[j] - stop = offsets[j + 1] + start = offset_multiplier*offsets[j] + stop = offset_multiplier*offsets[j + 1] for k in range(start, stop - 2, 2): x0 = values[k] y0 = values[k + 1] @@ -201,6 +235,67 @@ def draw_polygon( return draw_polygon +def _build_extend_geopandas_polygon_geometry( + draw_polygon, expand_aggs_and_cols +): + # Lazy import shapely. Cannot get here if geopandas and shapely are not available. + import shapely + + def extend_cpu( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, geometry, *aggs_and_cols + ): + ragged = shapely.to_ragged_array(geometry) + geometry_type = ragged[0] + if geometry_type not in (shapely.GeometryType.POLYGON, shapely.GeometryType.MULTIPOLYGON): + raise ValueError( + "Canvas.polygons supports GeoPandas geometry types of POLYGON and MULTIPOLYGON, " + f"not {repr(geometry_type)}") + + coords = ragged[1].ravel() + if geometry_type == shapely.GeometryType.MULTIPOLYGON: + offsets0, offsets1, offsets2 = ragged[2] + else: # POLYGON + offsets0, offsets1 = ragged[2] + offsets2 = np.arange(len(offsets1)) + + extend_cpu_numba( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + coords, offsets0, offsets1, offsets2, *aggs_and_cols + ) + + @ngjit + @expand_aggs_and_cols + def extend_cpu_numba(sx, tx, sy, ty, xmin, xmax, ymin, ymax, + coords, offsets0, offsets1, offsets2, *aggs_and_cols): + # Pre-allocate temp arrays + max_edges = 0 + n_multipolygons = len(offsets2) - 1 + for i in range(n_multipolygons): + polygon_inds = offsets1[offsets2[i]:offsets2[i + 1] + 1] + for j in range(len(polygon_inds) - 1): + start = offsets0[polygon_inds[j]] + stop = offsets0[polygon_inds[j + 1]] + max_edges = max(max_edges, stop - start - 1) + + xs = np.full((max_edges, 2), np.nan, dtype=np.float32) + ys = np.full((max_edges, 2), np.nan, dtype=np.float32) + yincreasing = np.zeros(max_edges, dtype=np.int8) + + # Initialize array indicating which edges are still eligible for processing + eligible = np.ones(max_edges, dtype=np.int8) + + for i in range(n_multipolygons): + polygon_inds = offsets1[offsets2[i]:offsets2[i + 1] + 1] + for j in range(len(polygon_inds) - 1): + start = polygon_inds[j] + stop = polygon_inds[j + 1] + draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax, + offsets0[start:stop + 1], 2, + coords, xs, ys, yincreasing, eligible, *aggs_and_cols) + + return extend_cpu + + def _build_extend_polygon_geometry( draw_polygon, expand_aggs_and_cols ): @@ -269,7 +364,7 @@ def extend_cpu_numba( stop = polygon_inds[j + 1] draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax, - offsets2[start:stop + 1], values, + offsets2[start:stop + 1], 1, values, xs, ys, yincreasing, eligible, *aggs_and_cols) return extend_cpu diff --git a/datashader/tests/test_polygons.py b/datashader/tests/test_polygons.py index df6f1b57e..38ee96798 100644 --- a/datashader/tests/test_polygons.py +++ b/datashader/tests/test_polygons.py @@ -1,6 +1,7 @@ import pytest import pandas as pd import numpy as np +from numpy import nan import xarray as xr import datashader as ds from datashader.tests.test_pandas import assert_eq_ndarray, assert_eq_xr @@ -16,6 +17,18 @@ GeoDataFrame = None MultiPolygonArray = None +try: + from geodatasets import get_path + import geopandas +except ImportError: + get_path = None + geopandas = None + +try: + import dask_geopandas +except ImportError: + dask_geopandas = None + def dask_GeoDataFrame(*args, **kwargs): return dd.from_pandas(GeoDataFrame(*args, **kwargs), npartitions=3) @@ -122,7 +135,7 @@ def test_multiple_polygons_auto_range(DataFrame): out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x']) assert_eq_xr(agg, out) - + assert_eq_ndarray(agg.x_range, (-1, 3.5), close=True) assert_eq_ndarray(agg.y_range, (0.1, 2), close=True) @@ -319,3 +332,61 @@ def test_spatial_index_not_dropped(): assert df2.columns == ['some_geom'] assert df2.some_geom.array._sindex == df.some_geom.array._sindex + + +natural_earth_sol = np.array([ + [nan, 7, 7, 7, 7, 7, 0, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, nan], + [nan, nan, nan, nan, 5, nan, 6, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], + [nan, nan, nan, nan, nan, nan, 9, nan, nan, nan, nan, nan, nan, 10, nan, nan, nan, nan, 11, 12], + [nan, nan, nan, nan, nan, nan, 95, nan, nan, nan, nan, 112, nan, nan, nan, nan, 21, 21, 21, 13], + [ 17, nan, nan, nan, nan, nan, 95, 95, nan, nan, nan, 112, 20, nan, nan, nan, 31, 32, 34, 22], + [nan, nan, nan, nan, nan, nan, 95, nan, nan, 112, 112, 112, 112, nan, 44, 41, 50, 43, 37, nan], + [nan, 60, nan, nan, 95, 65, 54, nan, nan, 112, 112, 112, 112, nan, 112, 112, 63, nan, nan, nan], + [nan, nan, nan, 95, 95, 95, 74, nan, nan, nan, 72, 68, 112, 112, 112, 112, 112, 71, 73, nan], + [ 87, 82, 78, 95, 95, 88, 95, nan, nan, 80, 83, 112, 112, 112, 112, 112, 112, 112, nan, nan], + [ 94, nan, nan, 116, 118, 125, 125, 126, 126, nan, nan, 121, 122, 109, nan, 123, nan, 101, 106, 93], +]) + + +@pytest.mark.skipif(not geopandas, reason="geopandas not installed") +def test_natural_earth_geopandas(): + df = geopandas.read_file(get_path("naturalearth.land")) + df["col"] = np.arange(len(df)) + + canvas = ds.Canvas(plot_height=10, plot_width=20) + agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col")) + + assert_eq_ndarray(agg.data, natural_earth_sol) + + +@pytest.mark.skipif(not geopandas, reason="geopandas not installed") +@pytest.mark.skipif(not dask_geopandas, reason="dask_geopandas not installed") +@pytest.mark.parametrize('npartitions', [1, 2, 5]) +def test_natural_earth_dask_geopandas(npartitions): + df = geopandas.read_file(get_path("naturalearth.land")) + df["col"] = np.arange(len(df)) + df = dd.from_pandas(df, npartitions=npartitions) + assert df.npartitions == npartitions + df.calculate_spatial_partitions() + + canvas = ds.Canvas(plot_height=10, plot_width=20) + agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col")) + + assert_eq_ndarray(agg.data, natural_earth_sol) + + +@pytest.mark.skipif(not geopandas, reason="geopandas not installed") +@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed") +@pytest.mark.parametrize('npartitions', [0, 1, 2, 5]) +def test_natural_earth_spatialpandas(npartitions): + df = geopandas.read_file(get_path("naturalearth.land")) + df["col"] = np.arange(len(df)) + df = spatialpandas.GeoDataFrame(df) + if npartitions > 0: + df = dd.from_pandas(df, npartitions=npartitions) + assert df.npartitions == npartitions + + canvas = ds.Canvas(plot_height=10, plot_width=20) + agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col")) + + assert_eq_ndarray(agg.data, natural_earth_sol) diff --git a/datashader/utils.py b/datashader/utils.py index 70667b1fb..32133ada3 100644 --- a/datashader/utils.py +++ b/datashader/utils.py @@ -25,6 +25,11 @@ except Exception: cudf = None +try: + from geopandas.array import GeometryDtype as gpd_GeometryDtype +except ImportError: + gpd_GeometryDtype = type(None) + try: from spatialpandas.geometry import GeometryDtype except ImportError: @@ -430,6 +435,8 @@ def dshape_from_pandas_helper(col): return datashape.Option(datashape.DateTime(tz=tz)) elif isinstance(col.dtype, (RaggedDtype, GeometryDtype)): return col.dtype + elif gpd_GeometryDtype and isinstance(col.dtype, gpd_GeometryDtype): + return col.dtype dshape = datashape.CType.from_numpy_dtype(col.dtype) dshape = datashape.string if dshape == datashape.object_ else dshape if dshape in (datashape.string, datashape.datetime_): diff --git a/setup.py b/setup.py index ef35712a1..664206be8 100644 --- a/setup.py +++ b/setup.py @@ -24,9 +24,15 @@ 'xarray', ] -examples = [ - 'bokeh >3.1', +geopandas = [ + 'dask-geopandas', 'geopandas', + 'shapely >=2.0.0', +] + +examples = geopandas + [ + 'bokeh >3.1', + 'geodatasets', 'holoviews', 'matplotlib >=3.3', 'panel >1.1', @@ -35,8 +41,9 @@ ] extras_require = { - 'tests': [ + 'tests': geopandas + [ 'codecov', + 'geodatasets', 'fastparquet', # optional dependency 'flake8', 'nbconvert', @@ -62,7 +69,8 @@ 'python-graphviz', 'python-snappy', 'rasterio', - ] + ], + 'geopandas': geopandas, }