Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 67 additions & 2 deletions python/sedona/spark/geopandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,8 +608,73 @@ def centroid(self):
"""
return _delegate_to_geometry_column("centroid", self)

# def concave_hull(self, ratio=0.0, allow_holes=False):
# raise NotImplementedError("This method is not implemented yet.")
def concave_hull(self, ratio=0.0, allow_holes=False):
    """Return a ``GeoSeries`` of geometries representing the concave hull
    of vertices of each geometry.

    The concave hull of a geometry is the smallest concave `Polygon`
    containing all the points in each geometry, unless the number of points
    in the geometric object is less than three. For two points, the concave
    hull collapses to a `LineString`; for 1, a `Point`.

    The hull is constructed by removing border triangles of the Delaunay
    Triangulation of the points as long as their "size" is larger than the
    maximum edge length ratio and optionally allowing holes. The edge length
    factor is a fraction of the length difference between the longest and
    shortest edges in the Delaunay Triangulation of the input points. For
    further information on the algorithm used, see
    https://libgeos.org/doxygen/classgeos_1_1algorithm_1_1hull_1_1ConcaveHull.html

    Parameters
    ----------
    ratio : float, (optional, default 0.0)
        Number in the range [0, 1]. Higher numbers will include fewer vertices
        in the hull.
    allow_holes : bool, (optional, default False)
        If set to True, the concave hull may have holes.

    Returns
    -------
    GeoSeries

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point, MultiPoint
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         LineString([(0, 0), (1, 1), (1, 0)]),
    ...         MultiPoint([(0, 0), (1, 1), (0, 1), (1, 0), (0.5, 0.5)]),
    ...         MultiPoint([(0, 0), (1, 1)]),
    ...         Point(0, 0),
    ...     ],
    ...     crs=3857
    ... )
    >>> s
    0                       POLYGON ((0 0, 1 1, 0 1, 0 0))
    1                           LINESTRING (0 0, 1 1, 1 0)
    2    MULTIPOINT ((0 0), (1 1), (0 1), (1 0), (0.5 0...
    3                            MULTIPOINT ((0 0), (1 1))
    4                                          POINT (0 0)
    dtype: geometry

    >>> s.concave_hull()
    0                      POLYGON ((0 1, 1 1, 0 0, 0 1))
    1                      POLYGON ((0 0, 1 1, 1 0, 0 0))
    2    POLYGON ((0.5 0.5, 0 1, 1 1, 1 0, 0 0, 0.5 0.5))
    3                               LINESTRING (0 0, 1 1)
    4                                         POINT (0 0)
    dtype: geometry

    See Also
    --------
    GeoSeries.convex_hull : convex hull geometry

    Notes
    -----
    The algorithm considers only the vertices of each geometry. As a result,
    the hull may not fully enclose the input geometry. If that happens,
    increasing ``ratio`` should resolve the issue.

    """
    return _delegate_to_geometry_column("concave_hull", self, ratio, allow_holes)

@property
def convex_hull(self):
Expand Down
9 changes: 6 additions & 3 deletions python/sedona/spark/geopandas/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -972,9 +972,12 @@ def centroid(self) -> "GeoSeries":
returns_geom=True,
)

def concave_hull(self, ratio=0.0, allow_holes=False):
# Implementation of the abstract method.
raise NotImplementedError("This method is not implemented yet.")
def concave_hull(self, ratio=0.0, allow_holes=False) -> "GeoSeries":
    """Return the concave hull of each geometry as a ``GeoSeries``.

    Wraps this series' Spark column in an ``ST_ConcaveHull`` expression,
    forwarding ``ratio`` and ``allow_holes`` unchanged, and evaluates it
    through ``_query_geometry_column`` with ``returns_geom=True``.

    Parameters
    ----------
    ratio : float, default 0.0
        Passed as the second argument to ``ST_ConcaveHull``.
    allow_holes : bool, default False
        Passed as the third argument to ``ST_ConcaveHull``.
    """
    return self._query_geometry_column(
        stf.ST_ConcaveHull(self.spark.column, ratio, allow_holes),
        returns_geom=True,
    )

@property
def convex_hull(self) -> "GeoSeries":
Expand Down
29 changes: 28 additions & 1 deletion python/tests/geopandas/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1238,7 +1238,34 @@ def test_centroid(self):
self.check_sgpd_equals_gpd(result, expected)

def test_concave_hull(self):
pass
s = GeoSeries(
[
Polygon([(0, 0), (1, 1), (0, 1)]),
LineString([(0, 0), (1, 1), (1, 0)]),
MultiPoint([(0, 0), (1, 1), (0, 1), (1, 0), (0.5, 0.5)]),
MultiPoint([(0, 0), (1, 1)]),
Point(0, 0),
],
crs=3857,
)

result = s.concave_hull()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be nice to have a test for allow_holes=True in this file too. But we should first get to the bottom of the errors.


expected = gpd.GeoSeries(
[
Polygon([(0, 1), (1, 1), (0, 0), (0, 1)]),
Polygon([(0, 0), (1, 1), (1, 0), (0, 0)]),
Polygon([(0.5, 0.5), (0, 1), (1, 1), (1, 0), (0, 0), (0.5, 0.5)]),
LineString([(0, 0), (1, 1)]),
Point(0, 0),
],
crs=3857,
)
self.check_sgpd_equals_gpd(result, expected)

# Check if GeoDataFrame works as well
df_result = s.to_geoframe().concave_hull()
self.check_sgpd_equals_gpd(df_result, expected)

def test_convex_hull(self):
s = GeoSeries(
Expand Down
29 changes: 28 additions & 1 deletion python/tests/geopandas/test_match_geopandas_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,8 +722,35 @@ def test_centroid(self):
gpd_result = gpd.GeoSeries(geom).centroid
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)

@pytest.mark.skipif(
parse_version(gpd.__version__) < parse_version("0.14.0"),
reason="geopandas concave_hull requires version 0.14.0 or higher",
)
def test_concave_hull(self):
pass
for geom in self.geoms:
sgpd_result = GeoSeries(geom).concave_hull()
gpd_result = gpd.GeoSeries(geom).concave_hull()
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
Comment on lines +730 to +733
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should test multiple ratio values here, not just the default of 0.0. We could do something like: do one third of the iterations with 0.0, the next third with 0.5, and the last third with 1.0. (Note that the range of valid values for this argument is [0, 1].)

Then also test allow_holes below (as you already are doing).


# Test slightly complex geometry for different ratio and allow_holes settings
geom = [
Polygon(
[(0, 0), (0, 4), (1, 4), (1, 1), (3, 1), (3, 4), (4, 4), (4, 0), (0, 0)]
)
]
for ratio, allow_holes in [(0.5, True), (1.0, True)]:
sgpd_result = GeoSeries(geom).concave_hull(
ratio=ratio, allow_holes=allow_holes
)
gpd_result = gpd.GeoSeries(geom).concave_hull(
ratio=ratio, allow_holes=allow_holes
)
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)

mixed = [self.points[1], self.linestrings[1], self.polygons[1], None]
sgpd_result = GeoSeries(mixed).concave_hull()
gpd_result = gpd.GeoSeries(mixed).concave_hull()
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
Comment on lines +749 to +753
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
mixed = [self.points[1], self.linestrings[1], self.polygons[1], None]
sgpd_result = GeoSeries(mixed).concave_hull()
gpd_result = gpd.GeoSeries(mixed).concave_hull()
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)

I don't think this test is necessary. We're already testing all of those cases separately in the `for geom in self.geoms` loop above. The function is executed on each geometry separately, so whether they're mixed together or not doesn't matter.


def test_convex_hull(self):
for geom in self.geoms:
Expand Down
Loading