diff --git a/python/sedona/spark/geopandas/geodataframe.py b/python/sedona/spark/geopandas/geodataframe.py index e5adce5119..16815f578b 100644 --- a/python/sedona/spark/geopandas/geodataframe.py +++ b/python/sedona/spark/geopandas/geodataframe.py @@ -45,73 +45,6 @@ # IMPLEMENTATION STATUS TRACKING # ============================================================================ -IMPLEMENTATION_STATUS = { - "IMPLEMENTED": [ - "area", - "buffer", - "crs", - "geometry", - "active_geometry_name", - "sindex", - "rename_geometry", - "copy", - "sjoin", - "to_parquet", - ], - "NOT_IMPLEMENTED": [ - "to_geopandas", - "_to_geopandas", - "geom_type", - "type", - "length", - "is_valid", - "is_valid_reason", - "is_empty", - "is_simple", - "is_ring", - "is_ccw", - "is_closed", - "has_z", - "boundary", - "centroid", - "convex_hull", - "envelope", - "exterior", - "interiors", - "unary_union", - "count_coordinates", - "count_geometries", - "count_interior_rings", - "get_precision", - "get_geometry", - "concave_hull", - "delaunay_triangles", - "voronoi_polygons", - "minimum_rotated_rectangle", - "extract_unique_points", - "offset_curve", - "remove_repeated_points", - "set_precision", - "representative_point", - "minimum_bounding_circle", - "minimum_bounding_radius", - "minimum_clearance", - "normalize", - "make_valid", - "reverse", - "segmentize", - "transform", - "force_2d", - "force_3d", - "line_merge", - "union_all", - "intersection_all", - "contains", - "contains_properly", - ], - "PARTIALLY_IMPLEMENTED": ["set_geometry"], # Only drop=True case is not implemented -} - IMPLEMENTATION_PRIORITY = { "HIGH": [ "to_geopandas", @@ -254,7 +187,7 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame): - Uses Spark for distributed processing - Geometries are stored in WKB (Well-Known Binary) format internally - Some methods may have different performance characteristics - - Not all GeoPandas methods are implemented yet (see IMPLEMENTATION_STATUS) + - Not all GeoPandas methods are implemented yet 
(see Sedona GeoPandas docs). Performance Considerations: - Operations are distributed across Spark cluster diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py index ab4b5f5fc8..ec2cebb075 100644 --- a/python/sedona/spark/geopandas/geoseries.py +++ b/python/sedona/spark/geopandas/geoseries.py @@ -60,100 +60,6 @@ # IMPLEMENTATION STATUS TRACKING # ============================================================================ -IMPLEMENTATION_STATUS = { - "IMPLEMENTED": [ - "area", - "buffer", - "bounds", - "centroid", - "contains", - "crs", - "distance", - "envelope", - "geometry", - "intersection", - "intersects", - "is_empty", - "is_simple", - "is_valid", - "is_valid_reason", - "length", - "make_valid", - "relate", - "set_crs", - "to_crs", - "to_geopandas", - "to_wkb", - "to_wkt", - "x", - "y", - "z", - "has_z", - "get_geometry", - "boundary", - "total_bounds", - "estimate_utm_crs", - "isna", - "isnull", - "notna", - "notnull", - "from_xy", - "copy", - "geom_type", - "sindex", - ], - "NOT_IMPLEMENTED": [ - "clip", - "contains_properly", - "convex_hull", - "count_coordinates", - "count_geometries", - "count_interior_rings", - "explode", - "force_2d", - "force_3d", - "from_file", - "from_shapely", - "from_arrow", - "line_merge", - "reverse", - "segmentize", - "to_json", - "to_arrow", - "to_file", - "transform", - "unary_union", - "union_all", - "intersection_all", - "type", - "is_ring", - "is_ccw", - "is_closed", - "get_precision", - "concave_hull", - "delaunay_triangles", - "voronoi_polygons", - "minimum_rotated_rectangle", - "exterior", - "extract_unique_points", - "offset_curve", - "interiors", - "remove_repeated_points", - "set_precision", - "representative_point", - "minimum_bounding_circle", - "minimum_bounding_radius", - "minimum_clearance", - "normalize", - "m", - ], - "PARTIALLY_IMPLEMENTED": [ - "fillna", # Limited parameter support (no 'limit' parameter) - "from_wkb", - "from_wkt", # Limited error handling 
options (only 'raise' supported) - ], -} - IMPLEMENTATION_PRIORITY = { "HIGH": [ "contains", @@ -281,7 +187,7 @@ class GeoSeries(GeoFrame, pspd.Series): - Uses Spark for distributed processing - Geometries are stored in WKB (Well-Known Binary) format internally - Some methods may have different performance characteristics - - Not all GeoPandas methods are implemented yet (see IMPLEMENTATION_STATUS) + - Not all GeoPandas methods are implemented yet (see Sedona GeoPandas docs). Performance Considerations: - Operations are distributed across Spark cluster
diff --git a/python/tests/geopandas/test_sjoin_match.py b/python/tests/geopandas/test_sjoin_match.py
new file mode 100644
index 0000000000..8061558035
--- /dev/null
+++ b/python/tests/geopandas/test_sjoin_match.py
@@ -0,0 +1,49 @@
+import pytest
+import geopandas as gpd
+from shapely.geometry import Point
+from sedona.spark.geopandas import sjoin
+from geopandas.tools import sjoin as gpd_sjoin
+from geopandas import GeoDataFrame
+
+
+class TestSJoinDWithinMatch:
+    """Check Sedona's ``dwithin`` spatial join against GeoPandas on a small point fixture."""
+
+    def setup_method(self):
+        """Build two three-point frames; only (0, 0) and (0.1, 0.1) are within 0.3 of each other."""
+        self.gdf1 = gpd.GeoDataFrame(
+            {"id": [1, 2, 3]},
+            geometry=[Point(0, 0), Point(1, 1), Point(2, 2)],
+            crs="EPSG:4326"
+        )
+        self.gdf2 = gpd.GeoDataFrame(
+            {"id": ["a", "b", "c"]},
+            geometry=[Point(0.1, 0.1), Point(1.5, 1.5), Point(10, 10)],
+            crs="EPSG:4326"
+        )
+
+    def test_dwithin_equivalence(self):
+        """Ensure Sedona and GeoPandas produce same results for dwithin() join."""
+        distance = 0.3
+
+        # Sedona join
+        # NOTE(review): sjoin is handed plain geopandas frames and its result is checked
+        # against geopandas.GeoDataFrame below, while Sedona declares its own class as
+        # GeoDataFrame(GeoFrame, pspd.DataFrame) - confirm both should not be Sedona frames.
+        sedona_result = sjoin(self.gdf1, self.gdf2, predicate="dwithin", distance=distance)
+        # GeoPandas join
+        gpd_result = gpd_sjoin(self.gdf1, self.gdf2, predicate="dwithin", distance=distance)
+
+        assert isinstance(sedona_result, GeoDataFrame)
+        assert isinstance(gpd_result, GeoDataFrame)
+
+        # Compare the matched (left id, right id) pairs as sets so row order is irrelevant
+        sedona_pairs = set(zip(sedona_result["id_left"], sedona_result["id_right"]))
+        gpd_pairs = set(zip(gpd_result["id_left"], gpd_result["id_right"]))
+        assert sedona_pairs == gpd_pairs
+
+    def test_dwithin_small_distance(self):
+        """Ensure a distance below every pairwise gap (closest is ~0.141) yields no matches."""
+        small_distance = 0.01
+        sedona_small = sjoin(self.gdf1, self.gdf2, predicate="dwithin", distance=small_distance)
+        assert len(sedona_small) == 0