diff --git a/environment.yml b/environment.yml index af006bb..30c4d49 100644 --- a/environment.yml +++ b/environment.yml @@ -6,11 +6,8 @@ dependencies: - shapely - geopandas>=0.9.* - pandas + - h3-py>=4 # Notebooks - matplotlib # Pip - pip - - pip: - # Installing through pip to avoid segfault on Apple Silicon - # https://github.com/uber/h3-py/issues/313 - - h3==3.7.6 diff --git a/h3pandas/h3pandas.py b/h3pandas/h3pandas.py index bef3a66..16b1e26 100644 --- a/h3pandas/h3pandas.py +++ b/h3pandas/h3pandas.py @@ -8,14 +8,14 @@ import pandas as pd import geopandas as gpd -from h3 import h3 +import h3 from pandas.core.frame import DataFrame from geopandas.geodataframe import GeoDataFrame from .const import COLUMN_H3_POLYFILL, COLUMN_H3_LINETRACE from .util.decorator import catch_invalid_h3_address, doc_standard from .util.functools import wrapped_partial -from .util.shapely import polyfill, linetrace +from .util.shapely import cell_to_boundary_lng_lat, polyfill, linetrace, _switch_lat_lng AnyDataFrame = Union[DataFrame, GeoDataFrame] @@ -92,7 +92,7 @@ def geo_to_h3( lats = self._df[lat_col] h3addresses = [ - h3.geo_to_h3(lat, lng, resolution) for lat, lng in zip(lats, lngs) + h3.latlng_to_cell(lat, lng, resolution) for lat, lng in zip(lats, lngs) ] colname = self._format_resolution(resolution) @@ -130,9 +130,9 @@ def h3_to_geo(self) -> GeoDataFrame: """ return self._apply_index_assign( - h3.h3_to_geo, + h3.cell_to_latlng, "geometry", - lambda x: shapely.geometry.Point(reversed(x)), + lambda x: _switch_lat_lng(shapely.geometry.Point(x)), lambda x: gpd.GeoDataFrame(x, crs="epsg:4326"), ) @@ -158,10 +158,9 @@ def h3_to_geo_boundary(self) -> GeoDataFrame: 881e2659c3fffff 1 POLYGON ((14.99201 51.00565, 14.98973 51.00133... 
""" return self._apply_index_assign( - wrapped_partial(h3.h3_to_geo_boundary, geo_json=True), + wrapped_partial(cell_to_boundary_lng_lat), "geometry", - lambda x: shapely.geometry.Polygon(x), - lambda x: gpd.GeoDataFrame(x, crs="epsg:4326"), + finalizer=lambda x: gpd.GeoDataFrame(x, crs="epsg:4326"), ) @doc_standard("h3_resolution", "containing the resolution of each H3 address") @@ -176,7 +175,7 @@ def h3_get_resolution(self) -> AnyDataFrame: 881e309739fffff 5 8 881e2659c3fffff 1 8 """ - return self._apply_index_assign(h3.h3_get_resolution, "h3_resolution") + return self._apply_index_assign(h3.get_resolution, "h3_resolution") @doc_standard("h3_base_cell", "containing the base cell of each H3 address") def h3_get_base_cell(self): @@ -190,7 +189,7 @@ def h3_get_base_cell(self): 881e309739fffff 5 15 881e2659c3fffff 1 15 """ - return self._apply_index_assign(h3.h3_get_base_cell, "h3_base_cell") + return self._apply_index_assign(h3.get_base_cell_number, "h3_base_cell") @doc_standard("h3_is_valid", "containing the validity of each H3 address") def h3_is_valid(self): @@ -203,7 +202,7 @@ def h3_is_valid(self): 881e309739fffff 5 True INVALID 1 False """ - return self._apply_index_assign(h3.h3_is_valid, "h3_is_valid") + return self._apply_index_assign(h3.is_valid_cell, "h3_is_valid") @doc_standard( "h3_k_ring", "containing a list H3 addresses within a distance of `k`" @@ -250,7 +249,7 @@ def k_ring(self, k: int = 1, explode: bool = False) -> AnyDataFrame: 881e309739fffff 5 881e309739fffff 881e309739fffff 5 881e309731fffff """ - func = wrapped_partial(h3.k_ring, k=k) + func = wrapped_partial(h3.grid_disk, k=k) column_name = "h3_k_ring" if explode: return self._apply_index_explode(func, column_name, list) @@ -295,7 +294,7 @@ def hex_ring(self, k: int = 1, explode: bool = False) -> AnyDataFrame: 881e309739fffff 5 881e309715fffff 881e309739fffff 5 881e309731fffff """ - func = wrapped_partial(h3.hex_ring, k=k) + func = wrapped_partial(h3.grid_ring, k=k) column_name = 
"h3_hex_ring" if explode: return self._apply_index_explode(func, column_name, list) @@ -330,7 +329,7 @@ def h3_to_parent(self, resolution: int = None) -> AnyDataFrame: else "h3_parent" ) return self._apply_index_assign( - wrapped_partial(h3.h3_to_parent, res=resolution), column + wrapped_partial(h3.cell_to_parent, res=resolution), column ) @doc_standard("h3_center_child", "containing the center child of each H3 address") @@ -352,7 +351,7 @@ def h3_to_center_child(self, resolution: int = None) -> AnyDataFrame: 881e2659c3fffff 1 891e2659c23ffff """ return self._apply_index_assign( - wrapped_partial(h3.h3_to_center_child, res=resolution), "h3_center_child" + wrapped_partial(h3.cell_to_center_child, res=resolution), "h3_center_child" ) @doc_standard( @@ -395,7 +394,7 @@ def polyfill(self, resolution: int, explode: bool = False) -> AnyDataFrame: """ def func(row): - return list(polyfill(row.geometry, resolution, True)) + return list(polyfill(row.geometry, resolution)) result = self._df.apply(func, axis=1) @@ -553,7 +552,7 @@ def h3_to_parent_aggregate( 811e3ffffffffff 6 """ parent_h3addresses = [ - catch_invalid_h3_address(h3.h3_to_parent)(h3address, resolution) + catch_invalid_h3_address(h3.cell_to_parent)(h3address, resolution) for h3address in self._df.index ] h3_parent_column = self._format_resolution(resolution) @@ -758,9 +757,7 @@ def polyfill_resample( return result.h3.h3_to_geo_boundary() if return_geometry else result - def linetrace( - self, resolution : int, explode: bool = False - ) -> AnyDataFrame: + def linetrace(self, resolution: int, explode: bool = False) -> AnyDataFrame: """Experimental. An H3 cell representation of a (Multi)LineString, which permits repeated cells, but not if they are repeated in immediate sequence. @@ -792,6 +789,7 @@ def linetrace( 0 LINESTRING (0.00000 0.00000, 1.00000 0.00000, ... 
837541fffffffff """ + def func(row): return list(linetrace(row.geometry, resolution)) diff --git a/h3pandas/util/decorator.py b/h3pandas/util/decorator.py index 8629436..8782575 100644 --- a/h3pandas/util/decorator.py +++ b/h3pandas/util/decorator.py @@ -1,6 +1,5 @@ from functools import wraps from typing import Callable, Iterator -from h3 import H3CellError def catch_invalid_h3_address(f: Callable) -> Callable: @@ -25,7 +24,7 @@ def catch_invalid_h3_address(f: Callable) -> Callable: def safe_f(*args, **kwargs): try: return f(*args, **kwargs) - except (TypeError, ValueError, H3CellError) as e: + except (TypeError, ValueError) as e: message = "H3 method raised an error. Is the H3 address correct?" message += f"\nCaller: {f.__name__}({_print_signature(*args, **kwargs)})" message += f"\nOriginal error: {repr(e)}" @@ -47,6 +46,7 @@ def sequential_deduplication(func: Iterator[str]) -> Iterator[str]: ------- Yields from f, but won't yield two items in a row that are the same. """ + def inner(*args): iterable = func(*args) last = None @@ -54,6 +54,7 @@ def inner(*args): if cell != last: yield cell last = cell + return inner diff --git a/h3pandas/util/shapely.py b/h3pandas/util/shapely.py index adb1095..832771d 100644 --- a/h3pandas/util/shapely.py +++ b/h3pandas/util/shapely.py @@ -1,22 +1,15 @@ -from typing import Union, Set, Tuple, List, Iterator +from typing import Union, Set, Iterator from shapely.geometry import Polygon, MultiPolygon, LineString, MultiLineString -from h3 import h3 +from shapely.ops import transform +import h3 from .decorator import sequential_deduplication + MultiPolyOrPoly = Union[Polygon, MultiPolygon] MultiLineOrLine = Union[LineString, MultiLineString] -def _extract_coords(polygon: Polygon) -> Tuple[List, List[List]]: - """Extract the coordinates of outer and inner rings from a Polygon""" - outer = list(polygon.exterior.coords) - inners = [list(g.coords) for g in polygon.interiors] - return outer, inners - - -def polyfill( - geometry: 
MultiPolyOrPoly, resolution: int, geo_json: bool = False -) -> Set[str]: +def polyfill(geometry: MultiPolyOrPoly, resolution: int) -> Set[str]: """h3.polyfill accepting a shapely (Multi)Polygon Parameters @@ -25,8 +18,6 @@ def polyfill( Polygon to fill resolution : int H3 resolution of the filling cells - geo_json : bool - If True, coordinates are assumed to be lng/lat. Default: False (lat/lng) Returns ------- @@ -36,24 +27,45 @@ def polyfill( ------ TypeError if geometry is not a Polygon or MultiPolygon """ - if isinstance(geometry, Polygon): - outer, inners = _extract_coords(geometry) - return h3.polyfill_polygon(outer, resolution, inners, geo_json) - - elif isinstance(geometry, MultiPolygon): - h3_addresses = [] - for poly in geometry.geoms: - h3_addresses.extend(polyfill(poly, resolution, geo_json)) - - return set(h3_addresses) + if isinstance(geometry, (Polygon, MultiPolygon)): + h3shape = h3.geo_to_h3shape(geometry) + return set(h3.polygon_to_cells(h3shape, resolution)) else: raise TypeError(f"Unknown type {type(geometry)}") +def cell_to_boundary_lng_lat(h3_address: str) -> MultiLineString: + """h3.cell_to_boundary equivalent returning a shapely Polygon + + Parameters + ---------- + h3_address : str + H3 address to convert to a boundary + + Returns + ------- + Polygon representing the H3 cell boundary, with coordinates switched by _switch_lat_lng + """ + return _switch_lat_lng(Polygon(h3.cell_to_boundary(h3_address))) + + +def _switch_lat_lng(geometry: MultiPolyOrPoly) -> MultiPolyOrPoly: + """Switches the order of the two coordinates of each vertex of a geometry + + Parameters + ---------- + geometry : Polygon or MultiPolygon + Geometry whose coordinates are switched + + Returns + ------- + Geometry of the same type with switched coordinates + """ + return transform(lambda x, y: (y, x), geometry) + + @sequential_deduplication -def linetrace( - geometry: MultiLineOrLine, resolution: int -) -> Iterator[str]: +def linetrace(geometry: MultiLineOrLine, resolution: int) -> Iterator[str]: """h3.polyfill equivalent for shapely 
(Multi)LineString Does not represent lines with duplicate sequential cells, but cells may repeat non-sequentially to represent @@ -82,8 +94,8 @@ def linetrace( coords = zip(geometry.coords, geometry.coords[1:]) while (vertex_pair := next(coords, None)) is not None: i, j = vertex_pair - a = h3.geo_to_h3(*i[::-1], resolution) - b = h3.geo_to_h3(*j[::-1], resolution) - yield from h3.h3_line(a, b) # inclusive of a and b + a = h3.latlng_to_cell(*i[::-1], resolution) + b = h3.latlng_to_cell(*j[::-1], resolution) + yield from h3.grid_path_cells(a, b) # inclusive of a and b else: raise TypeError(f"Unknown type {type(geometry)}") diff --git a/tests/test_h3pandas.py b/tests/test_h3pandas.py index db8b440..cde6fa0 100644 --- a/tests/test_h3pandas.py +++ b/tests/test_h3pandas.py @@ -1,11 +1,12 @@ from h3pandas import h3pandas # noqa: F401 -from h3 import h3 import pytest from shapely.geometry import Polygon, LineString, MultiLineString, box, Point import pandas as pd import geopandas as gpd from geopandas.testing import assert_geodataframe_equal +from h3pandas.util.shapely import cell_to_boundary_lng_lat + # TODO: Make sure methods are tested both for # DataFrame and GeoDataFrame (where applicable) @@ -35,22 +36,24 @@ def basic_geodataframe_polygon(basic_geodataframe): @pytest.fixture def basic_geodataframe_linestring(): - geom = LineString([ - (174.793092, -37.005372), (175.621138, -40.323142) - ]) + geom = LineString([(174.793092, -37.005372), (175.621138, -40.323142)]) return gpd.GeoDataFrame(geometry=[geom], crs="epsg:4326") @pytest.fixture # NB one of the LineString parts traverses the antimeridian def basic_geodataframe_multilinestring(basic_geodataframe): - geom = MultiLineString([ - [[174.793092, -37.005372], [175.621138, -40.323142]], + geom = MultiLineString( [ - [168.222656, -45.79817], [171.914063, -34.307144], - [178.769531, -37.926868], [183.515625, -43.992815] + [[174.793092, -37.005372], [175.621138, -40.323142]], + [ + [168.222656, -45.79817], + [171.914063, 
-34.307144], + [178.769531, -37.926868], + [183.515625, -43.992815], + ], ] - ]) + ) return gpd.GeoDataFrame(geometry=[geom], crs="epsg:4326") @@ -97,7 +100,7 @@ def h3_dataframe_with_values(): def h3_geodataframe_with_values(h3_dataframe_with_values): """GeoDataFrame with resolution 9 H3 index, values, and Hexagon geometries""" geometry = [ - Polygon(h3.h3_to_geo_boundary(h, True)) for h in h3_dataframe_with_values.index + Polygon(cell_to_boundary_lng_lat(h)) for h in h3_dataframe_with_values.index ] return gpd.GeoDataFrame( h3_dataframe_with_values, geometry=geometry, crs="epsg:4326" @@ -315,7 +318,7 @@ def test_linetrace(self, basic_geodataframe_linestring): "83bb54fffffffff", "83bb72fffffffff", "83bb0dfffffffff", - "83bb2bfffffffff" + "83bb2bfffffffff", ] assert len(result.iloc[0]["h3_linetrace"]) == 5 assert list(result.iloc[0]["h3_linetrace"]) == expected_indices @@ -327,11 +330,11 @@ def test_linetrace_explode(self, basic_geodataframe_linestring): "83bb54fffffffff", "83bb72fffffffff", "83bb0dfffffffff", - "83bb2bfffffffff" + "83bb2bfffffffff", ] assert result.shape == (5, 2) - assert result.iloc[0]['h3_linetrace'] == expected_indices[0] - assert result.iloc[-1]['h3_linetrace'] == expected_indices[-1] + assert result.iloc[0]["h3_linetrace"] == expected_indices[0] + assert result.iloc[-1]["h3_linetrace"] == expected_indices[-1] def test_linetrace_with_values(self, h3_geodataframe_with_polyline_values): result = h3_geodataframe_with_polyline_values.h3.linetrace(3) @@ -340,40 +343,45 @@ def test_linetrace_with_values(self, h3_geodataframe_with_polyline_values): "83bb54fffffffff", "83bb72fffffffff", "83bb0dfffffffff", - "83bb2bfffffffff" + "83bb2bfffffffff", ] assert result.shape == (1, 3) - assert 'val' in result.columns - assert result.iloc[0]['val'] == 10 + assert "val" in result.columns + assert result.iloc[0]["val"] == 10 assert len(result.iloc[0]["h3_linetrace"]) == 5 assert list(result.iloc[0]["h3_linetrace"]) == expected_indices - def 
test_linetrace_with_values_explode(self, - h3_geodataframe_with_polyline_values): + def test_linetrace_with_values_explode(self, h3_geodataframe_with_polyline_values): result = h3_geodataframe_with_polyline_values.h3.linetrace(3, explode=True) expected_indices = [ "83bb50fffffffff", "83bb54fffffffff", "83bb72fffffffff", "83bb0dfffffffff", - "83bb2bfffffffff" + "83bb2bfffffffff", ] assert result.shape == (5, 3) - assert 'val' in result.columns - assert result.iloc[0]['val'] == 10 + assert "val" in result.columns + assert result.iloc[0]["val"] == 10 assert result.iloc[0]["h3_linetrace"] == expected_indices[0] - assert result.iloc[-1]['h3_linetrace'] == expected_indices[-1] + assert result.iloc[-1]["h3_linetrace"] == expected_indices[-1] assert not result["val"].isna().any() def test_linetrace_multiline(self, basic_geodataframe_multilinestring): result = basic_geodataframe_multilinestring.h3.linetrace(2) expected_indices = [ - "82bb57fffffffff", "82bb0ffffffffff", - "82da87fffffffff", "82da97fffffffff", - "82bb67fffffffff", "82bb47fffffffff", - "82bb5ffffffffff", "82bb57fffffffff", - "82ba27fffffffff", "82bb1ffffffffff", - "82bb07fffffffff", "82bb37fffffffff" + "82bb57fffffffff", + "82bb0ffffffffff", + "82da87fffffffff", + "82da97fffffffff", + "82bb67fffffffff", + "82bb47fffffffff", + "82bb5ffffffffff", + "82bb57fffffffff", + "82ba27fffffffff", + "82bb1ffffffffff", + "82bb07fffffffff", + "82bb37fffffffff", ] assert len(result.iloc[0]["h3_linetrace"]) == 12 # 12 cells total assert list(result.iloc[0]["h3_linetrace"]) == expected_indices @@ -383,20 +391,21 @@ def test_linetrace_multiline_explode_index_parts( ): result = basic_geodataframe_multilinestring.explode( index_parts=True - ).h3.linetrace( - 2, explode=True - ) + ).h3.linetrace(2, explode=True) expected_indices = [ + ["82bb57fffffffff", "82bb0ffffffffff"], [ - "82bb57fffffffff", "82bb0ffffffffff" + "82da87fffffffff", + "82da97fffffffff", + "82bb67fffffffff", + "82bb47fffffffff", + "82bb5ffffffffff", + 
"82bb57fffffffff", + "82ba27fffffffff", + "82bb1ffffffffff", + "82bb07fffffffff", + "82bb37fffffffff", ], - [ - "82da87fffffffff", "82da97fffffffff", - "82bb67fffffffff", "82bb47fffffffff", - "82bb5ffffffffff", "82bb57fffffffff", - "82ba27fffffffff", "82bb1ffffffffff", - "82bb07fffffffff", "82bb37fffffffff" - ] ] assert len(result["h3_linetrace"]) == 12 # 12 cells in total assert result.iloc[0]["h3_linetrace"] == expected_indices[0][0] @@ -407,20 +416,21 @@ def test_linetrace_multiline_index_parts_no_explode( ): result = basic_geodataframe_multilinestring.explode( index_parts=True - ).h3.linetrace( - 2, explode=False - ) + ).h3.linetrace(2, explode=False) expected_indices = [ + ["82bb57fffffffff", "82bb0ffffffffff"], [ - "82bb57fffffffff", "82bb0ffffffffff" + "82da87fffffffff", + "82da97fffffffff", + "82bb67fffffffff", + "82bb47fffffffff", + "82bb5ffffffffff", + "82bb57fffffffff", + "82ba27fffffffff", + "82bb1ffffffffff", + "82bb07fffffffff", + "82bb37fffffffff", ], - [ - "82da87fffffffff", "82da97fffffffff", - "82bb67fffffffff", "82bb47fffffffff", - "82bb5ffffffffff", "82bb57fffffffff", - "82ba27fffffffff", "82bb1ffffffffff", - "82bb07fffffffff", "82bb37fffffffff" - ] ] assert len(result["h3_linetrace"]) == 2 # 2 parts assert len(result.iloc[0]["h3_linetrace"]) == 2 # 2 cells @@ -627,7 +637,7 @@ def test_h3_to_parent_aggregate(self, h3_geodataframe_with_values): result = h3_geodataframe_with_values.h3.h3_to_parent_aggregate(8) # TODO: Why does Pandas not preserve the order of groups here? 
index = pd.Index(["881f1d4811fffff", "881f1d4817fffff"], name="h3_08") - geometry = [Polygon(h3.h3_to_geo_boundary(h, True)) for h in index] + geometry = [Polygon(cell_to_boundary_lng_lat(h)) for h in index] expected = gpd.GeoDataFrame( {"val": [5, 3]}, geometry=geometry, index=index, crs="epsg:4326" ) diff --git a/tests/util/test_decorator.py b/tests/util/test_decorator.py index 16abc42..23df54b 100644 --- a/tests/util/test_decorator.py +++ b/tests/util/test_decorator.py @@ -1,4 +1,4 @@ -from h3 import h3 +import h3 import pytest from h3pandas.util.decorator import catch_invalid_h3_address, sequential_deduplication @@ -8,7 +8,7 @@ class TestCatchInvalidH3Address: def test_catch_invalid_h3_address(self): @catch_invalid_h3_address def safe_h3_to_parent(h3_address): - return h3.h3_to_parent(h3_address, 1) + return h3.cell_to_parent(h3_address, 1) with pytest.raises(ValueError): safe_h3_to_parent("a") # Originally ValueError diff --git a/tests/util/test_shapely.py b/tests/util/test_shapely.py index 4b6a7f0..631f369 100644 --- a/tests/util/test_shapely.py +++ b/tests/util/test_shapely.py @@ -5,19 +5,19 @@ @pytest.fixture def polygon(): - return Polygon([(48, 18), (49, 18), (49, 19), (48, 19)]) + return Polygon([(18, 48), (18, 49), (19, 49), (19, 48)]) @pytest.fixture def polygon_b(): - return Polygon([(54, 11), (56, 11), (56, 12), (54, 12)]) + return Polygon([(11, 54), (11, 56), (12, 56), (12, 54)]) @pytest.fixture def polygon_with_hole(): return Polygon( - [(48, 18), (49, 18), (49, 19), (48, 19)], - [[(48.4, 18.2), (48.8, 18.2), (48.8, 18.6), (48.4, 18.6)]], + [(18, 48), (19, 48), (19, 49), (18, 49)], + [[(18.2, 48.4), (18.6, 48.4), (18.6, 48.8), (18.2, 48.8)]], ) @@ -73,6 +73,6 @@ def test_linetrace_multilinestring(self, multiline): assert expected == result # Lists not sets, repeated items are expected, just not in sequence - expected2 = ['82754ffffffffff', '827547fffffffff', '82754ffffffffff'] + expected2 = ["82754ffffffffff", "827547fffffffff", "82754ffffffffff"] 
result2 = list(linetrace(multiline, 2)) assert expected2 == result2