Merge pull request #411 from astronomy-commons/use_stable_sorts

Use stable sorts when sorting by the index
astronomy-commons · Oct 17, 2024 · 6fc2a30
2 parents 43ddae9 + da7ec2c
commit 6fc2a30
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 7 deletions.
diff --git a/src/hats_import/catalog/map_reduce.py b/src/hats_import/catalog/map_reduce.py
@@ -279,7 +279,7 @@ def reduce_pixel_shards(
 
         dataframe = merged_table.to_pandas()
         if sort_columns:
-            dataframe = dataframe.sort_values(sort_columns.split(","))
+            dataframe = dataframe.sort_values(sort_columns.split(","), kind="stable")
         if add_healpix_29:
             ## If we had a meaningful index before, preserve it as a column.
             if _has_named_index(dataframe):
@@ -289,14 +289,14 @@ def reduce_pixel_shards(
                 dataframe[ra_column].values,
                 dataframe[dec_column].values,
             )
-            dataframe = dataframe.set_index(SPATIAL_INDEX_COLUMN).sort_index()
+            dataframe = dataframe.set_index(SPATIAL_INDEX_COLUMN).sort_index(kind="stable")
 
             # Adjust the schema to make sure that the _healpix_29 will
             # be saved as a uint64
         elif use_healpix_29:
             if dataframe.index.name != SPATIAL_INDEX_COLUMN:
                 dataframe = dataframe.set_index(SPATIAL_INDEX_COLUMN)
-            dataframe = dataframe.sort_index()
+            dataframe = dataframe.sort_index(kind="stable")
 
         dataframe["Norder"] = np.full(rows_written, fill_value=healpix_pixel.order, dtype=np.uint8)
         dataframe["Dir"] = np.full(rows_written, fill_value=healpix_pixel.dir, dtype=np.uint64)

diff --git a/src/hats_import/margin_cache/margin_cache_map_reduce.py b/src/hats_import/margin_cache/margin_cache_map_reduce.py
@@ -34,7 +34,7 @@ def map_pixel_shards(
         # that **can** be contained in source pixel, then by `margin_order` pixels for rows
         # in source data
         margin_pairs = pd.read_csv(margin_pair_file)
-        explosion_factor = 4 ** (margin_order - source_pixel.order)
+        explosion_factor = 4 ** int(margin_order - source_pixel.order)
         margin_pixel_range_start = source_pixel.pixel * explosion_factor
         margin_pixel_range_end = (source_pixel.pixel + 1) * explosion_factor
         margin_pairs = margin_pairs.query(

diff --git a/tests/hats_import/catalog/test_map_reduce.py b/tests/hats_import/catalog/test_map_reduce.py
@@ -466,7 +466,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
 
     ## sort order is effectively (norder19 healpix, source_id)
     data_frame = pd.read_parquet(output_file, engine="pyarrow")
-    expected_dataframe = combined_data.sort_values(["norder19_healpix", "source_id"])
+    expected_dataframe = combined_data.sort_values(["norder19_healpix", "source_id"], kind="stable")
     pd.testing.assert_frame_equal(
         expected_dataframe[comparison_columns].reset_index(drop=True),
         data_frame[comparison_columns].reset_index(drop=True),
@@ -502,7 +502,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
     )
 
     data_frame = pd.read_parquet(output_file, engine="pyarrow")
-    expected_dataframe = combined_data.sort_values(["norder19_healpix", "object_id", "time"])
+    expected_dataframe = combined_data.sort_values(["norder19_healpix", "object_id", "time"], kind="stable")
     pd.testing.assert_frame_equal(
         expected_dataframe[comparison_columns].reset_index(drop=True),
         data_frame[comparison_columns].reset_index(drop=True),
@@ -540,7 +540,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
     )
 
     data_frame = pd.read_parquet(output_file, engine="pyarrow")
-    expected_dataframe = combined_data.sort_values(["object_id", "time"])
+    expected_dataframe = combined_data.sort_values(["object_id", "time"], kind="stable")
     pd.testing.assert_frame_equal(
         expected_dataframe[comparison_columns].reset_index(drop=True),
         data_frame[comparison_columns].reset_index(drop=True),