Skip to content

Commit

Permalink
Merge pull request #411 from astronomy-commons/use_stable_sorts
Browse files Browse the repository at this point in the history
Use stable sorts when sorting by the index
  • Loading branch information
jeremykubica authored Oct 17, 2024
2 parents 43ddae9 + da7ec2c commit 6fc2a30
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 7 deletions.
6 changes: 3 additions & 3 deletions src/hats_import/catalog/map_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def reduce_pixel_shards(

dataframe = merged_table.to_pandas()
if sort_columns:
- dataframe = dataframe.sort_values(sort_columns.split(","))
+ dataframe = dataframe.sort_values(sort_columns.split(","), kind="stable")
if add_healpix_29:
## If we had a meaningful index before, preserve it as a column.
if _has_named_index(dataframe):
Expand All @@ -289,14 +289,14 @@ def reduce_pixel_shards(
dataframe[ra_column].values,
dataframe[dec_column].values,
)
- dataframe = dataframe.set_index(SPATIAL_INDEX_COLUMN).sort_index()
+ dataframe = dataframe.set_index(SPATIAL_INDEX_COLUMN).sort_index(kind="stable")

# Adjust the schema to make sure that the _healpix_29 will
# be saved as a uint64
elif use_healpix_29:
if dataframe.index.name != SPATIAL_INDEX_COLUMN:
dataframe = dataframe.set_index(SPATIAL_INDEX_COLUMN)
- dataframe = dataframe.sort_index()
+ dataframe = dataframe.sort_index(kind="stable")

dataframe["Norder"] = np.full(rows_written, fill_value=healpix_pixel.order, dtype=np.uint8)
dataframe["Dir"] = np.full(rows_written, fill_value=healpix_pixel.dir, dtype=np.uint64)
Expand Down
2 changes: 1 addition & 1 deletion src/hats_import/margin_cache/margin_cache_map_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def map_pixel_shards(
# that **can** be contained in source pixel, then by `margin_order` pixels for rows
# in source data
margin_pairs = pd.read_csv(margin_pair_file)
- explosion_factor = 4 ** (margin_order - source_pixel.order)
+ explosion_factor = 4 ** int(margin_order - source_pixel.order)
margin_pixel_range_start = source_pixel.pixel * explosion_factor
margin_pixel_range_end = (source_pixel.pixel + 1) * explosion_factor
margin_pairs = margin_pairs.query(
Expand Down
6 changes: 3 additions & 3 deletions tests/hats_import/catalog/test_map_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):

## sort order is effectively (norder19 healpix, source_id)
data_frame = pd.read_parquet(output_file, engine="pyarrow")
- expected_dataframe = combined_data.sort_values(["norder19_healpix", "source_id"])
+ expected_dataframe = combined_data.sort_values(["norder19_healpix", "source_id"], kind="stable")
pd.testing.assert_frame_equal(
expected_dataframe[comparison_columns].reset_index(drop=True),
data_frame[comparison_columns].reset_index(drop=True),
Expand Down Expand Up @@ -502,7 +502,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
)

data_frame = pd.read_parquet(output_file, engine="pyarrow")
- expected_dataframe = combined_data.sort_values(["norder19_healpix", "object_id", "time"])
+ expected_dataframe = combined_data.sort_values(["norder19_healpix", "object_id", "time"], kind="stable")
pd.testing.assert_frame_equal(
expected_dataframe[comparison_columns].reset_index(drop=True),
data_frame[comparison_columns].reset_index(drop=True),
Expand Down Expand Up @@ -540,7 +540,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
)

data_frame = pd.read_parquet(output_file, engine="pyarrow")
- expected_dataframe = combined_data.sort_values(["object_id", "time"])
+ expected_dataframe = combined_data.sort_values(["object_id", "time"], kind="stable")
pd.testing.assert_frame_equal(
expected_dataframe[comparison_columns].reset_index(drop=True),
data_frame[comparison_columns].reset_index(drop=True),
Expand Down

0 comments on commit 6fc2a30

Please sign in to comment.