Skip to content

Commit

Permalink
Update requirements (#41)
Browse files (browse the repository at this point in the history)
* Update requirements

* Regenerate data and fix tests (#42)

---------

Co-authored-by: Konstantin Malanchev <[email protected]>
  • Loading branch information
delucchi-cmu and hombit authored Oct 18, 2024
1 parent d39d83c commit 7f601ea
Show file tree
Hide file tree
Showing 49 changed files with 75 additions and 71 deletions.
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
git+https://github.com/astronomy-commons/hipscat.git@hats
git+https://github.com/astronomy-commons/hipscat-import.git@hats
git+https://github.com/astronomy-commons/lsdb.git@hats
git+https://github.com/astronomy-commons/hats.git@main
git+https://github.com/astronomy-commons/hats-import.git@main
git+https://github.com/astronomy-commons/lsdb.git@main
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 changes: 5 additions & 5 deletions tests/cloud/data/small_sky/properties
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
obs_collection=small_sky
dataproduct_type=object
hats_nrows=131
hats_col_j2000_ra=ra
hats_col_j2000_dec=dec
hats_col_ra=ra
hats_col_dec=dec
hats_max_rows=1000000
hats_order=0
moc_sky_fraction=0.08333
hats_builder=hats-import v0.3.6.dev25+g93f6917
hats_creation_date=2024-10-04T14\:57UTC
hats_estsize=17
hats_builder=hats-import v0.4.0
hats_creation_date=2024-10-17T19\:10UTC
hats_estsize=43
hats_release_date=2024-09-18
hats_version=v0.1
Binary file not shown.
Binary file not shown.
Binary file not shown.
6 changes: 3 additions & 3 deletions tests/cloud/data/small_sky_object_index/properties
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ dataproduct_type=index
hats_nrows=131
hats_primary_table_url=small_sky_order1
hats_index_column=id
hats_builder=hats-import v0.3.6.dev25+g93f6917
hats_creation_date=2024-10-04T14\:57UTC
hats_estsize=11
hats_builder=hats-import v0.4.0
hats_creation_date=2024-10-17T19\:10UTC
hats_estsize=22
hats_release_date=2024-09-18
hats_version=v0.1
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 changes: 5 additions & 5 deletions tests/cloud/data/small_sky_order1/properties
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
obs_collection=small_sky_order1
dataproduct_type=object
hats_nrows=131
hats_col_j2000_ra=ra
hats_col_j2000_dec=dec
hats_col_ra=ra
hats_col_dec=dec
hats_max_rows=1000000
hats_order=1
moc_sky_fraction=0.08333
hats_builder=hats-import v0.3.6.dev25+g93f6917
hats_creation_date=2024-10-04T14\:57UTC
hats_estsize=39
hats_builder=hats-import v0.4.0
hats_creation_date=2024-10-17T19\:10UTC
hats_estsize=88
hats_release_date=2024-09-18
hats_version=v0.1
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 changes: 5 additions & 5 deletions tests/cloud/data/small_sky_order1_margin/properties
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
obs_collection=small_sky_order1_margin
dataproduct_type=margin
hats_nrows=28
hats_col_j2000_ra=ra
hats_col_j2000_dec=dec
hats_col_ra=ra
hats_col_dec=dec
hats_primary_table_url=small_sky_order1
hats_margin_threshold=7200.0
hats_order=1
moc_sky_fraction=0.16667
hats_builder=hats-import v0.3.6.dev25+g93f6917
hats_creation_date=2024-10-04T14\:57UTC
hats_estsize=57
hats_builder=hats-import v0.4.0
hats_creation_date=2024-10-17T19\:10UTC
hats_estsize=116
hats_release_date=2024-09-18
hats_version=v0.1
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 changes: 5 additions & 5 deletions tests/cloud/data/small_sky_xmatch/properties
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
obs_collection=small_sky_xmatch
dataproduct_type=object
hats_nrows=111
hats_col_j2000_ra=ra
hats_col_j2000_dec=dec
hats_col_ra=ra
hats_col_dec=dec
hats_max_rows=100
hats_order=1
moc_sky_fraction=0.06250
hats_builder=hats-import v0.3.6.dev25+g93f6917
hats_creation_date=2024-10-04T14\:57UTC
hats_estsize=37
hats_builder=hats-import v0.4.0
hats_creation_date=2024-10-17T19\:10UTC
hats_estsize=83
hats_release_date=2024-09-18
hats_version=v0.1
6 changes: 3 additions & 3 deletions tests/data/generate_local_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "hipscatenv",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -108,9 +108,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
8 changes: 4 additions & 4 deletions tests/data/indexed_files_abfs/parquet_list_single.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
abfs://hipscat/pytests/hipscat/data/small_sky_order1/Norder=1/Dir=0/Npix=44.parquet
abfs://hipscat/pytests/hipscat/data/small_sky_order1/Norder=1/Dir=0/Npix=45.parquet
abfs://hipscat/pytests/hipscat/data/small_sky_order1/Norder=1/Dir=0/Npix=46.parquet
abfs://hipscat/pytests/hipscat/data/small_sky_order1/Norder=1/Dir=0/Npix=47.parquet
abfs://hipscat/pytests/hipscat/data/small_sky_order1/dataset/Norder=1/Dir=0/Npix=44.parquet
abfs://hipscat/pytests/hipscat/data/small_sky_order1/dataset/Norder=1/Dir=0/Npix=45.parquet
abfs://hipscat/pytests/hipscat/data/small_sky_order1/dataset/Norder=1/Dir=0/Npix=46.parquet
abfs://hipscat/pytests/hipscat/data/small_sky_order1/dataset/Norder=1/Dir=0/Npix=47.parquet
8 changes: 4 additions & 4 deletions tests/data/indexed_files_local_s3/parquet_list_single.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
test_bucket/data/small_sky_order1/Norder=1/Dir=0/Npix=44.parquet
s3://test_bucket/data/small_sky_order1/Norder=1/Dir=0/Npix=45.parquet
s3://test_bucket/data/small_sky_order1/Norder=1/Dir=0/Npix=46.parquet
s3://test_bucket/data/small_sky_order1/Norder=1/Dir=0/Npix=47.parquet
test_bucket/data/small_sky_order1/dataset/Norder=1/Dir=0/Npix=44.parquet
s3://test_bucket/data/small_sky_order1/dataset/Norder=1/Dir=0/Npix=45.parquet
s3://test_bucket/data/small_sky_order1/dataset/Norder=1/Dir=0/Npix=46.parquet
s3://test_bucket/data/small_sky_order1/dataset/Norder=1/Dir=0/Npix=47.parquet
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 changes: 5 additions & 5 deletions tests/data/small_sky/properties
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
obs_collection=small_sky
dataproduct_type=object
hats_nrows=131
hats_col_j2000_ra=ra
hats_col_j2000_dec=dec
hats_col_ra=ra
hats_col_dec=dec
hats_max_rows=1000000
hats_order=0
moc_sky_fraction=0.08333
hats_builder=hats-import v0.3.6.dev25+g93f6917
hats_creation_date=2024-10-04T14\:56UTC
hats_estsize=49177
hats_builder=hats-import v0.4.0
hats_creation_date=2024-10-17T19\:09UTC
hats_estsize=43
hats_release_date=2024-09-18
hats_version=v0.1
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 changes: 5 additions & 5 deletions tests/data/small_sky_order1/properties
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
obs_collection=small_sky_order1
dataproduct_type=object
hats_nrows=131
hats_col_j2000_ra=ra
hats_col_j2000_dec=dec
hats_col_ra=ra
hats_col_dec=dec
hats_max_rows=1000000
hats_order=1
moc_sky_fraction=0.08333
hats_builder=hats-import v0.3.6.dev25+g93f6917
hats_creation_date=2024-10-04T14\:56UTC
hats_estsize=39
hats_builder=hats-import v0.4.0
hats_creation_date=2024-10-17T19\:09UTC
hats_estsize=88
hats_release_date=2024-09-18
hats_version=v0.1
13 changes: 4 additions & 9 deletions tests/hats/io/file_io/test_file_pointers_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,23 @@ def test_find_files_matching_path(small_sky_dir_cloud):
def test_find_files_matching_path_directory(small_sky_order1_dir_cloud):
assert len(find_files_matching_path(small_sky_order1_dir_cloud)) == 1

## wildcard in directory - will match all files at indicated depth
assert len(find_files_matching_path(small_sky_order1_dir_cloud, "*", "*", "*")) == 4
## wildcard in directory - will match all files at INDICATED depth
assert len(find_files_matching_path(small_sky_order1_dir_cloud, "*", "*", "*", "*")) == 4


def test_directory_has_contents(small_sky_order1_dir_cloud):
assert directory_has_contents(small_sky_order1_dir_cloud)


def test_get_directory_contents(small_sky_order1_dir_cloud, cloud):
def test_get_directory_contents(small_sky_order1_dir_cloud):
small_sky_contents = get_directory_contents(small_sky_order1_dir_cloud)

expected = [
"Norder=1",
"_common_metadata",
"_metadata",
"dataset",
"partition_info.csv",
"point_map.fits",
"properties",
]
if cloud == "local_s3":
# Sub-directories aren't really "a thing" for s3
expected = expected[1:]

expected = [small_sky_order1_dir_cloud / file_name for file_name in expected]

Expand Down
9 changes: 5 additions & 4 deletions tests/hats/io/test_write_metadata_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,19 @@ def basic_catalog_parquet_metadata():
def test_write_parquet_metadata(tmp_cloud_path, small_sky_dir_cloud, basic_catalog_parquet_metadata):
"""Use existing catalog parquet files and create new metadata files for it"""
catalog_base_dir = tmp_cloud_path
dataset_dir = catalog_base_dir / "dataset"

write_parquet_metadata(catalog_path=small_sky_dir_cloud, output_path=catalog_base_dir)

check_parquet_schema(catalog_base_dir / "_metadata", basic_catalog_parquet_metadata)
check_parquet_schema(dataset_dir / "_metadata", basic_catalog_parquet_metadata)
## _common_metadata has 0 row groups
check_parquet_schema(catalog_base_dir / "_common_metadata", basic_catalog_parquet_metadata, 0)
check_parquet_schema(dataset_dir / "_common_metadata", basic_catalog_parquet_metadata, 0)

## Re-write - should still have the same properties.
write_parquet_metadata(catalog_path=small_sky_dir_cloud, output_path=catalog_base_dir)
check_parquet_schema(catalog_base_dir / "_metadata", basic_catalog_parquet_metadata)
check_parquet_schema(dataset_dir / "_metadata", basic_catalog_parquet_metadata)
## _common_metadata has 0 row groups
check_parquet_schema(catalog_base_dir / "_common_metadata", basic_catalog_parquet_metadata, 0)
check_parquet_schema(dataset_dir / "_common_metadata", basic_catalog_parquet_metadata, 0)


def check_parquet_schema(file_path, expected_schema, expected_num_row_groups=1):
Expand Down
6 changes: 3 additions & 3 deletions tests/hats_import/test_run_catalog_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ def test_catalog_import_write_to_cloud(
assert len(catalog.get_healpix_pixels()) == 1

# Check that the catalog parquet file exists and contains correct object IDs
output_file = args.catalog_path / "Norder=0" / "Dir=0" / "Npix=11.parquet"
output_file = args.catalog_path / "dataset" / "Norder=0" / "Dir=0" / "Npix=11.parquet"

expected_ids = [*range(700, 831)]
expected_ids = list(range(700, 831))
assert_parquet_file_ids(output_file, "id", catalog.schema, expected_ids)


Expand Down Expand Up @@ -70,7 +70,7 @@ def test_catalog_import_read_from_cloud(dask_client, small_sky_parts_dir_cloud,
assert len(catalog.get_healpix_pixels()) == 1

# Check that the catalog parquet file exists and contains correct object IDs
output_file = args.catalog_path / "Norder=0" / "Dir=0" / "Npix=11.parquet"
output_file = args.catalog_path / "dataset" / "Norder=0" / "Dir=0" / "Npix=11.parquet"

expected_ids = [*range(700, 831)]
assert_parquet_file_ids(output_file, "id", catalog.schema, expected_ids)
Expand Down
16 changes: 10 additions & 6 deletions tests/hats_import/test_run_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,16 @@ def test_run_index(
]
)

outfile = args.catalog_path / "index" / "part.0.parquet"
outfile = args.catalog_path / "dataset" / "index" / "part.0.parquet"
schema = read_parquet_metadata(outfile).schema.to_arrow_schema()
assert schema.equals(basic_index_parquet_schema, check_metadata=False)

schema = read_parquet_metadata(args.catalog_path / "_metadata").schema.to_arrow_schema()
schema = read_parquet_metadata(args.catalog_path / "dataset" / "_metadata").schema.to_arrow_schema()
assert schema.equals(basic_index_parquet_schema, check_metadata=False)

schema = read_parquet_metadata(args.catalog_path / "_common_metadata").schema.to_arrow_schema()
schema = read_parquet_metadata(
args.catalog_path / "dataset" / "_common_metadata"
).schema.to_arrow_schema()
assert schema.equals(basic_index_parquet_schema, check_metadata=False)


Expand Down Expand Up @@ -79,12 +81,14 @@ def test_run_index_read_from_cloud(small_sky_order1_dir_cloud, tmp_path, dask_cl
]
)

outfile = args.catalog_path / "index" / "part.0.parquet"
outfile = args.catalog_path / "dataset" / "index" / "part.0.parquet"
schema = read_parquet_metadata(outfile).schema.to_arrow_schema()
assert schema.equals(basic_index_parquet_schema, check_metadata=False)

schema = read_parquet_metadata(args.catalog_path / "_metadata").schema.to_arrow_schema()
schema = read_parquet_metadata(args.catalog_path / "dataset" / "_metadata").schema.to_arrow_schema()
assert schema.equals(basic_index_parquet_schema, check_metadata=False)

schema = read_parquet_metadata(args.catalog_path / "_common_metadata").schema.to_arrow_schema()
schema = read_parquet_metadata(
args.catalog_path / "dataset" / "_common_metadata"
).schema.to_arrow_schema()
assert schema.equals(basic_index_parquet_schema, check_metadata=False)
8 changes: 6 additions & 2 deletions tests/hats_import/test_run_soap.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ def test_object_to_self_write_to_cloud(
assert catalog.catalog_info.total_rows == 131
assert catalog.catalog_info.contains_leaf_files

parquet_file_name = small_sky_soap_args.catalog_path / "Norder=0" / "Dir=0" / "Npix=11.parquet"
parquet_file_name = (
small_sky_soap_args.catalog_path / "dataset" / "Norder=0" / "Dir=0" / "Npix=11.parquet"
)
parquet_file_metadata = read_parquet_metadata(parquet_file_name)
assert parquet_file_metadata.num_row_groups == 4
assert parquet_file_metadata.num_rows == 131
Expand Down Expand Up @@ -106,7 +108,9 @@ def test_object_to_self_read_from_cloud(
assert catalog.catalog_info.total_rows == 131
assert catalog.catalog_info.contains_leaf_files

parquet_file_name = small_sky_soap_args.catalog_path / "Norder=0" / "Dir=0" / "Npix=11.parquet"
parquet_file_name = (
small_sky_soap_args.catalog_path / "dataset" / "Norder=0" / "Dir=0" / "Npix=11.parquet"
)
parquet_file_metadata = read_parquet_metadata(parquet_file_name)
assert parquet_file_metadata.num_row_groups == 4
assert parquet_file_metadata.num_rows == 131
Expand Down

0 comments on commit 7f601ea

Please sign in to comment.