From 2f07189d8e59e5d720eb55f742c11f7fd84317c5 Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi <113376043+delucchi-cmu@users.noreply.github.com> Date: Mon, 1 Apr 2024 15:17:18 -0400 Subject: [PATCH] Add test for lsdb.to_hipscat (#18) --- tests/conftest.py | 5 +++++ tests/hipscat_import/conftest.py | 5 ----- tests/lsdb/io/test_to_hipscat.py | 33 ++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 tests/lsdb/io/test_to_hipscat.py diff --git a/tests/conftest.py b/tests/conftest.py index b998220..dbb8842 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -59,6 +59,11 @@ def small_sky_order1_dir_local(local_data_dir): return os.path.join(local_data_dir, SMALL_SKY_ORDER1_DIR_NAME) +@pytest.fixture +def small_sky_parts_dir_local(local_data_dir): + return os.path.join(local_data_dir, "small_sky_parts") + + @pytest.fixture def tmp_dir_cloud(example_cloud_path): return os.path.join(example_cloud_path, "tmp") diff --git a/tests/hipscat_import/conftest.py b/tests/hipscat_import/conftest.py index acb9004..8e24ad2 100644 --- a/tests/hipscat_import/conftest.py +++ b/tests/hipscat_import/conftest.py @@ -16,8 +16,3 @@ def dask_client(): @pytest.fixture def small_sky_parts_dir_cloud(example_cloud_path): return os.path.join(example_cloud_path, "hipscat_import", "data", "small_sky_parts") - - -@pytest.fixture -def small_sky_parts_dir_local(local_data_dir): - return os.path.join(local_data_dir, "small_sky_parts") diff --git a/tests/lsdb/io/test_to_hipscat.py b/tests/lsdb/io/test_to_hipscat.py new file mode 100644 index 0000000..e46a6e8 --- /dev/null +++ b/tests/lsdb/io/test_to_hipscat.py @@ -0,0 +1,33 @@ +import os + +import lsdb +import pandas as pd + +from hipscat_cloudtests import TempCloudDirectory + + +def test_save_catalog_and_margin(local_data_dir, example_cloud_storage_options, tmp_dir_cloud): + pathway = os.path.join(local_data_dir, "xmatch", "xmatch_catalog_raw.csv") + input_df = pd.read_csv(pathway) + catalog = lsdb.from_dataframe( + input_df, margin_threshold=5000, catalog_name="small_sky_from_dataframe", catalog_type="object" + ) + + with TempCloudDirectory( + tmp_dir_cloud, "lsdb_save_catalog_and_margin", example_cloud_storage_options + ) as temp_path: + base_catalog_path = f"{temp_path}/new_catalog_name" + catalog.to_hipscat(base_catalog_path, storage_options=example_cloud_storage_options) + expected_catalog = lsdb.read_hipscat(base_catalog_path, storage_options=example_cloud_storage_options) + assert expected_catalog.hc_structure.catalog_name == catalog.hc_structure.catalog_name + assert expected_catalog.hc_structure.catalog_info == catalog.hc_structure.catalog_info + assert expected_catalog.get_healpix_pixels() == catalog.get_healpix_pixels() + pd.testing.assert_frame_equal(expected_catalog.compute(), catalog._ddf.compute()) + + base_catalog_path = f"{temp_path}/new_margin_name" + catalog.margin.to_hipscat(base_catalog_path, storage_options=example_cloud_storage_options) + expected_catalog = lsdb.read_hipscat(base_catalog_path, storage_options=example_cloud_storage_options) + assert expected_catalog.hc_structure.catalog_name == catalog.margin.hc_structure.catalog_name + assert expected_catalog.hc_structure.catalog_info == catalog.margin.hc_structure.catalog_info + assert expected_catalog.get_healpix_pixels() == catalog.margin.get_healpix_pixels() + pd.testing.assert_frame_equal(expected_catalog.compute(), catalog.margin._ddf.compute())