Skip to content

Commit

Permalink
Remove (most) storage options and clean up imports.
Browse files (browse the repository at this point in the history)
  • Loading branch information
delucchi-cmu committed Aug 29, 2024
1 parent d251d5b commit 8100557
Show file tree
Hide file tree
Showing 22 changed files with 134 additions and 342 deletions.
16 changes: 5 additions & 11 deletions src/hipscat_cloudtests/file_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from hipscat.io.file_io.file_pointer import does_file_or_directory_exist


def assert_text_file_matches(expected_lines, file_name, storage_options: dict = None):
def assert_text_file_matches(expected_lines, file_name):
"""Convenience method to read a text file and compare the contents, line for line.
When file contents get even a little bit big, it can be difficult to see
Expand All @@ -24,12 +24,9 @@ def assert_text_file_matches(expected_lines, file_name, storage_options: dict =
Args:
expected_lines(:obj:`string array`) list of strings, formatted as regular expressions.
file_name (str): fully-specified path of the file to read
storage_options (dict): dictionary of filesystem storage options
"""
assert does_file_or_directory_exist(
file_name, storage_options=storage_options
), f"file not found [{file_name}]"
contents = load_text_file(file_name, storage_options=storage_options)
assert does_file_or_directory_exist(file_name), f"file not found [{file_name}]"
contents = load_text_file(file_name)

assert len(expected_lines) == len(
contents
Expand All @@ -40,9 +37,7 @@ def assert_text_file_matches(expected_lines, file_name, storage_options: dict =
)


def assert_parquet_file_ids(
file_name, id_column, schema: pa.Schema, expected_ids, resort_ids=True, storage_options: dict = None
):
def assert_parquet_file_ids(file_name, id_column, schema: pa.Schema, expected_ids, resort_ids=True):
"""
Convenience method to read a parquet file and compare the object IDs to
a list of expected objects.
Expand All @@ -53,9 +48,8 @@ def assert_parquet_file_ids(
expected_ids (:obj:`int[]`): list of expected ids in `id_column`
resort_ids (bool): should we re-sort the ids? if False, we will check that the ordering
is the same between the read IDs and expected_ids
storage_options (dict): dictionary of filesystem storage options
"""
data_frame = pd.read_parquet(file_name, engine="pyarrow", schema=schema, storage_options=storage_options)
data_frame = pd.read_parquet(file_name.path, engine="pyarrow", schema=schema, filesystem=file_name.fs)
assert id_column in data_frame.columns
ids = data_frame[id_column].tolist()
if resort_ids:
Expand Down
1 change: 0 additions & 1 deletion src/hipscat_cloudtests/temp_cloud_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import shortuuid
from hipscat.io.file_io import file_io
from upath import UPath


class TempCloudDirectory:
Expand Down
18 changes: 9 additions & 9 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def small_sky_margin_dir_cloud(cloud_path):


@pytest.fixture(scope="session", name="tmp_dir_cloud")
def tmp_dir_cloud(cloud_path, storage_options):
def tmp_dir_cloud(cloud_path):
"""Create a single client for use by all unit test cases."""
tmp = TempCloudDirectory(
cloud_path / "tmp",
Expand All @@ -136,23 +136,23 @@ def small_sky_xmatch_dir_cloud(cloud_path):


@pytest.fixture
def small_sky_catalog_cloud(small_sky_dir_cloud, storage_options):
return lsdb.read_hipscat(small_sky_dir_cloud, storage_options=storage_options)
def small_sky_catalog_cloud(small_sky_dir_cloud):
return lsdb.read_hipscat(small_sky_dir_cloud)


@pytest.fixture
def small_sky_xmatch_catalog_cloud(small_sky_xmatch_dir_cloud, storage_options):
return lsdb.read_hipscat(small_sky_xmatch_dir_cloud, storage_options=storage_options)
def small_sky_xmatch_catalog_cloud(small_sky_xmatch_dir_cloud):
return lsdb.read_hipscat(small_sky_xmatch_dir_cloud)


@pytest.fixture
def small_sky_order1_hipscat_catalog_cloud(small_sky_order1_dir_cloud, storage_options):
return hc.catalog.Catalog.read_from_hipscat(small_sky_order1_dir_cloud, storage_options=storage_options)
def small_sky_order1_hipscat_catalog_cloud(small_sky_order1_dir_cloud):
return hc.catalog.Catalog.read_from_hipscat(small_sky_order1_dir_cloud)


@pytest.fixture
def small_sky_order1_catalog_cloud(small_sky_order1_dir_cloud, storage_options):
return lsdb.read_hipscat(small_sky_order1_dir_cloud, storage_options=storage_options)
def small_sky_order1_catalog_cloud(small_sky_order1_dir_cloud):
return lsdb.read_hipscat(small_sky_order1_dir_cloud)


@pytest.fixture
Expand Down
59 changes: 12 additions & 47 deletions tests/data/generate_cloud_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import tempfile\n",
"from upath import UPath\n",
"\n",
"import hipscat_import.pipeline as runner\n",
"from hipscat_import.catalog.arguments import ImportArguments\n",
"from hipscat_import.index.arguments import IndexArguments\n",
"from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments\n",
"import tempfile\n",
"from pathlib import Path\n",
"import os\n",
"\n",
"tmp_path = tempfile.TemporaryDirectory()\n",
"tmp_dir = tmp_path.name\n",
Expand All @@ -36,7 +37,9 @@
" \"account_key\": os.environ.get(\"ABFS_LINCCDATA_ACCOUNT_KEY\"),\n",
" \"account_name\": os.environ.get(\"ABFS_LINCCDATA_ACCOUNT_NAME\"),\n",
"}\n",
"storage_options"
"storage_options\n",
"\n",
"output_path = UPath(\"abfs://hipscat/pytests/data\", protocol=\"abfs\", **storage_options)"
]
},
{
Expand All @@ -58,9 +61,8 @@
" input_path=\"small_sky_parts\",\n",
" highest_healpix_order=1,\n",
" file_reader=\"csv\",\n",
" output_path=\"abfs://hipscat/pytests/data\",\n",
" output_path=output_path,\n",
" output_artifact_name=\"small_sky\",\n",
" output_storage_options=storage_options,\n",
" tmp_dir=tmp_dir,\n",
" dask_tmp=tmp_dir,\n",
")\n",
Expand Down Expand Up @@ -92,8 +94,7 @@
" input_path=\"small_sky_parts\",\n",
" file_reader=\"csv\",\n",
" constant_healpix_order=1,\n",
" output_path=\"abfs://hipscat/pytests/data\",\n",
" output_storage_options=storage_options,\n",
" output_path=output_path,\n",
" output_artifact_name=\"small_sky_order1\",\n",
" tmp_dir=tmp_dir,\n",
" dask_tmp=tmp_dir,\n",
Expand All @@ -119,9 +120,8 @@
"args = IndexArguments(\n",
" input_catalog_path=\"small_sky_order1\",\n",
" indexing_column=\"id\",\n",
" output_path=\"abfs://hipscat/pytests/data\",\n",
" output_path=output_path,\n",
" output_artifact_name=\"small_sky_object_index\",\n",
" output_storage_options=storage_options,\n",
" tmp_dir=tmp_dir,\n",
" dask_tmp=tmp_dir,\n",
")\n",
Expand All @@ -137,9 +137,8 @@
"margin_args = MarginCacheArguments(\n",
" margin_threshold=7200,\n",
" input_catalog_path=\"small_sky_order1\",\n",
" output_path=\"abfs://hipscat/pytests/data\",\n",
" output_path=output_path,\n",
" output_artifact_name=\"small_sky_order1_margin\",\n",
" output_storage_options=storage_options,\n",
" tmp_dir=tmp_dir,\n",
" dask_tmp=tmp_dir,\n",
")\n",
Expand All @@ -163,8 +162,7 @@
" input_file_list=[\"xmatch/xmatch_catalog_raw.csv\"],\n",
" file_reader=\"csv\",\n",
" constant_healpix_order=1,\n",
" output_path=\"abfs://hipscat/pytests/data\",\n",
" output_storage_options=storage_options,\n",
" output_path=output_path,\n",
" output_artifact_name=\"small_sky_xmatch\",\n",
" pixel_threshold=100,\n",
" tmp_dir=tmp_dir,\n",
Expand All @@ -173,39 +171,6 @@
"runner.pipeline(args)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Almanac info\n",
"\n",
"For the above catalogs, create almanac data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from hipscat.inspection.almanac import Almanac\n",
"from hipscat.inspection.almanac_info import AlmanacInfo\n",
"\n",
"almanac_info = AlmanacInfo.from_catalog_dir(\n",
" \"abfs://hipscat/pytests/data/small_sky\", storage_options=storage_options\n",
")\n",
"almanac_info.write_to_file(\n",
" directory=\"abfs://hipscat/pytests/data/almanac\", default_dir=False, storage_options=storage_options\n",
")\n",
"\n",
"almanac_info = AlmanacInfo.from_catalog_dir(\n",
" \"abfs://hipscat/pytests/data/small_sky_order1\", storage_options=storage_options\n",
")\n",
"almanac_info.write_to_file(\n",
" directory=\"abfs://hipscat/pytests/data/almanac\", default_dir=False, storage_options=storage_options\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
6 changes: 2 additions & 4 deletions tests/data/generate_local_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@
"metadata": {},
"outputs": [],
"source": [
"import tempfile\n",
"\n",
"import hipscat_import.pipeline as runner\n",
"from hipscat_import.catalog.arguments import ImportArguments\n",
"from hipscat_import.index.arguments import IndexArguments\n",
"from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments\n",
"import tempfile\n",
"from pathlib import Path\n",
"\n",
"tmp_path = tempfile.TemporaryDirectory()\n",
"tmp_dir = tmp_path.name"
Expand Down
10 changes: 3 additions & 7 deletions tests/hipscat/catalog/dataset/test_base_catalog_info_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,10 @@
from hipscat.io import file_io


def test_read_from_file(test_data_dir_cloud, storage_options):
def test_read_from_file(test_data_dir_cloud):
base_catalog_info_file_cloud = test_data_dir_cloud / "dataset" / "catalog_info.json"
catalog_info = BaseCatalogInfo.read_from_metadata_file(
base_catalog_info_file_cloud, storage_options=storage_options
)
catalog_info_json = file_io.file_io.load_json_file(
base_catalog_info_file_cloud, storage_options=storage_options
)
catalog_info = BaseCatalogInfo.read_from_metadata_file(base_catalog_info_file_cloud)
catalog_info_json = file_io.file_io.load_json_file(base_catalog_info_file_cloud)

catalog_info_dict = dataclasses.asdict(catalog_info)
for key, value in catalog_info_json.items():
Expand Down
30 changes: 12 additions & 18 deletions tests/hipscat/catalog/test_catalog_cloud.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""Tests of catalog functionality"""

import os

import pytest
from hipscat.catalog import Catalog, PartitionInfo
from hipscat.io.file_io import file_io
Expand All @@ -10,51 +8,47 @@
from hipscat.pixel_math import HealpixPixel


def test_load_catalog_small_sky(small_sky_dir_cloud, storage_options):
def test_load_catalog_small_sky(small_sky_dir_cloud):
"""Instantiate a catalog with 1 pixel"""
cat = Catalog.read_from_hipscat(small_sky_dir_cloud, storage_options=storage_options)
cat = Catalog.read_from_hipscat(small_sky_dir_cloud)

assert cat.catalog_name == "small_sky"
assert len(cat.get_healpix_pixels()) == 1

assert is_valid_catalog(small_sky_dir_cloud, storage_options=storage_options)
assert is_valid_catalog(small_sky_dir_cloud)


def test_load_catalog_small_sky_with_loader(small_sky_dir_cloud, storage_options):
def test_load_catalog_small_sky_with_loader(small_sky_dir_cloud):
"""Instantiate a catalog with 1 pixel"""
cat = read_from_hipscat(small_sky_dir_cloud, storage_options=storage_options)
cat = read_from_hipscat(small_sky_dir_cloud)

assert isinstance(cat, Catalog)
assert cat.catalog_name == "small_sky"
assert len(cat.get_healpix_pixels()) == 1

assert is_valid_catalog(small_sky_dir_cloud, storage_options=storage_options)
assert is_valid_catalog(small_sky_dir_cloud)


def test_empty_directory(tmp_cloud_path, storage_options):
def test_empty_directory(tmp_cloud_path):
"""Test loading empty or incomplete data"""
catalog_path = tmp_cloud_path

## Path exists but there's nothing there (which means it doesn't exist!)
with pytest.raises(FileNotFoundError, match="No directory"):
Catalog.read_from_hipscat(catalog_path, storage_options=storage_options)
Catalog.read_from_hipscat(catalog_path)

## catalog_info file exists - getting closer
file_name = catalog_path / "catalog_info.json"
file_io.write_string_to_file(
file_name,
string='{"catalog_name":"empty", "catalog_type":"source"}',
storage_options=storage_options,
)
file_io.write_string_to_file(file_name, string='{"catalog_name":"empty", "catalog_type":"source"}')

with pytest.raises(FileNotFoundError, match="metadata"):
Catalog.read_from_hipscat(catalog_path, storage_options=storage_options)
Catalog.read_from_hipscat(catalog_path)

## partition_info file exists - enough to create a catalog
## Now we create the needed _metadata and everything is right.
part_info = PartitionInfo.from_healpix([HealpixPixel(0, 11)])
part_info.write_to_metadata_files(catalog_path=catalog_path, storage_options=storage_options)
part_info.write_to_metadata_files(catalog_path=catalog_path)

with pytest.warns(UserWarning, match="slow"):
catalog = Catalog.read_from_hipscat(catalog_path, storage_options=storage_options)
catalog = Catalog.read_from_hipscat(catalog_path)
assert catalog.catalog_name == "empty"
4 changes: 2 additions & 2 deletions tests/hipscat/catalog/test_index_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from hipscat.pixel_math import HealpixPixel


def test_loc_partition(small_sky_index_dir_cloud, storage_options):
catalog = read_from_hipscat(small_sky_index_dir_cloud, storage_options=storage_options)
def test_loc_partition(small_sky_index_dir_cloud):
catalog = read_from_hipscat(small_sky_index_dir_cloud)

assert isinstance(catalog, IndexCatalog)
assert catalog.on_disk
Expand Down
4 changes: 2 additions & 2 deletions tests/hipscat/catalog/test_margin_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from hipscat.pixel_math.healpix_pixel import HealpixPixel


def test_read_margin_from_file(small_sky_margin_dir_cloud, storage_options):
catalog = read_from_hipscat(small_sky_margin_dir_cloud, storage_options=storage_options)
def test_read_margin_from_file(small_sky_margin_dir_cloud):
catalog = read_from_hipscat(small_sky_margin_dir_cloud)

assert isinstance(catalog, MarginCatalog)
assert catalog.on_disk
Expand Down
4 changes: 2 additions & 2 deletions tests/hipscat/inspection/test_visualize_catalog_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
# pylint: disable=no-member


def test_generate_map_order1(small_sky_dir_cloud, storage_options, mocker):
def test_generate_map_order1(small_sky_dir_cloud, mocker):
"""Basic test that map data can be generated (does not test that a plot is rendered)"""
cat = Catalog.read_from_hipscat(small_sky_dir_cloud, storage_options=storage_options)
cat = Catalog.read_from_hipscat(small_sky_dir_cloud)

mocker.patch("healpy.mollview")
plot_pixels(cat)
Expand Down
Loading

0 comments on commit 8100557

Please sign in to comment.