Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up test data generation #16

Merged
merged 3 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 0 additions & 86 deletions copy_data_to_fs.py

This file was deleted.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ dependencies = [
dev = [
"asv==0.6.1", # Used to compute performance benchmarks
"black", # Used for static linting of files
"jupyter", # clear notebook result cells
"pre-commit", # Used to run checks before finalizing a git commit
"pylint", # Used for static linting of files
"pytest",
Expand Down
30 changes: 29 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import pytest

DATA_DIR_NAME = "data"
ALMANAC_DIR_NAME = "almanac"
SMALL_SKY_DIR_NAME = "small_sky"
SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1"


TEST_DIR = os.path.dirname(__file__)
SMALL_SKY_DIR_NAME = "small_sky"
Expand Down Expand Up @@ -49,3 +52,28 @@ def local_data_dir():
@pytest.fixture
def small_sky_dir_local(local_data_dir):
    """Join the local data directory with ``SMALL_SKY_DIR_NAME``."""
    catalog_dir = os.path.join(local_data_dir, SMALL_SKY_DIR_NAME)
    return catalog_dir


@pytest.fixture
def tmp_dir_cloud(example_cloud_path):
    """Join the example cloud path with the ``tmp`` sub-directory name."""
    scratch_dir = os.path.join(example_cloud_path, "tmp")
    return scratch_dir


@pytest.fixture
def test_data_dir_cloud(example_cloud_path):
    """Join the example cloud path with the ``data`` sub-directory name."""
    data_dir = os.path.join(example_cloud_path, "data")
    return data_dir


@pytest.fixture
def almanac_dir_cloud(test_data_dir_cloud):
    """Join the cloud test data directory with ``ALMANAC_DIR_NAME``."""
    almanac_dir = os.path.join(test_data_dir_cloud, ALMANAC_DIR_NAME)
    return almanac_dir


@pytest.fixture
def small_sky_dir_cloud(test_data_dir_cloud):
    """Join the cloud test data directory with ``SMALL_SKY_DIR_NAME``."""
    catalog_dir = os.path.join(test_data_dir_cloud, SMALL_SKY_DIR_NAME)
    return catalog_dir


@pytest.fixture
def small_sky_order1_dir_cloud(test_data_dir_cloud):
    """Join the cloud test data directory with ``SMALL_SKY_ORDER1_DIR_NAME``."""
    catalog_dir = os.path.join(test_data_dir_cloud, SMALL_SKY_ORDER1_DIR_NAME)
    return catalog_dir
170 changes: 170 additions & 0 deletions tests/data/generate_cloud_data.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# CLOUD unit test data\n",
"\n",
"There are two types of data used in unit tests in this repo: local and cloud. This notebook concerns itself only with the CLOUD versions of test data, so you can re-generate it.\n",
"\n",
"This also works to initialize data in a new cloud provider, instead of simply copying an existing data set.\n",
"\n",
"## Object catalog: small sky\n",
"\n",
"This is the same \"object catalog\" with 131 randomly generated radec values inside the order0-pixel11 healpix pixel that is used in hipscat and LSDB unit test suites."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import hipscat_import.pipeline as runner\n",
"from hipscat_import.catalog.arguments import ImportArguments\n",
"from hipscat_import.index.arguments import IndexArguments\n",
"from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments\n",
"import tempfile\n",
"from pathlib import Path\n",
"import os\n",
"\n",
"tmp_path = tempfile.TemporaryDirectory()\n",
"tmp_dir = tmp_path.name\n",
"\n",
"storage_options = {\n",
" \"account_key\": os.environ.get(\"ABFS_LINCCDATA_ACCOUNT_KEY\"),\n",
" \"account_name\": os.environ.get(\"ABFS_LINCCDATA_ACCOUNT_NAME\"),\n",
"}\n",
"storage_options"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### small_sky\n",
"\n",
"This catalog was generated with the following snippet:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"args = ImportArguments(\n",
" input_path=\"small_sky_parts\",\n",
" highest_healpix_order=1,\n",
" file_reader=\"csv\",\n",
" output_path=\"abfs://hipscat/pytests/data\",\n",
" output_artifact_name=\"small_sky\",\n",
" output_storage_options=storage_options,\n",
" overwrite=True,\n",
" tmp_dir=tmp_dir,\n",
" dask_tmp=tmp_dir,\n",
")\n",
"runner.pipeline(args)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### small_sky_order1\n",
"\n",
"This catalog has the same data points as other small sky catalogs, but is coerced to spreading these data points over partitions at order 1, instead of order 0.\n",
"\n",
"This means there are 4 leaf partition files, instead of just 1, and so can be useful for confirming reads/writes over multiple leaf partition files.\n",
"\n",
"NB: Setting `constant_healpix_order` coerces the import pipeline to create leaf partitions at order 1.\n",
"\n",
"This catalog was generated with the following snippet:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"args = ImportArguments(\n",
" input_path=\"small_sky_parts\",\n",
" file_reader=\"csv\",\n",
" constant_healpix_order=1,\n",
" output_path=\"abfs://hipscat/pytests/data\",\n",
" output_storage_options=storage_options,\n",
" output_artifact_name=\"small_sky_order1\",\n",
" tmp_dir=tmp_dir,\n",
" dask_tmp=tmp_dir,\n",
" overwrite=True,\n",
")\n",
"runner.pipeline(args)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Almanac info\n",
"\n",
"For the above catalogs, create almanac data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from hipscat.inspection.almanac import Almanac\n",
"from hipscat.inspection.almanac_info import AlmanacInfo\n",
"\n",
"almanac_info = AlmanacInfo.from_catalog_dir(\n",
" \"abfs://hipscat/pytests/data/small_sky\", storage_options=storage_options\n",
")\n",
"almanac_info.write_to_file(\n",
" directory=\"abfs://hipscat/pytests/data/almanac\", default_dir=False, storage_options=storage_options\n",
")\n",
"\n",
"almanac_info = AlmanacInfo.from_catalog_dir(\n",
" \"abfs://hipscat/pytests/data/small_sky_order1\", storage_options=storage_options\n",
")\n",
"almanac_info.write_to_file(\n",
" directory=\"abfs://hipscat/pytests/data/almanac\", default_dir=False, storage_options=storage_options\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tmp_path.cleanup()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "hipscatenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading