Skip to content

Commit

Permalink
Merge pull request #107 from astronomy-commons/issue/43/black
Browse files Browse the repository at this point in the history
Mechanical application of black formatting
  • Loading branch information
delucchi-cmu authored Jun 30, 2023
2 parents 95d6cb2 + 836fa7e commit 6c11c4b
Show file tree
Hide file tree
Showing 48 changed files with 207 additions and 424 deletions.
12 changes: 6 additions & 6 deletions docs/notebooks/catalog_size_inspection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"import os\n",
"\n",
"### Change this path!!!\n",
"catalog_dir = '../../tests/data/small_sky_order1'\n",
"catalog_dir = \"../../tests/data/small_sky_order1\"\n",
"\n",
"### ----------------\n",
"### You probably won't have to change anything from here.\n",
Expand All @@ -48,11 +48,11 @@
"info_frame = catalog.get_pixels().copy()\n",
"\n",
"for index, partition in info_frame.iterrows():\n",
" file_name = result = paths.pixel_catalog_file(catalog_dir, partition['Norder'], partition['Npix'])\n",
" file_name = result = paths.pixel_catalog_file(catalog_dir, partition[\"Norder\"], partition[\"Npix\"])\n",
" info_frame.loc[index, \"size_on_disk\"] = os.path.getsize(file_name)\n",
"\n",
"info_frame = info_frame.astype(int)\n",
"info_frame[\"gbs\"] = info_frame[\"size_on_disk\"]/(1024 * 1024 * 1024)"
"info_frame[\"gbs\"] = info_frame[\"size_on_disk\"] / (1024 * 1024 * 1024)"
]
},
{
Expand Down Expand Up @@ -89,11 +89,11 @@
"source": [
"print(f'healpix orders: {info_frame[\"Norder\"].unique()}')\n",
"print(f'num partitions: {len(info_frame[\"Npix\"])}')\n",
"print('------')\n",
"print(\"------\")\n",
"print(f'min rows: {info_frame[\"num_rows\"].min()}')\n",
"print(f'max rows: {info_frame[\"num_rows\"].max()}')\n",
"print(f'row ratio: {info_frame[\"num_rows\"].max()/info_frame[\"num_rows\"].min():.2f}')\n",
"print('------')\n",
"print(\"------\")\n",
"print(f'min size_on_disk: {info_frame[\"gbs\"].min():.2f}')\n",
"print(f'max size_on_disk: {info_frame[\"gbs\"].max():.2f}')\n",
"print(f'size_on_disk ratio: {info_frame[\"gbs\"].max()/info_frame[\"gbs\"].min():.2f}')\n",
Expand Down Expand Up @@ -127,7 +127,7 @@
"\n",
"plt.hist(info_frame[\"gbs\"])\n",
"\n",
"bins = [0,.5,1,2,100]\n",
"bins = [0, 0.5, 1, 2, 100]\n",
"labels = [\"small-ish\", \"sweet-spot\", \"big-ish\", \"too-big\"]\n",
"hist = np.histogram(info_frame[\"gbs\"], bins=bins)[0]\n",
"pcts = hist / len(info_frame)\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/notebooks/cone_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"catalog_path = \"\"\n",
"ra = 24.7035278\n",
"dec = -9.3653083\n",
"radius = 2 # arcsec"
"radius = 2 # arcsec"
]
},
{
Expand Down
10 changes: 9 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,12 @@ build-backend = "setuptools.build_meta"
write_to = "src/hipscat/_version.py"

[tool.coverage.run]
omit=["src/hipscat/_version.py"]
omit=["src/hipscat/_version.py"]

[tool.black]
line-length = 110
target-version = ["py38"]

[tool.isort]
profile = "black"
line_length = 110
2 changes: 1 addition & 1 deletion src/.pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ indent-after-paren=4
indent-string=' '

# Maximum number of characters on a single line.
max-line-length=100
max-line-length=110

# Maximum number of lines in a module.
max-module-lines=1000
Expand Down
26 changes: 9 additions & 17 deletions src/hipscat/catalog/association_catalog/association_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@
import pandas as pd

from hipscat.catalog import CatalogType
from hipscat.catalog.association_catalog.association_catalog_info import \
AssociationCatalogInfo
from hipscat.catalog.association_catalog.partition_join_info import \
PartitionJoinInfo
from hipscat.catalog.association_catalog.association_catalog_info import AssociationCatalogInfo
from hipscat.catalog.association_catalog.partition_join_info import PartitionJoinInfo
from hipscat.catalog.dataset.dataset import Dataset
from hipscat.io import FilePointer, paths

Expand All @@ -25,15 +23,13 @@ class AssociationCatalog(Dataset):
JoinPixelInputTypes = Union[list, pd.DataFrame, PartitionJoinInfo]

def __init__(
self,
catalog_info: CatalogInfoClass,
join_pixels: JoinPixelInputTypes,
catalog_path=None,
self,
catalog_info: CatalogInfoClass,
join_pixels: JoinPixelInputTypes,
catalog_path=None,
) -> None:
if not catalog_info.catalog_type == CatalogType.ASSOCIATION:
raise ValueError(
"Catalog info `catalog_type` must be 'association'"
)
raise ValueError("Catalog info `catalog_type` must be 'association'")
super().__init__(catalog_info, catalog_path)
self.join_info = self._get_partition_join_info_from_pixels(join_pixels)

Expand All @@ -47,19 +43,15 @@ def get_join_pixels(self) -> pd.DataFrame:
return self.join_info.data_frame

@staticmethod
def _get_partition_join_info_from_pixels(
join_pixels: JoinPixelInputTypes
) -> PartitionJoinInfo:
def _get_partition_join_info_from_pixels(join_pixels: JoinPixelInputTypes) -> PartitionJoinInfo:
if isinstance(join_pixels, PartitionJoinInfo):
return join_pixels
if isinstance(join_pixels, pd.DataFrame):
return PartitionJoinInfo(join_pixels)
raise TypeError("join_pixels must be of type PartitionJoinInfo or DataFrame")

@classmethod
def _read_args(
cls, catalog_base_dir: FilePointer
) -> Tuple[CatalogInfoClass, JoinPixelInputTypes]:
def _read_args(cls, catalog_base_dir: FilePointer) -> Tuple[CatalogInfoClass, JoinPixelInputTypes]:
args = super()._read_args(catalog_base_dir)
partition_join_info_file = paths.get_partition_join_info_pointer(catalog_base_dir)
partition_join_info = PartitionJoinInfo.read_from_file(partition_join_info_file)
Expand Down
4 changes: 1 addition & 3 deletions src/hipscat/catalog/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,4 @@ def _check_files_exist(cls, catalog_base_dir: FilePointer):
super()._check_files_exist(catalog_base_dir)
partition_info_file = paths.get_partition_info_pointer(catalog_base_dir)
if not file_io.does_file_or_directory_exist(partition_info_file):
raise FileNotFoundError(
f"No partition info found where expected: {str(partition_info_file)}"
)
raise FileNotFoundError(f"No partition info found where expected: {str(partition_info_file)}")
4 changes: 1 addition & 3 deletions src/hipscat/catalog/dataset/base_catalog_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,4 @@ def _check_required_fields(self):
fields_dict = dataclasses.asdict(self)
for field_name in self.required_fields:
if field_name not in fields_dict or fields_dict[field_name] is None:
raise ValueError(
f"{field_name} is required in the Catalog Info and a value must be provided"
)
raise ValueError(f"{field_name} is required in the Catalog Info and a value must be provided")
12 changes: 3 additions & 9 deletions src/hipscat/catalog/dataset/catalog_info_factory.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import dataclasses
from typing import Optional

from hipscat.catalog.association_catalog.association_catalog_info import (
AssociationCatalogInfo,
)
from hipscat.catalog.association_catalog.association_catalog_info import AssociationCatalogInfo
from hipscat.catalog.catalog_info import CatalogInfo
from hipscat.catalog.catalog_type import CatalogType
from hipscat.catalog.dataset.base_catalog_info import BaseCatalogInfo
from hipscat.catalog.index.index_catalog_info import IndexCatalogInfo
from hipscat.catalog.margin_cache.margin_cache_catalog_info import (
MarginCacheCatalogInfo,
)
from hipscat.catalog.margin_cache.margin_cache_catalog_info import MarginCacheCatalogInfo
from hipscat.catalog.source_catalog.source_catalog_info import SourceCatalogInfo
from hipscat.io import FilePointer, file_io, paths

Expand All @@ -24,9 +20,7 @@
"""Map of catalog types to their expected subclass of BaseCatalogInfo."""


def create_catalog_info(
keywords: dict, catalog_type: Optional[CatalogType] = None
) -> BaseCatalogInfo:
def create_catalog_info(keywords: dict, catalog_type: Optional[CatalogType] = None) -> BaseCatalogInfo:
"""Generate a typed catalog info object from the type specified explicitly or
using ``catalog_type`` keyword.
Expand Down
4 changes: 1 addition & 3 deletions src/hipscat/catalog/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,4 @@ def _check_files_exist(cls, catalog_base_dir: FilePointer):
raise FileNotFoundError(f"No directory exists at {str(catalog_base_dir)}")
catalog_info_file = paths.get_catalog_info_pointer(catalog_base_dir)
if not file_io.does_file_or_directory_exist(catalog_info_file):
raise FileNotFoundError(
f"No catalog info found where expected: {str(catalog_info_file)}"
)
raise FileNotFoundError(f"No catalog info found where expected: {str(catalog_info_file)}")
10 changes: 3 additions & 7 deletions src/hipscat/catalog/partition_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,11 @@ def get_healpix_pixels(self) -> List[HealpixPixel]:

def get_highest_order(self) -> int:
"""Get the highest healpix order for the dataset.
Returns:
int representing highest order.
"""
highest_order = np.max(
self.data_frame[self.METADATA_ORDER_COLUMN_NAME].values
)
highest_order = np.max(self.data_frame[self.METADATA_ORDER_COLUMN_NAME].values)

return highest_order

Expand All @@ -56,9 +54,7 @@ def read_from_file(cls, partition_info_file: FilePointer):
A `PartitionInfo` object with the data from the file
"""
if not file_io.does_file_or_directory_exist(partition_info_file):
raise FileNotFoundError(
f"No partition info found where expected: {str(partition_info_file)}"
)
raise FileNotFoundError(f"No partition info found where expected: {str(partition_info_file)}")

data_frame = file_io.load_csv_to_pandas(partition_info_file)
return cls(data_frame)
24 changes: 6 additions & 18 deletions src/hipscat/inspection/almanac.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,7 @@ def _init_catalog_objects(self):
else:
full_name = catalog_info.catalog_name
if full_name in self.entries:
warnings.warn(
f"Duplicate catalog name ({full_name}). Try using namespaces."
)
warnings.warn(f"Duplicate catalog name ({full_name}). Try using namespaces.")
else:
self.entries[full_name] = catalog_info
self.dir_to_catalog_name[catalog_info.catalog_path] = full_name
Expand All @@ -121,9 +119,7 @@ def _init_catalog_links(self):
elif catalog_entry.catalog_type == CatalogType.SOURCE:
## Source catalogs MAY indicate their primary object catalog.
if catalog_entry.primary:
object_catalog = self._get_linked_catalog(
catalog_entry.primary, catalog_entry.namespace
)
object_catalog = self._get_linked_catalog(catalog_entry.primary, catalog_entry.namespace)
if not object_catalog:
warnings.warn(
f"source catalog {catalog_entry.catalog_name} missing "
Expand All @@ -135,9 +131,7 @@ def _init_catalog_links(self):
object_catalog.sources.append(catalog_entry)
elif catalog_entry.catalog_type == CatalogType.ASSOCIATION:
## Association table MUST have a primary and join catalog
primary_catalog = self._get_linked_catalog(
catalog_entry.primary, catalog_entry.namespace
)
primary_catalog = self._get_linked_catalog(catalog_entry.primary, catalog_entry.namespace)
if not primary_catalog:
warnings.warn(
f"association table {catalog_entry.catalog_name} missing "
Expand All @@ -161,9 +155,7 @@ def _init_catalog_links(self):
join_catalog.associations_right.append(catalog_entry)
elif catalog_entry.catalog_type == CatalogType.MARGIN:
## Margin catalogs MUST have a primary catalog
primary_catalog = self._get_linked_catalog(
catalog_entry.primary, catalog_entry.namespace
)
primary_catalog = self._get_linked_catalog(catalog_entry.primary, catalog_entry.namespace)
if not primary_catalog:
warnings.warn(
f"margin table {catalog_entry.catalog_name} missing "
Expand All @@ -174,9 +166,7 @@ def _init_catalog_links(self):
primary_catalog.margins.append(catalog_entry)
elif catalog_entry.catalog_type == CatalogType.INDEX:
## Index tables MUST have a primary catalog
primary_catalog = self._get_linked_catalog(
catalog_entry.primary, catalog_entry.namespace
)
primary_catalog = self._get_linked_catalog(catalog_entry.primary, catalog_entry.namespace)
if not primary_catalog:
warnings.warn(
f"index table {catalog_entry.catalog_name} missing "
Expand Down Expand Up @@ -256,6 +246,4 @@ def get_catalog(self, catalog_name: str) -> Dataset:
This will load the ``catalog_info.json`` and other relevant metadata files
from disk."""
return Dataset.read_from_hipscat(
self.get_almanac_info(catalog_name=catalog_name).catalog_path
)
return Dataset.read_from_hipscat(self.get_almanac_info(catalog_name=catalog_name).catalog_path)
20 changes: 4 additions & 16 deletions src/hipscat/inspection/almanac_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,10 @@ class AlmanacInfo:

def __post_init__(self):
if len(self.catalog_info):
self.catalog_info_object = catalog_info_factory.create_catalog_info(
self.catalog_info
)
if (
self.catalog_info
and "primary_catalog" in self.catalog_info
and not self.primary
):
self.catalog_info_object = catalog_info_factory.create_catalog_info(self.catalog_info)
if self.catalog_info and "primary_catalog" in self.catalog_info and not self.primary:
self.primary = self.catalog_info["primary_catalog"]
if (
self.catalog_info
and "join_catalog" in self.catalog_info
and not self.join
):
if self.catalog_info and "join_catalog" in self.catalog_info and not self.join:
self.join = self.catalog_info["join_catalog"]

## Allows use of $HIPSCAT_DEFAULT_DIR in paths
Expand Down Expand Up @@ -84,9 +74,7 @@ def get_default_dir() -> str:
@classmethod
def from_catalog_dir(cls, catalog_base_dir: str) -> Self:
"""Create almanac information from the catalog information found at the target directory"""
catalog_info = catalog_info_factory.from_catalog_dir(
catalog_base_dir=catalog_base_dir
)
catalog_info = catalog_info_factory.from_catalog_dir(catalog_base_dir=catalog_base_dir)
args = {
"catalog_path": catalog_base_dir,
"catalog_name": catalog_info.catalog_name,
Expand Down
9 changes: 3 additions & 6 deletions src/hipscat/inspection/visualize_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def _read_point_map(catalog_base_dir):
Args:
catalog_base_dir: path to a catalog
Returns:
one-dimensional numpy array of long integers where the value at each index
one-dimensional numpy array of long integers where the value at each index
corresponds to the number of objects found at the healpix pixel.
"""
map_file_pointer = paths.get_point_map_file_pointer(catalog_base_dir)
Expand Down Expand Up @@ -64,14 +64,11 @@ def plot_pixels(catalog: Catalog, projection="moll", draw_map=True):
order_map = np.full(hp.order2npix(max_order), hp.pixelfunc.UNSEEN)

for _, pixel in pixels.iterrows():
explosion_factor = 4 ** (
max_order - pixel[PartitionInfo.METADATA_ORDER_COLUMN_NAME]
)
explosion_factor = 4 ** (max_order - pixel[PartitionInfo.METADATA_ORDER_COLUMN_NAME])
exploded_pixels = [
*range(
pixel[PartitionInfo.METADATA_PIXEL_COLUMN_NAME] * explosion_factor,
(pixel[PartitionInfo.METADATA_PIXEL_COLUMN_NAME] + 1)
* explosion_factor,
(pixel[PartitionInfo.METADATA_PIXEL_COLUMN_NAME] + 1) * explosion_factor,
)
]
order_map[exploded_pixels] = pixel[PartitionInfo.METADATA_ORDER_COLUMN_NAME]
Expand Down
28 changes: 20 additions & 8 deletions src/hipscat/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
"""Utilities for reading and writing catalog files"""

from .file_io import FilePointer, get_file_pointer_from_path
from .paths import (create_hive_directory_name, create_hive_parquet_file_name,
get_catalog_info_pointer, get_common_metadata_pointer,
get_parquet_metadata_pointer, get_partition_info_pointer,
get_point_map_file_pointer, get_provenance_pointer,
pixel_association_directory, pixel_association_file,
pixel_catalog_file, pixel_directory)
from .write_metadata import (write_catalog_info, write_parquet_metadata,
write_partition_info, write_provenance_info)
from .paths import (
create_hive_directory_name,
create_hive_parquet_file_name,
get_catalog_info_pointer,
get_common_metadata_pointer,
get_parquet_metadata_pointer,
get_partition_info_pointer,
get_point_map_file_pointer,
get_provenance_pointer,
pixel_association_directory,
pixel_association_file,
pixel_catalog_file,
pixel_directory,
)
from .write_metadata import (
write_catalog_info,
write_parquet_metadata,
write_partition_info,
write_provenance_info,
)
Loading

0 comments on commit 6c11c4b

Please sign in to comment.