Merge pull request #107 from astronomy-commons/issue/43/black

Mechanical application of black formatting
astronomy-commons · Jun 30, 2023 · 6c11c4b
2 parents 95d6cb2 + 836fa7e
commit 6c11c4b
Show file tree

Hide file tree

Showing 48 changed files with 207 additions and 424 deletions.
diff --git a/docs/notebooks/catalog_size_inspection.ipynb b/docs/notebooks/catalog_size_inspection.ipynb
@@ -38,7 +38,7 @@
     "import os\n",
     "\n",
     "### Change this path!!!\n",
-    "catalog_dir = '../../tests/data/small_sky_order1'\n",
+    "catalog_dir = \"../../tests/data/small_sky_order1\"\n",
     "\n",
     "### ----------------\n",
     "### You probably won't have to change anything from here.\n",
@@ -48,11 +48,11 @@
     "info_frame = catalog.get_pixels().copy()\n",
     "\n",
     "for index, partition in info_frame.iterrows():\n",
-    "    file_name = result = paths.pixel_catalog_file(catalog_dir, partition['Norder'], partition['Npix'])\n",
+    "    file_name = result = paths.pixel_catalog_file(catalog_dir, partition[\"Norder\"], partition[\"Npix\"])\n",
     "    info_frame.loc[index, \"size_on_disk\"] = os.path.getsize(file_name)\n",
     "\n",
     "info_frame = info_frame.astype(int)\n",
-    "info_frame[\"gbs\"] = info_frame[\"size_on_disk\"]/(1024 * 1024 * 1024)"
+    "info_frame[\"gbs\"] = info_frame[\"size_on_disk\"] / (1024 * 1024 * 1024)"
    ]
   },
   {
@@ -89,11 +89,11 @@
    "source": [
     "print(f'healpix orders: {info_frame[\"Norder\"].unique()}')\n",
     "print(f'num partitions: {len(info_frame[\"Npix\"])}')\n",
-    "print('------')\n",
+    "print(\"------\")\n",
     "print(f'min rows: {info_frame[\"num_rows\"].min()}')\n",
     "print(f'max rows: {info_frame[\"num_rows\"].max()}')\n",
     "print(f'row ratio: {info_frame[\"num_rows\"].max()/info_frame[\"num_rows\"].min():.2f}')\n",
-    "print('------')\n",
+    "print(\"------\")\n",
     "print(f'min size_on_disk: {info_frame[\"gbs\"].min():.2f}')\n",
     "print(f'max size_on_disk: {info_frame[\"gbs\"].max():.2f}')\n",
     "print(f'size_on_disk ratio: {info_frame[\"gbs\"].max()/info_frame[\"gbs\"].min():.2f}')\n",
@@ -127,7 +127,7 @@
     "\n",
     "plt.hist(info_frame[\"gbs\"])\n",
     "\n",
-    "bins = [0,.5,1,2,100]\n",
+    "bins = [0, 0.5, 1, 2, 100]\n",
     "labels = [\"small-ish\", \"sweet-spot\", \"big-ish\", \"too-big\"]\n",
     "hist = np.histogram(info_frame[\"gbs\"], bins=bins)[0]\n",
     "pcts = hist / len(info_frame)\n",

diff --git a/docs/notebooks/cone_search.ipynb b/docs/notebooks/cone_search.ipynb
@@ -21,7 +21,7 @@
     "catalog_path = \"\"\n",
     "ra = 24.7035278\n",
     "dec = -9.3653083\n",
-    "radius = 2 # arcsec"
+    "radius = 2  # arcsec"
    ]
   },
   {

diff --git a/pyproject.toml b/pyproject.toml
@@ -61,4 +61,12 @@ build-backend = "setuptools.build_meta"
 write_to = "src/hipscat/_version.py"
 
 [tool.coverage.run]
-omit=["src/hipscat/_version.py"]
+omit=["src/hipscat/_version.py"]
+
+[tool.black]
+line-length = 110
+target-version = ["py38"]
+
+[tool.isort]
+profile = "black"
+line_length = 110
diff --git a/src/.pylintrc b/src/.pylintrc
@@ -329,7 +329,7 @@ indent-after-paren=4
 indent-string='    '
 
 # Maximum number of characters on a single line.
-max-line-length=100
+max-line-length=110
 
 # Maximum number of lines in a module.
 max-module-lines=1000

diff --git a/src/hipscat/catalog/association_catalog/association_catalog.py b/src/hipscat/catalog/association_catalog/association_catalog.py
@@ -3,10 +3,8 @@
 import pandas as pd
 
 from hipscat.catalog import CatalogType
-from hipscat.catalog.association_catalog.association_catalog_info import \
-    AssociationCatalogInfo
-from hipscat.catalog.association_catalog.partition_join_info import \
-    PartitionJoinInfo
+from hipscat.catalog.association_catalog.association_catalog_info import AssociationCatalogInfo
+from hipscat.catalog.association_catalog.partition_join_info import PartitionJoinInfo
 from hipscat.catalog.dataset.dataset import Dataset
 from hipscat.io import FilePointer, paths
 
@@ -25,15 +23,13 @@ class AssociationCatalog(Dataset):
     JoinPixelInputTypes = Union[list, pd.DataFrame, PartitionJoinInfo]
 
     def __init__(
-            self,
-            catalog_info: CatalogInfoClass,
-            join_pixels: JoinPixelInputTypes,
-            catalog_path=None,
+        self,
+        catalog_info: CatalogInfoClass,
+        join_pixels: JoinPixelInputTypes,
+        catalog_path=None,
     ) -> None:
         if not catalog_info.catalog_type == CatalogType.ASSOCIATION:
-            raise ValueError(
-                "Catalog info `catalog_type` must be 'association'"
-            )
+            raise ValueError("Catalog info `catalog_type` must be 'association'")
         super().__init__(catalog_info, catalog_path)
         self.join_info = self._get_partition_join_info_from_pixels(join_pixels)
 
@@ -47,19 +43,15 @@ def get_join_pixels(self) -> pd.DataFrame:
         return self.join_info.data_frame
 
     @staticmethod
-    def _get_partition_join_info_from_pixels(
-            join_pixels: JoinPixelInputTypes
-    ) -> PartitionJoinInfo:
+    def _get_partition_join_info_from_pixels(join_pixels: JoinPixelInputTypes) -> PartitionJoinInfo:
         if isinstance(join_pixels, PartitionJoinInfo):
             return join_pixels
         if isinstance(join_pixels, pd.DataFrame):
             return PartitionJoinInfo(join_pixels)
         raise TypeError("join_pixels must be of type PartitionJoinInfo or DataFrame")
 
     @classmethod
-    def _read_args(
-            cls, catalog_base_dir: FilePointer
-    ) -> Tuple[CatalogInfoClass, JoinPixelInputTypes]:
+    def _read_args(cls, catalog_base_dir: FilePointer) -> Tuple[CatalogInfoClass, JoinPixelInputTypes]:
         args = super()._read_args(catalog_base_dir)
         partition_join_info_file = paths.get_partition_join_info_pointer(catalog_base_dir)
         partition_join_info = PartitionJoinInfo.read_from_file(partition_join_info_file)

diff --git a/src/hipscat/catalog/catalog.py b/src/hipscat/catalog/catalog.py
@@ -93,6 +93,4 @@ def _check_files_exist(cls, catalog_base_dir: FilePointer):
         super()._check_files_exist(catalog_base_dir)
         partition_info_file = paths.get_partition_info_pointer(catalog_base_dir)
         if not file_io.does_file_or_directory_exist(partition_info_file):
-            raise FileNotFoundError(
-                f"No partition info found where expected: {str(partition_info_file)}"
-            )
+            raise FileNotFoundError(f"No partition info found where expected: {str(partition_info_file)}")
diff --git a/src/hipscat/catalog/dataset/base_catalog_info.py b/src/hipscat/catalog/dataset/base_catalog_info.py
@@ -62,6 +62,4 @@ def _check_required_fields(self):
         fields_dict = dataclasses.asdict(self)
         for field_name in self.required_fields:
             if field_name not in fields_dict or fields_dict[field_name] is None:
-                raise ValueError(
-                    f"{field_name} is required in the Catalog Info and a value must be provided"
-                )
+                raise ValueError(f"{field_name} is required in the Catalog Info and a value must be provided")
diff --git a/src/hipscat/catalog/dataset/catalog_info_factory.py b/src/hipscat/catalog/dataset/catalog_info_factory.py
@@ -1,16 +1,12 @@
 import dataclasses
 from typing import Optional
 
-from hipscat.catalog.association_catalog.association_catalog_info import (
-    AssociationCatalogInfo,
-)
+from hipscat.catalog.association_catalog.association_catalog_info import AssociationCatalogInfo
 from hipscat.catalog.catalog_info import CatalogInfo
 from hipscat.catalog.catalog_type import CatalogType
 from hipscat.catalog.dataset.base_catalog_info import BaseCatalogInfo
 from hipscat.catalog.index.index_catalog_info import IndexCatalogInfo
-from hipscat.catalog.margin_cache.margin_cache_catalog_info import (
-    MarginCacheCatalogInfo,
-)
+from hipscat.catalog.margin_cache.margin_cache_catalog_info import MarginCacheCatalogInfo
 from hipscat.catalog.source_catalog.source_catalog_info import SourceCatalogInfo
 from hipscat.io import FilePointer, file_io, paths
 
@@ -24,9 +20,7 @@
 """Map of catalog types to their expected subclass of BaseCatalogInfo."""
 
 
-def create_catalog_info(
-    keywords: dict, catalog_type: Optional[CatalogType] = None
-) -> BaseCatalogInfo:
+def create_catalog_info(keywords: dict, catalog_type: Optional[CatalogType] = None) -> BaseCatalogInfo:
     """Generate a typed catalog info object from the type specified explicitly or
     using ``catalog_type`` keyword.
 

diff --git a/src/hipscat/catalog/dataset/dataset.py b/src/hipscat/catalog/dataset/dataset.py
@@ -71,6 +71,4 @@ def _check_files_exist(cls, catalog_base_dir: FilePointer):
             raise FileNotFoundError(f"No directory exists at {str(catalog_base_dir)}")
         catalog_info_file = paths.get_catalog_info_pointer(catalog_base_dir)
         if not file_io.does_file_or_directory_exist(catalog_info_file):
-            raise FileNotFoundError(
-                f"No catalog info found where expected: {str(catalog_info_file)}"
-            )
+            raise FileNotFoundError(f"No catalog info found where expected: {str(catalog_info_file)}")
diff --git a/src/hipscat/catalog/partition_info.py b/src/hipscat/catalog/partition_info.py
@@ -35,13 +35,11 @@ def get_healpix_pixels(self) -> List[HealpixPixel]:
 
     def get_highest_order(self) -> int:
         """Get the highest healpix order for the dataset.
-        
+
         Returns:
             int representing highest order.
         """
-        highest_order = np.max(
-            self.data_frame[self.METADATA_ORDER_COLUMN_NAME].values
-        )
+        highest_order = np.max(self.data_frame[self.METADATA_ORDER_COLUMN_NAME].values)
 
         return highest_order
 
@@ -56,9 +54,7 @@ def read_from_file(cls, partition_info_file: FilePointer):
             A `PartitionInfo` object with the data from the file
         """
         if not file_io.does_file_or_directory_exist(partition_info_file):
-            raise FileNotFoundError(
-                f"No partition info found where expected: {str(partition_info_file)}"
-            )
+            raise FileNotFoundError(f"No partition info found where expected: {str(partition_info_file)}")
 
         data_frame = file_io.load_csv_to_pandas(partition_info_file)
         return cls(data_frame)
diff --git a/src/hipscat/inspection/almanac.py b/src/hipscat/inspection/almanac.py
@@ -99,9 +99,7 @@ def _init_catalog_objects(self):
                 else:
                     full_name = catalog_info.catalog_name
                 if full_name in self.entries:
-                    warnings.warn(
-                        f"Duplicate catalog name ({full_name}). Try using namespaces."
-                    )
+                    warnings.warn(f"Duplicate catalog name ({full_name}). Try using namespaces.")
                 else:
                     self.entries[full_name] = catalog_info
                     self.dir_to_catalog_name[catalog_info.catalog_path] = full_name
@@ -121,9 +119,7 @@ def _init_catalog_links(self):
             elif catalog_entry.catalog_type == CatalogType.SOURCE:
                 ## Source catalogs MAY indicate their primary object catalog.
                 if catalog_entry.primary:
-                    object_catalog = self._get_linked_catalog(
-                        catalog_entry.primary, catalog_entry.namespace
-                    )
+                    object_catalog = self._get_linked_catalog(catalog_entry.primary, catalog_entry.namespace)
                     if not object_catalog:
                         warnings.warn(
                             f"source catalog {catalog_entry.catalog_name} missing "
@@ -135,9 +131,7 @@ def _init_catalog_links(self):
                         object_catalog.sources.append(catalog_entry)
             elif catalog_entry.catalog_type == CatalogType.ASSOCIATION:
                 ## Association table MUST have a primary and join catalog
-                primary_catalog = self._get_linked_catalog(
-                    catalog_entry.primary, catalog_entry.namespace
-                )
+                primary_catalog = self._get_linked_catalog(catalog_entry.primary, catalog_entry.namespace)
                 if not primary_catalog:
                     warnings.warn(
                         f"association table {catalog_entry.catalog_name} missing "
@@ -161,9 +155,7 @@ def _init_catalog_links(self):
                     join_catalog.associations_right.append(catalog_entry)
             elif catalog_entry.catalog_type == CatalogType.MARGIN:
                 ## Margin catalogs MUST have a primary catalog
-                primary_catalog = self._get_linked_catalog(
-                    catalog_entry.primary, catalog_entry.namespace
-                )
+                primary_catalog = self._get_linked_catalog(catalog_entry.primary, catalog_entry.namespace)
                 if not primary_catalog:
                     warnings.warn(
                         f"margin table {catalog_entry.catalog_name} missing "
@@ -174,9 +166,7 @@ def _init_catalog_links(self):
                     primary_catalog.margins.append(catalog_entry)
             elif catalog_entry.catalog_type == CatalogType.INDEX:
                 ## Index tables MUST have a primary catalog
-                primary_catalog = self._get_linked_catalog(
-                    catalog_entry.primary, catalog_entry.namespace
-                )
+                primary_catalog = self._get_linked_catalog(catalog_entry.primary, catalog_entry.namespace)
                 if not primary_catalog:
                     warnings.warn(
                         f"index table {catalog_entry.catalog_name} missing "
@@ -256,6 +246,4 @@ def get_catalog(self, catalog_name: str) -> Dataset:
 
         This will load the ``catalog_info.join`` and other relevant metadata files
         from disk."""
-        return Dataset.read_from_hipscat(
-            self.get_almanac_info(catalog_name=catalog_name).catalog_path
-        )
+        return Dataset.read_from_hipscat(self.get_almanac_info(catalog_name=catalog_name).catalog_path)
diff --git a/src/hipscat/inspection/almanac_info.py b/src/hipscat/inspection/almanac_info.py
@@ -42,20 +42,10 @@ class AlmanacInfo:
 
     def __post_init__(self):
         if len(self.catalog_info):
-            self.catalog_info_object = catalog_info_factory.create_catalog_info(
-                self.catalog_info
-            )
-            if (
-                self.catalog_info
-                and "primary_catalog" in self.catalog_info
-                and not self.primary
-            ):
+            self.catalog_info_object = catalog_info_factory.create_catalog_info(self.catalog_info)
+            if self.catalog_info and "primary_catalog" in self.catalog_info and not self.primary:
                 self.primary = self.catalog_info["primary_catalog"]
-            if (
-                self.catalog_info
-                and "join_catalog" in self.catalog_info
-                and not self.join
-            ):
+            if self.catalog_info and "join_catalog" in self.catalog_info and not self.join:
                 self.join = self.catalog_info["join_catalog"]
 
         ## Allows use of $HIPSCAT_DEFAULT_DIR in paths
@@ -84,9 +74,7 @@ def get_default_dir() -> str:
     @classmethod
     def from_catalog_dir(cls, catalog_base_dir: str) -> Self:
         """Create almanac information from the catalog information found at the target directory"""
-        catalog_info = catalog_info_factory.from_catalog_dir(
-            catalog_base_dir=catalog_base_dir
-        )
+        catalog_info = catalog_info_factory.from_catalog_dir(catalog_base_dir=catalog_base_dir)
         args = {
             "catalog_path": catalog_base_dir,
             "catalog_name": catalog_info.catalog_name,

diff --git a/src/hipscat/inspection/visualize_catalog.py b/src/hipscat/inspection/visualize_catalog.py
@@ -17,7 +17,7 @@ def _read_point_map(catalog_base_dir):
     Args:
         catalog_base_dir: path to a catalog
     Returns:
-        one-dimensional numpy array of long integers where the value at each index 
+        one-dimensional numpy array of long integers where the value at each index
         corresponds to the number of objects found at the healpix pixel.
     """
     map_file_pointer = paths.get_point_map_file_pointer(catalog_base_dir)
@@ -64,14 +64,11 @@ def plot_pixels(catalog: Catalog, projection="moll", draw_map=True):
     order_map = np.full(hp.order2npix(max_order), hp.pixelfunc.UNSEEN)
 
     for _, pixel in pixels.iterrows():
-        explosion_factor = 4 ** (
-            max_order - pixel[PartitionInfo.METADATA_ORDER_COLUMN_NAME]
-        )
+        explosion_factor = 4 ** (max_order - pixel[PartitionInfo.METADATA_ORDER_COLUMN_NAME])
         exploded_pixels = [
             *range(
                 pixel[PartitionInfo.METADATA_PIXEL_COLUMN_NAME] * explosion_factor,
-                (pixel[PartitionInfo.METADATA_PIXEL_COLUMN_NAME] + 1)
-                * explosion_factor,
+                (pixel[PartitionInfo.METADATA_PIXEL_COLUMN_NAME] + 1) * explosion_factor,
             )
         ]
         order_map[exploded_pixels] = pixel[PartitionInfo.METADATA_ORDER_COLUMN_NAME]

diff --git a/src/hipscat/io/__init__.py b/src/hipscat/io/__init__.py
@@ -1,11 +1,23 @@
 """Utilities for reading and writing catalog files"""
 
 from .file_io import FilePointer, get_file_pointer_from_path
-from .paths import (create_hive_directory_name, create_hive_parquet_file_name,
-                    get_catalog_info_pointer, get_common_metadata_pointer,
-                    get_parquet_metadata_pointer, get_partition_info_pointer,
-                    get_point_map_file_pointer, get_provenance_pointer,
-                    pixel_association_directory, pixel_association_file,
-                    pixel_catalog_file, pixel_directory)
-from .write_metadata import (write_catalog_info, write_parquet_metadata,
-                             write_partition_info, write_provenance_info)
+from .paths import (
+    create_hive_directory_name,
+    create_hive_parquet_file_name,
+    get_catalog_info_pointer,
+    get_common_metadata_pointer,
+    get_parquet_metadata_pointer,
+    get_partition_info_pointer,
+    get_point_map_file_pointer,
+    get_provenance_pointer,
+    pixel_association_directory,
+    pixel_association_file,
+    pixel_catalog_file,
+    pixel_directory,
+)
+from .write_metadata import (
+    write_catalog_info,
+    write_parquet_metadata,
+    write_partition_info,
+    write_provenance_info,
+)