-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #85 from astronomy-commons/sean/catalog-refactor
Refactor Catalogs to inherit from base Dataset and not initialize from files
- Loading branch information
Showing
21 changed files
with
543 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from dataclasses import dataclass | ||
|
||
from hipscat.catalog.dataset.base_catalog_info import BaseCatalogInfo | ||
|
||
|
||
@dataclass
class CatalogInfo(BaseCatalogInfo):
    """Metadata describing a HEALPix Hive partitioned catalog.

    Adds three string fields on top of those inherited from
    ``BaseCatalogInfo``:

    * ``epoch`` -- defaults to ``"J2000"``
    * ``ra_column`` -- defaults to ``"ra"``
    * ``dec_column`` -- defaults to ``"dec"``

    All three are appended to the inherited ``required_fields`` list.
    """

    epoch: str = "J2000"
    ra_column: str = "ra"
    dec_column: str = "dec"

    # No annotation here, so the dataclass decorator leaves this as a plain
    # class attribute instead of turning it into a field. A new list is built,
    # leaving the parent's list untouched.
    required_fields = [
        *BaseCatalogInfo.required_fields,
        "epoch",
        "ra_column",
        "dec_column",
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from enum import Enum | ||
|
||
|
||
class CatalogType(str, Enum):
    """The kinds of catalog that are recognised.

    The ``str`` mixin makes every member a string as well, so a member
    compares equal to its raw value (e.g. ``CatalogType.OBJECT == "object"``).
    """

    # Declaration order is the iteration order of the enum.
    OBJECT = "object"
    SOURCE = "source"
    ASSOCIATION = "association"
    INDEX = "index"
    MARGIN = "margin"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import dataclasses | ||
from dataclasses import dataclass | ||
|
||
from typing_extensions import Self | ||
|
||
from hipscat.catalog.catalog_type import CatalogType | ||
from hipscat.io import FilePointer, file_io | ||
|
||
|
||
@dataclass
class BaseCatalogInfo:
    """Metadata container shared by catalog info classes.

    Instances validate themselves on construction: every name listed in
    ``required_fields`` must hold a non-``None`` value, and ``catalog_type``
    must be one of the values in ``CATALOG_TYPES``.
    """

    catalog_name: str = ""
    catalog_type: CatalogType = None
    total_rows: int = None

    # The two attributes below carry no annotation, so the dataclass decorator
    # treats them as ordinary class attributes rather than as fields.
    CATALOG_TYPES = [t.value for t in CatalogType]

    required_fields = ["catalog_type"]

    def __post_init__(self):
        """Validate the freshly initialised instance.

        Raises:
            ValueError: if a required field is missing or ``None``, or if
                ``catalog_type`` is not a known catalog type.
        """
        # Required-field validation runs first, so an unset catalog_type is
        # reported as "required" rather than as "unknown".
        self._check_required_fields()
        if self.catalog_type in self.CATALOG_TYPES:
            return
        raise ValueError(f"Unknown catalog type: {self.catalog_type}")

    def __str__(self):
        """Render one `` name value`` line per dataclass field."""
        field_values = dataclasses.asdict(self)
        return "".join(f" {name} {value}\n" for name, value in field_values.items())

    @classmethod
    def read_from_metadata_file(cls, catalog_info_file: FilePointer) -> Self:
        """Build an instance from a `catalog_info.json` metadata file.

        Only keys whose names match a dataclass field of ``cls`` are used;
        any other key in the file is ignored, and fields absent from the file
        keep their defaults.

        Args:
            catalog_info_file: FilePointer pointing to the `catalog_info.json` file

        Returns:
            An instance of ``cls`` populated with the data from the file
        """
        metadata_keywords = file_io.load_json_file(catalog_info_file)
        recognised_keywords = {
            spec.name: metadata_keywords[spec.name]
            for spec in dataclasses.fields(cls)
            if spec.name in metadata_keywords
        }
        return cls(**recognised_keywords)

    def _check_required_fields(self):
        """Ensure each name in ``required_fields`` has a non-``None`` value.

        Raises:
            ValueError: on the first required name that is either not a field
                of this instance or whose value is ``None``.
        """
        current_values = dataclasses.asdict(self)
        for field_name in self.required_fields:
            # .get() yields None both for an absent key and for a None value,
            # which are exactly the two failure cases.
            if current_values.get(field_name) is None:
                raise ValueError(
                    f"{field_name} is required in the Catalog Info and a value must be provided"
                )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
from typing import Tuple, Type | ||
|
||
from typing_extensions import Self | ||
|
||
from hipscat.catalog.dataset.base_catalog_info import BaseCatalogInfo | ||
from hipscat.io import FilePointer, file_io, paths | ||
|
||
|
||
class Dataset:
    """Base class for a HiPSCat dataset.

    A base dataset contains a catalog_info metadata file and the data
    contained in parquet files. Subclasses can point ``CatalogInfoClass`` at a
    more specific catalog info type; it is used both to type-check the
    constructor argument and to parse the metadata file when reading from disk.
    """

    CatalogInfoClass: Type[BaseCatalogInfo] = BaseCatalogInfo

    def __init__(
        self,
        catalog_info: CatalogInfoClass,
        catalog_path=None,
    ) -> None:
        """Create a Dataset from already-loaded metadata.

        Args:
            catalog_info: A catalog_info object with the catalog metadata
            catalog_path: If the catalog is stored on disk, the location of
                the catalog. Nothing is loaded from this path; it is only
                stored as metadata.

        Raises:
            TypeError: if ``catalog_info`` is not an instance of
                ``CatalogInfoClass``.
        """
        if not isinstance(catalog_info, self.CatalogInfoClass):
            raise TypeError(f"catalog_info type must be {self.CatalogInfoClass}")

        self.catalog_info = catalog_info
        self.catalog_name = catalog_info.catalog_name

        self.catalog_path = catalog_path
        # A dataset counts as "on disk" exactly when a path was supplied.
        self.on_disk = catalog_path is not None
        # The pointer is built even when no path was given (catalog_path None).
        self.catalog_base_dir = file_io.get_file_pointer_from_path(catalog_path)

    @classmethod
    def read_from_hipscat(cls, catalog_path: str) -> Self:
        """Read a HiPSCat catalog from a HiPSCat directory.

        Args:
            catalog_path: path to the root directory of the catalog

        Returns:
            The initialized catalog object

        Raises:
            FileNotFoundError: if the directory or its catalog info file is
                missing.
        """
        base_dir = file_io.get_file_pointer_from_path(catalog_path)
        cls._check_files_exist(base_dir)
        # Positional and keyword constructor arguments are gathered by
        # separate hooks that subclasses can override.
        positional = cls._read_args(base_dir)
        keyword = cls._read_kwargs(base_dir)
        return cls(*positional, **keyword)

    @classmethod
    def _read_args(cls, catalog_base_dir: FilePointer) -> Tuple[CatalogInfoClass]:
        """Load the positional constructor arguments: a 1-tuple holding the catalog info."""
        info_pointer = paths.get_catalog_info_pointer(catalog_base_dir)
        return (cls.CatalogInfoClass.read_from_metadata_file(info_pointer),)

    @classmethod
    def _read_kwargs(cls, catalog_base_dir: FilePointer) -> dict:
        """Build the keyword constructor arguments: the catalog path as a string."""
        return {"catalog_path": str(catalog_base_dir)}

    @classmethod
    def _check_files_exist(cls, catalog_base_dir: FilePointer):
        """Verify that the catalog directory and its catalog info file exist.

        Raises:
            FileNotFoundError: if either the directory or the catalog info
                file cannot be found.
        """
        if not file_io.does_file_or_directory_exist(catalog_base_dir):
            raise FileNotFoundError(f"No directory exists at {str(catalog_base_dir)}")

        catalog_info_file = paths.get_catalog_info_pointer(catalog_base_dir)
        if file_io.does_file_or_directory_exist(catalog_info_file):
            return
        raise FileNotFoundError(
            f"No catalog info found where expected: {str(catalog_info_file)}"
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
{ | ||
"catalog_name": "test_name", | ||
"catalog_type": "object", | ||
"epoch": "J2000", | ||
"ra_column": "ra", | ||
"dec_column": "dec", | ||
"total_rows": 10 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"catalog_name": "test_name", | ||
"catalog_type": "object", | ||
"total_rows": 10 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.