diff --git a/src/nnja_ai/catalog.py b/src/nnja_ai/catalog.py index 8f06b8f..fd7bfef 100644 --- a/src/nnja_ai/catalog.py +++ b/src/nnja_ai/catalog.py @@ -42,7 +42,7 @@ class DataCatalog: def __init__( self, - mirror: Optional[str] = DEFAULT_MIRROR, + mirror: Optional[str] = None, base_path: Optional[str] = None, catalog_json: Optional[str] = None, ): @@ -60,7 +60,9 @@ def __init__( Raises: ValueError: If both mirror and custom parameters are specified. """ - # Validate parameters - no mix and match + # Validate parameters and specify a default mirror if not provided - no mix and match + if mirror is None and base_path is None and catalog_json is None: + mirror = DEFAULT_MIRROR if mirror is not None and (base_path is not None or catalog_json is not None): raise ValueError( "Cannot specify both 'mirror' and custom parameters ('base_path', 'catalog_json'). " diff --git a/src/nnja_ai/io.py b/src/nnja_ai/io.py index f482828..3a112f0 100644 --- a/src/nnja_ai/io.py +++ b/src/nnja_ai/io.py @@ -145,17 +145,21 @@ def load_parquet( case "pandas": import pandas as pd - return pd.concat( - [ - pd.read_parquet( - uri, - columns=columns, - storage_options=auth_args, - **backend_kwargs, - ) - for uri in parquet_uris - ] - ) + if not auth_args: + # This is a local file system, so we can just read the parquet files directly + return pd.read_parquet(parquet_uris, columns=columns, **backend_kwargs) + else: + return pd.concat( + [ + pd.read_parquet( + uri, + columns=columns, + storage_options=auth_args, + **backend_kwargs, + ) + for uri in parquet_uris + ] + ) case "polars": import polars as pl