Skip to content

Commit 8673663

Browse files
authored
fix: improve catalog and data loading locally (#69)
1 parent 6e2f37a commit 8673663

2 files changed

Lines changed: 19 additions & 13 deletions

File tree

src/nnja_ai/catalog.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ class DataCatalog:
4242

4343
def __init__(
4444
self,
45-
mirror: Optional[str] = DEFAULT_MIRROR,
45+
mirror: Optional[str] = None,
4646
base_path: Optional[str] = None,
4747
catalog_json: Optional[str] = None,
4848
):
@@ -60,7 +60,9 @@ def __init__(
6060
Raises:
6161
ValueError: If both mirror and custom parameters are specified.
6262
"""
63-
# Validate parameters - no mix and match
63+
# Fall back to the default mirror when neither a mirror nor custom parameters are given, then validate: 'mirror' cannot be mixed with custom parameters
64+
if mirror is None and base_path is None and catalog_json is None:
65+
mirror = DEFAULT_MIRROR
6466
if mirror is not None and (base_path is not None or catalog_json is not None):
6567
raise ValueError(
6668
"Cannot specify both 'mirror' and custom parameters ('base_path', 'catalog_json'). "

src/nnja_ai/io.py

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -145,17 +145,21 @@ def load_parquet(
145145
case "pandas":
146146
import pandas as pd
147147

148-
return pd.concat(
149-
[
150-
pd.read_parquet(
151-
uri,
152-
columns=columns,
153-
storage_options=auth_args,
154-
**backend_kwargs,
155-
)
156-
for uri in parquet_uris
157-
]
158-
)
148+
if not auth_args:
149+
# No auth args were supplied (presumably local files; confirm against callers), so pass the whole list of paths to a single read_parquet call
150+
return pd.read_parquet(parquet_uris, columns=columns, **backend_kwargs)
151+
else:
152+
return pd.concat(
153+
[
154+
pd.read_parquet(
155+
uri,
156+
columns=columns,
157+
storage_options=auth_args,
158+
**backend_kwargs,
159+
)
160+
for uri in parquet_uris
161+
]
162+
)
159163
case "polars":
160164
import polars as pl
161165

0 commit comments

Comments
 (0)