diff --git a/ibis_ml/__init__.py b/ibis_ml/__init__.py index 5fa7ac0..cc9b028 100644 --- a/ibis_ml/__init__.py +++ b/ibis_ml/__init__.py @@ -44,11 +44,12 @@ def _auto_patch_skorch() -> None: return import ibis.expr.types as ir - import numpy as np old_fit = skorch.net.NeuralNet.fit def fit(self, X, y=None, **fit_params): + import numpy as np + if isinstance(y, ir.Column): y = np.asarray(y) diff --git a/ibis_ml/core.py b/ibis_ml/core.py index 72f314a..68dacf9 100644 --- a/ibis_ml/core.py +++ b/ibis_ml/core.py @@ -12,14 +12,14 @@ import ibis import ibis.expr.operations as ops import ibis.expr.types as ir -import numpy as np -import pandas as pd -import pyarrow as pa from ibis.common.dispatch import lazy_singledispatch if TYPE_CHECKING: import dask.dataframe as dd + import numpy as np + import pandas as pd import polars as pl + import pyarrow as pa import xgboost as xgb from sklearn.utils._estimator_html_repr import _VisualBlock @@ -45,6 +45,9 @@ def _ibis_table_to_numpy(table: ir.Table) -> np.ndarray: def _y_as_dataframe(y: Any) -> pd.DataFrame: """Coerce `y` to a pandas dataframe""" + import numpy as np + import pandas as pd + if isinstance(y, pd.DataFrame): return y elif isinstance(y, pd.Series): @@ -144,8 +147,11 @@ def _(X, y=None, maintain_order=False): return table, tuple(y.columns), None -@normalize_table.register(pd.DataFrame) +@normalize_table.register("pandas.DataFrame") def _(X, y=None, maintain_order=False): + import numpy as np + import pandas as pd + if y is not None: y = _y_as_dataframe(y) table = pd.concat([X, y], axis=1) @@ -162,8 +168,11 @@ def _(X, y=None, maintain_order=False): return ibis.memtable(table), targets, index -@normalize_table.register(np.ndarray) +@normalize_table.register("numpy.ndarray") def _(X, y=None, maintain_order=False): + import numpy as np + import pandas as pd + X = pd.DataFrame(X, columns=[f"x{i}" for i in range(X.shape[-1])]) if y is not None: y = _y_as_dataframe(y) @@ -181,8 +190,11 @@ def _(X, y=None, maintain_order=False): 
return ibis.memtable(table), targets, index -@normalize_table.register(pa.Table) +@normalize_table.register("pyarrow.Table") def _(X, y=None, maintain_order=False): + import numpy as np + import pyarrow as pa + if y is not None: if isinstance(y, (pa.ChunkedArray, pa.Array)): y = pa.Table.from_pydict({"y": y}) @@ -246,6 +258,8 @@ def get_categories(self, column: str) -> pa.Array | None: return self.categories.get(column) def set_categories(self, column: str, values: pa.Array | list[Any]) -> None: + import pyarrow as pa + self.categories[column] = pa.array(values) def drop_categories(self, column: str) -> None: @@ -255,6 +269,8 @@ def drop_categories(self, column: str) -> None: def _categorize_wrap_reader( reader: pa.RecordBatchReader, categories: dict[str, pa.Array] ) -> Iterable[pa.RecordBatch]: + import pyarrow as pa + for batch in reader: out = {} for name, col in zip(batch.schema.names, batch.columns): @@ -620,6 +636,8 @@ def _categorize_pandas(self, df: pd.DataFrame) -> pd.DataFrame: return df def _categorize_pyarrow(self, table: pa.Table) -> pa.Table: + import pyarrow as pa + if not self.metadata_.categories: return table @@ -645,6 +663,8 @@ def _categorize_dask_dataframe(self, ddf: dd.DataFrame) -> dd.DataFrame: def _categorize_pyarrow_batches( self, reader: pa.RecordBatchReader ) -> pa.RecordBatchReader: + import pyarrow as pa + if not self.metadata_.categories: return reader diff --git a/ibis_ml/steps/_discretize.py b/ibis_ml/steps/_discretize.py index b40ad1b..50ae92a 100644 --- a/ibis_ml/steps/_discretize.py +++ b/ibis_ml/steps/_discretize.py @@ -4,7 +4,6 @@ import ibis import ibis.expr.types as ir -import numpy as np from ibis_ml.core import Metadata, Step from ibis_ml.select import SelectionType, selector @@ -94,6 +93,8 @@ def fit_table(self, table: ir.Table, metadata: Metadata) -> None: def _fit_uniform_strategy( self, table: ir.Table, columns: list[str] ) -> dict[str, list[float]]: + import numpy as np + aggs = [] for col_name in columns: col = 
table[col_name] @@ -117,6 +118,8 @@ def _fit_uniform_strategy( def _fit_quantile_strategy( self, table: ir.Table, columns: list[str] ) -> dict[str, list[float]]: + import numpy as np + aggs = [] percentiles = np.linspace(0, 1, self.n_bins + 1) for col_name in columns: diff --git a/tests/test_optional_dependencies.py b/tests/test_optional_dependencies.py new file mode 100644 index 0000000..f0d4508 --- /dev/null +++ b/tests/test_optional_dependencies.py @@ -0,0 +1,15 @@ +import sys +from importlib import import_module, reload +from unittest.mock import patch + +import pytest + + +# https://stackoverflow.com/a/65163627 +@pytest.mark.parametrize("optional_dependency", ["numpy", "pandas", "pyarrow"]) +def test_without_dependency(optional_dependency): + with patch.dict(sys.modules, {optional_dependency: None}): + if "ibis_ml" in sys.modules: + reload(sys.modules["ibis_ml"]) + else: + import_module("ibis_ml")