Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(datasets): verify file exists if on Polars 1.0 #957

Merged
merged 9 commits into from
Dec 6, 2024
10 changes: 8 additions & 2 deletions kedro-datasets/kedro_datasets/polars/csv_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,21 @@ class CSVDataset(AbstractVersionedDataset[pl.DataFrame, pl.DataFrame]):

.. code-block:: pycon

>>> from kedro_datasets.polars import CSVDataset
>>> import sys
>>>
>>> import polars as pl
>>> import pytest
>>> from kedro_datasets.polars import CSVDataset
>>>
>>> if sys.platform.startswith("win"):
... pytest.skip("this doctest fails on Windows CI runner")
...
>>> data = pl.DataFrame({"col1": [1, 2], "col2": [4, 5], "col3": [5, 6]})
>>>
>>> dataset = CSVDataset(filepath=tmp_path / "test.csv")
>>> dataset.save(data)
>>> reloaded = dataset.load()
>>> assert data.frame_equal(reloaded)
>>> assert data.equals(reloaded)

"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class EagerPolarsDataset(AbstractVersionedDataset[pl.DataFrame, pl.DataFrame]):
>>> dataset = EagerPolarsDataset(filepath=tmp_path / "test.parquet", file_format="parquet")
>>> dataset.save(data)
>>> reloaded = dataset.load()
>>> assert data.frame_equal(reloaded)
>>> assert data.equals(reloaded)

"""

Expand Down
6 changes: 5 additions & 1 deletion kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
"""
from __future__ import annotations

import errno
import logging
import os
from copy import deepcopy
from pathlib import PurePosixPath
from typing import Any, ClassVar
Expand Down Expand Up @@ -69,7 +71,7 @@ class LazyPolarsDataset(
>>> dataset = LazyPolarsDataset(filepath=tmp_path / "test.csv", file_format="csv")
>>> dataset.save(data)
>>> reloaded = dataset.load()
>>> assert data.frame_equal(reloaded.collect())
>>> assert data.equals(reloaded.collect())

"""

Expand Down Expand Up @@ -199,6 +201,8 @@ def _describe(self) -> dict[str, Any]:

def load(self) -> pl.LazyFrame:
load_path = str(self._get_load_path())
if not self._exists():
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), load_path)

if self._protocol == "file":
# With local filesystems, we can use Polars' built-in I/O method:
Expand Down
2 changes: 1 addition & 1 deletion kedro-datasets/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ test = [
"pandas>=2.0",
"Pillow~=10.0",
"plotly>=4.8.0, <6.0",
"polars[xlsx2csv, deltalake]~=0.18.0",
"polars[deltalake,xlsx2csv]>=1.0",
"pyarrow>=1.0; python_version < '3.11'",
"pyarrow>=7.0; python_version >= '3.11'", # Adding to avoid numpy build errors
"pyodbc~=5.0",
Expand Down
1 change: 1 addition & 0 deletions kedro-datasets/tests/polars/test_eager_polars_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def excel_dataset(dummy_dataframe: pl.DataFrame, filepath_excel):
return EagerPolarsDataset(
filepath=filepath_excel.as_posix(),
file_format="excel",
load_args={"engine": "xlsx2csv"},
)


Expand Down
Loading