ci(datasets): Unpin dask (#522)
* Unpin dask

Signed-off-by: Ankita Katiyar <[email protected]>

* Update doctest

Signed-off-by: Ankita Katiyar <[email protected]>

* Update doctest

Signed-off-by: Ankita Katiyar <[email protected]>

* Update kedro-datasets/setup.py

Co-authored-by: Nok Lam Chan <[email protected]>
Signed-off-by: Ankita Katiyar <[email protected]>

---------

Signed-off-by: Ankita Katiyar <[email protected]>
Signed-off-by: Ankita Katiyar <[email protected]>
Co-authored-by: Nok Lam Chan <[email protected]>
ankatiyar and noklam authored Jan 22, 2024
1 parent 0986bfa commit 52c2563
Showing 2 changed files with 7 additions and 5 deletions.
10 changes: 6 additions & 4 deletions kedro-datasets/kedro_datasets/dask/parquet_dataset.py
@@ -39,9 +39,9 @@ class ParquetDataset(AbstractDataset[dd.DataFrame, dd.DataFrame]):
>>> import dask.dataframe as dd
>>> import pandas as pd
>>> from kedro_datasets.dask import ParquetDataset
- >>> from pandas.testing import assert_frame_equal
+ >>> import numpy as np
>>>
- >>> data = pd.DataFrame({"col1": [1, 2], "col2": [4, 5], "col3": [[5, 6], [7, 8]]})
+ >>> data = pd.DataFrame({"col1": [1, 2], "col2": [4, 5], "col3": [6, 7]})
>>> ddf = dd.from_pandas(data, npartitions=2)
>>>
>>> dataset = ParquetDataset(
@@ -50,7 +50,7 @@ class ParquetDataset(AbstractDataset[dd.DataFrame, dd.DataFrame]):
>>> dataset.save(ddf)
>>> reloaded = dataset.load()
>>>
- >>> assert_frame_equal(ddf.compute(), reloaded.compute())
+ >>> assert np.array_equal(ddf.compute(), reloaded.compute())
The output schema can also be explicitly specified using
`Triad <https://triad.readthedocs.io/en/latest/api/\
@@ -145,7 +145,9 @@ def _load(self) -> dd.DataFrame:

    def _save(self, data: dd.DataFrame) -> None:
        self._process_schema()
-        data.to_parquet(self._filepath, storage_options=self.fs_args, **self._save_args)
+        data.to_parquet(
+            path=self._filepath, storage_options=self.fs_args, **self._save_args
+        )

    def _process_schema(self) -> None:
        """This method processes the schema in the catalog.yml or the API, if provided.
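A side note on the doctest change above: np.array_equal compares only the cell values of the two frames, while the previously used pandas.testing.assert_frame_equal also checks index, columns and dtypes. A minimal sketch of the difference (plain pandas/numpy, no Kedro or Dask needed; the frames below are illustrative, not taken from the test suite):

import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal

left = pd.DataFrame({"col1": [1, 2], "col2": [4, 5], "col3": [6, 7]})
right = left.astype({"col3": "float64"})  # same values, different dtype

# The value-only comparison passes despite the dtype difference ...
assert np.array_equal(left, right)

# ... while the stricter frame comparison raises on the dtype mismatch.
try:
    assert_frame_equal(left, right)
except AssertionError:
    print("assert_frame_equal is stricter: dtype mismatch detected")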
2 changes: 1 addition & 1 deletion kedro-datasets/setup.py
@@ -180,7 +180,7 @@ def _collect_requirements(requires):
"cloudpickle<=2.0.0",
"compress-pickle[lz4]~=2.1.0",
"coverage[toml]",
"dask[complete]~=2021.10", # pinned by Snyk to avoid a vulnerability
"dask[complete]>=2021.10",
"delta-spark>=1.0, <3.0",
"deltalake>=0.10.0",
"dill~=0.3.1",
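For context on what the unpin changes in practice: ~=2021.10 is a compatible-release specifier (equivalent to >=2021.10, ==2021.*), so it excluded every dask release from 2022 onwards, while >=2021.10 only sets a lower bound. A small sketch using the packaging library (not part of this diff; the version numbers are illustrative):

from packaging.specifiers import SpecifierSet
from packaging.version import Version

old = SpecifierSet("~=2021.10")  # compatible release: >=2021.10 and ==2021.*
new = SpecifierSet(">=2021.10")  # plain lower bound, no upper cap

print(Version("2021.12") in old, Version("2021.12") in new)  # True True
print(Version("2024.1") in old, Version("2024.1") in new)    # False True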
