diff --git a/ci/doc.yml b/ci/doc.yml index 5a3afbdf..db4f92b1 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -2,18 +2,19 @@ name: datatree-doc channels: - conda-forge dependencies: + - h5netcdf + - ipython + - netcdf4 + - numpydoc - pip + - pooch - python>=3.8 - - netcdf4 - scipy - sphinx - - sphinx-copybutton - - numpydoc - sphinx-autosummary-accessors - - ipython - - h5netcdf + - sphinx-copybutton + - xarray >= 2022.6.0 - zarr - pip: - - git+https://github.com/xarray-contrib/datatree - pangeo-sphinx-book-theme - - xarray>=2022.05.0.dev0 + - -e .. diff --git a/ci/environment.yml b/ci/environment.yml index 1aa9af93..f1ebe03d 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -3,14 +3,14 @@ channels: - conda-forge - nodefaults dependencies: - - python>=3.8 - - netcdf4 - - pytest - - flake8 - black - codecov - - pytest-cov + - flake8 - h5netcdf + - netcdf4 + - pooch + - pytest + - pytest-cov + - python>=3.8 + - xarray>=2022.6.0 - zarr - - pip: - - xarray>=2022.05.0.dev0 diff --git a/datatree/__init__.py b/datatree/__init__.py index 8de251a4..72f34d58 100644 --- a/datatree/__init__.py +++ b/datatree/__init__.py @@ -1,3 +1,5 @@ +from . 
import tutorial
+
 # import public API
 from .datatree import DataTree
 from .io import open_datatree
@@ -17,4 +19,5 @@
     "TreeIsomorphismError",
     "map_over_subtree",
     "__version__",
+    "tutorial",
 )
diff --git a/datatree/tests/test_tutorial.py b/datatree/tests/test_tutorial.py
new file mode 100644
index 00000000..a1c234c4
--- /dev/null
+++ b/datatree/tests/test_tutorial.py
@@ -0,0 +1,29 @@
+import sys
+
+import pytest
+
+from datatree import DataTree, tutorial
+
+
+@pytest.mark.network
+class TestLoadSampleData:
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        self.asset = "cesm2-lens"
+
+    @pytest.fixture
+    def monkeypatch_import_error(self, monkeypatch):
+        monkeypatch.setitem(sys.modules, "pooch", None)
+
+    def test_download(self, tmp_path) -> None:
+        cache_dir = tmp_path / tutorial._default_cache_dir_name
+        dt = tutorial.open_datatree(self.asset, cache_dir=cache_dir)
+        assert isinstance(dt, DataTree)
+
+    def test_pooch_import_error(self, monkeypatch_import_error):
+        with pytest.raises(ImportError):
+            tutorial.open_datatree(self.asset)
+
+    def test_invalid_datatree_sample_key(self):
+        with pytest.raises(KeyError):
+            tutorial.open_datatree("invalid")
diff --git a/datatree/tutorial.py b/datatree/tutorial.py
new file mode 100644
index 00000000..0122fd62
--- /dev/null
+++ b/datatree/tutorial.py
@@ -0,0 +1,95 @@
+from __future__ import annotations
+
+import os
+import pathlib
+import typing
+
+from .datatree import DataTree
+from .io import open_datatree as _open_datatree
+
+_default_cache_dir_name = "xarray_datatree_data"
+base_url = "https://carbonplan-share.s3.us-west-2.amazonaws.com/xarray-datatree"
+
+
+# Maps each sample name accepted by ``open_datatree`` to its file name under ``base_url``.
+SAMPLE_DATASETS = {
+    "cesm2-lens": "cesm2-lens.nc",
+    "cmip6": "cmip6.nc",
+}
+
+
+def _construct_cache_dir(path):
+    """Normalize ``path`` to a cache directory, defaulting to pooch's OS cache."""
+    if path is None:
+        # pooch is optional, so import it only when the default location is needed.
+        import pooch
+
+        return pooch.os_cache(_default_cache_dir_name)
+    if isinstance(path, os.PathLike):
+        return os.fspath(path)
+    return path
+
+
+def open_datatree(
+    name: typing.Literal["cesm2-lens", "cmip6"],
+    cache_dir: str | pathlib.Path | None = None,
+    *,
+    engine: str = "netcdf4",
+    **kwargs,
+) -> DataTree:
+    """
+    Open a datatree from the xarray-datatree online repository (requires internet access).
+
+    Parameters
+    ----------
+    name : str
+        The name of the datatree to open. Valid names are
+
+        * ``'cesm2-lens'``
+        * ``'cmip6'``
+
+    cache_dir : str or pathlib.Path, optional
+        The directory to cache the datatree in. If None, the default cache directory is used.
+    engine : str
+        The engine to use for the datatree.
+    kwargs : dict
+        Additional keyword arguments to pass to the xarray.open_dataset function.
+
+    Returns
+    -------
+    datatree : DataTree
+        The datatree.
+
+    Raises
+    ------
+    ImportError
+        If pooch cannot be imported.
+    KeyError
+        If ``name`` is not a key of ``SAMPLE_DATASETS``.
+
+    """
+    try:
+        import pooch
+    except ImportError as e:
+        raise ImportError(
+            "pooch is required to download and open sample datasets. To proceed please install pooch using: `python -m pip install pooch` or `conda install -c conda-forge pooch`."
+        ) from e
+
+    # Resolve the name first so an invalid one fails before the pooch logger
+    # level is changed or the cache directory is resolved.
+    try:
+        path = SAMPLE_DATASETS[name]
+    except KeyError as exc:
+        raise KeyError(
+            f"{name} is not a valid sample dataset. Valid names are {list(SAMPLE_DATASETS.keys())}"
+        ) from exc
+
+    # Only let pooch log warnings and above; the level is not restored afterwards.
+    logger = pooch.get_logger()
+    logger.setLevel("WARNING")
+    cache_dir = _construct_cache_dir(cache_dir)
+
+    url = f"{base_url}/{path}"
+    # known_hash=None: no expected checksum is supplied for the download.
+    asset_path = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
+    return _open_datatree(asset_path, engine=engine, **kwargs)
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 9ad74190..e27c4c12 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -287,6 +287,12 @@ Create or
 Tutorial
 ========
 
+.. autosummary::
+   :toctree: generated/
+
+   tutorial.open_datatree
+
+
 Testing
 =======
 
diff --git a/docs/source/whats-new.rst b/docs/source/whats-new.rst
index 514dda9e..d368124c 100644
--- a/docs/source/whats-new.rst
+++ b/docs/source/whats-new.rst
@@ -23,6 +23,9 @@ v0.0.10 (unreleased)
 New Features
 ~~~~~~~~~~~~
 
+- Add `tutorial` module for accessing sample datasets (:pull:`142`).
+  By `Anderson Banihirwe <https://github.com/andersy005>`_.
+ Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/setup.cfg b/setup.cfg index 9a5664de..5111a899 100644 --- a/setup.cfg +++ b/setup.cfg @@ -52,3 +52,7 @@ known_first_party = datatree [mypy] files = datatree/**/*.py show_error_codes = True + +[tool:pytest] +markers = + network: tests requiring a network connection