Skip to content
This repository was archived by the owner on Oct 24, 2024. It is now read-only.

Add tutorial module for sample datasets #142

Closed
wants to merge 3 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions ci/doc.yml
Original file line number Diff line number Diff line change
@@ -2,18 +2,19 @@ name: datatree-doc
channels:
- conda-forge
dependencies:
- h5netcdf
- ipython
- netcdf4
- numpydoc
- pip
- pooch
- python>=3.8
- netcdf4
- scipy
- sphinx
- sphinx-copybutton
- numpydoc
- sphinx-autosummary-accessors
- ipython
- h5netcdf
- sphinx-copybutton
- xarray >= 2022.6.0
- zarr
- pip:
- git+https://github.com/xarray-contrib/datatree
- pangeo-sphinx-book-theme
- xarray>=2022.05.0.dev0
- -e ..
14 changes: 7 additions & 7 deletions ci/environment.yml
Original file line number Diff line number Diff line change
@@ -3,14 +3,14 @@ channels:
- conda-forge
- nodefaults
dependencies:
- python>=3.8
- netcdf4
- pytest
- flake8
- black
- codecov
- pytest-cov
- flake8
- h5netcdf
- netcdf4
- pooch
- pytest
- pytest-cov
- python>=3.8
- xarray>=2022.6.0
- zarr
- pip:
- xarray>=2022.05.0.dev0
3 changes: 3 additions & 0 deletions datatree/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from . import tutorial

# import public API
from .datatree import DataTree
from .io import open_datatree
@@ -17,4 +19,5 @@
"TreeIsomorphismError",
"map_over_subtree",
"__version__",
"tutorial",
)
29 changes: 29 additions & 0 deletions datatree/tests/test_tutorial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import sys

import pytest

from datatree import DataTree, tutorial


@pytest.mark.network
class TestLoadSampleData:
    """Tests for ``tutorial.open_datatree`` using the ``cesm2-lens`` sample."""

    @pytest.fixture(autouse=True)
    def setup(self):
        # Sample name shared by every test in this class.
        self.asset = "cesm2-lens"

    @pytest.fixture
    def monkeypatch_import_error(self, monkeypatch):
        # A ``None`` entry in ``sys.modules`` makes ``import pooch`` fail,
        # simulating an environment where pooch is not installed.
        monkeypatch.setitem(sys.modules, "pooch", None)

    def test_download(self, tmp_path) -> None:
        """Opening a valid sample into a temporary cache yields a DataTree."""
        tree = tutorial.open_datatree(
            self.asset,
            cache_dir=tmp_path / tutorial._default_cache_dir_name,
        )
        assert isinstance(tree, DataTree)

    @pytest.mark.usefixtures("monkeypatch_import_error")
    def test_pooch_import_error(self):
        """A missing pooch installation surfaces as ImportError."""
        with pytest.raises(ImportError):
            tutorial.open_datatree(self.asset)

    def test_invalid_datatree_sample_key(self):
        """An unknown sample name is rejected with KeyError."""
        with pytest.raises(KeyError):
            tutorial.open_datatree("invalid")
81 changes: 81 additions & 0 deletions datatree/tutorial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from __future__ import annotations

import os
import pathlib
import typing

from .datatree import DataTree
from .io import open_datatree as _open_datatree

# Name of the cache folder; passed to ``pooch.os_cache`` when no cache
# directory is given.
_default_cache_dir_name = "xarray_datatree_data"
# URL prefix joined with a ``SAMPLE_DATASETS`` file name to build the
# download URL.
base_url = "https://carbonplan-share.s3.us-west-2.amazonaws.com/xarray-datatree"


# Maps each sample name accepted by ``open_datatree`` to its file name
# under ``base_url``.
SAMPLE_DATASETS = {
    "cesm2-lens": "cesm2-lens.nc",
    "cmip6": "cmip6.nc",
}


def _construct_cache_dir(path):
import pooch

if isinstance(path, os.PathLike):
path = os.fspath(path)
elif path is None:
path = pooch.os_cache(_default_cache_dir_name)

return path


def open_datatree(
    name: typing.Literal["cesm2-lens", "cmip6"],
    cache_dir: str | pathlib.Path | None = None,
    *,
    engine: str = "netcdf4",
    **kwargs,
) -> DataTree:
    """
    Fetch one of the hosted sample files with pooch and open it as a DataTree.

    Parameters
    ----------
    name : {"cesm2-lens", "cmip6"}
        Key into ``SAMPLE_DATASETS`` selecting which sample file to open.
    cache_dir : str | pathlib.Path | None, optional
        Directory passed to ``pooch.retrieve`` as the download location.
        When None, the default from ``_construct_cache_dir`` is used.
    engine : str, default: "netcdf4"
        Forwarded to ``datatree.io.open_datatree``.
    **kwargs
        Extra keyword arguments forwarded unchanged to
        ``datatree.io.open_datatree``.

    Returns
    -------
    datatree : DataTree
        The tree opened from the retrieved file.

    Raises
    ------
    ImportError
        If pooch is not installed.
    KeyError
        If ``name`` is not a key of ``SAMPLE_DATASETS``.
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "pooch is required to download and open sample datasets. To proceed please install pooch using: `python -m pip install pooch` or `conda install -c conda-forge pooch`."
        ) from e

    # Restrict pooch's logger to warnings and above. This changes the level
    # on the logger object that pooch hands out, not on a private copy.
    pooch.get_logger().setLevel("WARNING")
    download_dir = _construct_cache_dir(cache_dir)

    try:
        filename = SAMPLE_DATASETS[name]
    except KeyError as exc:
        raise KeyError(
            f"{name} is not a valid sample dataset. Valid names are {list(SAMPLE_DATASETS.keys())}"
        ) from exc

    # known_hash=None: no checksum is supplied for the retrieved file.
    local_file = pooch.retrieve(
        url=f"{base_url}/{filename}",
        known_hash=None,
        path=download_dir,
    )
    return _open_datatree(local_file, engine=engine, **kwargs)
6 changes: 6 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
@@ -287,6 +287,12 @@ Create or
Tutorial
========

.. autosummary::
:toctree: generated/

tutorial.open_datatree


Testing
=======

3 changes: 3 additions & 0 deletions docs/source/whats-new.rst
Original file line number Diff line number Diff line change
@@ -23,6 +23,9 @@ v0.0.10 (unreleased)
New Features
~~~~~~~~~~~~

- Add ``tutorial`` module for accessing sample datasets (:pull:`142`).
By `Anderson Banihirwe <https://github.com/andersy005>`_.

Breaking changes
~~~~~~~~~~~~~~~~

4 changes: 4 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -52,3 +52,7 @@ known_first_party = datatree
[mypy]
files = datatree/**/*.py
show_error_codes = True

[tool:pytest]
markers =
network: tests requiring a network connection