diff --git a/doc/api.rst b/doc/api.rst index 87f116514cc..63fb59bc5e0 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -705,16 +705,16 @@ Pathlib-like Interface DataTree.parents DataTree.relative_to -Missing: +.. Missing: -.. +.. .. - ``DataTree.glob`` - ``DataTree.joinpath`` - ``DataTree.with_name`` - ``DataTree.walk`` - ``DataTree.rename`` - ``DataTree.replace`` +.. ``DataTree.glob`` +.. ``DataTree.joinpath`` +.. ``DataTree.with_name`` +.. ``DataTree.walk`` +.. ``DataTree.rename`` +.. ``DataTree.replace`` DataTree Contents ----------------- @@ -725,17 +725,18 @@ Manipulate the contents of all nodes in a ``DataTree`` simultaneously. :toctree: generated/ DataTree.copy - DataTree.assign_coords - DataTree.merge - DataTree.rename - DataTree.rename_vars - DataTree.rename_dims - DataTree.swap_dims - DataTree.expand_dims - DataTree.drop_vars - DataTree.drop_dims - DataTree.set_coords - DataTree.reset_coords + + .. DataTree.assign_coords + .. DataTree.merge + .. DataTree.rename + .. DataTree.rename_vars + .. DataTree.rename_dims + .. DataTree.swap_dims + .. DataTree.expand_dims + .. DataTree.drop_vars + .. DataTree.drop_dims + .. DataTree.set_coords + .. DataTree.reset_coords DataTree Node Contents ---------------------- @@ -760,129 +761,129 @@ Compare one ``DataTree`` object to another. DataTree.equals DataTree.identical -Indexing --------- - -Index into all nodes in the subtree simultaneously. - -.. autosummary:: - :toctree: generated/ - - DataTree.isel - DataTree.sel - DataTree.drop_sel - DataTree.drop_isel - DataTree.head - DataTree.tail - DataTree.thin - DataTree.squeeze - DataTree.interp - DataTree.interp_like - DataTree.reindex - DataTree.reindex_like - DataTree.set_index - DataTree.reset_index - DataTree.reorder_levels - DataTree.query - -.. - - Missing: - ``DataTree.loc`` - - -Missing Value Handling ----------------------- - -.. 
autosummary:: - :toctree: generated/ - - DataTree.isnull - DataTree.notnull - DataTree.combine_first - DataTree.dropna - DataTree.fillna - DataTree.ffill - DataTree.bfill - DataTree.interpolate_na - DataTree.where - DataTree.isin - -Computation ------------ - -Apply a computation to the data in all nodes in the subtree simultaneously. - -.. autosummary:: - :toctree: generated/ - - DataTree.map - DataTree.reduce - DataTree.diff - DataTree.quantile - DataTree.differentiate - DataTree.integrate - DataTree.map_blocks - DataTree.polyfit - DataTree.curvefit - -Aggregation ------------ - -Aggregate data in all nodes in the subtree simultaneously. - -.. autosummary:: - :toctree: generated/ - - DataTree.all - DataTree.any - DataTree.argmax - DataTree.argmin - DataTree.idxmax - DataTree.idxmin - DataTree.max - DataTree.min - DataTree.mean - DataTree.median - DataTree.prod - DataTree.sum - DataTree.std - DataTree.var - DataTree.cumsum - DataTree.cumprod - -ndarray methods ---------------- - -Methods copied from :py:class:`numpy.ndarray` objects, here applying to the data in all nodes in the subtree. - -.. autosummary:: - :toctree: generated/ - - DataTree.argsort - DataTree.astype - DataTree.clip - DataTree.conj - DataTree.conjugate - DataTree.round - DataTree.rank - -Reshaping and reorganising --------------------------- - -Reshape or reorganise the data in all nodes in the subtree. - -.. autosummary:: - :toctree: generated/ - - DataTree.transpose - DataTree.stack - DataTree.unstack - DataTree.shift - DataTree.roll - DataTree.pad - DataTree.sortby - DataTree.broadcast_like +.. Indexing +.. -------- + +.. Index into all nodes in the subtree simultaneously. + +.. .. autosummary:: +.. :toctree: generated/ + +.. DataTree.isel +.. DataTree.sel +.. DataTree.drop_sel +.. DataTree.drop_isel +.. DataTree.head +.. DataTree.tail +.. DataTree.thin +.. DataTree.squeeze +.. DataTree.interp +.. DataTree.interp_like +.. DataTree.reindex +.. DataTree.reindex_like +.. DataTree.set_index +.. 
DataTree.reset_index +.. DataTree.reorder_levels +.. DataTree.query + +.. .. + +.. Missing: +.. ``DataTree.loc`` + + +.. Missing Value Handling +.. ---------------------- + +.. .. autosummary:: +.. :toctree: generated/ + +.. DataTree.isnull +.. DataTree.notnull +.. DataTree.combine_first +.. DataTree.dropna +.. DataTree.fillna +.. DataTree.ffill +.. DataTree.bfill +.. DataTree.interpolate_na +.. DataTree.where +.. DataTree.isin + +.. Computation +.. ----------- + +.. Apply a computation to the data in all nodes in the subtree simultaneously. + +.. .. autosummary:: +.. :toctree: generated/ + +.. DataTree.map +.. DataTree.reduce +.. DataTree.diff +.. DataTree.quantile +.. DataTree.differentiate +.. DataTree.integrate +.. DataTree.map_blocks +.. DataTree.polyfit +.. DataTree.curvefit + +.. Aggregation +.. ----------- + +.. Aggregate data in all nodes in the subtree simultaneously. + +.. .. autosummary:: +.. :toctree: generated/ + +.. DataTree.all +.. DataTree.any +.. DataTree.argmax +.. DataTree.argmin +.. DataTree.idxmax +.. DataTree.idxmin +.. DataTree.max +.. DataTree.min +.. DataTree.mean +.. DataTree.median +.. DataTree.prod +.. DataTree.sum +.. DataTree.std +.. DataTree.var +.. DataTree.cumsum +.. DataTree.cumprod + +.. ndarray methods +.. --------------- + +.. Methods copied from :py:class:`numpy.ndarray` objects, here applying to the data in all nodes in the subtree. + +.. .. autosummary:: +.. :toctree: generated/ + +.. DataTree.argsort +.. DataTree.astype +.. DataTree.clip +.. DataTree.conj +.. DataTree.conjugate +.. DataTree.round +.. DataTree.rank + +.. Reshaping and reorganising +.. -------------------------- + +.. Reshape or reorganise the data in all nodes in the subtree. + +.. .. autosummary:: +.. :toctree: generated/ + +.. DataTree.transpose +.. DataTree.stack +.. DataTree.unstack +.. DataTree.shift +.. DataTree.roll +.. DataTree.pad +.. DataTree.sortby +.. 
DataTree.broadcast_like IO / Conversion =============== @@ -961,10 +962,10 @@ DataTree methods DataTree.to_netcdf DataTree.to_zarr -.. +.. .. - Missing: - ``open_mfdatatree`` +.. Missing: +.. ``open_mfdatatree`` Coordinates objects =================== @@ -1476,10 +1477,10 @@ Advanced API backends.list_engines backends.refresh_engines -.. +.. .. - Missing: - ``DataTree.set_close`` +.. Missing: +.. ``DataTree.set_close`` Default, pandas-backed indexes built-in Xarray: diff --git a/doc/getting-started-guide/quick-overview.rst b/doc/getting-started-guide/quick-overview.rst index 5efe3acc609..fbe81b2e895 100644 --- a/doc/getting-started-guide/quick-overview.rst +++ b/doc/getting-started-guide/quick-overview.rst @@ -314,23 +314,29 @@ And you can get a copy of just the node local values of :py:class:`~xarray.Datas ds_node_local = dt["simulation/coarse"].to_dataset(inherited=False) ds_node_local -Operations map over subtrees, so we can take a mean over the ``x`` dimension of both the ``fine`` and ``coarse`` groups just by: +.. note:: -.. ipython:: python + We intend to eventually implement most :py:class:`~xarray.Dataset` methods + (indexing, aggregation, arithmetic, etc.) on :py:class:`~xarray.DataTree` + objects, but many methods have not been implemented yet. - avg = dt["simulation"].mean(dim="x") - avg +.. Operations map over subtrees, so we can take a mean over the ``x`` dimension of both the ``fine`` and ``coarse`` groups just by: -Here the ``"x"`` dimension used is always the one local to that subgroup. +.. .. ipython:: python +.. avg = dt["simulation"].mean(dim="x") +.. avg -You can do almost everything you can do with :py:class:`~xarray.Dataset` objects with :py:class:`~xarray.DataTree` objects -(including indexing and arithmetic), as operations will be mapped over every subgroup in the tree. -This allows you to work with multiple groups of non-alignable variables at once. +.. Here the ``"x"`` dimension used is always the one local to that subgroup. -.. 
note:: - If all of your variables are mutually alignable (i.e. they live on the same +.. You can do almost everything you can do with :py:class:`~xarray.Dataset` objects with :py:class:`~xarray.DataTree` objects +.. (including indexing and arithmetic), as operations will be mapped over every subgroup in the tree. +.. This allows you to work with multiple groups of non-alignable variables at once. + +.. tip:: + + If all of your variables are mutually alignable (i.e., they live on the same grid, such that every common dimension name maps to the same length), then you probably don't need :py:class:`xarray.DataTree`, and should consider just sticking with :py:class:`xarray.Dataset`. diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 82769fcdda1..f8da26c28b5 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -24,12 +24,6 @@ check_isomorphic, map_over_subtree, ) -from xarray.core.datatree_ops import ( - DataTreeArithmeticMixin, - MappedDatasetMethodsMixin, - MappedDataWithCoords, -) -from xarray.core.datatree_render import RenderDataTree from xarray.core.formatting import datatree_repr, dims_and_coords_repr from xarray.core.formatting_html import ( datatree_repr as datatree_repr_html, @@ -404,9 +398,6 @@ def map( # type: ignore[override] class DataTree( NamedNode["DataTree"], - MappedDatasetMethodsMixin, - MappedDataWithCoords, - DataTreeArithmeticMixin, TreeAttrAccessMixin, Mapping[str, "DataArray | DataTree"], ): @@ -1420,34 +1411,6 @@ def map_over_subtree( # TODO fix this typing error return map_over_subtree(func)(self, *args, **kwargs) - def map_over_subtree_inplace( - self, - func: Callable, - *args: Iterable[Any], - **kwargs: Any, - ) -> None: - """ - Apply a function to every dataset in this subtree, updating data in place. - - Parameters - ---------- - func : callable - Function to apply to datasets with signature: - `func(node.dataset, *args, **kwargs) -> Dataset`. 
- - Function will not be applied to any nodes without datasets, - *args : tuple, optional - Positional arguments passed on to `func`. - **kwargs : Any - Keyword arguments passed on to `func`. - """ - - # TODO if func fails on some node then the previous nodes will still have been updated... - - for node in self.subtree: - if node.has_data: - node.dataset = func(node.dataset, *args, **kwargs) - def pipe( self, func: Callable | tuple[Callable, str], *args: Any, **kwargs: Any ) -> Any: @@ -1508,29 +1471,11 @@ def pipe( args = (self,) + args return func(*args, **kwargs) - def render(self): - """Print tree structure, including any data stored at each node.""" - for pre, fill, node in RenderDataTree(self): - print(f"{pre}DataTree('{self.name}')") - for ds_line in repr(node.dataset)[1:]: - print(f"{fill}{ds_line}") - - def merge(self, datatree: DataTree) -> DataTree: - """Merge all the leaves of a second DataTree into this one.""" - raise NotImplementedError - - def merge_child_nodes(self, *paths, new_path: T_Path) -> DataTree: - """Merge a set of child nodes into a single new node.""" - raise NotImplementedError - # TODO some kind of .collapse() or .flatten() method to merge a subtree - def to_dataarray(self) -> DataArray: - return self.dataset.to_dataarray() - @property def groups(self): - """Return all netCDF4 groups in the tree, given as a tuple of path-like strings.""" + """Return all groups in the tree, given as a tuple of path-like strings.""" return tuple(node.path for node in self.subtree) def to_netcdf( @@ -1662,6 +1607,3 @@ def to_zarr( compute=compute, **kwargs, ) - - def plot(self): - raise NotImplementedError diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 3365e493090..3a3afb0647a 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -668,10 +668,11 @@ def test_modify(self): actual.coords["x"] = ("x", [-1]) assert_identical(actual, dt) # should not be modified - actual = dt.copy() - del 
actual.coords["b"] - expected = dt.reset_coords("b", drop=True) - assert_identical(expected, actual) + # TODO: re-enable after implementing reset_coords() + # actual = dt.copy() + # del actual.coords["b"] + # expected = dt.reset_coords("b", drop=True) + # assert_identical(expected, actual) with pytest.raises(KeyError): del dt.coords["not_found"] @@ -679,14 +680,15 @@ def test_modify(self): with pytest.raises(KeyError): del dt.coords["foo"] - actual = dt.copy(deep=True) - actual.coords.update({"c": 11}) - expected = dt.assign_coords({"c": 11}) - assert_identical(expected, actual) + # TODO: re-enable after implementing assign_coords() + # actual = dt.copy(deep=True) + # actual.coords.update({"c": 11}) + # expected = dt.assign_coords({"c": 11}) + # assert_identical(expected, actual) - # regression test for GH3746 - del actual.coords["x"] - assert "x" not in actual.xindexes + # # regression test for GH3746 + # del actual.coords["x"] + # assert "x" not in actual.xindexes # test that constructors can also handle the `DataTreeCoordinates` object ds2 = Dataset(coords=dt.coords) @@ -969,6 +971,7 @@ def test_ipython_key_completions(self, create_test_datatree): var_keys = list(dt.variables.keys()) assert all(var_key in key_completions for var_key in var_keys) + @pytest.mark.xfail(reason="sel not implemented yet") def test_operation_with_attrs_but_no_data(self): # tests bug from xarray-datatree GH262 xs = xr.Dataset({"testvar": xr.DataArray(np.ones((2, 3)))}) @@ -1559,6 +1562,7 @@ def test_filter(self): class TestDSMethodInheritance: + @pytest.mark.xfail(reason="isel not implemented yet") def test_dataset_method(self): ds = xr.Dataset({"a": ("x", [1, 2, 3])}) dt = DataTree.from_dict( @@ -1578,6 +1582,7 @@ def test_dataset_method(self): result = dt.isel(x=1) assert_equal(result, expected) + @pytest.mark.xfail(reason="reduce methods not implemented yet") def test_reduce_method(self): ds = xr.Dataset({"a": ("x", [False, True, False])}) dt = DataTree.from_dict({"/": ds, 
"/results": ds}) @@ -1587,6 +1592,7 @@ def test_reduce_method(self): result = dt.any() assert_equal(result, expected) + @pytest.mark.xfail(reason="reduce methods not implemented yet") def test_nan_reduce_method(self): ds = xr.Dataset({"a": ("x", [1, 2, 3])}) dt = DataTree.from_dict({"/": ds, "/results": ds}) @@ -1596,6 +1602,7 @@ def test_nan_reduce_method(self): result = dt.mean() assert_equal(result, expected) + @pytest.mark.xfail(reason="cum methods not implemented yet") def test_cum_method(self): ds = xr.Dataset({"a": ("x", [1, 2, 3])}) dt = DataTree.from_dict({"/": ds, "/results": ds}) @@ -1612,6 +1619,7 @@ def test_cum_method(self): class TestOps: + @pytest.mark.xfail(reason="arithmetic not implemented yet") def test_binary_op_on_int(self): ds1 = xr.Dataset({"a": [5], "b": [3]}) ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]}) @@ -1623,6 +1631,7 @@ def test_binary_op_on_int(self): result: DataTree = dt * 5 # type: ignore[assignment,operator] assert_equal(result, expected) + @pytest.mark.xfail(reason="arithmetic not implemented yet") def test_binary_op_on_dataset(self): ds1 = xr.Dataset({"a": [5], "b": [3]}) ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]}) @@ -1645,6 +1654,7 @@ def test_binary_op_on_dataset(self): result = dt * other_ds assert_equal(result, expected) + @pytest.mark.xfail(reason="arithmetic not implemented yet") def test_binary_op_on_datatree(self): ds1 = xr.Dataset({"a": [5], "b": [3]}) ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]}) @@ -1657,6 +1667,7 @@ def test_binary_op_on_datatree(self): result = dt * dt # type: ignore[operator] assert_equal(result, expected) + @pytest.mark.xfail(reason="arithmetic not implemented yet") def test_arithmetic_inherited_coords(self): tree = DataTree(xr.Dataset(coords={"x": [1, 2, 3]})) tree["/foo"] = DataTree(xr.Dataset({"bar": ("x", [4, 5, 6])})) @@ -1671,6 +1682,8 @@ def test_arithmetic_inherited_coords(self): class TestUFuncs: + + @pytest.mark.xfail(reason="__array_ufunc__ not implemented yet") def 
test_tree(self, create_test_datatree): dt = create_test_datatree() expected = create_test_datatree(modify=lambda ds: np.sin(ds))