diff --git a/src/silx/io/tiledh5.py b/src/silx/io/tiledh5.py
new file mode 100644
index 0000000000..adb020b259
--- /dev/null
+++ b/src/silx/io/tiledh5.py
@@ -0,0 +1,203 @@
+# /*##########################################################################
+# Copyright (C) 2024 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ############################################################################*/
+"""
+Provides a wrapper to expose `Tiled <https://blueskyproject.io/tiled/>`_
+
+This is a preview feature.
+"""
+from __future__ import annotations
+
+
+import logging
+import numpy
+from . import commonh5
+import h5py
+import tiled.client
+from tiled.client.cache import Cache
+
+
+_logger = logging.getLogger(__name__)
+
+
+def _get_children(
+    parent: TiledH5 | TiledGroup,
+    container: tiled.client.container.Container,
+    max_children: int | None = None,
+) -> dict[str, commonh5.Group | commonh5.Dataset]:
+    """Return first max_children entries of given container as a dict of commonh5 wrappers.
+
+    Containers are wrapped as :class:`TiledGroup` and arrays as :class:`TiledDataset`.
+    Any other entry is logged and replaced by a dataset holding the string "Unsupported".
+
+    :param parent: The commonh5 wrapper for which to retrieve children.
+    :param container: The tiled container from which to retrieve the entries.
+    :param max_children: The maximum number of children to retrieve.
+        None (the default) retrieves all entries.
+    :return: The wrappers of the retrieved entries indexed by entry key.
+    """
+    items = container.items()
+
+    if max_children is not None and len(items) > max_children:
+        items = items.head(max_children)
+        _logger.warning(
+            f"{container.uri} contains too many entries: Only loading first {max_children}."
+        )
+
+    children = {}
+    for key, client in items:
+        if isinstance(client, tiled.client.container.Container):
+            children[key] = TiledGroup(client, name=key, parent=parent)
+        elif isinstance(client, tiled.client.array.ArrayClient):
+            children[key] = TiledDataset(client, name=key, parent=parent)
+        else:
+            _logger.warning(f"Unsupported child type: {key}: {client}")
+            children[key] = commonh5.Dataset(
+                key,
+                numpy.array("Unsupported", h5py.special_dtype(vlen=str)),
+                parent=parent,
+            )
+    return children
+
+
+class TiledH5(commonh5.File):
+    """tiled client wrapper
+
+    :param name: URI of the tiled container to open.
+    :param mode: Access mode: Only "r" or None are accepted.
+    :param attrs: Attributes passed to :class:`commonh5.File`.
+    :raises ValueError: If mode is not accepted or if name is not the URI of a tiled container.
+    """
+
+    MAX_CHILDREN_PER_GROUP: int | None = None
+    """Maximum number of children to instantiate for each group.
+
+    Set to None for allowing an unbound number of children per group.
+    """
+
+    _cache = None
+    """Shared tiled cache with lazy initialization"""
+
+    def __init__(
+        self,
+        name: str,
+        mode: str | None = None,
+        attrs: dict | None = None,
+    ):
+        # Raise instead of assert: assert statements are skipped with "python -O"
+        if mode not in ("r", None):
+            raise ValueError(f"Unsupported mode: {mode!r}")
+        super().__init__(name, mode, attrs)
+        self.__items = None  # Children wrappers: created by first _get_items call
+        if self._cache is None:
+            TiledH5._cache = Cache()  # Use tiled cache default
+        self.__container = tiled.client.from_uri(name, cache=self._cache)
+        if not isinstance(self.__container, tiled.client.container.Container):
+            raise ValueError(f"Not a tiled container: {name}")
+        _logger.warning("tiled support is a preview feature: This may change or be removed without notice.")
+
+    def close(self):
+        """Close the file and drop the reference to the tiled container."""
+        super().close()
+        self.__container = None
+
+    def _get_items(self):
+        """Return the wrappers of the container entries, created on first call.
+
+        The result is stored per instance: lru_cache on a method uses a cache
+        shared by all instances which keeps a reference to each of them.
+        """
+        if self.__items is None:
+            self.__items = _get_children(
+                self, self.__container, self.MAX_CHILDREN_PER_GROUP
+            )
+        return self.__items
+
+
+class TiledGroup(commonh5.Group):
+    """tiled Container wrapper
+
+    :param container: The tiled container to wrap.
+    :param name: The name of the group.
+    :param parent: The commonh5 wrapper this group belongs to.
+    :param attrs: Attributes passed to :class:`commonh5.Group`.
+    """
+
+    def __init__(
+        self,
+        container: tiled.client.container.Container,
+        name: str,
+        parent: TiledH5 | TiledGroup | None = None,
+        attrs: dict | None = None,
+    ):
+        super().__init__(name, parent, attrs)
+        self.__container = container
+        self.__items = None  # Children wrappers: created by first _get_items call
+
+    def _get_items(self):
+        """Return the wrappers of the container entries, created on first call."""
+        if self.__items is None:
+            self.__items = _get_children(
+                self, self.__container, self.file.MAX_CHILDREN_PER_GROUP
+            )
+        return self.__items
+
+
+class TiledDataset(commonh5.Dataset):
+    """tiled ArrayClient wrapper
+
+    The client is given to commonh5.Dataset as its data
+    and all accessors read it back with ``_get_data()``.
+
+    :param client: The tiled array client to wrap.
+    :param name: The name of the dataset.
+    :param parent: The commonh5 wrapper this dataset belongs to.
+    :param attrs: Attributes passed to :class:`commonh5.Dataset`.
+    """
+
+    def __init__(
+        self,
+        client: tiled.client.array.ArrayClient,
+        name: str,
+        parent: TiledH5 | TiledGroup | None = None,
+        attrs: dict | None = None,
+    ):
+        super().__init__(name, client, parent, attrs)
+
+    @property
+    def shape(self):
+        return self._get_data().shape
+
+    @property
+    def size(self):
+        return self._get_data().size
+
+    def __len__(self):
+        # No __client attribute is ever set: use the client stored as data
+        return len(self._get_data())
+
+    def __getitem__(self, item):
+        return self._get_data()[item]
+
+    @property
+    def value(self):
+        return self._get_data()[()]
diff --git a/src/silx/io/utils.py b/src/silx/io/utils.py
index 352b0c0db1..f655ec0db8 100644
--- a/src/silx/io/utils.py
+++ b/src/silx/io/utils.py
@@ -1,5 +1,5 @@
 # /*##########################################################################
-# Copyright (C) 2016-2023 European Synchrotron Radiation Facility
+# Copyright (C) 2016-2024 European Synchrotron Radiation Facility
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -33,13 +33,11 @@
 import time
 import logging
 from typing import Generator, Union, Optional
-import urllib.parse
 
 import numpy
 
 from silx.utils.proxy import Proxy
 from .url import DataUrl
-from . import h5py_utils
 from .._version import calc_hexversion
 
 import h5py
@@ -51,6 +49,12 @@
 except ImportError as e:
     h5pyd = None
 
+try:
+    from .tiledh5 import TiledH5
+except ImportError as e:
+    TiledH5 = None
+
+
 logger = logging.getLogger(__name__)
 
 NEXUS_HDF5_EXT = [".h5", ".nx5", ".nxs", ".hdf", ".hdf5", ".cxi"]
@@ -693,17 +697,30 @@ def open(filename):  # pylint:disable=redefined-builtin
     :rtype: h5py-like node
     """
     url = DataUrl(filename)
+    if url.scheme() in ("http", "https"):
+        errors = [f"Failed to open {filename}"]
+        if h5pyd is not None:
+            try:
+                return _open_url_with_h5pyd(filename)
+            except Exception as e:
+                errors.append(f"- h5pyd failed: {type(e)} {e}")
 
-    if url.scheme() in [None, "file", "silx"]:
-        # That's a local file
-        if not url.is_valid():
-            raise IOError("URL '%s' is not valid" % filename)
-        h5_file = _open_local_file(url.file_path())
-    elif url.scheme() in ("http", "https"):
-        return _open_url_with_h5pyd(filename)
-    else:
-        raise IOError(f"Unsupported URL scheme {url.scheme}: {filename}")
+        if TiledH5 is not None:
+            try:
+                return TiledH5(filename)
+            except Exception as e:
+                errors.append(f"- tiled failed: {type(e)} {e}")
+
+        raise IOError("\n".join(errors))
+
+    if url.scheme() not in (None, "file", "silx"):
+        raise IOError(f"Unsupported URL scheme {url.scheme()}: {filename}")
 
+    # That's a local file
+    if not url.is_valid():
+        raise IOError("URL '%s' is not valid" % filename)
+    h5_file = _open_local_file(url.file_path())
+
     if url.data_path() in [None, "/", ""]:  # The full file is requested
         if url.data_slice():
             raise IOError(f"URL '{filename}' containing slicing is not supported")