Skip to content

HDF5 error on loading slab dataset #329

@mikekryjak

Description

@mikekryjak

I'm loading a simple 2D slab Hermes-3 case:

case = "neutlim-base-init_only"
ds = xbout.load.open_boutdataset(
    datapath = rf"/home/mike/work/cases/devtests/{case}/BOUT.dmp.0.nc",
    inputfilepath= rf"/home/mike/work/cases/devtests/{case}/BOUT.inp",
    keep_xboundaries = True,
    keep_yboundaries = True,
    info = False,
)

This produces the error below. I assume it is related to the recent change to h5netcdf. @dschwoerer

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/xarray/backends/file_manager.py:219, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    218 try:
--> 219     file = self._cache[self._key]
    220 except KeyError:

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/xarray/backends/lru_cache.py:56, in LRUCache.__getitem__(self, key)
     55 with self._lock:
---> 56     value = self._cache[key]
     57     self._cache.move_to_end(key)

KeyError: [<class 'h5netcdf.core.File'>, ('/home/mike/work/cases/devtests/neutlim-base-init_only/BOUT.dmp.0.nc',), 'r', (('decode_vlen_strings', True), ('driver', None), ('format', 'NETCDF4'), ('invalid_netcdf', None), ('phony_dims', 'access')), 'b1964a73-96e3-47f2-9565-6b2c06295c16']

During handling of the above exception, another exception occurred:

RuntimeError                              Traceback (most recent call last)
Cell In[2], line 2
      1 case = "neutlim-base-init_only"
----> 2 ds = xbout.load.open_boutdataset(
      3     datapath = rf"/home/mike/work/cases/devtests/{case}/BOUT.dmp.0.nc",
      4     inputfilepath= rf"/home/mike/work/cases/devtests/{case}/BOUT.inp",
      5     keep_xboundaries = True,
      6     keep_yboundaries = True,
      7     info = False,
      8     engine = "netcdf4"  # Force netcdf4 backend instead of h5netcdf
      9 )
     11 # dsx = ds.isel(y = 5)
     12 # dsy = ds.isel(x = 5)
     13 
     14 # ds.options

File ~/work/xbout/xbout/load.py:182, in open_boutdataset(datapath, inputfilepath, geometry, gridfilepath, grid_mismatch, chunks, keep_xboundaries, keep_yboundaries, run_name, info, is_restart, is_mms_dump, **kwargs)
    179 if chunks is None:
    180     chunks = {}
--> 182 input_type = _check_dataset_type(datapath)
    183 if is_restart is None:
    184     is_restart = input_type == "restart"

File ~/work/xbout/xbout/load.py:587, in _check_dataset_type(datapath)
    583         return _check_dataset_type(datapath[0])
    585 filepaths, filetype = _expand_filepaths(datapath)
--> 587 ds = xr.open_dataset(filepaths[0], engine=filetype)
    588 ds.close()
    589 if "metadata:keep_yboundaries" in ds.attrs:
    590     # (i)

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/xarray/backends/api.py:606, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, create_default_indexes, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    594 decoders = _resolve_decoders_kwargs(
    595     decode_cf,
    596     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)    602     decode_coords=decode_coords,
    603 )
    605 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 606 backend_ds = backend.open_dataset(
    607     filename_or_obj,
    608     drop_variables=drop_variables,
    609     **decoders,
    610     **kwargs,
    611 )
    612 ds = _dataset_from_backend_dataset(
    613     backend_ds,
    614     filename_or_obj,
   (...)    625     **kwargs,
    626 )
    627 return ds

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/xarray/backends/h5netcdf_.py:540, in H5netcdfBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, format, group, lock, invalid_netcdf, phony_dims, decode_vlen_strings, driver, driver_kwds, storage_options)
    537 emit_phony_dims_warning, phony_dims = _check_phony_dims(phony_dims)
    539 filename_or_obj = _normalize_filename_or_obj(filename_or_obj)
--> 540 store = H5NetCDFStore.open(
    541     filename_or_obj,
    542     format=format,
    543     group=group,
    544     lock=lock,
    545     invalid_netcdf=invalid_netcdf,
    546     phony_dims=phony_dims,
    547     decode_vlen_strings=decode_vlen_strings,
    548     driver=driver,
    549     driver_kwds=driver_kwds,
    550     storage_options=storage_options,
    551 )
    553 store_entrypoint = StoreBackendEntrypoint()
    555 ds = store_entrypoint.open_dataset(
    556     store,
    557     mask_and_scale=mask_and_scale,
   (...)    563     decode_timedelta=decode_timedelta,
    564 )

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/xarray/backends/h5netcdf_.py:242, in H5NetCDFStore.open(cls, filename, mode, format, group, lock, autoclose, invalid_netcdf, phony_dims, decode_vlen_strings, driver, driver_kwds, storage_options)
    235 manager_cls = (
    236     CachingFileManager
    237     if isinstance(filename, str) and not is_remote_uri(filename)
    238     else PickleableFileManager
    239 )
    240 manager = manager_cls(h5netcdf.File, filename, mode=mode, kwargs=kwargs)
--> 242 return cls(
    243     manager,
    244     group=group,
    245     format=format,
    246     mode=mode,
    247     lock=lock,
    248     autoclose=autoclose,
    249 )

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/xarray/backends/h5netcdf_.py:152, in H5NetCDFStore.__init__(self, manager, group, mode, format, lock, autoclose)
    149 self.format = format or "NETCDF4"
    150 # todo: utilizing find_root_and_group seems a bit clunky
    151 #  making filename available on h5netcdf.Group seems better
--> 152 self._filename = find_root_and_group(self.ds)[0].filename
    153 self.is_remote = is_remote_uri(self._filename)
    154 self.lock = ensure_lock(lock)

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/xarray/backends/h5netcdf_.py:260, in H5NetCDFStore.ds(self)
    258 @property
    259 def ds(self):
--> 260     return self._acquire()

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/xarray/backends/h5netcdf_.py:252, in H5NetCDFStore._acquire(self, needs_lock)
    251 def _acquire(self, needs_lock=True):
--> 252     with self._manager.acquire_context(needs_lock) as root:
    253         ds = _nc4_require_group(
    254             root, self._group, self._mode, create_group=_h5netcdf_create_group
    255         )
    256     return ds

File ~/.pyenv/versions/3.12.5/lib/python3.12/contextlib.py:137, in _GeneratorContextManager.__enter__(self)
    135 del self.args, self.kwds, self.func
    136 try:
--> 137     return next(self.gen)
    138 except StopIteration:
    139     raise RuntimeError("generator didn't yield") from None

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/xarray/backends/file_manager.py:207, in CachingFileManager.acquire_context(self, needs_lock)
    204 @contextmanager
    205 def acquire_context(self, needs_lock: bool = True) -> Iterator[T_File]:
    206     """Context manager for acquiring a file."""
--> 207     file, cached = self._acquire_with_cache_info(needs_lock)
    208     try:
    209         yield file

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/xarray/backends/file_manager.py:225, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    223     kwargs = kwargs.copy()
    224     kwargs["mode"] = self._mode
--> 225 file = self._opener(*self._args, **kwargs)
    226 if self._mode == "w":
    227     # ensure file doesn't get overridden when opened again
    228     self._mode = "a"

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/h5netcdf/core.py:1962, in File.__init__(self, path, mode, format, invalid_netcdf, phony_dims, backend, **kwargs)
   1960 # This maps keeps track of all HDF5 datasets corresponding to this group.
   1961 self._all_h5groups = ChainMap(self._h5group)
-> 1962 super().__init__(self, self._h5path)
   1963 # get maximum dimension id and count of labeled dimensions
   1964 if self._writable:

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/h5netcdf/core.py:1174, in Group.__init__(self, parent, name)
   1171 else:
   1172     if self._root._phony_dims_mode is not None:
   1173         # check if malformed variable and raise
-> 1174         if _unlabeled_dimension_mix(v) == "unlabeled":
   1175             # if unscaled variable, get phony dimensions
   1176             phony_dims |= Counter(v.shape)
   1178 if not _netcdf_dimension_but_not_variable(v):

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/h5netcdf/core.py:914, in _unlabeled_dimension_mix(h5py_dataset)
    912     status = "nodim"
    913 else:
--> 914     dimset = {len(j) for j in dimlist}
    915     # either all dimensions have exactly one scale
    916     # or all dimensions have no scale
    917     if dimset ^ {0} == set():

File h5py/_objects.pyx:54, in h5py._objects.with_phil.wrapper()

File h5py/_objects.pyx:55, in h5py._objects.with_phil.wrapper()

File ~/pyenvs/base-3.12.5/lib/python3.12/site-packages/h5py/_hl/dims.py:60, in DimensionProxy.__len__(self)
     58 @with_phil
     59 def __len__(self):
---> 60     return h5ds.get_num_scales(self._id, self._dimension)

File h5py/_objects.pyx:54, in h5py._objects.with_phil.wrapper()

File h5py/_objects.pyx:55, in h5py._objects.with_phil.wrapper()

File h5py/h5ds.pyx:71, in h5py.h5ds.get_num_scales()

File h5py/defs.pyx:4282, in h5py.defs.H5DSget_num_scales()

RuntimeError: Unspecified error in H5DSget_num_scales (return value <0)

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions