Skip to content

Commit 5809526

Browse files
author
David Stuebe
committed
wip
1 parent f28f41b commit 5809526

File tree

4 files changed

+582
-59
lines changed

4 files changed

+582
-59
lines changed

Diff for: kerchunk/combine.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -311,8 +311,13 @@ def store_coords(self):
311311
for _ in v
312312
]
313313
).ravel()
314-
if "fill_value" not in kw and data.dtype.kind == "i":
315-
kw["fill_value"] = None
314+
if "fill_value" not in kw:
315+
if data.dtype.kind == "i":
316+
kw["fill_value"] = None
317+
else:
318+
# Fall back to existing fill value
319+
kw["fill_value"] = z[k].fill_value
320+
316321
arr = group.create_dataset(
317322
name=k,
318323
data=data,
@@ -323,6 +328,9 @@ def store_coords(self):
323328
)
324329
if k in z:
325330
# copy attributes if values came from an original variable
331+
logger.warning(
332+
"Zarr attrs: %s", {ii: vv for ii, vv in z[k].attrs.items()}
333+
)
326334
arr.attrs.update(z[k].attrs)
327335
arr.attrs["_ARRAY_DIMENSIONS"] = [k]
328336
if self.cf_units and k in self.cf_units:

Diff for: kerchunk/grib2.py

+40-56
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import fsspec
2020
import zarr
21+
import xarray
2122
import numpy as np
2223

2324
from kerchunk.utils import class_factory, _encode_for_JSON
@@ -376,6 +377,7 @@ def grib_tree(
376377
Grib message variable names that decode as "unknown" are dropped
377378
Grib typeOfLevel attributes that decode as unknown are treated as a single group
378379
Grib steps that are missing due to WrongStepUnitError are patched with NaT
380+
The input message_groups should not be modified by this method
379381
380382
:param message_groups: a collection of zarr store like dictionaries as produced by scan_grib
381383
:param remote_options: remote options to pass to ZarrToMultiZarr
@@ -418,10 +420,10 @@ def grib_tree(
418420
# To resolve unknown variables add custom grib tables.
419421
# https://confluence.ecmwf.int/display/UDOC/Creating+your+own+local+definitions+-+ecCodes+GRIB+FAQ
420422
# If you process the groups from a single file in order, you can use the msg# to compare with the
421-
# IDX file.
423+
# IDX file. The idx files message index is 1 based where the grib_tree message count is zero based
422424
logger.warning(
423-
"Dropping unknown variable in msg# %d. Compare with the grib idx file to identify and build"
424-
" a ecCodes local grib definitions to fix it.",
425+
"Dropping unknown variable in msg# %d. Compare with the grib idx file to help identify it"
426+
" and build an ecCodes local grib definitions file to fix it.",
425427
msg_ind,
426428
)
427429
unknown_counter += 1
@@ -496,21 +498,13 @@ def grib_tree(
496498
catdims,
497499
)
498500

499-
fix_group_step = add_missing_step_var(aggregations[path], path)
500501
mzz = MultiZarrToZarr(
501-
fix_group_step,
502+
aggregations[path],
502503
remote_options=remote_options,
503504
concat_dims=catdims,
504505
identical_dims=idims,
505506
)
506-
try:
507-
group = mzz.translate()
508-
except Exception:
509-
import pprint
510-
511-
gstr = pprint.pformat(fix_group_step)
512-
logger.exception(f"Failed to multizarr {path}\n{gstr}")
513-
raise
507+
group = mzz.translate()
514508

515509
for key, value in group["refs"].items():
516510
if key not in [".zattrs", ".zgroup"]:
@@ -519,53 +513,43 @@ def grib_tree(
519513
return result
520514

521515

522-
def add_missing_step_var(groups: List[dict], path: str) -> List[dict]:
516+
def correct_hrrr_subhf_group_step(group: dict) -> dict:
523517
"""
524-
Attempt to fill in missing step var. Should this be done where the step unit error is handled
525-
in scan grib?
526-
:param groups:
527-
:param path:
528-
:return:
518+
Overrides the definition of the step variable. Sets the value equal to the `valid_time - time`
519+
in hours as a floating point value. This fixes issues with the HRRR SubHF grib2 step as read by
520+
cfgrib.
521+
The result is a deep copy, the original data is unmodified.
522+
:param groups: the list of groups to fix
523+
:param path: the path of the group
524+
:return: a new deep copy of the corrected listed
529525
"""
530-
result = []
531-
for group in groups:
532-
group = copy.deepcopy(group)
533-
if "step/.zarray" not in group["refs"]:
534-
logger.warning("Adding missing step variable to group path %s", path)
535-
group["refs"]["step/.zarray"] = (
536-
'{"chunks":[],"compressor":null,"dtype":"<f8","fill_value":"NaN","filters":null,"order":"C",'
537-
'"shape":[],"zarr_format":2}'
538-
)
539-
group["refs"]["step/.zattrs"] = (
540-
'{"_ARRAY_DIMENSIONS":[],"long_name":"time since forecast_reference_time",'
541-
'"standard_name":"forecast_period","units":"hours"}'
542-
)
526+
group = copy.deepcopy(group)
527+
group["refs"]["step/.zarray"] = (
528+
'{"chunks":[],"compressor":null,"dtype":"<f8","fill_value":"NaN","filters":null,"order":"C",'
529+
'"shape":[],"zarr_format":2}'
530+
)
531+
group["refs"]["step/.zattrs"] = (
532+
'{"_ARRAY_DIMENSIONS":[],"long_name":"time since forecast_reference_time",'
533+
'"standard_name":"forecast_period","units":"hours"}'
534+
)
543535

544-
# Try to set the value - this doesn't work
545-
import xarray
536+
fo = fsspec.filesystem("reference", fo=group, mode="r")
537+
xd = xarray.open_dataset(fo.get_mapper(), engine="zarr", consolidated=False)
546538

547-
gcopy = copy.deepcopy(group)
548-
fo = fsspec.filesystem("reference", fo=gcopy, mode="r")
549-
xd = xarray.open_dataset(fo.get_mapper(), engine="zarr", consolidated=False)
539+
correct_step = xd.valid_time.values - xd.time.values
550540

551-
logger.info("%s has step val %s", path, xd.step.values)
552-
correct_step = xd.valid_time.values - xd.time.values
553-
assert correct_step.shape == ()
554-
step_float = correct_step.astype("timedelta64[s]").astype("float") / 3600.0
555-
step_bytes = step_float.tobytes()
556-
try:
557-
# easiest way to test if data is ascii
558-
bytes = step_bytes.decode("ascii")
559-
except UnicodeDecodeError:
560-
bytes = (b"base64:" + base64.b64encode(step_bytes)).decode("ascii")
541+
assert correct_step.shape == ()
542+
step_float = correct_step.astype("timedelta64[s]").astype("float") / 3600.0
543+
logger.info(
544+
"Orig val %s, new val %s", xd.step.values.astype("timedelta64[s]"), step_float
545+
)
561546

562-
group["refs"]["step/0"] = bytes
563-
logger.info(
564-
"Computed step float: %s, current step/0 %s, orig step %s",
565-
step_float,
566-
group["refs"].get("step/0"),
567-
gcopy["refs"].get("step/0"),
568-
)
569-
result.append(group)
547+
step_bytes = step_float.tobytes()
548+
try:
549+
enocded_val = step_bytes.decode("ascii")
550+
except UnicodeDecodeError:
551+
enocded_val = (b"base64:" + base64.b64encode(step_bytes)).decode("ascii")
570552

571-
return result
553+
group["refs"]["step/0"] = enocded_val
554+
555+
return group

0 commit comments

Comments
 (0)