18
18
19
19
import fsspec
20
20
import zarr
21
+ import xarray
21
22
import numpy as np
22
23
23
24
from kerchunk .utils import class_factory , _encode_for_JSON
@@ -376,6 +377,7 @@ def grib_tree(
376
377
Grib message variable names that decode as "unknown" are dropped
377
378
Grib typeOfLevel attributes that decode as unknown are treated as a single group
378
379
Grib steps that are missing due to WrongStepUnitError are patched with NaT
380
+ The input message_groups should not be modified by this method
379
381
380
382
:param message_groups: a collection of zarr store like dictionaries as produced by scan_grib
381
383
:param remote_options: remote options to pass to ZarrToMultiZarr
@@ -418,10 +420,10 @@ def grib_tree(
418
420
# To resolve unknown variables add custom grib tables.
419
421
# https://confluence.ecmwf.int/display/UDOC/Creating+your+own+local+definitions+-+ecCodes+GRIB+FAQ
420
422
# If you process the groups from a single file in order, you can use the msg# to compare with the
421
- # IDX file.
423
+ # IDX file. The idx files message index is 1 based where the grib_tree message count is zero based
422
424
logger .warning (
423
- "Dropping unknown variable in msg# %d. Compare with the grib idx file to identify and build "
424
- " a ecCodes local grib definitions to fix it." ,
425
+ "Dropping unknown variable in msg# %d. Compare with the grib idx file to help identify it "
426
+ " and build an ecCodes local grib definitions file to fix it." ,
425
427
msg_ind ,
426
428
)
427
429
unknown_counter += 1
@@ -496,21 +498,13 @@ def grib_tree(
496
498
catdims ,
497
499
)
498
500
499
- fix_group_step = add_missing_step_var (aggregations [path ], path )
500
501
mzz = MultiZarrToZarr (
501
- fix_group_step ,
502
+ aggregations [ path ] ,
502
503
remote_options = remote_options ,
503
504
concat_dims = catdims ,
504
505
identical_dims = idims ,
505
506
)
506
- try :
507
- group = mzz .translate ()
508
- except Exception :
509
- import pprint
510
-
511
- gstr = pprint .pformat (fix_group_step )
512
- logger .exception (f"Failed to multizarr { path } \n { gstr } " )
513
- raise
507
+ group = mzz .translate ()
514
508
515
509
for key , value in group ["refs" ].items ():
516
510
if key not in [".zattrs" , ".zgroup" ]:
@@ -519,53 +513,43 @@ def grib_tree(
519
513
return result
520
514
521
515
522
- def add_missing_step_var ( groups : List [ dict ], path : str ) -> List [ dict ] :
516
+ def correct_hrrr_subhf_group_step ( group : dict ) -> dict :
523
517
"""
524
- Attempt to fill in missing step var. Should this be done where the step unit error is handled
525
- in scan grib?
526
- :param groups:
527
- :param path:
528
- :return:
518
+ Overrides the definition of the step variable. Sets the value equal to the `valid_time - time`
519
+ in hours as a floating point value. This fixes issues with the HRRR SubHF grib2 step as read by
520
+ cfgrib.
521
+ The result is a deep copy, the original data is unmodified.
522
+ :param groups: the list of groups to fix
523
+ :param path: the path of the group
524
+ :return: a new deep copy of the corrected listed
529
525
"""
530
- result = []
531
- for group in groups :
532
- group = copy .deepcopy (group )
533
- if "step/.zarray" not in group ["refs" ]:
534
- logger .warning ("Adding missing step variable to group path %s" , path )
535
- group ["refs" ]["step/.zarray" ] = (
536
- '{"chunks":[],"compressor":null,"dtype":"<f8","fill_value":"NaN","filters":null,"order":"C",'
537
- '"shape":[],"zarr_format":2}'
538
- )
539
- group ["refs" ]["step/.zattrs" ] = (
540
- '{"_ARRAY_DIMENSIONS":[],"long_name":"time since forecast_reference_time",'
541
- '"standard_name":"forecast_period","units":"hours"}'
542
- )
526
+ group = copy .deepcopy (group )
527
+ group ["refs" ]["step/.zarray" ] = (
528
+ '{"chunks":[],"compressor":null,"dtype":"<f8","fill_value":"NaN","filters":null,"order":"C",'
529
+ '"shape":[],"zarr_format":2}'
530
+ )
531
+ group ["refs" ]["step/.zattrs" ] = (
532
+ '{"_ARRAY_DIMENSIONS":[],"long_name":"time since forecast_reference_time",'
533
+ '"standard_name":"forecast_period","units":"hours"}'
534
+ )
543
535
544
- # Try to set the value - this doesn't work
545
- import xarray
536
+ fo = fsspec . filesystem ( "reference" , fo = group , mode = "r" )
537
+ xd = xarray . open_dataset ( fo . get_mapper (), engine = "zarr" , consolidated = False )
546
538
547
- gcopy = copy .deepcopy (group )
548
- fo = fsspec .filesystem ("reference" , fo = gcopy , mode = "r" )
549
- xd = xarray .open_dataset (fo .get_mapper (), engine = "zarr" , consolidated = False )
539
+ correct_step = xd .valid_time .values - xd .time .values
550
540
551
- logger .info ("%s has step val %s" , path , xd .step .values )
552
- correct_step = xd .valid_time .values - xd .time .values
553
- assert correct_step .shape == ()
554
- step_float = correct_step .astype ("timedelta64[s]" ).astype ("float" ) / 3600.0
555
- step_bytes = step_float .tobytes ()
556
- try :
557
- # easiest way to test if data is ascii
558
- bytes = step_bytes .decode ("ascii" )
559
- except UnicodeDecodeError :
560
- bytes = (b"base64:" + base64 .b64encode (step_bytes )).decode ("ascii" )
541
+ assert correct_step .shape == ()
542
+ step_float = correct_step .astype ("timedelta64[s]" ).astype ("float" ) / 3600.0
543
+ logger .info (
544
+ "Orig val %s, new val %s" , xd .step .values .astype ("timedelta64[s]" ), step_float
545
+ )
561
546
562
- group ["refs" ]["step/0" ] = bytes
563
- logger .info (
564
- "Computed step float: %s, current step/0 %s, orig step %s" ,
565
- step_float ,
566
- group ["refs" ].get ("step/0" ),
567
- gcopy ["refs" ].get ("step/0" ),
568
- )
569
- result .append (group )
547
+ step_bytes = step_float .tobytes ()
548
+ try :
549
+ enocded_val = step_bytes .decode ("ascii" )
550
+ except UnicodeDecodeError :
551
+ enocded_val = (b"base64:" + base64 .b64encode (step_bytes )).decode ("ascii" )
570
552
571
- return result
553
+ group ["refs" ]["step/0" ] = enocded_val
554
+
555
+ return group
0 commit comments