Skip to content

Commit 715b122

Browse files
aldenks and claude authored
Add 500 hPa geopotential height variable to GEFS datasets (#490)
* Add 500 hPa geopotential height to GEFS analysis and forecast datasets https://claude.ai/code/session_01LBd3sDwYwxRihWA2hLLnPj * Add integration tests for geopotential_height_500hpa download and read across all GEFS data periods https://claude.ai/code/session_01LBd3sDwYwxRihWA2hLLnPj * Add geopotential_height_500hpa to GEFS analysis and forecast datasets - Add geopotential_height_500hpa data variable to shared GEFS template config with gefs_file_type="a" (a-files only; 500 hPa not in s-files), correct grib_description using Pascal units ('50000[Pa] ISBL="Isobaric surface"'), and index_position=31 - Add "500 mb": "pres_abv700mb" to GEFS_REFORECAST_LEVELS_SHORT so reforecast URLs resolve to the hgt_pres_abv700mb_* files (500 hPa is not in hgt_pres_*) - Regenerate zarr templates for both datasets - Replace single-variable slow tests with 4 comprehensive all-vars integration tests per dataset: reforecast, pre-v12, current early lead, current later lead (forecast only); uses source_groups() with full groups per test, skipping vars absent from the reforecast archive via FileNotFoundError https://claude.ai/code/session_01LBd3sDwYwxRihWA2hLLnPj * Fix slow tests to handle vars missing from older GEFS periods Some variables aren't present in pre-v12 or reforecast GRIB files (e.g. cloud ceiling HGT, PRMSL). Catch FileNotFoundError, ValueError, and AssertionError to skip missing vars rather than failing the test. https://claude.ai/code/session_01LBd3sDwYwxRihWA2hLLnPj * Fix reforecast 0.5deg files and add explicit missing-var allow-lists read_rasterio: reforecast pressure-level files (e.g. hgt_pres_abv700mb) are 0.5deg while surface files are 0.25deg. When the reforecast file resolution doesn't match the output grid, reproject it the same way as a/b-files instead of asserting equal shape. This fixes geopotential_height_500hpa in the GEFSv12 reforecast period (2000-2019).
Tests: replace broad exception catches with explicit frozenset allow-lists (_REFORECAST_MISSING_VARS, _PRE_V12_MISSING_VARS) so unexpected failures are caught rather than silently skipped. https://claude.ai/code/session_01LBd3sDwYwxRihWA2hLLnPj * Address PR review comments on GEFS geopotential height PR - DRY read_data.py: merge "a"|"b"|"reforecast" into one case with a single reproject block; separate "s" case with assert reader.shape == out_spatial_shape restored - Remove reforecast and pre-v12 forecast download/read tests (dataset starts 2020-10-01 with GEFSv12) - Consolidate current early/later lead forecast tests into one parametrized test with ThreadPoolExecutor over source groups - Add ThreadPoolExecutor parallelism to all analysis download/read tests Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude <noreply@anthropic.com>
1 parent 3fcbce9 commit 715b122

9 files changed

Lines changed: 553 additions & 13 deletions

File tree

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
{
2+
"shape": [
3+
1,
4+
721,
5+
1440
6+
],
7+
"data_type": "float32",
8+
"chunk_grid": {
9+
"name": "regular",
10+
"configuration": {
11+
"chunk_shape": [
12+
2880,
13+
384,
14+
384
15+
]
16+
}
17+
},
18+
"chunk_key_encoding": {
19+
"name": "default",
20+
"configuration": {
21+
"separator": "/"
22+
}
23+
},
24+
"fill_value": 0.0,
25+
"codecs": [
26+
{
27+
"name": "sharding_indexed",
28+
"configuration": {
29+
"chunk_shape": [
30+
1440,
31+
32,
32+
32
33+
],
34+
"codecs": [
35+
{
36+
"name": "bytes",
37+
"configuration": {
38+
"endian": "little"
39+
}
40+
},
41+
{
42+
"name": "blosc",
43+
"configuration": {
44+
"typesize": 4,
45+
"cname": "zstd",
46+
"clevel": 3,
47+
"shuffle": "shuffle",
48+
"blocksize": 0
49+
}
50+
}
51+
],
52+
"index_codecs": [
53+
{
54+
"name": "bytes",
55+
"configuration": {
56+
"endian": "little"
57+
}
58+
},
59+
{
60+
"name": "crc32c"
61+
}
62+
],
63+
"index_location": "end"
64+
}
65+
}
66+
],
67+
"attributes": {
68+
"long_name": "Geopotential height",
69+
"short_name": "gh",
70+
"standard_name": "geopotential_height",
71+
"units": "m",
72+
"step_type": "instant",
73+
"coordinates": "spatial_ref",
74+
"_FillValue": "AAAAAAAA+H8="
75+
},
76+
"dimension_names": [
77+
"time",
78+
"latitude",
79+
"longitude"
80+
],
81+
"zarr_format": 3,
82+
"node_type": "array",
83+
"storage_transformers": []
84+
}

src/reformatters/noaa/gefs/analysis/templates/latest.zarr/zarr.json

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,90 @@
521521
"node_type": "array",
522522
"storage_transformers": []
523523
},
524+
"geopotential_height_500hpa": {
525+
"shape": [
526+
1,
527+
721,
528+
1440
529+
],
530+
"data_type": "float32",
531+
"chunk_grid": {
532+
"name": "regular",
533+
"configuration": {
534+
"chunk_shape": [
535+
2880,
536+
384,
537+
384
538+
]
539+
}
540+
},
541+
"chunk_key_encoding": {
542+
"name": "default",
543+
"configuration": {
544+
"separator": "/"
545+
}
546+
},
547+
"fill_value": 0.0,
548+
"codecs": [
549+
{
550+
"name": "sharding_indexed",
551+
"configuration": {
552+
"chunk_shape": [
553+
1440,
554+
32,
555+
32
556+
],
557+
"codecs": [
558+
{
559+
"name": "bytes",
560+
"configuration": {
561+
"endian": "little"
562+
}
563+
},
564+
{
565+
"name": "blosc",
566+
"configuration": {
567+
"typesize": 4,
568+
"cname": "zstd",
569+
"clevel": 3,
570+
"shuffle": "shuffle",
571+
"blocksize": 0
572+
}
573+
}
574+
],
575+
"index_codecs": [
576+
{
577+
"name": "bytes",
578+
"configuration": {
579+
"endian": "little"
580+
}
581+
},
582+
{
583+
"name": "crc32c"
584+
}
585+
],
586+
"index_location": "end"
587+
}
588+
}
589+
],
590+
"attributes": {
591+
"long_name": "Geopotential height",
592+
"short_name": "gh",
593+
"standard_name": "geopotential_height",
594+
"units": "m",
595+
"step_type": "instant",
596+
"coordinates": "spatial_ref",
597+
"_FillValue": "AAAAAAAA+H8="
598+
},
599+
"dimension_names": [
600+
"time",
601+
"latitude",
602+
"longitude"
603+
],
604+
"zarr_format": 3,
605+
"node_type": "array",
606+
"storage_transformers": []
607+
},
524608
"geopotential_height_cloud_ceiling": {
525609
"shape": [
526610
1,

src/reformatters/noaa/gefs/common_gefs_template_config.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,25 @@ def get_shared_data_var_configs(
474474
keep_mantissa_bits=8,
475475
),
476476
),
477+
GEFSDataVar(
478+
name="geopotential_height_500hpa",
479+
encoding=encoding_float32,
480+
attrs=DataVarAttrs(
481+
short_name="gh",
482+
long_name="Geopotential height",
483+
units="m",
484+
step_type="instant",
485+
standard_name="geopotential_height",
486+
),
487+
internal_attrs=GEFSInternalAttrs(
488+
grib_element="HGT",
489+
grib_description='50000[Pa] ISBL="Isobaric surface"',
490+
grib_index_level="500 mb",
491+
gefs_file_type="a",
492+
index_position=31,
493+
keep_mantissa_bits=11,
494+
),
495+
),
477496
GEFSDataVar(
478497
name="downward_short_wave_radiation_flux_surface",
479498
encoding=encoding_float32,
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
{
2+
"shape": [
3+
1,
4+
31,
5+
181,
6+
721,
7+
1440
8+
],
9+
"data_type": "float32",
10+
"chunk_grid": {
11+
"name": "regular",
12+
"configuration": {
13+
"chunk_shape": [
14+
1,
15+
31,
16+
192,
17+
374,
18+
368
19+
]
20+
}
21+
},
22+
"chunk_key_encoding": {
23+
"name": "default",
24+
"configuration": {
25+
"separator": "/"
26+
}
27+
},
28+
"fill_value": 0.0,
29+
"codecs": [
30+
{
31+
"name": "sharding_indexed",
32+
"configuration": {
33+
"chunk_shape": [
34+
1,
35+
31,
36+
64,
37+
17,
38+
16
39+
],
40+
"codecs": [
41+
{
42+
"name": "bytes",
43+
"configuration": {
44+
"endian": "little"
45+
}
46+
},
47+
{
48+
"name": "blosc",
49+
"configuration": {
50+
"typesize": 4,
51+
"cname": "zstd",
52+
"clevel": 3,
53+
"shuffle": "shuffle",
54+
"blocksize": 0
55+
}
56+
}
57+
],
58+
"index_codecs": [
59+
{
60+
"name": "bytes",
61+
"configuration": {
62+
"endian": "little"
63+
}
64+
},
65+
{
66+
"name": "crc32c"
67+
}
68+
],
69+
"index_location": "end"
70+
}
71+
}
72+
],
73+
"attributes": {
74+
"long_name": "Geopotential height",
75+
"short_name": "gh",
76+
"standard_name": "geopotential_height",
77+
"units": "m",
78+
"step_type": "instant",
79+
"coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time",
80+
"_FillValue": "AAAAAAAA+H8="
81+
},
82+
"dimension_names": [
83+
"init_time",
84+
"ensemble_member",
85+
"lead_time",
86+
"latitude",
87+
"longitude"
88+
],
89+
"zarr_format": 3,
90+
"node_type": "array",
91+
"storage_transformers": []
92+
}

src/reformatters/noaa/gefs/forecast_35_day/templates/latest.zarr/zarr.json

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,98 @@
676676
"node_type": "array",
677677
"storage_transformers": []
678678
},
679+
"geopotential_height_500hpa": {
680+
"shape": [
681+
1,
682+
31,
683+
181,
684+
721,
685+
1440
686+
],
687+
"data_type": "float32",
688+
"chunk_grid": {
689+
"name": "regular",
690+
"configuration": {
691+
"chunk_shape": [
692+
1,
693+
31,
694+
192,
695+
374,
696+
368
697+
]
698+
}
699+
},
700+
"chunk_key_encoding": {
701+
"name": "default",
702+
"configuration": {
703+
"separator": "/"
704+
}
705+
},
706+
"fill_value": 0.0,
707+
"codecs": [
708+
{
709+
"name": "sharding_indexed",
710+
"configuration": {
711+
"chunk_shape": [
712+
1,
713+
31,
714+
64,
715+
17,
716+
16
717+
],
718+
"codecs": [
719+
{
720+
"name": "bytes",
721+
"configuration": {
722+
"endian": "little"
723+
}
724+
},
725+
{
726+
"name": "blosc",
727+
"configuration": {
728+
"typesize": 4,
729+
"cname": "zstd",
730+
"clevel": 3,
731+
"shuffle": "shuffle",
732+
"blocksize": 0
733+
}
734+
}
735+
],
736+
"index_codecs": [
737+
{
738+
"name": "bytes",
739+
"configuration": {
740+
"endian": "little"
741+
}
742+
},
743+
{
744+
"name": "crc32c"
745+
}
746+
],
747+
"index_location": "end"
748+
}
749+
}
750+
],
751+
"attributes": {
752+
"long_name": "Geopotential height",
753+
"short_name": "gh",
754+
"standard_name": "geopotential_height",
755+
"units": "m",
756+
"step_type": "instant",
757+
"coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time",
758+
"_FillValue": "AAAAAAAA+H8="
759+
},
760+
"dimension_names": [
761+
"init_time",
762+
"ensemble_member",
763+
"lead_time",
764+
"latitude",
765+
"longitude"
766+
],
767+
"zarr_format": 3,
768+
"node_type": "array",
769+
"storage_transformers": []
770+
},
679771
"geopotential_height_cloud_ceiling": {
680772
"shape": [
681773
1,

0 commit comments

Comments
 (0)