Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ An overview of the content of the YAML configuration file specified via `-c` / `
│ inittime: │ Forecast initialization time │
│ leadtime: │ Forecast leadtime │
│ validtime: │ Forecast validtime │
│ format: │ 'grib', 'netcdf', or 'zarr' (optional) │
│ mask: │ Sequence of [lat, lon] pairs (optional) │
│ name: │ Dataset descriptive name │
│ path: │ Filesystem path to Zarr/netCDF dataset │
Expand Down Expand Up @@ -145,6 +146,10 @@ Specify values under `forecast.coords.time` as follows:

If a variable specified under `forecast.coords.time` names a coordinate dimension variable, that variable will be used. If no such variable exists, `wxvx` will look for a dataset attribute with the given name and try to use it, coercing it to the expected type (e.g. `datetime` or `timedelta`) as needed. For example, it will parse an ISO8601-formatted string to a Python `datetime` object.

### forecast.format

If this optional value is omitted, `wxvx` will introspect forecast datasets to determine if they are GRIB, netCDF, or Zarr. As a performance optimization, or as an override for correctness in case `wxvx` makes the wrong determination, a value of `grib`, `netcdf`, or `zarr` may be supplied to indicate that forecast datasets are formatted as GRIB, netCDF, or Zarr, respectively. In this case, `wxvx` will behave as if it had determined the format, and will likely fail if the indicated format is incorrect.

### forecast.mask

A sequence of latitude/longitude pairs describing a masking polygon. See the [Example](#example). The specified mask will be applied to forecast, baseline, or truth grids before verification.
Expand Down
4 changes: 1 addition & 3 deletions recipe/meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
"pytest-xdist ==3.8.*",
"python ==3.13",
"python-eccodes ==2.42.*",
"python-magic ==0.4.*",
"pyyaml ==6.0.*",
"requests ==2.32.*",
"ruff ==0.15.*",
Expand All @@ -41,7 +40,6 @@
"pyproj ==3.7.*",
"python ==3.13",
"python-eccodes ==2.42.*",
"python-magic ==0.4.*",
"pyyaml ==6.0.*",
"requests ==2.32.*",
"seaborn ==0.13.*",
Expand All @@ -50,5 +48,5 @@
"zarr ==3.1.*"
]
},
"version": "0.5.1"
"version": "0.6.0"
}
1 change: 0 additions & 1 deletion recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ requirements:
- netcdf4 1.7.*
- pyproj 3.7.*
- python-eccodes 2.42.*
- python-magic 0.4.*
- pyyaml 6.0.*
- requests 2.32.*
- seaborn 0.13.*
Expand Down
2 changes: 2 additions & 0 deletions src/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ warn_return_any = true

[tool.pytest.ini_options]
filterwarnings = [
"ignore:Consolidated metadata .*:UserWarning",
"ignore:This process .* is multi-threaded:DeprecationWarning",
"ignore:jsonschema.RefResolver is deprecated:DeprecationWarning" # from uwtools
]
Expand All @@ -42,6 +43,7 @@ ignore = [
"ANN202", # missing-return-type-private-function
"ANN204", # missing-return-type-special-method
"ANN401", # any-type
"BLE001", # blind-except
"C408", # unnecessary-collection-call
"C901", # complex-structure
"COM812", # missing-trailing-comma
Expand Down
1 change: 1 addition & 0 deletions src/wxvx/resources/config-grid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ forecast:
time:
inittime: time
leadtime: lead_time
format: zarr
mask:
- [52.61564933, 225.90452027]
- [52.61564933, 255.00000000]
Expand Down
1 change: 1 addition & 0 deletions src/wxvx/resources/config-point.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ forecast:
time:
inittime: forecast_reference_time
validtime: time
format: zarr
name: AIWX
path: /path/to/forecast.zarr
projection:
Expand Down
7 changes: 7 additions & 0 deletions src/wxvx/resources/config.jsonschema
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,13 @@
],
"type": "object"
},
"format": {
"enum": [
"grib",
"netcdf",
"zarr"
]
},
"mask": {
"items": {
"items": {
Expand Down
2 changes: 1 addition & 1 deletion src/wxvx/resources/info.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"buildnum": "0",
"version": "0.5.1"
"version": "0.6.0"
}
1 change: 1 addition & 0 deletions src/wxvx/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ class _S(_ValsMatchKeys):
firstbyte: str = _
forecast: str = _
forecast_reference_time: str = _
format: str = _
grid: str = _
grids: str = _
grids_baseline: str = _
Expand Down
4 changes: 3 additions & 1 deletion src/wxvx/tests/support.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ def with_del(d: dict, *args: Any) -> dict:
p = new
for key in args[:-1]:
p = p[key]
del p[args[-1]]
key = args[-1]
if key in p:
del p[key]
return new


Expand Down
14 changes: 13 additions & 1 deletion src/wxvx/tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ def test_schema_forecast(logged, config_data, fs):
# Additional keys are not allowed:
assert not ok(with_set(config, 42, "n"))
assert logged("'n' was unexpected")
# Some keys have enum values:
for key in [S.format]:
for val in ["grib", "netcdf", "zarr"]:
assert ok(with_set(config, val, key))
assert not ok(with_set(config, "foo", key))
assert logged(r"'foo' is not one of \['grib', 'netcdf', 'zarr'\]")
# Some keys have object values:
for key in [S.coords, S.projection]:
assert not ok(with_set(config, None, key))
Expand All @@ -122,7 +128,7 @@ def test_schema_forecast(logged, config_data, fs):
assert not ok(with_set(config, None, key))
assert logged("None is not of type 'string'")
# Some keys are optional:
for key in [S.mask]:
for key in [S.format, S.mask]:
assert ok(with_del(config, key))


Expand Down Expand Up @@ -349,6 +355,12 @@ def test_schema_variables(logged, config_data, fs):
assert logged("None is not of type 'string'")


def test_support_with_del():
    # Cover the with_del() branch where the final key is absent, so there is
    # nothing to delete (for 100% branch coverage):
    original = {"a": "apple"}
    result = with_del(original, "b")
    assert result == original


# Helpers


Expand Down
11 changes: 10 additions & 1 deletion src/wxvx/tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from wxvx import types
from wxvx.strings import EC, MET, S
from wxvx.util import WXVXError, resource_path
from wxvx.util import DataFormat, WXVXError, resource_path

# Fixtures

Expand Down Expand Up @@ -247,6 +247,7 @@ def test_types_Forecast(config_data, forecast):
assert obj.coords.longitude == "longitude"
assert obj.coords.time.inittime == "time"
assert obj.coords.time.leadtime == "lead_time"
assert obj.format is None
assert obj.name == "Forecast"
assert obj.path == "/path/to/forecast-{{ yyyymmdd }}-{{ hh }}-{{ '%03d' % fh }}.nc"
cfg = config_data[S.forecast]
Expand All @@ -258,6 +259,14 @@ def test_types_Forecast(config_data, forecast):
del cfg_no_proj[S.projection]
default = types.Forecast(**cfg_no_proj)
assert default.projection == {S.proj: S.latlon}
for k, v in {
"grib": DataFormat.GRIB,
"netcdf": DataFormat.NETCDF,
"zarr": DataFormat.ZARR,
None: None,
}.items():
obj = types.Forecast(**{**config_data[S.forecast], "format": k})
assert obj.format == v


def test_types_Leadtimes():
Expand Down
65 changes: 21 additions & 44 deletions src/wxvx/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pathlib import Path
from unittest.mock import Mock, patch

import xarray as xr
from pytest import mark, raises

from wxvx import util
Expand Down Expand Up @@ -36,60 +37,36 @@ def test_util_atomic(fakefs):
assert recipient.read_text() == s2


@mark.parametrize(
("expected", "inferred"),
[
(util.DataFormat.BUFR, "Binary Universal Form data (BUFR) Edition 3"),
(util.DataFormat.GRIB, "Gridded binary (GRIB) version 2"),
(util.DataFormat.NETCDF, "Hierarchical Data Format (version 5) data"),
],
)
def test_util_classify_data_format__file(expected, fakefs, inferred):
path = fakefs / "datafile"
path.touch()
util.classify_data_format.cache_clear()
with patch.object(util.magic, "from_file", return_value=inferred):
assert util.classify_data_format(path=path) == expected


def test_util_classify_data_format__file_missing(fakefs, logged):
path = fakefs / "no-such-file"
util.classify_data_format.cache_clear()
def test_util_classify_data_format__fail_missing(fakefs, logged):
    # A nonexistent path cannot be classified: expect UNKNOWN plus a log message.
    missing = fakefs / "a.missing"
    result = util.classify_data_format(path=missing)
    assert result == util.DataFormat.UNKNOWN
    assert logged(f"Path not found: {missing}")


def test_util_classify_data_format__file_unrecognized(fakefs, logged):
path = fakefs / "datafile"
path.touch()
util.classify_data_format.cache_clear()
with patch.object(util.magic, "from_file", return_value="What Is This I Don't Even"):
assert util.classify_data_format(path=path) == util.DataFormat.UNKNOWN
assert logged(f"Could not determine format of {path}")
def test_util_classify_data_format__fail_unknown(logged, tmp_path):
    # A file whose content matches no supported format classifies as UNKNOWN.
    mystery = tmp_path / "a.foo"
    mystery.write_text("foo")
    result = util.classify_data_format(path=mystery)
    assert result == util.DataFormat.UNKNOWN
    assert logged(f"Could not determine format of: {mystery}")


def test_util_classify_data_format__zarr(fakefs):
path = fakefs / "datadir"
path.mkdir()
util.classify_data_format.cache_clear()
with patch.object(util.zarr, "open"):
assert util.classify_data_format(path=path) == util.DataFormat.ZARR
def test_util_classify_data_format__pass_grib(tmp_path):
    # A file beginning with a GRIB indicator section ("GRIB" magic, edition in
    # byte 7) classifies as GRIB for both edition 1 and edition 2.
    path = tmp_path / "a.grib"
    for edition in (1, 2):
        path.write_bytes(b"GRIB" + bytes(3) + bytes((edition,)))
        assert util.classify_data_format(path=path) == util.DataFormat.GRIB


def test_util_classify_data_format__zarr_corrupt(fakefs, logged):
path = fakefs / "datadir"
path.mkdir()
util.classify_data_format.cache_clear()
with patch.object(util.zarr, "open", side_effect=Exception("failure")):
assert util.classify_data_format(path=path) == util.DataFormat.UNKNOWN
assert logged(f"Could not determine format of {path}")
def test_util_classify_data_format__pass_netcdf(tmp_path):
    # A minimal netCDF file written to disk classifies as NETCDF.
    path = tmp_path / "a.nc"
    da = xr.DataArray([1])
    da.to_netcdf(path)
    assert util.classify_data_format(path=path) == util.DataFormat.NETCDF


def test_util_classify_data_format__zarr_missing(fakefs, logged):
path = fakefs / "no-such-dir"
util.classify_data_format.cache_clear()
assert util.classify_data_format(path=path) == util.DataFormat.UNKNOWN
assert logged(f"Path not found: {path}")
def test_util_classify_data_format__pass_zarr(tmp_path):
    # A minimal Zarr store written to disk classifies as ZARR.
    path = tmp_path / "a.zarr"
    da = xr.DataArray([1])
    da.to_zarr(path)
    assert util.classify_data_format(path=path) == util.DataFormat.ZARR


@mark.parametrize(
Expand Down
22 changes: 16 additions & 6 deletions src/wxvx/tests/test_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,18 +726,28 @@ def test_workflow__enforce_point_truth_type(c):


@mark.parametrize(
    ("datafmt_expected", "fmtstr", "path"),
    [
        (DataFormat.NETCDF, "netcdf", "/path/to/a.nc"),
        (DataFormat.NETCDF, None, "/path/to/a.nc"),
        (DataFormat.ZARR, "zarr", "/path/to/a.zarr"),
        (DataFormat.ZARR, None, "/path/to/a.zarr"),
    ],
)
def test_workflow__forecast_grid(c, datafmt_expected, fmtstr, path, tc, testvars):
    # Exercise _forecast_grid() both with an explicit forecast.format config
    # value (fmtstr set) and without one (fmtstr None), for netCDF and Zarr.
    c.forecast._format = fmtstr
    with patch.object(
        workflow, "classify_data_format", return_value=datafmt_expected
    ) as classify_data_format:
        req, datafmt_actual = workflow._forecast_grid(
            path=path, c=c, varname="foo", tc=tc, var=testvars[EC.t2]
        )
    # When the format is configured explicitly, introspection must be skipped;
    # otherwise classify_data_format() is consulted exactly once:
    expected_classify_data_format_call_count = 0 if fmtstr else 1
    assert classify_data_format.call_count == expected_classify_data_format_call_count
    # For netCDF and Zarr forecast datasets, the grid will be extracted from the dataset and CF-
    # decorated, so the requirement is a _grid_nc task, whose taskname is "Forecast grid ..."
    assert req.taskname.startswith("Forecast grid")
    assert datafmt_actual == datafmt_expected


def test_workflow__forecast_grid__grib(c, tc, testvars):
Expand Down
19 changes: 18 additions & 1 deletion src/wxvx/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,15 @@
from uwtools.api.config import YAMLConfig, validate

from wxvx.strings import MET, S
from wxvx.util import LINETYPE, WXVXError, expand, resource_path, to_datetime, to_timedelta
from wxvx.util import (
LINETYPE,
DataFormat,
WXVXError,
expand,
resource_path,
to_datetime,
to_timedelta,
)

_TRUTH_NAMES_GRID = (S.GFS, S.HRRR)
_TRUTH_NAMES_POINT = (S.PREPBUFR,)
Expand Down Expand Up @@ -225,6 +233,7 @@ def values(self) -> list[datetime]:
class Forecast:
KEYS = (
S.coords,
S.format,
S.mask,
S.name,
S.path,
Expand All @@ -236,12 +245,14 @@ def __init__(
name: str,
path: str,
coords: Coords | dict | None = None,
format: DataFormat | str | None = None, # noqa: A002
mask: list[list[float]] | None = None,
projection: dict | None = None,
):
self._name = name
self._path = path
self._coords = coords
self._format = format
self._mask = mask
self._projection = projection

Expand All @@ -263,6 +274,12 @@ def name(self) -> str:
def path(self) -> str:
return self._path

@property
def format(self) -> DataFormat | None:
    """
    The forecast dataset format as a DataFormat member, or None if unspecified.
    """
    if isinstance(self._format, str):
        # Coerce the schema-validated string ("grib" | "netcdf" | "zarr") to its
        # enum member via name lookup. DataFormat[name] is the idiomatic form of
        # getattr(DataFormat, name) and already returns DataFormat, so no
        # typing.cast is needed. The result is cached for subsequent accesses.
        self._format = DataFormat[self._format.upper()]
    return self._format

@property
def coords(self) -> Coords | None:
if isinstance(self._coords, dict):
Expand Down
Loading