diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 126d0c5..a3063f4 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -2,7 +2,7 @@ name: Lint with pylint on: push: branches: - - 'main' + - "main" pull_request: jobs: lint: @@ -13,7 +13,7 @@ jobs: shell: bash -el {0} strategy: matrix: - python-version: [ "3.11" ] + python-version: ["3.11"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} @@ -24,7 +24,7 @@ jobs: - name: Install python dependencies run: | python -m pip install --upgrade pip - pip install pytest pytest_httpserver requests==2.32.5 pylint==2.17.5 python-dateutil==2.9.0 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj==3.7.2 numpy==1.26.4 shapely==2.1.2 netcdf4==1.7.3 h5netcdf==1.7.3 pillow==10.2.0 python-logging-rabbitmq==2.3.0 + pip install pytest pytest_httpserver requests==2.32.5 pylint==2.17.5 python-dateutil==2.9.0 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj==3.7.2 numpy==1.26.4 shapely==2.1.2 netcdf4==1.7.3 h5netcdf==1.8.1 h5py==3.15.1 pillow==10.2.0 python-logging-rabbitmq==2.3.0 - name: Checkout idss-engine-commons uses: actions/checkout@v3 @@ -57,5 +57,5 @@ jobs: - name: Run black formatter uses: psf/black@stable with: - options: "--check --line-length 99" # effectively 100, but black formatter has a bug + options: "--check --line-length 99" # effectively 100, but black formatter has a bug src: "./python" diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index e08986c..449576e 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -2,7 +2,7 @@ name: Run Pytest on: push: branches: - - 'main' + - "main" pull_request: jobs: build: @@ -12,7 +12,7 @@ jobs: shell: bash -el {0} strategy: matrix: - python-version: [ "3.11" ] + python-version: ["3.11"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} @@ -23,12 +23,12 @@ jobs: - name: Install python dependencies run: | python -m pip install --upgrade pip - pip install pytest pytest_httpserver requests==2.32.5 pylint==2.17.5 python-dateutil==2.9.0 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj==3.7.2 numpy==1.26.4 shapely==2.1.2 netcdf4==1.7.3 h5netcdf==1.7.3 pillow==10.2.0 python-logging-rabbitmq==2.3.0 pytest-cov==4.1.0 + pip install pytest pytest_httpserver requests==2.32.5 pylint==2.17.5 python-dateutil==2.9.0 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj==3.7.2 numpy==1.26.4 shapely==2.1.2 netcdf4==1.7.3 h5netcdf==1.8.1 h5py==3.15.1 pillow==10.2.0 python-logging-rabbitmq==2.3.0 pytest-cov==4.1.0 - name: Set PYTHONPATH for pytest run: | echo "PYTHONPATH=python/idsse_common/idsse/common" >> $GITHUB_ENV - + - name: Checkout idss-engine-commons uses: actions/checkout@v3 with: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f0b3e04..f483808 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: # Using this mirror lets us use mypyc-compiled black, which is about 2x faster - repo: https://github.com/psf/black-pre-commit-mirror - rev: 25.1.0 + rev: 26.1.0 hooks: - id: black # It is recommended to specify the latest version of Python diff --git a/docker/python-sci/Dockerfile b/docker/python-sci/Dockerfile index a78251f..5141616 100644 --- a/docker/python-sci/Dockerfile +++ b/docker/python-sci/Dockerfile @@ -27,7 +27,8 @@ RUN conda config --set solver classic && \ pint==0.25 \ numpy==1.26.4 \ netcdf4==1.7.3 \ - h5netcdf==1.7.3 \ + h5netcdf==1.8.1 \ + h5py==3.15.1 \ pygrib==2.1.6 \ pyproj==3.7.2 \ awscli==1.42.60 \ diff --git a/python/idsse_common/idsse/common/rabbitmq_utils.py b/python/idsse_common/idsse/common/rabbitmq_utils.py index 3a09fa9..0fd9393 100644 --- a/python/idsse_common/idsse/common/rabbitmq_utils.py +++ b/python/idsse_common/idsse/common/rabbitmq_utils.py @@ -305,6 +305,7 @@ def stop(self): connection: BlockingConnection = self.channel.connection if connection and connection.is_open: connection.process_data_events(time_limit=1) + # BUG: this does not guarantee all data events are waited for threadsafe_call(self.channel, self.channel.close, connection.close) def _connect(self) -> BlockingChannel: @@ -336,6 +337,7 @@ def _connect(self) -> BlockingChannel: return channel +# HACK: delete me, should be unused by all services now def subscribe_to_queue( connection: Conn | BlockingConnection, rmq_params: RabbitMqParams, diff --git a/python/idsse_common/idsse/common/sci/netcdf_io.py b/python/idsse_common/idsse/common/sci/netcdf_io.py index e573434..d0bb6e9 100644 --- a/python/idsse_common/idsse/common/sci/netcdf_io.py +++ b/python/idsse_common/idsse/common/sci/netcdf_io.py @@ -18,9 +18,9 @@ from datetime import datetime from typing import Protocol -# from netCDF4 import Dataset # pylint: disable=no-name-in-module import h5netcdf as h5nc from dateutil.parser import ParserError, parse as dt_parse +from netCDF4 import Dataset # pylint: disable=no-name-in-module from numpy import ndarray from idsse.common.utils import to_iso @@ -50,7 +50,7 @@ def getncattr(self, key: str) -> any: """ -def read_netcdf_global_attrs(filepath: str) -> dict: +def read_netcdf_global_attrs(filepath: str, use_h5_lib=False) -> dict: """Read the global attributes from a Netcdf file Args: @@ -59,17 +59,17 @@ def read_netcdf_global_attrs(filepath: str) -> dict: Returns: dict: Global attributes as dictionary """ - # if use_h5_lib: - with h5nc.File(filepath, "r") as nc_file: - attrs = _attrs_to_dict(nc_file) + if use_h5_lib: + with h5nc.File(filepath, "r") as nc_file: + attrs = _attrs_to_dict(nc_file, use_h5_lib=True) return attrs - # with Dataset(filepath) as in_file: - # attrs = _attrs_to_dict(in_file) - # return attrs + with Dataset(filepath) as in_file: + attrs = _attrs_to_dict(in_file) + return attrs -def read_netcdf(filepath: str) -> tuple[dict, ndarray]: +def read_netcdf(filepath: str, use_h5_lib=False) -> tuple[dict, ndarray]: """Reads DAS Netcdf file. Args: @@ -80,22 +80,22 @@ def read_netcdf(filepath: str) -> tuple[dict, ndarray]: Returns: tuple[dict, ndarray]: Global attributes and data """ - # if use_h5_lib: - with h5nc.File(filepath, "r") as nc_file: - grid = nc_file.variables["grid"][:] - attrs = _attrs_to_dict(nc_file) - return attrs, grid + if use_h5_lib: + with h5nc.File(filepath, "r") as nc_file: + grid = nc_file.variables["grid"][:] + attrs = _attrs_to_dict(nc_file, use_h5_lib=True) + return attrs, grid - # # otherwise, use netcdf4 library (default) - # with Dataset(filepath) as dataset: - # dataset.set_auto_maskandscale(False) - # grid = dataset.variables["grid"][:] + # otherwise, use netcdf4 library (default) + with Dataset(filepath) as dataset: + dataset.set_auto_maskandscale(False) + grid = dataset.variables["grid"][:] - # global_attrs = _attrs_to_dict(dataset) - # return global_attrs, grid + global_attrs = _attrs_to_dict(dataset) + return global_attrs, grid -def write_netcdf(attrs: dict, grid: ndarray, filepath: str) -> str: +def write_netcdf(attrs: dict, grid: ndarray, filepath: str, use_h5_lib=False) -> str: """Store data and attributes to a Netcdf4 file Args: @@ -113,44 +113,45 @@ def write_netcdf(attrs: dict, grid: ndarray, filepath: str) -> str: os.makedirs(dirname, exist_ok=True) logger.debug("Writing data to: %s", filepath) - # if use_h5_lib: - with h5nc.File(filepath, "w") as file: + if use_h5_lib: + with h5nc.File(filepath, "w") as file: + y_dimensions, x_dimensions = grid.shape + # set dimensions with a dictionary + file.dimensions = {"x": x_dimensions, "y": y_dimensions} + + grid_var = file.create_variable("grid", ("y", "x"), "f4") + grid_var[:] = grid + + for key, value in attrs.items(): + # write datetimes to ISO-8601; h5py.Attributes only understand numpy scalars/strings + if isinstance(value, datetime): + file.attrs[key] = to_iso(value) + # force non-string attribute to be string (shouldn't be necessary anyway) + elif not isinstance(value, str): + file.attrs[key] = str(value) + else: + file.attrs[key] = value + return filepath + + # otherwise, write file using netCDF4 library (default) + with Dataset(filepath, "w", format="NETCDF4") as dataset: y_dimensions, x_dimensions = grid.shape - # set dimensions with a dictionary - file.dimensions = {"x": x_dimensions, "y": y_dimensions} + dataset.createDimension("x", x_dimensions) + dataset.createDimension("y", y_dimensions) - grid_var = file.create_variable("grid", ("y", "x"), "f4") + grid_var = dataset.createVariable("grid", "f4", ("y", "x")) grid_var[:] = grid for key, value in attrs.items(): - # write datetimes to ISO-8601; h5py.Attributes only understand numpy scalars/strings - if isinstance(value, datetime): - file.attrs[key] = to_iso(value) - # force non-string attribute to be string (shouldn't be necessary anyway) - elif not isinstance(value, str): - file.attrs[key] = str(value) - else: - file.attrs[key] = value + setattr(dataset, key, str(value)) return filepath - # otherwise, write file using netCDF4 library (default) - # with Dataset(filepath, "w", format="NETCDF4") as dataset: - # y_dimensions, x_dimensions = grid.shape - # dataset.createDimension("x", x_dimensions) - # dataset.createDimension("y", y_dimensions) - - # grid_var = dataset.createVariable("grid", "f4", ("y", "x")) - # grid_var[:] = grid - - # for key, value in attrs.items(): - # setattr(dataset, key, str(value)) - - # return filepath +def _attrs_to_dict(dataset: HasNcAttr | h5nc.File, use_h5_lib=False) -> dict: + if not use_h5_lib: + return {key: dataset.getncattr(key) for key in dataset.ncattrs()} -def _attrs_to_dict(dataset: HasNcAttr | h5nc.File) -> dict: - # if use_h5_lib: attrs_dict = {} for key, value in dataset.attrs.items(): # if an attribute is an ISO-8601 string, restore to Python datetime type @@ -160,4 +161,3 @@ def _attrs_to_dict(dataset: HasNcAttr | h5nc.File) -> dict: except ParserError: attrs_dict[key] = value # must not have been an ISO-8601 string return attrs_dict - # return {key: dataset.getncattr(key) for key in dataset.ncattrs()} diff --git a/python/idsse_common/idsse/common/utils.py b/python/idsse_common/idsse/common/utils.py index 97321a5..00f5265 100644 --- a/python/idsse_common/idsse/common/utils.py +++ b/python/idsse_common/idsse/common/utils.py @@ -158,13 +158,16 @@ def expired(self) -> bool: return False # lock cannot be expired if it isn't locked try: - creation_time = os.stat(self._lock_path).st_birthtime - except AttributeError: - # Linux (and maybe Windows) don't support birthtime - creation_time = os.stat(self._lock_path).st_ctime + file_stat = os.stat(self._lock_path) except FileNotFoundError: - # lock file disappeared since start of function call?? Treat it as unexpired - creation_time = datetime.now(UTC).timestamp() + # lock file disappeared since start of function call; treat it as unexpired + return False + + try: + creation_time = file_stat.st_birthtime + except AttributeError: + # Linux (and maybe Windows) doesn't support birthtime + creation_time = file_stat.st_ctime return (datetime.now(UTC).timestamp() - creation_time) >= self._max_age def acquire(self, timeout=300.0) -> bool: diff --git a/python/idsse_common/test/sci/test_netcdf_io.py b/python/idsse_common/test/sci/broken_test_netcdf_io.py similarity index 71% rename from python/idsse_common/test/sci/test_netcdf_io.py rename to python/idsse_common/test/sci/broken_test_netcdf_io.py index 4036c7c..3305914 100644 --- a/python/idsse_common/test/sci/test_netcdf_io.py +++ b/python/idsse_common/test/sci/broken_test_netcdf_io.py @@ -72,12 +72,23 @@ def is_attributes_equal(actual: dict, expected: dict) -> bool: # pytest fixtures @fixture -def example_netcdf_data() -> tuple[dict[str, any], ndarray]: - return read_netcdf(EXAMPLE_NETCDF_FILEPATH) +def example_netcdf_data(netcdf_lock) -> tuple[dict[str, any], ndarray]: + # file lock protects against other unit tests having NetCDF file open (HDF throws OSError 101) + with netcdf_lock: + result = read_netcdf(EXAMPLE_NETCDF_FILEPATH) + return result -def test_read_netcdf_global_attrs(): - attrs = read_netcdf_global_attrs(EXAMPLE_NETCDF_FILEPATH) +def test_read_netcdf_global_attrs(netcdf_lock): + with netcdf_lock: + attrs = read_netcdf_global_attrs(EXAMPLE_NETCDF_FILEPATH) + + # attrs should be same as input attrs + assert is_attributes_equal(attrs, EXAMPLE_ATTRIBUTES) + + +def test_read_netcdf_global_attrs_with_h5nc(): + attrs = read_netcdf_global_attrs(EXAMPLE_NETCDF_FILEPATH, use_h5_lib=True) # attrs should be same as input attrs, except any ISO strings transformed to Python datetimes assert is_attributes_equal(attrs, EXAMPLE_ATTRIBUTES) @@ -94,6 +105,17 @@ def test_read_netcdf(example_netcdf_data: tuple[dict, ndarray]): assert is_attributes_equal(attrs, EXAMPLE_ATTRIBUTES) +def test_read_netcdf_with_h5nc(): + attrs, grid = read_netcdf(EXAMPLE_NETCDF_FILEPATH, use_h5_lib=True) + + assert grid.shape == (1597, 2345) + y_max, x_max = grid.shape + assert grid[0, 0] == approx(72.80599) + assert grid[round(y_max / 2), round(x_max / 2)] == approx(26.005991) + assert grid[y_max - 1, x_max - 1] == approx(15.925991) + assert is_attributes_equal(attrs, EXAMPLE_ATTRIBUTES) + + @fixture def destination_nc_file() -> str: parent_dir = os.path.abspath("./tmp") @@ -116,20 +138,45 @@ def destination_nc_file() -> str: def test_read_and_write_netcdf( - example_netcdf_data: tuple[dict[str, any], ndarray], destination_nc_file: str + example_netcdf_data: tuple[dict[str, any], ndarray], destination_nc_file: str, netcdf_lock ): attrs, grid = example_netcdf_data - - # verify write_netcdf functionality attrs["prodKey"] = EXAMPLE_PROD_KEY attrs["prodSource"] = attrs["product"] - written_filepath = write_netcdf(attrs, grid, destination_nc_file) + + # verify write_netcdf functionality + with netcdf_lock: + written_filepath = write_netcdf(attrs, grid, destination_nc_file) + assert written_filepath == destination_nc_file assert os.path.exists(destination_nc_file) - new_file_attrs, new_file_grid = read_netcdf(written_filepath) + # verify read_netcdf functionality + with netcdf_lock: + new_file_attrs, new_file_grid = read_netcdf(written_filepath) + assert is_attributes_equal(new_file_attrs, attrs) assert new_file_grid[123][321] == grid[123][321] + assert is_attributes_equal(new_file_attrs, attrs) + + +def test_read_and_write_netcdf_with_h5( + example_netcdf_data: tuple[dict[str, any], ndarray], destination_nc_file: str +): + attrs, grid = example_netcdf_data + attrs["prodKey"] = EXAMPLE_PROD_KEY + attrs["prodSource"] = attrs["product"] + + # verify write_netcdf_with_h5nc functionality + written_filepath = write_netcdf(attrs, grid, destination_nc_file, use_h5_lib=True) + + assert written_filepath == destination_nc_file + + # verify read_netcdf with h5nc functionality + new_file_attrs, new_file_grid = read_netcdf(written_filepath, use_h5_lib=True) + + assert new_file_grid[123][321] == grid[123][321] + assert is_attributes_equal(new_file_attrs, attrs) def test_write_netcdf_nonstring_attrs( @@ -142,10 +189,10 @@ def test_write_netcdf_nonstring_attrs( try: # verify write_netcdf successfully writes non-string attrs (they shouldn't be anyway) - _ = write_netcdf(in_attrs, grid, destination_nc_file) + _ = write_netcdf(in_attrs, grid, destination_nc_file, use_h5_lib=True) except Exception as exc: # pylint: disable=broad-exception-caught fail(f"Unable to write NetCDF to {destination_nc_file} due to exception: {str(exc)}") - out_attrs = read_netcdf_global_attrs(destination_nc_file) + out_attrs = read_netcdf_global_attrs(destination_nc_file, use_h5_lib=True) for value in out_attrs.values(): assert isinstance(value, (str, datetime)) diff --git a/python/idsse_common/test/sci/test_geo_image.py b/python/idsse_common/test/sci/test_geo_image.py index f48369c..9bc0cc4 100644 --- a/python/idsse_common/test/sci/test_geo_image.py +++ b/python/idsse_common/test/sci/test_geo_image.py @@ -394,7 +394,8 @@ def test_add_all_states(proj): "resources", "nbm_temp-202211111100-202211121300.nc", ) - attrs, data = read_netcdf(filename) + + attrs, data = read_netcdf(filename, use_h5_lib=True) if attrs["data_order"] == "latitude,longitude": data = numpy.transpose(data) geo_image = GeoImage.from_data_grid(proj, data) diff --git a/python/idsse_common/test/sci/test_grid_proj.py b/python/idsse_common/test/sci/test_grid_proj.py index eef5940..d901161 100644 --- a/python/idsse_common/test/sci/test_grid_proj.py +++ b/python/idsse_common/test/sci/test_grid_proj.py @@ -22,7 +22,6 @@ from idsse.common.sci.grid_proj import GridProj from idsse.common.utils import round_, RoundingMethod - # example data EXAMPLE_PROJ_SPEC = "+proj=lcc +lat_0=25.0 +lon_0=-95.0 +lat_1=25.0 +a=6371200" EXAMPLE_GRID_SPEC = "+dx=2539.703 +dy=2539.703 +w=2345 +h=1597 +lat_ll=19.229 +lon_ll=-126.2766" diff --git a/python/idsse_common/test/sci/test_vectaster.py b/python/idsse_common/test/sci/test_vectaster.py index 39000d4..1691f6b 100644 --- a/python/idsse_common/test/sci/test_vectaster.py +++ b/python/idsse_common/test/sci/test_vectaster.py @@ -31,7 +31,6 @@ rasterize_polygon, ) - EXAMPLE_PROJ_SPEC = "+proj=lcc +lat_0=25.0 +lon_0=-95.0 +lat_1=25.0 +a=6371200" EXAMPLE_GRID_SPEC = "+dx=2539.703 +dy=2539.703 +w=2345 +h=1597 +lat_ll=19.229 +lon_ll=-126.2766" diff --git a/python/idsse_common/test/test_http_utils.py b/python/idsse_common/test/test_http_utils.py index 4f859a6..b7c0263 100644 --- a/python/idsse_common/test/test_http_utils.py +++ b/python/idsse_common/test/test_http_utils.py @@ -21,7 +21,6 @@ from idsse.common.http_utils import HttpUtils from idsse.testing.utils.resources import get_resource_from_file - EXAMPLE_ISSUE = datetime(2024, 10, 30, 20, 56, 40, tzinfo=UTC) EXAMPLE_VALID = datetime(2024, 10, 30, 20, 56, 40, tzinfo=UTC) diff --git a/python/idsse_common/test/test_rabbitmq_rpc.py b/python/idsse_common/test/test_rabbitmq_rpc.py index 60822db..c1bee58 100644 --- a/python/idsse_common/test/test_rabbitmq_rpc.py +++ b/python/idsse_common/test/test_rabbitmq_rpc.py @@ -33,7 +33,6 @@ RpcResponse, ) - # Example data objects CONN = Conn("localhost", "/", port=5672, username="user", password="password") RMQ_PARAMS = RabbitMqParams( diff --git a/python/idsse_common/test/test_rabbitmq_utils.py b/python/idsse_common/test/test_rabbitmq_utils.py index 58ccce1..1b0525d 100644 --- a/python/idsse_common/test/test_rabbitmq_utils.py +++ b/python/idsse_common/test/test_rabbitmq_utils.py @@ -38,7 +38,6 @@ threadsafe_nack, ) - # Example data objects CONN = Conn("localhost", "/", port=5672, username="user", password="password") RMQ_PARAMS = RabbitMqParams( diff --git a/python/idsse_common/test/test_utils.py b/python/idsse_common/test/test_utils.py index 64eeb87..c46f5f8 100644 --- a/python/idsse_common/test/test_utils.py +++ b/python/idsse_common/test/test_utils.py @@ -30,9 +30,9 @@ hash_code, round_, round_half_away, + strtobool, to_compact, to_iso, - strtobool, )