Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 145 additions & 7 deletions src/bitsea/commons/mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import numpy as np
import xarray as xr
from numpy.typing import ArrayLike
from scipy.sparse import dok_array

from bitsea.commons.bathymetry import Bathymetry
from bitsea.commons.geodistances import extend_from_average
Expand Down Expand Up @@ -185,7 +186,7 @@ def convert_lon_lat_wetpoint_indices(
# Remove all the points whose distance is greater than the max_radius
cut_mask[distances > max_radius] = False

# If there is no water in the slice, we return (jp, ip) but we
# If there is no water in the slice, we return (jp, ip), but we
# also raise a warning
if not np.any(cut_mask):
warn(
Expand All @@ -200,7 +201,7 @@ def convert_lon_lat_wetpoint_indices(
distances[~cut_mask] = max_radius * max_radius + 1

# We get the index of the minimum value. We need to unravel because
# argmin works on the flatten array
# argmin works on the flattened array
local_min = np.unravel_index(
np.argmin(distances, axis=None), distances.shape
)
Expand Down Expand Up @@ -236,7 +237,7 @@ def mask_at_level(self, z: float) -> np.ndarray:

def bathymetry_in_cells(self) -> np.ndarray:
"""
Returns a 2d map that for each columns associates the number of water
Returns a 2d map that associates for each column the number of water
cells that are present on that column.

Returns:
Expand All @@ -247,7 +248,7 @@ def bathymetry_in_cells(self) -> np.ndarray:
def rough_bathymetry(self) -> np.ndarray:
"""
Calculates the bathymetry used by the model
It does not takes in account e3t
It does not take into account e3t

Returns:
np.ndarray: a 2d numpy array of floats
Expand All @@ -259,7 +260,7 @@ def rough_bathymetry(self) -> np.ndarray:
def bathymetry(self) -> np.ndarray:
"""
Calculates the bathymetry used by the model
Best evaluation, since it takes in account e3t.
Best evaluation, since it takes into account e3t.

Returns:
a 2d numpy array of floats
Expand Down Expand Up @@ -670,7 +671,7 @@ def save_as_netcdf(self, file_path: Union[PathLike, str]):

class MaskBathymetry(Bathymetry):
"""
This class is a bathymetry, generated starting from a mask, i.e., it
This class is a bathymetry generated starting from a mask, i.e., it
returns the z-coordinate of the bottom face of the deepest cell of the
column that contains the point (lon, lat).
"""
Expand All @@ -680,7 +681,7 @@ def __init__(self, mask: Mask):
self._bathymetry_data = mask.bathymetry()

# Fix the bathymetry of the land cells to 0 (to be coherent with the
# behaviour of the bathymetry classes). Otherwise, if we let the land
# behavior of the bathymetry classes). Otherwise, if we let the land
# points to have bathymetry = 1e20, they will be in every
# BathymetricBasin
self._bathymetry_data[np.logical_not(self._mask[0, :, :])] = 0
Expand Down Expand Up @@ -712,3 +713,140 @@ def __call__(self, lon, lat):
return output

return float(output.squeeze().item())


class MaskWithRivers(Mask):
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The MaskWithRivers class is missing a docstring. As a public class that extends Mask, it should have documentation explaining its purpose, parameters, and how it differs from the base Mask class (specifically how it handles river cells separately from sea cells).

Suggested change
class MaskWithRivers(Mask):
class MaskWithRivers(Mask):
"""
A subclass of Mask that distinguishes river cells from sea and land cells.
This class extends the base Mask functionality by allowing certain cells to be
marked as rivers, in addition to the standard sea and land classification.
River cells are handled separately from sea cells, which is useful for models
or analyses that require explicit treatment of riverine regions.
Parameters
----------
grid : Grid
The grid object describing the spatial layout of the mask.
zlevels : ArrayLike
The vertical levels of the grid.
mask_array : ArrayLike
The mask array indicating sea (typically 1) and land (typically 0) cells.
river_positions : ArrayLike
An array indicating the positions of river cells within the grid.
allow_broadcast : bool, optional
Whether to allow broadcasting of the mask array to match the grid shape.
e3t : Optional[np.ndarray], optional
Optional array of vertical cell thicknesses.
Notes
-----
Unlike the base Mask class, MaskWithRivers treats river cells as a separate
category from both sea and land. This allows for specialized handling of
riverine processes in downstream applications.
"""

Copilot uses AI. Check for mistakes.
def __init__(
self,
grid: Grid,
zlevels: ArrayLike,
mask_array: ArrayLike,
river_positions: ArrayLike,
allow_broadcast: bool = False,
e3t: Optional[np.ndarray] = None,
):
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The init method lacks a docstring documenting its parameters. Given the complexity of this constructor and the specific role of the river_positions parameter, documentation is essential for users to understand how to properly instantiate this class.

Suggested change
):
):
"""
Initialize a MaskWithRivers object.
Parameters
----------
grid : Grid
The grid object defining the spatial domain.
zlevels : ArrayLike
The vertical levels of the mask (e.g., depth layers).
mask_array : ArrayLike
A boolean array indicating the presence of water (True) or land (False)
at each grid cell and depth level. Shape should be (len(zlevels), grid.shape[0], grid.shape[1]).
river_positions : ArrayLike
An array or sparse matrix indicating the positions of river cells in the grid.
Typically, this should be a 2D array (grid.shape[0], grid.shape[1]) where nonzero entries
indicate river cells and their values may represent river indices or IDs.
allow_broadcast : bool, optional
If True, allows broadcasting of mask_array to match the required shape. Default is False.
e3t : Optional[np.ndarray], optional
Optional array specifying the thickness of each vertical layer.
Notes
-----
River cells are removed from the mask and stored separately for further processing.
"""

Copilot uses AI. Check for mistakes.
rivers = np.asarray(river_positions)

mask_dim = len(zlevels), grid.shape[0], grid.shape[1]

try:
mask_data = np.asarray(mask_array, dtype=bool, copy=False)
input_copied = False
except TypeError:
# Old versions of Numpy do not support the "copy=False" syntax;
# In this case, we call the function without specifying the value
# of the "copy" argument, and we must assume that the data has not
# been copied (for safety)
mask_data = np.asarray(mask_array, dtype=bool)
input_copied = False
except ValueError:
# In this case, it has been impossible to avoid the copy. We run
# again the same command, but this time we force the copy
mask_data = np.asarray(mask_array, dtype=bool)
input_copied = True

# If mask_data is too small to cover all the mask, we try to broadcast
# it. This enforces a copy
if allow_broadcast and mask_data.shape != mask_dim:
mask_data = np.copy(np.broadcast_to(mask_array, mask_dim))
input_copied = True

# Ensure that mask_array is writeable
if not input_copied:
mask_data = np.copy(mask_data)

# Here we save the cells that belong to each river into a list of
# sparse arrays. We must use a list of 2D array because there are no
# 3D sparse arrays implemented inside scipy.
self._river_cells = []
rivers = dok_array(rivers)
for depth_index in range(mask_data.shape[0]):
found_cells = False
current_river_cells = dok_array(mask_data.shape[1:], dtype=int)
for (i, j), value in rivers.items():
if not mask_data[depth_index, i, j]:
continue
current_river_cells[i, j] = value
found_cells = True

if not found_cells:
break
Comment on lines +772 to +773
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The early break when no river cells are found at a depth level assumes that rivers don't exist at deeper levels if they don't exist at a shallower level. This assumption may not hold if different rivers have different depths or if there are gaps in the vertical distribution of river cells. Consider processing all depth levels instead of breaking early, or document this assumption clearly if it's intentional.

Suggested change
if not found_cells:
break

Copilot uses AI. Check for mistakes.
self._river_cells.append(current_river_cells)

# Remove rivers from mask_array
for i, j in zip(*rivers.nonzero()):
mask_data[:, i, j] = False

super().__init__(
grid=grid,
zlevels=zlevels,
mask_array=mask_data,
allow_broadcast=allow_broadcast,
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The allow_broadcast parameter is passed to the parent's init method, but at this point the mask_data has already been processed and potentially broadcasted. This could lead to inconsistent behavior because the parent class won't perform any broadcasting (since mask_data already has the correct shape). Consider setting allow_broadcast=False when calling super().init() to make the intent clearer.

Suggested change
allow_broadcast=allow_broadcast,
allow_broadcast=False,

Copilot uses AI. Check for mistakes.
e3t=e3t,
)
Comment on lines +718 to +786
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The MaskWithRivers class should override the copy() method to properly handle the _river_cells attribute. The parent class's copy() method (which uses self.class) will fail because it doesn't pass the required river_positions parameter. This will cause a TypeError when attempting to copy a MaskWithRivers instance.

Copilot uses AI. Check for mistakes.

def get_water_cells(self) -> np.ndarray:
output_data = np.copy(self[:])
for depth_index, river_cells in enumerate(self._river_cells):
lat_indices, lon_indices = river_cells.nonzero()
output_data[depth_index, lat_indices, lon_indices] = True
return output_data
Comment on lines +788 to +793
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The get_water_cells method lacks a docstring. This is particularly important because this method's behavior differs from the parent class's implementation - it returns the combined mask of both sea and river cells, which is critical for users to understand.

Copilot uses AI. Check for mistakes.

@classmethod
def from_file_pointer(
cls,
file_pointer: netCDF4.Dataset,
*,
zlevels_var_name: str = "nav_lev",
ylevels_var_name: str = "nav_lat",
xlevels_var_name: str = "nav_lon",
e3t_var_name: Optional[str] = None,
mask_var_name: str = "tmask",
rivers_var_name: str = "rivers",
read_e3t: bool = True,
):
raw_mask = Mask.from_file_pointer(
file_pointer,
zlevels_var_name=zlevels_var_name,
ylevels_var_name=ylevels_var_name,
xlevels_var_name=xlevels_var_name,
e3t_var_name=e3t_var_name,
mask_var_name=mask_var_name,
read_e3t=read_e3t,
)
rivers = np.asarray(
file_pointer.variables[rivers_var_name][:], dtype=int
)
return MaskWithRivers(
grid=raw_mask.grid,
zlevels=raw_mask.zlevels,
mask_array=raw_mask.as_mutable_array(),
river_positions=rivers,
allow_broadcast=False,
e3t=raw_mask.e3t,
)

@classmethod
def from_file(
cls,
file_path: PathLike,
*,
zlevels_var_name: str = "nav_lev",
ylevels_var_name: str = "nav_lat",
xlevels_var_name: str = "nav_lon",
e3t_var_name: Optional[str] = None,
mask_var_name: str = "tmask",
rivers_var_name: str = "rivers",
read_e3t: bool = True,
):
with netCDF4.Dataset(file_path, "r") as f:
return cls.from_file_pointer(
f,
zlevels_var_name=zlevels_var_name,
ylevels_var_name=ylevels_var_name,
xlevels_var_name=xlevels_var_name,
e3t_var_name=e3t_var_name,
mask_var_name=mask_var_name,
rivers_var_name=rivers_var_name,
read_e3t=read_e3t,
)
27 changes: 27 additions & 0 deletions tests/commons/test_mask.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from itertools import product as cart_prod

import netCDF4
import numpy as np
import pytest

from bitsea.commons.grid import RegularGrid
from bitsea.commons.mask import FILL_VALUE
from bitsea.commons.mask import Mask
from bitsea.commons.mask import MaskBathymetry
from bitsea.commons.mask import MaskWithRivers
from bitsea.commons.mesh import Mesh


Expand Down Expand Up @@ -409,3 +411,28 @@ def test_mask_to_xarray(mask):
assert np.allclose(xarray_mask.longitude, mask.xlevels)
assert np.allclose(xarray_mask.depth, mask.zlevels)
assert np.allclose(xarray_mask.tmask, mask)


Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test function is missing the @pytest.mark.uses_test_data decorator. This decorator is needed for all tests that use the test_data_dir fixture (as shown in other tests like test_mask_from_file and test_mask_from_file_when_regular). Without this marker, the test will fail rather than be skipped when test data is unavailable.

Suggested change
@pytest.mark.uses_test_data

Copilot uses AI. Check for mistakes.
def test_mask_with_rivers_from_file(test_data_dir):
mask_dir = test_data_dir / "masks"
mask_file = mask_dir / "mask_with_rivers.nc"

rivers_mask = MaskWithRivers.from_file(mask_file)
water_cells = rivers_mask.get_water_cells()

with netCDF4.Dataset(mask_file, "r") as ds:
original_mask = np.asarray(ds.variables["tmask"]) > 0.5
rivers = np.asarray(ds.variables["rivers"])

assert np.all(original_mask == water_cells)

# Check that in the current mask every cell assigned to a river is set
# to "False"
for i, j in zip(*np.where(rivers)):
assert not np.any(rivers_mask[:, i, j])

# Assert that if a value inside water_cells is True, then it is also True
# inside rivers_mask (rivers_mask is contained inside water_cells)
assert np.all(
np.logical_or(np.logical_and(rivers_mask, water_cells), ~rivers_mask)
)
Comment on lines +416 to +438
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no test coverage for the copy() method with MaskWithRivers instances. Since the base Mask class has a test_mask_copy test, and given that MaskWithRivers has additional state (_river_cells) that needs to be properly copied, a test should be added to verify that copying a MaskWithRivers instance works correctly and preserves the river information.

Copilot uses AI. Check for mistakes.