C2SM
diff --git a/‎model/atmosphere/diffusion/tests/diffusion/stencil_tests/test_apply_diffusion_to_vn.py‎
Lines changed: 0 additions & 5 deletions b/‎model/atmosphere/diffusion/tests/diffusion/stencil_tests/test_apply_diffusion_to_vn.py‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎model/common/src/icon4py/model/common/decomposition/definitions.py‎
Lines changed: 44 additions & 22 deletions b/‎model/common/src/icon4py/model/common/decomposition/definitions.py‎
Lines changed: 44 additions & 22 deletions
diff --git a/‎model/common/src/icon4py/model/common/decomposition/mpi_decomposition.py‎
Lines changed: 40 additions & 26 deletions b/‎model/common/src/icon4py/model/common/decomposition/mpi_decomposition.py‎
Lines changed: 40 additions & 26 deletions
@@ -170,8 +170,3 @@ def input_data(self, grid: base.Grid) -> dict:
             vertical_start=0,
             vertical_end=grid.num_levels,
         )
-
-
-@pytest.mark.continuous_benchmarking
-class TestApplyDiffusionToVnContinuousBenchmarking(TestApplyDiffusionToVn):
-    pass
@@ -8,16 +8,16 @@
 
 from __future__ import annotations
 
+import dataclasses
 import functools
 import logging
 from collections.abc import Sequence
-from dataclasses import dataclass
-from enum import IntEnum
+from enum import Enum
 from typing import Any, Literal, Protocol, overload, runtime_checkable
 
 import dace  # type: ignore[import-untyped]
+import gt4py.next as gtx
 import numpy as np
-from gt4py.next import Dimension, Field
 
 from icon4py.model.common import utils
 from icon4py.model.common.orchestration.halo_exchange import DummyNestedSDFG
@@ -34,7 +34,7 @@ class ProcessProperties(Protocol):
     comm_size: int
 
 
-@dataclass(frozen=True, init=False)
+@dataclasses.dataclass(frozen=True, init=False)
 class SingleNodeProcessProperties(ProcessProperties):
     comm: None
     rank: int
@@ -69,14 +69,14 @@ def __call__(self) -> int:
 
 
 class DecompositionInfo:
-    class EntryType(IntEnum):
+    class EntryType(int, Enum):
         ALL = 0
         OWNED = 1
         HALO = 2
 
     @utils.chainable
     def with_dimension(
-        self, dim: Dimension, global_index: data_alloc.NDArray, owner_mask: data_alloc.NDArray
+        self, dim: gtx.Dimension, global_index: data_alloc.NDArray, owner_mask: data_alloc.NDArray
     ) -> None:
         self._global_index[dim] = global_index
         self._owner_mask[dim] = owner_mask
@@ -87,8 +87,8 @@ def __init__(
         num_edges: int | None = None,
         num_vertices: int | None = None,
     ):
-        self._global_index: dict[Dimension, data_alloc.NDArray] = {}
-        self._owner_mask: dict[Dimension, data_alloc.NDArray] = {}
+        self._global_index: dict[gtx.Dimension, data_alloc.NDArray] = {}
+        self._owner_mask: dict[gtx.Dimension, data_alloc.NDArray] = {}
         self._num_vertices = num_vertices
         self._num_cells = num_cells
         self._num_edges = num_edges
@@ -106,7 +106,7 @@ def num_vertices(self) -> int | None:
         return self._num_vertices
 
     def local_index(
-        self, dim: Dimension, entry_type: EntryType = EntryType.ALL
+        self, dim: gtx.Dimension, entry_type: EntryType = EntryType.ALL
     ) -> data_alloc.NDArray:
         match entry_type:
             case DecompositionInfo.EntryType.ALL:
@@ -120,7 +120,7 @@ def local_index(
                 mask = self._owner_mask[dim]
                 return index[mask]
 
-    def _to_local_index(self, dim: Dimension) -> data_alloc.NDArray:
+    def _to_local_index(self, dim: gtx.Dimension) -> data_alloc.NDArray:
         data = self._global_index[dim]
         assert data.ndim == 1
         if isinstance(data, np.ndarray):
@@ -131,11 +131,11 @@ def _to_local_index(self, dim: Dimension) -> data_alloc.NDArray:
             xp.arange(data.shape[0])
         return xp.arange(data.shape[0])
 
-    def owner_mask(self, dim: Dimension) -> data_alloc.NDArray:
+    def owner_mask(self, dim: gtx.Dimension) -> data_alloc.NDArray:
         return self._owner_mask[dim]
 
     def global_index(
-        self, dim: Dimension, entry_type: EntryType = EntryType.ALL
+        self, dim: gtx.Dimension, entry_type: EntryType = EntryType.ALL
     ) -> data_alloc.NDArray:
         match entry_type:
             case DecompositionInfo.EntryType.ALL:
@@ -156,30 +156,45 @@ def is_ready(self) -> bool: ...
 
 @runtime_checkable
 class ExchangeRuntime(Protocol):
-    def exchange(self, dim: Dimension, *fields: Field) -> ExchangeResult: ...
+    @overload
+    def exchange(self, dim: gtx.Dimension, *fields: gtx.Field) -> ExchangeResult: ...
 
-    def exchange_and_wait(self, dim: Dimension, *fields: Field) -> None: ...
+    @overload
+    def exchange(self, dim: gtx.Dimension, *buffers: data_alloc.NDArray) -> ExchangeResult: ...
+
+    @overload
+    def exchange_and_wait(self, dim: gtx.Dimension, *fields: gtx.Field) -> None: ...
+
+    @overload
+    def exchange_and_wait(self, dim: gtx.Dimension, *buffers: data_alloc.NDArray) -> None: ...
 
     def get_size(self) -> int: ...
 
     def my_rank(self) -> int: ...
 
+    def __str__(self) -> str:
+        return f"{self.__class__} (rank = {self.my_rank()} / {self.get_size()})"
+
 
-@dataclass
+@dataclasses.dataclass
 class SingleNodeExchange:
-    def exchange(self, dim: Dimension, *fields: Field) -> ExchangeResult:
+    def exchange(
+        self, dim: gtx.Dimension, *fields: gtx.Field | data_alloc.NDArray
+    ) -> ExchangeResult:
         return SingleNodeResult()
 
-    def exchange_and_wait(self, dim: Dimension, *fields: Field) -> None:
-        return
+    def exchange_and_wait(
+        self, dim: gtx.Dimension, *fields: gtx.Field | data_alloc.NDArray
+    ) -> None:
+        return None
 
     def my_rank(self) -> int:
         return 0
 
     def get_size(self) -> int:
         return 1
 
-    def __call__(self, *args: Any, dim: Dimension, wait: bool = True) -> ExchangeResult | None:  # type: ignore[return] # return statment in else condition
+    def __call__(self, *args: Any, dim: gtx.Dimension, wait: bool = True) -> ExchangeResult | None:  # type: ignore[return] # return statment in else condition
         """Perform a halo exchange operation.
 
         Args:
@@ -198,7 +213,9 @@ def __call__(self, *args: Any, dim: Dimension, wait: bool = True) -> ExchangeRes
 
     # Implementation of DaCe SDFGConvertible interface
     # For more see [dace repo]/dace/frontend/python/common.py#[class SDFGConvertible]
-    def dace__sdfg__(self, *args: Any, dim: Dimension, wait: bool = True) -> dace.sdfg.sdfg.SDFG:
+    def dace__sdfg__(
+        self, *args: Any, dim: gtx.Dimension, wait: bool = True
+    ) -> dace.sdfg.sdfg.SDFG:
         sdfg = DummyNestedSDFG().__sdfg__()
         sdfg.name = "_halo_exchange_"
         return sdfg
@@ -234,15 +251,17 @@ def __sdfg_signature__(self) -> tuple[Sequence[str], Sequence[str]]:
         ...
 
 
-@dataclass
+@dataclasses.dataclass
 class HaloExchangeWait:
     exchange_object: SingleNodeExchange  # maintain the same interface with the MPI counterpart
 
     def __call__(self, communication_handle: SingleNodeResult) -> None:
         communication_handle.wait()
 
     # Implementation of DaCe SDFGConvertible interface
-    def dace__sdfg__(self, *args: Any, dim: Dimension, wait: bool = True) -> dace.sdfg.sdfg.SDFG:
+    def dace__sdfg__(
+        self, *args: Any, dim: gtx.Dimension, wait: bool = True
+    ) -> dace.sdfg.sdfg.SDFG:
         sdfg = DummyNestedSDFG().__sdfg__()
         sdfg.name = "_halo_exchange_wait_"
         return sdfg
@@ -344,3 +363,6 @@ def create_single_node_exchange(
     props: SingleNodeProcessProperties, decomp_info: DecompositionInfo
 ) -> ExchangeRuntime:
     return SingleNodeExchange()
+
+
+single_node_default = SingleNodeExchange()
@@ -46,7 +46,6 @@
     ghex = None
     unstructured = None
 
-
 if TYPE_CHECKING:
     import mpi4py.MPI  # type: ignore [import-not-found]
 
@@ -203,50 +202,65 @@ def _create_pattern(self, horizontal_dim: gtx.Dimension) -> DomainDescriptor:
     def _slice_field_based_on_dim(self, field: gtx.Field, dim: gtx.Dimension) -> data_alloc.NDArray:
         """
         Slices the field based on the dimension passed in.
+
+        This operation is *necessary* when called from FORTRAN (the granule context), where fields have length nproma and are therefore larger than the grid.
+        In a purely Python setup it does not do anything.
         """
         if dim == dims.VertexDim:
-            return field.ndarray[: self._decomposition_info.num_vertices, :]
+            return field.ndarray[: self._decomposition_info.num_vertices]
         elif dim == dims.EdgeDim:
-            return field.ndarray[: self._decomposition_info.num_edges, :]
+            return field.ndarray[: self._decomposition_info.num_edges]
         elif dim == dims.CellDim:
-            return field.ndarray[: self._decomposition_info.num_cells, :]
+            return field.ndarray[: self._decomposition_info.num_cells]
         else:
             raise ValueError(f"Unknown dimension {dim}")
 
-    def _get_applied_pattern(self, dim: gtx.Dimension, f: gtx.Field) -> str:
-        # TODO(havogt): the cache is never cleared, consider using functools.lru_cache in a bigger refactoring.
-        assert hasattr(f, "__gt_buffer_info__")
-        # dimension and buffer_info uniquely identifies the exchange pattern
-        key = (dim, f.__gt_buffer_info__.hash_key)
-        try:
-            return self._applied_patterns_cache[key]
-        except KeyError:
-            assert dim in f.domain.dims
-            array = self._slice_field_based_on_dim(f, dim)
-            self._applied_patterns_cache[key] = self._patterns[dim](
-                make_field_descriptor(
-                    self._domain_descriptors[dim],
-                    array,
-                    arch=Architecture.CPU if isinstance(f, np.ndarray) else Architecture.GPU,
+    def _make_field_descriptor(self, dim: gtx.Dimension, array: data_alloc.NDArray) -> Any:
+        return make_field_descriptor(
+            self._domain_descriptors[dim],
+            array,
+            arch=Architecture.CPU if isinstance(array, np.ndarray) else Architecture.GPU,
+        )
+
+    def _get_applied_pattern(self, dim: gtx.Dimension, f: gtx.Field | data_alloc.NDArray) -> str:
+        if isinstance(f, gtx.Field):
+            assert hasattr(f, "__gt_buffer_info__")
+            # dimension and buffer_info uniquely identifies the exchange pattern
+            # TODO(havogt): the cache is never cleared, consider using functools.lru_cache in a bigger refactoring.
+            key = (dim, f.__gt_buffer_info__.hash_key)
+            try:
+                return self._applied_patterns_cache[key]
+            except KeyError:
+                assert dim in f.domain.dims
+                array = self._slice_field_based_on_dim(f, dim)
+                self._applied_patterns_cache[key] = self._patterns[dim](
+                    self._make_field_descriptor(dim, array)
                 )
-            )
-            return self._applied_patterns_cache[key]
+                return self._applied_patterns_cache[key]
+        else:
+            assert f.ndim in (1, 2), "Buffers must be 1d or 2d"
+            return self._patterns[dim](self._make_field_descriptor(dim, f))
 
-    def exchange(self, dim: gtx.Dimension, *fields: gtx.Field) -> MultiNodeResult:
+    def exchange(
+        self, dim: gtx.Dimension, *fields: gtx.Field | data_alloc.NDArray
+    ) -> MultiNodeResult:
         """
         Exchange method that slices the fields based on the dimension and then performs halo exchange.
-
-            This operation is *necessary* for the use inside FORTRAN as there fields are larger than the grid (nproma size). where it does not do anything in a purely Python setup.
-            the granule context where fields otherwise have length nproma.
         """
+        assert (
+            dim in dims.MAIN_HORIZONTAL_DIMENSIONS.values()
+        ), f"first dimension must be one of ({dims.MAIN_HORIZONTAL_DIMENSIONS.values()})"
+
         applied_patterns = [self._get_applied_pattern(dim, f) for f in fields]
         # With https://github.com/ghex-org/GHEX/pull/186, ghex will schedule/sync work on the default stream,
         # otherwise we need an explicit device synchronize here.
         handle = self._comm.exchange(applied_patterns)
         log.debug(f"exchange for {len(fields)} fields of dimension ='{dim.value}' initiated.")
         return MultiNodeResult(handle, applied_patterns)
 
-    def exchange_and_wait(self, dim: gtx.Dimension, *fields: gtx.Field) -> None:
+    def exchange_and_wait(
+        self, dim: gtx.Dimension, *fields: gtx.Field | data_alloc.NDArray
+    ) -> None:
         res = self.exchange(dim, *fields)
         res.wait()
         log.debug(f"exchange for {len(fields)} fields of dimension ='{dim.value}' done.")
Original file line number	Diff line number	Diff line change
`@@ -170,8 +170,3 @@ def input_data(self, grid: base.Grid) -> dict:`
`170`	`170`	`vertical_start=0,`
`171`	`171`	`vertical_end=grid.num_levels,`
`172`	`172`	`)`
`173`		`-`
`174`		`-`
`175`		`-@pytest.mark.continuous_benchmarking`
`176`		`-class TestApplyDiffusionToVnContinuousBenchmarking(TestApplyDiffusionToVn):`
`177`		`- pass`