diff --git a/ci/distributed.yml b/ci/distributed.yml
index 640184805b..50c7f85444 100644
--- a/ci/distributed.yml
+++ b/ci/distributed.yml
@@ -38,26 +38,22 @@ build_distributed_baseimage_aarch64:
     DOCKERFILE: ci/docker/checkout_mpi.Dockerfile
     DOCKER_BUILD_ARGS: '["PYVERSION=$PYVERSION", "BASE_IMAGE=${BASE_IMAGE_${PYVERSION_PREFIX}}", "VENV=${UV_PROJECT_ENVIRONMENT}"]'
     PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi
-    USE_MPI: NO
-    SLURM_MPI_TYPE: pmix
-    PMIX_MCA_psec: native
-    PMIX_MCA_gds: "^shmem2"
 
-.build_distributed_cpu:
+.build_distributed:
   extends: [.build_distributed_template]
   variables:
     UV_PROJECT_ENVIRONMENT: venv_dist
 
-build_distributed_cpu:
+build_distributed:
   stage: image
-  extends: [.container-builder-cscs-gh200, .build_distributed_cpu]
+  extends: [.container-builder-cscs-gh200, .build_distributed]
   needs: [build_distributed_baseimage_aarch64]
 
 .test_template_distributed:
   timeout: 8h
   image: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi
-  extends: [.container-runner-santis-gh200, .build_distributed_cpu]
-  needs: [build_distributed_cpu]
+  extends: [.container-runner-santis-gh200, .build_distributed]
+  needs: [build_distributed]
   variables:
     SLURM_JOB_NUM_NODES: 1
     # Use only one GPU for all ranks of the test as all tests are serialized
@@ -68,8 +64,19 @@ build_distributed_cpu:
     ICON4PY_TEST_DATA_PATH: "/icon4py/testdata"
     ICON4PY_ENABLE_GRID_DOWNLOAD: false
     ICON4PY_ENABLE_TESTDATA_DOWNLOAD: false
+    GT4PY_BUILD_CACHE_LIFETIME: "persistent"
     PYTEST_ADDOPTS: "--durations=0"
     CSCS_ADDITIONAL_MOUNTS: '["/capstor/store/cscs/userlab/cwci02/icon4py/ci/testdata:$ICON4PY_TEST_DATA_PATH"]'
+    # Do not use libfabric from the host system. Libfabric with slingshot
+    # support is built into the container image.
+    USE_MPI: "NO"  # quoted: a bare NO is parsed as boolean false by YAML 1.1
+    # Use libfabric slingshot (cxi) provider and recommended settings from
+    # https://docs.cscs.ch/software/communication/openmpi.
+    SLURM_MPI_TYPE: pmix
+    PMIX_MCA_psec: native
+    FI_PROVIDER: cxi
+    OMPI_MCA_pml: cm
+    OMPI_MCA_mtl: ofi
 
 .test_distributed_aarch64:
   stage: test
@@ -81,18 +88,28 @@ build_distributed_cpu:
     - echo "running with $(python --version)"
     - source ci/scripts/start-cuda-mps.sh
   script:
-    - ci/scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT
+    - ci/scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT --level=$LEVEL
   parallel:
     matrix:
       - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common]
-        BACKEND: [embedded, gtfn_cpu, dace_cpu]
+        # TODO(msimberg): Enable dace_gpu when compilation doesn't take as long
+        # or when we can cache across CI jobs.
+        BACKEND: [embedded, gtfn_cpu, dace_cpu, gtfn_gpu]
+        LEVEL: [integration]
   rules:
     - if: $COMPONENT == 'atmosphere/diffusion'
       variables:
         SLURM_TIMELIMIT: '00:10:00'
-    - if: $COMPONENT == 'atmosphere/dycore' && $BACKEND == 'dace_cpu'
+    - if: $COMPONENT == 'atmosphere/dycore' && ($BACKEND == 'dace_cpu' || $BACKEND == 'dace_gpu')
       variables:
         SLURM_TIMELIMIT: '00:30:00'
+    - if: $COMPONENT == 'common' && ($BACKEND == 'dace_gpu' || $BACKEND == 'gtfn_gpu')
+      variables:
+        # TODO(msimberg): Decrease this when enabling dace_gpu above, if possible.
+        SLURM_TIMELIMIT: '01:30:00'
+        # TODO(msimberg): Switch to the shared partition once the time limit can
+        # be reduced to one hour or less; it rejects jobs longer than one hour.
+        SLURM_PARTITION: "normal"
     - if: $COMPONENT == 'atmosphere/dycore'
       variables:
         SLURM_TIMELIMIT: '00:15:00'
diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile
index 3fcdb21297..a600b4ff1c 100644
--- a/ci/docker/base_mpi.Dockerfile
+++ b/ci/docker/base_mpi.Dockerfile
@@ -1,27 +1,124 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10
 
 ENV LANG C.UTF-8
 ENV LC_ALL C.UTF-8
 
 ARG DEBIAN_FRONTEND=noninteractive
-RUN apt-get update -qq && apt-get install -qq -y --no-install-recommends \
-    strace \
-    build-essential \
-    tar \
-    wget \
-    curl \
-    libboost-dev \
-    libnuma-dev \
-    libopenmpi-dev \
-    ca-certificates \
-    libssl-dev \
-    autoconf \
-    automake \
-    libtool \
-    pkg-config \
-    libreadline-dev \
-    git && \
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        autoconf \
+        automake \
+        build-essential \
+        ca-certificates \
+        curl \
+        git \
+        libboost-dev \
+        libconfig-dev \
+        libcurl4-openssl-dev \
+        libfuse-dev \
+        libjson-c-dev \
+        libnl-3-dev \
+        libnuma-dev \
+        libreadline-dev \
+        libsensors-dev \
+        libssl-dev \
+        libtool \
+        libuv1-dev \
+        libyaml-dev \
+        nvidia-cuda-dev \
+        nvidia-cuda-toolkit \
+        nvidia-cuda-toolkit-gcc \
+        pkg-config \
+        python3 \
+        strace \
+        tar \
+        wget && \
     rm -rf /var/lib/apt/lists/*
 
+ENV CC=/usr/bin/cuda-gcc
+ENV CXX=/usr/bin/cuda-g++
+ENV CUDAHOSTCXX=/usr/bin/cuda-g++
+
+# Install OpenMPI configured with libfabric, libcxi, and gdrcopy support for use
+# on Alps. This is based on examples in
+# https://github.com/eth-cscs/cray-network-stack.
+ARG gdrcopy_version=2.5.1
+RUN set -eux; \
+    git clone --depth 1 --branch "v${gdrcopy_version}" https://github.com/NVIDIA/gdrcopy.git; \
+    cd gdrcopy; \
+    make lib -j"$(nproc)" lib_install; \
+    cd /; \
+    rm -rf /gdrcopy; \
+    ldconfig
+
+ARG cassini_headers_version=release/shs-13.0.0
+RUN set -eux; \
+    git clone --depth 1 --branch "${cassini_headers_version}" https://github.com/HewlettPackard/shs-cassini-headers.git; \
+    cd shs-cassini-headers; \
+    cp -r include/* /usr/include/; \
+    cp -r share/* /usr/share/; \
+    rm -rf /shs-cassini-headers
+
+ARG cxi_driver_version=release/shs-13.0.0
+RUN set -eux; \
+    git clone --depth 1 --branch "${cxi_driver_version}" https://github.com/HewlettPackard/shs-cxi-driver.git; \
+    cd shs-cxi-driver; \
+    cp -r include/* /usr/include/; \
+    rm -rf /shs-cxi-driver
+
+ARG libcxi_version=release/shs-13.0.0
+RUN set -eux; \
+    git clone --depth 1 --branch "${libcxi_version}" https://github.com/HewlettPackard/shs-libcxi.git; \
+    cd shs-libcxi; \
+    ./autogen.sh; \
+    ./configure \
+      --with-cuda; \
+    make -j"$(nproc)" install; \
+    cd /; \
+    rm -rf /shs-libcxi; \
+    ldconfig
+
+ARG xpmem_version=0d0bad4e1d07b38d53ecc8f20786bb1328c446da
+RUN set -eux; \
+    git clone https://github.com/hpc/xpmem.git; \
+    cd xpmem; \
+    git checkout "${xpmem_version}"; \
+    ./autogen.sh; \
+    ./configure --disable-kernel-module; \
+    make -j"$(nproc)" install; \
+    cd /; \
+    rm -rf /xpmem; \
+    ldconfig
+
+# NOTE: xpmem is not found correctly without setting the prefix explicitly in
+# --enable-xpmem
+ARG libfabric_version=v2.4.0
+RUN set -eux; \
+    git clone --depth 1 --branch "${libfabric_version}" https://github.com/ofiwg/libfabric.git; \
+    cd libfabric; \
+    ./autogen.sh; \
+    ./configure \
+      --with-cuda \
+      --enable-xpmem=/usr \
+      --enable-tcp \
+      --enable-cxi; \
+    make -j"$(nproc)" install; \
+    cd /; \
+    rm -rf /libfabric; \
+    ldconfig
+
+ARG openmpi_version=5.0.9
+RUN set -eux; \
+    curl -fsSL "https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-${openmpi_version}.tar.gz" -o /tmp/ompi.tar.gz; \
+    tar -C /tmp -xzf /tmp/ompi.tar.gz; \
+    cd "/tmp/openmpi-${openmpi_version}"; \
+    ./configure \
+      --with-ofi \
+      --with-cuda=/usr; \
+    make -j"$(nproc)" install; \
+    cd /; \
+    rm -rf "/tmp/openmpi-${openmpi_version}" /tmp/ompi.tar.gz; \
+    ldconfig
+
 # Install uv: https://docs.astral.sh/uv/guides/integration/docker
 COPY --from=ghcr.io/astral-sh/uv:0.9.24@sha256:816fdce3387ed2142e37d2e56e1b1b97ccc1ea87731ba199dc8a25c04e4997c5 /uv /uvx /bin/
diff --git a/ci/docker/checkout_mpi.Dockerfile b/ci/docker/checkout_mpi.Dockerfile
index c229d6c374..01e26702b4 100644
--- a/ci/docker/checkout_mpi.Dockerfile
+++ b/ci/docker/checkout_mpi.Dockerfile
@@ -7,5 +7,9 @@ WORKDIR /icon4py
 ARG PYVERSION
 ARG VENV
 ENV UV_PROJECT_ENVIRONMENT=$VENV
-ENV MPI4PY_BUILD_BACKEND="scikit-build-core"
-RUN uv sync --extra distributed --python=$PYVERSION
+ENV MPI4PY_BUILD_BACKEND=scikit-build-core
+ENV GHEX_USE_GPU=ON
+ENV GHEX_GPU_TYPE=NVIDIA
+ENV GHEX_GPU_ARCH=90
+ENV GHEX_TRANSPORT_BACKEND=MPI
+RUN uv sync --extra all --extra cuda12 --python=$PYVERSION
diff --git a/model/common/src/icon4py/model/common/grid/utils.py b/model/common/src/icon4py/model/common/grid/utils.py
index 39b48c9dd5..dbb3d69449 100644
--- a/model/common/src/icon4py/model/common/grid/utils.py
+++ b/model/common/src/icon4py/model/common/grid/utils.py
@@ -5,21 +5,20 @@
 #
 # Please, refer to the LICENSE file in the root directory.
 # SPDX-License-Identifier: BSD-3-Clause
-from types import ModuleType
 
 import numpy as np
 
 from icon4py.model.common.grid import gridfile
 
 
-def revert_repeated_index_to_invalid(offset: np.ndarray, array_ns: ModuleType):
+def revert_repeated_index_to_invalid(offset: np.ndarray):
     num_elements = offset.shape[0]
     for i in range(num_elements):
         # convert repeated indices back into -1
-        for val in array_ns.flip(offset[i, :]):
-            if array_ns.count_nonzero(val == offset[i, :]) > 1:
-                unique_values, counts = array_ns.unique(offset[i, :], return_counts=True)
+        for val in np.flip(offset[i, :]):
+            if np.count_nonzero(val == offset[i, :]) > 1:
+                unique_values, counts = np.unique(offset[i, :], return_counts=True)
                 rep_values = unique_values[counts > 1]
-                rep_indices = array_ns.where(array_ns.isin(offset[i, :], rep_values))[0]
+                rep_indices = np.where(np.isin(offset[i, :], rep_values))[0]
                 offset[i, rep_indices[1:]] = gridfile.GridFile.INVALID_INDEX
     return offset
diff --git a/model/common/src/icon4py/model/common/metrics/metrics_factory.py b/model/common/src/icon4py/model/common/metrics/metrics_factory.py
index a164e82fe4..8a6ed38dda 100644
--- a/model/common/src/icon4py/model/common/metrics/metrics_factory.py
+++ b/model/common/src/icon4py/model/common/metrics/metrics_factory.py
@@ -880,7 +880,7 @@ def _register_computed_fields(self) -> None:  # noqa: PLR0915 [too-many-statemen
         self.register_provider(compute_maxslp_maxhgtd)
 
         compute_weighted_cell_neighbor_sum = factory.ProgramFieldProvider(
-            func=mf.compute_weighted_cell_neighbor_sum,
+            func=mf.compute_weighted_cell_neighbor_sum.with_backend(self._backend),
             deps={
                 "maxslp": attrs.MAXSLP,
                 "maxhgtd": attrs.MAXHGTD,
@@ -978,7 +978,9 @@ def _register_computed_fields(self) -> None:  # noqa: PLR0915 [too-many-statemen
         self.register_provider(compute_diffusion_intcoef_and_vertoffset)
 
         compute_advection_deepatmo_fields = factory.ProgramFieldProvider(
-            func=compute_advection_metrics.compute_advection_deepatmo_fields,
+            func=compute_advection_metrics.compute_advection_deepatmo_fields.with_backend(
+                self._backend
+            ),
             domain={
                 dims.KDim: (
                     vertical_domain(v_grid.Zone.TOP),
diff --git a/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py b/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py
index a4324fefc5..3f66caca96 100644
--- a/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py
+++ b/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py
@@ -290,9 +290,10 @@ def test_exchange_on_dummy_data(
     decomposition_info: definitions.DecompositionInfo,
     grid_savepoint: serialbox.IconGridSavepoint,
     dimension: gtx.Dimension,
+    backend: gtx.typing.Backend | None,
 ) -> None:
     exchange = definitions.create_exchange(processor_props, decomposition_info)
-    grid = grid_savepoint.construct_icon_grid()
+    grid = grid_savepoint.construct_icon_grid(backend=backend)
 
     number = processor_props.rank + 10
     input_field = data_alloc.constant_field(
@@ -300,15 +301,16 @@ def test_exchange_on_dummy_data(
         number,
         dimension,
         dims.KDim,
+        allocator=backend,
     )
 
-    halo_points = decomposition_info.local_index(
-        dimension, definitions.DecompositionInfo.EntryType.HALO
+    halo_points = data_alloc.as_numpy(
+        decomposition_info.local_index(dimension, definitions.DecompositionInfo.EntryType.HALO)
     )
-    local_points = decomposition_info.local_index(
-        dimension, definitions.DecompositionInfo.EntryType.OWNED
+    local_points = data_alloc.as_numpy(
+        decomposition_info.local_index(dimension, definitions.DecompositionInfo.EntryType.OWNED)
     )
-    assert np.all(input_field.asnumpy() == number)
+    assert (input_field.ndarray == number).all()
     exchange.exchange(dimension, input_field, stream=definitions.BLOCK)
     result = input_field.asnumpy()
     _log.info(f"rank={processor_props.rank} - num of halo points ={halo_points.shape}")
@@ -319,12 +321,13 @@ def test_exchange_on_dummy_data(
     changed_points = np.argwhere(result[:, 2] != number)
     _log.info(f"rank={processor_props.rank} - num changed points {changed_points.shape} ")
 
-    assert np.all(result[local_points, :] == number)
-    assert np.all(result[halo_points, :] != number)
+    assert (result[local_points, :] == number).all()
+    assert (result[halo_points, :] != number).all()
 
 
 @pytest.mark.mpi
 @pytest.mark.datatest
+@pytest.mark.embedded_only
 @pytest.mark.parametrize("processor_props", [False], indirect=True)
 def test_halo_exchange_for_sparse_field(
     interpolation_savepoint: serialbox.InterpolationSavepoint,
diff --git a/model/common/tests/common/decomposition/unit_tests/test_halo.py b/model/common/tests/common/decomposition/unit_tests/test_halo.py
index 567965e4b1..dd0983598b 100644
--- a/model/common/tests/common/decomposition/unit_tests/test_halo.py
+++ b/model/common/tests/common/decomposition/unit_tests/test_halo.py
@@ -13,6 +13,7 @@
 from icon4py.model.common import dimension as dims, exceptions, model_backends
 from icon4py.model.common.decomposition import decomposer as decomp, definitions, halo
 from icon4py.model.common.grid import base as base_grid, simple
+from icon4py.model.common.utils import data_allocation as data_alloc
 from icon4py.model.testing import test_utils
 
 from ...fixtures import backend_like, processor_props
@@ -32,7 +33,10 @@ def test_halo_constructor_owned_cells(rank, simple_neighbor_tables, backend_like
         run_properties=processor_props,
         allocator=allocator,
     )
-    my_owned_cells = halo_generator.owned_cells(utils.SIMPLE_DISTRIBUTION)
+    xp = data_alloc.import_array_ns(allocator)
+    my_owned_cells = data_alloc.as_numpy(
+        halo_generator.owned_cells(xp.asarray(utils.SIMPLE_DISTRIBUTION))
+    )
 
     print(f"rank {processor_props.rank} owns {my_owned_cells} ")
     assert my_owned_cells.size == len(utils._CELL_OWN[processor_props.rank])
diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py
index 72d0305f43..c062594487 100644
--- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py
+++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py
@@ -5,9 +5,7 @@
 #
 # Please, refer to the LICENSE file in the root directory.
 # SPDX-License-Identifier: BSD-3-Clause
-import functools
 import logging
-import operator
 
 import numpy as np
 import pytest
@@ -33,7 +31,7 @@
 from icon4py.model.common.metrics import metrics_attributes, metrics_factory
 from icon4py.model.common.states import utils as state_utils
 from icon4py.model.common.utils import data_allocation as data_alloc
-from icon4py.model.testing import definitions as test_defs, grid_utils, test_utils
+from icon4py.model.testing import definitions as test_defs, grid_utils, parallel_helpers, test_utils
 from icon4py.model.testing.fixtures.datatest import (
     backend,
     experiment,
@@ -85,104 +83,6 @@ def _get_neighbor_tables(grid: base.Grid) -> dict:
     }
 
 
-def gather_field(field: np.ndarray, props: decomp_defs.ProcessProperties) -> tuple:
-    constant_dims = tuple(field.shape[1:])
-    _log.info(f"gather_field on rank={props.rank} - gathering field of local shape {field.shape}")
-    # Because of sparse indexing the field may have a non-contigous layout,
-    # which Gatherv doesn't support. Make sure the field is contiguous.
-    field = np.ascontiguousarray(field)
-    constant_length = functools.reduce(operator.mul, constant_dims, 1)
-    local_sizes = np.array(props.comm.gather(field.size, root=0))
-    if props.rank == 0:
-        recv_buffer = np.empty(np.sum(local_sizes), dtype=field.dtype)
-        _log.info(
-            f"gather_field on rank = {props.rank} - setup receive buffer with size {sum(local_sizes)} on rank 0"
-        )
-    else:
-        recv_buffer = None
-
-    props.comm.Gatherv(sendbuf=field, recvbuf=(recv_buffer, local_sizes), root=0)
-    if props.rank == 0:
-        local_first_dim = tuple(sz // constant_length for sz in local_sizes)
-        _log.info(
-            f" gather_field on rank = 0: computed local dims {local_first_dim} - constant dims {constant_dims}"
-        )
-        gathered_field = recv_buffer.reshape((-1, *constant_dims))  # type: ignore [union-attr]
-    else:
-        gathered_field = None
-        local_first_dim = field.shape
-    return local_first_dim, gathered_field
-
-
-def check_local_global_field(
-    decomposition_info: decomp_defs.DecompositionInfo,
-    processor_props: decomp_defs.ProcessProperties,  # F811 # fixture
-    dim: gtx.Dimension,
-    global_reference_field: np.ndarray,
-    local_field: np.ndarray,
-    check_halos: bool,
-) -> None:
-    if dim == dims.KDim:
-        test_utils.assert_dallclose(global_reference_field, local_field)
-        return
-
-    _log.info(
-        f" rank= {processor_props.rank}/{processor_props.comm_size}----exchanging field of main dim {dim}"
-    )
-    assert (
-        local_field.shape[0]
-        == decomposition_info.global_index(dim, decomp_defs.DecompositionInfo.EntryType.ALL).shape[
-            0
-        ]
-    )
-
-    # Compare halo against global reference field
-    if check_halos:
-        test_utils.assert_dallclose(
-            global_reference_field[
-                decomposition_info.global_index(dim, decomp_defs.DecompositionInfo.EntryType.HALO)
-            ],
-            local_field[
-                decomposition_info.local_index(dim, decomp_defs.DecompositionInfo.EntryType.HALO)
-            ],
-            atol=1e-9,
-            verbose=True,
-        )
-
-    # Compare owned local field, excluding halos, against global reference
-    # field, by gathering owned entries to the first rank. This ensures that in
-    # total we have the full global field distributed on all ranks.
-    owned_entries = local_field[
-        decomposition_info.local_index(dim, decomp_defs.DecompositionInfo.EntryType.OWNED)
-    ]
-    gathered_sizes, gathered_field = gather_field(owned_entries, processor_props)
-
-    global_index_sizes, gathered_global_indices = gather_field(
-        decomposition_info.global_index(dim, decomp_defs.DecompositionInfo.EntryType.OWNED),
-        processor_props,
-    )
-
-    if processor_props.rank == 0:
-        _log.info(f"rank = {processor_props.rank}: asserting gathered fields: ")
-
-        assert np.all(
-            gathered_sizes == global_index_sizes
-        ), f"gathered field sizes do not match:  {dim} {gathered_sizes} - {global_index_sizes}"
-        _log.info(
-            f"rank = {processor_props.rank}: Checking field size on dim ={dim}: --- gathered sizes {gathered_sizes} = {sum(gathered_sizes)}"
-        )
-        _log.info(
-            f"rank = {processor_props.rank}:                      --- gathered field has size {gathered_sizes}"
-        )
-        sorted_ = np.zeros(global_reference_field.shape, dtype=gtx.float64)
-        sorted_[gathered_global_indices] = gathered_field
-        _log.info(
-            f" rank = {processor_props.rank}: SHAPES: global reference field {global_reference_field.shape}, gathered = {gathered_field.shape}"
-        )
-
-        test_utils.assert_dallclose(sorted_, global_reference_field, atol=1e-9, verbose=True)
-
-
 # These fields can't be computed with the embedded backend for one reason or
 # another, so we declare them here for xfailing.
 embedded_broken_fields = {
@@ -202,62 +102,7 @@ def check_local_global_field(
 }
 
 
-@pytest.mark.mpi
-@pytest.mark.parametrize("processor_props", [True], indirect=True)
-@pytest.mark.parametrize(
-    "attrs_name",
-    [
-        geometry_attributes.CELL_AREA,
-        geometry_attributes.CELL_CENTER_X,
-        geometry_attributes.CELL_CENTER_Y,
-        geometry_attributes.CELL_CENTER_Z,
-        geometry_attributes.CELL_LAT,
-        geometry_attributes.CELL_LON,
-        geometry_attributes.CELL_NORMAL_ORIENTATION,
-        geometry_attributes.CORIOLIS_PARAMETER,
-        geometry_attributes.DUAL_AREA,
-        geometry_attributes.DUAL_EDGE_LENGTH,
-        f"inverse_of_{geometry_attributes.DUAL_EDGE_LENGTH}",
-        geometry_attributes.EDGE_AREA,
-        geometry_attributes.EDGE_CELL_DISTANCE,
-        geometry_attributes.EDGE_CENTER_X,
-        geometry_attributes.EDGE_CENTER_Y,
-        geometry_attributes.EDGE_CENTER_Z,
-        geometry_attributes.EDGE_DUAL_U,
-        geometry_attributes.EDGE_DUAL_V,
-        geometry_attributes.EDGE_LAT,
-        f"inverse_of_{geometry_attributes.EDGE_LENGTH}",
-        geometry_attributes.EDGE_LENGTH,
-        geometry_attributes.EDGE_LON,
-        geometry_attributes.EDGE_NORMAL_CELL_U,
-        geometry_attributes.EDGE_NORMAL_CELL_V,
-        geometry_attributes.EDGE_NORMAL_U,
-        geometry_attributes.EDGE_NORMAL_V,
-        geometry_attributes.EDGE_NORMAL_VERTEX_U,
-        geometry_attributes.EDGE_NORMAL_VERTEX_V,
-        geometry_attributes.EDGE_NORMAL_X,
-        geometry_attributes.EDGE_NORMAL_Y,
-        geometry_attributes.EDGE_NORMAL_Z,
-        geometry_attributes.EDGE_TANGENT_CELL_U,
-        geometry_attributes.EDGE_TANGENT_CELL_V,
-        geometry_attributes.EDGE_TANGENT_VERTEX_U,
-        geometry_attributes.EDGE_TANGENT_VERTEX_V,
-        geometry_attributes.EDGE_TANGENT_X,
-        geometry_attributes.EDGE_TANGENT_Y,
-        geometry_attributes.EDGE_TANGENT_Z,
-        geometry_attributes.EDGE_VERTEX_DISTANCE,
-        geometry_attributes.TANGENT_ORIENTATION,
-        geometry_attributes.VERTEX_EDGE_ORIENTATION,
-        geometry_attributes.VERTEX_LAT,
-        geometry_attributes.VERTEX_LON,
-        geometry_attributes.VERTEX_VERTEX_LENGTH,
-        f"inverse_of_{geometry_attributes.VERTEX_VERTEX_LENGTH}",
-        geometry_attributes.VERTEX_X,
-        geometry_attributes.VERTEX_Y,
-        geometry_attributes.VERTEX_Z,
-    ],
-)
-def test_geometry_fields_compare_single_multi_rank(
+def _compare_geometry_fields_single_multi_rank(
     processor_props: decomp_defs.ProcessProperties,
     backend: gtx_typing.Backend | None,
     grid_description: test_defs.GridDescription,
@@ -269,11 +114,15 @@ def test_geometry_fields_compare_single_multi_rank(
     if attrs_name in embedded_broken_fields and test_utils.is_embedded(backend):
         pytest.xfail(f"Field {attrs_name} can't be computed with the embedded backend")
 
+    allocator = model_backends.get_allocator(backend)
+
     # TODO(msimberg): Add fixtures for single/multi-rank
     # grid/geometry/interpolation/metrics factories.
     grid_file = grid_utils._download_grid_file(grid_description)
     _log.info(f"running on {processor_props.comm} with {processor_props.comm_size} ranks")
-    single_rank_grid_manager = utils.run_grid_manager_for_single_rank(grid_file)
+    single_rank_grid_manager = utils.run_grid_manager_for_single_rank(
+        grid_file, allocator=allocator
+    )
     single_rank_geometry = geometry.GridGeometry(
         backend=backend,
         grid=single_rank_grid_manager.grid,
@@ -290,6 +139,7 @@ def test_geometry_fields_compare_single_multi_rank(
         file=grid_file,
         run_properties=processor_props,
         decomposer=decomp.MetisDecomposer(),
+        allocator=allocator,
     )
     _log.info(
         f"rank = {processor_props.rank} : {multi_rank_grid_manager.decomposition_info.get_horizontal_size()!r}"
@@ -316,46 +166,106 @@ def test_geometry_fields_compare_single_multi_rank(
     field = multi_rank_geometry.get(attrs_name)
     dim = field_ref.domain.dims[0]
 
-    check_local_global_field(
+    parallel_helpers.check_local_global_field(
         decomposition_info=multi_rank_grid_manager.decomposition_info,
         processor_props=processor_props,
         dim=dim,
         global_reference_field=field_ref.asnumpy(),
         local_field=field.asnumpy(),
         check_halos=True,
+        atol=1e-15,
     )
 
     _log.info(f"rank = {processor_props.rank} - DONE")
 
 
+@pytest.mark.level("unit")
 @pytest.mark.mpi
 @pytest.mark.parametrize("processor_props", [True], indirect=True)
 @pytest.mark.parametrize(
     "attrs_name",
     [
-        interpolation_attributes.CELL_AW_VERTS,
-        interpolation_attributes.C_BLN_AVG,
-        interpolation_attributes.C_LIN_E,
-        interpolation_attributes.E_BLN_C_S,
-        interpolation_attributes.E_FLX_AVG,
-        interpolation_attributes.GEOFAC_DIV,
-        interpolation_attributes.GEOFAC_GRDIV,
-        interpolation_attributes.GEOFAC_GRG_X,
-        interpolation_attributes.GEOFAC_GRG_Y,
-        interpolation_attributes.GEOFAC_N2S,
-        interpolation_attributes.GEOFAC_ROT,
-        interpolation_attributes.LSQ_PSEUDOINV,
-        interpolation_attributes.NUDGECOEFFS_E,
-        interpolation_attributes.POS_ON_TPLANE_E_X,
-        interpolation_attributes.POS_ON_TPLANE_E_Y,
-        interpolation_attributes.RBF_VEC_COEFF_C1,
-        interpolation_attributes.RBF_VEC_COEFF_C2,
-        interpolation_attributes.RBF_VEC_COEFF_E,
-        interpolation_attributes.RBF_VEC_COEFF_V1,
-        interpolation_attributes.RBF_VEC_COEFF_V2,
+        geometry_attributes.CELL_CENTER_Y,
+        geometry_attributes.CELL_CENTER_Z,
+        geometry_attributes.CELL_LON,
+        geometry_attributes.DUAL_EDGE_LENGTH,
+        geometry_attributes.EDGE_CENTER_Y,
+        geometry_attributes.EDGE_CENTER_Z,
+        geometry_attributes.EDGE_DUAL_V,
+        geometry_attributes.EDGE_LENGTH,
+        geometry_attributes.EDGE_LON,
+        geometry_attributes.EDGE_NORMAL_CELL_V,
+        geometry_attributes.EDGE_NORMAL_V,
+        geometry_attributes.EDGE_NORMAL_VERTEX_V,
+        geometry_attributes.EDGE_NORMAL_Y,
+        geometry_attributes.EDGE_NORMAL_Z,
+        geometry_attributes.EDGE_TANGENT_CELL_V,
+        geometry_attributes.EDGE_TANGENT_VERTEX_V,
+        geometry_attributes.EDGE_TANGENT_Y,
+        geometry_attributes.EDGE_TANGENT_Z,
+        geometry_attributes.VERTEX_LON,
+        geometry_attributes.VERTEX_VERTEX_LENGTH,
+        geometry_attributes.VERTEX_Y,
+        geometry_attributes.VERTEX_Z,
     ],
 )
-def test_interpolation_fields_compare_single_multi_rank(
+def test_geometry_fields_compare_single_multi_rank_unit(
+    processor_props: decomp_defs.ProcessProperties,
+    backend: gtx_typing.Backend | None,
+    grid_description: test_defs.GridDescription,
+    attrs_name: str,
+) -> None:
+    _compare_geometry_fields_single_multi_rank(
+        processor_props, backend, grid_description, attrs_name
+    )
+
+
+@pytest.mark.level("integration")
+@pytest.mark.mpi
+@pytest.mark.parametrize("processor_props", [True], indirect=True)
+@pytest.mark.parametrize(
+    "attrs_name",
+    [
+        geometry_attributes.CELL_AREA,
+        geometry_attributes.CELL_CENTER_X,
+        geometry_attributes.CELL_LAT,
+        geometry_attributes.CELL_NORMAL_ORIENTATION,
+        geometry_attributes.CORIOLIS_PARAMETER,
+        geometry_attributes.DUAL_AREA,
+        f"inverse_of_{geometry_attributes.DUAL_EDGE_LENGTH}",
+        geometry_attributes.EDGE_AREA,
+        geometry_attributes.EDGE_CELL_DISTANCE,
+        geometry_attributes.EDGE_CENTER_X,
+        geometry_attributes.EDGE_DUAL_U,
+        geometry_attributes.EDGE_LAT,
+        f"inverse_of_{geometry_attributes.EDGE_LENGTH}",
+        geometry_attributes.EDGE_NORMAL_CELL_U,
+        geometry_attributes.EDGE_NORMAL_U,
+        geometry_attributes.EDGE_NORMAL_VERTEX_U,
+        geometry_attributes.EDGE_NORMAL_X,
+        geometry_attributes.EDGE_TANGENT_CELL_U,
+        geometry_attributes.EDGE_TANGENT_VERTEX_U,
+        geometry_attributes.EDGE_TANGENT_X,
+        geometry_attributes.EDGE_VERTEX_DISTANCE,
+        geometry_attributes.TANGENT_ORIENTATION,
+        geometry_attributes.VERTEX_EDGE_ORIENTATION,
+        geometry_attributes.VERTEX_LAT,
+        f"inverse_of_{geometry_attributes.VERTEX_VERTEX_LENGTH}",
+        geometry_attributes.VERTEX_X,
+    ],
+)
+def test_geometry_fields_compare_single_multi_rank_integration(
+    processor_props: decomp_defs.ProcessProperties,
+    backend: gtx_typing.Backend | None,
+    grid_description: test_defs.GridDescription,
+    attrs_name: str,
+) -> None:
+    _compare_geometry_fields_single_multi_rank(
+        processor_props, backend, grid_description, attrs_name
+    )
+
+
+def _compare_interpolation_fields_single_multi_rank(
     processor_props: decomp_defs.ProcessProperties,
     backend: gtx_typing.Backend | None,
     experiment: test_defs.Experiment,
@@ -367,9 +277,11 @@ def test_interpolation_fields_compare_single_multi_rank(
     if attrs_name in embedded_broken_fields and test_utils.is_embedded(backend):
         pytest.xfail(f"Field {attrs_name} can't be computed with the embedded backend")
 
+    allocator = model_backends.get_allocator(backend)
+
     file = grid_utils.resolve_full_grid_file_name(experiment.grid)
     _log.info(f"running on {processor_props.comm} with {processor_props.comm_size} ranks")
-    single_rank_grid_manager = utils.run_grid_manager_for_single_rank(file)
+    single_rank_grid_manager = utils.run_grid_manager_for_single_rank(file, allocator=allocator)
     single_rank_geometry = geometry.GridGeometry(
         backend=backend,
         grid=single_rank_grid_manager.grid,
@@ -394,6 +306,7 @@ def test_interpolation_fields_compare_single_multi_rank(
         file=file,
         run_properties=processor_props,
         decomposer=decomp.MetisDecomposer(),
+        allocator=allocator,
     )
     _log.info(
         f"rank = {processor_props.rank} : {multi_rank_grid_manager.decomposition_info.get_horizontal_size()!r}"
@@ -430,74 +343,82 @@ def test_interpolation_fields_compare_single_multi_rank(
     field = multi_rank_interpolation.get(attrs_name)
     dim = field_ref.domain.dims[0]
 
-    check_local_global_field(
+    parallel_helpers.check_local_global_field(
         decomposition_info=multi_rank_grid_manager.decomposition_info,
         processor_props=processor_props,
         dim=dim,
         global_reference_field=field_ref.asnumpy(),
         local_field=field.asnumpy(),
         check_halos=True,
+        atol=3e-9
+        if attrs_name.startswith("rbf")
+        else 1e-10
+        if attrs_name.startswith("pos_on_tplane")
+        else 1e-15,
     )
 
     _log.info(f"rank = {processor_props.rank} - DONE")
 
 
+@pytest.mark.level("unit")
 @pytest.mark.mpi
 @pytest.mark.parametrize("processor_props", [True], indirect=True)
 @pytest.mark.parametrize(
     "attrs_name",
     [
-        metrics_attributes.CELL_HEIGHT_ON_HALF_LEVEL,
-        metrics_attributes.COEFF1_DWDZ,
-        metrics_attributes.COEFF2_DWDZ,
-        metrics_attributes.COEFF_GRADEKIN,
-        metrics_attributes.D2DEXDZ2_FAC1_MC,
-        metrics_attributes.D2DEXDZ2_FAC2_MC,
-        metrics_attributes.DDQZ_Z_FULL,
-        metrics_attributes.DDQZ_Z_FULL_E,
-        metrics_attributes.DDQZ_Z_HALF,
-        metrics_attributes.DDXN_Z_FULL,
-        metrics_attributes.DDXN_Z_HALF_E,
-        metrics_attributes.DDXT_Z_FULL,
-        metrics_attributes.DDXT_Z_HALF_E,
-        metrics_attributes.D_EXNER_DZ_REF_IC,
-        metrics_attributes.EXNER_EXFAC,
-        metrics_attributes.EXNER_REF_MC,
-        metrics_attributes.EXNER_W_EXPLICIT_WEIGHT_PARAMETER,
-        metrics_attributes.EXNER_W_IMPLICIT_WEIGHT_PARAMETER,
-        metrics_attributes.FLAT_IDX_MAX,
-        metrics_attributes.HORIZONTAL_MASK_FOR_3D_DIVDAMP,
-        metrics_attributes.INV_DDQZ_Z_FULL,
-        metrics_attributes.MAXHGTD,
-        metrics_attributes.MAXHGTD_AVG,
-        metrics_attributes.MAXSLP,
-        metrics_attributes.MAXSLP_AVG,
-        metrics_attributes.MAX_NBHGT,
-        metrics_attributes.NFLAT_GRADP,
-        metrics_attributes.PG_EXDIST_DSL,
-        metrics_attributes.RAYLEIGH_W,
-        metrics_attributes.RHO_REF_MC,
-        metrics_attributes.RHO_REF_ME,
-        metrics_attributes.SCALING_FACTOR_FOR_3D_DIVDAMP,
-        metrics_attributes.DEEPATMO_DIVH,
-        metrics_attributes.DEEPATMO_DIVZL,
-        metrics_attributes.DEEPATMO_DIVZU,
-        metrics_attributes.THETA_REF_IC,
-        metrics_attributes.THETA_REF_MC,
-        metrics_attributes.THETA_REF_ME,
-        metrics_attributes.VERTOFFSET_GRADP,
-        metrics_attributes.WGTFACQ_C,
-        metrics_attributes.WGTFACQ_E,
-        metrics_attributes.WGTFAC_C,
-        metrics_attributes.WGTFAC_E,
-        metrics_attributes.ZDIFF_GRADP,
-        metrics_attributes.ZD_DIFFCOEF,
-        metrics_attributes.ZD_INTCOEF,
-        metrics_attributes.ZD_VERTOFFSET,
-        metrics_attributes.Z_MC,
+        interpolation_attributes.CELL_AW_VERTS,
+        interpolation_attributes.C_BLN_AVG,
+        interpolation_attributes.C_LIN_E,
+        interpolation_attributes.E_BLN_C_S,
+        interpolation_attributes.GEOFAC_DIV,
+        interpolation_attributes.GEOFAC_GRG_Y,
+        interpolation_attributes.GEOFAC_ROT,
+        interpolation_attributes.LSQ_PSEUDOINV,
+        interpolation_attributes.NUDGECOEFFS_E,
+        interpolation_attributes.POS_ON_TPLANE_E_X,
+        interpolation_attributes.POS_ON_TPLANE_E_Y,
+        interpolation_attributes.RBF_VEC_COEFF_C2,
+        interpolation_attributes.RBF_VEC_COEFF_V2,
     ],
 )
-def test_metrics_fields_compare_single_multi_rank(
+def test_interpolation_fields_compare_single_multi_rank_unit(
+    processor_props: decomp_defs.ProcessProperties,
+    backend: gtx_typing.Backend | None,
+    experiment: test_defs.Experiment,
+    attrs_name: str,
+) -> None:
+    _compare_interpolation_fields_single_multi_rank(
+        processor_props, backend, experiment, attrs_name
+    )
+
+
+@pytest.mark.level("integration")
+@pytest.mark.mpi
+@pytest.mark.parametrize("processor_props", [True], indirect=True)
+@pytest.mark.parametrize(
+    "attrs_name",
+    [
+        interpolation_attributes.E_FLX_AVG,
+        interpolation_attributes.GEOFAC_GRDIV,
+        interpolation_attributes.GEOFAC_GRG_X,
+        interpolation_attributes.GEOFAC_N2S,
+        interpolation_attributes.RBF_VEC_COEFF_C1,
+        interpolation_attributes.RBF_VEC_COEFF_E,
+        interpolation_attributes.RBF_VEC_COEFF_V1,
+    ],
+)
+def test_interpolation_fields_compare_single_multi_rank_integration(
+    processor_props: decomp_defs.ProcessProperties,
+    backend: gtx_typing.Backend | None,
+    experiment: test_defs.Experiment,
+    attrs_name: str,
+) -> None:
+    _compare_interpolation_fields_single_multi_rank(
+        processor_props, backend, experiment, attrs_name
+    )
+
+
+def _compare_metrics_fields_single_multi_rank(
     processor_props: decomp_defs.ProcessProperties,
     backend: gtx_typing.Backend | None,
     experiment: test_defs.Experiment,
@@ -547,7 +468,9 @@ def test_metrics_fields_compare_single_multi_rank(
     )
 
     _log.info(f"running on {processor_props.comm} with {processor_props.comm_size} ranks")
-    single_rank_grid_manager = utils.run_grid_manager_for_single_rank(file, experiment.num_levels)
+    single_rank_grid_manager = utils.run_grid_manager_for_single_rank(
+        file, allocator=allocator, num_levels=experiment.num_levels
+    )
     single_rank_geometry = geometry.GridGeometry(
         backend=backend,
         grid=single_rank_grid_manager.grid,
@@ -595,6 +518,7 @@ def test_metrics_fields_compare_single_multi_rank(
         file=file,
         run_properties=processor_props,
         decomposer=decomp.MetisDecomposer(),
+        allocator=allocator,
         num_levels=experiment.num_levels,
     )
     _log.info(
@@ -660,18 +584,101 @@ def test_metrics_fields_compare_single_multi_rank(
         assert isinstance(field, state_utils.ScalarType)
         assert pytest.approx(field) == field_ref
     else:
-        check_local_global_field(
+        parallel_helpers.check_local_global_field(
             decomposition_info=multi_rank_grid_manager.decomposition_info,
             processor_props=processor_props,
             dim=field_ref.domain.dims[0],
             global_reference_field=field_ref.asnumpy(),
             local_field=field.asnumpy(),
             check_halos=(attrs_name != metrics_attributes.WGTFAC_E),
+            atol=0.0,
         )
 
     _log.info(f"rank = {processor_props.rank} - DONE")
 
 
+@pytest.mark.level("unit")
+@pytest.mark.mpi
+@pytest.mark.parametrize("processor_props", [True], indirect=True)
+@pytest.mark.parametrize(
+    "attrs_name",
+    [
+        metrics_attributes.CELL_HEIGHT_ON_HALF_LEVEL,
+        metrics_attributes.COEFF2_DWDZ,
+        metrics_attributes.COEFF_GRADEKIN,
+        metrics_attributes.D2DEXDZ2_FAC2_MC,
+        metrics_attributes.DDQZ_Z_FULL,
+        metrics_attributes.DDXN_Z_HALF_E,
+        metrics_attributes.DDXT_Z_FULL,
+        metrics_attributes.DDXT_Z_HALF_E,
+        metrics_attributes.DEEPATMO_DIVH,
+        metrics_attributes.DEEPATMO_DIVZL,
+        metrics_attributes.DEEPATMO_DIVZU,
+        metrics_attributes.D_EXNER_DZ_REF_IC,
+        metrics_attributes.EXNER_REF_MC,
+        metrics_attributes.EXNER_W_IMPLICIT_WEIGHT_PARAMETER,
+        metrics_attributes.FLAT_IDX_MAX,
+        metrics_attributes.HORIZONTAL_MASK_FOR_3D_DIVDAMP,
+        metrics_attributes.INV_DDQZ_Z_FULL,
+        metrics_attributes.MAXHGTD,
+        metrics_attributes.MAXSLP,
+        metrics_attributes.MAXSLP_AVG,
+        metrics_attributes.MAX_NBHGT,
+        metrics_attributes.PG_EXDIST_DSL,
+        metrics_attributes.RAYLEIGH_W,
+        metrics_attributes.RHO_REF_MC,
+        metrics_attributes.RHO_REF_ME,
+        metrics_attributes.SCALING_FACTOR_FOR_3D_DIVDAMP,
+        metrics_attributes.THETA_REF_IC,
+        metrics_attributes.THETA_REF_MC,
+        metrics_attributes.THETA_REF_ME,
+        metrics_attributes.VERTOFFSET_GRADP,
+        metrics_attributes.WGTFACQ_C,
+        metrics_attributes.WGTFAC_C,
+        metrics_attributes.ZDIFF_GRADP,
+        metrics_attributes.ZD_DIFFCOEF,
+        metrics_attributes.ZD_VERTOFFSET,
+        metrics_attributes.Z_MC,
+    ],
+)
+def test_metrics_fields_compare_single_multi_rank_unit(
+    processor_props: decomp_defs.ProcessProperties,
+    backend: gtx_typing.Backend | None,
+    experiment: test_defs.Experiment,
+    attrs_name: str,
+) -> None:
+    _compare_metrics_fields_single_multi_rank(processor_props, backend, experiment, attrs_name)
+
+
+@pytest.mark.level("integration")
+@pytest.mark.mpi
+@pytest.mark.parametrize("processor_props", [True], indirect=True)
+@pytest.mark.parametrize(
+    "attrs_name",
+    [
+        metrics_attributes.COEFF1_DWDZ,
+        metrics_attributes.D2DEXDZ2_FAC1_MC,
+        metrics_attributes.DDQZ_Z_FULL_E,
+        metrics_attributes.DDQZ_Z_HALF,
+        metrics_attributes.DDXN_Z_FULL,
+        metrics_attributes.EXNER_EXFAC,
+        metrics_attributes.EXNER_W_EXPLICIT_WEIGHT_PARAMETER,
+        metrics_attributes.MAXHGTD_AVG,
+        metrics_attributes.NFLAT_GRADP,
+        metrics_attributes.WGTFACQ_E,
+        metrics_attributes.WGTFAC_E,
+        metrics_attributes.ZD_INTCOEF,
+    ],
+)
+def test_metrics_fields_compare_single_multi_rank_integration(
+    processor_props: decomp_defs.ProcessProperties,
+    backend: gtx_typing.Backend | None,
+    experiment: test_defs.Experiment,
+    attrs_name: str,
+) -> None:
+    _compare_metrics_fields_single_multi_rank(processor_props, backend, experiment, attrs_name)
+
+
 # MASK_PROG_HALO_C is defined specially only on halos, so we have a separate
 # test for it. It doesn't make sense to compare to a single-rank reference since
 # it has no halos.
@@ -729,6 +736,7 @@ def test_metrics_mask_prog_halo_c(
         run_properties=processor_props,
         decomposer=decomp.MetisDecomposer(),
         num_levels=experiment.num_levels,
+        allocator=model_backends.get_allocator(backend),
     )
     _log.info(
         f"rank = {processor_props.rank} : {multi_rank_grid_manager.decomposition_info.get_horizontal_size()!r}"
@@ -787,22 +795,22 @@ def test_metrics_mask_prog_halo_c(
     )
 
     attrs_name = metrics_attributes.MASK_PROG_HALO_C
-    field = multi_rank_metrics.get(attrs_name).asnumpy()
-    c_refin_ctrl = multi_rank_metrics.get("c_refin_ctrl").asnumpy()
-    assert not np.any(
+    field = multi_rank_metrics.get(attrs_name).ndarray
+    c_refin_ctrl = multi_rank_metrics.get("c_refin_ctrl").ndarray
+    assert not (
         field[
             multi_rank_grid_manager.decomposition_info.local_index(
                 dims.CellDim, decomp_defs.DecompositionInfo.EntryType.OWNED
             )
         ]
-    ), f"rank={processor_props.rank} - found nonzero in owned entries of {attrs_name}"
+    ).any(), f"rank={processor_props.rank} - found nonzero in owned entries of {attrs_name}"
     halo_indices = multi_rank_grid_manager.decomposition_info.local_index(
         dims.CellDim, decomp_defs.DecompositionInfo.EntryType.HALO
     )
-    assert np.all(
+    assert (
         field[halo_indices]
-        == ~((c_refin_ctrl[halo_indices] >= 1) & (c_refin_ctrl[halo_indices] <= 4))
-    ), f"rank={processor_props.rank} - halo for MASK_PROG_HALO_C is incorrect"
+        == xp.invert((c_refin_ctrl[halo_indices] >= 1) & (c_refin_ctrl[halo_indices] <= 4))
+    ).all(), f"rank={processor_props.rank} - halo for MASK_PROG_HALO_C is incorrect"
 
     _log.info(f"rank = {processor_props.rank} - DONE")
 
@@ -812,6 +820,7 @@ def test_metrics_mask_prog_halo_c(
 def test_validate_skip_values_in_distributed_connectivities(
     processor_props: decomp_defs.ProcessProperties,
     experiment: test_defs.Experiment,
+    backend: gtx_typing.Backend | None,
 ) -> None:
     if experiment == test_defs.Experiments.MCH_CH_R04B09:
         pytest.xfail("Limited-area grids not yet supported")
@@ -821,6 +830,7 @@ def test_validate_skip_values_in_distributed_connectivities(
         file=file,
         run_properties=processor_props,
         decomposer=decomp.MetisDecomposer(),
+        allocator=model_backends.get_allocator(backend),
     )
     distributed_grid = multi_rank_grid_manager.grid
     for k, c in distributed_grid.connectivities.items():
@@ -844,6 +854,7 @@ def test_validate_skip_values_in_distributed_connectivities(
 def test_limited_area_raises(
     processor_props: decomp_defs.ProcessProperties,
     grid: test_defs.GridDescription,
+    backend: gtx_typing.Backend | None,
 ) -> None:
     with pytest.raises(
         NotImplementedError, match="Limited-area grids are not supported in distributed runs"
@@ -852,4 +863,5 @@ def test_limited_area_raises(
             file=grid_utils.resolve_full_grid_file_name(grid),
             run_properties=processor_props,
             decomposer=decomp.MetisDecomposer(),
+            allocator=model_backends.get_allocator(backend),
         )
diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py
index ce8984e071..b7dae66a95 100644
--- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py
+++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py
@@ -11,13 +11,14 @@
 import gt4py.next as gtx
 import pytest
 
-from icon4py.model.common import dimension as dims
+from icon4py.model.common import dimension as dims, model_backends
 from icon4py.model.common.decomposition import (
     decomposer as decomp,
     definitions as decomposition,
     mpi_decomposition,
 )
 from icon4py.model.common.grid import grid_refinement, horizontal as h_grid
+from icon4py.model.common.utils import data_allocation as data_alloc
 from icon4py.model.testing import definitions, grid_utils, serialbox, test_utils
 from icon4py.model.testing.fixtures.datatest import (
     backend,
@@ -65,6 +66,7 @@ def test_compute_domain_bounds(
     experiment: definitions.Experiment,
     grid_savepoint: serialbox.IconGridSavepoint,
     processor_props: decomposition.ProcessProperties,
+    backend: gtx.typing.Backend | None,
 ) -> None:
     if (
         processor_props.is_single_rank()
@@ -75,11 +77,14 @@ def test_compute_domain_bounds(
             "end index data for single node APE are all 0 - re- serialization should fix that (patch%cells%end_index vs patch%cells%end_idx)"
         )
 
-    ref_grid = grid_savepoint.construct_icon_grid(backend=None, keep_skip_values=True)
+    ref_grid = grid_savepoint.construct_icon_grid(backend=backend, keep_skip_values=True)
     decomposition_info = grid_savepoint.construct_decomposition_info()
     refin_ctrl = {dim: grid_savepoint.refin_ctrl(dim) for dim in utils.main_horizontal_dims()}
     start_indices, end_indices = grid_refinement.compute_domain_bounds(
-        dim, refin_ctrl, decomposition_info
+        dim,
+        refin_ctrl,
+        decomposition_info,
+        array_ns=data_alloc.import_array_ns(backend),
     )
     if (
         experiment == definitions.Experiments.GAUSS3D
@@ -123,6 +128,7 @@ def test_bounds_decomposition(
         file=file,
         run_properties=processor_props,
         decomposer=decomp.MetisDecomposer(),
+        allocator=model_backends.get_allocator(backend),
     )
     _log.info(
         f"rank = {processor_props.rank} : {grid_manager.decomposition_info.get_horizontal_size()!r}"
diff --git a/model/common/tests/common/grid/mpi_tests/utils.py b/model/common/tests/common/grid/mpi_tests/utils.py
index 511ec82f77..ca771e1d3c 100644
--- a/model/common/tests/common/grid/mpi_tests/utils.py
+++ b/model/common/tests/common/grid/mpi_tests/utils.py
@@ -8,6 +8,8 @@
 
 import pathlib
 
+import gt4py.next as gtx
+
 from icon4py.model.common.decomposition import decomposer as decomp, definitions as decomp_defs
 from icon4py.model.common.grid import grid_manager as gm, vertical as v_grid
 
@@ -22,14 +24,16 @@ def _grid_manager(file: pathlib.Path, num_levels: int) -> gm.GridManager:
 
 
 def run_grid_manager_for_single_rank(
-    file: pathlib.Path, num_levels: int = NUM_LEVELS
+    file: pathlib.Path,
+    allocator: gtx.typing.Allocator,
+    num_levels: int = NUM_LEVELS,
 ) -> gm.GridManager:
     manager = _grid_manager(file, num_levels)
     manager(
         keep_skip_values=True,
         run_properties=decomp_defs.SingleNodeProcessProperties(),
         decomposer=decomp.SingleNodeDecomposer(),
-        allocator=None,
+        allocator=allocator,
     )
     return manager
 
@@ -38,10 +42,14 @@ def run_grid_manager_for_multi_rank(
     file: pathlib.Path,
     run_properties: decomp_defs.ProcessProperties,
     decomposer: decomp.Decomposer,
+    allocator: gtx.typing.Allocator,
     num_levels: int = NUM_LEVELS,
 ) -> gm.GridManager:
     manager = _grid_manager(file, num_levels)
     manager(
-        keep_skip_values=True, allocator=None, run_properties=run_properties, decomposer=decomposer
+        keep_skip_values=True,
+        allocator=allocator,
+        run_properties=run_properties,
+        decomposer=decomposer,
     )
     return manager
diff --git a/model/common/tests/common/grid/unit_tests/test_grid_manager.py b/model/common/tests/common/grid/unit_tests/test_grid_manager.py
index bc8d1839ea..4a457fbede 100644
--- a/model/common/tests/common/grid/unit_tests/test_grid_manager.py
+++ b/model/common/tests/common/grid/unit_tests/test_grid_manager.py
@@ -74,7 +74,7 @@ def test_grid_manager_eval_v2e(
     backend: gtx_typing.Backend,
 ) -> None:
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
-    seralized_v2e = grid_savepoint.v2e()
+    seralized_v2e = data_alloc.as_numpy(grid_savepoint.v2e())
     # there are vertices at the boundary of a local domain or at a pentagon point that have less than
     # 6 neighbors hence there are "Missing values" in the grid file
     # they get substituted by the "last valid index" in preprocessing step in icon.
@@ -120,7 +120,7 @@ def test_grid_manager_eval_v2c(
     backend: gtx_typing.Backend,
 ) -> None:
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
-    serialized_v2c = grid_savepoint.v2c()
+    serialized_v2c = data_alloc.as_numpy(grid_savepoint.v2c())
     v2c_table = grid.get_connectivity("V2C").asnumpy()
     # there are vertices that have less than 6 neighboring cells: either pentagon points or
     # vertices at the boundary of the domain for a limited area mode
@@ -176,7 +176,7 @@ def test_grid_manager_eval_e2v(
 ) -> None:
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
 
-    serialized_e2v = grid_savepoint.e2v()
+    serialized_e2v = data_alloc.as_numpy(grid_savepoint.e2v())
     e2v_table = grid.get_connectivity("E2V").asnumpy()
     # all vertices in the system have to neighboring edges, there no edges that point nowhere
     # hence this connectivity has no "missing values" in the grid file
@@ -199,7 +199,7 @@ def test_grid_manager_eval_e2c(
 ) -> None:
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
 
-    serialized_e2c = grid_savepoint.e2c()
+    serialized_e2c = data_alloc.as_numpy(grid_savepoint.e2c())
     e2c_table = grid.get_connectivity("E2C").asnumpy()
     assert has_invalid_index(serialized_e2c) == grid.limited_area
     assert has_invalid_index(e2c_table) == grid.limited_area
@@ -216,7 +216,7 @@ def test_grid_manager_eval_c2e(
 ) -> None:
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
 
-    serialized_c2e = grid_savepoint.c2e()
+    serialized_c2e = data_alloc.as_numpy(grid_savepoint.c2e())
     c2e_table = grid.get_connectivity("C2E").asnumpy()
     # no cells with less than 3 neighboring edges exist, otherwise the cell is not there in the
     # first place
@@ -237,7 +237,7 @@ def test_grid_manager_eval_c2e2c(
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
     assert np.allclose(
         grid.get_connectivity("C2E2C").asnumpy(),
-        grid_savepoint.c2e2c(),
+        data_alloc.as_numpy(grid_savepoint.c2e2c()),
     )
 
 
@@ -249,7 +249,7 @@ def test_grid_manager_eval_c2e2cO(
     backend: gtx_typing.Backend,
 ) -> None:
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
-    serialized_grid = grid_savepoint.construct_icon_grid()
+    serialized_grid = grid_savepoint.construct_icon_grid(backend=backend)
     assert np.allclose(
         grid.get_connectivity("C2E2CO").asnumpy(),
         serialized_grid.get_connectivity("C2E2CO").asnumpy(),
@@ -265,7 +265,7 @@ def test_grid_manager_eval_e2c2e(
     backend: gtx_typing.Backend,
 ) -> None:
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
-    serialized_grid = grid_savepoint.construct_icon_grid()
+    serialized_grid = grid_savepoint.construct_icon_grid(backend=backend)
     serialized_e2c2e = serialized_grid.get_connectivity("E2C2E").asnumpy()
     serialized_e2c2eO = serialized_grid.get_connectivity("E2C2EO").asnumpy()
     assert has_invalid_index(serialized_e2c2e) == grid.limited_area
@@ -290,7 +290,7 @@ def test_grid_manager_eval_e2c2v(
     backend: gtx_typing.Backend,
 ) -> None:
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
-    serialized_ref = grid_savepoint.e2c2v()
+    serialized_ref = data_alloc.as_numpy(grid_savepoint.e2c2v())
     # the "far" (adjacent to edge normal ) is not always there, because ICON only calculates those starting from
     #   (lateral_boundary(dims.EdgeDim) + 1) to end(dims.EdgeDim)  (see mo_intp_coeffs.f90) and only for owned cells
     table = grid.get_connectivity("E2C2V").asnumpy()
@@ -311,7 +311,7 @@ def test_grid_manager_eval_c2v(
 ) -> None:
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
     c2v = grid.get_connectivity("C2V").asnumpy()
-    assert np.allclose(c2v, grid_savepoint.c2v())
+    assert np.allclose(c2v, data_alloc.as_numpy(grid_savepoint.c2v()))
 
 
 @pytest.mark.parametrize(
@@ -397,7 +397,7 @@ def test_grid_manager_eval_c2e2c2e(
     backend: gtx_typing.Backend,
 ) -> None:
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
-    serialized_grid = grid_savepoint.construct_icon_grid()
+    serialized_grid = grid_savepoint.construct_icon_grid(backend=backend)
     assert np.allclose(
         grid.get_connectivity("C2E2C2E").asnumpy(),
         serialized_grid.get_connectivity("C2E2C2E").asnumpy(),
@@ -415,7 +415,7 @@ def test_grid_manager_start_end_index_compare_with_serialized_data(
     dim: gtx.Dimension,
     backend: gtx_typing.Backend,
 ) -> None:
-    serialized_grid = grid_savepoint.construct_icon_grid()
+    serialized_grid = grid_savepoint.construct_icon_grid(backend=backend)
     grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid
 
     for domain in h_grid.get_domains_for_dim(dim):
@@ -469,11 +469,11 @@ def test_tangent_orientation(
     experiment: definitions.Experiment,
     backend: gtx_typing.Backend,
 ) -> None:
-    expected = grid_savepoint.tangent_orientation()
+    expected = data_alloc.as_numpy(grid_savepoint.tangent_orientation())
     manager = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend)
     assert test_utils.dallclose(
         manager.geometry_fields[gridfile.GeometryName.TANGENT_ORIENTATION].asnumpy(),
-        expected.asnumpy(),
+        expected,
     )
 
 
@@ -483,11 +483,11 @@ def test_edge_orientation_on_vertex(
     experiment: definitions.Experiment,
     backend: gtx_typing.Backend,
 ) -> None:
-    expected = grid_savepoint.vertex_edge_orientation()
+    expected = data_alloc.as_numpy(grid_savepoint.vertex_edge_orientation())
     manager = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend)
     assert test_utils.dallclose(
         manager.geometry_fields[gridfile.GeometryName.EDGE_ORIENTATION_ON_VERTEX].asnumpy(),
-        expected.asnumpy(),
+        expected,
     )
 
 
@@ -526,11 +526,11 @@ def test_cell_normal_orientation(
     experiment: definitions.Experiment,
     backend: gtx_typing.Backend,
 ) -> None:
-    expected = grid_savepoint.edge_orientation()
+    expected = data_alloc.as_numpy(grid_savepoint.edge_orientation())
     manager = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend)
     assert test_utils.dallclose(
         manager.geometry_fields[gridfile.GeometryName.CELL_NORMAL_ORIENTATION].asnumpy(),
-        expected.asnumpy(),
+        expected,
     )
 
 
@@ -540,12 +540,12 @@ def test_edge_vertex_distance(
     experiment: definitions.Experiment,
     backend: gtx_typing.Backend,
 ) -> None:
-    expected = grid_savepoint.edge_vert_length()
+    expected = data_alloc.as_numpy(grid_savepoint.edge_vert_length())
     manager = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend)
 
     assert test_utils.dallclose(
         manager.geometry_fields[gridfile.GeometryName.EDGE_VERTEX_DISTANCE].asnumpy(),
-        expected.asnumpy(),
+        expected,
         equal_nan=True,
     )
 
@@ -574,10 +574,10 @@ def test_decomposition_info_single_rank(
     grid_file = experiment.grid
     gm = utils.run_grid_manager(grid_file, keep_skip_values=True, backend=backend)
     result = gm.decomposition_info
-    assert np.all(data_alloc.as_numpy(result.local_index(dim)) == expected.local_index(dim))
-    assert np.all(data_alloc.as_numpy(result.global_index(dim)) == expected.global_index(dim))
-    assert np.all(data_alloc.as_numpy(result.owner_mask(dim)) == expected.owner_mask(dim))
-    assert np.all(data_alloc.as_numpy(result.halo_levels(dim)) == expected.halo_levels(dim))
+    assert (result.local_index(dim) == expected.local_index(dim)).all()
+    assert (result.global_index(dim) == expected.global_index(dim)).all()
+    assert (result.owner_mask(dim) == expected.owner_mask(dim)).all()
+    assert (result.halo_levels(dim) == expected.halo_levels(dim)).all()
 
 
 @pytest.mark.parametrize("rank", (0, 1, 2, 3), ids=lambda rank: f"rank{rank}")
diff --git a/model/common/tests/common/interpolation/unit_tests/test_rbf_interpolation.py b/model/common/tests/common/interpolation/unit_tests/test_rbf_interpolation.py
index 7f1826ade8..bd97f9552a 100644
--- a/model/common/tests/common/interpolation/unit_tests/test_rbf_interpolation.py
+++ b/model/common/tests/common/interpolation/unit_tests/test_rbf_interpolation.py
@@ -78,7 +78,7 @@ def test_construct_rbf_matrix_offsets_tables_for_cells(
     )
     assert np.max(offset_table) == grid.num_edges - 1
 
-    offset_table_savepoint = grid_savepoint.c2e2c2e()
+    offset_table_savepoint = data_alloc.as_numpy(grid_savepoint.c2e2c2e())
     assert offset_table.shape == offset_table_savepoint.shape
 
     # Savepoint neighbors before start index may not be populated correctly,
@@ -110,7 +110,7 @@ def test_construct_rbf_matrix_offsets_tables_for_edges(
     )
     assert np.max(offset_table) == grid.num_edges - 1
 
-    offset_table_savepoint = grid_savepoint.e2c2e()
+    offset_table_savepoint = data_alloc.as_numpy(grid_savepoint.e2c2e())
     assert offset_table.shape == offset_table_savepoint.shape
 
     start_index = grid.start_index(
@@ -140,7 +140,7 @@ def test_construct_rbf_matrix_offsets_tables_for_vertices(
     )
     assert np.max(offset_table) == grid.num_edges - 1
 
-    offset_table_savepoint = grid_savepoint.v2e()
+    offset_table_savepoint = data_alloc.as_numpy(grid_savepoint.v2e())
     assert offset_table.shape == offset_table_savepoint.shape
 
     start_index = grid.start_index(
diff --git a/model/common/tests/common/math/unit_tests/test_smagorinsky.py b/model/common/tests/common/math/unit_tests/test_smagorinsky.py
index e43a97ab71..4c0e27e116 100644
--- a/model/common/tests/common/math/unit_tests/test_smagorinsky.py
+++ b/model/common/tests/common/math/unit_tests/test_smagorinsky.py
@@ -24,9 +24,8 @@ def test_init_enh_smag_fac(backend_like: model_backends.BackendLike, grid: base_
     a_vec = data_alloc.random_field(
         grid, dims.KDim, low=1.0, high=10.0, extend={dims.KDim: 1}, allocator=backend
     )
-    xp = data_alloc.import_array_ns(backend)
-    fac = xp.asarray([0.67, 0.5, 1.3, 0.8])
-    z = xp.asarray([0.1, 0.2, 0.3, 0.4])
+    fac = (0.67, 0.5, 1.3, 0.8)
+    z = (0.1, 0.2, 0.3, 0.4)
 
     enhanced_smag_fac_np = enhanced_smagorinski_factor_numpy(fac, z, a_vec.asnumpy())
     en_smag_fac_for_zero_nshift.with_backend(backend)(
diff --git a/model/testing/src/icon4py/model/testing/fixtures/datatest.py b/model/testing/src/icon4py/model/testing/fixtures/datatest.py
index 7730656d5a..8b07dd27b9 100644
--- a/model/testing/src/icon4py/model/testing/fixtures/datatest.py
+++ b/model/testing/src/icon4py/model/testing/fixtures/datatest.py
@@ -81,7 +81,6 @@ def cpu_allocator() -> gtx_typing.Allocator:
 
 @pytest.fixture(
     params=[
-        definitions.Grids.R01B01_GLOBAL,
         definitions.Grids.R02B04_GLOBAL,
         definitions.Grids.MCH_CH_R04B09_DSL,
         definitions.Grids.TORUS_50000x5000,
diff --git a/model/testing/src/icon4py/model/testing/parallel_helpers.py b/model/testing/src/icon4py/model/testing/parallel_helpers.py
index b0ad1b0465..50dde45d3d 100644
--- a/model/testing/src/icon4py/model/testing/parallel_helpers.py
+++ b/model/testing/src/icon4py/model/testing/parallel_helpers.py
@@ -5,15 +5,21 @@
 #
 # Please, refer to the LICENSE file in the root directory.
 # SPDX-License-Identifier: BSD-3-Clause
+import functools
 import logging
+import operator
 
+import numpy as np
 import pytest
+from gt4py import next as gtx
 
 from icon4py.model.common import dimension as dims
 from icon4py.model.common.decomposition import definitions
+from icon4py.model.common.utils import data_allocation as data_alloc
+from icon4py.model.testing import test_utils
 
 
-log = logging.getLogger(__file__)
+_log = logging.getLogger(__file__)
 
 
 def check_comm_size(
@@ -24,12 +30,123 @@ def check_comm_size(
 
 
 def log_process_properties(props: definitions.ProcessProperties) -> None:
-    log.info(f"rank={props.rank}/{props.comm_size}")
+    _log.info(f"rank={props.rank}/{props.comm_size}")
 
 
 def log_local_field_size(decomposition_info: definitions.DecompositionInfo) -> None:
-    log.info(
+    _log.info(
         f"local grid size: cells={decomposition_info.global_index(dims.CellDim).size}, "
         f"edges={decomposition_info.global_index(dims.EdgeDim).size}, "
         f"vertices={decomposition_info.global_index(dims.VertexDim).size}"
     )
+
+
+def gather_field(field: np.ndarray, props: definitions.ProcessProperties) -> tuple:
+    """Gather the rank-local ``field`` pieces on rank 0 along the first axis.
+
+    Rank 0 returns the per-rank first-axis lengths and the combined array; every
+    other rank returns its local ``field.shape`` and ``None``.
+    """
+    on_root = props.rank == 0
+    constant_dims = tuple(field.shape[1:])
+    _log.info(f"gather_field on rank={props.rank} - gathering field of local shape {field.shape}")
+    # Gatherv needs a contiguous buffer; sparse indexing may yield a non-contiguous one.
+    field = np.ascontiguousarray(field)
+    row_length = functools.reduce(operator.mul, constant_dims, 1)
+    local_sizes = np.array(props.comm.gather(field.size, root=0))
+    recv_buffer = np.empty(np.sum(local_sizes), dtype=field.dtype) if on_root else None
+    if on_root:
+        _log.info(
+            f"gather_field on rank = {props.rank} - setup receive buffer with size {sum(local_sizes)} on rank 0"
+        )
+
+    props.comm.Gatherv(sendbuf=field, recvbuf=(recv_buffer, local_sizes), root=0)
+    if not on_root:
+        return field.shape, None
+    local_first_dim = tuple(sz // row_length for sz in local_sizes)
+    _log.info(
+        f" gather_field on rank = 0: computed local dims {local_first_dim} - constant dims {constant_dims}"
+    )
+    return local_first_dim, recv_buffer.reshape((-1, *constant_dims))  # type: ignore [union-attr]
+
+
+def check_local_global_field(
+    decomposition_info: definitions.DecompositionInfo,
+    processor_props: definitions.ProcessProperties,
+    dim: gtx.Dimension,
+    global_reference_field: np.ndarray,
+    local_field: np.ndarray,
+    check_halos: bool,
+    atol: float,
+) -> None:
+    """Check a rank-local field against the global reference field.
+
+    Fields on ``dims.KDim`` are compared directly. For any other ``dim`` the halo
+    entries are compared on each rank (if ``check_halos``), and the owned entries
+    are gathered on rank 0, placed at their global indices and compared there.
+
+    Unless ``dim`` is ``dims.KDim``, all ranks have to call this function because
+    ``gather_field`` communicates collectively.
+
+    Args:
+        decomposition_info: source of the local/global index arrays for ``dim``.
+        processor_props: read for ``rank``/``comm_size``; passed to ``gather_field``.
+        dim: main (first) dimension of ``local_field``.
+        global_reference_field: reference data, addressed by global index.
+        local_field: this rank's data, addressed by local index.
+        check_halos: whether to compare the halo entries as well.
+        atol: absolute tolerance forwarded to every comparison.
+
+    Raises:
+        AssertionError: if the leading sizes or the gathered sizes do not match.
+    """
+    entry_type = definitions.DecompositionInfo.EntryType
+    if dim == dims.KDim:
+        # Forward the caller's tolerance here too, as the checks below do.
+        test_utils.assert_dallclose(global_reference_field, local_field, atol=atol)
+        return
+
+    _log.info(
+        f" rank= {processor_props.rank}/{processor_props.comm_size}----exchanging field of main dim {dim}"
+    )
+    # The leading axis must have one entry per local index of entry type ALL.
+    assert local_field.shape[0] == decomposition_info.global_index(dim, entry_type.ALL).shape[0]
+
+    # Compare halo against global reference field
+    if check_halos:
+        halo_global = data_alloc.as_numpy(decomposition_info.global_index(dim, entry_type.HALO))
+        halo_local = data_alloc.as_numpy(decomposition_info.local_index(dim, entry_type.HALO))
+        test_utils.assert_dallclose(
+            global_reference_field[halo_global], local_field[halo_local], atol=atol, verbose=True
+        )
+
+    # Compare owned local field, excluding halos, against global reference
+    # field, by gathering owned entries to the first rank. This ensures that in
+    # total we have the full global field distributed on all ranks.
+    owned_local = data_alloc.as_numpy(decomposition_info.local_index(dim, entry_type.OWNED))
+    owned_global = data_alloc.as_numpy(decomposition_info.global_index(dim, entry_type.OWNED))
+    gathered_sizes, gathered_field = gather_field(local_field[owned_local], processor_props)
+    global_index_sizes, gathered_global_indices = gather_field(owned_global, processor_props)
+
+    # gather_field delivers the gathered data on rank 0 only.
+    if processor_props.rank == 0:
+        _log.info(f"rank = {processor_props.rank}: asserting gathered fields: ")
+
+        assert np.all(
+            gathered_sizes == global_index_sizes
+        ), f"gathered field sizes do not match:  {dim} {gathered_sizes} - {global_index_sizes}"
+        _log.info(
+            f"rank = {processor_props.rank}: Checking field size on dim ={dim}: --- gathered sizes {gathered_sizes} = {sum(gathered_sizes)}"
+        )
+        _log.info(
+            f"rank = {processor_props.rank}:                      --- gathered field has size {gathered_sizes}"
+        )
+        # Allocate with the gathered dtype so that values keep their type instead
+        # of being cast to float64 when they are placed at their global indices.
+        sorted_ = np.zeros(global_reference_field.shape, dtype=gathered_field.dtype)
+        sorted_[gathered_global_indices] = gathered_field
+        _log.info(
+            f" rank = {processor_props.rank}: SHAPES: global reference field {global_reference_field.shape}, gathered = {gathered_field.shape}"
+        )
+
+        test_utils.assert_dallclose(sorted_, global_reference_field, atol=atol, verbose=True)
diff --git a/model/testing/src/icon4py/model/testing/serialbox.py b/model/testing/src/icon4py/model/testing/serialbox.py
index 15e86d11e3..3b3ca32775 100644
--- a/model/testing/src/icon4py/model/testing/serialbox.py
+++ b/model/testing/src/icon4py/model/testing/serialbox.py
@@ -12,7 +12,6 @@
 
 import gt4py.next as gtx
 import gt4py.next.typing as gtx_typing
-import numpy as np
 import serialbox
 
 import icon4py.model.common.decomposition.definitions as decomposition
@@ -73,7 +72,7 @@ def wrapper(self, *args, **kwargs):
                         # as a workaround for the lack of support for optional fields in gt4py.
                         shp = (1,) * len(dims)
                         return gtx.as_field(
-                            dims, np.zeros(shp, dtype=dtype), allocator=self.backend
+                            dims, self.xp.zeros(shp, dtype=dtype), allocator=self.backend
                         )
                     else:
                         return None
@@ -94,11 +93,11 @@ def _get_field(
         transpose: None | Sequence[int] = None,
     ):
         # Note: slice is applied before transpose!
-        buffer = np.squeeze(self.serializer.read(name, self.savepoint).astype(dtype))
+        buffer = self.xp.squeeze(self.serializer.read(name, self.savepoint).astype(dtype))
         if slice_ is not None:
             buffer = buffer[slice_]
         if transpose is not None:
-            buffer = np.transpose(buffer, axes=transpose)
+            buffer = self.xp.transpose(buffer, axes=transpose)
         buffer = self._reduce_to_dim_size(buffer, dimensions)
 
         self.log.debug(f"{name} {buffer.shape}")
@@ -106,7 +105,7 @@ def _get_field(
 
     def _get_field_component(self, name: str, level: int, dims: tuple[gtx.Dimension, gtx]):
         buffer = self.serializer.read(name, self.savepoint).astype(float)
-        buffer = np.squeeze(buffer)[:, :, level]
+        buffer = self.xp.squeeze(buffer)[:, :, level]
         buffer = self._reduce_to_dim_size(buffer, dims)
         self.log.debug(f"{name} {buffer.shape}")
         return gtx.as_field(dims, buffer, allocator=self.backend)
@@ -149,7 +148,9 @@ def _read_bool(self, name: str):
         return self._read(name, offset=0, dtype=bool)
 
     def _read(self, name: str, offset=0, dtype=int):
-        return np.squeeze(self.serializer.read(name, self.savepoint) - offset).astype(dtype)
+        return self.xp.asarray(
+            self.xp.squeeze(self.serializer.read(name, self.savepoint) - offset).astype(dtype)
+        )
 
 
 class IconGridSavepoint(IconSavepoint):
@@ -378,35 +379,35 @@ def edge_cell_length(self):
 
     def cells_start_index(self):
         start_idx = self._read_int32("c_start_index")
-        return np.where(start_idx == 0, start_idx, start_idx - 1)
+        return self.xp.where(start_idx == 0, start_idx, start_idx - 1)
 
     def cells_end_index(self):
         return self._read_int32("c_end_index")
 
     def vertex_start_index(self):
         start_idx = self._read_int32("v_start_index")
-        return np.where(start_idx == 0, start_idx, start_idx - 1)
+        return self.xp.where(start_idx == 0, start_idx, start_idx - 1)
 
     def vertex_end_index(self):
         return self._read_int32("v_end_index")
 
     def edge_start_index(self):
         start_idx = self._read_int32("e_start_index")
-        return np.where(start_idx == 0, start_idx, start_idx - 1)
+        return self.xp.where(start_idx == 0, start_idx, start_idx - 1)
 
     def edge_end_index(self):
         # don't need to subtract 1, because FORTRAN slices  are inclusive [from:to] so the being
         # one off accounts for being exclusive [from:to)
         return self._read_int32("e_end_index")
 
-    def start_index(self) -> dict[gtx.Dimension, np.ndarray]:
+    def start_index(self) -> dict[gtx.Dimension, data_alloc.NDArray]:
         return {
             dims.CellDim: self.cells_start_index(),
             dims.EdgeDim: self.edge_start_index(),
             dims.VertexDim: self.vertex_start_index(),
         }
 
-    def end_index(self) -> dict[gtx.Dimension, np.ndarray]:
+    def end_index(self) -> dict[gtx.Dimension, data_alloc.NDArray]:
         return {
             dims.CellDim: self.cells_end_index(),
             dims.EdgeDim: self.edge_end_index(),
@@ -439,7 +440,7 @@ def c2e(self):
 
     def _get_connectivity_array(self, name: str, target_dim: gtx.Dimension, reverse: bool = False):
         if reverse:
-            connectivity = np.transpose(self._read_int32(name, offset=1))[
+            connectivity = self.xp.transpose(self._read_int32(name, offset=1))[
                 : self.sizes[target_dim], :
             ]
         else:
@@ -455,7 +456,7 @@ def e2c2e(self):
 
     def c2e2c2e(self):
         if self._c2e2c2e() is None:
-            return np.zeros((self.sizes[dims.CellDim], 9), dtype=gtx.int32)
+            return self.xp.zeros((self.sizes[dims.CellDim], 9), dtype=gtx.int32)
         else:
             return self._c2e2c2e()
 
@@ -514,7 +515,7 @@ def _read_field_for_dim(field_name, read_func, dim: gtx.Dimension):
                 )
 
     def owner_mask(self, dim: gtx.Dimension):
-        return np.squeeze(self._read_field_for_dim("owner_mask", self._read_bool, dim))
+        return self.xp.squeeze(self._read_field_for_dim("owner_mask", self._read_bool, dim))
 
     def global_index(self, dim: gtx.Dimension):
         return self._read_field_for_dim("glb_index", self._read_int32_shift1, dim)
@@ -559,15 +560,14 @@ def construct_icon_grid(
             def potentially_revert_icon_index_transformation(ar):
                 return ar
         else:
-            potentially_revert_icon_index_transformation = functools.partial(
-                grid_utils.revert_repeated_index_to_invalid,
-                array_ns=data_alloc.import_array_ns(backend),
+            potentially_revert_icon_index_transformation = (
+                grid_utils.revert_repeated_index_to_invalid
             )
 
         c2e2c = self.c2e2c()
         e2c2e = potentially_revert_icon_index_transformation(self.e2c2e())
-        c2e2c0 = np.column_stack((range(c2e2c.shape[0]), c2e2c))
-        e2c2e0 = np.column_stack((range(e2c2e.shape[0]), e2c2e))
+        c2e2c0 = self.xp.column_stack((self.xp.asarray(range(c2e2c.shape[0])), c2e2c))
+        e2c2e0 = self.xp.column_stack((self.xp.asarray(range(e2c2e.shape[0])), e2c2e))
 
         constructor = functools.partial(
             h_grid.get_start_end_idx_from_icon_arrays,
@@ -662,7 +662,7 @@ def geofac_grdiv(self):
         return self._get_field("geofac_grdiv", dims.EdgeDim, dims.E2C2EODim)
 
     def geofac_grg(self):
-        grg = np.squeeze(self.serializer.read("geofac_grg", self.savepoint))
+        grg = self.xp.squeeze(self.serializer.read("geofac_grg", self.savepoint))
         num_cells = self.sizes[dims.CellDim]
         return gtx.as_field(
             (dims.CellDim, dims.C2E2CODim), grg[:num_cells, :, 0], allocator=self.backend
@@ -755,15 +755,15 @@ def mask_prog_halo_c(self):
 
     @IconSavepoint.optionally_registered()
     def pg_edgeidx(self):
-        return np.squeeze(self.serializer.read("pg_edgeidx", self.savepoint))
+        return self.xp.squeeze(self.serializer.read("pg_edgeidx", self.savepoint))
 
     @IconSavepoint.optionally_registered()
     def pg_vertidx(self):
-        return np.squeeze(self.serializer.read("pg_vertidx", self.savepoint))
+        return self.xp.squeeze(self.serializer.read("pg_vertidx", self.savepoint))
 
     @IconSavepoint.optionally_registered()
     def pg_exdist(self):
-        return np.squeeze(self.serializer.read("pg_exdist", self.savepoint))
+        return self.xp.squeeze(self.serializer.read("pg_exdist", self.savepoint))
 
     def pg_exdist_dsl(self):
         pg_edgeidx = self.pg_edgeidx()
@@ -889,12 +889,12 @@ def geopot(self):
 
     @IconSavepoint.optionally_registered()
     def zd_cellidx(self):
-        return np.squeeze(self.serializer.read("zd_cellidx", self.savepoint))
+        return self.xp.squeeze(self.serializer.read("zd_cellidx", self.savepoint))
 
     @IconSavepoint.optionally_registered()
     def zd_vertidx(self):
         # this is the k list (with fortran 1-based indexing) for the central point of the C2E2C stencil
-        return np.squeeze(self.serializer.read("zd_vertidx", self.savepoint))[0, :]
+        return self.xp.squeeze(self.serializer.read("zd_vertidx", self.savepoint))[0, :]
 
     @IconSavepoint.optionally_registered(dims.CellDim, dims.C2E2CDim, dims.KDim, dtype=gtx.int32)
     def zd_vertoffset(self):
@@ -902,7 +902,7 @@ def zd_vertoffset(self):
         zd_vertidx = self.zd_vertidx()
         # these are the three k offsets for the C2E2C neighbors
         zd_vertoffset = (
-            np.squeeze(self.serializer.read("zd_vertidx", self.savepoint))[1:, :] - zd_vertidx
+            self.xp.squeeze(self.serializer.read("zd_vertidx", self.savepoint))[1:, :] - zd_vertidx
         )
         cell_c2e2c_k_domain = gtx.domain(
             {
@@ -927,7 +927,7 @@ def zd_vertoffset(self):
     def zd_intcoef(self):
         zd_cellidx = self.zd_cellidx()
         zd_vertidx = self.zd_vertidx()
-        zd_intcoef = np.squeeze(self.serializer.read("zd_intcoef", self.savepoint))
+        zd_intcoef = self.xp.squeeze(self.serializer.read("zd_intcoef", self.savepoint))
         cell_c2e2c_k_domain = gtx.domain(
             {
                 dims.CellDim: self.theta_ref_mc().domain[dims.CellDim].unit_range,
@@ -951,7 +951,7 @@ def zd_intcoef(self):
     def zd_diffcoef(self):
         zd_cellidx = self.zd_cellidx()
         zd_vertidx = self.zd_vertidx()
-        zd_diffcoef = np.squeeze(self.serializer.read("zd_diffcoef", self.savepoint))
+        zd_diffcoef = self.xp.squeeze(self.serializer.read("zd_diffcoef", self.savepoint))
         return data_alloc.list2field(
             domain=self.geopot().domain,
             values=zd_diffcoef,
@@ -1028,16 +1028,16 @@ def exner(self):
         return self._get_field("exner", dims.CellDim, dims.KDim)
 
     def diff_multfac_smag(self):
-        return np.squeeze(self.serializer.read("diff_multfac_smag", self.savepoint))
+        return self.xp.squeeze(self.serializer.read("diff_multfac_smag", self.savepoint))
 
     def enh_smag_fac(self):
-        return np.squeeze(self.serializer.read("enh_smag_fac", self.savepoint))
+        return self.xp.squeeze(self.serializer.read("enh_smag_fac", self.savepoint))
 
     def smag_limit(self):
-        return np.squeeze(self.serializer.read("smag_limit", self.savepoint))
+        return self.xp.squeeze(self.serializer.read("smag_limit", self.savepoint))
 
     def diff_multfac_n2w(self):
-        return np.squeeze(self.serializer.read("diff_multfac_n2w", self.savepoint))
+        return self.xp.squeeze(self.serializer.read("diff_multfac_n2w", self.savepoint))
 
     def nudgezone_diff(self) -> int:
         return self.serializer.read("nudgezone_diff", self.savepoint)[0]
diff --git a/uv.lock b/uv.lock
index 23ec7fab54..d5c216e846 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2523,15 +2523,55 @@ wheels = [
 
 [[package]]
 name = "mpi4py"
-version = "4.0.1"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/08/34/8499a92a387d24d0092c38089f8195f13c5c76f0f814126af3fe363e5636/mpi4py-4.0.1.tar.gz", hash = "sha256:f3174b245775d556f4fddb32519a2066ef0592edc810c5b5a59238f9a0a40c89", size = 466179, upload-time = "2024-10-11T10:59:53.425Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/22/15/7d2fd2ca8b1ae362371b2bb9b2f787f9166b6ecd536e0e773dce6b98a5a9/mpi4py-4.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:600f26cae7f390b4ec525f5c1ccc374686c37a8c07f9c21320866c0a323f6dae", size = 1588594, upload-time = "2024-10-12T07:10:26.736Z" },
-    { url = "https://files.pythonhosted.org/packages/f4/f7/6dfdee53f9806361ab75cb83ee5feab06a738f7f6a42715c79d72a783d31/mpi4py-4.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:0cb209fcdc7fee0346d12edff1cfd1c1ffca1b807c53631ba0436b9c2bcf8229", size = 1599377, upload-time = "2024-10-12T07:10:30.836Z" },
-    { url = "https://files.pythonhosted.org/packages/35/28/7e5eae1a9940f48c41e208e9e6fdb56e497095030ab53e2d9ce702705cbb/mpi4py-4.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:b704e7db92b1ac94b96802e17cf28082455daa928e8e51398ad9f5e5eb8c9b7b", size = 1727556, upload-time = "2024-10-12T07:10:36.005Z" },
-    { url = "https://files.pythonhosted.org/packages/95/70/cc361869a2920476ecc5f29c98e0130aaf2e177a0087cb7ebbafb90414f1/mpi4py-4.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:52a7b1760b1aeb41a0ea38969314b2b170117a0ded2f689915f1cb89aaaf8a6f", size = 1726170, upload-time = "2024-10-12T07:10:39.15Z" },
-    { url = "https://files.pythonhosted.org/packages/17/23/81aed5da44f9d743f1e76909fd04ae5dc122ff7c9f97fa0b40b8f752245c/mpi4py-4.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:93f45dcc2fd5f3396f961b1bc8f0fb9d5db786fdc0d72e4f8611f47718b5dac8", size = 1584997, upload-time = "2024-10-12T07:10:52.704Z" },
+version = "4.1.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/62/74/28ea85b0b949cad827ea50720e00e814e88c8fd536c27c3c491e4f025724/mpi4py-4.1.1.tar.gz", hash = "sha256:eb2c8489bdbc47fdc6b26ca7576e927a11b070b6de196a443132766b3d0a2a22", size = 500518, upload-time = "2025-10-10T13:55:20.402Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/36/b3/2e7df40608f2188dca16e38f8030add1071f06b1cd94dd8a4e16b9acbd84/mpi4py-4.1.1-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:1586f5d1557abed9cba7e984d18f32e787b353be0986e599974db177ae36329a", size = 1422849, upload-time = "2025-10-10T13:53:40.082Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/ed/970bd3edc0e614eccc726fa406255b88f728a8bc059e81f96f28d6ede0af/mpi4py-4.1.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:ba85e4778d63c750226de95115c92b709f38d7e661be660a275da4f0992ee197", size = 1326982, upload-time = "2025-10-10T13:53:42.32Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/c3/f9a5d1f9ba52ac6386bf3d3550027f42a6b102b0432113cc43294420feb2/mpi4py-4.1.1-cp310-abi3-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0a8332884626994d9ef48da233dc7a0355f4868dd7ff59f078d5813a2935b930", size = 1373127, upload-time = "2025-10-10T13:53:43.957Z" },
+    { url = "https://files.pythonhosted.org/packages/84/d1/1fe75025df801d817ed49371c719559f742f3f263323442d34dbe3366af3/mpi4py-4.1.1-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6e0352860f0b3e18bc0dcb47e42e583ccb9472f89752d711a6fca46a38670554", size = 1225134, upload-time = "2025-10-10T13:53:45.583Z" },
+    { url = "https://files.pythonhosted.org/packages/40/44/d653fec0e4ca8181645da4bfb2763017625e5b3f151b208fadd932cb1766/mpi4py-4.1.1-cp310-abi3-win_amd64.whl", hash = "sha256:0f46dfe666a599e4bd2641116b2b4852a3ed9d37915edf98fae471d666663128", size = 1478863, upload-time = "2025-10-10T13:53:47.178Z" },
+    { url = "https://files.pythonhosted.org/packages/58/f7/793c9a532e5367cffb2b97ca6a879285ca73a14f79e6ff208bb390651a43/mpi4py-4.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9082e04c8afcffa7d650a262d800af1a617c555d610810deeab265a4a5f7d42e", size = 1585904, upload-time = "2025-10-10T13:53:49.129Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/fe/cdead6721426b25d817a1bf45d5adc6dc90fd8bb0831f5ca06a4edd2015c/mpi4py-4.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1d618e6a5a8f6f86c33a954356d8ed398bec31f34b63321570661ac157063bb6", size = 1438343, upload-time = "2025-10-10T13:53:51.098Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/c4/4a73c80cf483df603770278f0fdc57da5394edee376790c62f1eba04bb3b/mpi4py-4.1.1-cp310-cp310-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d4c460609bd6decc22ad89cbfe48e4c5a2461ff52ada9345a4c19edee39f93da", size = 1432321, upload-time = "2025-10-10T13:53:53.235Z" },
+    { url = "https://files.pythonhosted.org/packages/49/56/7b32631f3cc5cf741610a108a7f40a3714c9862c1f637b5ded525af32be9/mpi4py-4.1.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c04a388c7a945e751c82742c6bb277434d26a67768a01952f7494d1c25dff94b", size = 1299883, upload-time = "2025-10-10T13:53:55.22Z" },
+    { url = "https://files.pythonhosted.org/packages/14/76/53caf807ec74c042fbecf76162e071c09c53fb0ed66b1edf31dabd64c588/mpi4py-4.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:1ad4b225a5a1a02a2b89979ed8f328c6a2bc3bd6ad4a57e453727f90373fa5f8", size = 1622884, upload-time = "2025-10-10T13:53:56.882Z" },
+    { url = "https://files.pythonhosted.org/packages/20/8f/5d28174048ef02fb91dd0759a32c07b272c9f1df265e19145712aa7bd712/mpi4py-4.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a428ba96b992a8911cf932fa71dd8c0260d47ab7e5dee2b09239ad91fc540b79", size = 1596913, upload-time = "2025-10-10T13:53:58.466Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/81/dce928b11816fac9713e93e609476ddac520fc50368aa7591728c329ff19/mpi4py-4.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fc0cf81445fac2ae2e5716c365fd72e1bb545df065f5a3f6731f64b3beed886e", size = 1433274, upload-time = "2025-10-10T13:54:00.508Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/15/1a869a35d3e3438866dc8d8c9cb04dc6aa484171343627a8baf82c3c1ca9/mpi4py-4.1.1-cp311-cp311-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a753d5d61b46f90260247f344a6c57c527a6a4e7bea126830120ab41c3d057e5", size = 1423333, upload-time = "2025-10-10T13:54:03.679Z" },
+    { url = "https://files.pythonhosted.org/packages/25/33/072781fb85f5bc50b93ee7e8d3b3afb849d50570431b6cb2aa957db79b59/mpi4py-4.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4a36ef9d7b2b6b62026dbf9b59b44efb5430f7b9ca5fb855bfbf8d403218e37c", size = 1299183, upload-time = "2025-10-10T13:54:05.3Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/a7/152af3c6412702a4e0fcfd0fe572307ed52821de13db9c96535f31a39aa7/mpi4py-4.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:20bf4c0c65fd67287664f8b1b6dc7c7b341838f10bba34a2e452d47530ce8a5f", size = 1632284, upload-time = "2025-10-10T13:54:06.786Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/2c/e201cd4828555f10306a5439875cbd0ecfba766ace01ff5c6df43f795650/mpi4py-4.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d4403a7cec985be9963efc626193e6df3f63f5ada0c26373c28e640e623e56c3", size = 1669517, upload-time = "2025-10-10T13:54:08.404Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/53/18d978c3a19deecf38217ce54319e6c9162fec3569c4256c039b66eac2f4/mpi4py-4.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8a2ffccc9f3a8c7c957403faad594d650c60234ac08cbedf45beaa96602debe9", size = 1454721, upload-time = "2025-10-10T13:54:09.977Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/15/b908d1d23a4bd2bd7b2e98de5df23b26e43145119fe294728bf89211b935/mpi4py-4.1.1-cp312-cp312-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ed3d9b619bf197a290f7fd67eb61b1c2a5c204afd9621651a50dc0b1c1280d45", size = 1448977, upload-time = "2025-10-10T13:54:11.65Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/19/088a2d37e80e0feb7851853b2a71cbe6f9b18bdf0eab680977864ea83aab/mpi4py-4.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0699c194db5d95fc2085711e4e0013083bd7ae9a88438e1fd64ddb67e9b0cf9e", size = 1318737, upload-time = "2025-10-10T13:54:13.075Z" },
+    { url = "https://files.pythonhosted.org/packages/97/3a/526261f39bf096e5ff396d18b76740a58d872425612ff84113dd85c2c08e/mpi4py-4.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:0abf5490c3d49c30542b461bfc5ad88dd7d147a4bdb456b7163640577fdfef88", size = 1725676, upload-time = "2025-10-10T13:54:14.681Z" },
+    { url = "https://files.pythonhosted.org/packages/30/75/2ffccd69360680a0216e71f90fd50dc8ff49711be54502d522a068196c68/mpi4py-4.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3dd973c509f2dbb6904c035a4a071509cde98decf0528fa21e2e7d5db5cc988", size = 1710002, upload-time = "2025-10-10T13:54:17.042Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/13/22fa9dcbc5e4ae6fd10cba6d49b7c879c30c5bea88f450f79b373d200f40/mpi4py-4.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c8c83a359e62dd7fdd030360f430e0e8986df029c0953ab216ff97a110038dc4", size = 1484623, upload-time = "2025-10-10T13:54:19.097Z" },
+    { url = "https://files.pythonhosted.org/packages/47/01/476f0f9dc96261d02214009f42e10338fc56f260f1f10b23ee89c515c8b7/mpi4py-4.1.1-cp313-cp313-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:323ba354ba951c7736c033c5f2ad07bb1276f9696f0312ea6ff0a28cd0ab3e3d", size = 1448403, upload-time = "2025-10-10T13:54:21.211Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/20/dc990edb7b075ecdba4e02bcd03d1583faeb84f664d1585c4c00a0f9851a/mpi4py-4.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c4ef9fe5fb211b1c5b6afe521397e3feb01e104024d6bc37aa4289c370605e2", size = 1318018, upload-time = "2025-10-10T13:54:23.23Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/bf/b0ab43a99ac2a1d6d5765cb7d2a4f093656090ce07528043057ecc3e87cb/mpi4py-4.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:e13a1ba26604514a12c95b7d76058ce800d5740d5f5f3b50c4b782cfa0dfaa1f", size = 1722939, upload-time = "2025-10-10T13:54:24.862Z" },
+    { url = "https://files.pythonhosted.org/packages/84/26/3e00dc536311e758096414b4f33beb4c7f04dff875e87a6e88fbbe4fc2d8/mpi4py-4.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:28ce1f7412f5e99a6b9fe2547203633431d0ee45670413a475a07e6c785e63b1", size = 1798116, upload-time = "2025-10-10T13:54:26.378Z" },
+    { url = "https://files.pythonhosted.org/packages/15/51/d06d2b126be5660aca8c00fe0d940a8658085038f61a9cfc834d3d5ffa80/mpi4py-4.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd1e49b84a0651018517e87daf68085719eca25e5c9a7cd05d98a73418c88836", size = 1586285, upload-time = "2025-10-10T13:54:27.838Z" },
+    { url = "https://files.pythonhosted.org/packages/51/63/eeb936e0e8cfd8160b6b297645c730b22d242595861cf6a2fa627a358175/mpi4py-4.1.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:dd869ea7758b591ffbb1483588a6fbf84952a5090e80a45ea89674d55cf25f3b", size = 1514102, upload-time = "2025-10-10T13:54:29.297Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/c1/06967d4c107ea7169d2120c4fb86c404707e6de82e277dc9f0fa5a9c1bf1/mpi4py-4.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:475da0797442cba723c0ad37da6a1c51d9624e697dd8bf89f23d0fad81e73eda", size = 1395247, upload-time = "2025-10-10T13:54:30.881Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/7c/5f0f32b39185f0a7074c165dc37cdd235bfd737928a2fe223e41b308fb4c/mpi4py-4.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:8d3bfa074776d9507ee957f5230d11ecd03da23f601a85349a1a333eaf55e5fa", size = 1771515, upload-time = "2025-10-10T13:54:32.395Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/e8/93ddde2b6ee7631b46bb79b851630b3527d9060b9b999844bcd882977539/mpi4py-4.1.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:1deb6f9df28ec6972305287cb2035c20d3f5af59f687f962080756374c16e48f", size = 1713353, upload-time = "2025-10-10T13:54:33.934Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/23/449562bd23fcfbd7d01006b39429972bfed5dfb8541355d06d2e17c16c27/mpi4py-4.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1bb1e3ad0b9047b0dbc7b4014160a7ab2a84f1627be665527c7445fc312f189b", size = 1496415, upload-time = "2025-10-10T13:54:35.927Z" },
+    { url = "https://files.pythonhosted.org/packages/51/33/9a5b9ae66cbb095b711f4ddae6d2d4b0f55202ac9e503fd588b101f04a22/mpi4py-4.1.1-cp314-cp314-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5f757e3089abf2c9db69fac1665fa99c52ed392fdf799159f25cba9ee3b64f5a", size = 1450750, upload-time = "2025-10-10T13:54:37.608Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/88/6acf948f19cb59c0e8843fed4ab4c471b7644e8a16c2d5d9c7ab6d73d573/mpi4py-4.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:807c6f1ed3adbc12952db52127e34cfbd6c48a05c3b3dd59deee2d2f09d78888", size = 1325773, upload-time = "2025-10-10T13:54:39.136Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/b4/3021e073772cd9e1062a810b7298e68ea40933fb91b1c1c0d07c968dce5c/mpi4py-4.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:2c85983d38d77e6302a242e32afd2a9a9b3adedd770e199a38e5b8957150e7ac", size = 1721603, upload-time = "2025-10-10T13:54:41.396Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/02/b6700c24fe28588a4e40adb23d02fe2aea82b33495fd6290235da5199383/mpi4py-4.1.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:729c4f625ad60e5cfb6c260608d249dc35a33cc16605faff01c6adbbd7e8ce0f", size = 1799551, upload-time = "2025-10-10T13:54:43.084Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/93/9c9870174183869bd5a50bbfe7bda91a52bf7ca2d0851de4009590e735a2/mpi4py-4.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3cca235d46009f54cb319c779c6ac53d41ce1eee3cf07f157995bc7739329b97", size = 1587583, upload-time = "2025-10-10T13:54:45.989Z" },
+    { url = "https://files.pythonhosted.org/packages/29/12/c46bec2311fc937ed3767312f9feb5f11bc70058c20bc53ae7369d759424/mpi4py-4.1.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2580fab891db492f32a6e02717e824f6fd5588be6560b08627c1e9322f7ccbfb", size = 1513437, upload-time = "2025-10-10T13:54:48.145Z" },
+    { url = "https://files.pythonhosted.org/packages/09/3e/e46629867204b22ce6804096e0b7d35bb5b473df1d12272021843af726c3/mpi4py-4.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6beec4841f9436d49ec9cabfd76a19df61c10b21ca14eddafa58fe7977802ee7", size = 1395082, upload-time = "2025-10-10T13:54:49.744Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/ca/7e27edf78cd8ba68aacafc836004cd092a978f0d5ffc8a3eac9e904a3e0e/mpi4py-4.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:b4b3813da9a7a1fc37ffb8dad314cb396313a40cd3fe150854ab29e999a9eb8c", size = 1771707, upload-time = "2025-10-10T13:54:51.756Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/63/b6a2863fb7dd5a9eccfdb055bf1124b999ff755d0187223b307161479b76/mpi4py-4.1.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:95bb98d946eb88c9ae4dc6c42d11b3af8ce6b91e644c288cc3f85ec7596ffcd3", size = 1480110, upload-time = "2025-10-10T13:55:11.381Z" },
+    { url = "https://files.pythonhosted.org/packages/de/18/358f0eb58fb3b79f65861ed682af9e735d86669663dfbce396e8673ed518/mpi4py-4.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:84e9eb2e609b0b94cd0e9a3e3b57d897f748fb0207c4f72e81e5a95aba033767", size = 1340704, upload-time = "2025-10-10T13:55:12.973Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/66/b342e330ac543d0147ebfab754f69854c4777ac9785cb5b7610e3cd0c29a/mpi4py-4.1.1-pp311-pypy311_pp73-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:027b1a1ff9d57afed10af6b79041b95f85fd11b2af74e4c34ef4866ce81ecc24", size = 1380452, upload-time = "2025-10-10T13:55:14.582Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/61/bbf87de6f3a8a9c54e7a4b72878c9069646ca9cafac8217fa5493a54b068/mpi4py-4.1.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c1191856906967a48fdcc484b326c179747e68c186261d76480a75156bcc73bf", size = 1255980, upload-time = "2025-10-10T13:55:17.075Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/4b/227091dec11518e5545bd1ec91f52e06f64bdae697adc5fb33f9f20c04dc/mpi4py-4.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:189d49b0ae963f8f6f5dd8ed0f5f37923285c97bc725476990ec0556972bb4b2", size = 1452641, upload-time = "2025-10-10T13:55:18.562Z" },
 ]
 
 [[package]]