From 563469477df3480858cca9e4a3a8585f22a2d167 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 18 Jun 2025 16:08:08 -0400 Subject: [PATCH 01/19] Skip scalar to symbol promotion Remove un-needed passes in pipeline --- ndsl/dsl/dace/orchestration.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/ndsl/dsl/dace/orchestration.py b/ndsl/dsl/dace/orchestration.py index 5c30367f..85467912 100644 --- a/ndsl/dsl/dace/orchestration.py +++ b/ndsl/dsl/dace/orchestration.py @@ -106,6 +106,7 @@ def _simplify( validate=validate, validate_all=validate_all, verbose=verbose, + skip=["ScalarToSymbolPromotion"], ).apply_pass(sdfg, {}) @@ -139,18 +140,11 @@ def _build_sdfg( if k in sdfg_kwargs and tup[1].transient: del sdfg_kwargs[k] - with DaCeProgress(config, "Simplify (1/2)"): - _simplify(sdfg, validate=False, verbose=True) - # Perform pre-expansion fine tuning with DaCeProgress(config, "Split regions"): splittable_region_expansion(sdfg, verbose=True) - # Expand the stencil computation Library Nodes with the right expansion - with DaCeProgress(config, "Expand"): - sdfg.expand_library_nodes() - - with DaCeProgress(config, "Simplify (2/2)"): + with DaCeProgress(config, "Simplify"): _simplify(sdfg, validate=False, verbose=True) # Move all memory that can be into a pool to lower memory pressure. @@ -422,9 +416,9 @@ def __get__(self, obj, objtype=None) -> SDFGEnabledCallable: """Return SDFGEnabledCallable wrapping original obj.method from cache. 
Update cache first if need be""" if (id(obj), id(self.func)) not in _LazyComputepathMethod.bound_callables: - _LazyComputepathMethod.bound_callables[ - (id(obj), id(self.func)) - ] = _LazyComputepathMethod.SDFGEnabledCallable(self, obj) + _LazyComputepathMethod.bound_callables[(id(obj), id(self.func))] = ( + _LazyComputepathMethod.SDFGEnabledCallable(self, obj) + ) return _LazyComputepathMethod.bound_callables[(id(obj), id(self.func))] From 3faa58fdf2e64adaf8e4d4046271a190b98ab078 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Thu, 19 Jun 2025 14:10:05 +0200 Subject: [PATCH 02/19] Update gt4py and dace submodules Point the submodules to oir->stree->sdfg branches in gt4py and dace. This allows running NDSL tests against these branches. --- external/dace | 2 +- external/gt4py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/dace b/external/dace index 13402cbf..b29a263a 160000 --- a/external/dace +++ b/external/dace @@ -1 +1 @@ -Subproject commit 13402cbfeeb6969cbd3915acfb7a30bdb543071b +Subproject commit b29a263af5523222049e31564b58b62d39554917 diff --git a/external/gt4py b/external/gt4py index 45324c88..0fb8dec3 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit 45324c88e57b5e8dfc974efa70fa2f2e5e10677f +Subproject commit 0fb8dec3328012c077e8560d6aefd50ce75b074a From d94925db6e36da12e23811cbbc719b18f9b16c25 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Thu, 19 Jun 2025 14:11:20 +0200 Subject: [PATCH 03/19] Run stencil factory tests not only against numpy, but also against dace:cpu --- ndsl/dsl/dace/orchestration.py | 6 +++--- tests/dsl/test_stencil_factory.py | 20 ++++++++++++++------ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/ndsl/dsl/dace/orchestration.py b/ndsl/dsl/dace/orchestration.py index 7c5ec429..6feb13eb 100644 --- a/ndsl/dsl/dace/orchestration.py +++ b/ndsl/dsl/dace/orchestration.py @@ 
-418,9 +418,9 @@ def __get__(self, obj, objtype=None) -> SDFGEnabledCallable: """Return SDFGEnabledCallable wrapping original obj.method from cache. Update cache first if need be""" if (id(obj), id(self.func)) not in _LazyComputepathMethod.bound_callables: - _LazyComputepathMethod.bound_callables[(id(obj), id(self.func))] = ( - _LazyComputepathMethod.SDFGEnabledCallable(self, obj) - ) + _LazyComputepathMethod.bound_callables[ + (id(obj), id(self.func)) + ] = _LazyComputepathMethod.SDFGEnabledCallable(self, obj) return _LazyComputepathMethod.bound_callables[(id(obj), id(self.func))] diff --git a/tests/dsl/test_stencil_factory.py b/tests/dsl/test_stencil_factory.py index 65bf1cf2..675ce0d5 100644 --- a/tests/dsl/test_stencil_factory.py +++ b/tests/dsl/test_stencil_factory.py @@ -16,6 +16,9 @@ from ndsl.dsl.typing import FloatField +BACKENDS = ["numpy", "dace:cpu"] + + def copy_stencil(q_in: FloatField, q_out: FloatField): with computation(PARALLEL), interval(...): q_out = q_in @@ -68,6 +71,7 @@ def get_stencil_factory(backend: str) -> StencilFactory: return StencilFactory(config=config, grid_indexing=indexing) +@pytest.mark.parametrize("backend", BACKENDS) def test_get_stencils_with_varied_bounds(backend: str): origins = [(2, 2, 0), (1, 1, 0)] domains = [(1, 1, 3), (2, 2, 3)] @@ -87,6 +91,7 @@ def test_get_stencils_with_varied_bounds(backend: str): np.testing.assert_array_equal(q.data, q_ref.data) +@pytest.mark.parametrize("backend", BACKENDS) def test_get_stencils_with_varied_bounds_and_regions(backend: str): factory = get_stencil_factory(backend) origins = [(3, 3, 0), (2, 2, 0)] @@ -107,6 +112,7 @@ def test_get_stencils_with_varied_bounds_and_regions(backend: str): np.testing.assert_array_equal(q_orig.data, q_ref.data) +@pytest.mark.parametrize("backend", BACKENDS) def test_stencil_vertical_bounds(backend: str): factory = get_stencil_factory(backend) origins = [(3, 3, 0), (2, 2, 1)] @@ -124,9 +130,9 @@ def test_stencil_vertical_bounds(backend: str): assert "k_end" 
in stencils[1].externals and stencils[1].externals["k_end"] == 4 +@pytest.mark.parametrize("backend", BACKENDS) @pytest.mark.parametrize("enabled", [True, False]) -def test_stencil_factory_numpy_comparison_from_dims_halo(enabled: bool): - backend = "numpy" +def test_stencil_factory_numpy_comparison_from_dims_halo(backend: str, enabled: bool): dace_config = DaceConfig(communicator=None, backend=backend) config = StencilConfig( compilation_config=CompilationConfig( @@ -159,9 +165,11 @@ def test_stencil_factory_numpy_comparison_from_dims_halo(enabled: bool): assert isinstance(stencil, FrozenStencil) +@pytest.mark.parametrize("backend", BACKENDS) @pytest.mark.parametrize("enabled", [True, False]) -def test_stencil_factory_numpy_comparison_from_origin_domain(enabled: bool): - backend = "numpy" +def test_stencil_factory_numpy_comparison_from_origin_domain( + backend: str, enabled: bool +): dace_config = DaceConfig(communicator=None, backend=backend) config = StencilConfig( compilation_config=CompilationConfig( @@ -192,8 +200,8 @@ def test_stencil_factory_numpy_comparison_from_origin_domain(enabled: bool): assert isinstance(stencil, FrozenStencil) -def test_stencil_factory_numpy_comparison_runs_without_exceptions(): - backend = "numpy" +@pytest.mark.parametrize("backend", BACKENDS) +def test_stencil_factory_numpy_comparison_runs_without_exceptions(backend: str): dace_config = DaceConfig(communicator=None, backend=backend) config = StencilConfig( compilation_config=CompilationConfig( From b7772b52d6161d3338ae9e0103ab9a517d3bf872 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Thu, 19 Jun 2025 17:48:21 +0200 Subject: [PATCH 04/19] Update gt4py (burning the old bridge) --- external/gt4py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/gt4py b/external/gt4py index 0fb8dec3..00623d83 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit 0fb8dec3328012c077e8560d6aefd50ce75b074a 
+Subproject commit 00623d83cce0ba2cb970f463982ca6f264955de4 From 092ef1a8805a183bbbca8e2542f5ac0e3b77a7e3 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Fri, 20 Jun 2025 09:33:03 +0200 Subject: [PATCH 05/19] Remove splittable region optimization splittable regions were based on StencilComputation library nodes that don't exist anymore since we torched the old bridge. To be re-implemented at the stree level (if we still want to keep it). --- ndsl/dsl/dace/orchestration.py | 10 +++++----- ndsl/dsl/dace/sdfg_opt_passes.py | 24 ------------------------ 2 files changed, 5 insertions(+), 29 deletions(-) delete mode 100644 ndsl/dsl/dace/sdfg_opt_passes.py diff --git a/ndsl/dsl/dace/orchestration.py b/ndsl/dsl/dace/orchestration.py index 6feb13eb..92f14f36 100644 --- a/ndsl/dsl/dace/orchestration.py +++ b/ndsl/dsl/dace/orchestration.py @@ -27,7 +27,6 @@ negative_qtracers_checker, sdfg_nan_checker, ) -from ndsl.dsl.dace.sdfg_opt_passes import splittable_region_expansion from ndsl.dsl.dace.utils import ( DaCeProgress, memory_static_analysis, @@ -121,12 +120,17 @@ def _build_sdfg( if is_compiling: # Make the transients array persistents if config.is_gpu_backend(): + # TODO + # The following should happen on the stree level _to_gpu(sdfg) + make_transients_persistent(sdfg=sdfg, device=DaceDeviceType.GPU) # Upload args to device _upload_to_device(list(args) + list(kwargs.values())) else: + # TODO + # The following should happen on the stree level for _sd, _aname, arr in sdfg.arrays_recursive(): if arr.shape == (1,): arr.storage = DaceStorageType.Register @@ -142,10 +146,6 @@ def _build_sdfg( if k in sdfg_kwargs and tup[1].transient: del sdfg_kwargs[k] - # Perform pre-expansion fine tuning - with DaCeProgress(config, "Split regions"): - splittable_region_expansion(sdfg, verbose=True) - with DaCeProgress(config, "Simplify"): _simplify(sdfg, validate=False, verbose=True) diff --git a/ndsl/dsl/dace/sdfg_opt_passes.py 
b/ndsl/dsl/dace/sdfg_opt_passes.py deleted file mode 100644 index b7582cc5..00000000 --- a/ndsl/dsl/dace/sdfg_opt_passes.py +++ /dev/null @@ -1,24 +0,0 @@ -import dace - -from ndsl.logging import ndsl_log - - -def splittable_region_expansion(sdfg: dace.SDFG, verbose: bool = False): - """ - Set certain StencilComputation library nodes to expand to a different - schedule if they contain small splittable regions. - """ - from gt4py.cartesian.gtc.dace.nodes import StencilComputation - - for node, _ in sdfg.all_nodes_recursive(): - if isinstance(node, StencilComputation): - if node.has_splittable_regions() and "corner" in node.label: - node.expansion_specification = [ - "Sections", - "Stages", - "J", - "I", - "K", - ] - if verbose: - ndsl_log.debug(f"Reordered schedule for {node.label}") From 9241781ab5dccfa5cde12ea91d85ba6009927234 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Wed, 25 Jun 2025 15:00:23 +0200 Subject: [PATCH 06/19] Update gt4py (Last pass on ADR for now) --- external/gt4py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/gt4py b/external/gt4py index 00623d83..75186e9f 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit 00623d83cce0ba2cb970f463982ca6f264955de4 +Subproject commit 75186e9f62c4fbac035900723777cb65c16f5c78 From f4c58842f22c287c001d9ead8298ab78ad6cfaef Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Fri, 27 Jun 2025 08:33:08 +0200 Subject: [PATCH 07/19] Update gt4py branch (Minor cleanup refactors) --- external/gt4py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/gt4py b/external/gt4py index 75186e9f..2b28cdd6 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit 75186e9f62c4fbac035900723777cb65c16f5c78 +Subproject commit 2b28cdd6e5493408db50c24a09a3d3d5794cdf6e From 843891e1eab9240109bc5fe0b8c68f4fc893bb6c Mon Sep 17 00:00:00 2001 From: 
Florian Deconinck Date: Wed, 2 Jul 2025 15:36:46 -0400 Subject: [PATCH 08/19] Validate at `orchestration` entry --- ndsl/dsl/dace/orchestration.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ndsl/dsl/dace/orchestration.py b/ndsl/dsl/dace/orchestration.py index 92f14f36..34c4a385 100644 --- a/ndsl/dsl/dace/orchestration.py +++ b/ndsl/dsl/dace/orchestration.py @@ -118,6 +118,9 @@ def _build_sdfg( is_compiling = True if DEACTIVATE_DISTRIBUTED_DACE_COMPILE else config.do_compile if is_compiling: + with DaCeProgress(config, "Validate original SDFG"): + sdfg.validate() + # Make the transients array persistents if config.is_gpu_backend(): # TODO From b2a5b8f1228fb2264aec871b7c295398ce3e0919 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 2 Jul 2025 16:52:46 -0400 Subject: [PATCH 09/19] Expose compiler optimization level to `dace` orchestration via `config` --- ndsl/dsl/dace/dace_config.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ndsl/dsl/dace/dace_config.py b/ndsl/dsl/dace/dace_config.py index 8f3d2688..52fbaf7a 100644 --- a/ndsl/dsl/dace/dace_config.py +++ b/ndsl/dsl/dace/dace_config.py @@ -7,6 +7,7 @@ import dace.config from dace.codegen.compiled_sdfg import CompiledSDFG from dace.frontend.python.parser import DaceProgram +from gt4py.cartesian.config import GT4PY_COMPILE_OPT_LEVEL from ndsl.comm.communicator import Communicator from ndsl.comm.partitioner import Partitioner @@ -181,6 +182,12 @@ def __init__( # We control this Dace configuration below with our own override dace_debug_env_var = os.getenv("PACE_DACE_DEBUG", "False") == "True" + # We hijack the optimization level of GT4Py because we don't + # have the configuration at NDSL level, but we do use the GT4Py + # level + # TODO: if GT4PY opt level is funneled via NDSL - use it here + optimization_level = GT4PY_COMPILE_OPT_LEVEL + # Set the configuration of DaCe to a rigid & tested set of divergence # from the defaults when orchestrating if orchestration != 
DaCeOrchestration.Python: @@ -195,7 +202,7 @@ def __init__( "compiler", "cpu", "args", - value="-std=c++14 -fPIC -Wall -Wextra -O3", + value=f"-std=c++14 -fPIC -Wall -Wextra -O{optimization_level}", ) # Potentially buggy - deactivate dace.config.Config.set( From 466fcb2a71cd338ba726debab55631b69b307820 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 3 Jul 2025 09:44:25 -0400 Subject: [PATCH 10/19] Update to GT4Py for transient flag --- external/gt4py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/gt4py b/external/gt4py index 2b28cdd6..f958944e 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit 2b28cdd6e5493408db50c24a09a3d3d5794cdf6e +Subproject commit f958944e571d852eb7474fed9bcc686620cb9ca8 From 713616dd9813b3696a0dbce85622b96bb70be1a1 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Mon, 7 Jul 2025 10:20:40 +0200 Subject: [PATCH 11/19] Fixup: don't declare backends twice in test_stencil_factory --- tests/dsl/test_stencil_factory.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/dsl/test_stencil_factory.py b/tests/dsl/test_stencil_factory.py index 5623dfbd..ce9de962 100644 --- a/tests/dsl/test_stencil_factory.py +++ b/tests/dsl/test_stencil_factory.py @@ -19,9 +19,6 @@ BACKENDS = ["numpy", "dace:cpu"] -BACKENDS = ["numpy", "dace:cpu"] - - def copy_stencil(q_in: FloatField, q_out: FloatField): with computation(PARALLEL), interval(...): q_out = q_in From 623235fdc3a242aa02bd261141c8bfd6c32d1652 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Tue, 8 Jul 2025 18:03:42 +0200 Subject: [PATCH 12/19] Update dace & gt4py dace -> Move main visitor out and remove print statements gt4py -> Update DaCe version (remove debug print statements) --- external/dace | 2 +- external/gt4py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/dace b/external/dace index b29a263a..1bf84111 
160000 --- a/external/dace +++ b/external/dace @@ -1 +1 @@ -Subproject commit b29a263af5523222049e31564b58b62d39554917 +Subproject commit 1bf841113c0b4fedbef3cbe5b4cdbbe1b1fc787f diff --git a/external/gt4py b/external/gt4py index f958944e..c1a075f4 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit f958944e571d852eb7474fed9bcc686620cb9ca8 +Subproject commit c1a075f4683ad3d7d8b62005215d311bc6ac4f07 From 1ed6f4f8a5c6a310822c8588d95736db6451a189 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Thu, 10 Jul 2025 15:22:18 +0200 Subject: [PATCH 13/19] Update gt4py and dace submodules - dace: include Phil's fix to cycle detection - gt4py: update dace dependency and fix cpu memory layout --- external/dace | 2 +- external/gt4py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/dace b/external/dace index 1bf84111..6b26c1aa 160000 --- a/external/dace +++ b/external/dace @@ -1 +1 @@ -Subproject commit 1bf841113c0b4fedbef3cbe5b4cdbbe1b1fc787f +Subproject commit 6b26c1aa5c827cdc52205de95909a7ca3d1deeac diff --git a/external/gt4py b/external/gt4py index c1a075f4..11ce4b6c 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit c1a075f4683ad3d7d8b62005215d311bc6ac4f07 +Subproject commit 11ce4b6c8f1b16a5bd8b75833eb40f971ad29649 From c478dfb25cdf336d68e1a400f08549aa500c399d Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Tue, 15 Jul 2025 17:52:52 +0200 Subject: [PATCH 14/19] Update dace & gt4py to latest dev commits - dace: fix write access caching - gt4py: DDE issue to be investigated --- external/dace | 2 +- external/gt4py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/dace b/external/dace index 6b26c1aa..37c56968 160000 --- a/external/dace +++ b/external/dace @@ -1 +1 @@ -Subproject commit 6b26c1aa5c827cdc52205de95909a7ca3d1deeac +Subproject commit 
37c56968f9b78517cdc4c3df1305236e763f0435 diff --git a/external/gt4py b/external/gt4py index 11ce4b6c..b7677e9e 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit 11ce4b6c8f1b16a5bd8b75833eb40f971ad29649 +Subproject commit b7677e9ee49ed30692cfd5e1f5707c6fdef5eda9 From 2003fbb366f767ba79af357664196efb2bf5c162 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Wed, 16 Jul 2025 14:14:39 +0200 Subject: [PATCH 15/19] Update gt4py and dace (DDE fixes) - dace: patch DDE not to attempt to inline pointers - gt4py: re-enable DDE --- external/dace | 2 +- external/gt4py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/dace b/external/dace index 37c56968..6ed38902 160000 --- a/external/dace +++ b/external/dace @@ -1 +1 @@ -Subproject commit 37c56968f9b78517cdc4c3df1305236e763f0435 +Subproject commit 6ed3890287759e2164a395a339d0b06d50923102 diff --git a/external/gt4py b/external/gt4py index b7677e9e..bf2be21e 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit b7677e9ee49ed30692cfd5e1f5707c6fdef5eda9 +Subproject commit bf2be21ea608ca18b67ed4050df445f8c7ee5a73 From 7ed9cb76c8f6644abd8a6a0ec07a8c3078a9c197 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Wed, 16 Jul 2025 14:51:45 +0200 Subject: [PATCH 16/19] Update gt4py (move dace branch to GridTools/dace) --- external/gt4py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/gt4py b/external/gt4py index bf2be21e..66ded75d 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit bf2be21ea608ca18b67ed4050df445f8c7ee5a73 +Subproject commit 66ded75d6a6ceb5a5ffa7e9ab37d665c576b715f From 0c1d4c3788d466528f98433f6191a04879329be6 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Wed, 23 Jul 2025 10:02:14 +0200 Subject: [PATCH 17/19] Import style: avoid importing everything --- 
ndsl/dsl/dace/orchestration.py | 42 +++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/ndsl/dsl/dace/orchestration.py b/ndsl/dsl/dace/orchestration.py index c8b966f5..38e7be09 100644 --- a/ndsl/dsl/dace/orchestration.py +++ b/ndsl/dsl/dace/orchestration.py @@ -3,9 +3,12 @@ import os from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union -import dace -import gt4py.storage +from dace import SDFG from dace import compiletime as DaceCompiletime +from dace import dtypes +from dace import method as dace_method +from dace import nodes +from dace import program as dace_program from dace.dtypes import DeviceType as DaceDeviceType from dace.dtypes import StorageType as DaceStorageType from dace.frontend.python.common import SDFGConvertible @@ -13,6 +16,7 @@ from dace.transformation.auto.auto_optimize import make_transients_persistent from dace.transformation.helpers import get_parent_map from dace.transformation.passes.simplify import SimplifyPass +from gt4py import storage from ndsl.comm.mpi import MPI from ndsl.dsl.dace.build import get_sdfg_path, write_build_info @@ -60,10 +64,10 @@ def _download_results_from_dace( return None backend = config.get_backend() - return [gt4py.storage.from_array(result, backend=backend) for result in dace_result] + return [storage.from_array(result, backend=backend) for result in dace_result] -def _to_gpu(sdfg: dace.SDFG): +def _to_gpu(sdfg: SDFG): """Flag memory in SDFG to GPU. 
Force deactivate OpenMP sections for sanity.""" @@ -71,7 +75,7 @@ def _to_gpu(sdfg: dace.SDFG): allmaps = [ (me, state) for me, state in sdfg.all_nodes_recursive() - if isinstance(me, dace.nodes.MapEntry) + if isinstance(me, nodes.MapEntry) ] topmaps = [ (me, state) for me, state in allmaps if get_parent_map(state, me) is None @@ -80,13 +84,13 @@ def _to_gpu(sdfg: dace.SDFG): # Set storage of arrays to GPU, scalarizable arrays will be set on registers for sd, _aname, arr in sdfg.arrays_recursive(): if arr.shape == (1,): - arr.storage = dace.StorageType.Register + arr.storage = dtypes.StorageType.Register else: - arr.storage = dace.StorageType.GPU_Global + arr.storage = dtypes.StorageType.GPU_Global # All maps will be schedule on GPU for mapentry, _state in topmaps: - mapentry.schedule = dace.ScheduleType.GPU_Device + mapentry.schedule = dtypes.ScheduleType.GPU_Device # Deactivate OpenMP sections for sd in sdfg.all_sdfgs_recursive(): @@ -94,7 +98,7 @@ def _to_gpu(sdfg: dace.SDFG): def _simplify( - sdfg: dace.SDFG, + sdfg: SDFG, *, validate: bool = True, validate_all: bool = False, @@ -112,7 +116,7 @@ def _simplify( def _build_sdfg( - dace_program: DaceProgram, sdfg: dace.SDFG, config: DaceConfig, args, kwargs + dace_program: DaceProgram, sdfg: SDFG, config: DaceConfig, args, kwargs ): """Build the .so out of the SDFG on the top tile ranks only""" is_compiling = True if DEACTIVATE_DISTRIBUTED_DACE_COMPILE else config.do_compile @@ -157,10 +161,10 @@ def _build_sdfg( with DaCeProgress(config, "Turn Persistents into pooled Scope"): memory_pooled = 0.0 for _sd, _aname, arr in sdfg.arrays_recursive(): - if arr.lifetime == dace.AllocationLifetime.Persistent: + if arr.lifetime == dtypes.AllocationLifetime.Persistent: arr.pool = True memory_pooled += arr.total_size * arr.dtype.bytes - arr.lifetime = dace.AllocationLifetime.Scope + arr.lifetime = dtypes.AllocationLifetime.Scope memory_pooled = float(memory_pooled) / (1024 * 1024) ndsl_log.debug( 
f"{DaCeProgress.default_prefix(config)} Pooled {memory_pooled} mb", @@ -177,7 +181,9 @@ def _build_sdfg( # Compile with DaCeProgress(config, "Codegen & compile"): sdfg.compile() - write_build_info(sdfg, config.layout, config.tile_resolution, config._backend) + write_build_info( + sdfg, config.layout, config.tile_resolution, config.get_backend() + ) # Printing analysis of the compiled SDFG with DaCeProgress(config, "Build finished. Running memory static analysis"): @@ -220,9 +226,7 @@ def _build_sdfg( return _call_sdfg(dace_program, sdfg, config, args, kwargs) -def _call_sdfg( - dace_program: DaceProgram, sdfg: dace.SDFG, config: DaceConfig, args, kwargs -): +def _call_sdfg(dace_program: DaceProgram, sdfg: SDFG, config: DaceConfig, args, kwargs): """Dispatch the SDFG execution and/or build""" # Pre-compiled SDFG code path does away with any data checks and # cached the marshalling - leading to almost direct C call @@ -256,7 +260,7 @@ def _parse_sdfg( config: DaceConfig, *args, **kwargs, -) -> Optional[dace.SDFG]: +) -> Optional[SDFG]: """Return an SDFG depending on cache existence. 
Either parses, load a .sdfg or load .so (as a compiled sdfg) @@ -315,7 +319,7 @@ class _LazyComputepathFunction(SDFGConvertible): def __init__(self, func: Callable, config: DaceConfig): self.func = func self.config = config - self.daceprog: DaceProgram = dace.program(self.func) + self.daceprog: DaceProgram = dace_program(self.func) self._sdfg = None def __call__(self, *args, **kwargs): @@ -370,7 +374,7 @@ class _LazyComputepathMethod: class SDFGEnabledCallable(SDFGConvertible): def __init__(self, lazy_method: _LazyComputepathMethod, obj_to_bind): - methodwrapper = dace.method(lazy_method.func) + methodwrapper = dace_method(lazy_method.func) self.obj_to_bind = obj_to_bind self.lazy_method = lazy_method self.daceprog: DaceProgram = methodwrapper.__get__(obj_to_bind) From a14b0fe3b1714120a935e3261b6d6ad977166051 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Thu, 24 Jul 2025 10:06:56 +0200 Subject: [PATCH 18/19] Update gt4py/dace: preliminary NView support --- external/dace | 2 +- external/gt4py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/dace b/external/dace index 6ed38902..82541a94 160000 --- a/external/dace +++ b/external/dace @@ -1 +1 @@ -Subproject commit 6ed3890287759e2164a395a339d0b06d50923102 +Subproject commit 82541a9401dcadca43edc33cf1db61a0fe21d0e5 diff --git a/external/gt4py b/external/gt4py index 66ded75d..62e33612 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit 66ded75d6a6ceb5a5ffa7e9ab37d665c576b715f +Subproject commit 62e336122c289863252c4ac53d8b48ea29b72398 From 05c4ffdd63d8099f7f8d019bf9742315e217334c Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Mon, 28 Jul 2025 08:44:01 +0200 Subject: [PATCH 19/19] Update gt4py (oir -> stree bridge from mainline) --- external/gt4py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/gt4py b/external/gt4py index 62e33612..68eea74b 
160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit 62e336122c289863252c4ac53d8b48ea29b72398 +Subproject commit 68eea74b748747ac5415c93e479d7964f3ec6947