diff --git a/external/dace b/external/dace
index 13402cbf..82541a94 160000
--- a/external/dace
+++ b/external/dace
@@ -1 +1 @@
-Subproject commit 13402cbfeeb6969cbd3915acfb7a30bdb543071b
+Subproject commit 82541a9401dcadca43edc33cf1db61a0fe21d0e5
diff --git a/external/gt4py b/external/gt4py
index 45324c88..68eea74b 160000
--- a/external/gt4py
+++ b/external/gt4py
@@ -1 +1 @@
-Subproject commit 45324c88e57b5e8dfc974efa70fa2f2e5e10677f
+Subproject commit 68eea74b748747ac5415c93e479d7964f3ec6947
diff --git a/ndsl/dsl/dace/dace_config.py b/ndsl/dsl/dace/dace_config.py
index 78dcb7a5..f6e6bb26 100644
--- a/ndsl/dsl/dace/dace_config.py
+++ b/ndsl/dsl/dace/dace_config.py
@@ -7,6 +7,7 @@
 import dace.config
 from dace.codegen.compiled_sdfg import CompiledSDFG
 from dace.frontend.python.parser import DaceProgram
+from gt4py.cartesian.config import GT4PY_COMPILE_OPT_LEVEL
 
 from ndsl.comm.communicator import Communicator
 from ndsl.comm.partitioner import Partitioner
@@ -181,6 +182,12 @@ def __init__(
         # We control this Dace configuration below with our own override
         dace_debug_env_var = os.getenv("PACE_DACE_DEBUG", "False") == "True"
 
+        # We reuse GT4Py's compile optimization level here because NDSL
+        # has no optimization-level configuration of its own, while the
+        # GT4Py-level setting is already in use
+        # TODO: if the GT4Py opt level is funneled via NDSL - use it here
+        optimization_level = GT4PY_COMPILE_OPT_LEVEL
+
         # Set the configuration of DaCe to a rigid & tested set of divergence
         # from the defaults when orchestrating
         if orchestration != DaCeOrchestration.Python:
@@ -195,7 +202,7 @@ def __init__(
                 "compiler",
                 "cpu",
                 "args",
-                value="-std=c++14 -fPIC -Wall -Wextra -O3",
+                value=f"-std=c++14 -fPIC -Wall -Wextra -O{optimization_level}",
             )
             # Potentially buggy - deactivate
             dace.config.Config.set(
diff --git a/ndsl/dsl/dace/orchestration.py b/ndsl/dsl/dace/orchestration.py
index 0b5283e6..38e7be09 100644
--- a/ndsl/dsl/dace/orchestration.py
+++ b/ndsl/dsl/dace/orchestration.py
@@ -3,9 +3,12 @@
 import os
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 
-import dace
-import gt4py.storage
+from dace import SDFG
 from dace import compiletime as DaceCompiletime
+from dace import dtypes
+from dace import method as dace_method
+from dace import nodes
+from dace import program as dace_program
 from dace.dtypes import DeviceType as DaceDeviceType
 from dace.dtypes import StorageType as DaceStorageType
 from dace.frontend.python.common import SDFGConvertible
@@ -13,6 +16,7 @@
 from dace.transformation.auto.auto_optimize import make_transients_persistent
 from dace.transformation.helpers import get_parent_map
 from dace.transformation.passes.simplify import SimplifyPass
+from gt4py import storage
 
 from ndsl.comm.mpi import MPI
 from ndsl.dsl.dace.build import get_sdfg_path, write_build_info
@@ -27,7 +31,6 @@
     negative_qtracers_checker,
     sdfg_nan_checker,
 )
-from ndsl.dsl.dace.sdfg_opt_passes import splittable_region_expansion
 from ndsl.dsl.dace.utils import (
     DaCeProgress,
     memory_static_analysis,
@@ -61,10 +64,10 @@ def _download_results_from_dace(
         return None
 
     backend = config.get_backend()
-    return [gt4py.storage.from_array(result, backend=backend) for result in dace_result]
+    return [storage.from_array(result, backend=backend) for result in dace_result]
 
 
-def _to_gpu(sdfg: dace.SDFG):
+def _to_gpu(sdfg: SDFG):
     """Flag memory in SDFG to GPU.
     Force deactivate OpenMP sections for sanity."""
 
@@ -72,7 +75,7 @@ def _to_gpu(sdfg: dace.SDFG):
     allmaps = [
         (me, state)
         for me, state in sdfg.all_nodes_recursive()
-        if isinstance(me, dace.nodes.MapEntry)
+        if isinstance(me, nodes.MapEntry)
     ]
     topmaps = [
         (me, state) for me, state in allmaps if get_parent_map(state, me) is None
@@ -81,13 +84,13 @@ def _to_gpu(sdfg: dace.SDFG):
     # Set storage of arrays to GPU, scalarizable arrays will be set on registers
     for sd, _aname, arr in sdfg.arrays_recursive():
         if arr.shape == (1,):
-            arr.storage = dace.StorageType.Register
+            arr.storage = dtypes.StorageType.Register
         else:
-            arr.storage = dace.StorageType.GPU_Global
+            arr.storage = dtypes.StorageType.GPU_Global
 
     # All maps will be schedule on GPU
     for mapentry, _state in topmaps:
-        mapentry.schedule = dace.ScheduleType.GPU_Device
+        mapentry.schedule = dtypes.ScheduleType.GPU_Device
 
     # Deactivate OpenMP sections
     for sd in sdfg.all_sdfgs_recursive():
@@ -95,7 +98,7 @@ def _to_gpu(sdfg: dace.SDFG):
 
 
 def _simplify(
-    sdfg: dace.SDFG,
+    sdfg: SDFG,
     *,
     validate: bool = True,
     validate_all: bool = False,
@@ -108,24 +111,33 @@ def _simplify(
         validate=validate,
         validate_all=validate_all,
         verbose=verbose,
+        skip=["ScalarToSymbolPromotion"],
     ).apply_pass(sdfg, {})
 
 
 def _build_sdfg(
-    dace_program: DaceProgram, sdfg: dace.SDFG, config: DaceConfig, args, kwargs
+    dace_program: DaceProgram, sdfg: SDFG, config: DaceConfig, args, kwargs
 ):
     """Build the .so out of the SDFG on the top tile ranks only"""
     is_compiling = True if DEACTIVATE_DISTRIBUTED_DACE_COMPILE else config.do_compile
 
     if is_compiling:
+        with DaCeProgress(config, "Validate original SDFG"):
+            sdfg.validate()
+
         # Make the transients array persistents
         if config.is_gpu_backend():
+            # TODO
+            # The following should happen on the stree level
             _to_gpu(sdfg)
+
             make_transients_persistent(sdfg=sdfg, device=DaceDeviceType.GPU)
 
             # Upload args to device
             _upload_to_device(list(args) + list(kwargs.values()))
         else:
+            # TODO
+            # The following should happen on the stree level
             for _sd, _aname, arr in sdfg.arrays_recursive():
                 if arr.shape == (1,):
                     arr.storage = DaceStorageType.Register
@@ -141,18 +153,7 @@ def _build_sdfg(
             if k in sdfg_kwargs and tup[1].transient:
                 del sdfg_kwargs[k]
 
-        with DaCeProgress(config, "Simplify (1/2)"):
-            _simplify(sdfg, validate=False, verbose=True)
-
-        # Perform pre-expansion fine tuning
-        with DaCeProgress(config, "Split regions"):
-            splittable_region_expansion(sdfg, verbose=True)
-
-        # Expand the stencil computation Library Nodes with the right expansion
-        with DaCeProgress(config, "Expand"):
-            sdfg.expand_library_nodes()
-
-        with DaCeProgress(config, "Simplify (2/2)"):
+        with DaCeProgress(config, "Simplify"):
             _simplify(sdfg, validate=False, verbose=True)
 
         # Move all memory that can be into a pool to lower memory pressure.
@@ -160,10 +161,10 @@ def _build_sdfg(
         with DaCeProgress(config, "Turn Persistents into pooled Scope"):
             memory_pooled = 0.0
             for _sd, _aname, arr in sdfg.arrays_recursive():
-                if arr.lifetime == dace.AllocationLifetime.Persistent:
+                if arr.lifetime == dtypes.AllocationLifetime.Persistent:
                     arr.pool = True
                     memory_pooled += arr.total_size * arr.dtype.bytes
-                    arr.lifetime = dace.AllocationLifetime.Scope
+                    arr.lifetime = dtypes.AllocationLifetime.Scope
             memory_pooled = float(memory_pooled) / (1024 * 1024)
             ndsl_log.debug(
                 f"{DaCeProgress.default_prefix(config)} Pooled {memory_pooled} mb",
@@ -180,7 +181,9 @@ def _build_sdfg(
         # Compile
         with DaCeProgress(config, "Codegen & compile"):
             sdfg.compile()
-        write_build_info(sdfg, config.layout, config.tile_resolution, config._backend)
+        write_build_info(
+            sdfg, config.layout, config.tile_resolution, config.get_backend()
+        )
 
         # Printing analysis of the compiled SDFG
         with DaCeProgress(config, "Build finished. Running memory static analysis"):
@@ -223,9 +226,7 @@ def _build_sdfg(
         return _call_sdfg(dace_program, sdfg, config, args, kwargs)
 
 
-def _call_sdfg(
-    dace_program: DaceProgram, sdfg: dace.SDFG, config: DaceConfig, args, kwargs
-):
+def _call_sdfg(dace_program: DaceProgram, sdfg: SDFG, config: DaceConfig, args, kwargs):
     """Dispatch the SDFG execution and/or build"""
     # Pre-compiled SDFG code path does away with any data checks and
     # cached the marshalling - leading to almost direct C call
@@ -259,7 +260,7 @@ def _parse_sdfg(
     config: DaceConfig,
     *args,
     **kwargs,
-) -> Optional[dace.SDFG]:
+) -> Optional[SDFG]:
     """Return an SDFG depending on cache existence.
     Either parses, load a .sdfg or load .so (as a compiled sdfg)
 
@@ -318,7 +319,7 @@ class _LazyComputepathFunction(SDFGConvertible):
     def __init__(self, func: Callable, config: DaceConfig):
         self.func = func
         self.config = config
-        self.daceprog: DaceProgram = dace.program(self.func)
+        self.daceprog: DaceProgram = dace_program(self.func)
         self._sdfg = None
 
     def __call__(self, *args, **kwargs):
@@ -373,7 +374,7 @@ class _LazyComputepathMethod:
 
     class SDFGEnabledCallable(SDFGConvertible):
         def __init__(self, lazy_method: _LazyComputepathMethod, obj_to_bind):
-            methodwrapper = dace.method(lazy_method.func)
+            methodwrapper = dace_method(lazy_method.func)
             self.obj_to_bind = obj_to_bind
             self.lazy_method = lazy_method
             self.daceprog: DaceProgram = methodwrapper.__get__(obj_to_bind)
diff --git a/ndsl/dsl/dace/sdfg_opt_passes.py b/ndsl/dsl/dace/sdfg_opt_passes.py
deleted file mode 100644
index b7582cc5..00000000
--- a/ndsl/dsl/dace/sdfg_opt_passes.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import dace
-
-from ndsl.logging import ndsl_log
-
-
-def splittable_region_expansion(sdfg: dace.SDFG, verbose: bool = False):
-    """
-    Set certain StencilComputation library nodes to expand to a different
-    schedule if they contain small splittable regions.
-    """
-    from gt4py.cartesian.gtc.dace.nodes import StencilComputation
-
-    for node, _ in sdfg.all_nodes_recursive():
-        if isinstance(node, StencilComputation):
-            if node.has_splittable_regions() and "corner" in node.label:
-                node.expansion_specification = [
-                    "Sections",
-                    "Stages",
-                    "J",
-                    "I",
-                    "K",
-                ]
-                if verbose:
-                    ndsl_log.debug(f"Reordered schedule for {node.label}")