hw-native-sys · Hzfengsy · May 13, 2026 · May 13, 2026
diff --git a/tests/st/codegen/test_paged_attention.py b/tests/st/codegen/test_paged_attention.py
@@ -88,7 +88,7 @@ class QKMatmulProgram:
             def orchestrator(
                 self,
                 qi: pl.Tensor[[16, 128], pl.BF16],
-                kj_t: pl.Tensor[[128, 128], pl.BF16, pl.DN],
+                kj_t: pl.Tensor[[128, 128], pl.BF16],
                 out_sij: pl.Out[pl.Tensor[[16, 128], pl.FP32]],
             ) -> pl.Tensor[[16, 128], pl.FP32]:
                 out_sij = kernel_qk_matmul(qi, kj_t, out_sij)

diff --git a/tests/ut/codegen/test_pto_codegen.py b/tests/ut/codegen/test_pto_codegen.py
@@ -1711,7 +1711,16 @@ class ColVecDNProgram:
             @pl.function(type=pl.FunctionType.InCore)
             def kernel(
                 self,
-                col_vec: pl.Tensor[[16, 1], pl.FP32, pl.DN],
+                # Explicit DN view with the canonical-packed strides for shape
+                # [16, 1] (stride[-2]=1, stride[-1]=shape[-2]=16). Uses the
+                # explicit TensorView form (RFC #1300 supplementary 1 escape
+                # hatch) instead of the deprecated pl.Tensor[..., pl.DN]
+                # shorthand. This test specifically verifies the
+                # column-vector DN codegen path, so the DN view is the test
+                # subject — not a load-time alias.
+                col_vec: pl.Tensor[
+                    [16, 1], pl.FP32, pl.TensorView(stride=[1, 16], layout=pl.TensorLayout.DN)
+                ],
                 out: pl.Out[pl.Tensor[[16, 128], pl.FP32]],
             ) -> pl.Tensor[[16, 128], pl.FP32]:
                 v = pl.load(col_vec, [0, 0], [16, 1])
@@ -1786,7 +1795,16 @@ class DN3DProgram:
         @pl.function(type=pl.FunctionType.InCore)
         def kernel(
             self,
-            b: pl.Tensor[[2, 48, 64], pl.FP32, pl.DN],
+            # Explicit DN view with canonical-packed strides per RFC §2.3:
+            # shape [B=2, K=48, N=64] → stride[1]=1, stride[2]=K=48,
+            # stride[0]=K*N=3072. Uses the explicit TensorView form (RFC
+            # #1300 supplementary 1 escape hatch); the test subject is the
+            # 3-D DN codegen path, so the DN view is part of the fixture.
+            b: pl.Tensor[
+                [2, 48, 64],
+                pl.FP32,
+                pl.TensorView(stride=[3072, 1, 48], layout=pl.TensorLayout.DN),
+            ],
             out: pl.Out[pl.Tensor[[2, 48, 64], pl.FP32]],
         ) -> pl.Tensor[[2, 48, 64], pl.FP32]:
             tile_b = pl.load(b, [0, 0, 0], [2, 48, 64])

diff --git a/tests/ut/codegen/test_pto_codegen_paged_attn.py b/tests/ut/codegen/test_pto_codegen_paged_attn.py
@@ -63,7 +63,7 @@ class PagedAttention:
     def qk_matmul(
         self,
         qi: pl.Tensor[[16, 128], pl.BF16],
-        kj: pl.Tensor[[128, 128], pl.BF16, pl.DN],
+        kj: pl.Tensor[[128, 128], pl.BF16],
         s_ij: pl.Tensor[[16, 128], pl.FP32],
     ) -> pl.Tensor[[16, 128], pl.FP32]:
         q_tile: pl.Tile[[16, 128], pl.BF16] = pl.load(qi, [0, 0], [16, 128], target_memory=pl.MemorySpace.Mat)

diff --git a/tests/ut/ir/transforms/test_expand_mixed_kernel_a5.py b/tests/ut/ir/transforms/test_expand_mixed_kernel_a5.py
@@ -1364,7 +1364,7 @@ class Before:
             def main_incore_0(
                 self,
                 x: pl.Tensor[[16, 128], pl.BF16],
-                y: pl.Tensor[[128, 128], pl.BF16, pl.DN],
+                y: pl.Tensor[[128, 128], pl.BF16],
                 out_0: pl.Out[pl.Tensor[[16, 128], pl.FP32]],
             ) -> pl.Tensor[[16, 128], pl.FP32]:
                 x_l1 = pl.load(x, [0, 0], [16, 128], target_memory=pl.MemorySpace.Mat)
@@ -1396,7 +1396,7 @@ class Expected:
             def main_incore_0_aic(
                 self,
                 x: pl.Tensor[[16, 128], pl.BF16],
-                y: pl.Tensor[[128, 128], pl.BF16, pl.DN],
+                y: pl.Tensor[[128, 128], pl.BF16],
                 out_0: pl.Out[pl.Tensor[[16, 128], pl.FP32]],
             ):
                 x_l1 = pl.load(x, [0, 0], [16, 128], target_memory=pl.MemorySpace.Mat)
@@ -1410,7 +1410,7 @@ def main_incore_0_aic(
             def main_incore_0_aiv(
                 self,
                 x: pl.Tensor[[16, 128], pl.BF16],
-                y: pl.Tensor[[128, 128], pl.BF16, pl.DN],
+                y: pl.Tensor[[128, 128], pl.BF16],
                 out_0: pl.Out[pl.Tensor[[16, 128], pl.FP32]],
             ) -> pl.Tensor[[16, 128], pl.FP32]:
                 z_vec: pl.Tile[[16, 128], pl.FP32, pl.MemorySpace.Vec, pl.TileView()] = pl.tpop_from_aic(
@@ -1424,7 +1424,7 @@ def main_incore_0_aiv(
             def main_incore_0(
                 self,
                 x: pl.Tensor[[16, 128], pl.BF16],
-                y: pl.Tensor[[128, 128], pl.BF16, pl.DN],
+                y: pl.Tensor[[128, 128], pl.BF16],
                 out_0: pl.Out[pl.Tensor[[16, 128], pl.FP32]],
             ) -> pl.Tensor[[16, 128], pl.FP32]:
                 self.main_incore_0_aic(x, y, out_0)

diff --git a/tests/ut/ir/transforms/test_lower_transpose_load_param_layout_pass.py b/tests/ut/ir/transforms/test_lower_transpose_load_param_layout_pass.py
@@ -441,44 +441,6 @@ def orchestrator(
         After = passes.lower_transpose_load_param_layout()(Before)
         ir.assert_structural_equal(After, Before)
 
-    def test_already_dn_param_idempotent(self):
-        """A param already carrying the DN tag short-circuits — IR unchanged.
-
-        Mirrors the pre-P6 mid-state where the param has been DN-tagged but the
-        body's tile.load still has ``transpose=True`` (idempotent re-run of the
-        legacy pass form). The pass detects ``layout == DN`` on the param,
-        ``continue``s past the promotion, and leaves the body untouched.
-        """
-        M, K, N = 64, 128, 32
-
-        @pl.program
-        class Before:
-            @pl.function(type=pl.FunctionType.InCore)
-            def matmul_incore(
-                self,
-                a: pl.Tensor[[M, K], pl.FP32],
-                b: pl.Tensor[[N, K], pl.FP32, pl.DN],
-                c: pl.Out[pl.Tensor[[M, N], pl.FP32]],
-            ) -> pl.Tensor[[M, N], pl.FP32]:
-                tile_a = pl.load(a, [0, 0], [M, K], target_memory=pl.MemorySpace.Mat)
-                tile_b = pl.load(b, [0, 0], [N, K], target_memory=pl.MemorySpace.Mat, transpose=True)
-                tile_a_l0a = pl.move(tile_a, target_memory=pl.MemorySpace.Left)
-                tile_b_l0b = pl.move(tile_b, target_memory=pl.MemorySpace.Right)
-                tile_c = pl.matmul(tile_a_l0a, tile_b_l0b)
-                c_store = pl.store(tile_c, [0, 0], c)
-                return c_store
-
-            @pl.function(type=pl.FunctionType.Orchestration)
-            def orchestrator(
-                self, a: pl.Tensor[[M, K], pl.FP32], b: pl.Tensor[[N, K], pl.FP32, pl.DN]
-            ) -> pl.Tensor[[M, N], pl.FP32]:
-                c: pl.Tensor[[M, N], pl.FP32] = pl.create_tensor([M, N], dtype=pl.FP32)
-                c_result = self.matmul_incore(a, b, c)
-                return c_result
-
-        After = passes.lower_transpose_load_param_layout()(Before)
-        ir.assert_structural_equal(After, Before)
-
 
 class TestStridedParamFlipsCorrectly:
     """Regression for #1212 / #1213: when an InCore param's TensorView carries

diff --git a/tests/ut/language/parser/test_type_resolver.py b/tests/ut/language/parser/test_type_resolver.py
@@ -1390,12 +1390,16 @@ class TestLayoutResolution:
         "layout_str, expected_layout",
         [
             ("pl.NZ", ir.TensorLayout.NZ),
-            ("pl.DN", ir.TensorLayout.DN),
             ("pl.ND", ir.TensorLayout.ND),
         ],
     )
     def test_resolve_tensor_with_layout(self, layout_str, expected_layout):
-        """Tensor with various layouts creates TensorType with TensorView."""
+        """Tensor with various layouts creates TensorType with TensorView.
+
+        ``pl.DN`` is covered separately by ``test_resolve_tensor_with_dn_layout_warns``
+        — it emits a ``DeprecationWarning`` (RFC #1300 supplementary 1) so we
+        verify that warning explicitly rather than swallowing it here.
+        """
         resolver = _make_resolver()
         node = ast.parse(f"pl.Tensor[[64, 128], pl.FP16, {layout_str}]", mode="eval").body
         result = resolver.resolve_type(node)
@@ -1406,6 +1410,24 @@ def test_resolve_tensor_with_layout(self, layout_str, expected_layout):
         assert result.tensor_view is not None
         assert result.tensor_view.layout == expected_layout
 
+    def test_resolve_tensor_with_dn_layout_warns(self):
+        """``pl.Tensor[..., pl.DN]`` shorthand is deprecated (RFC #1300 supp. 1).
+
+        The parser still resolves it to a DN-tagged TensorView for backward
+        compatibility, but emits a ``DeprecationWarning`` pointing users at
+        migration paths (drop the marker, use ``pl.transpose``, or write an
+        explicit ``pl.TensorView(stride=..., layout=DN)``).
+        """
+        resolver = _make_resolver()
+        node = ast.parse("pl.Tensor[[64, 128], pl.FP16, pl.DN]", mode="eval").body
+        # ``match`` is a regex (re.search): escape the dot so only literal "pl.DN" matches.
+        with pytest.warns(DeprecationWarning, match=r"pl\.DN"):
+            result = resolver.resolve_type(node)
+
+        assert isinstance(result, ir.TensorType)
+        assert result.tensor_view is not None
+        assert result.tensor_view.layout == ir.TensorLayout.DN
+
     def test_resolve_tensor_without_layout_backward_compat(self):
         """Tensor without layout has no tensor_view (backward compatible)."""
         resolver = _make_resolver()
@@ -1484,13 +1506,13 @@ def test_resolve_tensor_layout_with_dynamic_shape(self):
     def test_resolve_tensor_layout_with_shape_variable(self):
         """Layout works with shape variable from closure."""
         resolver = _make_resolver(closure_vars={"shape": [64, 128]})
-        node = ast.parse("pl.Tensor[shape, pl.FP16, pl.DN]", mode="eval").body
+        node = ast.parse("pl.Tensor[shape, pl.FP16, pl.NZ]", mode="eval").body
         result = resolver.resolve_type(node)
 
         assert isinstance(result, ir.TensorType)
         assert len(result.shape) == 2
         assert result.tensor_view is not None
-        assert result.tensor_view.layout == ir.TensorLayout.DN
+        assert result.tensor_view.layout == ir.TensorLayout.NZ
 
 
 class TestLayoutIntegration:
@@ -1573,12 +1595,11 @@ def func(
         "layout,expected",
         [
             (pl.ND, ir.TensorLayout.ND),
-            (pl.DN, ir.TensorLayout.DN),
             (pl.NZ, ir.TensorLayout.NZ),
         ],
     )
     def test_parametrized_layout(self, layout, expected):
-        """pytest.mark.parametrize with layout."""
+        """pytest.mark.parametrize with layout (non-deprecated layouts only)."""
 
         @pl.function
         def func(
@@ -1591,14 +1612,21 @@ def func(
         assert param_type.tensor_view is not None
         assert param_type.tensor_view.layout == expected
 
-    def test_function_with_dn_layout(self):
-        """@pl.function with DN layout for column-major tensors."""
+    def test_function_with_dn_layout_warns(self):
+        """@pl.function with ``pl.DN`` shorthand emits ``DeprecationWarning``.
 
-        @pl.function
-        def func(
-            x: pl.Tensor[[16, 1], pl.FP16, pl.DN],
-        ) -> pl.Tensor[[16, 1], pl.FP16, pl.DN]:
-            return x
+        Backwards-compatible — the layout still resolves to DN — but the
+        shorthand is deprecated (RFC #1300 supplementary 1). Users should
+        drop the marker, derive DN at use site via ``pl.transpose``, or
+        write an explicit ``pl.TensorView(stride=..., layout=DN)``.
+        """
+        with pytest.warns(DeprecationWarning, match="pl.DN"):
+
+            @pl.function
+            def func(
+                x: pl.Tensor[[16, 1], pl.FP16, pl.DN],
+            ) -> pl.Tensor[[16, 1], pl.FP16, pl.DN]:
+                return x
 
         param_type = func.params[0].type
         assert isinstance(param_type, ir.TensorType)