From 7d13791b465dc0f0af77501ac27384170d7fd4c7 Mon Sep 17 00:00:00 2001 From: Nitin Jain Date: Wed, 6 May 2026 20:02:23 -0700 Subject: [PATCH] Add a16w8 reduce_sum FVP coverage for Ethos-U85 (#19319) Summary: Adds an a16w8 (int16 IO + int8 weights) sweep for `aten.sum.dim_IntList` reducing the last dim with `keepdim=True`. The new tests `test_sum_dim_intlist_a16w8_{u55,u85}_INT` run on the standard Corstone-300 / Corstone-320 FVP harness. The U85 case surfaces a known numerics issue in the Vela `regor` lowering at int16 IO precision (silent zero output), tracked upstream at https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela/-/issues/23. The Ethos-U55 path uses a different accumulator and is correct on the same OFM rescale. Also annotates the four `dim_None{,_4d_tensor}` parametrize ids on `test_sum_u{55,85}_INT_1_0` with `skips=` -- those cases cannot be exercised through the FVP harness because `executorch.devtools.bundled_program.config` rejects `None` as a model input. The dim=None case is properly covered by the existing `SumDefault` class. Test design: - Standard `pipeline.run()` with the same a16w8 kwargs other arm a16w8 tests use (e.g. `test_native_layer_norm_16a8w_u85_INT` in `test_layer_norm.py`): `a16w8_quantization=True, symmetric_io_quantization=True, qtol=128, epsilon=2**-16`. - Numerical comparison is the standard `atol`/`rtol` check from `pipeline.run()` -- no SQNR helpers. - The U85 a16w8 test is wrapped with both `common.XfailIfNoCorstone320` (handles missing-FVP environments via `FileNotFoundError`) and `pytest.mark.xfail(strict=False, reason="...")` (handles the silent-zero bug). Both are function-level decorators that compose cleanly -- pattern matches `test_max_pool1d.py:111-114`. `strict=False` keeps the test target green both on stock Vela 5.0 (cases XFAIL) and once the upstream Vela fix is in tree (cases XPASS allowed). 
Reviewed By: digantdesai Differential Revision: D103667823 --- backends/arm/test/ops/test_sum.py | 74 ++++++++++++++++++++++++++++++- backends/arm/test/targets.bzl | 1 + 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/backends/arm/test/ops/test_sum.py b/backends/arm/test/ops/test_sum.py index 1075055c4f0..97e88c8292a 100644 --- a/backends/arm/test/ops/test_sum.py +++ b/backends/arm/test/ops/test_sum.py @@ -5,6 +5,8 @@ from typing import Callable, Tuple +import pytest + import torch from executorch.backends.arm.test import common @@ -96,7 +98,18 @@ def test_sum_dim_intlist_tosa_INT(test_data: input_t1): pipeline.run() -@common.parametrize("test_data", Sum.test_parameters) +# dim=None cases skipped: executorch.devtools.bundled_program.config rejects +# None as a model input (cannot be serialized into the bundled program). +_DIM_NONE_SKIP_REASON = ( + "bundled_program cannot serialize None as a model input" +) +_dim_none_skips = { + "dim_None": _DIM_NONE_SKIP_REASON, + "dim_None_4d_tensor": _DIM_NONE_SKIP_REASON, +} + + +@common.parametrize("test_data", Sum.test_parameters, skips=_dim_none_skips) @common.XfailIfNoCorstone300 def test_sum_u55_INT_1_0(test_data: Tuple): pipeline = EthosU55PipelineINT[input_t1]( @@ -108,7 +121,7 @@ def test_sum_u55_INT_1_0(test_data: Tuple): pipeline.run() -@common.parametrize("test_data", Sum.test_parameters) +@common.parametrize("test_data", Sum.test_parameters, skips=_dim_none_skips) @common.XfailIfNoCorstone320 def test_sum_u85_INT_1_0(test_data: Tuple): pipeline = EthosU85PipelineINT[input_t1]( @@ -220,3 +233,60 @@ def test_sum_tosa_FP(test_data: Callable[[], input_t2]): def test_sum_tosa_INT(test_data: Callable[[], input_t2]): pipeline = TosaPipelineINT[input_t1](SumDefault(), test_data(), SumDefault.aten_op) pipeline.run() + + +# a16w8 (int16 IO + int8 weights) coverage for sum.dim_IntList. 
Surfaces the +# Ethos-U85 int16 ReduceSum silent-zero issue tracked upstream at +# https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela/-/issues/23. + + +class SumLastDim(torch.nn.Module): + """Reduce the last dim with keepdim=True.""" + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x.sum(dim=-1, keepdim=True) + + +a16w8_sum_test_parameters = { + "rank1_16": lambda: (torch.rand(16),), + "rank3_8x1x16": lambda: (torch.rand(8, 1, 16),), + "rank3_4x4x16": lambda: (torch.rand(4, 4, 16),), +} + + +@common.parametrize("test_data", a16w8_sum_test_parameters) +@common.XfailIfNoCorstone300 +def test_sum_dim_intlist_a16w8_u55_INT(test_data: Callable[[], input_t1]): + pipeline = EthosU55PipelineINT[input_t1]( + SumLastDim(), + test_data(), + aten_op, + exir_ops=[], + a16w8_quantization=True, + symmetric_io_quantization=True, + qtol=128, + epsilon=2**-16, + ) + pipeline.run() + + +# All cases hit upstream Vela issue #23 (linked above). strict=False so the +# test target stays green both on stock Vela 5.0 (cases XFAIL) and once the +# Vela fix is in tree (cases XPASS). +@common.parametrize("test_data", a16w8_sum_test_parameters) +@common.XfailIfNoCorstone320 +@pytest.mark.xfail( + reason="Ethos-U85 int16 ReduceSum returns zero (vela#23)", strict=False +) +def test_sum_dim_intlist_a16w8_u85_INT(test_data: Callable[[], input_t1]): + pipeline = EthosU85PipelineINT[input_t1]( + SumLastDim(), + test_data(), + aten_op, + exir_ops=[], + a16w8_quantization=True, + symmetric_io_quantization=True, + qtol=128, + epsilon=2**-16, + ) + pipeline.run() diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl index 6e2539cf2dc..bad8a3eac76 100644 --- a/backends/arm/test/targets.bzl +++ b/backends/arm/test/targets.bzl @@ -30,6 +30,7 @@ def define_arm_tests(): "ops/test_slice.py", "ops/test_sigmoid.py", "ops/test_sub.py", + "ops/test_sum.py", "ops/test_tanh.py", "ops/test_view.py", "ops/test_cos.py",