diff --git a/backends/nxp/backend/ir/converter/conversion/common.py b/backends/nxp/backend/ir/converter/conversion/common.py index 9186f5d0ac6..72d1216e50c 100755 --- a/backends/nxp/backend/ir/converter/conversion/common.py +++ b/backends/nxp/backend/ir/converter/conversion/common.py @@ -23,8 +23,6 @@ transpose_conv_options, ) -from torch.fx import Node - def try_get_input(t_op: tflite_model.Operator, idx: int) -> tflite_model.Tensor | None: """Return the input tensors of 't_op' at index 'idx', or None if the operator doesn't have that input. @@ -135,34 +133,6 @@ def uses_shape_broadcasting(t_op: tflite_model.Operator) -> bool: ) -def node_uses_shape_broadcasting(node: Node) -> bool: - """Determine if given PyTorch fx Node uses shape broadcasting for it's input nodes or not. - - :param node: PyTorch fx Node with 'all_input_nodes' initialized. - :return: True, if the node uses shape broadcasting for it's input nodes. - False otherwise. - """ - - if node.all_input_nodes is None: - logger.e( - logger.Code.INTERNAL_ERROR, - "common.node_uses_shape_broadcasting(): 'all_input_nodes' are None!", - ) - - if len(node.all_input_nodes) == 0: - logger.e( - logger.Code.INTERNAL_ERROR, - "common.node_uses_shape_broadcasting(): Operator has no inputs!", - ) - - first_input_shape = node.all_input_nodes[0].meta["val"].shape - - return any( - input_tensor.meta["val"].shape != first_input_shape - for input_tensor in node.all_input_nodes[1:] - ) - - class OpsList: """ Holder of TFLite operator (middle_op) that can be prefixed (pre_ops) of suffixed (post_ops) diff --git a/backends/nxp/backend/ir/converter/node_converter.py b/backends/nxp/backend/ir/converter/node_converter.py index c1c1830c583..3abda419e8e 100755 --- a/backends/nxp/backend/ir/converter/node_converter.py +++ b/backends/nxp/backend/ir/converter/node_converter.py @@ -16,6 +16,7 @@ input_quantization_type, output_quantization_type, ) +from executorch.backends.nxp.backend.ir import logger as logger from 
executorch.backends.nxp.backend.ir.conversion_context import ConversionContext from executorch.backends.nxp.backend.ir.converter.builder.aten_model_builder_director import ( AtenModelBuilderDirector, ) @@ -377,3 +378,67 @@ def uses_quantization_type_for_io( ) and NodeConverter.uses_quantization_type_for_outputs( node, supported_types, output_indices ) + + @staticmethod + def uses_shape_broadcasting(node: Node) -> bool: + """Determine if given PyTorch fx Node uses shape broadcasting for its input nodes or not. + + :param node: PyTorch fx Node with 'all_input_nodes' initialized. + :return: True, if the node uses shape broadcasting for its input nodes. + False otherwise. + """ + + if node.all_input_nodes is None: + logger.e( + logger.Code.INTERNAL_ERROR, + "node_converter.uses_shape_broadcasting(): 'all_input_nodes' are None!", + ) + + if len(node.all_input_nodes) == 0: + logger.e( + logger.Code.INTERNAL_ERROR, + "node_converter.uses_shape_broadcasting(): Operator has no inputs!", + ) + + first_input_shape = node.all_input_nodes[0].meta["val"].shape + + return any( + input_tensor.meta["val"].shape != first_input_shape + for input_tensor in node.all_input_nodes[1:] + ) + + @staticmethod + def at_least_one_input_shape_matches_the_output_shape(node: Node) -> bool: + """Determine if at least one input of the given PyTorch fx Node has the same shape as its output. + + :param node: PyTorch fx Node with 'all_input_nodes' initialized. + :return: True, if at least one input has the same shape as the output node. + False otherwise. 
+ """ + + if node.all_input_nodes is None: + logger.e( + logger.Code.INTERNAL_ERROR, + "node_converter.at_least_one_input_shape_matches_the_output_shape(): 'all_input_nodes' are None!", + ) + + if len(node.all_input_nodes) == 0: + logger.e( + logger.Code.INTERNAL_ERROR, + "node_converter.at_least_one_input_shape_matches_the_output_shape(): Operator has no inputs!", + ) + + output_shape = node.meta["val"].shape + + return any( + input_tensor.meta["val"].shape == output_shape + for input_tensor in node.all_input_nodes + ) + + @staticmethod + def _node_inputs_ranks_not_equal(node) -> bool: + first_input_shape = node.all_input_nodes[0].meta["val"].shape + return not all( + len(input_node.meta["val"].shape) == len(first_input_shape) + for input_node in node.all_input_nodes[1:] + ) diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py index cd5aa2ead81..fd28b077b8a 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py @@ -1,11 +1,8 @@ -# Copyright 2025 NXP +# Copyright 2025-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.backends.nxp.backend.ir.converter.conversion.common import ( - node_uses_shape_broadcasting, -) from executorch.backends.nxp.backend.ir.converter.node_converter import ( CustomDelegationOptions, NodeConverter, @@ -26,7 +23,7 @@ def _is_supported_on_target( parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - if node_uses_shape_broadcasting(node): + if NodeConverter.uses_shape_broadcasting(node): # Shape broadcasting may require the addition of `Transpose` ops during conversion. 
return False diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py index d67b0aa4bcb..0e13aeb9b44 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/mul_tensor_converter.py @@ -1,11 +1,11 @@ -# Copyright 2025 NXP +# Copyright 2025-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.backends.nxp.backend.ir.converter.conversion.common import ( - node_uses_shape_broadcasting, -) +import torch + +from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT from executorch.backends.nxp.backend.ir.converter.node_converter import ( CustomDelegationOptions, NodeConverter, @@ -26,19 +26,41 @@ def _is_supported_on_target( parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - if node_uses_shape_broadcasting(node): - # Shape broadcasting may require the addition of `Transpose` ops during conversion. 
- return False + if custom_delegation_options.use_new_flow_neutron_c: + if not NodeConverter.at_least_one_input_shape_matches_the_output_shape( + node + ): + return False + + # If one input is in channel first and ranks of input tensors are not equal, we need to add Transposes + # Transpose is currently not supported for new flow + if any( + input_node.meta[NXP_NODE_FORMAT].is_channels_first() + for input_node in node.all_input_nodes + ) and NodeConverter._node_inputs_ranks_not_equal(node): + return False + + supported_types = [torch.int8, torch.uint8] + if not NodeConverter.uses_quantization_type_for_io( + node, supported_types, [0, 1], [0] + ): + return False + + return True + else: + if NodeConverter.uses_shape_broadcasting(node): + # Shape broadcasting may require the addition of `Transpose` ops during conversion. + return False - node_shape = node.meta["val"].shape + node_shape = node.meta["val"].shape - # Check that at least one dimension is divisible by number of MACS - # or all dimensions are equal to one - # Otherwise Neutron cannot convert it - dim_divisible = any(s % 8 == 0 for s in node_shape) or all( - s == 1 for s in node_shape - ) - return dim_divisible + # Check that at least one dimension is divisible by number of MACS + # or all dimensions are equal to one + # Otherwise Neutron cannot convert it + dim_divisible = any(s % 8 == 0 for s in node_shape) or all( + s == 1 for s in node_shape + ) + return dim_divisible @staticmethod def _is_supported_in_IR( @@ -51,9 +73,11 @@ def _is_supported_in_IR( return True - # mul.Tensor Node format: (Tensor self, Tensor other, *) def convert(self, node: Node): - """Convert 'mul_tensor' operator to NeutronIR 'Mul'.""" + """Convert 'mul_tensor' operator to NeutronIR 'Mul'. 
+ The ExecuTorch schema is: + mul.Tensor(Tensor self, Tensor other) + """ self.assert_convertible(node) t_op = self._create_tflite_op_with_io_tensors(node) t_op.builtin_options = mul_options.Mul() diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py index e9522c87114..e97f4bf63c2 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py @@ -1,11 +1,8 @@ -# Copyright 2025 NXP +# Copyright 2025-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from executorch.backends.nxp.backend.ir.converter.conversion.common import ( - node_uses_shape_broadcasting, -) from executorch.backends.nxp.backend.ir.converter.node_converter import ( CustomDelegationOptions, NodeConverter, @@ -26,7 +23,7 @@ def _is_supported_on_target( parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - if node_uses_shape_broadcasting(node): + if NodeConverter.uses_shape_broadcasting(node): # Shape broadcasting may require the addition of `Transpose` ops during conversion. return False diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 60afa6bf4d2..f9cd75a7359 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -830,7 +830,7 @@ class MulTensorPattern(QuantizationPattern): Basic quantization for all inputs and output. 
""" - def partition_types(self) -> list[torch.nn.Module]: + def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.mul.Tensor] def get_anchors( diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py index 053cd96944d..e72b988a591 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py @@ -1,4 +1,4 @@ -# Copyright 2025 NXP +# Copyright 2025-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -10,19 +10,30 @@ from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, ) -from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program +from executorch.backends.nxp.tests.executorch_pipeline import ( + ModelInputSpec, + to_quantized_edge_program, +) from executorch.backends.nxp.tests.executors import ( convert_run_compare, + graph_contains_any_of_ops, ToChannelFirstPreprocess, ToChannelLastPreprocess, ) +from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier from executorch.backends.nxp.tests.models import ( MulTensorConvModule, MulTensorModule, MulTensorOneInputModule, ) -from executorch.exir.dialects._ops import ops as exir_ops +from executorch.backends.nxp.tests.nsys_testing import lower_run_compare +from executorch.backends.nxp.tests.ops_aliases import ( + Convolution, + ExecutorchDelegateCall, + MulTensor, +) from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -69,7 +80,7 @@ def test_mul_tensor_quant_conversion(mocker, x_input_shape): input_data = {0: input_data_1, 1: input_data_2} exported_nodes = list(exported_program.graph.nodes) - assert exported_nodes[4].target == 
exir_ops.edge.aten.mul.Tensor + assert exported_nodes[4].target == MulTensor convert_run_compare( exported_program, tfl_model=tflite_flatbuffers_model, input_data=input_data @@ -93,9 +104,7 @@ def test_mul_tensor_shape_unsupported_quant_conversion(x_input_shape): nodes = list(edge_program.graph.nodes) # Input tensor shape is not supported, node is not converted - assert ( - nodes[3].target == exir_ops.edge.aten.mul.Tensor - ) # Mul Tensor is not delegated. + assert nodes[3].target == MulTensor # Mul Tensor is not delegated. @pytest.mark.parametrize( @@ -128,7 +137,7 @@ def test_mul_tensor_one_input_quant_conversion(mocker, input_shape): input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8) exported_nodes = list(exported_program.graph.nodes) - assert exported_nodes[2].target == exir_ops.edge.aten.mul.Tensor + assert exported_nodes[2].target == MulTensor convert_run_compare( exported_program, tfl_model=tflite_flatbuffers_model, input_data=input_data @@ -176,8 +185,8 @@ def test_mul_tensor_w_conv_quant_conversion(mocker, x_input_shape): input_data = {0: input_data_1, 1: input_data_2} exported_nodes = list(exported_program.graph.nodes) - assert exported_nodes[12].target == exir_ops.edge.aten.convolution.default - assert exported_nodes[15].target == exir_ops.edge.aten.mul.Tensor + assert exported_nodes[12].target == Convolution + assert exported_nodes[15].target == MulTensor convert_run_compare( exported_program, @@ -207,6 +216,137 @@ def test_mul_tensor_broadcasting_unsupported_quant_conversion( nodes = list(edge_program.graph.nodes) # Broadcast is not supported, node is not converted - assert ( - nodes[6].target == exir_ops.edge.aten.mul.Tensor - ) # Mul Tensor is not delegated. + assert nodes[6].target == MulTensor # Mul Tensor is not delegated. 
+ + +class TestMulTensorNewNeutronFlow: + @pytest.mark.parametrize( + "x_input_shape", + [ + pytest.param((1,), id="1D."), + pytest.param((6, 8), id="2D."), + pytest.param((1, 4, 8), id="3D."), + pytest.param((1, 4, 8, 8), id="4D."), + ], + ) + def test__basic_nsys_inference(self, x_input_shape): + x_input_spec = ModelInputSpec(x_input_shape) + model = MulTensorModule() + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, + exp_non_delegated_nodes=[], + ) + + lower_run_compare( + model, + [x_input_spec, x_input_spec], + graph_verifier, + use_new_flow_neutron_c=True, + ) + + @pytest.mark.parametrize( + "input_spec", + [ + pytest.param( + [ModelInputSpec((4, 6)), ModelInputSpec((1, 6))], id="2 inputs 2D." + ), + pytest.param( + [ModelInputSpec((5, 3, 4)), ModelInputSpec((1, 3, 1))], + id="2 inputs 3D.", + ), + pytest.param( + [ModelInputSpec((4,)), ModelInputSpec((4, 4))], id="2 inputs 1D+2D." + ), + ], + ) + def test__correct_broadcast(self, input_spec): + model = MulTensorModule() + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, + exp_non_delegated_nodes=[], + ) + + lower_run_compare( + model, input_spec, graph_verifier, use_new_flow_neutron_c=True + ) + + @pytest.mark.parametrize( + "input_spec", + [ + pytest.param( + [ModelInputSpec((4, 1)), ModelInputSpec((1, 6))], id="2 inputs 2D." + ), + pytest.param( + [ModelInputSpec((1, 3, 4)), ModelInputSpec((5, 3, 1))], + id="2 inputs 3D.", + ), + pytest.param( + [ModelInputSpec((6, 4)), ModelInputSpec((6, 6, 1))], + id="2 inputs 2D+3D.", + ), + ], + ) + def test__incorrect_broadcast(self, input_spec): + # Broadcast where at least one of the inputs is not equal to output is not supported + model = MulTensorModule() + + delegated_ep = to_quantized_edge_program( + model, input_spec, use_new_flow_neutron_c=True + ).exported_program() + + # Make sure the `mul.Tensor` was NOT delegated. 
+ assert not graph_contains_any_of_ops( + delegated_ep.graph, [ExecutorchDelegateCall] + ) + assert graph_contains_any_of_ops(delegated_ep.graph, [MulTensor]) + + @pytest.mark.parametrize( + "x_input_shape", + [ + pytest.param( + (1, 4, 5, 5), id="4D, product of dims is not a multiple of 8." + ), + ], + ) + def test__w_conv(self, x_input_shape): + model = MulTensorConvModule() + + n, c, h, w = x_input_shape + y_input_spec = ModelInputSpec((n, 8, h, w)) + x_input_spec = ModelInputSpec(x_input_shape) + + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, + exp_non_delegated_nodes=[], + ) + + lower_run_compare( + model, + [x_input_spec, y_input_spec], + graph_verifier, + use_new_flow_neutron_c=True, + ) + + @pytest.mark.parametrize( + "input_spec", + [ + pytest.param( + [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 5))], + id="2 inputs 4D ch last + 2D ch first.", + ), + pytest.param( + [ModelInputSpec((1, 4, 4, 10)), ModelInputSpec((1, 4, 1))], + id="2 inputs 4D ch last + 3D ch first.", + ), + ], + ) + def test__w_conv_unsupported(self, input_spec): + model = MulTensorConvModule() + + delegated_ep = to_quantized_edge_program( + model, input_spec, use_new_flow_neutron_c=True + ).exported_program() + + # Make sure the `mul.Tensor` was NOT delegated. 
+ assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall]) + assert graph_contains_any_of_ops(delegated_ep.graph, [MulTensor]) diff --git a/backends/nxp/tests/ops_aliases.py b/backends/nxp/tests/ops_aliases.py index ae4189e209f..cbf008d3dbc 100644 --- a/backends/nxp/tests/ops_aliases.py +++ b/backends/nxp/tests/ops_aliases.py @@ -11,9 +11,11 @@ AvgPool2D = exir_ops.edge.aten.avg_pool2d.default Bmm = exir_ops.edge.aten.bmm.default +Convolution = exir_ops.edge.aten.convolution.default ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate HardTanh = exir_ops.edge.aten.hardtanh.default HardTanh_ = exir_ops.edge.aten.hardtanh_.default +MulTensor = exir_ops.edge.aten.mul.Tensor Slice = exir_ops.edge.aten.slice.Tensor SliceCopy = exir_ops.edge.aten.slice_copy.Tensor Softmax = exir_ops.edge.aten._softmax.default diff --git a/examples/nxp/executor_runner/nxp_executor_runner.cpp b/examples/nxp/executor_runner/nxp_executor_runner.cpp index 52a62611cb5..65f5831e5c5 100644 --- a/examples/nxp/executor_runner/nxp_executor_runner.cpp +++ b/examples/nxp/executor_runner/nxp_executor_runner.cpp @@ -446,6 +446,9 @@ int main(int argc, char* argv[]) { } closedir(datasetDir); + // Sort inputsData to ensure correct input ordering + std::sort(inputsData.begin(), inputsData.end()); + setInputs(method.get(), inputsData); status = method->execute();