pytorch
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 27 additions & 0 deletions b/‎.github/workflows/pull.yml‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎backends/arm/TARGETS‎
Lines changed: 9 additions & 0 deletions b/‎backends/arm/TARGETS‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎backends/arm/ao_ext/ops/mxfp_conv2d_op.py‎
Lines changed: 7 additions & 5 deletions b/‎backends/arm/ao_ext/ops/mxfp_conv2d_op.py‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎backends/arm/ao_ext/ops/mxfp_linear_op.py‎
Lines changed: 28 additions & 6 deletions b/‎backends/arm/ao_ext/ops/mxfp_linear_op.py‎
Lines changed: 28 additions & 6 deletions
diff --git a/‎backends/arm/operators/op_tosa_identity.py‎
Lines changed: 12 additions & 38 deletions b/‎backends/arm/operators/op_tosa_identity.py‎
Lines changed: 12 additions & 38 deletions
diff --git a/‎backends/arm/public_api_manifests/api_manifest_running.toml‎
Lines changed: 1 addition & 1 deletion b/‎backends/arm/public_api_manifests/api_manifest_running.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/arm/runtime/VGFSetup.cpp‎
Lines changed: 81 additions & 0 deletions b/‎backends/arm/runtime/VGFSetup.cpp‎
Lines changed: 81 additions & 0 deletions
@@ -816,6 +816,33 @@ jobs:
         # Test test_arm_backend.sh with test
         backends/arm/test/test_arm_backend.sh "${ARM_TEST}"
 
+  test-arm-backend-public-api-backward-compatibility:
+    name: test-arm-backend-public-api-backward-compatibility
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge.memory
+      docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
+      script: |
+        # The generic Linux job chooses to use the base env, not the one set up by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        source .ci/scripts/utils.sh
+        install_executorch "--use-pt-pinned-commit"
+
+        .ci/scripts/setup-arm-baremetal-tools.sh --enable-mlsdk-deps --install-mlsdk-deps-with-pip
+        source examples/arm/arm-scratch/setup_path.sh
+
+        backends/arm/scripts/public_api_manifest/validate_all_public_api_manifests.sh
+
+        python backends/arm/test/public_api_bc/run_public_api_bc_scenarios.py
+
   test-llama-runner-qnn-linux:
     name: test-llama-runner-qnn-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
 
@@ -119,6 +119,15 @@ runtime.python_library(
         "//executorch/exir:lib",
     ],
 )
+runtime.python_library(
+    name = "public_api",
+    srcs = ["__init__.py"],
+    deps = [
+        ":ethosu",
+        ":vgf",
+        "//executorch/backends/arm/quantizer:lib",
+    ],
+)
 
 runtime.python_library(
     name = "process_node",
 
@@ -206,11 +206,12 @@ def __init__(
         padding: tuple[int, int],
         dilation: tuple[int, int],
         groups: int,
-        config: MXFPOpConfig,
+        weight_dtype: MXFPDType,
+        block_size: int,
     ) -> None:
         super().__init__()
-        self.config = config
-        self.weight_dtype = mxfp_dtype_to_str(config.weight_dtype)
+        self.weight_dtype = mxfp_dtype_to_str(weight_dtype)
+        self.block_size = block_size
 
         self.register_buffer("weight_qdata", weight_qdata, persistent=True)
         self.register_buffer("weight_scale", weight_scale, persistent=True)
@@ -241,7 +242,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             list(self.padding),
             list(self.dilation),
             self.groups,
-            self.config.block_size,
+            self.block_size,
             self.weight_dtype,
         )
 
@@ -283,5 +284,6 @@ def transform_conv2d_to_mxfp(
         padding,
         dilation,
         module.groups,
-        config,
+        config.weight_dtype,
+        config.block_size,
     )
@@ -33,6 +33,12 @@
 )
 
 
+_SUPPORTED_OUTPUT_DTYPES: set[torch.dtype] = {
+    torch.float32,
+    torch.bfloat16,
+}
+
+
 def _get_mx_elem_dtype(
     weight_qdata: torch.Tensor,
     weight_payload_dtype: str = "",
@@ -137,11 +143,14 @@ def __init__(
         weight_qdata: torch.Tensor,
         weight_scale: torch.Tensor,
         bias: torch.Tensor | None,
-        config: MXFPOpConfig,
+        weight_dtype: MXFPDType,
+        block_size: int,
+        output_dtype: torch.dtype = torch.float32,
     ) -> None:
         super().__init__()
-        self.config = config
-        self.weight_dtype = mxfp_dtype_to_str(config.weight_dtype)
+        self.weight_dtype = mxfp_dtype_to_str(weight_dtype)
+        self.block_size = block_size
+        self.output_dtype = output_dtype
 
         self.register_buffer("weight_qdata", weight_qdata, persistent=True)
         self.register_buffer("weight_scale", weight_scale, persistent=True)
@@ -158,14 +167,17 @@ def __init__(
         )
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        return torch.ops.tosa_mxfp.linear.default(
+        output = torch.ops.tosa_mxfp.linear.default(
             x,
             self.weight_qdata,
             self.weight_scale,
             self.bias,
-            self.config.block_size,
+            self.block_size,
             self.weight_dtype,
         )
+        if self.output_dtype != torch.float32:
+            output = output.to(self.output_dtype)
+        return output
 
 
 def transform_linear_to_mxfp(
@@ -195,4 +207,14 @@ def transform_linear_to_mxfp(
     weight_scale = weight_scale.unsqueeze(0)
 
     bias = module.bias.detach().to(torch.float32) if module.bias is not None else None
-    return MXFPLinearOp(weight_qdata, weight_scale, bias, config)
+    output_dtype = weight.dtype
+    if output_dtype not in _SUPPORTED_OUTPUT_DTYPES:
+        raise ValueError(f"Unsupported output_dtype: {output_dtype}")
+    return MXFPLinearOp(
+        weight_qdata,
+        weight_scale,
+        bias,
+        config.weight_dtype,
+        config.block_size,
+        output_dtype,
+    )
@@ -3,42 +3,28 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import Any, List
-
-import torch
 import tosa_serializer as ts
 
-from executorch.backends.arm.operators.node_visitor import (
-    NodeVisitor,
-    register_node_visitor,
-)
-from executorch.backends.arm.operators.operator_validation_utils import (
-    validate_num_inputs,
-    validate_same_dtype,
-    validate_valid_dtype,
+from executorch.backends.arm.operators.node_visitor import register_node_visitor
+from executorch.backends.arm.operators.simple_node_visitor import (
+    SimpleNodeVisitor,
+    SimpleNodeVisitorConfig,
 )
-from executorch.backends.arm.tosa.mapping import TosaArg
 
 
 @register_node_visitor
-class IdentityVisitor(NodeVisitor):
+class IdentityVisitor(SimpleNodeVisitor):
     """Lower the TOSA IDENTITY op."""
 
     target = "tosa.IDENTITY.default"
 
-    def define_node(
-        self,
-        node: torch.fx.Node,
-        tosa_graph: Any,
-        inputs: List[TosaArg],
-        output: TosaArg,
-    ) -> None:
-        validate_num_inputs(self.target, inputs, 1)
-        validate_same_dtype(self.target, [inputs[0], output], ts)
-        validate_valid_dtype(
-            self.target,
-            [inputs[0], output],
-            [
+    @classmethod
+    def get_config(cls) -> SimpleNodeVisitorConfig:
+        return SimpleNodeVisitorConfig(
+            tosa_op=ts.Op.IDENTITY,
+            attr_method="IdentityAttribute",
+            num_inputs=1,
+            input_dtypes=[
                 ts.DType.BOOL,
                 ts.DType.INT8,
                 ts.DType.INT16,
@@ -49,16 +35,4 @@ def define_node(
                 ts.DType.FP8E4M3,
                 ts.DType.FP8E5M2,
             ],
-            self.tosa_spec,
-        )
-
-        attr = ts.TosaSerializerAttribute()
-        attr.IdentityAttribute()
-        self._serialize_operator(
-            node,
-            tosa_graph,
-            ts.Op.IDENTITY,
-            [inputs[0].name],
-            [output.name],
-            attr,
         )
@@ -4,7 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 #
 # This file is generated by
-# backends/arm/scripts/generate_public_api_manifest.py
+# backends/arm/scripts/public_api_manifest/generate_public_api_manifest.py
 
 [python]
 
 
@@ -778,6 +778,73 @@ static bool find_memory_index(
       memory_type_out);
 }
 
+// Persistently maps the device memory behind every entry in IOs and stores
+// the resulting host pointer in that entry's persistent_memory field.
+//
+// Mappings left over from a previous call are released first. Returns true
+// once every IO entry has a host pointer. On any failure, all mappings made
+// so far are released again and false is returned.
+bool VgfRepr::map_persistent_io_memory() {
+  unmap_persistent_io_memory();
+
+  for (auto& io : IOs) {
+    if (io.memory == VK_NULL_HANDLE) {
+      ET_LOG(Error, "Cannot persistently map null Vulkan IO memory");
+      unmap_persistent_io_memory();
+      return false;
+    }
+
+    // Several IO resources may alias one VkDeviceMemory, and a memory object
+    // must not be mapped more than once at the same time. If an earlier entry
+    // already mapped this memory, reuse its pointer instead of mapping again.
+    // NOTE(review): nothing here verifies that the memory is HOST_VISIBLE and
+    // HOST_COHERENT; that is assumed to be guaranteed where it is allocated.
+    const auto already_mapped = std::find_if(
+        persistent_mapped_memories.begin(),
+        persistent_mapped_memories.end(),
+        [&io](const auto& entry) { return entry.memory == io.memory; });
+
+    if (already_mapped != persistent_mapped_memories.end()) {
+      io.persistent_memory = already_mapped->data;
+      continue;
+    }
+
+    // First time this memory object is seen: map its whole range.
+    void* host_ptr = nullptr;
+    const VkResult map_status =
+        vkMapMemory(vk_device, io.memory, 0, VK_WHOLE_SIZE, 0, &host_ptr);
+    if (map_status != VK_SUCCESS) {
+      ET_LOG(
+          Error,
+          "Failed to persistently map Vulkan IO memory, error %d",
+          map_status);
+      unmap_persistent_io_memory();
+      return false;
+    }
+
+    // Record the mapping so aliased entries can share it and so it can be
+    // released later.
+    persistent_mapped_memories.push_back(PersistentMappedMemory{
+        .memory = io.memory,
+        .data = host_ptr,
+    });
+    io.persistent_memory = host_ptr;
+  }
+
+  return true;
+}
+
+// Releases every persistent mapping recorded in persistent_mapped_memories,
+// empties that list, and resets the cached host pointer of each IO entry.
+void VgfRepr::unmap_persistent_io_memory() {
+  for (const auto& entry : persistent_mapped_memories) {
+    // Skip records that do not describe a live mapping.
+    if (entry.memory == VK_NULL_HANDLE || entry.data == nullptr) {
+      continue;
+    }
+    vkUnmapMemory(vk_device, entry.memory);
+  }
+  persistent_mapped_memories.clear();
+
+  // The host pointers held by the IO entries are no longer valid.
+  for (auto& io_entry : IOs) {
+    io_entry.persistent_memory = nullptr;
+  }
+}
+
 VkResult allocate_memory(
     VkPhysicalDevice physical,
     VkDevice device,
@@ -1839,6 +1906,7 @@ bool VgfRepr::process_vgf(
                  VK_NULL_HANDLE,
                  tensor_memory,
                  {0, 0, 0},
+                 nullptr,
                  owns_memory,
                  true,
                  is_in});
@@ -1931,6 +1999,7 @@ bool VgfRepr::process_vgf(
                  VK_NULL_HANDLE,
                  buffer_memory,
                  {0, 0, 0},
+                 nullptr,
                  owns_memory,
                  true,
                  is_in});
@@ -2117,6 +2186,7 @@ bool VgfRepr::process_vgf(
                  image_memory,
                  staging_memory,
                  image_extent,
+                 nullptr,
                  true,
                  owns_image_memory,
                  is_in});
@@ -3433,6 +3503,15 @@ bool VgfRepr::process_vgf(
     vkEndCommandBuffer(vk_execute_cmd);
   }
 
+  {
+    VGF_PROFILE_SCOPE(event_tracer, "VGF_INIT_MAP_IO_MEMORY");
+
+    if (!map_persistent_io_memory()) {
+      ET_LOG(Error, "Failed to persistently map VGF IO memory");
+      return false;
+    }
+  }
+
   return true;
 }
 
@@ -3493,6 +3572,8 @@ bool VgfRepr::execute_vgf(executorch::runtime::EventTracer* event_tracer) {
 }
 
 void VgfRepr::free_vgf() {
+  unmap_persistent_io_memory();
+
   if (vk_timestamp_query_pool != VK_NULL_HANDLE) {
     vkDestroyQueryPool(vk_device, vk_timestamp_query_pool, nullptr);
     vk_timestamp_query_pool = VK_NULL_HANDLE;
Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,7 @@`
`4`	`4`	`# LICENSE file in the root directory of this source tree.`
`5`	`5`	`#`
`6`	`6`	`# This file is generated by`
`7`		`-# backends/arm/scripts/generate_public_api_manifest.py`
	`7`	`+# backends/arm/scripts/public_api_manifest/generate_public_api_manifest.py`
`8`	`8`
`9`	`9`	`[python]`
`10`	`10`