pytorch
diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/scripts/setup-samsung-linux-deps.sh b/.ci/scripts/setup-samsung-linux-deps.sh
Lines changed: 0 additions & 9 deletions
diff --git a/.ci/scripts/setup-windows.ps1 b/.ci/scripts/setup-windows.ps1
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/scripts/test_model.ps1 b/.ci/scripts/test_model.ps1
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/scripts/test_torchao_huggingface_checkpoints.sh b/.ci/scripts/test_torchao_huggingface_checkpoints.sh
Lines changed: 139 additions & 0 deletions
diff --git a/.ci/scripts/test_wheel_package_qnn.sh b/.ci/scripts/test_wheel_package_qnn.sh
Lines changed: 201 additions & 0 deletions
diff --git a/.ci/scripts/unittest-buck2.sh b/.ci/scripts/unittest-buck2.sh
Lines changed: 5 additions & 1 deletion
@@ -1 +1 @@
-e7152ff8a6a929a0db7f3f4a72a5b6d471769cd3
+4d4abec80f03cd8fdefe1d9cb3a60d3690cd777e
@@ -54,15 +54,6 @@ install_enn_backend() {
   rm -rf "${NDK_INSTALLATION_DIR}" && sudo mkdir -p "${NDK_INSTALLATION_DIR}"
   ANDROID_NDK_VERSION=r27b
 
-  pushd .
-  cd /tmp
-  curl -Os --retry 3 "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
-  unzip -qo "android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
-
-  # Print the content for manual verification
-  ls -lah "android-ndk-${ANDROID_NDK_VERSION}"
-  sudo mv "android-ndk-${ANDROID_NDK_VERSION}"/* "${NDK_INSTALLATION_DIR}"
-  popd
   # build Exynos backend
   export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk}
   bash backends/samsung/build.sh --build all
 
@@ -1,5 +1,5 @@
 param (
-    [string]$editable = $false
+    [string]$editable = "false"
 )
 
 conda create --yes --quiet -n et python=3.12
 
@@ -34,7 +34,7 @@ function ExportModel-Xnnpack {
         [bool]$quantize
     )
 
-    if $(quantize) {
+    if ($quantize) {
         python -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize | Write-Host
         $modelFile = "$($modelName)_xnnpack_q8.pte"
     } else {
 
@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+
+# -------------------------
+# Args / flags
+# -------------------------
+TEST_WITH_RUNNER=0
+MODEL_NAME=""
+
+# Parse args
+if [[ $# -lt 1 ]]; then
+  echo "Usage: $0 <model_name> [--test_with_runner]"
+  echo "Supported model_name values: qwen3_4b, phi_4_mini"
+  exit 1
+fi
+
+MODEL_NAME="$1"
+shift
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --test_with_runner)
+      TEST_WITH_RUNNER=1
+      ;;
+    -h|--help)
+      echo "Usage: $0 <model_name> [--test_with_runner]"
+      echo "  model_name: qwen3_4b | phi_4_mini"
+      echo "  --test_with_runner: build ET + run llama_main to sanity-check the export"
+      exit 0
+      ;;
+    *)
+      echo "Unknown option: $1"
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+MODEL_OUT=model.pte
+
+case "$MODEL_NAME" in
+  qwen3_4b)
+    echo "Running Qwen3-4B export..."
+    HF_MODEL_DIR=$(hf download pytorch/Qwen3-4B-INT8-INT4)
+    EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB
+    $PYTHON_EXECUTABLE -m executorch.examples.models.qwen3.convert_weights \
+      $HF_MODEL_DIR \
+      pytorch_model_converted.bin
+
+    $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \
+      --model "qwen3_4b" \
+      --checkpoint pytorch_model_converted.bin \
+      --params examples/models/qwen3/config/4b_config.json \
+      --output_name $MODEL_OUT \
+      -kv \
+      --use_sdpa_with_kv_cache \
+      -X \
+      --xnnpack-extended-ops \
+      --max_context_length 1024 \
+      --max_seq_length 1024 \
+      --dtype fp32 \
+      --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}'
+    ;;
+
+  phi_4_mini)
+    echo "Running Phi-4-mini export..."
+    HF_MODEL_DIR=$(hf download pytorch/Phi-4-mini-instruct-INT8-INT4)
+    EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB
+    $PYTHON_EXECUTABLE -m executorch.examples.models.phi_4_mini.convert_weights \
+      $HF_MODEL_DIR \
+      pytorch_model_converted.bin
+
+    $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \
+      --model "phi_4_mini" \
+      --checkpoint pytorch_model_converted.bin \
+      --params examples/models/phi_4_mini/config/config.json \
+      --output_name $MODEL_OUT \
+      -kv \
+      --use_sdpa_with_kv_cache \
+      -X \
+      --xnnpack-extended-ops \
+      --max_context_length 1024 \
+      --max_seq_length 1024 \
+      --dtype fp32 \
+      --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}'
+    ;;
+
+  *)
+    echo "Error: unsupported model_name '$MODEL_NAME'"
+    echo "Supported values: qwen3_4b, phi_4_mini"
+    exit 1
+    ;;
+esac
+
+# Check file size
+MODEL_SIZE=$(stat --printf="%s" $MODEL_OUT 2>/dev/null || stat -f%z $MODEL_OUT)
+if [[ $MODEL_SIZE -gt $EXPECTED_MODEL_SIZE_UPPER_BOUND ]]; then
+  echo "Error: model size $MODEL_SIZE is greater than expected upper bound $EXPECTED_MODEL_SIZE_UPPER_BOUND"
+  exit 1
+fi
+
+# Optionally build ExecuTorch and the llama runner with CMake, then run the
+# exported model once with a fixed prompt as a smoke test.
+if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
+  echo "[runner] Building and testing llama_main ..."
+
+  # Configure CMake with the same interpreter that performed the export
+  # (python3 when PYTHON_EXECUTABLE is unset or empty) instead of whatever a
+  # bare `python` resolves to on PATH, which may be missing or different.
+  RUNNER_PYTHON="${PYTHON_EXECUTABLE:-python3}"
+
+  # Configure, build and install the core libraries into cmake-out.
+  cmake -DPYTHON_EXECUTABLE="$RUNNER_PYTHON" \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DEXECUTORCH_ENABLE_LOGGING=1 \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
+    -Bcmake-out .
+  cmake --build cmake-out -j16 --config Release --target install
+
+  # Configure and build the llama runner from examples/models/llama.
+  cmake -DPYTHON_EXECUTABLE="$RUNNER_PYTHON" \
+    -DCMAKE_BUILD_TYPE=Release \
+    -Bcmake-out/examples/models/llama \
+    examples/models/llama
+  cmake --build cmake-out/examples/models/llama -j16 --config Release
+
+  # Run the model; the tokenizer is read from the downloaded checkpoint directory.
+  ./cmake-out/examples/models/llama/llama_main --model_path="$MODEL_OUT" --tokenizer_path="${HF_MODEL_DIR}/tokenizer.json" --prompt="Once upon a time,"
+fi
+
+# Clean up the intermediate checkpoint and the exported program.
+rm -f pytorch_model_converted.bin "$MODEL_OUT"
@@ -0,0 +1,201 @@
+#!/bin/bash
+# === CI Wheel Build & Test Script ===
+
+# Exit immediately on error, print each command, and capture all output to build.log
+set -e
+set -x
+exec > >(tee -i build.log) 2>&1
+
+# Save repo root
+REPO_ROOT=$(pwd)
+
+# ----------------------------
+# Dynamically create script_qnn_wheel_test.py
+# ----------------------------
+cat > "/tmp/script_qnn_wheel_test.py" << 'EOF'
+# pyre-ignore-all-errors
+import argparse
+
+import torch
+from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer
+from executorch.backends.qualcomm.utils.utils import (
+    generate_htp_compiler_spec,
+    generate_qnn_executorch_compiler_spec,
+    get_soc_to_chipset_map,
+    to_edge_transform_and_lower_to_qnn,
+)
+from executorch.exir.backend.utils import format_delegated_graph
+from executorch.examples.models.model_factory import EagerModelFactory
+from executorch.exir.capture._config import ExecutorchBackendConfig
+from executorch.extension.export_util.utils import save_pte_program
+from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e, prepare_qat_pt2e
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-f", "--output_folder", type=str, default="", help="The folder to store the exported program")
+    parser.add_argument("--soc", type=str, default="SM8650", help="Specify the SoC model.")
+    parser.add_argument("-q", "--quantization", choices=["ptq", "qat"], help="Run post-traininig quantization.")
+    args = parser.parse_args()
+
+    class LinearModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.linear = torch.nn.Linear(3, 3)
+        def forward(self, arg):
+            return self.linear(arg)
+        def get_example_inputs(self):
+            return (torch.randn(3, 3),)
+
+    model = LinearModule()
+    example_inputs = model.get_example_inputs()
+
+    if args.quantization:
+        quantizer = QnnQuantizer()
+        m = torch.export.export(model.eval(), example_inputs, strict=True).module()
+        if args.quantization == "qat":
+            m = prepare_qat_pt2e(m, quantizer)
+            m(*example_inputs)
+        elif args.quantization == "ptq":
+            m = prepare_pt2e(m, quantizer)
+            m(*example_inputs)
+        m = convert_pt2e(m)
+    else:
+        m = model
+
+    use_fp16 = True if args.quantization is None else False
+    backend_options = generate_htp_compiler_spec(use_fp16=use_fp16)
+    compile_spec = generate_qnn_executorch_compiler_spec(
+        soc_model=get_soc_to_chipset_map()[args.soc],
+        backend_options=backend_options,
+    )
+    delegated_program = to_edge_transform_and_lower_to_qnn(m, example_inputs, compile_spec)
+    output_graph = format_delegated_graph(delegated_program.exported_program().graph_module)
+    # Ensure QnnBackend is in the output graph
+    assert "QnnBackend" in output_graph
+    executorch_program = delegated_program.to_executorch(
+        config=ExecutorchBackendConfig(extract_delegate_segments=False)
+    )
+    save_pte_program(executorch_program, "linear", args.output_folder)
+
+if __name__ == "__main__":
+    main()
+EOF
+
+# ----------------------------
+# Wheel build and .so checks
+# ----------------------------
+echo "=== Building Wheel Package ==="
+source .ci/scripts/utils.sh
+install_executorch
+EXECUTORCH_BUILDING_WHEEL=1 python setup.py bdist_wheel
+unset EXECUTORCH_BUILDING_WHEEL
+
+WHEEL_FILE=$(ls dist/*.whl | head -n 1)
+echo "Found wheel: $WHEEL_FILE"
+
+PYTHON_VERSION=$1
+# ----------------------------
+# Check wheel does NOT contain qualcomm/sdk
+# ----------------------------
+echo "Checking wheel does not contain qualcomm/sdk..."
+SDK_FILES=$(unzip -l "$WHEEL_FILE" | awk '{print $4}' | grep "executorch/backends/qualcomm/sdk" || true)
+if [ -n "$SDK_FILES" ]; then
+    echo "ERROR: Wheel package contains unexpected qualcomm/sdk files:"
+    echo "$SDK_FILES"
+    exit 1
+else
+    echo "OK: No qualcomm/sdk files found in wheel"
+fi
+
+# ----------------------------
+# Check .so files in the wheel
+# ----------------------------
+echo "Checking for .so files inside the wheel..."
+WHEEL_SO_FILES=$(unzip -l "$WHEEL_FILE" | awk '{print $4}' | grep "executorch/backends/qualcomm/python" || true)
+if [ -z "$WHEEL_SO_FILES" ]; then
+    echo "ERROR: No .so files found in wheel under executorch/backends/qualcomm/python"
+    exit 1
+else
+    echo "Wheel contains the following .so files:"
+    echo "$WHEEL_SO_FILES"
+fi
+
+# ----------------------------
+# Helpers
+# ----------------------------
+get_site_packages_dir () {
+  local PYBIN="$1"
+  "$PYBIN" - <<'PY'
+import sysconfig, sys
+print(sysconfig.get_paths().get("purelib") or sysconfig.get_paths().get("platlib"))
+PY
+}
+
+run_core_tests () {
+  local PYBIN="$1"      # path to python
+  local PIPBIN="$2"     # path to pip
+  local LABEL="$3"      # label to print (conda/venv)
+
+  echo "=== [$LABEL] Installing wheel & deps ==="
+  "$PIPBIN" install --upgrade pip
+  "$PIPBIN" install "$WHEEL_FILE"
+  "$PIPBIN" install torch=="2.9.0.dev20250906" --index-url "https://download.pytorch.org/whl/nightly/cpu"
+  "$PIPBIN" install --pre torchao --index-url "https://download.pytorch.org/whl/nightly/cpu"
+
+  echo "=== [$LABEL] Import smoke tests ==="
+  "$PYBIN" -c "import executorch; print('executorch imported successfully')"
+  "$PYBIN" -c "import executorch.backends.qualcomm; print('executorch.backends.qualcomm imported successfully')"
+
+  echo "=== [$LABEL] List installed executorch/backends/qualcomm/python ==="
+  local SITE_DIR
+  SITE_DIR="$(get_site_packages_dir "$PYBIN")"
+  local SO_DIR="$SITE_DIR/executorch/backends/qualcomm/python"
+  ls -l "$SO_DIR" || echo "Folder does not exist!"
+
+  echo "=== [$LABEL] Run export script to generate linear.pte ==="
+  (cd "$REPO_ROOT" && "$PYBIN" "/tmp/script_qnn_wheel_test.py")
+
+  if [ -f "$REPO_ROOT/linear.pte" ]; then
+      echo "[$LABEL] Model file linear.pte successfully created"
+  else
+      echo "ERROR: [$LABEL] Model file linear.pte was not created"
+      exit 1
+  fi
+}
+
+# ----------------------------
+# Conda environment setup & tests
+# ----------------------------
+echo "=== Testing in Conda env ==="
+TEMP_ENV_DIR=$(mktemp -d)
+echo "Using temporary directory for conda: $TEMP_ENV_DIR"
+conda create -y -p "$TEMP_ENV_DIR/env" python=$PYTHON_VERSION
+# derive python/pip paths inside the conda env
+CONDA_PY="$TEMP_ENV_DIR/env/bin/python"
+CONDA_PIP="$TEMP_ENV_DIR/env/bin/pip"
+# Some images require conda run; keep pip/python direct to simplify path math
+run_core_tests "$CONDA_PY" "$CONDA_PIP" "conda"
+
+# Cleanup conda env
+conda env remove -p "$TEMP_ENV_DIR/env" -y || true
+rm -rf "$TEMP_ENV_DIR"
+
+# ----------------------------
+# Python venv setup & tests
+# ----------------------------
+echo "=== Testing in Python venv ==="
+TEMP_VENV_DIR=$(mktemp -d)
+echo "Using temporary directory for venv: $TEMP_VENV_DIR"
+python3 -m venv "$TEMP_VENV_DIR/venv"
+VENV_PY="$TEMP_VENV_DIR/venv/bin/python"
+VENV_PIP="$TEMP_VENV_DIR/venv/bin/pip"
+
+# Ensure venv has wheel/build basics if needed
+"$VENV_PIP" install --upgrade pip
+
+run_core_tests "$VENV_PY" "$VENV_PIP" "venv"
+
+# Cleanup venv
+rm -rf "$TEMP_VENV_DIR"
+
+echo "=== All tests completed! ==="
@@ -9,7 +9,11 @@ set -eux
 # TODO: expand this to //...
 # TODO: can't query cadence & vulkan backends
 # TODO: can't query //kernels/prim_ops because of non-buckified stuff in OSS.
-buck2 query "//backends/apple/... + //backends/example/... + \
+# TODO: Make //backends/arm tests use runtime wrapper so we can just query //backends/arm/...
+buck2 query "//backends/apple/... + //backends/arm: + //backends/arm/debug/... + \
+//backends/arm/operator_support/... + //backends/arm/operators/... + \
+//backends/arm/_passes/... + //backends/arm/runtime/... + //backends/arm/tosa/... \
++ //backends/example/... + \
 //backends/mediatek/... + //backends/transforms/... + \
 //backends/xnnpack/... + //configurations/... + //extension/flat_tensor: + \
 //extension/llm/runner: + //kernels/aten/... + //kernels/optimized/... + \
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-e7152ff8a6a929a0db7f3f4a72a5b6d471769cd3`
	`1`	`+4d4abec80f03cd8fdefe1d9cb3a60d3690cd777e`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`param (`
`2`		`- [string]$editable = $false`
	`2`	`+ [string]$editable = "false"`
`3`	`3`	`)`
`4`	`4`
`5`	`5`	`conda create --yes --quiet -n et python=3.12`
Original file line number	Diff line number	Diff line change
`@@ -34,7 +34,7 @@ function ExportModel-Xnnpack {`
`34`	`34`	`[bool]$quantize`
`35`	`35`	`)`
`36`	`36`
`37`		`- if $(quantize) {`
	`37`	`+ if ($quantize) {`
`38`	`38`	`python -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize \| Write-Host`
`39`	`39`	`$modelFile = "$($modelName)_xnnpack_q8.pte"`
`40`	`40`	`} else {`