From 018f7dbc2624a08b721c284eb2ad5cbf06d94fa4 Mon Sep 17 00:00:00 2001 From: Usamah Zaheer Date: Tue, 7 Apr 2026 13:02:44 +0100 Subject: [PATCH 01/11] Arm backend: Allow Arm executor_runner CMake to run standalone - Detect standalone invocation, derive EXECUTORCH_ROOT, and expose ARM_EXECUTOR_RUNNER_STANDALONE for diagnostics. - Load ExecuTorch presets and add_subdirectory(EXECUTORCH_ROOT ...) when building out of tree. - Refresh pte_to_header.py with a shebang and updated Arm copyright. Change-Id: I4582326c72a0b571c495aca64b2c58e45bfbb5be Signed-off-by: Usamah Zaheer --- backends/arm/scripts/build_executor_runner.sh | 67 +++++++---- backends/arm/test/misc/test_runner_utils.py | 36 ++++++ backends/arm/test/runner_utils.py | 106 ++++++++++++++---- examples/arm/executor_runner/CMakeLists.txt | 105 ++++++++++++++--- examples/arm/executor_runner/pte_to_header.py | 3 +- .../executor_runner/standalone/CMakeLists.txt | 106 ++++++++++++++++++ 6 files changed, 361 insertions(+), 62 deletions(-) create mode 100644 examples/arm/executor_runner/standalone/CMakeLists.txt diff --git a/backends/arm/scripts/build_executor_runner.sh b/backends/arm/scripts/build_executor_runner.sh index f2ffd2e27a7..55f1a272b9e 100755 --- a/backends/arm/scripts/build_executor_runner.sh +++ b/backends/arm/scripts/build_executor_runner.sh @@ -9,6 +9,9 @@ set -eu script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) et_root_dir=$(cd ${script_dir}/../../.. && pwd) et_root_dir=$(realpath ${et_root_dir}) +runner_source_dir=${et_root_dir}/examples/arm/executor_runner/standalone +runner_source_dir=$(realpath ${runner_source_dir}) +preset_file=${et_root_dir}/tools/cmake/preset/arm_baremetal.cmake toolchain=arm-none-eabi-gcc setup_path_script=${et_root_dir}/examples/arm/arm-scratch/setup_path.sh _setup_msg="please refer to ${et_root_dir}/examples/arm/setup.sh to properly install necessary tools." 
@@ -101,6 +104,9 @@ toolchain_cmake=$(realpath ${toolchain_cmake}) source ${setup_path_script} +[[ -f ${preset_file} ]] \ + || { echo "Missing ${preset_file}. ${_setup_msg}"; exit 1; } + if [[ ${pte_file} == "semihosting" ]]; then pte_data="-DSEMIHOSTING=ON" else @@ -122,13 +128,13 @@ else fi fi ethosu_tools_dir=$(realpath ${ethosu_tools_dir}) -ethos_u_root_dir="$ethosu_tools_dir/ethos-u" +ethos_u_root_dir="${ethosu_tools_dir}/ethos-u" mkdir -p "${ethos_u_root_dir}" -ethosu_tools_dir=$(realpath ${ethos_u_root_dir}) - -et_build_dir=${et_build_root}/cmake-out -mkdir -p ${et_build_dir} -et_build_dir=$(realpath ${et_build_dir}) +ethos_u_root_dir=$(realpath ${ethos_u_root_dir}) +cmsis_nn_local_path="" +if [[ -d "${ethos_u_root_dir}/core_software/cmsis-nn" ]]; then + cmsis_nn_local_path=$(realpath "${ethos_u_root_dir}/core_software/cmsis-nn") +fi if [[ ${system_config} == "" ]] then @@ -160,34 +166,47 @@ echo "-------------------------------------------------------------------------- echo "Build Arm ${toolchain/-gcc/} executor_runner for ${target} PTE: ${pte_file} using ${system_config} ${memory_mode} ${extra_build_flags} to '${output_folder}'" echo "--------------------------------------------------------------------------------" -cd ${et_root_dir}/examples/arm/executor_runner - if [ "$bundleio" = true ] ; then build_bundleio_flags=" -DET_BUNDLE_IO=ON " + candidate_build_dir="${et_build_root}/cmake-out" + if [[ -d "${candidate_build_dir}" ]]; then + candidate_build_dir=$(realpath "${candidate_build_dir}") + build_bundleio_flags+=" -DET_BUILD_DIR_PATH=${candidate_build_dir} " + fi + if [[ -n "${BUNDLED_PROGRAM_LIBRARY_DIR:-}" ]]; then + build_bundleio_flags+=" -DBUNDLED_PROGRAM_LIBRARY_DIR=${BUNDLED_PROGRAM_LIBRARY_DIR} " + fi fi if [ "$build_with_etdump" = true ] ; then build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=ON -DET_DUMP_INTERMEDIATE_OUTPUTS=ON " fi +devtools_flags="" +if [ "$bundleio" = true ] || [ "$build_with_etdump" = true ] ; then + 
devtools_flags=" -DEXECUTORCH_BUILD_DEVTOOLS=ON " +fi -echo "Building with BundleIO/etdump/extra flags: ${build_bundleio_flags} ${build_with_etdump_flags} ${extra_build_flags}" +echo "Building with BundleIO/etdump/extra flags: ${build_bundleio_flags} ${build_with_etdump_flags} ${devtools_flags} ${extra_build_flags}" cmake \ - -DCMAKE_BUILD_TYPE=${build_type} \ - -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} \ - -DTARGET_CPU=${target_cpu} \ - -DET_DIR_PATH:PATH=${et_root_dir} \ - -DET_BUILD_DIR_PATH:PATH=${et_build_dir} \ - -DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \ - -DETHOSU_TARGET_NPU_CONFIG=${target} \ - ${pte_data} \ - ${build_bundleio_flags} \ - ${build_with_etdump_flags} \ - -DPYTHON_EXECUTABLE=$(which python3) \ - -DSYSTEM_CONFIG=${system_config} \ - -DMEMORY_MODE=${memory_mode} \ + -S ${runner_source_dir} \ + -B ${output_folder} \ + -DEXECUTORCH_ROOT=${et_root_dir} \ + -DCMAKE_BUILD_TYPE=${build_type} \ + -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} \ + -DTARGET_CPU=${target_cpu} \ + -DETHOSU_TARGET_NPU_CONFIG=${target} \ + -DEXECUTORCH_BUILD_PRESET_FILE=${preset_file} \ + -DEXECUTORCH_BAREMETAL_SKIP_INSTALL=OFF \ + ${pte_data} \ + ${build_bundleio_flags} \ + ${build_with_etdump_flags} \ + ${devtools_flags} \ + -DSYSTEM_CONFIG=${system_config} \ + -DMEMORY_MODE=${memory_mode} \ -DEXECUTORCH_SELECT_OPS_LIST="${select_ops_list}" \ - ${extra_build_flags} \ - -B ${output_folder} + -DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \ + ${cmsis_nn_local_path:+-DCMSIS_NN_LOCAL_PATH:PATH=${cmsis_nn_local_path}} \ + ${extra_build_flags} echo "[${BASH_SOURCE[0]}] Configured CMAKE" diff --git a/backends/arm/test/misc/test_runner_utils.py b/backends/arm/test/misc/test_runner_utils.py index 10a8b6df3a6..3c78b21e008 100644 --- a/backends/arm/test/misc/test_runner_utils.py +++ b/backends/arm/test/misc/test_runner_utils.py @@ -77,3 +77,39 @@ def _fake_run_cmd(cmd, check=True): assert "-i i1.bin" in semihosting_cmd_arg assert long_input_paths[0] not in semihosting_cmd_arg assert 
long_input_paths[1] not in semihosting_cmd_arg + + +def test_get_elf_path_uses_repo_root_candidates(monkeypatch, tmp_path: Path) -> None: + elf_path = ( + tmp_path + / "arm_test" + / "arm_semihosting_executor_runner_corstone-300" + / "arm_executor_runner" + ) + elf_path.parent.mkdir(parents=True) + elf_path.write_bytes(b"") + + monkeypatch.setattr(runner_utils, "_elf_search_roots", lambda: [tmp_path]) + other_cwd = tmp_path / "elsewhere" + other_cwd.mkdir() + monkeypatch.chdir(other_cwd) + + assert runner_utils.get_elf_path("corstone-300") == str(elf_path) + + +def test_get_elf_path_accepts_nested_runner_output(monkeypatch, tmp_path: Path) -> None: + elf_path = ( + tmp_path + / "arm_test" + / "arm_semihosting_executor_runner_corstone-300" + / "examples" + / "arm" + / "executor_runner" + / "arm_executor_runner" + ) + elf_path.parent.mkdir(parents=True) + elf_path.write_bytes(b"") + + monkeypatch.setattr(runner_utils, "_elf_search_roots", lambda: [tmp_path]) + + assert runner_utils.get_elf_path("corstone-300") == str(elf_path) diff --git a/backends/arm/test/runner_utils.py b/backends/arm/test/runner_utils.py index 914a95f0c8d..93887fbda6b 100644 --- a/backends/arm/test/runner_utils.py +++ b/backends/arm/test/runner_utils.py @@ -12,6 +12,7 @@ import subprocess # nosec B404 - invoked only for trusted toolchain binaries import sys import tempfile +from collections.abc import Iterable from pathlib import Path from types import NoneType @@ -848,39 +849,98 @@ def vkml_emulation_layer_installed() -> bool: return layers_exists and deploy_exists -def assert_elf_path_exists(elf_path): - if not os.path.exists(elf_path): - raise FileNotFoundError( - f"Did not find build arm_executor_runner or executor_runner in path {elf_path}, \ - run setup_testing.sh or setup_testing_vkml.sh?" 
- ) - - -def get_elf_path(target_board: str, use_portable_ops: bool = False) -> str: - elf_path = "" +def _elf_search_roots() -> list[Path]: + roots: list[Path] = [] + for env_var in ( + "EXECUTORCH_ROOT", + "GITHUB_WORKSPACE", + "BUILD_WORKSPACE_DIRECTORY", + ): + env_root = os.environ.get(env_var) + if env_root: + roots.append(Path(env_root).expanduser()) + + cwd = Path.cwd().resolve() + search_parents = [cwd, *cwd.parents, *Path(__file__).resolve().parents] + for parent in search_parents: + if (parent / "examples" / "arm").is_dir() or (parent / "arm_test").exists(): + roots.append(parent) + + unique_roots: list[Path] = [] + seen: set[Path] = set() + for root in roots: + resolved = root.resolve() + if resolved not in seen: + unique_roots.append(resolved) + seen.add(resolved) + return unique_roots + + +def _elf_path_candidates( + target_board: str, use_portable_ops: bool = False +) -> list[Path]: if target_board not in VALID_TARGET: raise ValueError(f"Unsupported target: {target_board}") - if use_portable_ops: - portable_ops_str = "portable-ops_" - else: - portable_ops_str = "" - + portable_ops_str = "portable-ops_" if use_portable_ops else "" if target_board in ("corstone-300", "corstone-320"): - elf_path = os.path.join( + build_dir = Path( "arm_test", f"arm_semihosting_executor_runner_{portable_ops_str}{target_board}", - "arm_executor_runner", ) - elif target_board == "vkml_emulation_layer": - elf_path = os.path.join( - f"arm_test/arm_executor_runner_{portable_ops_str}vkml", - "executor_runner", + binary_name = "arm_executor_runner" + else: + build_dir = Path("arm_test", f"arm_executor_runner_{portable_ops_str}vkml") + binary_name = "executor_runner" + + candidates: list[Path] = [] + for root in _elf_search_roots(): + root_build_dir = root / build_dir + candidates.extend( + [ + root_build_dir / binary_name, + root_build_dir / "Release" / binary_name, + root_build_dir / "examples" / "arm" / "executor_runner" / binary_name, + root_build_dir + / "examples" + / 
"arm" + / "executor_runner" + / "Release" + / binary_name, + ] ) - assert_elf_path_exists(elf_path) - return elf_path + unique_candidates: list[Path] = [] + seen: set[Path] = set() + for candidate in candidates: + resolved = candidate.resolve(strict=False) + if resolved not in seen: + unique_candidates.append(resolved) + seen.add(resolved) + return unique_candidates + + +def _resolve_existing_elf_path(elf_candidates: Iterable[Path]) -> Path: + checked: list[Path] = [] + for elf_path in elf_candidates: + checked.append(elf_path) + if elf_path.exists(): + return elf_path + + checked_paths = ", ".join(str(path) for path in checked) + raise FileNotFoundError( + "Did not find build arm_executor_runner or executor_runner. " + f"Tried: {checked_paths}. " + "Run setup_testing.sh or setup_testing_vkml.sh?" + ) + + +def get_elf_path(target_board: str, use_portable_ops: bool = False) -> str: + elf_path = _resolve_existing_elf_path( + _elf_path_candidates(target_board, use_portable_ops=use_portable_ops) + ) + return str(elf_path) def arm_executor_runner_exists(target_board: str, use_portable_ops: bool = False): diff --git a/examples/arm/executor_runner/CMakeLists.txt b/examples/arm/executor_runner/CMakeLists.txt index c169f5d447a..267058d10cb 100644 --- a/examples/arm/executor_runner/CMakeLists.txt +++ b/examples/arm/executor_runner/CMakeLists.txt @@ -3,8 +3,41 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -cmake_minimum_required(VERSION 3.20) -project(arm_executor_runner) +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + cmake_minimum_required(VERSION 3.20) + project(arm_executor_runner_redirect LANGUAGES C CXX) + message( + FATAL_ERROR + "Configure standalone arm_executor_runner builds from ${CMAKE_CURRENT_LIST_DIR}/standalone instead of ${CMAKE_CURRENT_LIST_DIR}." + ) +endif() + +get_filename_component( + _default_executorch_root "${CMAKE_CURRENT_LIST_DIR}/../../.." 
ABSOLUTE +) + +if(NOT DEFINED EXECUTORCH_ROOT) + set(EXECUTORCH_ROOT + "${_default_executorch_root}" + CACHE PATH "Path to an ExecuTorch checkout" + ) +endif() + +set(ET_DIR_PATH + "${EXECUTORCH_ROOT}" + CACHE PATH "Kept for backward compatibility; synonym for EXECUTORCH_ROOT" +) + +if(NOT EXISTS "${EXECUTORCH_ROOT}/CMakeLists.txt") + message( + FATAL_ERROR + "EXECUTORCH_ROOT (${EXECUTORCH_ROOT}) does not contain an ExecuTorch CMake project." + ) +endif() + +if(NOT COMMAND executorch_target_link_options_shared_lib) + include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) +endif() option( ET_MODEL_PTE_ADDR @@ -21,6 +54,12 @@ option(ET_LOG_DUMP_INPUT "Dump input in log" OFF) option(ET_LOG_DUMP_OUTPUT "Dump output in log" ON) option(ET_BUNDLE_IO "Set to compile in BundleIO support" OFF) +set(BUNDLED_PROGRAM_LIBRARY_DIR + "" + CACHE + PATH + "Optional directory that contains a prebuilt libbundled_program.a when ET_BUNDLE_IO is enabled without building devtools." +) set(ET_ATOL "0.01" CACHE STRING "Set atol to use for BundleIO testing (Requires ET_BUNDLE_IO)" @@ -154,13 +193,8 @@ message( add_corstone_subdirectory(${SYSTEM_CONFIG} ${ETHOS_SDK_PATH}) configure_timing_adapters(${SYSTEM_CONFIG} ${MEMORY_MODE}) -# Dependencies from the ExecuTorch build -find_package( - executorch REQUIRED HINTS "${ET_BUILD_DIR_PATH}/lib/cmake/ExecuTorch" -) - # Convert pte to header -if(NOT ${ET_MODEL_PTE_ADDR} AND NOT SEMIHOSTING) +if(NOT "${ET_MODEL_PTE_ADDR}" AND NOT SEMIHOSTING) add_custom_target( gen_model_header DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h ) @@ -199,12 +233,12 @@ endif() # Proceed with specific actions if either is found if(NOT U55_FOUND EQUAL -1) message(STATUS "SYSTEM_CONFIG contains 'U55'.") - set(LINK_FILE_IN "${CMAKE_SOURCE_DIR}/Corstone-300.ld") + set(LINK_FILE_IN "${CMAKE_CURRENT_LIST_DIR}/Corstone-300.ld") endif() if(NOT U85_FOUND EQUAL -1) message(STATUS "SYSTEM_CONFIG contains 'U85'.") - set(LINK_FILE_IN "${CMAKE_SOURCE_DIR}/Corstone-320.ld") + 
set(LINK_FILE_IN "${CMAKE_CURRENT_LIST_DIR}/Corstone-320.ld") endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") @@ -213,9 +247,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(COMPILER_PREPROCESSOR_OPTIONS -E -x c -P) endif() -get_filename_component(LINK_FILE_OUT_BASE ${LINK_FILE} NAME) +get_filename_component(LINK_FILE_OUT_BASE "${LINK_FILE}" NAME) set(LINK_FILE_OUT - ${CMAKE_CURRENT_BINARY_DIR}/${LINK_FILE_OUT_BASE}.${LINK_FILE_EXT} + "${CMAKE_CURRENT_BINARY_DIR}/${LINK_FILE_OUT_BASE}.${LINK_FILE_EXT}" ) execute_process( @@ -333,7 +367,50 @@ if(EXECUTORCH_ENABLE_EVENT_TRACER) endif() if(ET_BUNDLE_IO) - list(APPEND arm_executor_runner_link bundled_program) + if(TARGET bundled_program) + list(APPEND arm_executor_runner_link bundled_program) + target_link_directories( + arm_executor_runner PRIVATE $ + ) + else() + set(_bundled_program_library "") + set(_bundled_program_search_paths "") + if(ET_BUILD_DIR_PATH) + list( + APPEND + _bundled_program_search_paths + "${ET_BUILD_DIR_PATH}" + "${ET_BUILD_DIR_PATH}/lib" + "${ET_BUILD_DIR_PATH}/devtools/bundled_program" + "${ET_BUILD_DIR_PATH}/devtools/bundled_program/lib" + ) + endif() + if(BUNDLED_PROGRAM_LIBRARY_DIR) + list(APPEND _bundled_program_search_paths + "${BUNDLED_PROGRAM_LIBRARY_DIR}" + ) + endif() + if(_bundled_program_search_paths) + list(REMOVE_DUPLICATES _bundled_program_search_paths) + find_library( + _bundled_program_library + NAMES bundled_program + PATHS ${_bundled_program_search_paths} + NO_DEFAULT_PATH + ) + endif() + if(NOT _bundled_program_library) + find_library(_bundled_program_library NAMES bundled_program) + endif() + if(_bundled_program_library) + list(APPEND arm_executor_runner_link ${_bundled_program_library}) + else() + message( + FATAL_ERROR + "ET_BUNDLE_IO enabled but bundled_program is unavailable. Either configure this build with EXECUTORCH_BUILD_DEVTOOLS=ON so the target exists or set BUNDLED_PROGRAM_LIBRARY_DIR/ET_BUILD_DIR_PATH to a build directory that contains libbundled_program.a." 
+ ) + endif() + endif() endif() # Need whole-archive to ensure C++ ctor's are called - this may be wasteful for @@ -386,7 +463,7 @@ target_compile_definitions( arm_executor_runner PRIVATE C10_USING_CUSTOM_GENERATED_MACROS ) -if(NOT ${ET_MODEL_PTE_ADDR} AND NOT SEMIHOSTING) +if(NOT "${ET_MODEL_PTE_ADDR}" AND NOT SEMIHOSTING) add_dependencies(arm_executor_runner gen_model_header) endif() diff --git a/examples/arm/executor_runner/pte_to_header.py b/examples/arm/executor_runner/pte_to_header.py index 65213bc729e..8656ac5abdf 100644 --- a/examples/arm/executor_runner/pte_to_header.py +++ b/examples/arm/executor_runner/pte_to_header.py @@ -1,6 +1,7 @@ +#!/usr/bin/env python3 # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. -# Copyright 2023-2025 Arm Limited and/or its affiliates. +# Copyright 2023-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. diff --git a/examples/arm/executor_runner/standalone/CMakeLists.txt b/examples/arm/executor_runner/standalone/CMakeLists.txt new file mode 100644 index 00000000000..f1dd9f315fc --- /dev/null +++ b/examples/arm/executor_runner/standalone/CMakeLists.txt @@ -0,0 +1,106 @@ +# Copyright 2026 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +cmake_minimum_required(VERSION 3.20) +project(arm_executor_runner LANGUAGES C CXX) + +get_filename_component( + _default_executorch_root "${CMAKE_CURRENT_LIST_DIR}/../../../.." 
ABSOLUTE +) + +if(NOT DEFINED EXECUTORCH_ROOT) + set(EXECUTORCH_ROOT + "${_default_executorch_root}" + CACHE PATH "Path to an ExecuTorch checkout" + ) +endif() + +if(NOT EXISTS "${EXECUTORCH_ROOT}/CMakeLists.txt") + if(EXISTS "${_default_executorch_root}/CMakeLists.txt") + message( + WARNING + "EXECUTORCH_ROOT (${EXECUTORCH_ROOT}) does not contain an ExecuTorch CMake project. Falling back to ${_default_executorch_root}." + ) + set(EXECUTORCH_ROOT + "${_default_executorch_root}" + CACHE PATH "Path to an ExecuTorch checkout" FORCE + ) + else() + message( + FATAL_ERROR + "EXECUTORCH_ROOT (${EXECUTORCH_ROOT}) does not contain an ExecuTorch CMake project." + ) + endif() +endif() + +set(ARM_EXECUTOR_RUNNER_STANDALONE + ON + CACHE BOOL + "Indicates arm_executor_runner was configured as a standalone project" + FORCE +) + +# Load the preset helper so standalone builds inherit the same defaults as the +# superbuild (toolchains, delegated targets, devtools options, etc.). +set(_executorch_preset_cmake + "${EXECUTORCH_ROOT}/tools/cmake/common/preset.cmake" +) +if(EXISTS "${_executorch_preset_cmake}") + include("${_executorch_preset_cmake}") + if(NOT DEFINED EXECUTORCH_BUILD_PRESET_FILE) + set(EXECUTORCH_BUILD_PRESET_FILE + "${EXECUTORCH_ROOT}/tools/cmake/preset/arm_baremetal.cmake" + CACHE PATH "Preset used when configuring the standalone runner" + ) + endif() + load_build_preset() +endif() +include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) + +option(ARM_EXECUTOR_RUNNER_SKIP_INSTALL_RULES + "Skip install() rules for standalone arm_executor_runner builds" ON +) +if(DEFINED CMAKE_SKIP_INSTALL_RULES) + set(_arm_runner_skip_install_rules "${CMAKE_SKIP_INSTALL_RULES}") +endif() +if(ARM_EXECUTOR_RUNNER_SKIP_INSTALL_RULES) + set(CMAKE_SKIP_INSTALL_RULES ON) +endif() + +foreach( + _opt + EXECUTORCH_BUILD_ARM_BAREMETAL EXECUTORCH_BUILD_CORTEX_M + EXECUTORCH_BUILD_KERNELS_QUANTIZED EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL +) + if(NOT DEFINED ${_opt}) + set(${_opt} + ON + CACHE BOOL "" 
FORCE + ) + endif() +endforeach() +set(EXECUTORCH_SKIP_ARM_EXECUTOR_RUNNER + ON + CACHE BOOL "" FORCE +) + +# Pull ExecuTorch in-tree so all required targets (delegates, kernels, runner +# util, etc.) are built from this checkout. +add_subdirectory( + ${EXECUTORCH_ROOT} ${CMAKE_BINARY_DIR}/executorch EXCLUDE_FROM_ALL +) + +if(ARM_EXECUTOR_RUNNER_SKIP_INSTALL_RULES) + if(DEFINED _arm_runner_skip_install_rules) + set(CMAKE_SKIP_INSTALL_RULES "${_arm_runner_skip_install_rules}") + else() + unset(CMAKE_SKIP_INSTALL_RULES) + endif() +endif() + +add_subdirectory( + ${EXECUTORCH_ROOT}/examples/arm/executor_runner + ${CMAKE_BINARY_DIR}/examples/arm/executor_runner +) From c6d3870c2bed3c2c031e9496454c892861846b14 Mon Sep 17 00:00:00 2001 From: Usamah Zaheer Date: Tue, 7 Apr 2026 13:04:55 +0100 Subject: [PATCH 02/11] Arm backend: Manage Arm Ethos-U SDK setup and PTE inputs - Auto-detect Python and corstone helpers so standalone builds mirror setup.sh and run.sh. - Reuse the in-tree Ethos-U core driver unless a custom path is supplied and optionally fetch the SDK into arm-scratch. - Validate delegate prerequisites and enforce ET_PTE_FILE_PATH behavior. - Halt when no PTE or semihosting mode is provided. Change-Id: Iadd5dcd5e1a12dca7a00117c7778e9580364294a Signed-off-by: Usamah Zaheer --- backends/arm/cmake/ArmEthosUSDK.cmake | 60 +++++++++++++++ backends/arm/cmake/ArmRunnerUtils.cmake | 32 ++++++++ examples/arm/executor_runner/CMakeLists.txt | 82 ++++++++++++--------- 3 files changed, 138 insertions(+), 36 deletions(-) create mode 100644 backends/arm/cmake/ArmEthosUSDK.cmake create mode 100644 backends/arm/cmake/ArmRunnerUtils.cmake diff --git a/backends/arm/cmake/ArmEthosUSDK.cmake b/backends/arm/cmake/ArmEthosUSDK.cmake new file mode 100644 index 00000000000..03affdf69bb --- /dev/null +++ b/backends/arm/cmake/ArmEthosUSDK.cmake @@ -0,0 +1,60 @@ +# Copyright 2026 Arm Limited and/or its affiliates. 
+# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +include_guard(GLOBAL) + +function(arm_ethos_u_content_ready SDK_PATH OUT_VAR) + if(EXISTS "${SDK_PATH}/core_platform" AND EXISTS "${SDK_PATH}/core_software") + set(${OUT_VAR} + TRUE + PARENT_SCOPE + ) + else() + set(${OUT_VAR} + FALSE + PARENT_SCOPE + ) + endif() +endfunction() + +function(arm_ethos_u_default_fetch SDK_PATH OUT_VAR) + arm_ethos_u_content_ready("${SDK_PATH}" _arm_ethos_ready) + if(_arm_ethos_ready) + set(${OUT_VAR} + OFF + PARENT_SCOPE + ) + else() + set(${OUT_VAR} + ON + PARENT_SCOPE + ) + endif() +endfunction() + +function(arm_ensure_ethos_u_content SDK_PATH EXECUTORCH_ROOT FETCH_REQUESTED) + arm_ethos_u_content_ready("${SDK_PATH}" _arm_ethos_ready_before) + + if(_arm_ethos_ready_before) + return() + endif() + + if(NOT FETCH_REQUESTED) + message( + FATAL_ERROR + "No Ethos-U content found at ${SDK_PATH}. Run examples/arm/setup.sh or enable FETCH_ETHOS_U_CONTENT=ON." + ) + endif() + + fetch_ethos_u_content(${SDK_PATH} ${EXECUTORCH_ROOT}) + + arm_ethos_u_content_ready("${SDK_PATH}" _arm_ethos_ready_after) + if(NOT _arm_ethos_ready_after) + message( + FATAL_ERROR + "Failed to fetch Ethos-U content into ${SDK_PATH}. Inspect the logs above." + ) + endif() +endfunction() diff --git a/backends/arm/cmake/ArmRunnerUtils.cmake b/backends/arm/cmake/ArmRunnerUtils.cmake new file mode 100644 index 00000000000..8cbce2b82f4 --- /dev/null +++ b/backends/arm/cmake/ArmRunnerUtils.cmake @@ -0,0 +1,32 @@ +# Copyright 2026 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +include_guard(GLOBAL) + +# Helper routines shared by the standalone runner and any superbuild that reuses +# the runner targets. 
+ +function(arm_runner_require_baremetal_targets) + if(NOT TARGET extension_runner_util) + message( + FATAL_ERROR + "extension_runner_util target missing. Configure ExecuTorch (or the standalone runner) with EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON." + ) + endif() + + if(NOT TARGET quantized_ops_lib OR NOT TARGET quantized_kernels) + message( + FATAL_ERROR + "quantized kernels not found. Ensure EXECUTORCH_BUILD_KERNELS_QUANTIZED=ON when configuring ExecuTorch." + ) + endif() + + if(NOT TARGET cortex_m_ops_lib OR NOT TARGET cortex_m_kernels) + message( + FATAL_ERROR + "cortex_m backend not found. Ensure EXECUTORCH_BUILD_CORTEX_M=ON when configuring ExecuTorch." + ) + endif() +endfunction() diff --git a/examples/arm/executor_runner/CMakeLists.txt b/examples/arm/executor_runner/CMakeLists.txt index 267058d10cb..97a13cb6c84 100644 --- a/examples/arm/executor_runner/CMakeLists.txt +++ b/examples/arm/executor_runner/CMakeLists.txt @@ -94,13 +94,46 @@ option( OFF ) +if(NOT DEFINED PYTHON_EXECUTABLE) + find_package( + Python3 + COMPONENTS Interpreter + REQUIRED + ) + set(PYTHON_EXECUTABLE "${Python3_EXECUTABLE}") +endif() + +include(${EXECUTORCH_ROOT}/backends/arm/scripts/corstone_utils.cmake) +include(${EXECUTORCH_ROOT}/backends/arm/cmake/ArmEthosUSDK.cmake) +include(${EXECUTORCH_ROOT}/backends/arm/cmake/ArmRunnerUtils.cmake) + +arm_runner_require_baremetal_targets() + +# Keep the default scratch location aligned with the scratch tree used by +# setup.sh/run.sh so developers who just ran those scripts do not need extra +# CMake flags. 
+set(ETHOS_SDK_PATH + "${EXECUTORCH_ROOT}/examples/arm/arm-scratch/ethos-u" + CACHE PATH "Path to Ethos-U bare metal driver/env" +) + +arm_ethos_u_default_fetch("${ETHOS_SDK_PATH}" _fetch_ethos_u_default) option(FETCH_ETHOS_U_CONTENT - "Fetch ethos_u dependencies instead of relying on pre-downloads" ON + "Fetch ethos_u dependencies instead of relying on pre-downloads" + ${_fetch_ethos_u_default} +) +arm_ensure_ethos_u_content( + "${ETHOS_SDK_PATH}" "${EXECUTORCH_ROOT}" ${FETCH_ETHOS_U_CONTENT} +) + +set(ET_PTE_FILE_PATH + "" + CACHE PATH "Path to ExecuTorch model pte" ) -if(NOT DEFINED ET_MODEL_PTE_ADDR - AND NOT DEFINED ET_PTE_FILE_PATH - AND NOT DEFINED SEMIHOSTING +if(NOT ET_MODEL_PTE_ADDR + AND "${ET_PTE_FILE_PATH}" STREQUAL "" + AND NOT SEMIHOSTING ) message( FATAL_ERROR @@ -111,39 +144,16 @@ if(NOT DEFINED ET_MODEL_PTE_ADDR ) endif() -# Example ExecuTorch demo for bare metal Cortex-M based systems -set(ET_DIR_PATH - "${CMAKE_CURRENT_SOURCE_DIR}/../../.." - CACHE PATH "Path to ExecuTorch dir" -) -include(${ET_DIR_PATH}/tools/cmake/Utils.cmake) -set(ET_BUILD_DIR_PATH - "${ET_DIR_PATH}/cmake-out-arm" - CACHE PATH "Path to ExecuTorch build/install dir" -) -set(ET_INCLUDE_PATH - "${ET_DIR_PATH}/.." - CACHE PATH "Path to ExecuTorch headers" -) -set(ET_PTE_FILE_PATH - "" - CACHE PATH "Path to ExecuTorch model pte" +if(NOT SEMIHOSTING + AND NOT ET_MODEL_PTE_ADDR + AND NOT "${ET_PTE_FILE_PATH}" STREQUAL "" ) -set(ETHOS_SDK_PATH - "${ET_DIR_PATH}/examples/arm/arm-scratch/ethos-u" - CACHE PATH "Path to Ethos-U bare metal driver/env" -) -set(PYTHON_EXECUTABLE - "python" - CACHE PATH "Define to override python executable used" -) - -# Include corstone help functions -include(${ET_DIR_PATH}/backends/arm/scripts/corstone_utils.cmake) - -if(FETCH_ETHOS_U_CONTENT) - # Download ethos_u dependency if needed. 
- fetch_ethos_u_content(${ETHOS_SDK_PATH} ${ET_DIR_PATH}) + if(NOT EXISTS "${ET_PTE_FILE_PATH}") + message( + FATAL_ERROR + "ET_PTE_FILE_PATH is set to ${ET_PTE_FILE_PATH}, but no file was found. Generate the model first or point ET_PTE_FILE_PATH at an existing .pte/.bpte." + ) + endif() endif() # Selects timing adapter values matching system_config. Default is From 16ec54453483636ca441ec1fbbde3a4f684cbe24 Mon Sep 17 00:00:00 2001 From: Usamah Zaheer Date: Tue, 7 Apr 2026 13:07:05 +0100 Subject: [PATCH 03/11] Arm backend: Hook selective build into the standalone runner - Call gen_oplist.py through the configured Python interpreter only when a model PTE exists. - Reference CMAKE_CURRENT_SOURCE_DIR for generated headers and linker scripts so out-of-tree builds resolve paths correctly. - Normalize runner outputs and sanitizer helpers so the standalone build mirrors the superbuild. - When BundleIO reuses a separate ExecuTorch build tree where bundled_program is not part of this CMake graph, restrict the fallback lookup to caller-provided build directories so the runner does not pick up an unrelated host library. Change-Id: I9932d8d7434e8a834b21ac9bbf290361d7ec117b Signed-off-by: Usamah Zaheer --- backends/arm/cmake/ArmRunnerUtils.cmake | 37 +++++++++ examples/arm/executor_runner/CMakeLists.txt | 90 ++++++++++++++------- 2 files changed, 99 insertions(+), 28 deletions(-) diff --git a/backends/arm/cmake/ArmRunnerUtils.cmake b/backends/arm/cmake/ArmRunnerUtils.cmake index 8cbce2b82f4..e67f38eec22 100644 --- a/backends/arm/cmake/ArmRunnerUtils.cmake +++ b/backends/arm/cmake/ArmRunnerUtils.cmake @@ -30,3 +30,40 @@ function(arm_runner_require_baremetal_targets) ) endif() endfunction() + +# Ensure a runner target emits its binary to a predictable location. Uses +# FALLBACK_DIR when TARGET_NAME has no runtime output directory set, and also +# fills per-configuration runtime output directories for multi-config generators +# when they are unset. 
+function(arm_runner_configure_runtime_output TARGET_NAME FALLBACK_DIR) + if(NOT TARGET ${TARGET_NAME}) + return() + endif() + + get_target_property(_base_runtime_dir ${TARGET_NAME} RUNTIME_OUTPUT_DIRECTORY) + if(NOT _base_runtime_dir + OR _base_runtime_dir STREQUAL "_base_runtime_dir-NOTFOUND" + OR "${_base_runtime_dir}" STREQUAL "" + ) + set_target_properties( + ${TARGET_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${FALLBACK_DIR}" + ) + set(_base_runtime_dir "${FALLBACK_DIR}") + endif() + + if(CMAKE_CONFIGURATION_TYPES) + foreach(_cfg ${CMAKE_CONFIGURATION_TYPES}) + string(TOUPPER ${_cfg} _cfg_upper) + set(_cfg_prop "RUNTIME_OUTPUT_DIRECTORY_${_cfg_upper}") + get_target_property(_cfg_dir ${TARGET_NAME} ${_cfg_prop}) + if(NOT _cfg_dir + OR _cfg_dir STREQUAL "_cfg_dir-NOTFOUND" + OR "${_cfg_dir}" STREQUAL "" + ) + set_target_properties( + ${TARGET_NAME} PROPERTIES ${_cfg_prop} "${_base_runtime_dir}/${_cfg}" + ) + endif() + endforeach() + endif() +endfunction() diff --git a/examples/arm/executor_runner/CMakeLists.txt b/examples/arm/executor_runner/CMakeLists.txt index 97a13cb6c84..d84947a75ad 100644 --- a/examples/arm/executor_runner/CMakeLists.txt +++ b/examples/arm/executor_runner/CMakeLists.txt @@ -27,6 +27,14 @@ set(ET_DIR_PATH "${EXECUTORCH_ROOT}" CACHE PATH "Kept for backward compatibility; synonym for EXECUTORCH_ROOT" ) +if(NOT DEFINED ET_INCLUDE_PATH) + set(ET_INCLUDE_PATH + "${EXECUTORCH_ROOT}" + CACHE + PATH + "Kept for backward compatibility; include root for ExecuTorch headers" + ) +endif() if(NOT EXISTS "${EXECUTORCH_ROOT}/CMakeLists.txt") message( @@ -203,6 +211,22 @@ message( add_corstone_subdirectory(${SYSTEM_CONFIG} ${ETHOS_SDK_PATH}) configure_timing_adapters(${SYSTEM_CONFIG} ${MEMORY_MODE}) +if(NOT CMAKE_SKIP_INSTALL_RULES AND TARGET ethosu_core_driver) + get_property( + _et_ethosu_core_driver_exported GLOBAL + PROPERTY ET_ETHOSU_CORE_DRIVER_EXPORTED + ) + if(NOT _et_ethosu_core_driver_exported) + install( + TARGETS ethosu_core_driver + EXPORT 
ExecuTorchTargets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + set_property(GLOBAL PROPERTY ET_ETHOSU_CORE_DRIVER_EXPORTED TRUE) + endif() +endif() + # Convert pte to header if(NOT "${ET_MODEL_PTE_ADDR}" AND NOT SEMIHOSTING) add_custom_target( @@ -211,10 +235,9 @@ if(NOT "${ET_MODEL_PTE_ADDR}" AND NOT SEMIHOSTING) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h - COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/pte_to_header.py --pte - ${ET_PTE_FILE_PATH} --outdir ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/pte_to_header.py + --pte ${ET_PTE_FILE_PATH} --outdir ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${ET_PTE_FILE_PATH} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) endif() @@ -295,26 +318,31 @@ list( # (user-set)SELECT_OPS_MODEL variable. For normal build, use # EXECUTORCH_SELECT_OPS_MODEL to include ops automatically. If the pte contains # no undelegated ops, use neither. -execute_process( - COMMAND - python "${ET_DIR_PATH}/codegen/tools/gen_oplist.py" - --model_file_path=${ET_PTE_FILE_PATH} - --output_path=${CMAKE_CURRENT_BINARY_DIR}/temp.yaml - OUTPUT_VARIABLE CMD_RESULT +set(FOUND_OPS_IN_FILE FALSE) +if(NOT SEMIHOSTING + AND NOT ET_MODEL_PTE_ADDR + AND NOT "${ET_PTE_FILE_PATH}" STREQUAL "" + AND EXISTS "${ET_PTE_FILE_PATH}" ) + execute_process( + COMMAND + ${PYTHON_EXECUTABLE} "${EXECUTORCH_ROOT}/codegen/tools/gen_oplist.py" + --model_file_path=${ET_PTE_FILE_PATH} + --output_path=${CMAKE_CURRENT_BINARY_DIR}/temp.yaml + OUTPUT_VARIABLE CMD_RESULT + ) -if(CMD_RESULT MATCHES "aten::" OR CMD_RESULT MATCHES "dim_order_ops::") - set(FOUND_OPS_IN_FILE "true") -else() - set(FOUND_OPS_IN_FILE "false") + if(CMD_RESULT MATCHES "aten::" OR CMD_RESULT MATCHES "dim_order_ops::") + set(FOUND_OPS_IN_FILE TRUE) + endif() endif() -if(${SEMIHOSTING}) +if(SEMIHOSTING) set(EXECUTORCH_SELECT_OPS_MODEL "") message( "gen_oplist: Building with semihosting, no model is used 
to auto generate ops from will use EXECUTORCH_SELECT_OPS_LIST=${EXECUTORCH_SELECT_OPS_LIST}" ) -elseif(${FOUND_OPS_IN_FILE}) +elseif(FOUND_OPS_IN_FILE) set(EXECUTORCH_SELECT_OPS_LIST "") set(EXECUTORCH_SELECT_OPS_MODEL "${ET_PTE_FILE_PATH}") message( @@ -333,10 +361,6 @@ endif() if(NOT ("${EXECUTORCH_SELECT_OPS_LIST}" STREQUAL "" AND "${EXECUTORCH_SELECT_OPS_MODEL}" STREQUAL "") ) - set(EXECUTORCH_ROOT ${ET_DIR_PATH}) - include(${ET_DIR_PATH}/tools/cmake/Utils.cmake) - include(${ET_DIR_PATH}/tools/cmake/Codegen.cmake) - gen_selected_ops( LIB_NAME "arm_portable_ops_lib" @@ -354,7 +378,7 @@ if(NOT ("${EXECUTORCH_SELECT_OPS_LIST}" STREQUAL "" generate_bindings_for_kernels( LIB_NAME "arm_portable_ops_lib" FUNCTIONS_YAML - ${ET_DIR_PATH}/kernels/portable/functions.yaml DTYPE_SELECTIVE_BUILD + ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml DTYPE_SELECTIVE_BUILD "${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}" ) gen_operators_lib( @@ -402,6 +426,10 @@ if(ET_BUNDLE_IO) endif() if(_bundled_program_search_paths) list(REMOVE_DUPLICATES _bundled_program_search_paths) + # BundleIO can reuse a separate ExecuTorch build tree where + # bundled_program is not part of this CMake graph. Restrict the fallback + # lookup to the caller-provided build directories so we do not + # accidentally pick up an unrelated library from the host system. 
find_library( _bundled_program_library NAMES bundled_program @@ -409,9 +437,6 @@ if(ET_BUNDLE_IO) NO_DEFAULT_PATH ) endif() - if(NOT _bundled_program_library) - find_library(_bundled_program_library NAMES bundled_program) - endif() if(_bundled_program_library) list(APPEND arm_executor_runner_link ${_bundled_program_library}) else() @@ -427,9 +452,15 @@ endif() # bin size as we link in a number of other symbols target_link_libraries(arm_executor_runner PUBLIC ${arm_executor_runner_link}) +# Ensure the ELF lands next to the CMake build tree so run.sh (and downstream +# tooling) can locate it deterministically regardless of multi-config vs +# single-config generators. target_link_options( arm_executor_runner PUBLIC LINKER:-Map=arm_executor_runner.map ) +# Reuse a parent build's output directory if it already set one; otherwise keep +# the runner ELF next to this build tree so run.sh can find it predictably. +arm_runner_configure_runtime_output(arm_executor_runner "${CMAKE_BINARY_DIR}") # Sanitizers if(CMAKE_BUILD_TYPE MATCHES "UndefinedSanitizer") @@ -438,7 +469,7 @@ if(CMAKE_BUILD_TYPE MATCHES "UndefinedSanitizer") target_link_options(arm_executor_runner PRIVATE ${_et_runner_ubsan_flag}) if(NOT TARGET executorch_ubsan) add_subdirectory( - ${ET_DIR_PATH}/examples/arm/ubsan + ${EXECUTORCH_ROOT}/examples/arm/ubsan ${CMAKE_CURRENT_BINARY_DIR}/ubsan_runtime ) endif() @@ -454,7 +485,8 @@ if(CMAKE_BUILD_TYPE MATCHES "AddressSanitizer") target_link_options(arm_executor_runner PRIVATE ${_et_runner_asan_flags}) if(NOT TARGET executorch_asan) add_subdirectory( - ${ET_DIR_PATH}/examples/arm/asan ${CMAKE_CURRENT_BINARY_DIR}/asan_runtime + ${EXECUTORCH_ROOT}/examples/arm/asan + ${CMAKE_CURRENT_BINARY_DIR}/asan_runtime ) endif() target_link_libraries(arm_executor_runner PRIVATE executorch_asan) @@ -464,10 +496,12 @@ if(CMAKE_BUILD_TYPE MATCHES "AddressSanitizer") endif() # ET headers and generated headers includes +set(_arm_runner_include_dirs + ${ET_INCLUDE_PATH} 
${ET_INCLUDE_PATH}/runtime/core/portable_type/c10 + ${CMAKE_CURRENT_BINARY_DIR} +) target_include_directories( - arm_executor_runner - PRIVATE ${ET_INCLUDE_PATH} ${ET_DIR_PATH}/runtime/core/portable_type/c10 - ${CMAKE_CURRENT_BINARY_DIR} + arm_executor_runner PRIVATE ${_arm_runner_include_dirs} ) target_compile_definitions( arm_executor_runner PRIVATE C10_USING_CUSTOM_GENERATED_MACROS From 47b328dc0026a32e5d74ee619d255f49d173fd52 Mon Sep 17 00:00:00 2001 From: Usamah Zaheer Date: Tue, 7 Apr 2026 11:52:43 +0100 Subject: [PATCH 04/11] Arm backend: Make Arm bare-metal install/export subproject friendly - Honor EXECUTORCH_BAREMETAL_SKIP_INSTALL so embedders can disable install() rules. - Propagate Ethos-U delegate includes, install the core driver when available, and copy CMSIS-NN headers for downstream toolchains. - Route the arm_baremetal preset install output back into the build tree to keep standalone builds self-contained. Change-Id: I84bb6a1ad64a404e10e8ce8897167e595b8b82fa Signed-off-by: Usamah Zaheer --- CMakeLists.txt | 17 ++++++++++++++++ backends/arm/CMakeLists.txt | 27 +++++++++++++++++++++++--- backends/cortex_m/CMakeLists.txt | 20 +++++++++++++++++++ tools/cmake/preset/arm_baremetal.cmake | 25 ++++++++++++++++++++++-- 4 files changed, 84 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ce0def6000b..7bb4ba5ac58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,6 +160,23 @@ announce_configured_options(BUILD_TESTING) load_build_preset() include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake) +# Keep bare-metal installs enabled only when ExecuTorch owns the top-level +# build. Standalone consumers (e.g., the runner) set +# EXECUTORCH_BAREMETAL_SKIP_INSTALL=ON but still add ExecuTorch as a subproject, +# which cannot satisfy our install() export dependencies until their own targets +# are configured. 
+if(DEFINED EXECUTORCH_BAREMETAL_SKIP_INSTALL + AND EXECUTORCH_BAREMETAL_SKIP_INSTALL + AND NOT (CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) +) + set(CMAKE_SKIP_INSTALL_RULES + ON + CACHE BOOL + "Skip install() rules when ExecuTorch is consumed as a subproject" + FORCE + ) +endif() + # Enable ccache if available find_program(CCACHE_PROGRAM ccache) if(CCACHE_PROGRAM) diff --git a/backends/arm/CMakeLists.txt b/backends/arm/CMakeLists.txt index 0c8b241522c..d8a6c1afce7 100644 --- a/backends/arm/CMakeLists.txt +++ b/backends/arm/CMakeLists.txt @@ -63,17 +63,20 @@ if(EXECUTORCH_BUILD_ARM_BAREMETAL OR EXECUTORCH_BUILD_ARM_ETHOSU_LINUX) add_library(executorch_delegate_ethos_u STATIC ${_arm_backend_sources}) target_link_libraries(executorch_delegate_ethos_u PUBLIC executorch_core) + target_include_directories( + executorch_delegate_ethos_u PRIVATE ${_common_include_directories} + ) if(EXECUTORCH_BUILD_ARM_BAREMETAL) target_sources( executorch_delegate_ethos_u PRIVATE ${EXECUTORCH_ROOT}/backends/arm/runtime/EthosUBackend_Cortex_M.cpp ) - set(DRIVER_ETHOSU_INCLUDE_DIR + set(_ethosu_core_driver_include "${THIRD_PARTY_ROOT}/ethos-u-core-driver/include" ) target_include_directories( - executorch_delegate_ethos_u PRIVATE ${DRIVER_ETHOSU_INCLUDE_DIR} + executorch_delegate_ethos_u PRIVATE ${_ethosu_core_driver_include} ) target_link_libraries(executorch_delegate_ethos_u PUBLIC ethosu_core_driver) elseif(EXECUTORCH_BUILD_ARM_ETHOSU_LINUX) @@ -110,7 +113,25 @@ if(EXECUTORCH_BUILD_ARM_BAREMETAL OR EXECUTORCH_BUILD_ARM_ETHOSU_LINUX) ) endif() - install(TARGETS executorch_delegate_ethos_u EXPORT ExecuTorchTargets) + if(NOT CMAKE_SKIP_INSTALL_RULES) + install(TARGETS executorch_delegate_ethos_u EXPORT ExecuTorchTargets) + + if(TARGET ethosu_core_driver) + get_property( + _et_ethosu_core_driver_exported GLOBAL + PROPERTY ET_ETHOSU_CORE_DRIVER_EXPORTED + ) + if(NOT _et_ethosu_core_driver_exported) + install( + TARGETS ethosu_core_driver + EXPORT ExecuTorchTargets + ARCHIVE DESTINATION 
${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + set_property(GLOBAL PROPERTY ET_ETHOSU_CORE_DRIVER_EXPORTED TRUE) + endif() + endif() + endif() endif() diff --git a/backends/cortex_m/CMakeLists.txt b/backends/cortex_m/CMakeLists.txt index 8c8255b7b1b..876c65982e6 100644 --- a/backends/cortex_m/CMakeLists.txt +++ b/backends/cortex_m/CMakeLists.txt @@ -50,6 +50,26 @@ else() FetchContent_MakeAvailable(cmsis_nn) endif() +if(TARGET cmsis-nn) + if(CMSIS_NN_LOCAL_PATH AND EXISTS "${CMSIS_NN_LOCAL_PATH}") + set(cmsis_nn_source_dir "${CMSIS_NN_LOCAL_PATH}") + else() + set(cmsis_nn_source_dir "${cmsis_nn_SOURCE_DIR}") + endif() + if(cmsis_nn_source_dir) + set(cmsis_nn_include_dir "${cmsis_nn_source_dir}/Include") + set_target_properties( + cmsis-nn + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES + "$;$" + ) + install(DIRECTORY "${cmsis_nn_include_dir}/" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/cmsis-nn" + ) + endif() +endif() + # Cortex-M ops kernel sources set(_cortex_m_kernels__srcs ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_dequantize_per_tensor.cpp diff --git a/tools/cmake/preset/arm_baremetal.cmake b/tools/cmake/preset/arm_baremetal.cmake index 882780ade1d..c12cc95233a 100644 --- a/tools/cmake/preset/arm_baremetal.cmake +++ b/tools/cmake/preset/arm_baremetal.cmake @@ -1,9 +1,30 @@ -# Copyright 2025 Arm Limited and/or its affiliates. +# Copyright 2025-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}") +define_overridable_option( + EXECUTORCH_BAREMETAL_SKIP_INSTALL + "Skip emitting install/export rules when building bare-metal artifacts" BOOL + ON +) + +if(EXECUTORCH_BAREMETAL_SKIP_INSTALL) + set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}") + # Bare-metal builds consume the build tree directly. 
Keep the install target + # available (many docs/scripts still invoke it) but route the output back into + # the build tree so nothing is exported outside the repo. + unset(CMAKE_SKIP_INSTALL_RULES CACHE) + set(CMAKE_SKIP_INSTALL_RULES OFF) + set(CMAKE_SKIP_INSTALL_RULES + OFF + CACHE + BOOL + "Retain install() rules so docs/scripts can keep calling `--target install`" + FORCE + ) +endif() + set_overridable_option(EXECUTORCH_BUILD_EXECUTOR_RUNNER OFF) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR OFF) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER OFF) From 5bf4660f1889187badd2824a022822f5ce41efb6 Mon Sep 17 00:00:00 2001 From: Usamah Zaheer Date: Tue, 7 Apr 2026 11:52:52 +0100 Subject: [PATCH 05/11] Arm backend: Keep bare-metal install artifacts for runner reuse - Force EXECUTORCH_BAREMETAL_SKIP_INSTALL=OFF so build_executorch.sh always exports the Arm runner dependencies. - Stop building the install target on non-musl hosts; the default build target already covers what run.sh needs and avoids redundant installs. 
Change-Id: Iecd91e4a3eb275ca67ce6593ebfb06d3d7ec42ef Signed-off-by: Usamah Zaheer --- backends/arm/scripts/build_executorch.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backends/arm/scripts/build_executorch.sh b/backends/arm/scripts/build_executorch.sh index cf7e327b9ce..828cec77ae8 100755 --- a/backends/arm/scripts/build_executorch.sh +++ b/backends/arm/scripts/build_executorch.sh @@ -85,6 +85,7 @@ cmake_args=( -DCMAKE_BUILD_TYPE=${build_type} -DEXECUTORCH_BUILD_DEVTOOLS=${build_devtools} -DEXECUTORCH_BUILD_ARM_ETDUMP=${build_with_etdump} + -DEXECUTORCH_BAREMETAL_SKIP_INSTALL=OFF ) if [[ ${is_linux_musl} -eq 1 ]]; then @@ -108,7 +109,7 @@ parallel_jobs="$(get_parallel_jobs)" if [[ ${is_linux_musl} -eq 1 ]]; then cmake --build ${et_build_dir} -j"${parallel_jobs}" --target executorch_delegate_ethos_u executor_runner --config ${build_type} -- else - cmake --build ${et_build_dir} -j"${parallel_jobs}" --target install --config ${build_type} -- + cmake --build ${et_build_dir} -j"${parallel_jobs}" --config ${build_type} fi set +x From 44afbe7dc8354de04c96c74db7cc46f4c79ea6f8 Mon Sep 17 00:00:00 2001 From: Usamah Zaheer Date: Tue, 7 Apr 2026 12:49:59 +0100 Subject: [PATCH 06/11] Arm backend: Expand run.sh CLI knobs for bare-metal builds - Clarify help text for select_ops_list, toolchain choices, and add --build-dir reuse. - Track whether select_ops_list was overridden, allow arbitrary cmake -D flags, and tidy scratch or toolchain warnings. - Plumb the new option state through the control flow to prepare for automation. Change-Id: I69b027e726eee0b23206e7e3c836db375a8bf5b6 Signed-off-by: Usamah Zaheer --- examples/arm/run.sh | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/examples/arm/run.sh b/examples/arm/run.sh index b18115723b0..960531a2913 100755 --- a/examples/arm/run.sh +++ b/examples/arm/run.sh @@ -29,6 +29,7 @@ output_folder="." 
bundleio=false build_with_etdump=false build_type="Release" +build_dir="" extra_build_flags="" build_only=false system_config="" @@ -40,6 +41,7 @@ arm_scratch_dir=${script_dir}/arm-scratch scratch_dir_set=false toolchain=arm-none-eabi-gcc select_ops_list="aten::_softmax.out" +select_ops_list_overridden=false qdq_fusion_op=false model_explorer=false perf_overlay=false @@ -57,8 +59,7 @@ function help() { echo " --aot_arm_compiler_flags= Extra flags to pass to aot compiler" echo " --no_delegate Do not delegate the model (can't override builtin models)" echo " --no_quantize Do not quantize the model (can't override builtin models)" - echo " --portable_kernels= TO BE DEPRECATED: Alias to select_ops_list." - echo " --select_ops_list= Comma separated list of portable (non delagated) kernels to include Default: ${select_ops_list}" + echo " --select_ops_list= Comma separated list of portable (non delegated) kernels to include. Default: ${select_ops_list}" echo " NOTE: This is only used when building for semihosting." echo " See https://docs.pytorch.org/executorch/stable/kernel-library-selective-build.html for more information." echo " --target= Target to build and run for Default: ${target}" @@ -66,9 +67,10 @@ function help() { echo " --bundleio Create Bundled pte using Devtools BundelIO with Input/RefOutput included" echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log" echo " --build_type= Build with Release, Debug, RelWithDebInfo, UndefinedSanitizer or AddressSanitizer, default is ${build_type}" - echo " --extra_build_flags= Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none " + echo " --build-dir= Optional: reuse an existing arm_executor_runner build directory (configured via 'cmake -S examples/arm/executor_runner -B ...'). If omitted, run.sh auto-configures one under ${et_build_root} for bare-metal targets." 
echo " --build_only Only build, don't run" - echo " --toolchain= Ethos-U: Toolchain can be specified (e.g. bare metal as arm-none-eabi-gcc or zephyr as arm-zephyr-eabi-gcc Default: ${toolchain}" + echo " --extra_build_flags=\"\" Extra -D style flags to pass to cmake when run.sh auto-configures the build" + echo " --toolchain= Toolchain preset to use when run.sh auto-configures the build. Default: ${toolchain}" echo " --system_config= Ethos-U: System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets." echo " NOTE: If given, this option must match the given target. This option also sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt." echo " --config= Ethos-U: System configuration file that specifies system configurations (vela.ini)" @@ -76,11 +78,11 @@ function help() { echo " --pte_placement= Ethos-U: Control if runtime has PTE baked into the elf or if its placed in memory outside of the elf, defaults to ${pte_placement}" echo " --specify_ethosu_scratch Use actual Ethos-U scratch size for given model to size temp allocator" echo " --et_build_root= Executorch build output root folder to use, defaults to ${et_build_root}" - echo " --scratch-dir= Path to your Arm scrach dir if you not using default ${arm_scratch_dir}" + echo " --scratch-dir= Path to your Ethos-U scratch dir if you not using default ${arm_scratch_dir}" echo " --qdq_fusion_op Enable QDQ fusion op" echo " --model_explorer Enable model explorer to visualize a TOSA or PTE model graph." echo " --visualize_pte With --model_explorer, visualize PTE flatbuffer model and delegates. Cannot be used with --visualize_tosa" - echo " NOTE: If PTE contains an Ethos-U delegate, the Ethos-U subgraph will be visualized if aot_arm_compiler_flags is set with the -i flag to include intermediate tosa files." 
+ echo " NOTE: If PTE contains an Ethos-U delegate, the Ethos-U subgraph will be visualized if aot_arm_compiler_flags includes -i for TOSA dumps." echo " --visualize_tosa With --model_explorer, visualize TOSA flatbuffer model. Cannot be used with --visualize_pte" echo " --perf_overlay With --model_explorer and --visualize_tosa, include performance data from FVP PMU trace." exit 0 @@ -94,13 +96,16 @@ for arg in "$@"; do --aot_arm_compiler_flags=*) aot_arm_compiler_flags="${arg#*=}";; --no_delegate) aot_arm_compiler_flag_delegate="" ;; --no_quantize) aot_arm_compiler_flag_quantize="" ;; - --portable_kernels=*) select_ops_list="${arg#*=}" ; echo "WARNING: --portable_kernels is DEPRECATED use select_ops_list." ;; - --select_ops_list=*) select_ops_list="${arg#*=}";; + --select_ops_list=*) + select_ops_list="${arg#*=}" + select_ops_list_overridden=true + ;; --target=*) target="${arg#*=}";; --output=*) output_folder="${arg#*=}" ; output_folder_set=true ;; --bundleio) bundleio=true ;; --etdump) build_with_etdump=true ;; --build_type=*) build_type="${arg#*=}";; + --build-dir=*) build_dir="${arg#*=}";; --extra_build_flags=*) extra_build_flags="${arg#*=}";; --build_only) build_only=true ;; --toolchain=*) toolchain="${arg#*=}";; From 79b2d4286579f4d1924e3f02b1d8ad3c39d8985c Mon Sep 17 00:00:00 2001 From: Usamah Zaheer Date: Tue, 7 Apr 2026 12:50:12 +0100 Subject: [PATCH 07/11] Arm backend: Auto-configure Arm backend runner builds via presets - Auto-derive arm_executor_runner build directories when --build-dir is omitted and configure them with the arm_baremetal preset. - Add validation helpers that ensure standalone builds were configured with the right targets, toolchains, and BundledIO/devtools toggles. - Teach the script to stage PTEs, reuse multi-config build trees, and drive FVP/BundleIO workflows from a single entry point. 
Change-Id: If52327a1bc512c87fd2ce5d9ce89c352919fd447 Signed-off-by: Usamah Zaheer --- backends/arm/scripts/run_fvp.sh | 4 +- backends/arm/test/test_arm_baremetal.sh | 11 +- examples/arm/run.sh | 599 ++++++++++++++++++++---- 3 files changed, 513 insertions(+), 101 deletions(-) diff --git a/backends/arm/scripts/run_fvp.sh b/backends/arm/scripts/run_fvp.sh index 9f0010189af..9da309fbe41 100755 --- a/backends/arm/scripts/run_fvp.sh +++ b/backends/arm/scripts/run_fvp.sh @@ -151,7 +151,7 @@ elif [[ ${target} == *"ethos-u55"* ]]; then -C mps3_board.telnetterminal0.start_telnet=0 \ -C mps3_board.uart0.out_file='-' \ -C mps3_board.uart0.shutdown_on_eot=1 \ - "${extra_args_u55[@]}" \ + ${extra_args_u55[@]+"${extra_args_u55[@]}"} \ -a "${elf_file}" \ ${data_file} \ --timelimit ${timeout} 2>&1 | sed 's/\r$//' | tee ${log_file} || true # seconds @@ -164,7 +164,7 @@ elif [[ ${target} == *"ethos-u85"* ]]; then -C mps4_board.telnetterminal0.start_telnet=0 \ -C mps4_board.uart0.out_file='-' \ -C mps4_board.uart0.shutdown_on_eot=1 \ - "${extra_args_u85[@]}" \ + ${extra_args_u85[@]+"${extra_args_u85[@]}"} \ -a "${elf_file}" \ ${data_file} \ --timelimit ${timeout} 2>&1 | sed 's/\r$//' | tee ${log_file} || true # seconds diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh index ad8cd8b7d3a..18ea908f816 100755 --- a/backends/arm/test/test_arm_baremetal.sh +++ b/backends/arm/test/test_arm_baremetal.sh @@ -256,14 +256,17 @@ test_pytest_models_vkml() { test_run_vkml() { echo "${TEST_SUITE_NAME}: Test VKML delegate examples with run.sh" + source backends/arm/test/setup_testing_vkml.sh + echo "${TEST_SUITE_NAME}: Test VKML" out_folder="arm_test/test_run" + vkml_build_dir="${build_root_test_dir}" - examples/arm/run.sh --et_build_root=${out_folder} --target=vgf --model_name=add --output=${out_folder}/runner - examples/arm/run.sh --et_build_root=${out_folder} --target=vgf --model_name=mul --output=${out_folder}/runner + examples/arm/run.sh 
--build-dir="${vkml_build_dir}" --et_build_root=${out_folder} --target=vgf --model_name=add --output=${out_folder}/runner + examples/arm/run.sh --build-dir="${vkml_build_dir}" --et_build_root=${out_folder} --target=vgf --model_name=mul --output=${out_folder}/runner - examples/arm/run.sh --et_build_root=${out_folder} --target=vgf --model_name=qadd --output=${out_folder}/runner - examples/arm/run.sh --et_build_root=${out_folder} --target=vgf --model_name=qops --output=${out_folder}/runner + examples/arm/run.sh --build-dir="${vkml_build_dir}" --et_build_root=${out_folder} --target=vgf --model_name=qadd --output=${out_folder}/runner + examples/arm/run.sh --build-dir="${vkml_build_dir}" --et_build_root=${out_folder} --target=vgf --model_name=qops --output=${out_folder}/runner echo "${TEST_SUITE_NAME}: PASS" } diff --git a/examples/arm/run.sh b/examples/arm/run.sh index 960531a2913..e1fbe4d1ef1 100755 --- a/examples/arm/run.sh +++ b/examples/arm/run.sh @@ -14,8 +14,9 @@ set -eu ######## script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) et_root_dir=$(cd ${script_dir}/../.. 
&& pwd) -et_root_dir=$(realpath ${et_root_dir}) - +et_root_dir=$(realpath "${et_root_dir}") +runner_source_dir="${et_root_dir}/examples/arm/executor_runner/standalone" +runner_source_dir=$(realpath "${runner_source_dir}") model_name="" model_input_set=false @@ -30,7 +31,6 @@ bundleio=false build_with_etdump=false build_type="Release" build_dir="" -extra_build_flags="" build_only=false system_config="" config="" @@ -39,7 +39,7 @@ pte_placement="elf" et_build_root="${et_root_dir}/arm_test" arm_scratch_dir=${script_dir}/arm-scratch scratch_dir_set=false -toolchain=arm-none-eabi-gcc +toolchain="arm-none-eabi-gcc" select_ops_list="aten::_softmax.out" select_ops_list_overridden=false qdq_fusion_op=false @@ -49,6 +49,12 @@ visualize_tosa=false visualize_pte=false model_converter=false specify_ethosu_scratch=false +extra_build_flags="" +preset_file="${et_root_dir}/tools/cmake/preset/arm_baremetal.cmake" +cmake_cache_file="" +build_dir_initialized=false +multi_config=false +parallel_jobs=1 function help() { echo "Usage: $(basename $0) [options]" @@ -67,7 +73,7 @@ function help() { echo " --bundleio Create Bundled pte using Devtools BundelIO with Input/RefOutput included" echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log" echo " --build_type= Build with Release, Debug, RelWithDebInfo, UndefinedSanitizer or AddressSanitizer, default is ${build_type}" - echo " --build-dir= Optional: reuse an existing arm_executor_runner build directory (configured via 'cmake -S examples/arm/executor_runner -B ...'). If omitted, run.sh auto-configures one under ${et_build_root} for bare-metal targets." + echo " --build-dir= Optional: reuse an existing arm_executor_runner build directory (configured via 'cmake -S examples/arm/executor_runner/standalone -B ...'). If omitted, run.sh auto-configures one under ${et_build_root} for bare-metal targets." 
echo " --build_only Only build, don't run" echo " --extra_build_flags=\"\" Extra -D style flags to pass to cmake when run.sh auto-configures the build" echo " --toolchain= Toolchain preset to use when run.sh auto-configures the build. Default: ${toolchain}" @@ -106,8 +112,8 @@ for arg in "$@"; do --etdump) build_with_etdump=true ;; --build_type=*) build_type="${arg#*=}";; --build-dir=*) build_dir="${arg#*=}";; - --extra_build_flags=*) extra_build_flags="${arg#*=}";; --build_only) build_only=true ;; + --extra_build_flags=*) extra_build_flags="${arg#*=}";; --toolchain=*) toolchain="${arg#*=}";; --system_config=*) system_config="${arg#*=}";; --config=*) config="${arg#*=}";; @@ -126,6 +132,11 @@ for arg in "$@"; do esac done +auto_configure=false +if [[ -z "${build_dir}" ]]; then + auto_configure=true +fi + if [ "$perf_overlay" = true ] && [ "$model_explorer" != true ]; then echo "Error: --perf_overlay requires --model_explorer" >&2 exit 1 @@ -146,10 +157,32 @@ if ! [[ ${pte_placement} == "elf" ]]; then fi # Default Ethos-u tool folder override with --scratch-dir= -arm_scratch_dir=$(realpath ${arm_scratch_dir}) +arm_scratch_dir=$(realpath "${arm_scratch_dir}") +ethos_u_root_dir="${arm_scratch_dir}/ethos-u" +mkdir -p "${ethos_u_root_dir}" +ethos_u_root_dir=$(realpath "${ethos_u_root_dir}") +cmsis_nn_local_path="" +if [[ -d "${ethos_u_root_dir}/core_software/cmsis-nn" ]]; then + cmsis_nn_local_path=$(realpath "${ethos_u_root_dir}/core_software/cmsis-nn") +fi setup_path_script=${arm_scratch_dir}/setup_path.sh _setup_msg="please refer to ${script_dir}/setup.sh to properly install necessary tools." +toolchain_cmake="" +case "${toolchain}" in + arm-none-eabi-gcc) + toolchain_cmake="${et_root_dir}/examples/arm/ethos-u-setup/${toolchain}.cmake" + ;; + arm-zephyr-eabi-gcc) + toolchain_cmake="${et_root_dir}/examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake" + ;; + *) + echo "Error: Invalid toolchain selection '${toolchain}'. 
Valid options: arm-none-eabi-gcc, arm-zephyr-eabi-gcc" >&2 + exit 1 + ;; +esac + + # Set target based variables if [[ ${system_config} == "" ]] then @@ -174,26 +207,10 @@ then config="Arm/vela.ini" fi -# Build executorch libraries -cd $et_root_dir -devtools_flag="" -bundleio_flag="" -etrecord_flag="" -et_dump_flag="" -qdq_fusion_op_flag="" -fvp_pmu_flag="" -if [ "$build_with_etdump" = true ] ; then - et_dump_flag="--etdump" - etrecord_flag="--etrecord" -fi - -if [ "$bundleio" = true ] ; then - devtools_flag="--devtools" - bundleio_flag="--bundleio" -fi - -if [ "$qdq_fusion_op" = true ] ; then - qdq_fusion_op_flag="--enable_qdq_fusion_pass" +target_cpu="cortex-m85" +if [[ ${target} =~ "ethos-u55" ]] +then + target_cpu="cortex-m55" fi function check_setup () { @@ -206,36 +223,31 @@ function check_setup () { echo "Could not find ${setup_path_script} file, ${_setup_msg}" return 1 fi - # If setup_path_script was correct all these checks should now pass - if [[ ${target} =~ "ethos-u" ]]; then - if [[ ${toolchain} == "arm-none-eabi-gcc" ]]; then - toolchain_cmake=${et_root_dir}/examples/arm/ethos-u-setup/${toolchain}.cmake - elif [[ ${toolchain} == "arm-zephyr-eabi-gcc" ]]; then - toolchain_cmake=${et_root_dir}/examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake - else - echo "Error: Invalid toolchain selection, provided: ${toolchain}" - echo " Valid options are {arm-none-eabi-gcc, arm-zephyr-eabi-gcc}" - exit 1; + + [[ -f ${et_root_dir}/CMakeLists.txt ]] \ + || { echo "Executorch repo doesn't contain CMakeLists.txt file at root level"; return 1; } + + [[ -f ${preset_file} ]] \ + || { echo "Could not find ${preset_file} file, ${_setup_msg}"; return 1; } + + if [[ "${auto_configure}" == true ]]; then + if ! 
command -v "${toolchain}" >/dev/null 2>&1; then + echo "Could not find ${toolchain} toolchain on PATH, ${_setup_msg}" + return 1 fi - toolchain_cmake=$(realpath ${toolchain_cmake}) - hash ${toolchain} \ - || { echo "Could not find ${toolchain} toolchain on PATH, ${_setup_msg}"; return 1; } [[ -f ${toolchain_cmake} ]] \ || { echo "Could not find ${toolchain_cmake} file, ${_setup_msg}"; return 1; } + fi - [[ -f ${et_root_dir}/CMakeLists.txt ]] \ - || { echo "Executorch repo doesn't contain CMakeLists.txt file at root level"; return 1; } - - backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $devtools_flag $et_dump_flag --toolchain="${toolchain}" - elif [[ ${target} == cortex-m* ]]; then + if [[ ${target} == cortex-m* ]]; then # build_test_runner.sh handles toolchain setup; just validate it's on PATH. hash arm-none-eabi-gcc \ || { echo "Could not find arm-none-eabi-gcc on PATH, ${_setup_msg}"; return 1; } elif [[ ${target} =~ "vgf" ]]; then - model_converter=$(which model-converter) + model_converter=$(which model-converter || true) echo "${model_converter}" - [[ "${model_converter}" == "model-converter not found" ]] \ + [[ -z "${model_converter}" || "${model_converter}" == "model-converter not found" ]] \ && { echo "Could not find model-converter, ${_setup_msg}"; return 1; } fi @@ -257,25 +269,416 @@ print(size) PY } +sanitize_for_path() { + local value="$1" + printf '%s' "${value}" | tr -c '[:alnum:]._-' '_' +} + +set_default_build_dir_path() { + if [[ ${target} == *"vgf"* ]]; then + cat <&2 +Error: auto-configuring a build directory is only supported for Ethos-U bare-metal targets. +Configure a host build manually, e.g. + cmake -S "${runner_source_dir}" -B -DEXECUTORCH_ROOT="${et_root_dir}" -DEXECUTORCH_BUILD_VGF=ON +and then pass --build-dir=. 
+EOF + exit 1 + fi + local sanitized_target + sanitized_target=$(sanitize_for_path "${target}") + local sanitized_build_type + sanitized_build_type=$(sanitize_for_path "${build_type}") + local sanitized_toolchain + sanitized_toolchain=$(sanitize_for_path "${toolchain}") + build_dir="${et_build_root}/${sanitized_target}_${sanitized_build_type}_${sanitized_toolchain}" +} + +configure_runner_build_dir() { + local pte_source="$1" + if [[ -z "${build_dir}" ]]; then + echo "Error: build_dir is not set. Cannot configure runner." >&2 + exit 1 + fi + if [[ "${pte_placement}" == "elf" ]]; then + pte_source=$(realpath "${pte_source}") + fi + mkdir -p "${build_dir}" + local cmake_cmd=( + cmake -S "${runner_source_dir}" -B "${build_dir}" + -DEXECUTORCH_ROOT="${et_root_dir}" + -DCMAKE_TOOLCHAIN_FILE="${toolchain_cmake}" + -DCMAKE_BUILD_TYPE="${build_type}" + -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON + -DEXECUTORCH_BUILD_CORTEX_M=ON + -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON + -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON + -DEXECUTORCH_BUILD_PRESET_FILE="${preset_file}" + -DEXECUTORCH_BAREMETAL_SKIP_INSTALL=OFF + -DETHOSU_TARGET_NPU_CONFIG="${target}" + -DTARGET_CPU="${target_cpu}" + -DSYSTEM_CONFIG="${system_config}" + -DMEMORY_MODE="${memory_mode}" + -DETHOS_SDK_PATH:PATH="${ethos_u_root_dir}" + -DEXECUTORCH_SELECT_OPS_LIST="${select_ops_list}" + ) + if [[ -n "${cmsis_nn_local_path}" ]]; then + cmake_cmd+=(-DCMSIS_NN_LOCAL_PATH:PATH="${cmsis_nn_local_path}") + fi + cmake_cmd+=(-DET_PTE_FILE_PATH:PATH="${pte_source}") + if [[ "${pte_placement}" == "elf" ]]; then + cmake_cmd+=(-DET_MODEL_PTE_ADDR=) + else + cmake_cmd+=(-DET_MODEL_PTE_ADDR="${pte_placement}") + fi + if [[ "${bundleio}" == true ]]; then + cmake_cmd+=(-DET_BUNDLE_IO=ON) + else + cmake_cmd+=(-DET_BUNDLE_IO=OFF) + fi + if [[ "${bundleio}" == true || "${build_with_etdump}" == true ]]; then + cmake_cmd+=(-DEXECUTORCH_BUILD_DEVTOOLS=ON) + else + cmake_cmd+=(-DEXECUTORCH_BUILD_DEVTOOLS=OFF) + fi + if [[ "${build_with_etdump}" == 
true ]]; then + cmake_cmd+=(-DEXECUTORCH_ENABLE_EVENT_TRACER=ON -DET_DUMP_INTERMEDIATE_OUTPUTS=ON) + else + cmake_cmd+=(-DEXECUTORCH_ENABLE_EVENT_TRACER=OFF -DET_DUMP_INTERMEDIATE_OUTPUTS=OFF) + fi + if [[ -n "${extra_build_flags}" ]]; then + # shellcheck disable=SC2206 + local extra_args=(${extra_build_flags}) + cmake_cmd+=("${extra_args[@]}") + fi + echo "[run.sh] Configuring ExecuTorch build at ${build_dir}" + "${cmake_cmd[@]}" + build_dir_initialized=false +} + +cmake_cache_get() { + local key="$1" + if [[ ! -f ${cmake_cache_file} ]]; then + echo "" + return 0 + fi + local line + line=$(grep -m1 "^${key}:" "${cmake_cache_file}" || true) + if [[ -z "${line}" ]]; then + echo "" + else + echo "${line#*=}" + fi +} + +cmake_cache_has_key() { + local key="$1" + [[ -f ${cmake_cache_file} ]] && grep -q "^${key}:" "${cmake_cache_file}" +} + +ensure_runner_build_dir() { + local standalone + standalone=$(cmake_cache_get ARM_EXECUTOR_RUNNER_STANDALONE) + local normalized + normalized=$(printf '%s' "${standalone}" | tr '[:lower:]' '[:upper:]') + if [[ "${normalized}" != "TRUE" && "${normalized}" != "ON" ]]; then + cat <&2 +Error: ${build_dir} is not a standalone arm_executor_runner build directory. +Configure it via: + cmake -S ${runner_source_dir} -B ${build_dir} -DEXECUTORCH_ROOT=${et_root_dir} [...] +and re-run run.sh. +EOF + exit 1 + fi +} + +ensure_select_ops_list_setting() { + local expected="$1" + local cache_value + cache_value=$(cmake_cache_get EXECUTORCH_SELECT_OPS_LIST) + if [[ -z "${cache_value}" ]]; then + cat <&2 +Error: EXECUTORCH_SELECT_OPS_LIST is not configured in ${build_dir}. +Reconfigure cmake -S ${runner_source_dir} -B ${build_dir} -DEXECUTORCH_SELECT_OPS_LIST=${expected}. +EOF + exit 1 + fi + if [[ "${cache_value}" != "${expected}" ]]; then + cat <&2 +Error: ${build_dir} was configured with EXECUTORCH_SELECT_OPS_LIST=${cache_value}, but run.sh requested ${expected}. 
+Reconfigure cmake -S ${runner_source_dir} -B ${build_dir} -DEXECUTORCH_SELECT_OPS_LIST=${expected}, or omit --select_ops_list. +EOF + exit 1 + fi +} + +require_cache_value() { + local key="$1" + local expected="$2" + local value + if ! cmake_cache_has_key "${key}"; then + echo "Error: ${key} not found in ${cmake_cache_file}. Reconfigure CMake with -D${key}=${expected}." >&2 + exit 1 + fi + value=$(cmake_cache_get "${key}") + if [[ "${value}" != "${expected}" ]]; then + echo "Error: ${key}=${value} in ${build_dir}. Reconfigure CMake with -D${key}=${expected} to use this run.sh invocation." >&2 + exit 1 + fi +} + +require_cache_bool() { + local key="$1" + local expected="$2" + local value + value=$(cmake_cache_get "${key}") + if [[ -z "${value}" ]]; then + echo "Error: ${key} not found in ${cmake_cache_file}. Reconfigure CMake with -D${key}=${expected}." >&2 + exit 1 + fi + local value_upper + value_upper=$(printf '%s' "${value}" | tr '[:lower:]' '[:upper:]') + local expected_upper + expected_upper=$(printf '%s' "${expected}" | tr '[:lower:]' '[:upper:]') + if [[ "${value_upper}" != "${expected_upper}" ]]; then + echo "Error: ${key}=${value} in ${build_dir}. Reconfigure CMake with -D${key}=${expected} to use run.sh." >&2 + exit 1 + fi +} + +is_cmake_false_value() { + local value_upper + value_upper=$(printf '%s' "$1" | tr '[:lower:]' '[:upper:]') + case "${value_upper}" in + ""|0|OFF|FALSE|NO|N|IGNORE|*-NOTFOUND) + return 0 + ;; + *) + return 1 + ;; + esac +} + +ensure_pte_placement_setting() { + local cached_addr + cached_addr=$(cmake_cache_get ET_MODEL_PTE_ADDR) + if ! cmake_cache_has_key ET_MODEL_PTE_ADDR; then + echo "Error: ET_MODEL_PTE_ADDR not found in ${cmake_cache_file}. Reconfigure CMake for the requested --pte_placement=${pte_placement}." >&2 + exit 1 + fi + if [[ "${pte_placement}" == "elf" ]]; then + if ! 
is_cmake_false_value "${cached_addr}"; then + echo "Error: --pte_placement=elf requested, but ${build_dir} was configured with ET_MODEL_PTE_ADDR=${cached_addr}. Reconfigure CMake with -DET_MODEL_PTE_ADDR=." >&2 + exit 1 + fi + if ! cmake_cache_has_key ET_PTE_FILE_PATH; then + echo "Error: ET_PTE_FILE_PATH not found in ${cmake_cache_file}. Reconfigure CMake with -DET_PTE_FILE_PATH=." >&2 + exit 1 + fi + return + fi + if is_cmake_false_value "${cached_addr}"; then + echo "Error: --pte_placement=${pte_placement} requested, but ${build_dir} was configured for an embedded PTE. Reconfigure CMake with -DET_MODEL_PTE_ADDR=${pte_placement}, or use --pte_placement=elf." >&2 + exit 1 + fi + if [[ "${cached_addr}" != "${pte_placement}" ]]; then + echo "Error: --pte_placement=${pte_placement} requested, but ${build_dir} was configured with ET_MODEL_PTE_ADDR=${cached_addr}. Reconfigure CMake with -DET_MODEL_PTE_ADDR=${pte_placement}." >&2 + exit 1 + fi +} + +get_parallel_jobs() { + if command -v nproc >/dev/null 2>&1; then + nproc + elif command -v sysctl >/dev/null 2>&1 && sysctl hw.logicalcpu >/dev/null 2>&1; then + sysctl -n hw.logicalcpu + elif command -v getconf >/dev/null 2>&1; then + getconf _NPROCESSORS_ONLN + elif [[ -n "${NUMBER_OF_PROCESSORS:-}" ]]; then + echo "${NUMBER_OF_PROCESSORS}" + else + echo 1 + fi +} + +build_runner_target() { + local cmake_target="$1" + local build_cmd=(cmake --build "${build_dir}" --target "${cmake_target}" --parallel "${parallel_jobs}") + if [[ "${multi_config}" == true ]]; then + build_cmd+=(--config "${build_type}") + fi + echo "[run.sh] Building target ${cmake_target} in ${build_dir}" + "${build_cmd[@]}" +} + +locate_runner_binary() { + local binary_name="$1" + local candidates=() + if [[ "${multi_config}" == true ]]; then + candidates+=("${build_dir}/${build_type}/${binary_name}") + candidates+=("${build_dir}/examples/arm/executor_runner/${build_type}/${binary_name}") + fi + candidates+=("${build_dir}/${binary_name}") + 
candidates+=("${build_dir}/examples/arm/executor_runner/${binary_name}") + for candidate in "${candidates[@]}"; do + if [[ -f "${candidate}" ]]; then + echo "${candidate}" + return 0 + fi + done + local found + found=$(find "${build_dir}" -name "${binary_name}" -type f 2>/dev/null | head -n 1 || true) + if [[ -n "${found}" ]]; then + echo "${found}" + return 0 + fi + return 1 +} +ensure_build_dir_ready() { + if [[ "${build_dir_initialized}" == true ]]; then + return + fi + if [[ -z "${build_dir}" ]]; then + echo "Error: build_dir is not set. Configure CMake first." >&2 + exit 1 + fi + build_dir=$(realpath "${build_dir}") + cmake_cache_file="${build_dir}/CMakeCache.txt" + if [[ ! -f ${cmake_cache_file} ]]; then + cat <<EOF >&2 +Error: ${build_dir} does not contain a configured arm_executor_runner build (missing CMakeCache.txt). +Run cmake -S ${runner_source_dir} -B ${build_dir} -DEXECUTORCH_ROOT=${et_root_dir} with the desired options first, then re-run run.sh. +EOF + exit 1 + fi + ensure_runner_build_dir + if [[ ${target} == *"vgf"* ]]; then + require_cache_bool EXECUTORCH_BUILD_VGF ON + else + require_cache_bool EXECUTORCH_BUILD_ARM_BAREMETAL ON + require_cache_bool EXECUTORCH_BAREMETAL_SKIP_INSTALL OFF + require_cache_value ETHOSU_TARGET_NPU_CONFIG "${target}" + require_cache_value TARGET_CPU "${target_cpu}" + require_cache_value SYSTEM_CONFIG "${system_config}" + require_cache_value MEMORY_MODE "${memory_mode}" + if [[ "${bundleio}" == true ]]; then + require_cache_bool ET_BUNDLE_IO ON + else + require_cache_bool ET_BUNDLE_IO OFF + fi + if [[ "${bundleio}" == true || "${build_with_etdump}" == true ]]; then + require_cache_bool EXECUTORCH_BUILD_DEVTOOLS ON + else + require_cache_bool EXECUTORCH_BUILD_DEVTOOLS OFF + fi + if [[ "${build_with_etdump}" == true ]]; then + require_cache_bool EXECUTORCH_ENABLE_EVENT_TRACER ON + require_cache_bool ET_DUMP_INTERMEDIATE_OUTPUTS ON + else + require_cache_bool EXECUTORCH_ENABLE_EVENT_TRACER OFF + require_cache_bool 
ET_DUMP_INTERMEDIATE_OUTPUTS OFF + fi + fi + if [[ ${target} != *"vgf"* ]]; then + ensure_select_ops_list_setting "${select_ops_list}" + fi + multi_config=false + if [[ -n "$(cmake_cache_get CMAKE_CONFIGURATION_TYPES)" ]]; then + multi_config=true + fi + parallel_jobs=$(get_parallel_jobs) + build_dir_initialized=true +} + ####### ### Main ####### if ! check_setup; then if [ "$scratch_dir_set" = false ] ; then - # check setup failed, no scratchdir given as parameter. trying to run setup.sh - if ${script_dir}/setup.sh; then - # and recheck setup. If this fails exit. - if ! check_setup; then - exit 1 - fi - else - # setup.sh failed, it should print why - exit 1 - fi + # check setup failed, no scratchdir given as parameter. trying to run setup.sh + if ${script_dir}/setup.sh; then + # and recheck setup. If this fails exit. + if ! check_setup; then + exit 1 + fi + else + # setup.sh failed, it should print why + exit 1 + fi fi fi +cd "${et_root_dir}" + +bundleio_flag="" +etrecord_flag_template="" +qdq_fusion_op_flag="" +if [ "$build_with_etdump" = true ] ; then + etrecord_flag_template="--etrecord" +fi + +if [ "$bundleio" = true ] ; then + bundleio_flag="--bundleio" +fi + +if [ "$qdq_fusion_op" = true ] ; then + qdq_fusion_op_flag="--enable_qdq_fusion_pass" +fi + +if [[ "${auto_configure}" == true ]]; then + set_default_build_dir_path +else + if [[ -z "${build_dir}" ]]; then + echo "Error: --build-dir must not be empty." >&2 + exit 1 + fi + ensure_build_dir_ready +fi + +stage_pte_into_cache() { + local new_pte="$1" + local cache_path + cache_path=$(cmake_cache_get ET_PTE_FILE_PATH) + if [[ -z "${cache_path}" ]]; then + cat <<EOF >&2 +Error: --pte_placement=elf requires ET_PTE_FILE_PATH to be set when configuring CMake. +Re-run cmake -S . -B ${build_dir} -DET_PTE_FILE_PATH=/absolute/path/to/model.pte (or use --pte_placement=). 
+EOF + exit 1 + fi + if [[ "${cache_path}" != /* ]]; then + cache_path="${build_dir}/${cache_path}" + fi + mkdir -p "$(dirname "${cache_path}")" + cp "${new_pte}" "${cache_path}" + echo "${cache_path}" +} + +configure_ethosu_scratch_if_requested() { + local pte_path="$1" + if [ "$specify_ethosu_scratch" != true ] || [[ ! ${target} =~ "ethos-u" ]]; then + return + fi + local scratch_size + scratch_size=$(get_ethosu_scratch_size "$pte_path" || true) + if [[ -z "${scratch_size}" ]]; then + echo "WARNING: Failed to derive Ethos-U scratch size from ${pte_path}" >&2 + return + fi + local cmake_cmd=( + cmake -S "${runner_source_dir}" -B "${build_dir}" + ) + if [[ -n "${extra_build_flags}" ]]; then + # shellcheck disable=SC2206 + local extra_args=(${extra_build_flags}) + cmake_cmd+=("${extra_args[@]}") + fi + cmake_cmd+=("-DET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=${scratch_size}") + echo "[run.sh] Updating scratch allocator size to ${scratch_size}" + "${cmake_cmd[@]}" +} + if [[ -z "$model_name" ]]; then + echo "[run.sh] WARNING: Built-in test models executed when --model_name is omitted are deprecated and will be removed after the ExecuTorch 1.2 release." 
>&2 # the test models run, and whether to delegate test_model=( "softmax" # 0 @@ -307,7 +710,7 @@ for i in "${!test_model[@]}"; do printf "Running e2e flow for model '%s' with flags '%s'\n" "${model}" "${model_compiler_flags}" echo "--------------------------------------------------------------------------------" - cd $et_root_dir + cd "${et_root_dir}" # Remove path and file exetension to get model_short_name ext=${model##*.} model_short_name=$(basename -- "${model}" .$ext) @@ -328,13 +731,14 @@ for i in "${!test_model[@]}"; do output_folder=${et_build_root}/${model_short_name} fi + local_fvp_pmu_flag="" if [ "$perf_overlay" = true ] ; then model_compiler_flags+="--enable_debug_mode tosa" - fvp_pmu_flag="--trace_file=${output_folder}/pmu_trace.gz" + local_fvp_pmu_flag="--trace_file=${output_folder}/pmu_trace.gz" fi - mkdir -p ${output_folder} - output_folder=$(realpath ${output_folder}) + mkdir -p "${output_folder}" + output_folder=$(realpath "${output_folder}") pte_file="${output_folder}/${model_filename_ext}" # Remove old pte files @@ -344,16 +748,17 @@ for i in "${!test_model[@]}"; do model_compiler_flags="${model_compiler_flags} --model_input=${model_input}" fi - ARM_AOT_CMD="python3 -m backends.arm.scripts.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag ${etrecord_flag} --config=${config} $qdq_fusion_op_flag" + model_etrecord_flag="${etrecord_flag_template}" + ARM_AOT_CMD="python3 -m backends.arm.scripts.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag ${model_etrecord_flag} --config=${config} $qdq_fusion_op_flag" echo "CALL ${ARM_AOT_CMD}" >&2 ${ARM_AOT_CMD} 1>&2 - pte_file=$(realpath ${pte_file}) + pte_file=$(realpath "${pte_file}") - if 
[ "${etrecord_flag}" != "" ] ; then + if [ "${model_etrecord_flag}" != "" ] ; then etrecord_filename="${output_folder}/${model_filename}_etrecord.bin" - etrecord_filename=$(realpath ${etrecord_filename}) - etrecord_flag="--etrecord=${etrecord_filename}" + etrecord_filename=$(realpath "${etrecord_filename}") + model_etrecord_flag="--etrecord=${etrecord_filename}" fi [[ -f ${pte_file} ]] || { >&2 echo "Failed to generate a pte file - ${pte_file}"; exit 1; } @@ -362,6 +767,7 @@ for i in "${!test_model[@]}"; do if [[ ${target} == *"TOSA"* ]]; then echo "Build for ${target} skip generating a .elf and running it" + continue elif [[ ${target} == cortex-m* ]]; then # Cortex-M backend uses a shared semihosting executor_runner (built # by build_test_runner.sh) that loads the .bpte at runtime, rather @@ -379,51 +785,54 @@ for i in "${!test_model[@]}"; do set +x elif [[ ${target} == *"vgf"* ]]; then echo "Build and run for VKML, (target: ${target})" - set -x - backends/arm/scripts/build_executor_runner_vkml.sh --build_type=${build_type} \ - --extra_build_flags="${extra_build_flags}" \ - --output="${output_folder}" \ - ${bundleio_flag} + build_runner_target executor_runner if [ "$build_only" = false ] ; then - backends/arm/scripts/run_vkml.sh --model=${pte_file} --build_path=${output_folder} + backends/arm/scripts/run_vkml.sh --model=${pte_file} --build_path=${build_dir} fi - set +x - else - # Build the application, the pte is imported as a header/c array or the address specified by --pte_placement - model_data="" - pte_file_or_mem="${pte_file}" - elf_file="${output_folder}/${model_filename}/cmake-out/arm_executor_runner" - if ! 
[[ ${pte_placement} == "elf" ]]; then - # Place PTE in memory specified by pte_placement - pte_file_or_mem="${pte_placement}" - model_data="--data=${pte_file}@${pte_placement}" - elf_file="${et_build_root}/${target}_${pte_placement}/cmake-out/arm_executor_runner" + if [[ "${auto_configure}" == true ]]; then + configure_runner_build_dir "${pte_file}" fi + ensure_build_dir_ready + ensure_pte_placement_setting - if [ "$specify_ethosu_scratch" = true ] && [[ ${target} =~ "ethos-u" ]]; then - scratch_size=$(get_ethosu_scratch_size "$pte_file") - if [ "$?" -eq 0 ] && [ -n "$scratch_size" ]; then - extra_build_flags="${extra_build_flags} -DET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=${scratch_size}" + model_data="" + if [[ ${pte_placement} == "elf" ]]; then + if [[ "${auto_configure}" == true ]]; then + staged_path=$(cmake_cache_get ET_PTE_FILE_PATH) + echo "ET_PTE_FILE_PATH payload: ${staged_path}" else - echo "WARNING: Failed to derive Ethos-U scratch size from ${pte_file}" >&2 + staged_path=$(stage_pte_into_cache "${pte_file}") + echo "Updated ET_PTE_FILE_PATH payload: ${staged_path}" fi + else + model_data="--data=${pte_file}@${pte_placement}" fi - set -x - backends/arm/scripts/build_executor_runner.sh --et_build_root="${et_build_root}" --pte="${pte_file_or_mem}" --build_type=${build_type} --target=${target} --system_config=${system_config} --memory_mode=${memory_mode} ${bundleio_flag} ${et_dump_flag} --extra_build_flags="${extra_build_flags}" --ethosu_tools_dir="${arm_scratch_dir}" --toolchain="${toolchain}" --select_ops_list="${select_ops_list}" - if [ "$build_only" = false ] ; then - # Execute the executor_runner on FVP Simulator + configure_ethosu_scratch_if_requested "${pte_file}" - backends/arm/scripts/run_fvp.sh --elf=${elf_file} ${model_data} --target=$target ${etrecord_flag} ${fvp_pmu_flag} + build_runner_target arm_executor_runner + elf_file=$(locate_runner_binary arm_executor_runner) \ + || { echo "Failed to locate arm_executor_runner in 
${build_dir}." >&2; exit 1; } + if [ "$build_only" = false ] ; then + fvp_args=("--elf=${elf_file}" "--target=${target}") + if [[ -n "${model_data}" ]]; then + fvp_args+=("${model_data}") + fi + if [[ -n "${model_etrecord_flag}" ]]; then + fvp_args+=("${model_etrecord_flag}") + fi + if [[ -n "${local_fvp_pmu_flag}" ]]; then + fvp_args+=("${local_fvp_pmu_flag}") + fi + backends/arm/scripts/run_fvp.sh "${fvp_args[@]}" fi - set +x fi if [ "$model_explorer" = true ]; then perf_flags="" if [ "$perf_overlay" = true ]; then - perf_flags+="--trace ${output_folder}/pmu_trace.gz --tables ${output_folder}/output/out_debug.xml" + perf_flags+=" --trace ${output_folder}/pmu_trace.gz --tables ${output_folder}/output/out_debug.xml" fi visualization_file="" From 2883eceb0212cb941083c484f4c261b8d2d89673 Mon Sep 17 00:00:00 2001 From: Usamah Zaheer Date: Tue, 7 Apr 2026 11:53:08 +0100 Subject: [PATCH 08/11] Arm backend: Document the new Arm runner workflow - Explain the auto-configured runner build flow and scratch directory expectations in examples/arm/README.md. - Update the Ethos-U notebook to export EXECUTORCH_ROOT before calling standalone cmake. 
Change-Id: If9f4f456c03b7a36a27ffdd1dfd1873ec286d07b Signed-off-by: Usamah Zaheer --- examples/arm/README.md | 16 ++++++++++++- examples/arm/cortex_m_mv2_example.ipynb | 4 ++-- examples/arm/ethos_u_minimal_example.ipynb | 28 ++++++---------------- examples/arm/pruning_minimal_example.ipynb | 4 ++-- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/examples/arm/README.md b/examples/arm/README.md index bcd8a1e1d0a..1e602700f78 100644 --- a/examples/arm/README.md +++ b/examples/arm/README.md @@ -1,3 +1,10 @@ + + ## ExecuTorch for Arm backends Ethos-U, VGF and Cortex-M This project contains scripts to help you setup and run a PyTorch @@ -11,12 +18,19 @@ The main scripts are `setup.sh`, `run.sh` and `setup.sh` will install the needed tools and with --root-dir you can change the path to a scratch folder where it will download and generate build artifacts. If supplied, you must also supply the same folder to run.sh with ---scratch-dir= If not supplied both script will use examples/arm/arm-scratch +--scratch-dir= If not supplied both scripts will use examples/arm/arm-scratch. `run.sh` can be used to build, run and test a model in an easy way and it will call cmake for you and in cases you want to run a simulator it will start it also. The script will call `aot_arm_compiler.py` to convert a model and include it in the build/run. +For bare-metal Ethos-U builds `run.sh` configures the standalone +`examples/arm/executor_runner/standalone` CMake entry point automatically. If +`--build-dir` is omitted, the script creates and owns a build tree under +`arm_test/_`. Supplying `--build-dir` reuses an existing tree +(for example a VGF host build or out-of-tree configuration) and `run.sh` +verifies it exposes the runner options it needs before compiling. 
+ Build and test artifacts are by default placed under the folder arm_test folder this can be changed with --et_build_root= diff --git a/examples/arm/cortex_m_mv2_example.ipynb b/examples/arm/cortex_m_mv2_example.ipynb index c2fe4342773..36844b4e5fd 100644 --- a/examples/arm/cortex_m_mv2_example.ipynb +++ b/examples/arm/cortex_m_mv2_example.ipynb @@ -136,7 +136,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "%%bash \n# Build example executor runner application to examples/arm/cortex_m_mv2_example\n# Note that this is the same runner as used in the Ethos-U example, creating some overlap in the config even though the Ethos-U is not used.\ncmake -DCMAKE_TOOLCHAIN_FILE=$(pwd)/ethos-u-setup/arm-none-eabi-gcc.cmake \\\n -DCMAKE_BUILD_TYPE=Release \\\n -DET_PTE_FILE_PATH=cortex_m_mv2_example.bpte \\\n -DTARGET_CPU=cortex-m55 \\\n -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \\\n -DMEMORY_MODE=Shared_Sram \\\n -DET_BUNDLE_IO=ON \\\n -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \\\n -Bcortex_m_mv2_example \\\n executor_runner\ncmake --build cortex_m_mv2_example -j$(nproc) -- arm_executor_runner" + "source": "%%bash \n# Build example executor runner application to examples/arm/cortex_m_mv2_example\n# Note that this is the same runner as used in the Ethos-U example, creating some overlap in the config even though the Ethos-U is not used.\ncmake -DCMAKE_TOOLCHAIN_FILE=$(pwd)/ethos-u-setup/arm-none-eabi-gcc.cmake \\\n -DCMAKE_BUILD_TYPE=Release \\\n -DET_PTE_FILE_PATH=cortex_m_mv2_example.bpte \\\n -DTARGET_CPU=cortex-m55 \\\n -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \\\n -DMEMORY_MODE=Shared_Sram \\\n -DET_BUNDLE_IO=ON \\\n -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \\\n -Bcortex_m_mv2_example \\\n -S executor_runner/standalone\ncmake --build cortex_m_mv2_example -j$(nproc) -- arm_executor_runner" }, { "cell_type": "markdown", @@ -179,4 +179,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git 
a/examples/arm/ethos_u_minimal_example.ipynb b/examples/arm/ethos_u_minimal_example.ipynb index fbb15cd0e57..11f24019d23 100644 --- a/examples/arm/ethos_u_minimal_example.ipynb +++ b/examples/arm/ethos_u_minimal_example.ipynb @@ -171,26 +171,8 @@ "source": [ "## Build executor runtime\n", "\n", - "After the AOT compilation flow is done, the runtime can be cross compiled and linked to the produced .pte-file using the Arm cross-compilation toolchain. This is done in two steps:\n", - "1. Build and install the executorch libraries and EthosUDelegate.\n", - "2. Build and link the `arm_executor_runner` and generate kernel bindings for any non delegated ops." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "# Ensure the arm-none-eabi-gcc toolchain and FVP:s are available on $PATH\n", - "source arm-scratch/setup_path.sh\n", - "\n", - "# Build executorch libraries cross-compiled for arm baremetal to executorch/cmake-out-arm\n", - "cmake --preset arm-baremetal \\\n", - "-DCMAKE_BUILD_TYPE=Release \\\n", - "-B../../cmake-out-arm ../..\n", - "cmake --build ../../cmake-out-arm --target install -j$(nproc) " + "After the AOT compilation flow finishes, cross-compile and link the runtime by configuring the standalone `examples/arm/executor_runner/standalone` CMake project with the Arm toolchain.\n", + "It automatically pulls the ExecuTorch checkout in as a dependency so the delegate, kernels, and runner util are rebuilt alongside the application, and it generates kernel bindings for any non-delegated ops found in the `.pte`.\n" ] }, { @@ -201,6 +183,8 @@ "source": [ "%%bash \n", "source arm-scratch/setup_path.sh\n", + "# Ensure CMake resolves the ExecuTorch checkout root regardless of caller env\n", + "export EXECUTORCH_ROOT=$(cd ../.. 
&& pwd)\n", "\n", "# Build example executor runner application to examples/arm/ethos_u_minimal_example\n", "cmake -DCMAKE_TOOLCHAIN_FILE=$(pwd)/ethos-u-setup/arm-none-eabi-gcc.cmake \\\n", @@ -211,7 +195,7 @@ " -DMEMORY_MODE=Shared_Sram \\\n", " -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \\\n", " -Bethos_u_minimal_example \\\n", - " executor_runner\n", + " -S executor_runner/standalone\n", "cmake --build ethos_u_minimal_example -j$(nproc) -- arm_executor_runner" ] }, @@ -232,6 +216,8 @@ "source": [ "%%bash \n", "source arm-scratch/setup_path.sh\n", + "# Ensure CMake resolves the ExecuTorch checkout root regardless of caller env\n", + "export EXECUTORCH_ROOT=$(cd ../.. && pwd)\n", "\n", "# Run the example\n", "../../backends/arm/scripts/run_fvp.sh --elf=ethos_u_minimal_example/arm_executor_runner --target=ethos-u55-128" diff --git a/examples/arm/pruning_minimal_example.ipynb b/examples/arm/pruning_minimal_example.ipynb index db585b94158..a24c6626a15 100644 --- a/examples/arm/pruning_minimal_example.ipynb +++ b/examples/arm/pruning_minimal_example.ipynb @@ -453,7 +453,7 @@ " -DMEMORY_MODE=Shared_Sram \\\n", " -DSYSTEM_CONFIG=Ethos_U85_SYS_DRAM_Mid \\\n", " -Bethos_u_original_model \\\n", - " executor_runner\n", + " -S executor_runner/standalone\n", "cmake --build ethos_u_original_model -j$(nproc) -- arm_executor_runner" ] }, @@ -499,7 +499,7 @@ " -DMEMORY_MODE=Shared_Sram \\\n", " -DSYSTEM_CONFIG=Ethos_U85_SYS_DRAM_Mid \\\n", " -Bethos_u_pruned_model \\\n", - " executor_runner\n", + " -S executor_runner/standalone\n", "cmake --build ethos_u_pruned_model -j$(nproc) -- arm_executor_runner" ] }, From 455a9b2e62be41cb42033f2b251c76e686a5b391 Mon Sep 17 00:00:00 2001 From: Usamah Zaheer Date: Tue, 12 May 2026 11:52:13 +0100 Subject: [PATCH 09/11] Arm backend: Fix standalone runner cache reuse Allow VGF host runner builds to reuse existing top-level CMake build directories without requiring the bare-metal standalone marker. 
Pin the standalone Arm runner registry size to the default capacity unless the user overrides MAX_KERNEL_NUM. This prevents selected-op cache sizing from undersizing binaries that also link quantized and Cortex-M registration libraries. Change-Id: I6716c454ec5d9d3adbff756afc14fe8739268520 Signed-off-by: Usamah Zaheer --- .../executor_runner/standalone/CMakeLists.txt | 53 +++++++++++++++++++ examples/arm/run.sh | 2 +- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/examples/arm/executor_runner/standalone/CMakeLists.txt b/examples/arm/executor_runner/standalone/CMakeLists.txt index f1dd9f315fc..73493ca9e71 100644 --- a/examples/arm/executor_runner/standalone/CMakeLists.txt +++ b/examples/arm/executor_runner/standalone/CMakeLists.txt @@ -86,6 +86,50 @@ set(EXECUTORCH_SKIP_ARM_EXECUTOR_RUNNER CACHE BOOL "" FORCE ) +# examples/arm/executor_runner/CMakeLists.txt generates the runner-specific +# portable-op registration based on the PTE or an explicit select-ops list. +# Avoid feeding those cache entries into the top-level ExecuTorch configure, +# otherwise executorch_core auto-right-sizes MAX_KERNEL_NUM from the runner's +# placeholder/selective build inputs even though the runner also links +# quantized/cortex-m registration libraries. 
+set(_arm_runner_selective_cache_vars + EXECUTORCH_SELECT_OPS_LIST EXECUTORCH_SELECT_OPS_MODEL + EXECUTORCH_SELECT_OPS_YAML +) +foreach(_arm_runner_cache_var IN LISTS _arm_runner_selective_cache_vars) + if(DEFINED CACHE{${_arm_runner_cache_var}}) + get_property( + _arm_runner_cache_type + CACHE ${_arm_runner_cache_var} + PROPERTY TYPE + ) + if(NOT _arm_runner_cache_type OR _arm_runner_cache_type STREQUAL + "UNINITIALIZED" + ) + set(_arm_runner_cache_type STRING) + endif() + set(_arm_runner_saved_type_${_arm_runner_cache_var} + "${_arm_runner_cache_type}" + ) + set(_arm_runner_saved_value_${_arm_runner_cache_var} + "${${_arm_runner_cache_var}}" + ) + set(_arm_runner_saved_defined_${_arm_runner_cache_var} TRUE) + set(${_arm_runner_cache_var} + "" + CACHE ${_arm_runner_cache_type} "" FORCE + ) + endif() +endforeach() + +if(NOT DEFINED CACHE{MAX_KERNEL_NUM} AND NOT DEFINED MAX_KERNEL_NUM) + set(MAX_KERNEL_NUM + 2000 + CACHE STRING + "Maximum number of kernels registered by the standalone Arm runner" + ) +endif() + # Pull ExecuTorch in-tree so all required targets (delegates, kernels, runner # util, etc.) are built from this checkout. 
add_subdirectory( @@ -100,6 +144,15 @@ if(ARM_EXECUTOR_RUNNER_SKIP_INSTALL_RULES) endif() endif() +foreach(_arm_runner_cache_var IN LISTS _arm_runner_selective_cache_vars) + if(_arm_runner_saved_defined_${_arm_runner_cache_var}) + set(${_arm_runner_cache_var} + "${_arm_runner_saved_value_${_arm_runner_cache_var}}" + CACHE ${_arm_runner_saved_type_${_arm_runner_cache_var}} "" FORCE + ) + endif() +endforeach() + add_subdirectory( ${EXECUTORCH_ROOT}/examples/arm/executor_runner ${CMAKE_BINARY_DIR}/examples/arm/executor_runner diff --git a/examples/arm/run.sh b/examples/arm/run.sh index e1fbe4d1ef1..adb4ea228e0 100755 --- a/examples/arm/run.sh +++ b/examples/arm/run.sh @@ -550,10 +550,10 @@ Run cmake -S ${runner_source_dir} -B ${build_dir} -DEXECUTORCH_ROOT=${et_root_di EOF exit 1 fi - ensure_runner_build_dir if [[ ${target} == *"vgf"* ]]; then require_cache_bool EXECUTORCH_BUILD_VGF ON else + ensure_runner_build_dir require_cache_bool EXECUTORCH_BUILD_ARM_BAREMETAL ON require_cache_bool EXECUTORCH_BAREMETAL_SKIP_INSTALL OFF require_cache_value ETHOSU_TARGET_NPU_CONFIG "${target}" From 2215f4add629953871e44e3cc0b73e8744b5af9d Mon Sep 17 00:00:00 2001 From: Usamah Zaheer Date: Mon, 11 May 2026 11:35:57 +0100 Subject: [PATCH 10/11] Arm backend: Document standalone runner public docs Update generated Ethos-U docs and docgen templates to point users at the standalone Arm executor runner CMake entry point. This replaces the old two-step install and direct runner configure flow. 
Signed-off-by: Usamah Zaheer Change-Id: I582b87033c7d50a4219fc01a01f1b5ddd980e8e4 --- .../backends-arm-ethos-u-overview.md.in | 6 ++--- .../ethos-u-getting-started-tutorial.md.in | 23 +++++++------------ .../arm-ethos-u/arm-ethos-u-overview.md | 4 ++-- .../tutorials/ethos-u-getting-started.md | 23 +++++++------------ 4 files changed, 21 insertions(+), 35 deletions(-) diff --git a/backends/arm/scripts/docgen/ethos-u/backends-arm-ethos-u-overview.md.in b/backends/arm/scripts/docgen/ethos-u/backends-arm-ethos-u-overview.md.in index 1990bc6d946..4e9f04c85b5 100644 --- a/backends/arm/scripts/docgen/ethos-u/backends-arm-ethos-u-overview.md.in +++ b/backends/arm/scripts/docgen/ethos-u/backends-arm-ethos-u-overview.md.in @@ -4,7 +4,7 @@ The Arm® Ethos™-U backend targets Edge/IoT-type AI use-cases by enabli [Arm® Ethos™-U55 NPU](https://www.arm.com/products/silicon-ip-cpu/ethos/ethos-u55), [Arm® Ethos™-U65 NPU](https://www.arm.com/products/silicon-ip-cpu/ethos/ethos-u65), and [Arm® Ethos™-U85 NPU](https://www.arm.com/products/silicon-ip-cpu/ethos/ethos-u85), leveraging [TOSA](https://www.mlplatform.org/tosa/) and the [ethos-u-vela](https://pypi.org/project/ethos-u-vela/) graph compiler. This document is a technical reference for using the Ethos-U backend, for a top level view with code examples -please refer to the [Arm Ethos-U Backend Tutorial](https://docs.pytorch.org/executorch/stable/tutorial-arm-ethos-u.html). +please refer to the [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md). ## Features @@ -27,7 +27,7 @@ For the AOT flow, compilation of a model to `.pte` format using the Ethos-U back - [TOSA Serialization Library](https://www.mlplatform.org/tosa/software.html) for serializing the Exir IR graph into TOSA IR. - [Ethos-U Vela graph compiler](https://pypi.org/project/ethos-u-vela/) for compiling TOSA flatbuffers into an Ethos-U command stream. 
-And for building and running the example application available in `examples/arm/executor_runner/`: +And for building and running the example application available in `examples/arm/executor_runner/` through the standalone CMake entry point: - [Arm GNU Toolchain](https://developer.arm.com/Tools%20and%20Software/GNU%20Toolchain) for cross compilation. - [Arm® Corstone™ SSE-300 FVP](https://developer.arm.com/documentation/100966/1128/Arm--Corstone-SSE-300-FVP) for testing on a Arm® Cortex®-M55+Ethos-U55 reference design. - [Arm® Corstone™ SSE-320 FVP](https://developer.arm.com/documentation/109760/0000/SSE-320-FVP) for testing on a Arm® Cortex®-M85+Ethos-U85 reference design. @@ -55,7 +55,7 @@ For more information on quantization, see [Quantization](arm-ethos-u-quantizatio ## Runtime Integration -An example runtime application is available in [examples/arm/executor_runner](https://github.com/pytorch/executorch/blob/main/examples/arm/executor_runner/), and the steps requried for building and deploying it on a FVP it is explained in the previously mentioned [Arm Ethos-U Backend Tutorial](https://docs.pytorch.org/executorch/stable/tutorial-arm-ethos-u.html). +An example runtime application is available in [examples/arm/executor_runner](https://github.com/pytorch/executorch/blob/main/examples/arm/executor_runner/), with a standalone CMake entry point in `examples/arm/executor_runner/standalone`. The steps required for building and deploying it on an FVP are explained in the previously mentioned [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md). The example application is recommended to use for testing basic functionality of your lowered models, as well as a starting point for developing runtime integrations for your own targets. 
For an in-depth explanation of the architecture of the executor_runner and the steps required for doing such an integration, please refer to [Ethos-U porting guide](https://github.com/pytorch/executorch/blob/main/examples/arm/ethos-u-porting-guide.md). diff --git a/backends/arm/scripts/docgen/ethos-u/ethos-u-getting-started-tutorial.md.in b/backends/arm/scripts/docgen/ethos-u/ethos-u-getting-started-tutorial.md.in index 68b73755317..0222e51a2fd 100644 --- a/backends/arm/scripts/docgen/ethos-u/ethos-u-getting-started-tutorial.md.in +++ b/backends/arm/scripts/docgen/ethos-u/ethos-u-getting-started-tutorial.md.in @@ -76,35 +76,28 @@ To produce a pte file equivalent to the one above, run ### Runtime: -After the AOT compilation flow is done, the runtime can be cross compiled and linked to the produced `.pte`-file using the Arm cross-compilation toolchain. This is done in two steps: +After the AOT compilation flow is done, the runtime can be cross compiled and linked to the produced `.pte`-file using the Arm cross-compilation toolchain. Configure the standalone Arm executor runner CMake project to pull in the ExecuTorch build graph, link the Ethos-U delegate, and generate kernel bindings for any non-delegated ops. This produces the `arm_executor_runner` program that will run on target. -First, build and install the ExecuTorch libraries and EthosUDelegate: ``` # In ExecuTorch top-level, with sourced setup_path.sh -cmake -DCMAKE_BUILD_TYPE=Release --preset arm-baremetal -B cmake-out-arm . -cmake --build cmake-out-arm --target install -j$(nproc) -``` -Second, build and link the `arm_executor_runner` and generate kernel bindings for any non delegated ops. This is the actual program that will run on target. 
- -``` -# In ExecuTorch top-level, with sourced setup_path.sh -cmake -DCMAKE_TOOLCHAIN_FILE=`pwd`/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake \ +cmake -S examples/arm/executor_runner/standalone \ + -B ethos_u_minimal_example \ + -DEXECUTORCH_ROOT=$(pwd) \ + -DCMAKE_TOOLCHAIN_FILE=$(pwd)/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake \ -DCMAKE_BUILD_TYPE=Release \ -DET_PTE_FILE_PATH=ethos_u_minimal_example.pte \ -DTARGET_CPU=cortex-m55 \ -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \ -DMEMORY_MODE=Shared_Sram \ - -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \ - -Bethos_u_minimal_example \ - examples/arm/executor_runner + -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded cmake --build ethos_u_minimal_example -j$(nproc) -- arm_executor_runner ``` ```{tip} -For a quick start, you can use the script `backends/arm/scripts/build_executor_runner.sh` to build the runner. +For a quick start, you can use the script `backends/arm/scripts/build_executor_runner.sh` to configure and build the standalone runner. To build a runner equivalent to the one above, run `./backends/arm/scripts/build_executor_runner.sh --pte=ethos_u_minimal_example.pte` -```` +``` The block diagram below shows, at the high level, how the various build artifacts are generated and are linked together to generate the final bare-metal executable. diff --git a/docs/source/backends/arm-ethos-u/arm-ethos-u-overview.md b/docs/source/backends/arm-ethos-u/arm-ethos-u-overview.md index faffedece35..28b5ce24338 100644 --- a/docs/source/backends/arm-ethos-u/arm-ethos-u-overview.md +++ b/docs/source/backends/arm-ethos-u/arm-ethos-u-overview.md @@ -27,7 +27,7 @@ For the AOT flow, compilation of a model to `.pte` format using the Ethos-U back - [TOSA Serialization Library](https://www.mlplatform.org/tosa/software.html) for serializing the Exir IR graph into TOSA IR. - [Ethos-U Vela graph compiler](https://pypi.org/project/ethos-u-vela/) for compiling TOSA flatbuffers into an Ethos-U command stream. 
-And for building and running the example application available in `examples/arm/executor_runner/`: +And for building and running the example application available in `examples/arm/executor_runner/` through the standalone CMake entry point: - [Arm GNU Toolchain](https://developer.arm.com/Tools%20and%20Software/GNU%20Toolchain) for cross compilation. - [Arm® Corstone™ SSE-300 FVP](https://developer.arm.com/documentation/100966/1128/Arm--Corstone-SSE-300-FVP) for testing on a Arm® Cortex®-M55+Ethos-U55 reference design. - [Arm® Corstone™ SSE-320 FVP](https://developer.arm.com/documentation/109760/0000/SSE-320-FVP) for testing on a Arm® Cortex®-M85+Ethos-U85 reference design. @@ -111,7 +111,7 @@ For more information on quantization, see [Quantization](arm-ethos-u-quantizatio ## Runtime Integration -An example runtime application is available in [examples/arm/executor_runner](https://github.com/pytorch/executorch/blob/main/examples/arm/executor_runner/), and the steps requried for building and deploying it on a FVP it is explained in the previously mentioned [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md). +An example runtime application is available in [examples/arm/executor_runner](https://github.com/pytorch/executorch/blob/main/examples/arm/executor_runner/), with a standalone CMake entry point in `examples/arm/executor_runner/standalone`. The steps required for building and deploying it on an FVP are explained in the previously mentioned [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md). The example application is recommended to use for testing basic functionality of your lowered models, as well as a starting point for developing runtime integrations for your own targets. For an in-depth explanation of the architecture of the executor_runner and the steps required for doing such an integration, please refer to [Ethos-U porting guide](https://github.com/pytorch/executorch/blob/main/examples/arm/ethos-u-porting-guide.md). 
diff --git a/docs/source/backends/arm-ethos-u/tutorials/ethos-u-getting-started.md b/docs/source/backends/arm-ethos-u/tutorials/ethos-u-getting-started.md index 583e55dd8b6..fc966a02b86 100644 --- a/docs/source/backends/arm-ethos-u/tutorials/ethos-u-getting-started.md +++ b/docs/source/backends/arm-ethos-u/tutorials/ethos-u-getting-started.md @@ -149,35 +149,28 @@ To produce a pte file equivalent to the one above, run ### Runtime: -After the AOT compilation flow is done, the runtime can be cross compiled and linked to the produced `.pte`-file using the Arm cross-compilation toolchain. This is done in two steps: +After the AOT compilation flow is done, the runtime can be cross compiled and linked to the produced `.pte`-file using the Arm cross-compilation toolchain. Configure the standalone Arm executor runner CMake project to pull in the ExecuTorch build graph, link the Ethos-U delegate, and generate kernel bindings for any non-delegated ops. This produces the `arm_executor_runner` program that will run on target. -First, build and install the ExecuTorch libraries and EthosUDelegate: ``` # In ExecuTorch top-level, with sourced setup_path.sh -cmake -DCMAKE_BUILD_TYPE=Release --preset arm-baremetal -B cmake-out-arm . -cmake --build cmake-out-arm --target install -j$(nproc) -``` -Second, build and link the `arm_executor_runner` and generate kernel bindings for any non delegated ops. This is the actual program that will run on target. 
- -``` -# In ExecuTorch top-level, with sourced setup_path.sh -cmake -DCMAKE_TOOLCHAIN_FILE=`pwd`/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake \ +cmake -S examples/arm/executor_runner/standalone \ + -B ethos_u_minimal_example \ + -DEXECUTORCH_ROOT=$(pwd) \ + -DCMAKE_TOOLCHAIN_FILE=$(pwd)/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake \ -DCMAKE_BUILD_TYPE=Release \ -DET_PTE_FILE_PATH=ethos_u_minimal_example.pte \ -DTARGET_CPU=cortex-m55 \ -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \ -DMEMORY_MODE=Shared_Sram \ - -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \ - -Bethos_u_minimal_example \ - examples/arm/executor_runner + -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded cmake --build ethos_u_minimal_example -j$(nproc) -- arm_executor_runner ``` ```{tip} -For a quick start, you can use the script `backends/arm/scripts/build_executor_runner.sh` to build the runner. +For a quick start, you can use the script `backends/arm/scripts/build_executor_runner.sh` to configure and build the standalone runner. To build a runner equivalent to the one above, run `./backends/arm/scripts/build_executor_runner.sh --pte=ethos_u_minimal_example.pte` -```` +``` The block diagram below shows, at the high level, how the various build artifacts are generated and are linked together to generate the final bare-metal executable. From c6262076803be7ad510a8b810fb31cf998b70db9 Mon Sep 17 00:00:00 2001 From: Usamah Zaheer Date: Tue, 12 May 2026 14:15:01 +0100 Subject: [PATCH 11/11] Arm backend: Fix CI setup fallback paths TOSA-only run.sh invocations stop after AOT export and do not build or run an Arm executor runner, so avoid requiring the bare-metal toolchain for those targets. The Cortex-M E2E CI wrapper can also invoke run.sh fallback setup, so accept the FVP EULA in that CI caller instead of making run.sh infer acceptance from CI=true. 
Signed-off-by: Usamah Zaheer Change-Id: Ic154c5dc6327ee7d882429f11f82fa9c8d7a17e1 --- .ci/scripts/test_cortex_m_e2e.sh | 1 + examples/arm/run.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.ci/scripts/test_cortex_m_e2e.sh b/.ci/scripts/test_cortex_m_e2e.sh index c6e643f118c..53e50ae5c9b 100755 --- a/.ci/scripts/test_cortex_m_e2e.sh +++ b/.ci/scripts/test_cortex_m_e2e.sh @@ -19,6 +19,7 @@ et_root_dir=$(realpath "${script_dir}/../..") # Quantization is the default for the cortex-m55+int8 target; run.sh's # arg parser only recognizes --no_quantize, so we omit any explicit flag. +export ARM_FVP_INSTALL_I_AGREE_TO_THE_CONTAINED_EULA=True bash "${et_root_dir}/examples/arm/run.sh" \ --model_name="${MODEL}" \ --target=cortex-m55+int8 \ diff --git a/examples/arm/run.sh b/examples/arm/run.sh index adb4ea228e0..351eda14071 100755 --- a/examples/arm/run.sh +++ b/examples/arm/run.sh @@ -230,7 +230,7 @@ function check_setup () { [[ -f ${preset_file} ]] \ || { echo "Could not find ${preset_file} file, ${_setup_msg}"; return 1; } - if [[ "${auto_configure}" == true ]]; then + if [[ "${auto_configure}" == true && ${target} != *"TOSA"* ]]; then if ! command -v "${toolchain}" >/dev/null 2>&1; then echo "Could not find ${toolchain} toolchain on PATH, ${_setup_msg}" return 1