diff --git a/.ci/scripts/test_cortex_m_e2e.sh b/.ci/scripts/test_cortex_m_e2e.sh index de47a45ea0d..ac6e6d46550 100755 --- a/.ci/scripts/test_cortex_m_e2e.sh +++ b/.ci/scripts/test_cortex_m_e2e.sh @@ -19,6 +19,7 @@ et_root_dir=$(realpath "${script_dir}/../..") # Quantization is the default for the cortex-m55 target; run.sh's # arg parser only recognizes --no_quantize, so we omit any explicit flag. +export ARM_FVP_INSTALL_I_AGREE_TO_THE_CONTAINED_EULA=True bash "${et_root_dir}/examples/arm/run.sh" \ --model_name="${MODEL}" \ --target=cortex-m55 \ diff --git a/CMakeLists.txt b/CMakeLists.txt index 359a0e0f5e4..ac40d86d273 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,6 +160,23 @@ announce_configured_options(BUILD_TESTING) load_build_preset() include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake) +# Keep bare-metal installs enabled only when ExecuTorch owns the top-level +# build. Standalone consumers (e.g., the runner) set +# EXECUTORCH_BAREMETAL_SKIP_INSTALL=ON but still add ExecuTorch as a subproject, +# which cannot satisfy our install() export dependencies until their own targets +# are configured. 
+if(DEFINED EXECUTORCH_BAREMETAL_SKIP_INSTALL + AND EXECUTORCH_BAREMETAL_SKIP_INSTALL + AND NOT (CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) +) + set(CMAKE_SKIP_INSTALL_RULES + ON + CACHE BOOL + "Skip install() rules when ExecuTorch is consumed as a subproject" + FORCE + ) +endif() + # Enable ccache if available find_program(CCACHE_PROGRAM ccache) if(CCACHE_PROGRAM) diff --git a/backends/arm/CMakeLists.txt b/backends/arm/CMakeLists.txt index 0c8b241522c..d8a6c1afce7 100644 --- a/backends/arm/CMakeLists.txt +++ b/backends/arm/CMakeLists.txt @@ -63,17 +63,20 @@ if(EXECUTORCH_BUILD_ARM_BAREMETAL OR EXECUTORCH_BUILD_ARM_ETHOSU_LINUX) add_library(executorch_delegate_ethos_u STATIC ${_arm_backend_sources}) target_link_libraries(executorch_delegate_ethos_u PUBLIC executorch_core) + target_include_directories( + executorch_delegate_ethos_u PRIVATE ${_common_include_directories} + ) if(EXECUTORCH_BUILD_ARM_BAREMETAL) target_sources( executorch_delegate_ethos_u PRIVATE ${EXECUTORCH_ROOT}/backends/arm/runtime/EthosUBackend_Cortex_M.cpp ) - set(DRIVER_ETHOSU_INCLUDE_DIR + set(_ethosu_core_driver_include "${THIRD_PARTY_ROOT}/ethos-u-core-driver/include" ) target_include_directories( - executorch_delegate_ethos_u PRIVATE ${DRIVER_ETHOSU_INCLUDE_DIR} + executorch_delegate_ethos_u PRIVATE ${_ethosu_core_driver_include} ) target_link_libraries(executorch_delegate_ethos_u PUBLIC ethosu_core_driver) elseif(EXECUTORCH_BUILD_ARM_ETHOSU_LINUX) @@ -110,7 +113,25 @@ if(EXECUTORCH_BUILD_ARM_BAREMETAL OR EXECUTORCH_BUILD_ARM_ETHOSU_LINUX) ) endif() - install(TARGETS executorch_delegate_ethos_u EXPORT ExecuTorchTargets) + if(NOT CMAKE_SKIP_INSTALL_RULES) + install(TARGETS executorch_delegate_ethos_u EXPORT ExecuTorchTargets) + + if(TARGET ethosu_core_driver) + get_property( + _et_ethosu_core_driver_exported GLOBAL + PROPERTY ET_ETHOSU_CORE_DRIVER_EXPORTED + ) + if(NOT _et_ethosu_core_driver_exported) + install( + TARGETS ethosu_core_driver + EXPORT ExecuTorchTargets + ARCHIVE DESTINATION 
${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + set_property(GLOBAL PROPERTY ET_ETHOSU_CORE_DRIVER_EXPORTED TRUE) + endif() + endif() + endif() endif() diff --git a/backends/arm/cmake/ArmEthosUSDK.cmake b/backends/arm/cmake/ArmEthosUSDK.cmake new file mode 100644 index 00000000000..03affdf69bb --- /dev/null +++ b/backends/arm/cmake/ArmEthosUSDK.cmake @@ -0,0 +1,60 @@ +# Copyright 2026 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +include_guard(GLOBAL) + +function(arm_ethos_u_content_ready SDK_PATH OUT_VAR) + if(EXISTS "${SDK_PATH}/core_platform" AND EXISTS "${SDK_PATH}/core_software") + set(${OUT_VAR} + TRUE + PARENT_SCOPE + ) + else() + set(${OUT_VAR} + FALSE + PARENT_SCOPE + ) + endif() +endfunction() + +function(arm_ethos_u_default_fetch SDK_PATH OUT_VAR) + arm_ethos_u_content_ready("${SDK_PATH}" _arm_ethos_ready) + if(_arm_ethos_ready) + set(${OUT_VAR} + OFF + PARENT_SCOPE + ) + else() + set(${OUT_VAR} + ON + PARENT_SCOPE + ) + endif() +endfunction() + +function(arm_ensure_ethos_u_content SDK_PATH EXECUTORCH_ROOT FETCH_REQUESTED) + arm_ethos_u_content_ready("${SDK_PATH}" _arm_ethos_ready_before) + + if(_arm_ethos_ready_before) + return() + endif() + + if(NOT FETCH_REQUESTED) + message( + FATAL_ERROR + "No Ethos-U content found at ${SDK_PATH}. Run examples/arm/setup.sh or enable FETCH_ETHOS_U_CONTENT=ON." + ) + endif() + + fetch_ethos_u_content(${SDK_PATH} ${EXECUTORCH_ROOT}) + + arm_ethos_u_content_ready("${SDK_PATH}" _arm_ethos_ready_after) + if(NOT _arm_ethos_ready_after) + message( + FATAL_ERROR + "Failed to fetch Ethos-U content into ${SDK_PATH}. Inspect the logs above." 
+ ) + endif() +endfunction() diff --git a/backends/arm/cmake/ArmRunnerUtils.cmake b/backends/arm/cmake/ArmRunnerUtils.cmake new file mode 100644 index 00000000000..e67f38eec22 --- /dev/null +++ b/backends/arm/cmake/ArmRunnerUtils.cmake @@ -0,0 +1,69 @@ +# Copyright 2026 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +include_guard(GLOBAL) + +# Helper routines shared by the standalone runner and any superbuild that reuses +# the runner targets. + +function(arm_runner_require_baremetal_targets) + if(NOT TARGET extension_runner_util) + message( + FATAL_ERROR + "extension_runner_util target missing. Configure ExecuTorch (or the standalone runner) with EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON." + ) + endif() + + if(NOT TARGET quantized_ops_lib OR NOT TARGET quantized_kernels) + message( + FATAL_ERROR + "quantized kernels not found. Ensure EXECUTORCH_BUILD_KERNELS_QUANTIZED=ON when configuring ExecuTorch." + ) + endif() + + if(NOT TARGET cortex_m_ops_lib OR NOT TARGET cortex_m_kernels) + message( + FATAL_ERROR + "cortex_m backend not found. Ensure EXECUTORCH_BUILD_CORTEX_M=ON when configuring ExecuTorch." + ) + endif() +endfunction() + +# Ensure a runner target emits its binary to a predictable location. Uses +# FALLBACK_DIR when TARGET_NAME has no runtime output directory set, and also +# fills per-configuration runtime output directories for multi-config generators +# when they are unset. 
+function(arm_runner_configure_runtime_output TARGET_NAME FALLBACK_DIR) + if(NOT TARGET ${TARGET_NAME}) + return() + endif() + + get_target_property(_base_runtime_dir ${TARGET_NAME} RUNTIME_OUTPUT_DIRECTORY) + if(NOT _base_runtime_dir + OR _base_runtime_dir STREQUAL "_base_runtime_dir-NOTFOUND" + OR "${_base_runtime_dir}" STREQUAL "" + ) + set_target_properties( + ${TARGET_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${FALLBACK_DIR}" + ) + set(_base_runtime_dir "${FALLBACK_DIR}") + endif() + + if(CMAKE_CONFIGURATION_TYPES) + foreach(_cfg ${CMAKE_CONFIGURATION_TYPES}) + string(TOUPPER ${_cfg} _cfg_upper) + set(_cfg_prop "RUNTIME_OUTPUT_DIRECTORY_${_cfg_upper}") + get_target_property(_cfg_dir ${TARGET_NAME} ${_cfg_prop}) + if(NOT _cfg_dir + OR _cfg_dir STREQUAL "_cfg_dir-NOTFOUND" + OR "${_cfg_dir}" STREQUAL "" + ) + set_target_properties( + ${TARGET_NAME} PROPERTIES ${_cfg_prop} "${_base_runtime_dir}/${_cfg}" + ) + endif() + endforeach() + endif() +endfunction() diff --git a/backends/arm/scripts/build_executor_runner.sh b/backends/arm/scripts/build_executor_runner.sh index f2ffd2e27a7..55f1a272b9e 100755 --- a/backends/arm/scripts/build_executor_runner.sh +++ b/backends/arm/scripts/build_executor_runner.sh @@ -9,6 +9,9 @@ set -eu script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) et_root_dir=$(cd ${script_dir}/../../.. && pwd) et_root_dir=$(realpath ${et_root_dir}) +runner_source_dir=${et_root_dir}/examples/arm/executor_runner/standalone +runner_source_dir=$(realpath ${runner_source_dir}) +preset_file=${et_root_dir}/tools/cmake/preset/arm_baremetal.cmake toolchain=arm-none-eabi-gcc setup_path_script=${et_root_dir}/examples/arm/arm-scratch/setup_path.sh _setup_msg="please refer to ${et_root_dir}/examples/arm/setup.sh to properly install necessary tools." @@ -101,6 +104,9 @@ toolchain_cmake=$(realpath ${toolchain_cmake}) source ${setup_path_script} +[[ -f ${preset_file} ]] \ + || { echo "Missing ${preset_file}. 
${_setup_msg}"; exit 1; } + if [[ ${pte_file} == "semihosting" ]]; then pte_data="-DSEMIHOSTING=ON" else @@ -122,13 +128,13 @@ else fi fi ethosu_tools_dir=$(realpath ${ethosu_tools_dir}) -ethos_u_root_dir="$ethosu_tools_dir/ethos-u" +ethos_u_root_dir="${ethosu_tools_dir}/ethos-u" mkdir -p "${ethos_u_root_dir}" -ethosu_tools_dir=$(realpath ${ethos_u_root_dir}) - -et_build_dir=${et_build_root}/cmake-out -mkdir -p ${et_build_dir} -et_build_dir=$(realpath ${et_build_dir}) +ethos_u_root_dir=$(realpath ${ethos_u_root_dir}) +cmsis_nn_local_path="" +if [[ -d "${ethos_u_root_dir}/core_software/cmsis-nn" ]]; then + cmsis_nn_local_path=$(realpath "${ethos_u_root_dir}/core_software/cmsis-nn") +fi if [[ ${system_config} == "" ]] then @@ -160,34 +166,47 @@ echo "-------------------------------------------------------------------------- echo "Build Arm ${toolchain/-gcc/} executor_runner for ${target} PTE: ${pte_file} using ${system_config} ${memory_mode} ${extra_build_flags} to '${output_folder}'" echo "--------------------------------------------------------------------------------" -cd ${et_root_dir}/examples/arm/executor_runner - if [ "$bundleio" = true ] ; then build_bundleio_flags=" -DET_BUNDLE_IO=ON " + candidate_build_dir="${et_build_root}/cmake-out" + if [[ -d "${candidate_build_dir}" ]]; then + candidate_build_dir=$(realpath "${candidate_build_dir}") + build_bundleio_flags+=" -DET_BUILD_DIR_PATH=${candidate_build_dir} " + fi + if [[ -n "${BUNDLED_PROGRAM_LIBRARY_DIR:-}" ]]; then + build_bundleio_flags+=" -DBUNDLED_PROGRAM_LIBRARY_DIR=${BUNDLED_PROGRAM_LIBRARY_DIR} " + fi fi if [ "$build_with_etdump" = true ] ; then build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=ON -DET_DUMP_INTERMEDIATE_OUTPUTS=ON " fi +devtools_flags="" +if [ "$bundleio" = true ] || [ "$build_with_etdump" = true ] ; then + devtools_flags=" -DEXECUTORCH_BUILD_DEVTOOLS=ON " +fi -echo "Building with BundleIO/etdump/extra flags: ${build_bundleio_flags} ${build_with_etdump_flags} 
${extra_build_flags}" +echo "Building with BundleIO/etdump/extra flags: ${build_bundleio_flags} ${build_with_etdump_flags} ${devtools_flags} ${extra_build_flags}" cmake \ - -DCMAKE_BUILD_TYPE=${build_type} \ - -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} \ - -DTARGET_CPU=${target_cpu} \ - -DET_DIR_PATH:PATH=${et_root_dir} \ - -DET_BUILD_DIR_PATH:PATH=${et_build_dir} \ - -DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \ - -DETHOSU_TARGET_NPU_CONFIG=${target} \ - ${pte_data} \ - ${build_bundleio_flags} \ - ${build_with_etdump_flags} \ - -DPYTHON_EXECUTABLE=$(which python3) \ - -DSYSTEM_CONFIG=${system_config} \ - -DMEMORY_MODE=${memory_mode} \ + -S ${runner_source_dir} \ + -B ${output_folder} \ + -DEXECUTORCH_ROOT=${et_root_dir} \ + -DCMAKE_BUILD_TYPE=${build_type} \ + -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} \ + -DTARGET_CPU=${target_cpu} \ + -DETHOSU_TARGET_NPU_CONFIG=${target} \ + -DEXECUTORCH_BUILD_PRESET_FILE=${preset_file} \ + -DEXECUTORCH_BAREMETAL_SKIP_INSTALL=OFF \ + ${pte_data} \ + ${build_bundleio_flags} \ + ${build_with_etdump_flags} \ + ${devtools_flags} \ + -DSYSTEM_CONFIG=${system_config} \ + -DMEMORY_MODE=${memory_mode} \ -DEXECUTORCH_SELECT_OPS_LIST="${select_ops_list}" \ - ${extra_build_flags} \ - -B ${output_folder} + -DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \ + ${cmsis_nn_local_path:+-DCMSIS_NN_LOCAL_PATH:PATH=${cmsis_nn_local_path}} \ + ${extra_build_flags} echo "[${BASH_SOURCE[0]}] Configured CMAKE" diff --git a/backends/arm/scripts/build_executorch.sh b/backends/arm/scripts/build_executorch.sh index cf7e327b9ce..828cec77ae8 100755 --- a/backends/arm/scripts/build_executorch.sh +++ b/backends/arm/scripts/build_executorch.sh @@ -85,6 +85,7 @@ cmake_args=( -DCMAKE_BUILD_TYPE=${build_type} -DEXECUTORCH_BUILD_DEVTOOLS=${build_devtools} -DEXECUTORCH_BUILD_ARM_ETDUMP=${build_with_etdump} + -DEXECUTORCH_BAREMETAL_SKIP_INSTALL=OFF ) if [[ ${is_linux_musl} -eq 1 ]]; then @@ -108,7 +109,7 @@ parallel_jobs="$(get_parallel_jobs)" if [[ ${is_linux_musl} -eq 1 ]]; 
then cmake --build ${et_build_dir} -j"${parallel_jobs}" --target executorch_delegate_ethos_u executor_runner --config ${build_type} -- else - cmake --build ${et_build_dir} -j"${parallel_jobs}" --target install --config ${build_type} -- + cmake --build ${et_build_dir} -j"${parallel_jobs}" --config ${build_type} fi set +x diff --git a/backends/arm/scripts/docgen/ethos-u/backends-arm-ethos-u-overview.md.in b/backends/arm/scripts/docgen/ethos-u/backends-arm-ethos-u-overview.md.in index 1990bc6d946..4e9f04c85b5 100644 --- a/backends/arm/scripts/docgen/ethos-u/backends-arm-ethos-u-overview.md.in +++ b/backends/arm/scripts/docgen/ethos-u/backends-arm-ethos-u-overview.md.in @@ -4,7 +4,7 @@ The Arm® Ethos™-U backend targets Edge/IoT-type AI use-cases by enabli [Arm® Ethos™-U55 NPU](https://www.arm.com/products/silicon-ip-cpu/ethos/ethos-u55), [Arm® Ethos™-U65 NPU](https://www.arm.com/products/silicon-ip-cpu/ethos/ethos-u65), and [Arm® Ethos™-U85 NPU](https://www.arm.com/products/silicon-ip-cpu/ethos/ethos-u85), leveraging [TOSA](https://www.mlplatform.org/tosa/) and the [ethos-u-vela](https://pypi.org/project/ethos-u-vela/) graph compiler. This document is a technical reference for using the Ethos-U backend, for a top level view with code examples -please refer to the [Arm Ethos-U Backend Tutorial](https://docs.pytorch.org/executorch/stable/tutorial-arm-ethos-u.html). +please refer to the [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md). ## Features @@ -27,7 +27,7 @@ For the AOT flow, compilation of a model to `.pte` format using the Ethos-U back - [TOSA Serialization Library](https://www.mlplatform.org/tosa/software.html) for serializing the Exir IR graph into TOSA IR. - [Ethos-U Vela graph compiler](https://pypi.org/project/ethos-u-vela/) for compiling TOSA flatbuffers into an Ethos-U command stream. 
-And for building and running the example application available in `examples/arm/executor_runner/`: +And for building and running the example application available in `examples/arm/executor_runner/` through the standalone CMake entry point: - [Arm GNU Toolchain](https://developer.arm.com/Tools%20and%20Software/GNU%20Toolchain) for cross compilation. - [Arm® Corstone™ SSE-300 FVP](https://developer.arm.com/documentation/100966/1128/Arm--Corstone-SSE-300-FVP) for testing on a Arm® Cortex®-M55+Ethos-U55 reference design. - [Arm® Corstone™ SSE-320 FVP](https://developer.arm.com/documentation/109760/0000/SSE-320-FVP) for testing on a Arm® Cortex®-M85+Ethos-U85 reference design. @@ -55,7 +55,7 @@ For more information on quantization, see [Quantization](arm-ethos-u-quantizatio ## Runtime Integration -An example runtime application is available in [examples/arm/executor_runner](https://github.com/pytorch/executorch/blob/main/examples/arm/executor_runner/), and the steps requried for building and deploying it on a FVP it is explained in the previously mentioned [Arm Ethos-U Backend Tutorial](https://docs.pytorch.org/executorch/stable/tutorial-arm-ethos-u.html). +An example runtime application is available in [examples/arm/executor_runner](https://github.com/pytorch/executorch/blob/main/examples/arm/executor_runner/), with a standalone CMake entry point in `examples/arm/executor_runner/standalone`. The steps required for building and deploying it on an FVP are explained in the previously mentioned [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md). The example application is recommended to use for testing basic functionality of your lowered models, as well as a starting point for developing runtime integrations for your own targets. 
For an in-depth explanation of the architecture of the executor_runner and the steps required for doing such an integration, please refer to [Ethos-U porting guide](https://github.com/pytorch/executorch/blob/main/examples/arm/ethos-u-porting-guide.md). diff --git a/backends/arm/scripts/docgen/ethos-u/ethos-u-getting-started-tutorial.md.in b/backends/arm/scripts/docgen/ethos-u/ethos-u-getting-started-tutorial.md.in index 68b73755317..0222e51a2fd 100644 --- a/backends/arm/scripts/docgen/ethos-u/ethos-u-getting-started-tutorial.md.in +++ b/backends/arm/scripts/docgen/ethos-u/ethos-u-getting-started-tutorial.md.in @@ -76,35 +76,28 @@ To produce a pte file equivalent to the one above, run ### Runtime: -After the AOT compilation flow is done, the runtime can be cross compiled and linked to the produced `.pte`-file using the Arm cross-compilation toolchain. This is done in two steps: +After the AOT compilation flow is done, the runtime can be cross compiled and linked to the produced `.pte`-file using the Arm cross-compilation toolchain. Configure the standalone Arm executor runner CMake project to pull in the ExecuTorch build graph, link the Ethos-U delegate, and generate kernel bindings for any non-delegated ops. This produces the `arm_executor_runner` program that will run on target. -First, build and install the ExecuTorch libraries and EthosUDelegate: ``` # In ExecuTorch top-level, with sourced setup_path.sh -cmake -DCMAKE_BUILD_TYPE=Release --preset arm-baremetal -B cmake-out-arm . -cmake --build cmake-out-arm --target install -j$(nproc) -``` -Second, build and link the `arm_executor_runner` and generate kernel bindings for any non delegated ops. This is the actual program that will run on target. 
- -``` -# In ExecuTorch top-level, with sourced setup_path.sh -cmake -DCMAKE_TOOLCHAIN_FILE=`pwd`/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake \ +cmake -S examples/arm/executor_runner/standalone \ + -B ethos_u_minimal_example \ + -DEXECUTORCH_ROOT=$(pwd) \ + -DCMAKE_TOOLCHAIN_FILE=$(pwd)/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake \ -DCMAKE_BUILD_TYPE=Release \ -DET_PTE_FILE_PATH=ethos_u_minimal_example.pte \ -DTARGET_CPU=cortex-m55 \ -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \ -DMEMORY_MODE=Shared_Sram \ - -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \ - -Bethos_u_minimal_example \ - examples/arm/executor_runner + -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded cmake --build ethos_u_minimal_example -j$(nproc) -- arm_executor_runner ``` ```{tip} -For a quick start, you can use the script `backends/arm/scripts/build_executor_runner.sh` to build the runner. +For a quick start, you can use the script `backends/arm/scripts/build_executor_runner.sh` to configure and build the standalone runner. To build a runner equivalent to the one above, run `./backends/arm/scripts/build_executor_runner.sh --pte=ethos_u_minimal_example.pte` -```` +``` The block diagram below shows, at the high level, how the various build artifacts are generated and are linked together to generate the final bare-metal executable. 
diff --git a/backends/arm/scripts/run_fvp.sh b/backends/arm/scripts/run_fvp.sh index 9f0010189af..9da309fbe41 100755 --- a/backends/arm/scripts/run_fvp.sh +++ b/backends/arm/scripts/run_fvp.sh @@ -151,7 +151,7 @@ elif [[ ${target} == *"ethos-u55"* ]]; then -C mps3_board.telnetterminal0.start_telnet=0 \ -C mps3_board.uart0.out_file='-' \ -C mps3_board.uart0.shutdown_on_eot=1 \ - "${extra_args_u55[@]}" \ + ${extra_args_u55[@]+"${extra_args_u55[@]}"} \ -a "${elf_file}" \ ${data_file} \ --timelimit ${timeout} 2>&1 | sed 's/\r$//' | tee ${log_file} || true # seconds @@ -164,7 +164,7 @@ elif [[ ${target} == *"ethos-u85"* ]]; then -C mps4_board.telnetterminal0.start_telnet=0 \ -C mps4_board.uart0.out_file='-' \ -C mps4_board.uart0.shutdown_on_eot=1 \ - "${extra_args_u85[@]}" \ + ${extra_args_u85[@]+"${extra_args_u85[@]}"} \ -a "${elf_file}" \ ${data_file} \ --timelimit ${timeout} 2>&1 | sed 's/\r$//' | tee ${log_file} || true # seconds diff --git a/backends/arm/test/misc/test_runner_utils.py b/backends/arm/test/misc/test_runner_utils.py index 10a8b6df3a6..3c78b21e008 100644 --- a/backends/arm/test/misc/test_runner_utils.py +++ b/backends/arm/test/misc/test_runner_utils.py @@ -77,3 +77,39 @@ def _fake_run_cmd(cmd, check=True): assert "-i i1.bin" in semihosting_cmd_arg assert long_input_paths[0] not in semihosting_cmd_arg assert long_input_paths[1] not in semihosting_cmd_arg + + +def test_get_elf_path_uses_repo_root_candidates(monkeypatch, tmp_path: Path) -> None: + elf_path = ( + tmp_path + / "arm_test" + / "arm_semihosting_executor_runner_corstone-300" + / "arm_executor_runner" + ) + elf_path.parent.mkdir(parents=True) + elf_path.write_bytes(b"") + + monkeypatch.setattr(runner_utils, "_elf_search_roots", lambda: [tmp_path]) + other_cwd = tmp_path / "elsewhere" + other_cwd.mkdir() + monkeypatch.chdir(other_cwd) + + assert runner_utils.get_elf_path("corstone-300") == str(elf_path) + + +def test_get_elf_path_accepts_nested_runner_output(monkeypatch, tmp_path: Path) -> None: 
+ elf_path = ( + tmp_path + / "arm_test" + / "arm_semihosting_executor_runner_corstone-300" + / "examples" + / "arm" + / "executor_runner" + / "arm_executor_runner" + ) + elf_path.parent.mkdir(parents=True) + elf_path.write_bytes(b"") + + monkeypatch.setattr(runner_utils, "_elf_search_roots", lambda: [tmp_path]) + + assert runner_utils.get_elf_path("corstone-300") == str(elf_path) diff --git a/backends/arm/test/runner_utils.py b/backends/arm/test/runner_utils.py index 914a95f0c8d..93887fbda6b 100644 --- a/backends/arm/test/runner_utils.py +++ b/backends/arm/test/runner_utils.py @@ -12,6 +12,7 @@ import subprocess # nosec B404 - invoked only for trusted toolchain binaries import sys import tempfile +from collections.abc import Iterable from pathlib import Path from types import NoneType @@ -848,39 +849,98 @@ def vkml_emulation_layer_installed() -> bool: return layers_exists and deploy_exists -def assert_elf_path_exists(elf_path): - if not os.path.exists(elf_path): - raise FileNotFoundError( - f"Did not find build arm_executor_runner or executor_runner in path {elf_path}, \ - run setup_testing.sh or setup_testing_vkml.sh?" 
- ) - - -def get_elf_path(target_board: str, use_portable_ops: bool = False) -> str: - elf_path = "" +def _elf_search_roots() -> list[Path]: + roots: list[Path] = [] + for env_var in ( + "EXECUTORCH_ROOT", + "GITHUB_WORKSPACE", + "BUILD_WORKSPACE_DIRECTORY", + ): + env_root = os.environ.get(env_var) + if env_root: + roots.append(Path(env_root).expanduser()) + + cwd = Path.cwd().resolve() + search_parents = [cwd, *cwd.parents, *Path(__file__).resolve().parents] + for parent in search_parents: + if (parent / "examples" / "arm").is_dir() or (parent / "arm_test").exists(): + roots.append(parent) + + unique_roots: list[Path] = [] + seen: set[Path] = set() + for root in roots: + resolved = root.resolve() + if resolved not in seen: + unique_roots.append(resolved) + seen.add(resolved) + return unique_roots + + +def _elf_path_candidates( + target_board: str, use_portable_ops: bool = False +) -> list[Path]: if target_board not in VALID_TARGET: raise ValueError(f"Unsupported target: {target_board}") - if use_portable_ops: - portable_ops_str = "portable-ops_" - else: - portable_ops_str = "" - + portable_ops_str = "portable-ops_" if use_portable_ops else "" if target_board in ("corstone-300", "corstone-320"): - elf_path = os.path.join( + build_dir = Path( "arm_test", f"arm_semihosting_executor_runner_{portable_ops_str}{target_board}", - "arm_executor_runner", ) - elif target_board == "vkml_emulation_layer": - elf_path = os.path.join( - f"arm_test/arm_executor_runner_{portable_ops_str}vkml", - "executor_runner", + binary_name = "arm_executor_runner" + else: + build_dir = Path("arm_test", f"arm_executor_runner_{portable_ops_str}vkml") + binary_name = "executor_runner" + + candidates: list[Path] = [] + for root in _elf_search_roots(): + root_build_dir = root / build_dir + candidates.extend( + [ + root_build_dir / binary_name, + root_build_dir / "Release" / binary_name, + root_build_dir / "examples" / "arm" / "executor_runner" / binary_name, + root_build_dir + / "examples" + / 
"arm" + / "executor_runner" + / "Release" + / binary_name, + ] ) - assert_elf_path_exists(elf_path) - return elf_path + unique_candidates: list[Path] = [] + seen: set[Path] = set() + for candidate in candidates: + resolved = candidate.resolve(strict=False) + if resolved not in seen: + unique_candidates.append(resolved) + seen.add(resolved) + return unique_candidates + + +def _resolve_existing_elf_path(elf_candidates: Iterable[Path]) -> Path: + checked: list[Path] = [] + for elf_path in elf_candidates: + checked.append(elf_path) + if elf_path.exists(): + return elf_path + + checked_paths = ", ".join(str(path) for path in checked) + raise FileNotFoundError( + "Did not find build arm_executor_runner or executor_runner. " + f"Tried: {checked_paths}. " + "Run setup_testing.sh or setup_testing_vkml.sh?" + ) + + +def get_elf_path(target_board: str, use_portable_ops: bool = False) -> str: + elf_path = _resolve_existing_elf_path( + _elf_path_candidates(target_board, use_portable_ops=use_portable_ops) + ) + return str(elf_path) def arm_executor_runner_exists(target_board: str, use_portable_ops: bool = False): diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh index ad8cd8b7d3a..18ea908f816 100755 --- a/backends/arm/test/test_arm_baremetal.sh +++ b/backends/arm/test/test_arm_baremetal.sh @@ -256,14 +256,17 @@ test_pytest_models_vkml() { test_run_vkml() { echo "${TEST_SUITE_NAME}: Test VKML delegate examples with run.sh" + source backends/arm/test/setup_testing_vkml.sh + echo "${TEST_SUITE_NAME}: Test VKML" out_folder="arm_test/test_run" + vkml_build_dir="${build_root_test_dir}" - examples/arm/run.sh --et_build_root=${out_folder} --target=vgf --model_name=add --output=${out_folder}/runner - examples/arm/run.sh --et_build_root=${out_folder} --target=vgf --model_name=mul --output=${out_folder}/runner + examples/arm/run.sh --build-dir="${vkml_build_dir}" --et_build_root=${out_folder} --target=vgf --model_name=add 
--output=${out_folder}/runner + examples/arm/run.sh --build-dir="${vkml_build_dir}" --et_build_root=${out_folder} --target=vgf --model_name=mul --output=${out_folder}/runner - examples/arm/run.sh --et_build_root=${out_folder} --target=vgf --model_name=qadd --output=${out_folder}/runner - examples/arm/run.sh --et_build_root=${out_folder} --target=vgf --model_name=qops --output=${out_folder}/runner + examples/arm/run.sh --build-dir="${vkml_build_dir}" --et_build_root=${out_folder} --target=vgf --model_name=qadd --output=${out_folder}/runner + examples/arm/run.sh --build-dir="${vkml_build_dir}" --et_build_root=${out_folder} --target=vgf --model_name=qops --output=${out_folder}/runner echo "${TEST_SUITE_NAME}: PASS" } diff --git a/backends/cortex_m/CMakeLists.txt b/backends/cortex_m/CMakeLists.txt index 8c8255b7b1b..876c65982e6 100644 --- a/backends/cortex_m/CMakeLists.txt +++ b/backends/cortex_m/CMakeLists.txt @@ -50,6 +50,26 @@ else() FetchContent_MakeAvailable(cmsis_nn) endif() +if(TARGET cmsis-nn) + if(CMSIS_NN_LOCAL_PATH AND EXISTS "${CMSIS_NN_LOCAL_PATH}") + set(cmsis_nn_source_dir "${CMSIS_NN_LOCAL_PATH}") + else() + set(cmsis_nn_source_dir "${cmsis_nn_SOURCE_DIR}") + endif() + if(cmsis_nn_source_dir) + set(cmsis_nn_include_dir "${cmsis_nn_source_dir}/Include") + set_target_properties( + cmsis-nn + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES + "$<BUILD_INTERFACE:${cmsis_nn_include_dir}>;$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/cmsis-nn>" + ) + install(DIRECTORY "${cmsis_nn_include_dir}/" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/cmsis-nn" + ) + endif() +endif() + # Cortex-M ops kernel sources set(_cortex_m_kernels__srcs ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_dequantize_per_tensor.cpp diff --git a/docs/source/backends/arm-ethos-u/arm-ethos-u-overview.md b/docs/source/backends/arm-ethos-u/arm-ethos-u-overview.md index faffedece35..28b5ce24338 100644 --- a/docs/source/backends/arm-ethos-u/arm-ethos-u-overview.md +++ b/docs/source/backends/arm-ethos-u/arm-ethos-u-overview.md @@ -27,7 +27,7 @@ For the AOT flow, compilation of a model to `.pte` format using 
the Ethos-U back - [TOSA Serialization Library](https://www.mlplatform.org/tosa/software.html) for serializing the Exir IR graph into TOSA IR. - [Ethos-U Vela graph compiler](https://pypi.org/project/ethos-u-vela/) for compiling TOSA flatbuffers into an Ethos-U command stream. -And for building and running the example application available in `examples/arm/executor_runner/`: +And for building and running the example application available in `examples/arm/executor_runner/` through the standalone CMake entry point: - [Arm GNU Toolchain](https://developer.arm.com/Tools%20and%20Software/GNU%20Toolchain) for cross compilation. - [Arm® Corstone™ SSE-300 FVP](https://developer.arm.com/documentation/100966/1128/Arm--Corstone-SSE-300-FVP) for testing on a Arm® Cortex®-M55+Ethos-U55 reference design. - [Arm® Corstone™ SSE-320 FVP](https://developer.arm.com/documentation/109760/0000/SSE-320-FVP) for testing on a Arm® Cortex®-M85+Ethos-U85 reference design. @@ -111,7 +111,7 @@ For more information on quantization, see [Quantization](arm-ethos-u-quantizatio ## Runtime Integration -An example runtime application is available in [examples/arm/executor_runner](https://github.com/pytorch/executorch/blob/main/examples/arm/executor_runner/), and the steps requried for building and deploying it on a FVP it is explained in the previously mentioned [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md). +An example runtime application is available in [examples/arm/executor_runner](https://github.com/pytorch/executorch/blob/main/examples/arm/executor_runner/), with a standalone CMake entry point in `examples/arm/executor_runner/standalone`. The steps required for building and deploying it on an FVP are explained in the previously mentioned [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md). 
The example application is recommended to use for testing basic functionality of your lowered models, as well as a starting point for developing runtime integrations for your own targets. For an in-depth explanation of the architecture of the executor_runner and the steps required for doing such an integration, please refer to [Ethos-U porting guide](https://github.com/pytorch/executorch/blob/main/examples/arm/ethos-u-porting-guide.md). diff --git a/docs/source/backends/arm-ethos-u/tutorials/ethos-u-getting-started.md b/docs/source/backends/arm-ethos-u/tutorials/ethos-u-getting-started.md index 841827cff9b..de0708c8735 100644 --- a/docs/source/backends/arm-ethos-u/tutorials/ethos-u-getting-started.md +++ b/docs/source/backends/arm-ethos-u/tutorials/ethos-u-getting-started.md @@ -149,35 +149,28 @@ To produce a pte file equivalent to the one above, run ### Runtime: -After the AOT compilation flow is done, the runtime can be cross compiled and linked to the produced `.pte`-file using the Arm cross-compilation toolchain. This is done in two steps: +After the AOT compilation flow is done, the runtime can be cross compiled and linked to the produced `.pte`-file using the Arm cross-compilation toolchain. Configure the standalone Arm executor runner CMake project to pull in the ExecuTorch build graph, link the Ethos-U delegate, and generate kernel bindings for any non-delegated ops. This produces the `arm_executor_runner` program that will run on target. -First, build and install the ExecuTorch libraries and EthosUDelegate: ``` # In ExecuTorch top-level, with sourced setup_path.sh -cmake -DCMAKE_BUILD_TYPE=Release --preset arm-baremetal -B cmake-out-arm . -cmake --build cmake-out-arm --target install -j$(nproc) -``` -Second, build and link the `arm_executor_runner` and generate kernel bindings for any non delegated ops. This is the actual program that will run on target. 
- -``` -# In ExecuTorch top-level, with sourced setup_path.sh -cmake -DCMAKE_TOOLCHAIN_FILE=`pwd`/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake \ +cmake -S examples/arm/executor_runner/standalone \ + -B ethos_u_minimal_example \ + -DEXECUTORCH_ROOT=$(pwd) \ + -DCMAKE_TOOLCHAIN_FILE=$(pwd)/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake \ -DCMAKE_BUILD_TYPE=Release \ -DET_PTE_FILE_PATH=ethos_u_minimal_example.pte \ -DTARGET_CPU=cortex-m55 \ -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \ -DMEMORY_MODE=Shared_Sram \ - -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \ - -Bethos_u_minimal_example \ - examples/arm/executor_runner + -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded cmake --build ethos_u_minimal_example -j$(nproc) -- arm_executor_runner ``` ```{tip} -For a quick start, you can use the script `backends/arm/scripts/build_executor_runner.sh` to build the runner. +For a quick start, you can use the script `backends/arm/scripts/build_executor_runner.sh` to configure and build the standalone runner. To build a runner equivalent to the one above, run `./backends/arm/scripts/build_executor_runner.sh --pte=ethos_u_minimal_example.pte` -```` +``` The block diagram below shows, at the high level, how the various build artifacts are generated and are linked together to generate the final bare-metal executable. diff --git a/examples/arm/README.md b/examples/arm/README.md index bcd8a1e1d0a..1e602700f78 100644 --- a/examples/arm/README.md +++ b/examples/arm/README.md @@ -1,3 +1,10 @@ + + ## ExecuTorch for Arm backends Ethos-U, VGF and Cortex-M This project contains scripts to help you setup and run a PyTorch @@ -11,12 +18,19 @@ The main scripts are `setup.sh`, `run.sh` and `setup.sh` will install the needed tools and with --root-dir you can change the path to a scratch folder where it will download and generate build artifacts. 
If supplied, you must also supply the same folder to run.sh with ---scratch-dir= If not supplied both script will use examples/arm/arm-scratch +--scratch-dir= If not supplied both scripts will use examples/arm/arm-scratch. `run.sh` can be used to build, run and test a model in an easy way and it will call cmake for you and in cases you want to run a simulator it will start it also. The script will call `aot_arm_compiler.py` to convert a model and include it in the build/run. +For bare-metal Ethos-U builds `run.sh` configures the standalone +`examples/arm/executor_runner/standalone` CMake entry point automatically. If +`--build-dir` is omitted, the script creates and owns a build tree under +`arm_test/_`. Supplying `--build-dir` reuses an existing tree +(for example a VGF host build or out-of-tree configuration) and `run.sh` +verifies it exposes the runner options it needs before compiling. + Build and test artifacts are by default placed under the folder arm_test folder this can be changed with --et_build_root= diff --git a/examples/arm/cortex_m_mv2_example.ipynb b/examples/arm/cortex_m_mv2_example.ipynb index c2fe4342773..36844b4e5fd 100644 --- a/examples/arm/cortex_m_mv2_example.ipynb +++ b/examples/arm/cortex_m_mv2_example.ipynb @@ -136,7 +136,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "%%bash \n# Build example executor runner application to examples/arm/cortex_m_mv2_example\n# Note that this is the same runner as used in the Ethos-U example, creating some overlap in the config even though the Ethos-U is not used.\ncmake -DCMAKE_TOOLCHAIN_FILE=$(pwd)/ethos-u-setup/arm-none-eabi-gcc.cmake \\\n -DCMAKE_BUILD_TYPE=Release \\\n -DET_PTE_FILE_PATH=cortex_m_mv2_example.bpte \\\n -DTARGET_CPU=cortex-m55 \\\n -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \\\n -DMEMORY_MODE=Shared_Sram \\\n -DET_BUNDLE_IO=ON \\\n -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \\\n -Bcortex_m_mv2_example \\\n executor_runner\ncmake --build cortex_m_mv2_example 
-j$(nproc) -- arm_executor_runner" + "source": "%%bash \n# Build example executor runner application to examples/arm/cortex_m_mv2_example\n# Note that this is the same runner as used in the Ethos-U example, creating some overlap in the config even though the Ethos-U is not used.\ncmake -DCMAKE_TOOLCHAIN_FILE=$(pwd)/ethos-u-setup/arm-none-eabi-gcc.cmake \\\n -DCMAKE_BUILD_TYPE=Release \\\n -DET_PTE_FILE_PATH=cortex_m_mv2_example.bpte \\\n -DTARGET_CPU=cortex-m55 \\\n -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \\\n -DMEMORY_MODE=Shared_Sram \\\n -DET_BUNDLE_IO=ON \\\n -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \\\n -Bcortex_m_mv2_example \\\n -S executor_runner/standalone\ncmake --build cortex_m_mv2_example -j$(nproc) -- arm_executor_runner" }, { "cell_type": "markdown", @@ -179,4 +179,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/arm/ethos_u_minimal_example.ipynb b/examples/arm/ethos_u_minimal_example.ipynb index fbb15cd0e57..11f24019d23 100644 --- a/examples/arm/ethos_u_minimal_example.ipynb +++ b/examples/arm/ethos_u_minimal_example.ipynb @@ -171,26 +171,8 @@ "source": [ "## Build executor runtime\n", "\n", - "After the AOT compilation flow is done, the runtime can be cross compiled and linked to the produced .pte-file using the Arm cross-compilation toolchain. This is done in two steps:\n", - "1. Build and install the executorch libraries and EthosUDelegate.\n", - "2. Build and link the `arm_executor_runner` and generate kernel bindings for any non delegated ops." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "# Ensure the arm-none-eabi-gcc toolchain and FVP:s are available on $PATH\n", - "source arm-scratch/setup_path.sh\n", - "\n", - "# Build executorch libraries cross-compiled for arm baremetal to executorch/cmake-out-arm\n", - "cmake --preset arm-baremetal \\\n", - "-DCMAKE_BUILD_TYPE=Release \\\n", - "-B../../cmake-out-arm ../..\n", - "cmake --build ../../cmake-out-arm --target install -j$(nproc) " + "After the AOT compilation flow finishes, cross-compile and link the runtime by configuring the standalone `examples/arm/executor_runner/standalone` CMake project with the Arm toolchain.\n", + "It automatically pulls the ExecuTorch checkout in as a dependency so the delegate, kernels, and runner util are rebuilt alongside the application, and it generates kernel bindings for any non-delegated ops found in the `.pte`.\n" ] }, { @@ -201,6 +183,8 @@ "source": [ "%%bash \n", "source arm-scratch/setup_path.sh\n", + "# Ensure CMake resolves the ExecuTorch checkout root regardless of caller env\n", + "export EXECUTORCH_ROOT=$(cd ../.. && pwd)\n", "\n", "# Build example executor runner application to examples/arm/ethos_u_minimal_example\n", "cmake -DCMAKE_TOOLCHAIN_FILE=$(pwd)/ethos-u-setup/arm-none-eabi-gcc.cmake \\\n", @@ -211,7 +195,7 @@ " -DMEMORY_MODE=Shared_Sram \\\n", " -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \\\n", " -Bethos_u_minimal_example \\\n", - " executor_runner\n", + " -S executor_runner/standalone\n", "cmake --build ethos_u_minimal_example -j$(nproc) -- arm_executor_runner" ] }, @@ -232,6 +216,8 @@ "source": [ "%%bash \n", "source arm-scratch/setup_path.sh\n", + "# Ensure CMake resolves the ExecuTorch checkout root regardless of caller env\n", + "export EXECUTORCH_ROOT=$(cd ../.. 
&& pwd)\n", "\n", "# Run the example\n", "../../backends/arm/scripts/run_fvp.sh --elf=ethos_u_minimal_example/arm_executor_runner --target=ethos-u55-128" diff --git a/examples/arm/executor_runner/CMakeLists.txt b/examples/arm/executor_runner/CMakeLists.txt index c169f5d447a..d84947a75ad 100644 --- a/examples/arm/executor_runner/CMakeLists.txt +++ b/examples/arm/executor_runner/CMakeLists.txt @@ -3,8 +3,49 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -cmake_minimum_required(VERSION 3.20) -project(arm_executor_runner) +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + cmake_minimum_required(VERSION 3.20) + project(arm_executor_runner_redirect LANGUAGES C CXX) + message( + FATAL_ERROR + "Configure standalone arm_executor_runner builds from ${CMAKE_CURRENT_LIST_DIR}/standalone instead of ${CMAKE_CURRENT_LIST_DIR}." + ) +endif() + +get_filename_component( + _default_executorch_root "${CMAKE_CURRENT_LIST_DIR}/../../.." ABSOLUTE +) + +if(NOT DEFINED EXECUTORCH_ROOT) + set(EXECUTORCH_ROOT + "${_default_executorch_root}" + CACHE PATH "Path to an ExecuTorch checkout" + ) +endif() + +set(ET_DIR_PATH + "${EXECUTORCH_ROOT}" + CACHE PATH "Kept for backward compatibility; synonym for EXECUTORCH_ROOT" +) +if(NOT DEFINED ET_INCLUDE_PATH) + set(ET_INCLUDE_PATH + "${EXECUTORCH_ROOT}" + CACHE + PATH + "Kept for backward compatibility; include root for ExecuTorch headers" + ) +endif() + +if(NOT EXISTS "${EXECUTORCH_ROOT}/CMakeLists.txt") + message( + FATAL_ERROR + "EXECUTORCH_ROOT (${EXECUTORCH_ROOT}) does not contain an ExecuTorch CMake project." 
+ ) +endif() + +if(NOT COMMAND executorch_target_link_options_shared_lib) + include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) +endif() option( ET_MODEL_PTE_ADDR @@ -21,6 +62,12 @@ option(ET_LOG_DUMP_INPUT "Dump input in log" OFF) option(ET_LOG_DUMP_OUTPUT "Dump output in log" ON) option(ET_BUNDLE_IO "Set to compile in BundleIO support" OFF) +set(BUNDLED_PROGRAM_LIBRARY_DIR + "" + CACHE + PATH + "Optional directory that contains a prebuilt libbundled_program.a when ET_BUNDLE_IO is enabled without building devtools." +) set(ET_ATOL "0.01" CACHE STRING "Set atol to use for BundleIO testing (Requires ET_BUNDLE_IO)" @@ -55,13 +102,46 @@ option( OFF ) +if(NOT DEFINED PYTHON_EXECUTABLE) + find_package( + Python3 + COMPONENTS Interpreter + REQUIRED + ) + set(PYTHON_EXECUTABLE "${Python3_EXECUTABLE}") +endif() + +include(${EXECUTORCH_ROOT}/backends/arm/scripts/corstone_utils.cmake) +include(${EXECUTORCH_ROOT}/backends/arm/cmake/ArmEthosUSDK.cmake) +include(${EXECUTORCH_ROOT}/backends/arm/cmake/ArmRunnerUtils.cmake) + +arm_runner_require_baremetal_targets() + +# Keep the default scratch location aligned with the scratch tree used by +# setup.sh/run.sh so developers who just ran those scripts do not need extra +# CMake flags. 
+set(ETHOS_SDK_PATH + "${EXECUTORCH_ROOT}/examples/arm/arm-scratch/ethos-u" + CACHE PATH "Path to Ethos-U bare metal driver/env" +) + +arm_ethos_u_default_fetch("${ETHOS_SDK_PATH}" _fetch_ethos_u_default) option(FETCH_ETHOS_U_CONTENT - "Fetch ethos_u dependencies instead of relying on pre-downloads" ON + "Fetch ethos_u dependencies instead of relying on pre-downloads" + ${_fetch_ethos_u_default} +) +arm_ensure_ethos_u_content( + "${ETHOS_SDK_PATH}" "${EXECUTORCH_ROOT}" ${FETCH_ETHOS_U_CONTENT} ) -if(NOT DEFINED ET_MODEL_PTE_ADDR - AND NOT DEFINED ET_PTE_FILE_PATH - AND NOT DEFINED SEMIHOSTING +set(ET_PTE_FILE_PATH + "" + CACHE PATH "Path to ExecuTorch model pte" +) + +if(NOT ET_MODEL_PTE_ADDR + AND "${ET_PTE_FILE_PATH}" STREQUAL "" + AND NOT SEMIHOSTING ) message( FATAL_ERROR @@ -72,39 +152,16 @@ if(NOT DEFINED ET_MODEL_PTE_ADDR ) endif() -# Example ExecuTorch demo for bare metal Cortex-M based systems -set(ET_DIR_PATH - "${CMAKE_CURRENT_SOURCE_DIR}/../../.." - CACHE PATH "Path to ExecuTorch dir" +if(NOT SEMIHOSTING + AND NOT ET_MODEL_PTE_ADDR + AND NOT "${ET_PTE_FILE_PATH}" STREQUAL "" ) -include(${ET_DIR_PATH}/tools/cmake/Utils.cmake) -set(ET_BUILD_DIR_PATH - "${ET_DIR_PATH}/cmake-out-arm" - CACHE PATH "Path to ExecuTorch build/install dir" -) -set(ET_INCLUDE_PATH - "${ET_DIR_PATH}/.." - CACHE PATH "Path to ExecuTorch headers" -) -set(ET_PTE_FILE_PATH - "" - CACHE PATH "Path to ExecuTorch model pte" -) -set(ETHOS_SDK_PATH - "${ET_DIR_PATH}/examples/arm/arm-scratch/ethos-u" - CACHE PATH "Path to Ethos-U bare metal driver/env" -) -set(PYTHON_EXECUTABLE - "python" - CACHE PATH "Define to override python executable used" -) - -# Include corstone help functions -include(${ET_DIR_PATH}/backends/arm/scripts/corstone_utils.cmake) - -if(FETCH_ETHOS_U_CONTENT) - # Download ethos_u dependency if needed. 
- fetch_ethos_u_content(${ETHOS_SDK_PATH} ${ET_DIR_PATH}) + if(NOT EXISTS "${ET_PTE_FILE_PATH}") + message( + FATAL_ERROR + "ET_PTE_FILE_PATH is set to ${ET_PTE_FILE_PATH}, but no file was found. Generate the model first or point ET_PTE_FILE_PATH at an existing .pte/.bpte." + ) + endif() endif() # Selects timing adapter values matching system_config. Default is @@ -154,23 +211,33 @@ message( add_corstone_subdirectory(${SYSTEM_CONFIG} ${ETHOS_SDK_PATH}) configure_timing_adapters(${SYSTEM_CONFIG} ${MEMORY_MODE}) -# Dependencies from the ExecuTorch build -find_package( - executorch REQUIRED HINTS "${ET_BUILD_DIR_PATH}/lib/cmake/ExecuTorch" -) +if(NOT CMAKE_SKIP_INSTALL_RULES AND TARGET ethosu_core_driver) + get_property( + _et_ethosu_core_driver_exported GLOBAL + PROPERTY ET_ETHOSU_CORE_DRIVER_EXPORTED + ) + if(NOT _et_ethosu_core_driver_exported) + install( + TARGETS ethosu_core_driver + EXPORT ExecuTorchTargets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + set_property(GLOBAL PROPERTY ET_ETHOSU_CORE_DRIVER_EXPORTED TRUE) + endif() +endif() # Convert pte to header -if(NOT ${ET_MODEL_PTE_ADDR} AND NOT SEMIHOSTING) +if(NOT "${ET_MODEL_PTE_ADDR}" AND NOT SEMIHOSTING) add_custom_target( gen_model_header DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h ) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h - COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/pte_to_header.py --pte - ${ET_PTE_FILE_PATH} --outdir ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/pte_to_header.py + --pte ${ET_PTE_FILE_PATH} --outdir ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${ET_PTE_FILE_PATH} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) endif() @@ -199,12 +266,12 @@ endif() # Proceed with specific actions if either is found if(NOT U55_FOUND EQUAL -1) message(STATUS "SYSTEM_CONFIG contains 'U55'.") - set(LINK_FILE_IN "${CMAKE_SOURCE_DIR}/Corstone-300.ld") + set(LINK_FILE_IN 
"${CMAKE_CURRENT_LIST_DIR}/Corstone-300.ld") endif() if(NOT U85_FOUND EQUAL -1) message(STATUS "SYSTEM_CONFIG contains 'U85'.") - set(LINK_FILE_IN "${CMAKE_SOURCE_DIR}/Corstone-320.ld") + set(LINK_FILE_IN "${CMAKE_CURRENT_LIST_DIR}/Corstone-320.ld") endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") @@ -213,9 +280,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(COMPILER_PREPROCESSOR_OPTIONS -E -x c -P) endif() -get_filename_component(LINK_FILE_OUT_BASE ${LINK_FILE} NAME) +get_filename_component(LINK_FILE_OUT_BASE "${LINK_FILE}" NAME) set(LINK_FILE_OUT - ${CMAKE_CURRENT_BINARY_DIR}/${LINK_FILE_OUT_BASE}.${LINK_FILE_EXT} + "${CMAKE_CURRENT_BINARY_DIR}/${LINK_FILE_OUT_BASE}.${LINK_FILE_EXT}" ) execute_process( @@ -251,26 +318,31 @@ list( # (user-set)SELECT_OPS_MODEL variable. For normal build, use # EXECUTORCH_SELECT_OPS_MODEL to include ops automatically. If the pte contains # no undelegated ops, use neither. -execute_process( - COMMAND - python "${ET_DIR_PATH}/codegen/tools/gen_oplist.py" - --model_file_path=${ET_PTE_FILE_PATH} - --output_path=${CMAKE_CURRENT_BINARY_DIR}/temp.yaml - OUTPUT_VARIABLE CMD_RESULT +set(FOUND_OPS_IN_FILE FALSE) +if(NOT SEMIHOSTING + AND NOT ET_MODEL_PTE_ADDR + AND NOT "${ET_PTE_FILE_PATH}" STREQUAL "" + AND EXISTS "${ET_PTE_FILE_PATH}" ) + execute_process( + COMMAND + ${PYTHON_EXECUTABLE} "${EXECUTORCH_ROOT}/codegen/tools/gen_oplist.py" + --model_file_path=${ET_PTE_FILE_PATH} + --output_path=${CMAKE_CURRENT_BINARY_DIR}/temp.yaml + OUTPUT_VARIABLE CMD_RESULT + ) -if(CMD_RESULT MATCHES "aten::" OR CMD_RESULT MATCHES "dim_order_ops::") - set(FOUND_OPS_IN_FILE "true") -else() - set(FOUND_OPS_IN_FILE "false") + if(CMD_RESULT MATCHES "aten::" OR CMD_RESULT MATCHES "dim_order_ops::") + set(FOUND_OPS_IN_FILE TRUE) + endif() endif() -if(${SEMIHOSTING}) +if(SEMIHOSTING) set(EXECUTORCH_SELECT_OPS_MODEL "") message( "gen_oplist: Building with semihosting, no model is used to auto generate ops from will use 
EXECUTORCH_SELECT_OPS_LIST=${EXECUTORCH_SELECT_OPS_LIST}" ) -elseif(${FOUND_OPS_IN_FILE}) +elseif(FOUND_OPS_IN_FILE) set(EXECUTORCH_SELECT_OPS_LIST "") set(EXECUTORCH_SELECT_OPS_MODEL "${ET_PTE_FILE_PATH}") message( @@ -289,10 +361,6 @@ endif() if(NOT ("${EXECUTORCH_SELECT_OPS_LIST}" STREQUAL "" AND "${EXECUTORCH_SELECT_OPS_MODEL}" STREQUAL "") ) - set(EXECUTORCH_ROOT ${ET_DIR_PATH}) - include(${ET_DIR_PATH}/tools/cmake/Utils.cmake) - include(${ET_DIR_PATH}/tools/cmake/Codegen.cmake) - gen_selected_ops( LIB_NAME "arm_portable_ops_lib" @@ -310,7 +378,7 @@ if(NOT ("${EXECUTORCH_SELECT_OPS_LIST}" STREQUAL "" generate_bindings_for_kernels( LIB_NAME "arm_portable_ops_lib" FUNCTIONS_YAML - ${ET_DIR_PATH}/kernels/portable/functions.yaml DTYPE_SELECTIVE_BUILD + ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml DTYPE_SELECTIVE_BUILD "${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}" ) gen_operators_lib( @@ -333,16 +401,66 @@ if(EXECUTORCH_ENABLE_EVENT_TRACER) endif() if(ET_BUNDLE_IO) - list(APPEND arm_executor_runner_link bundled_program) + if(TARGET bundled_program) + list(APPEND arm_executor_runner_link bundled_program) + target_link_directories( + arm_executor_runner PRIVATE $<TARGET_FILE_DIR:bundled_program> + ) + else() + set(_bundled_program_library "") + set(_bundled_program_search_paths "") + if(ET_BUILD_DIR_PATH) + list( + APPEND + _bundled_program_search_paths + "${ET_BUILD_DIR_PATH}" + "${ET_BUILD_DIR_PATH}/lib" + "${ET_BUILD_DIR_PATH}/devtools/bundled_program" + "${ET_BUILD_DIR_PATH}/devtools/bundled_program/lib" + ) + endif() + if(BUNDLED_PROGRAM_LIBRARY_DIR) + list(APPEND _bundled_program_search_paths + "${BUNDLED_PROGRAM_LIBRARY_DIR}" + ) + endif() + if(_bundled_program_search_paths) + list(REMOVE_DUPLICATES _bundled_program_search_paths) + # BundleIO can reuse a separate ExecuTorch build tree where + # bundled_program is not part of this CMake graph.
Restrict the fallback + # lookup to the caller-provided build directories so we do not + # accidentally pick up an unrelated library from the host system. + find_library( + _bundled_program_library + NAMES bundled_program + PATHS ${_bundled_program_search_paths} + NO_DEFAULT_PATH + ) + endif() + if(_bundled_program_library) + list(APPEND arm_executor_runner_link ${_bundled_program_library}) + else() + message( + FATAL_ERROR + "ET_BUNDLE_IO enabled but bundled_program is unavailable. Either configure this build with EXECUTORCH_BUILD_DEVTOOLS=ON so the target exists or set BUNDLED_PROGRAM_LIBRARY_DIR/ET_BUILD_DIR_PATH to a build directory that contains libbundled_program.a." + ) + endif() + endif() endif() # Need whole-archive to ensure C++ ctor's are called - this may be wasteful for # bin size as we link in a number of other symbols target_link_libraries(arm_executor_runner PUBLIC ${arm_executor_runner_link}) +# Ensure the ELF lands next to the CMake build tree so run.sh (and downstream +# tooling) can locate it deterministically regardless of multi-config vs +# single-config generators. target_link_options( arm_executor_runner PUBLIC LINKER:-Map=arm_executor_runner.map ) +# Reuse a parent build's output directory if it already set one; otherwise keep +# the runner ELF next to this build tree so run.sh can find it predictably. 
+arm_runner_configure_runtime_output(arm_executor_runner "${CMAKE_BINARY_DIR}") # Sanitizers if(CMAKE_BUILD_TYPE MATCHES "UndefinedSanitizer") @@ -351,7 +469,7 @@ if(CMAKE_BUILD_TYPE MATCHES "UndefinedSanitizer") target_link_options(arm_executor_runner PRIVATE ${_et_runner_ubsan_flag}) if(NOT TARGET executorch_ubsan) add_subdirectory( - ${ET_DIR_PATH}/examples/arm/ubsan + ${EXECUTORCH_ROOT}/examples/arm/ubsan ${CMAKE_CURRENT_BINARY_DIR}/ubsan_runtime ) endif() @@ -367,7 +485,8 @@ if(CMAKE_BUILD_TYPE MATCHES "AddressSanitizer") target_link_options(arm_executor_runner PRIVATE ${_et_runner_asan_flags}) if(NOT TARGET executorch_asan) add_subdirectory( - ${ET_DIR_PATH}/examples/arm/asan ${CMAKE_CURRENT_BINARY_DIR}/asan_runtime + ${EXECUTORCH_ROOT}/examples/arm/asan + ${CMAKE_CURRENT_BINARY_DIR}/asan_runtime ) endif() target_link_libraries(arm_executor_runner PRIVATE executorch_asan) @@ -377,16 +496,18 @@ if(CMAKE_BUILD_TYPE MATCHES "AddressSanitizer") endif() # ET headers and generated headers includes +set(_arm_runner_include_dirs + ${ET_INCLUDE_PATH} ${ET_INCLUDE_PATH}/runtime/core/portable_type/c10 + ${CMAKE_CURRENT_BINARY_DIR} +) target_include_directories( - arm_executor_runner - PRIVATE ${ET_INCLUDE_PATH} ${ET_DIR_PATH}/runtime/core/portable_type/c10 - ${CMAKE_CURRENT_BINARY_DIR} + arm_executor_runner PRIVATE ${_arm_runner_include_dirs} ) target_compile_definitions( arm_executor_runner PRIVATE C10_USING_CUSTOM_GENERATED_MACROS ) -if(NOT ${ET_MODEL_PTE_ADDR} AND NOT SEMIHOSTING) +if(NOT "${ET_MODEL_PTE_ADDR}" AND NOT SEMIHOSTING) add_dependencies(arm_executor_runner gen_model_header) endif() diff --git a/examples/arm/executor_runner/pte_to_header.py b/examples/arm/executor_runner/pte_to_header.py index 65213bc729e..8656ac5abdf 100644 --- a/examples/arm/executor_runner/pte_to_header.py +++ b/examples/arm/executor_runner/pte_to_header.py @@ -1,6 +1,7 @@ +#!/usr/bin/env python3 # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
-# Copyright 2023-2025 Arm Limited and/or its affiliates. +# Copyright 2023-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. diff --git a/examples/arm/executor_runner/standalone/CMakeLists.txt b/examples/arm/executor_runner/standalone/CMakeLists.txt new file mode 100644 index 00000000000..73493ca9e71 --- /dev/null +++ b/examples/arm/executor_runner/standalone/CMakeLists.txt @@ -0,0 +1,159 @@ +# Copyright 2026 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +cmake_minimum_required(VERSION 3.20) +project(arm_executor_runner LANGUAGES C CXX) + +get_filename_component( + _default_executorch_root "${CMAKE_CURRENT_LIST_DIR}/../../../.." ABSOLUTE +) + +if(NOT DEFINED EXECUTORCH_ROOT) + set(EXECUTORCH_ROOT + "${_default_executorch_root}" + CACHE PATH "Path to an ExecuTorch checkout" + ) +endif() + +if(NOT EXISTS "${EXECUTORCH_ROOT}/CMakeLists.txt") + if(EXISTS "${_default_executorch_root}/CMakeLists.txt") + message( + WARNING + "EXECUTORCH_ROOT (${EXECUTORCH_ROOT}) does not contain an ExecuTorch CMake project. Falling back to ${_default_executorch_root}." + ) + set(EXECUTORCH_ROOT + "${_default_executorch_root}" + CACHE PATH "Path to an ExecuTorch checkout" FORCE + ) + else() + message( + FATAL_ERROR + "EXECUTORCH_ROOT (${EXECUTORCH_ROOT}) does not contain an ExecuTorch CMake project." + ) + endif() +endif() + +set(ARM_EXECUTOR_RUNNER_STANDALONE + ON + CACHE BOOL + "Indicates arm_executor_runner was configured as a standalone project" + FORCE +) + +# Load the preset helper so standalone builds inherit the same defaults as the +# superbuild (toolchains, delegated targets, devtools options, etc.). 
+set(_executorch_preset_cmake + "${EXECUTORCH_ROOT}/tools/cmake/common/preset.cmake" +) +if(EXISTS "${_executorch_preset_cmake}") + include("${_executorch_preset_cmake}") + if(NOT DEFINED EXECUTORCH_BUILD_PRESET_FILE) + set(EXECUTORCH_BUILD_PRESET_FILE + "${EXECUTORCH_ROOT}/tools/cmake/preset/arm_baremetal.cmake" + CACHE PATH "Preset used when configuring the standalone runner" + ) + endif() + load_build_preset() +endif() +include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) + +option(ARM_EXECUTOR_RUNNER_SKIP_INSTALL_RULES + "Skip install() rules for standalone arm_executor_runner builds" ON +) +if(DEFINED CMAKE_SKIP_INSTALL_RULES) + set(_arm_runner_skip_install_rules "${CMAKE_SKIP_INSTALL_RULES}") +endif() +if(ARM_EXECUTOR_RUNNER_SKIP_INSTALL_RULES) + set(CMAKE_SKIP_INSTALL_RULES ON) +endif() + +foreach( + _opt + EXECUTORCH_BUILD_ARM_BAREMETAL EXECUTORCH_BUILD_CORTEX_M + EXECUTORCH_BUILD_KERNELS_QUANTIZED EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL +) + if(NOT DEFINED ${_opt}) + set(${_opt} + ON + CACHE BOOL "" FORCE + ) + endif() +endforeach() +set(EXECUTORCH_SKIP_ARM_EXECUTOR_RUNNER + ON + CACHE BOOL "" FORCE +) + +# examples/arm/executor_runner/CMakeLists.txt generates the runner-specific +# portable-op registration based on the PTE or an explicit select-ops list. +# Avoid feeding those cache entries into the top-level ExecuTorch configure, +# otherwise executorch_core auto-right-sizes MAX_KERNEL_NUM from the runner's +# placeholder/selective build inputs even though the runner also links +# quantized/cortex-m registration libraries. 
+set(_arm_runner_selective_cache_vars + EXECUTORCH_SELECT_OPS_LIST EXECUTORCH_SELECT_OPS_MODEL + EXECUTORCH_SELECT_OPS_YAML +) +foreach(_arm_runner_cache_var IN LISTS _arm_runner_selective_cache_vars) + if(DEFINED CACHE{${_arm_runner_cache_var}}) + get_property( + _arm_runner_cache_type + CACHE ${_arm_runner_cache_var} + PROPERTY TYPE + ) + if(NOT _arm_runner_cache_type OR _arm_runner_cache_type STREQUAL + "UNINITIALIZED" + ) + set(_arm_runner_cache_type STRING) + endif() + set(_arm_runner_saved_type_${_arm_runner_cache_var} + "${_arm_runner_cache_type}" + ) + set(_arm_runner_saved_value_${_arm_runner_cache_var} + "${${_arm_runner_cache_var}}" + ) + set(_arm_runner_saved_defined_${_arm_runner_cache_var} TRUE) + set(${_arm_runner_cache_var} + "" + CACHE ${_arm_runner_cache_type} "" FORCE + ) + endif() +endforeach() + +if(NOT DEFINED CACHE{MAX_KERNEL_NUM} AND NOT DEFINED MAX_KERNEL_NUM) + set(MAX_KERNEL_NUM + 2000 + CACHE STRING + "Maximum number of kernels registered by the standalone Arm runner" + ) +endif() + +# Pull ExecuTorch in-tree so all required targets (delegates, kernels, runner +# util, etc.) are built from this checkout. 
+add_subdirectory( + ${EXECUTORCH_ROOT} ${CMAKE_BINARY_DIR}/executorch EXCLUDE_FROM_ALL +) + +if(ARM_EXECUTOR_RUNNER_SKIP_INSTALL_RULES) + if(DEFINED _arm_runner_skip_install_rules) + set(CMAKE_SKIP_INSTALL_RULES "${_arm_runner_skip_install_rules}") + else() + unset(CMAKE_SKIP_INSTALL_RULES) + endif() +endif() + +foreach(_arm_runner_cache_var IN LISTS _arm_runner_selective_cache_vars) + if(_arm_runner_saved_defined_${_arm_runner_cache_var}) + set(${_arm_runner_cache_var} + "${_arm_runner_saved_value_${_arm_runner_cache_var}}" + CACHE ${_arm_runner_saved_type_${_arm_runner_cache_var}} "" FORCE + ) + endif() +endforeach() + +add_subdirectory( + ${EXECUTORCH_ROOT}/examples/arm/executor_runner + ${CMAKE_BINARY_DIR}/examples/arm/executor_runner +) diff --git a/examples/arm/pruning_minimal_example.ipynb b/examples/arm/pruning_minimal_example.ipynb index db585b94158..a24c6626a15 100644 --- a/examples/arm/pruning_minimal_example.ipynb +++ b/examples/arm/pruning_minimal_example.ipynb @@ -453,7 +453,7 @@ " -DMEMORY_MODE=Shared_Sram \\\n", " -DSYSTEM_CONFIG=Ethos_U85_SYS_DRAM_Mid \\\n", " -Bethos_u_original_model \\\n", - " executor_runner\n", + " -S executor_runner/standalone\n", "cmake --build ethos_u_original_model -j$(nproc) -- arm_executor_runner" ] }, @@ -499,7 +499,7 @@ " -DMEMORY_MODE=Shared_Sram \\\n", " -DSYSTEM_CONFIG=Ethos_U85_SYS_DRAM_Mid \\\n", " -Bethos_u_pruned_model \\\n", - " executor_runner\n", + " -S executor_runner/standalone\n", "cmake --build ethos_u_pruned_model -j$(nproc) -- arm_executor_runner" ] }, diff --git a/examples/arm/run.sh b/examples/arm/run.sh index b18115723b0..351eda14071 100755 --- a/examples/arm/run.sh +++ b/examples/arm/run.sh @@ -14,8 +14,9 @@ set -eu ######## script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) et_root_dir=$(cd ${script_dir}/../.. 
&& pwd) -et_root_dir=$(realpath ${et_root_dir}) - +et_root_dir=$(realpath "${et_root_dir}") +runner_source_dir="${et_root_dir}/examples/arm/executor_runner/standalone" +runner_source_dir=$(realpath "${runner_source_dir}") model_name="" model_input_set=false @@ -29,7 +30,7 @@ output_folder="." bundleio=false build_with_etdump=false build_type="Release" -extra_build_flags="" +build_dir="" build_only=false system_config="" config="" @@ -38,8 +39,9 @@ pte_placement="elf" et_build_root="${et_root_dir}/arm_test" arm_scratch_dir=${script_dir}/arm-scratch scratch_dir_set=false -toolchain=arm-none-eabi-gcc +toolchain="arm-none-eabi-gcc" select_ops_list="aten::_softmax.out" +select_ops_list_overridden=false qdq_fusion_op=false model_explorer=false perf_overlay=false @@ -47,6 +49,12 @@ visualize_tosa=false visualize_pte=false model_converter=false specify_ethosu_scratch=false +extra_build_flags="" +preset_file="${et_root_dir}/tools/cmake/preset/arm_baremetal.cmake" +cmake_cache_file="" +build_dir_initialized=false +multi_config=false +parallel_jobs=1 function help() { echo "Usage: $(basename $0) [options]" @@ -57,8 +65,7 @@ function help() { echo " --aot_arm_compiler_flags= Extra flags to pass to aot compiler" echo " --no_delegate Do not delegate the model (can't override builtin models)" echo " --no_quantize Do not quantize the model (can't override builtin models)" - echo " --portable_kernels= TO BE DEPRECATED: Alias to select_ops_list." - echo " --select_ops_list= Comma separated list of portable (non delagated) kernels to include Default: ${select_ops_list}" + echo " --select_ops_list= Comma separated list of portable (non delegated) kernels to include. Default: ${select_ops_list}" echo " NOTE: This is only used when building for semihosting." echo " See https://docs.pytorch.org/executorch/stable/kernel-library-selective-build.html for more information." 
echo " --target= Target to build and run for Default: ${target}" @@ -66,9 +73,10 @@ function help() { echo " --bundleio Create Bundled pte using Devtools BundelIO with Input/RefOutput included" echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log" echo " --build_type= Build with Release, Debug, RelWithDebInfo, UndefinedSanitizer or AddressSanitizer, default is ${build_type}" - echo " --extra_build_flags= Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none " + echo " --build-dir= Optional: reuse an existing arm_executor_runner build directory (configured via 'cmake -S examples/arm/executor_runner/standalone -B ...'). If omitted, run.sh auto-configures one under ${et_build_root} for bare-metal targets." echo " --build_only Only build, don't run" - echo " --toolchain= Ethos-U: Toolchain can be specified (e.g. bare metal as arm-none-eabi-gcc or zephyr as arm-zephyr-eabi-gcc Default: ${toolchain}" + echo " --extra_build_flags=\"\" Extra -D style flags to pass to cmake when run.sh auto-configures the build" + echo " --toolchain= Toolchain preset to use when run.sh auto-configures the build. Default: ${toolchain}" echo " --system_config= Ethos-U: System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets." echo " NOTE: If given, this option must match the given target. This option also sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt." 
echo " --config= Ethos-U: System configuration file that specifies system configurations (vela.ini)" @@ -76,11 +84,11 @@ function help() { echo " --pte_placement= Ethos-U: Control if runtime has PTE baked into the elf or if its placed in memory outside of the elf, defaults to ${pte_placement}" echo " --specify_ethosu_scratch Use actual Ethos-U scratch size for given model to size temp allocator" echo " --et_build_root= Executorch build output root folder to use, defaults to ${et_build_root}" - echo " --scratch-dir= Path to your Arm scrach dir if you not using default ${arm_scratch_dir}" + echo " --scratch-dir= Path to your Ethos-U scratch dir if you are not using the default ${arm_scratch_dir}" echo " --qdq_fusion_op Enable QDQ fusion op" echo " --model_explorer Enable model explorer to visualize a TOSA or PTE model graph." echo " --visualize_pte With --model_explorer, visualize PTE flatbuffer model and delegates. Cannot be used with --visualize_tosa" - echo " NOTE: If PTE contains an Ethos-U delegate, the Ethos-U subgraph will be visualized if aot_arm_compiler_flags is set with the -i flag to include intermediate tosa files." + echo " NOTE: If PTE contains an Ethos-U delegate, the Ethos-U subgraph will be visualized if aot_arm_compiler_flags includes -i for TOSA dumps." echo " --visualize_tosa With --model_explorer, visualize TOSA flatbuffer model. Cannot be used with --visualize_pte" echo " --perf_overlay With --model_explorer and --visualize_tosa, include performance data from FVP PMU trace." exit 0 @@ -94,15 +102,18 @@ for arg in "$@"; do --aot_arm_compiler_flags=*) aot_arm_compiler_flags="${arg#*=}";; --no_delegate) aot_arm_compiler_flag_delegate="" ;; --no_quantize) aot_arm_compiler_flag_quantize="" ;; - --portable_kernels=*) select_ops_list="${arg#*=}" ; echo "WARNING: --portable_kernels is DEPRECATED use select_ops_list."
;; - --select_ops_list=*) select_ops_list="${arg#*=}";; + --select_ops_list=*) + select_ops_list="${arg#*=}" + select_ops_list_overridden=true + ;; --target=*) target="${arg#*=}";; --output=*) output_folder="${arg#*=}" ; output_folder_set=true ;; --bundleio) bundleio=true ;; --etdump) build_with_etdump=true ;; --build_type=*) build_type="${arg#*=}";; - --extra_build_flags=*) extra_build_flags="${arg#*=}";; + --build-dir=*) build_dir="${arg#*=}";; --build_only) build_only=true ;; + --extra_build_flags=*) extra_build_flags="${arg#*=}";; --toolchain=*) toolchain="${arg#*=}";; --system_config=*) system_config="${arg#*=}";; --config=*) config="${arg#*=}";; @@ -121,6 +132,11 @@ for arg in "$@"; do esac done +auto_configure=false +if [[ -z "${build_dir}" ]]; then + auto_configure=true +fi + if [ "$perf_overlay" = true ] && [ "$model_explorer" != true ]; then echo "Error: --perf_overlay requires --model_explorer" >&2 exit 1 @@ -141,10 +157,32 @@ if ! [[ ${pte_placement} == "elf" ]]; then fi # Default Ethos-u tool folder override with --scratch-dir= -arm_scratch_dir=$(realpath ${arm_scratch_dir}) +arm_scratch_dir=$(realpath "${arm_scratch_dir}") +ethos_u_root_dir="${arm_scratch_dir}/ethos-u" +mkdir -p "${ethos_u_root_dir}" +ethos_u_root_dir=$(realpath "${ethos_u_root_dir}") +cmsis_nn_local_path="" +if [[ -d "${ethos_u_root_dir}/core_software/cmsis-nn" ]]; then + cmsis_nn_local_path=$(realpath "${ethos_u_root_dir}/core_software/cmsis-nn") +fi setup_path_script=${arm_scratch_dir}/setup_path.sh _setup_msg="please refer to ${script_dir}/setup.sh to properly install necessary tools." +toolchain_cmake="" +case "${toolchain}" in + arm-none-eabi-gcc) + toolchain_cmake="${et_root_dir}/examples/arm/ethos-u-setup/${toolchain}.cmake" + ;; + arm-zephyr-eabi-gcc) + toolchain_cmake="${et_root_dir}/examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake" + ;; + *) + echo "Error: Invalid toolchain selection '${toolchain}'. 
Valid options: arm-none-eabi-gcc, arm-zephyr-eabi-gcc" >&2 + exit 1 + ;; +esac + + # Set target based variables if [[ ${system_config} == "" ]] then @@ -169,26 +207,10 @@ then config="Arm/vela.ini" fi -# Build executorch libraries -cd $et_root_dir -devtools_flag="" -bundleio_flag="" -etrecord_flag="" -et_dump_flag="" -qdq_fusion_op_flag="" -fvp_pmu_flag="" -if [ "$build_with_etdump" = true ] ; then - et_dump_flag="--etdump" - etrecord_flag="--etrecord" -fi - -if [ "$bundleio" = true ] ; then - devtools_flag="--devtools" - bundleio_flag="--bundleio" -fi - -if [ "$qdq_fusion_op" = true ] ; then - qdq_fusion_op_flag="--enable_qdq_fusion_pass" +target_cpu="cortex-m85" +if [[ ${target} =~ "ethos-u55" ]] +then + target_cpu="cortex-m55" fi function check_setup () { @@ -201,36 +223,31 @@ function check_setup () { echo "Could not find ${setup_path_script} file, ${_setup_msg}" return 1 fi - # If setup_path_script was correct all these checks should now pass - if [[ ${target} =~ "ethos-u" ]]; then - if [[ ${toolchain} == "arm-none-eabi-gcc" ]]; then - toolchain_cmake=${et_root_dir}/examples/arm/ethos-u-setup/${toolchain}.cmake - elif [[ ${toolchain} == "arm-zephyr-eabi-gcc" ]]; then - toolchain_cmake=${et_root_dir}/examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake - else - echo "Error: Invalid toolchain selection, provided: ${toolchain}" - echo " Valid options are {arm-none-eabi-gcc, arm-zephyr-eabi-gcc}" - exit 1; + + [[ -f ${et_root_dir}/CMakeLists.txt ]] \ + || { echo "Executorch repo doesn't contain CMakeLists.txt file at root level"; return 1; } + + [[ -f ${preset_file} ]] \ + || { echo "Could not find ${preset_file} file, ${_setup_msg}"; return 1; } + + if [[ "${auto_configure}" == true && ${target} != *"TOSA"* ]]; then + if ! 
command -v "${toolchain}" >/dev/null 2>&1; then + echo "Could not find ${toolchain} toolchain on PATH, ${_setup_msg}" + return 1 fi - toolchain_cmake=$(realpath ${toolchain_cmake}) - hash ${toolchain} \ - || { echo "Could not find ${toolchain} toolchain on PATH, ${_setup_msg}"; return 1; } [[ -f ${toolchain_cmake} ]] \ || { echo "Could not find ${toolchain_cmake} file, ${_setup_msg}"; return 1; } + fi - [[ -f ${et_root_dir}/CMakeLists.txt ]] \ - || { echo "Executorch repo doesn't contain CMakeLists.txt file at root level"; return 1; } - - backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $devtools_flag $et_dump_flag --toolchain="${toolchain}" - elif [[ ${target} == cortex-m* ]]; then + if [[ ${target} == cortex-m* ]]; then # build_test_runner.sh handles toolchain setup; just validate it's on PATH. hash arm-none-eabi-gcc \ || { echo "Could not find arm-none-eabi-gcc on PATH, ${_setup_msg}"; return 1; } elif [[ ${target} =~ "vgf" ]]; then - model_converter=$(which model-converter) + model_converter=$(which model-converter || true) echo "${model_converter}" - [[ "${model_converter}" == "model-converter not found" ]] \ + [[ -z "${model_converter}" || "${model_converter}" == "model-converter not found" ]] \ && { echo "Could not find model-converter, ${_setup_msg}"; return 1; } fi @@ -252,25 +269,416 @@ print(size) PY } +sanitize_for_path() { + local value="$1" + printf '%s' "${value}" | tr -c '[:alnum:]._-' '_' +} + +set_default_build_dir_path() { + if [[ ${target} == *"vgf"* ]]; then + cat <&2 +Error: auto-configuring a build directory is only supported for Ethos-U bare-metal targets. +Configure a host build manually, e.g. + cmake -S "${runner_source_dir}" -B -DEXECUTORCH_ROOT="${et_root_dir}" -DEXECUTORCH_BUILD_VGF=ON +and then pass --build-dir=. 
+EOF + exit 1 + fi + local sanitized_target + sanitized_target=$(sanitize_for_path "${target}") + local sanitized_build_type + sanitized_build_type=$(sanitize_for_path "${build_type}") + local sanitized_toolchain + sanitized_toolchain=$(sanitize_for_path "${toolchain}") + build_dir="${et_build_root}/${sanitized_target}_${sanitized_build_type}_${sanitized_toolchain}" +} + +configure_runner_build_dir() { + local pte_source="$1" + if [[ -z "${build_dir}" ]]; then + echo "Error: build_dir is not set. Cannot configure runner." >&2 + exit 1 + fi + if [[ "${pte_placement}" == "elf" ]]; then + pte_source=$(realpath "${pte_source}") + fi + mkdir -p "${build_dir}" + local cmake_cmd=( + cmake -S "${runner_source_dir}" -B "${build_dir}" + -DEXECUTORCH_ROOT="${et_root_dir}" + -DCMAKE_TOOLCHAIN_FILE="${toolchain_cmake}" + -DCMAKE_BUILD_TYPE="${build_type}" + -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON + -DEXECUTORCH_BUILD_CORTEX_M=ON + -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON + -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON + -DEXECUTORCH_BUILD_PRESET_FILE="${preset_file}" + -DEXECUTORCH_BAREMETAL_SKIP_INSTALL=OFF + -DETHOSU_TARGET_NPU_CONFIG="${target}" + -DTARGET_CPU="${target_cpu}" + -DSYSTEM_CONFIG="${system_config}" + -DMEMORY_MODE="${memory_mode}" + -DETHOS_SDK_PATH:PATH="${ethos_u_root_dir}" + -DEXECUTORCH_SELECT_OPS_LIST="${select_ops_list}" + ) + if [[ -n "${cmsis_nn_local_path}" ]]; then + cmake_cmd+=(-DCMSIS_NN_LOCAL_PATH:PATH="${cmsis_nn_local_path}") + fi + cmake_cmd+=(-DET_PTE_FILE_PATH:PATH="${pte_source}") + if [[ "${pte_placement}" == "elf" ]]; then + cmake_cmd+=(-DET_MODEL_PTE_ADDR=) + else + cmake_cmd+=(-DET_MODEL_PTE_ADDR="${pte_placement}") + fi + if [[ "${bundleio}" == true ]]; then + cmake_cmd+=(-DET_BUNDLE_IO=ON) + else + cmake_cmd+=(-DET_BUNDLE_IO=OFF) + fi + if [[ "${bundleio}" == true || "${build_with_etdump}" == true ]]; then + cmake_cmd+=(-DEXECUTORCH_BUILD_DEVTOOLS=ON) + else + cmake_cmd+=(-DEXECUTORCH_BUILD_DEVTOOLS=OFF) + fi + if [[ "${build_with_etdump}" == 
true ]]; then + cmake_cmd+=(-DEXECUTORCH_ENABLE_EVENT_TRACER=ON -DET_DUMP_INTERMEDIATE_OUTPUTS=ON) + else + cmake_cmd+=(-DEXECUTORCH_ENABLE_EVENT_TRACER=OFF -DET_DUMP_INTERMEDIATE_OUTPUTS=OFF) + fi + if [[ -n "${extra_build_flags}" ]]; then + # shellcheck disable=SC2206 + local extra_args=(${extra_build_flags}) + cmake_cmd+=("${extra_args[@]}") + fi + echo "[run.sh] Configuring ExecuTorch build at ${build_dir}" + "${cmake_cmd[@]}" + build_dir_initialized=false +} + +cmake_cache_get() { + local key="$1" + if [[ ! -f ${cmake_cache_file} ]]; then + echo "" + return 0 + fi + local line + line=$(grep -m1 "^${key}:" "${cmake_cache_file}" || true) + if [[ -z "${line}" ]]; then + echo "" + else + echo "${line#*=}" + fi +} + +cmake_cache_has_key() { + local key="$1" + [[ -f ${cmake_cache_file} ]] && grep -q "^${key}:" "${cmake_cache_file}" +} + +ensure_runner_build_dir() { + local standalone + standalone=$(cmake_cache_get ARM_EXECUTOR_RUNNER_STANDALONE) + local normalized + normalized=$(printf '%s' "${standalone}" | tr '[:lower:]' '[:upper:]') + if [[ "${normalized}" != "TRUE" && "${normalized}" != "ON" ]]; then + cat <&2 +Error: ${build_dir} is not a standalone arm_executor_runner build directory. +Configure it via: + cmake -S ${runner_source_dir} -B ${build_dir} -DEXECUTORCH_ROOT=${et_root_dir} [...] +and re-run run.sh. +EOF + exit 1 + fi +} + +ensure_select_ops_list_setting() { + local expected="$1" + local cache_value + cache_value=$(cmake_cache_get EXECUTORCH_SELECT_OPS_LIST) + if [[ -z "${cache_value}" ]]; then + cat <&2 +Error: EXECUTORCH_SELECT_OPS_LIST is not configured in ${build_dir}. +Reconfigure cmake -S ${runner_source_dir} -B ${build_dir} -DEXECUTORCH_SELECT_OPS_LIST=${expected}. +EOF + exit 1 + fi + if [[ "${cache_value}" != "${expected}" ]]; then + cat <&2 +Error: ${build_dir} was configured with EXECUTORCH_SELECT_OPS_LIST=${cache_value}, but run.sh requested ${expected}. 
+Reconfigure cmake -S ${runner_source_dir} -B ${build_dir} -DEXECUTORCH_SELECT_OPS_LIST=${expected}, or omit --select_ops_list. +EOF + exit 1 + fi +} + +require_cache_value() { + local key="$1" + local expected="$2" + local value + if ! cmake_cache_has_key "${key}"; then + echo "Error: ${key} not found in ${cmake_cache_file}. Reconfigure CMake with -D${key}=${expected}." >&2 + exit 1 + fi + value=$(cmake_cache_get "${key}") + if [[ "${value}" != "${expected}" ]]; then + echo "Error: ${key}=${value} in ${build_dir}. Reconfigure CMake with -D${key}=${expected} to use this run.sh invocation." >&2 + exit 1 + fi +} + +require_cache_bool() { + local key="$1" + local expected="$2" + local value + value=$(cmake_cache_get "${key}") + if [[ -z "${value}" ]]; then + echo "Error: ${key} not found in ${cmake_cache_file}. Reconfigure CMake with -D${key}=${expected}." >&2 + exit 1 + fi + local value_upper + value_upper=$(printf '%s' "${value}" | tr '[:lower:]' '[:upper:]') + local expected_upper + expected_upper=$(printf '%s' "${expected}" | tr '[:lower:]' '[:upper:]') + if [[ "${value_upper}" != "${expected_upper}" ]]; then + echo "Error: ${key}=${value} in ${build_dir}. Reconfigure CMake with -D${key}=${expected} to use run.sh." >&2 + exit 1 + fi +} + +is_cmake_false_value() { + local value_upper + value_upper=$(printf '%s' "$1" | tr '[:lower:]' '[:upper:]') + case "${value_upper}" in + ""|0|OFF|FALSE|NO|N|IGNORE|*-NOTFOUND) + return 0 + ;; + *) + return 1 + ;; + esac +} + +ensure_pte_placement_setting() { + local cached_addr + cached_addr=$(cmake_cache_get ET_MODEL_PTE_ADDR) + if ! cmake_cache_has_key ET_MODEL_PTE_ADDR; then + echo "Error: ET_MODEL_PTE_ADDR not found in ${cmake_cache_file}. Reconfigure CMake for the requested --pte_placement=${pte_placement}." >&2 + exit 1 + fi + if [[ "${pte_placement}" == "elf" ]]; then + if ! 
is_cmake_false_value "${cached_addr}"; then + echo "Error: --pte_placement=elf requested, but ${build_dir} was configured with ET_MODEL_PTE_ADDR=${cached_addr}. Reconfigure CMake with -DET_MODEL_PTE_ADDR=." >&2 + exit 1 + fi + if ! cmake_cache_has_key ET_PTE_FILE_PATH; then + echo "Error: ET_PTE_FILE_PATH not found in ${cmake_cache_file}. Reconfigure CMake with -DET_PTE_FILE_PATH=." >&2 + exit 1 + fi + return + fi + if is_cmake_false_value "${cached_addr}"; then + echo "Error: --pte_placement=${pte_placement} requested, but ${build_dir} was configured for an embedded PTE. Reconfigure CMake with -DET_MODEL_PTE_ADDR=${pte_placement}, or use --pte_placement=elf." >&2 + exit 1 + fi + if [[ "${cached_addr}" != "${pte_placement}" ]]; then + echo "Error: --pte_placement=${pte_placement} requested, but ${build_dir} was configured with ET_MODEL_PTE_ADDR=${cached_addr}. Reconfigure CMake with -DET_MODEL_PTE_ADDR=${pte_placement}." >&2 + exit 1 + fi +} + +get_parallel_jobs() { + if command -v nproc >/dev/null 2>&1; then + nproc + elif command -v sysctl >/dev/null 2>&1 && sysctl hw.logicalcpu >/dev/null 2>&1; then + sysctl -n hw.logicalcpu + elif command -v getconf >/dev/null 2>&1; then + getconf _NPROCESSORS_ONLN + elif [[ -n "${NUMBER_OF_PROCESSORS:-}" ]]; then + echo "${NUMBER_OF_PROCESSORS}" + else + echo 1 + fi +} + +build_runner_target() { + local cmake_target="$1" + local build_cmd=(cmake --build "${build_dir}" --target "${cmake_target}" --parallel "${parallel_jobs}") + if [[ "${multi_config}" == true ]]; then + build_cmd+=(--config "${build_type}") + fi + echo "[run.sh] Building target ${cmake_target} in ${build_dir}" + "${build_cmd[@]}" +} + +locate_runner_binary() { + local binary_name="$1" + local candidates=() + if [[ "${multi_config}" == true ]]; then + candidates+=("${build_dir}/${build_type}/${binary_name}") + candidates+=("${build_dir}/examples/arm/executor_runner/${build_type}/${binary_name}") + fi + candidates+=("${build_dir}/${binary_name}") + 
candidates+=("${build_dir}/examples/arm/executor_runner/${binary_name}") + for candidate in "${candidates[@]}"; do + if [[ -f "${candidate}" ]]; then + echo "${candidate}" + return 0 + fi + done + local found + found=$(find "${build_dir}" -name "${binary_name}" -type f 2>/dev/null | head -n 1 || true) + if [[ -n "${found}" ]]; then + echo "${found}" + return 0 + fi + return 1 +} +ensure_build_dir_ready() { + if [[ "${build_dir_initialized}" == true ]]; then + return + fi + if [[ -z "${build_dir}" ]]; then + echo "Error: build_dir is not set. Configure CMake first." >&2 + exit 1 + fi + build_dir=$(realpath "${build_dir}") + cmake_cache_file="${build_dir}/CMakeCache.txt" + if [[ ! -f ${cmake_cache_file} ]]; then + cat <&2 +Error: ${build_dir} does not contain a configured arm_executor_runner build (missing CMakeCache.txt). +Run cmake -S ${runner_source_dir} -B ${build_dir} -DEXECUTORCH_ROOT=${et_root_dir} with the desired options first, then re-run run.sh. +EOF + exit 1 + fi + if [[ ${target} == *"vgf"* ]]; then + require_cache_bool EXECUTORCH_BUILD_VGF ON + else + ensure_runner_build_dir + require_cache_bool EXECUTORCH_BUILD_ARM_BAREMETAL ON + require_cache_bool EXECUTORCH_BAREMETAL_SKIP_INSTALL OFF + require_cache_value ETHOSU_TARGET_NPU_CONFIG "${target}" + require_cache_value TARGET_CPU "${target_cpu}" + require_cache_value SYSTEM_CONFIG "${system_config}" + require_cache_value MEMORY_MODE "${memory_mode}" + if [[ "${bundleio}" == true ]]; then + require_cache_bool ET_BUNDLE_IO ON + else + require_cache_bool ET_BUNDLE_IO OFF + fi + if [[ "${bundleio}" == true || "${build_with_etdump}" == true ]]; then + require_cache_bool EXECUTORCH_BUILD_DEVTOOLS ON + else + require_cache_bool EXECUTORCH_BUILD_DEVTOOLS OFF + fi + if [[ "${build_with_etdump}" == true ]]; then + require_cache_bool EXECUTORCH_ENABLE_EVENT_TRACER ON + require_cache_bool ET_DUMP_INTERMEDIATE_OUTPUTS ON + else + require_cache_bool EXECUTORCH_ENABLE_EVENT_TRACER OFF + require_cache_bool 
ET_DUMP_INTERMEDIATE_OUTPUTS OFF + fi + fi + if [[ ${target} != *"vgf"* ]]; then + ensure_select_ops_list_setting "${select_ops_list}" + fi + multi_config=false + if [[ -n "$(cmake_cache_get CMAKE_CONFIGURATION_TYPES)" ]]; then + multi_config=true + fi + parallel_jobs=$(get_parallel_jobs) + build_dir_initialized=true +} + ####### ### Main ####### if ! check_setup; then if [ "$scratch_dir_set" = false ] ; then - # check setup failed, no scratchdir given as parameter. trying to run setup.sh - if ${script_dir}/setup.sh; then - # and recheck setup. If this fails exit. - if ! check_setup; then - exit 1 - fi - else - # setup.sh failed, it should print why - exit 1 - fi + # check setup failed, no scratchdir given as parameter. trying to run setup.sh + if ${script_dir}/setup.sh; then + # and recheck setup. If this fails exit. + if ! check_setup; then + exit 1 + fi + else + # setup.sh failed, it should print why + exit 1 + fi fi fi +cd "${et_root_dir}" + +bundleio_flag="" +etrecord_flag_template="" +qdq_fusion_op_flag="" +if [ "$build_with_etdump" = true ] ; then + etrecord_flag_template="--etrecord" +fi + +if [ "$bundleio" = true ] ; then + bundleio_flag="--bundleio" +fi + +if [ "$qdq_fusion_op" = true ] ; then + qdq_fusion_op_flag="--enable_qdq_fusion_pass" +fi + +if [[ "${auto_configure}" == true ]]; then + set_default_build_dir_path +else + if [[ -z "${build_dir}" ]]; then + echo "Error: --build-dir must not be empty." >&2 + exit 1 + fi + ensure_build_dir_ready +fi + +stage_pte_into_cache() { + local new_pte="$1" + local cache_path + cache_path=$(cmake_cache_get ET_PTE_FILE_PATH) + if [[ -z "${cache_path}" ]]; then + cat <&2 +Error: --pte_placement=elf requires ET_PTE_FILE_PATH to be set when configuring CMake. +Re-run cmake -S . -B ${build_dir} -DET_PTE_FILE_PATH=/absolute/path/to/model.pte (or use --pte_placement=). 
+EOF + exit 1 + fi + if [[ "${cache_path}" != /* ]]; then + cache_path="${build_dir}/${cache_path}" + fi + mkdir -p "$(dirname "${cache_path}")" + cp "${new_pte}" "${cache_path}" + echo "${cache_path}" +} + +configure_ethosu_scratch_if_requested() { + local pte_path="$1" + if [ "$specify_ethosu_scratch" != true ] || [[ ! ${target} =~ "ethos-u" ]]; then + return + fi + local scratch_size + scratch_size=$(get_ethosu_scratch_size "$pte_path" || true) + if [[ -z "${scratch_size}" ]]; then + echo "WARNING: Failed to derive Ethos-U scratch size from ${pte_path}" >&2 + return + fi + local cmake_cmd=( + cmake -S "${runner_source_dir}" -B "${build_dir}" + ) + if [[ -n "${extra_build_flags}" ]]; then + # shellcheck disable=SC2206 + local extra_args=(${extra_build_flags}) + cmake_cmd+=("${extra_args[@]}") + fi + cmake_cmd+=("-DET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=${scratch_size}") + echo "[run.sh] Updating scratch allocator size to ${scratch_size}" + "${cmake_cmd[@]}" +} + if [[ -z "$model_name" ]]; then + echo "[run.sh] WARNING: Built-in test models executed when --model_name is omitted are deprecated and will be removed after the ExecuTorch 1.2 release." 
>&2 # the test models run, and whether to delegate test_model=( "softmax" # 0 @@ -302,7 +710,7 @@ for i in "${!test_model[@]}"; do printf "Running e2e flow for model '%s' with flags '%s'\n" "${model}" "${model_compiler_flags}" echo "--------------------------------------------------------------------------------" - cd $et_root_dir + cd "${et_root_dir}" # Remove path and file exetension to get model_short_name ext=${model##*.} model_short_name=$(basename -- "${model}" .$ext) @@ -323,13 +731,14 @@ for i in "${!test_model[@]}"; do output_folder=${et_build_root}/${model_short_name} fi + local_fvp_pmu_flag="" if [ "$perf_overlay" = true ] ; then model_compiler_flags+="--enable_debug_mode tosa" - fvp_pmu_flag="--trace_file=${output_folder}/pmu_trace.gz" + local_fvp_pmu_flag="--trace_file=${output_folder}/pmu_trace.gz" fi - mkdir -p ${output_folder} - output_folder=$(realpath ${output_folder}) + mkdir -p "${output_folder}" + output_folder=$(realpath "${output_folder}") pte_file="${output_folder}/${model_filename_ext}" # Remove old pte files @@ -339,16 +748,17 @@ for i in "${!test_model[@]}"; do model_compiler_flags="${model_compiler_flags} --model_input=${model_input}" fi - ARM_AOT_CMD="python3 -m backends.arm.scripts.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag ${etrecord_flag} --config=${config} $qdq_fusion_op_flag" + model_etrecord_flag="${etrecord_flag_template}" + ARM_AOT_CMD="python3 -m backends.arm.scripts.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag ${model_etrecord_flag} --config=${config} $qdq_fusion_op_flag" echo "CALL ${ARM_AOT_CMD}" >&2 ${ARM_AOT_CMD} 1>&2 - pte_file=$(realpath ${pte_file}) + pte_file=$(realpath "${pte_file}") - if 
[ "${etrecord_flag}" != "" ] ; then + if [ "${model_etrecord_flag}" != "" ] ; then etrecord_filename="${output_folder}/${model_filename}_etrecord.bin" - etrecord_filename=$(realpath ${etrecord_filename}) - etrecord_flag="--etrecord=${etrecord_filename}" + etrecord_filename=$(realpath "${etrecord_filename}") + model_etrecord_flag="--etrecord=${etrecord_filename}" fi [[ -f ${pte_file} ]] || { >&2 echo "Failed to generate a pte file - ${pte_file}"; exit 1; } @@ -357,6 +767,7 @@ for i in "${!test_model[@]}"; do if [[ ${target} == *"TOSA"* ]]; then echo "Build for ${target} skip generating a .elf and running it" + continue elif [[ ${target} == cortex-m* ]]; then # Cortex-M backend uses a shared semihosting executor_runner (built # by build_test_runner.sh) that loads the .bpte at runtime, rather @@ -374,51 +785,54 @@ for i in "${!test_model[@]}"; do set +x elif [[ ${target} == *"vgf"* ]]; then echo "Build and run for VKML, (target: ${target})" - set -x - backends/arm/scripts/build_executor_runner_vkml.sh --build_type=${build_type} \ - --extra_build_flags="${extra_build_flags}" \ - --output="${output_folder}" \ - ${bundleio_flag} + build_runner_target executor_runner if [ "$build_only" = false ] ; then - backends/arm/scripts/run_vkml.sh --model=${pte_file} --build_path=${output_folder} + backends/arm/scripts/run_vkml.sh --model=${pte_file} --build_path=${build_dir} fi - set +x - else - # Build the application, the pte is imported as a header/c array or the address specified by --pte_placement - model_data="" - pte_file_or_mem="${pte_file}" - elf_file="${output_folder}/${model_filename}/cmake-out/arm_executor_runner" - if ! 
[[ ${pte_placement} == "elf" ]]; then - # Place PTE in memory specified by pte_placement - pte_file_or_mem="${pte_placement}" - model_data="--data=${pte_file}@${pte_placement}" - elf_file="${et_build_root}/${target}_${pte_placement}/cmake-out/arm_executor_runner" + if [[ "${auto_configure}" == true ]]; then + configure_runner_build_dir "${pte_file}" fi + ensure_build_dir_ready + ensure_pte_placement_setting - if [ "$specify_ethosu_scratch" = true ] && [[ ${target} =~ "ethos-u" ]]; then - scratch_size=$(get_ethosu_scratch_size "$pte_file") - if [ "$?" -eq 0 ] && [ -n "$scratch_size" ]; then - extra_build_flags="${extra_build_flags} -DET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=${scratch_size}" + model_data="" + if [[ ${pte_placement} == "elf" ]]; then + if [[ "${auto_configure}" == true ]]; then + staged_path=$(cmake_cache_get ET_PTE_FILE_PATH) + echo "ET_PTE_FILE_PATH payload: ${staged_path}" else - echo "WARNING: Failed to derive Ethos-U scratch size from ${pte_file}" >&2 + staged_path=$(stage_pte_into_cache "${pte_file}") + echo "Updated ET_PTE_FILE_PATH payload: ${staged_path}" fi + else + model_data="--data=${pte_file}@${pte_placement}" fi - set -x - backends/arm/scripts/build_executor_runner.sh --et_build_root="${et_build_root}" --pte="${pte_file_or_mem}" --build_type=${build_type} --target=${target} --system_config=${system_config} --memory_mode=${memory_mode} ${bundleio_flag} ${et_dump_flag} --extra_build_flags="${extra_build_flags}" --ethosu_tools_dir="${arm_scratch_dir}" --toolchain="${toolchain}" --select_ops_list="${select_ops_list}" - if [ "$build_only" = false ] ; then - # Execute the executor_runner on FVP Simulator + configure_ethosu_scratch_if_requested "${pte_file}" - backends/arm/scripts/run_fvp.sh --elf=${elf_file} ${model_data} --target=$target ${etrecord_flag} ${fvp_pmu_flag} + build_runner_target arm_executor_runner + elf_file=$(locate_runner_binary arm_executor_runner) \ + || { echo "Failed to locate arm_executor_runner in 
${build_dir}." >&2; exit 1; } + if [ "$build_only" = false ] ; then + fvp_args=("--elf=${elf_file}" "--target=${target}") + if [[ -n "${model_data}" ]]; then + fvp_args+=("${model_data}") + fi + if [[ -n "${model_etrecord_flag}" ]]; then + fvp_args+=("${model_etrecord_flag}") + fi + if [[ -n "${local_fvp_pmu_flag}" ]]; then + fvp_args+=("${local_fvp_pmu_flag}") + fi + backends/arm/scripts/run_fvp.sh "${fvp_args[@]}" fi - set +x fi if [ "$model_explorer" = true ]; then perf_flags="" if [ "$perf_overlay" = true ]; then - perf_flags+="--trace ${output_folder}/pmu_trace.gz --tables ${output_folder}/output/out_debug.xml" + perf_flags+=" --trace ${output_folder}/pmu_trace.gz --tables ${output_folder}/output/out_debug.xml" fi visualization_file="" diff --git a/tools/cmake/preset/arm_baremetal.cmake b/tools/cmake/preset/arm_baremetal.cmake index 882780ade1d..c12cc95233a 100644 --- a/tools/cmake/preset/arm_baremetal.cmake +++ b/tools/cmake/preset/arm_baremetal.cmake @@ -1,9 +1,30 @@ -# Copyright 2025 Arm Limited and/or its affiliates. +# Copyright 2025-2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}") +define_overridable_option( + EXECUTORCH_BAREMETAL_SKIP_INSTALL + "Skip emitting install/export rules when building bare-metal artifacts" BOOL + ON +) + +if(EXECUTORCH_BAREMETAL_SKIP_INSTALL) + set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}") + # Bare-metal builds consume the build tree directly. Keep the install target + # available (many docs/scripts still invoke it) but route the output back into + # the build tree so nothing is exported outside the repo. 
+ unset(CMAKE_SKIP_INSTALL_RULES CACHE) + set(CMAKE_SKIP_INSTALL_RULES OFF) + set(CMAKE_SKIP_INSTALL_RULES + OFF + CACHE + BOOL + "Retain install() rules so docs/scripts can keep calling `--target install`" + FORCE + ) +endif() + set_overridable_option(EXECUTORCH_BUILD_EXECUTOR_RUNNER OFF) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR OFF) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER OFF)