Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ci/scripts/setup-linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu
set -eu

# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
Expand Down
50 changes: 34 additions & 16 deletions .ci/scripts/test_riscv_qemu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,50 +4,65 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# CI wrapper: install RISC-V cross-compile + qemu-user tooling, then run the
# RISC-V smoke test (export, cross-compile, qemu-user execution) via
# examples/riscv/run.sh. The bundled-IO comparison and Test_result: PASS
# check are done by run.sh.
# CI wrapper: install riscv32/64 cross-compile + qemu tooling, then drive
# examples/riscv/run.sh which does the export, cross-compile, qemu run, and
# bundled-IO PASS check.

set -eu

script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
et_root_dir=$(realpath "${script_dir}/../..")

model="add"
xnnpack=false
backend="portable"
quantize=false
os="linux"
arch="rv64"
qemu_cpu_ext=""
verbose_xnnpack=false
debug_xnnpack=false
build_dir=

usage() {
cat <<EOF
Usage: $(basename "$0") [options]
Options:
--model=<NAME> Which model to export and run (default: add)
--xnnpack Enable the XNNPACK backend (AOT partitioner + runtime)
--quantize Produce an 8-bit quantized model
--verbose-xnnpack Build XNNPACK with XNN_LOG_LEVEL=4 to log microkernel dispatch
--debug-xnnpack Enable XNNPACK partitioner DEBUG logging and dump the lowered graph
-h, --help Show this help
--model=<NAME> Which model to export and run (default: ${model})
--quantize Produce an 8-bit quantized model
--backend=<NAME> AOT backend (portable|xnnpack) (default: ${backend})
--os=<NAME> Target OS (linux|baremetal) (default: ${os})
--arch=<NAME> Target arch (rv32|rv64) (default: ${arch})
--qemu-cpu-ext=<EXT> QEMU -cpu extensions (no rv32/rv64 prefix, default: none)
--build-dir=<DIR> Build/output directory for this configuration (required)
--verbose-xnnpack Build XNNPACK with XNN_LOG_LEVEL=4 to log microkernel dispatch
--debug-xnnpack Enable XNNPACK partitioner DEBUG logging and dump the lowered graph
-h, --help Show this help
EOF
}

for arg in "$@"; do
case $arg in
--model=*) model="${arg#*=}" ;;
--xnnpack) xnnpack=true ;;
--quantize) quantize=true ;;
--backend=*) backend="${arg#*=}" ;;
--os=*) os="${arg#*=}" ;;
--arch=*) arch="${arg#*=}" ;;
--qemu-cpu-ext=*) qemu_cpu_ext="${arg#*=}" ;;
--build-dir=*) build_dir="${arg#*=}" ;;
--debug-xnnpack) debug_xnnpack=true ;;
--verbose-xnnpack) verbose_xnnpack=true ;;
-h|--help) usage; exit 0 ;;
*) echo "Unknown option: $arg" >&2; usage; exit 1 ;;
esac
done

if [[ -z "${build_dir}" ]]; then
echo "[test_riscv_qemu.sh] --build-dir is required" >&2; usage; exit 1
fi

run_extra_args=()
if ${xnnpack}; then
run_extra_args+=(--xnnpack)
if [ -n "${qemu_cpu_ext}" ]; then
run_extra_args+=(--qemu-cpu-ext="${qemu_cpu_ext}")
fi
if ${quantize}; then
run_extra_args+=(--quantize)
Expand All @@ -59,5 +74,8 @@ if ${verbose_xnnpack}; then
run_extra_args+=(--verbose-xnnpack)
fi

bash "${et_root_dir}/examples/riscv/setup.sh"
bash "${et_root_dir}/examples/riscv/run.sh" --model="${model}" "${run_extra_args[@]}"
bash "${et_root_dir}/examples/riscv/setup-${os}.sh"
bash "${et_root_dir}/examples/riscv/run.sh" \
--model="${model}" --backend="${backend}" --os="${os}" --arch="${arch}" \
--build-dir="${build_dir}" \
"${run_extra_args[@]}"
57 changes: 36 additions & 21 deletions .github/workflows/_test_riscv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,35 +13,44 @@ on:
type: number
default: 30
model:
description: 'Which model to run. Possible values are: add, mv2 (mobilenetv2)'
description: 'Which model to run (add, mv2, mobilebert, llama2, resnet18, yolo26)'
required: false
type: string
default: 'add'
xnnpack:
description: 'Whether to enable XNNPACK'
required: false
type: boolean
default: false
quantize:
description: 'Produce an 8-bit quantized model'
required: false
type: boolean
default: false
qemu-cpu:
description: 'Configuration(s) for the CPU to emulate with QEMU, expecting a JSON array'
required: true
backend:
description: 'AOT backend to lower to (portable|xnnpack)'
required: false
type: string
docker-image:
description: 'The docker image to use for this job'
default: 'portable'
os:
description: 'Target OS for the runner (linux|baremetal)'
required: false
type: string
default: 'linux'
arch:
description: 'Target architecture (rv32|rv64)'
required: false
type: string
default: 'rv64'
qemu-cpu-ext:
description: >-
JSON array of QEMU -cpu *extension* strings (no rv32/rv64 prefix).
The script splices each entry with `arch` to form the final -cpu
value. Use [""] for plain base-ISA runs.
required: true
type: string

jobs:
run:
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.2xlarge
docker-image: ci-image:executorch-ubuntu-24.04-gcc14
docker-image: ${{ inputs.os == 'linux' && 'ci-image:executorch-ubuntu-24.04-gcc14' || 'ci-image:executorch-ubuntu-26.04-gcc15' }}
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: ${{ inputs.timeout }}
Expand All @@ -55,20 +64,26 @@ jobs:
# Allows failure in `echo | jq | while read` pipeline to bubble up and fail the workflow
set -o pipefail

echo '${{ inputs.qemu-cpu }}' | jq -r '.[]' | while IFS= read -r qemu_cpu; do
export QEMU_CPU="${qemu_cpu}"
export GCC_VERSION=14
echo '${{ inputs.qemu-cpu-ext }}' | jq -r '.[]' | while IFS= read -r qemu_cpu_ext; do
variant_slug="${qemu_cpu_ext//,/_}"; variant_slug="${variant_slug//=/_}"; variant_slug="${variant_slug:-base}"
build_dir="riscv_test/${{ inputs.model }}${{ inputs.quantize && '_q' || '' }}/${{ inputs.backend }}/${{ inputs.os }}-${{ inputs.arch }}-${variant_slug}"

bash .ci/scripts/test_riscv_qemu.sh \
--model="${{ inputs.model }}" \
${{ inputs.xnnpack && '--xnnpack --verbose-xnnpack' || '' }} \
--backend="${{ inputs.backend }}" \
--os="${{ inputs.os }}" \
--arch="${{ inputs.arch }}" \
--qemu-cpu-ext="${qemu_cpu_ext}" \
--build-dir="${build_dir}" \
${{ inputs.backend == 'xnnpack' && '--verbose-xnnpack' || '' }} \
${{ inputs.quantize && '--quantize' || '' }}

# We only generate riscv_test/${{ inputs.model }}_riscv.etdump.json from `--verbose-xnnpack`.
if ${{ inputs.xnnpack }}; then
# Generate markdown table from riscv_test/${{ inputs.model }}_riscv.etdump.json, sorted by sum_ms
# We only generate run.etdump.json from `--verbose-xnnpack`.
if [[ "${{ inputs.backend }}" == "xnnpack" ]]; then
# Generate markdown table from ${build_dir}/run.etdump.json, sorted by sum_ms
(
etdump_json="riscv_test/${{ inputs.model }}_riscv.etdump.json"
echo "### Model=${{ inputs.model }} XNNPACK=${{ inputs.xnnpack }} Quantize=${{ inputs.quantize }} QEMU_CPU='${QEMU_CPU}'"
etdump_json="${build_dir}/run.etdump.json"
echo "### Model=${{ inputs.model }} Quantize=${{ inputs.quantize }} Backend=${{ inputs.backend }} OS=${{ inputs.os }} Arch=${{ inputs.arch }}${qemu_cpu_ext:+,${qemu_cpu_ext}}"
jq -r '
def r3: (. * 1000 | round) / 1000;
["Section","Op","Count","Sum (ms)","Avg (ms)","Max (ms)","Microkernels"],
Expand Down
42 changes: 26 additions & 16 deletions .github/workflows/riscv64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ on:
pull_request:
paths:
- .github/workflows/riscv64.yml
- .github/workflows/_test_riscv.yml
- .ci/scripts/test_riscv_qemu.sh
- tools/cmake/preset/riscv64_linux.cmake
- tools/cmake/preset/riscv_*.cmake
- examples/riscv/**
workflow_dispatch:
schedule:
Expand All @@ -35,33 +36,42 @@ jobs:
- llama2
- resnet18
- yolo26
xnnpack: [true, false]
quantize: [true, false]
backend: [portable, xnnpack]
os: [linux, baremetal]
arch: [rv64, rv32]
exclude:
# We only enable quantization with XNNPACK
- xnnpack: false
quantize: true
# We don't test quantization for Yolo26
- model: yolo26
quantize: true
# Disable quantization testing with Portable Kernels
- { backend: portable, quantize: true }
# XNNPACK needs pthreads + dynamic loading (no baremetal)
- { backend: xnnpack, os: baremetal }
# No quantization recipe for Yolo26.
- { model: yolo26, quantize: true }
# No riscv32-linux-gnu cross toolchain is packaged on Ubuntu.
- { os: linux, arch: rv32 }
permissions:
id-token: write
contents: read
with:
model: ${{ matrix.model }}
xnnpack: ${{ matrix.xnnpack }}
quantize: ${{ matrix.quantize }}
# If XNNPACK, test with multiple RVV length, disabled otherwise
qemu-cpu: >-
backend: ${{ matrix.backend }}
os: ${{ matrix.os }}
arch: ${{ matrix.arch }}
# JSON array of QEMU -cpu *extension* strings (no rv32/rv64 prefix - that
# comes from `arch`). The script splices them as `<arch>,<ext>`. xnnpack
# benefits from RVV so it sweeps multiple vlen values; everything else
# runs with the vector extension explicitly disabled (`v=false`).
qemu-cpu-ext: >-
${{
case(
matrix.xnnpack, '[
"rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0",
"rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0",
"rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0"
matrix.backend == 'xnnpack', '[
"v=true,vext_spec=v1.0,vlen=128",
"v=true,vext_spec=v1.0,vlen=256",
"v=true,vext_spec=v1.0,vlen=512"
]',
'[
"rv64,zba=true,zbb=true,zbs=true,v=false"
"v=false"
]'
)
}}
20 changes: 19 additions & 1 deletion CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@
"displayName": "Build ExecuTorch for riscv64 Linux (cross-compile)",
"inherits": ["common"],
"cacheVariables": {
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/riscv64_linux.cmake",
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/riscv_linux.cmake",
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/riscv/riscv64-linux-gnu-toolchain.cmake"
},
"condition": {
Expand All @@ -327,6 +327,24 @@
"rhs": "Linux"
}
},
{
"name": "riscv64-baremetal",
"displayName": "Build ExecuTorch for riscv64 baremetal (cross-compile)",
"inherits": ["common"],
"cacheVariables": {
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/riscv_baremetal.cmake",
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/riscv/riscv64-unknown-elf-toolchain.cmake"
}
},
{
"name": "riscv32-baremetal",
"displayName": "Build ExecuTorch for riscv32 baremetal (cross-compile)",
"inherits": ["common"],
"cacheVariables": {
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/riscv_baremetal.cmake",
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/riscv/riscv32-unknown-elf-toolchain.cmake"
}
},
{
"name": "mlx",
"displayName": "Build MLX delegate",
Expand Down
51 changes: 23 additions & 28 deletions examples/riscv/README.md
Original file line number Diff line number Diff line change
@@ -1,41 +1,36 @@
# RISC-V

Cross-compile `executor_runner` for `riscv64-linux-gnu` and run it under
`qemu-user-static` against a small bundled program. The end-to-end check
mirrors the Arm Cortex-M e2e flow: a `Test_result: PASS` line in stdout from
the bundled-IO comparison path is the pass criterion.
End-to-end smoke tests that cross-compile ExecuTorch for RISC-V and run a bundled program under QEMU. A `Test_result: PASS` line emitted by the bundled-IO comparison path is the pass criterion.

This is the Phase 1 deliverable for the RISC-V Support RFC at
[pytorch/executorch#18991][rfc]. The cross-compile and runner artifacts
(toolchain file, preset, AOT script) are designed to carry over unchanged
to a hardware-runner job once one becomes available; only the invocation
step (qemu-user vs. native) would change.

[rfc]: https://github.com/pytorch/executorch/issues/18991
Part of the RISC-V Support RFC, [pytorch/executorch#18991](https://github.com/pytorch/executorch/issues/18991).

## Quick start (Ubuntu / Debian)

```bash
examples/riscv/setup.sh # apt: gcc-riscv64-linux-gnu, qemu-user-static
examples/riscv/run.sh # export, cross-compile, run under qemu-user
examples/riscv/setup-linux.sh # apt: gcc cross riscv64-linux-gnu + qemu-user
examples/riscv/setup-baremetal.sh # apt: gcc cross riscv64-unknown-elf + qemu-system + picolibc
examples/riscv/run.sh # export, cross-compile, run under qemu
```

The driver does three steps:
`run.sh` accepts:

| Flag | Values | Default | Notes |
|---|---|---|---|
| `--model=<N>` | `add`, `mv2`, `mobilebert`, `llama2`, `resnet18`, `yolo26` | `add` | which model to export |
| `--quantize` | flag | off | XNNPACK quantizer (requires `--backend=xnnpack`) |
| `--backend=<N>` | `portable`, `xnnpack` | `portable` | xnnpack is linux-only |
| `--os=<N>` | `linux`, `baremetal` | `linux` | qemu-user vs qemu-system + semihosting |
| `--arch=<N>` | `rv32`, `rv64` | `rv64` | valid `<os>-<arch>` pairs are `linux-rv64`, `baremetal-rv32`, `baremetal-rv64` |
| `--qemu-cpu-ext=<S>` | e.g. `v=true,vlen=128` | empty | extensions appended after the arch base |

## Pipelines

**linux**: `aot_riscv.py` → `cmake --preset riscv64-linux` → `executor_runner` under `qemu-riscv64`. Portable kernels + (optional) XNNPACK delegate.

**baremetal**: `aot_riscv.py` → `cmake -S examples/riscv/baremetal` (standalone project; pulls executorch in via `add_subdirectory`) → `executor_runner_baremetal.elf` under `qemu-system-riscv64 -machine virt -bios none -semihosting-config target=native`.

1. `python examples/riscv/aot_riscv.py` exports a `torch.add` module to
`riscv_test/add_riscv.bpte` (a BundledProgram with reference outputs
embedded for two test cases).
2. `cmake --preset riscv64-linux` configures the cross-build using
`examples/riscv/riscv64-linux-gnu-toolchain.cmake` and
`tools/cmake/preset/riscv64_linux.cmake`. `executor_runner` is built
against portable kernels with `ET_BUNDLE_IO_ENABLED` defined.
3. `qemu-riscv64-static` invokes the runner with `--model_path` pointing at
the `.bpte`. The runner detects the bundle, runs every embedded test case,
and emits `Test_result: PASS` (or `FAIL`) per case.
The baremetal runner embeds the `.bpte` directly in `.rodata` using `examples/arm/executor_runner/pte_to_header.py`, the same script the Cortex-M flow uses; semihosting `SYS_WRITE0` / `SYS_EXIT` calls carry log output and exit status to the host.

## CI

`.github/workflows/_test_riscv_qemu.yml` is a reusable `workflow_call`
job (mirroring `_test_cortex_m_e2e.yml`) invoked from `pull.yml` to run on
every PR. It runs on the standard `linux.2xlarge` x86_64 runner using the
`executorch-ubuntu-22.04-gcc11` docker image.
`.github/workflows/riscv64.yml` is the entry point; it fans out into `_test_riscv.yml` over a `(model, backend, os, arch, quantize)` matrix and sweeps `qemu-cpu-ext` per backend. Linux jobs run on the `executorch-ubuntu-24.04-gcc14` docker image; baremetal jobs run on the `executorch-ubuntu-26.04-gcc15` docker image (needed for the `riscv64-unknown-elf` picolibc + libstdc++ packages). See [setup-linux.sh](setup-linux.sh) and [setup-baremetal.sh](setup-baremetal.sh).
Loading
Loading