Fix missing check (#19340) #11461

Workflow file for this run

name: Test Metal Backend

on:
  push:
    branches:
      - main
      - release/*
    tags:
      - ciflow/metal/*
  pull_request:
    paths:
      - .github/workflows/metal.yml
      - backends/apple/metal/**
      - backends/aoti/**
      - examples/models/qwen3_5_moe/**
      - extension/llm/export/**
  workflow_dispatch:
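
# One concurrency group per PR (or per commit SHA on push); the trailing
# event-name booleans keep workflow_dispatch (and any future schedule) runs
# in their own groups so they never cancel an in-flight PR run.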
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  test-metal-builds:
    name: test-executorch-metal-build
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      default-packages: ""
      runner: macos-m2-stable
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        set -eux
        echo "::group::Test ExecuTorch Metal build"
        PYTHON_EXECUTABLE=python CMAKE_ARGS="-DEXECUTORCH_BUILD_METAL=ON" ${CONDA_RUN} --no-capture-output ./install_executorch.sh
        echo "::endgroup::"

  test-metal-modules:
    name: test-metal-backend-modules
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      default-packages: ""
      runner: macos-m2-stable
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 120
      script: |
        set -eux
        echo "::group::Setup ExecuTorch"
        PYTHON_EXECUTABLE=python ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 TORCHAO_BUILD_EXPERIMENTAL_MPS=1 ./install_executorch.sh
        echo "::endgroup::"
        echo "::group::Build Metal Runtime"
        ${CONDA_RUN} backends/apple/metal/tests/run_metal_test.sh --build
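        # --build compiles the native Metal test runtime up front; the Python
        # unittest below then drives the compiled modules through it.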
echo "::endgroup::"
echo "::group::Run Metal Backend Module Tests"
${CONDA_RUN} python -m unittest backends.apple.metal.tests.test_modules.TestMetalBackendModules
echo "::endgroup::"

  test-metal-qwen35-moe-tiny:
    name: test-metal-qwen35-moe-tiny
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      default-packages: ""
      runner: macos-m2-stable
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 120
      script: |
        set -eux
        echo "::group::Setup ExecuTorch"
        PYTHON_EXECUTABLE=python ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 TORCHAO_BUILD_EXPERIMENTAL_MPS=1 ./install_executorch.sh
        echo "::endgroup::"
        # Isolate Inductor cache per job to prevent PCH conflicts
        export TMPDIR=$(mktemp -d "${RUNNER_TEMP}/tmpdir_XXXXXX")
        export TORCHINDUCTOR_CACHE_DIR=$(mktemp -d "${RUNNER_TEMP}/inductor_cache_XXXXXX")
        echo "::group::Export Qwen 3.5 MoE (tiny model, Metal)"
        ${CONDA_RUN} python -m executorch.examples.models.qwen3_5_moe.export \
          --tiny-test \
          --backend metal \
          --qlinear fpa4w \
          --output-dir /tmp/qwen35_moe_metal_tiny
        echo "::endgroup::"
        echo "::group::Build Metal runtime and Qwen 3.5 MoE runner"
        ${CONDA_RUN} cmake --workflow --preset llm-release-metal
        cd examples/models/qwen3_5_moe
        ${CONDA_RUN} cmake --workflow --preset qwen3-5-moe-metal
        cd -
        echo "::endgroup::"
        # Create a byte-level tokenizer for the tiny model (vocab_size=256).
        # Maps each byte value to its own token ID so any prompt produces valid IDs.
        ${CONDA_RUN} python - <<'PY'
        import json

        # Printable ASCII maps to its own character; every other byte value
        # gets a <0xNN> placeholder token, so all 256 byte values are covered.
        vocab = {chr(i) if 32 <= i < 127 else f'<0x{i:02X}>': i for i in range(256)}
        tokenizer = {
            'version': '1.0',
            'model': {'type': 'BPE', 'vocab': vocab, 'merges': []},
            'added_tokens': [
                {'id': i, 'content': chr(i) if 32 <= i < 127 else f'<0x{i:02X}>',
                 'single_word': False, 'lstrip': False, 'rstrip': False,
                 'normalized': False, 'special': False}
                for i in range(256)
            ],
        }
        with open('/tmp/qwen35_moe_metal_tiny/tokenizer.json', 'w') as f:
            json.dump(tokenizer, f)
        print('Created byte-level tokenizer.json')
        PY
        RUNNER=./cmake-out/examples/models/qwen3_5_moe/qwen3_5_moe_runner
        # Patch absolute libomp install name to rpath-based lookup (same as test_model_e2e.sh)
        if otool -L "$RUNNER" | grep -q "/opt/llvm-openmp/lib/libomp.dylib"; then
          install_name_tool -change /opt/llvm-openmp/lib/libomp.dylib @rpath/libomp.dylib "$RUNNER"
        fi
        MODEL=/tmp/qwen35_moe_metal_tiny/model.pte
        TOKENIZER=/tmp/qwen35_moe_metal_tiny/tokenizer.json
        echo "::group::Run Qwen 3.5 MoE inference (T=1 decode)"
        # Single-char prompt → 1 token → exercises decode-only path
        set +e
        OUTPUT=$("$RUNNER" --model_path "$MODEL" --tokenizer_path "$TOKENIZER" \
          --prompt "A" --temperature 0 --max_new_tokens 4 2>&1)
        RC=$?
        set -e
        echo "$OUTPUT"
        if [ $RC -ne 0 ]; then
          echo "Failed: runner exited with code $RC"
          exit 1
        fi
        echo "$OUTPUT" | grep -q "Prompt tokens: 1" || { echo "Failed: expected 1 prompt token for decode path"; exit 1; }
        echo "$OUTPUT" | grep -q "Decode:" || { echo "Failed: decode did not complete"; exit 1; }
        echo "Success: decode completed"
        echo "::endgroup::"
        echo "::group::Run Qwen 3.5 MoE inference (T>2 prefill + decode)"
        set +e
        OUTPUT=$("$RUNNER" --model_path "$MODEL" --tokenizer_path "$TOKENIZER" \
          --prompt "one two three" --temperature 0 --max_new_tokens 4 2>&1)
        RC=$?
        set -e
        echo "$OUTPUT"
        if [ $RC -ne 0 ]; then
          echo "Failed: runner exited with code $RC"
          exit 1
        fi
        # Byte-level tokenizer: "one two three" = 13 bytes = 13 tokens
        PROMPT_TOKENS=$(echo "$OUTPUT" | grep -o "Prompt tokens: [0-9]*" | head -1 | grep -o "[0-9]*")
        if [ "$PROMPT_TOKENS" -le 2 ]; then
          echo "Failed: expected >2 prompt tokens for prefill path, got $PROMPT_TOKENS"
          exit 1
        fi
        echo "$OUTPUT" | grep -q "Decode:" || { echo "Failed: prefill + decode did not complete"; exit 1; }
        echo "Success: prefill ($PROMPT_TOKENS tokens) + decode completed"
        echo "::endgroup::"

  export-model-metal-artifact:
    name: export-model-metal-artifact
    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    secrets: inherit
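    # 5 models x 2 quantization modes, minus the one excluded combination,
    # yields 9 matrix jobs.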
    strategy:
      fail-fast: false
      matrix:
        model:
          - repo: "mistralai"
            name: "Voxtral-Mini-3B-2507"
          - repo: "mistralai"
            name: "Voxtral-Mini-4B-Realtime-2602"
          - repo: "openai"
            name: "whisper-small"
          - repo: "openai"
            name: "whisper-large-v3-turbo"
          - repo: "nvidia"
            name: "parakeet-tdt"
        quant:
          - "non-quantized"
          - "quantized-int4-metal"
        exclude:
          # Exclude non-quantized for Voxtral Realtime (too large)
          - model:
              repo: "mistralai"
              name: "Voxtral-Mini-4B-Realtime-2602"
            quant: "non-quantized"
    with:
      default-packages: ""
      runner: macos-m2-stable
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      secrets-env: EXECUTORCH_HF_TOKEN
      upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-metal-${{ matrix.quant }}
      script: |
        set -eux
        echo "::group::Setup Huggingface"
        ${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0" accelerate
        ${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
        echo "::endgroup::"
        echo "::group::Setup Optimum-ExecuTorch"
        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
        echo "Using optimum-executorch version: ${OPTIMUM_ET_VERSION}"
        ${CONDA_RUN} pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
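        # The commit pin keeps optimum-executorch in lockstep with the
        # ExecuTorch commit under test instead of floating to its latest main.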
echo "::endgroup::"
echo "::group::Setup ExecuTorch"
PYTHON_EXECUTABLE=python ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 TORCHAO_BUILD_EXPERIMENTAL_MPS=1 ./install_executorch.sh
echo "::endgroup::"
echo "::group::Pip List"
${CONDA_RUN} pip list
echo "::endgroup::"
# Isolate Inductor cache and precompiled headers (PCH) per job to prevent
# PCH mtime conflicts between parallel matrix jobs on the same runner.
# TORCHINDUCTOR_CACHE_DIR isolates the code cache; setting TMPDIR isolates
# the PCH dir, which PyTorch derives from tempfile.gettempdir() independently.
export TMPDIR=$(mktemp -d "${RUNNER_TEMP}/tmpdir_XXXXXX")
export TORCHINDUCTOR_CACHE_DIR=$(mktemp -d "${RUNNER_TEMP}/inductor_cache_XXXXXX")
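        # export_model_artifact.sh writes the exported model files into
        # ${RUNNER_ARTIFACT_DIR}, which the macos_job wrapper uploads under the
        # upload-artifact name above.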
        ${CONDA_RUN} bash .ci/scripts/export_model_artifact.sh metal "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"

  test-model-metal-e2e:
    name: test-model-metal-e2e
    needs: export-model-metal-artifact
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
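    # Mirrors the export matrix above; each job downloads the artifact uploaded
    # by the matching export-model-metal-artifact job.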
    strategy:
      fail-fast: false
      matrix:
        model:
          - repo: "mistralai"
            name: "Voxtral-Mini-3B-2507"
          - repo: "mistralai"
            name: "Voxtral-Mini-4B-Realtime-2602"
          - repo: "openai"
            name: "whisper-small"
          - repo: "openai"
            name: "whisper-large-v3-turbo"
          - repo: "nvidia"
            name: "parakeet-tdt"
        quant:
          - "non-quantized"
          - "quantized-int4-metal"
        exclude:
          # Exclude non-quantized for Voxtral Realtime (too large)
          - model:
              repo: "mistralai"
              name: "Voxtral-Mini-4B-Realtime-2602"
            quant: "non-quantized"
    with:
      default-packages: ""
      runner: macos-m2-stable
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      download-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-metal-${{ matrix.quant }}
      script: |
        set -eux
        echo "::group::Print machine info"
        uname -a
        if [ "$(uname -s)" == "Darwin" ]; then
          sw_vers
          # Print RAM in GB
          RAM_BYTES=$(sysctl -n hw.memsize)
          RAM_GB=$(echo "scale=2; $RAM_BYTES/1024/1024/1024" | bc)
          echo "Available RAM (GB): $RAM_GB"
          sysctl machdep.cpu.brand_string
          sysctl machdep.cpu.core_count
          # Print number of GPU cores (Apple Silicon)
          if command -v system_profiler &> /dev/null; then
            GPU_CORES=$(system_profiler SPDisplaysDataType | awk '/Total Number of Cores/ {print $5; exit}')
            if [ -z "$GPU_CORES" ]; then
              # Fallback: try to parse "Core Count" from the Apple GPU section
              GPU_CORES=$(system_profiler SPDisplaysDataType | awk '/Core Count/ {print $3; exit}')
            fi
            echo "GPU Cores: ${GPU_CORES:-Unknown}"
          else
            echo "system_profiler not available, cannot determine GPU cores."
          fi
        fi
        echo "::endgroup::"
        ${CONDA_RUN} bash .ci/scripts/test_model_e2e.sh metal "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"