Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ on:
runner:
required: true
type: string
image:
image-a2:
required: true
type: string
image-a3:
required: true
type: string
model_list:
Expand All @@ -55,11 +58,11 @@ concurrency:
cancel-in-progress: true

jobs:
e2e-nightly:
e2e-accuracy-test:
name: ${{inputs.model_list}} accuracy test
runs-on: ${{ inputs.runner }}
container:
image: "${{ inputs.image }}"
image: ${{ contains(inputs.runner, 'a2') && inputs.image-a2 || inputs.image-a3 }}
env:
VLLM_USE_MODELSCOPE: True
GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
Expand Down Expand Up @@ -101,18 +104,19 @@ jobs:
env:
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
run: |
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
pip install -r requirements-dev.txt
pip install -v -e .

- name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
if: ${{ inputs.runner == 'linux-aarch64-a2-4' && contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }}
if: ${{ contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }}
shell: bash -l {0}
run: |
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"

- name: Install tensorflow (for Molmo-7B-D-0924)
if: ${{ inputs.runner == 'linux-aarch64-a2-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }}
if: ${{ contains(inputs.model_list, 'Molmo-7B-D-0924') }}
shell: bash -l {0}
run: |
pip install tensorflow --no-cache-dir
Expand Down Expand Up @@ -180,6 +184,7 @@ jobs:
VLLM_WORKER_MULTIPROC_METHOD: spawn
HF_DATASETS_OFFLINE: True
VLLM_USE_MODELSCOPE: True
HCCL_BUFFSIZE: 600
VLLM_CI_RUNNER: ${{ inputs.runner }}
VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
Expand Down
17 changes: 8 additions & 9 deletions .github/workflows/vllm_ascend_test_nightly_a2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ jobs:
(
contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
contains(github.event.pull_request.labels.*.name, 'ready-for-test')
)
}}
) }}
strategy:
fail-fast: false
matrix:
Expand All @@ -86,29 +85,29 @@ jobs:
- Qwen3-8B-W8A8
- Qwen3-VL-8B-Instruct
- Qwen2.5-Omni-7B
- Meta-Llama-3.1-8B-Instruct
- os: linux-aarch64-a2-1
model_list:
- ERNIE-4.5-21B-A3B-PT
- gemma-3-4b-it
- internlm-7b
- InternVL3_5-8B-hf
- llava-1.5-7b-hf
- Molmo-7B-D-0924
- os: linux-aarch64-a2-2
- Meta-Llama-3.1-8B-Instruct
- os: linux-aarch64-a3-2
model_list:
- Qwen3-30B-A3B
- Qwen3-VL-30B-A3B-Instruct
- Qwen3-30B-A3B-W8A8
- os: linux-aarch64-a2-4
- ERNIE-4.5-21B-A3B-PT
- os: linux-aarch64-a3-4
model_list:
- Qwen3-Next-80B-A3B-Instruct
uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
uses: ./.github/workflows/_e2e_accuracy.yaml
with:
vllm: v0.12.0
runner: ${{ matrix.test_config.os }}
model_list: ${{ toJson(matrix.test_config.model_list) }}
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11'
image-a2: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a2-ubuntu22.04-py3.11
image-a3: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
upload: false


Expand Down
47 changes: 47 additions & 0 deletions .github/workflows/vllm_ascend_test_pr_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ jobs:
outputs:
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
accuracy_tracker: ${{ steps.filter.outputs.accuracy_tracker }}
steps:
- name: Setup git proxy
run: |
Expand All @@ -69,6 +70,11 @@ jobs:
- 'packages.txt'
ut_tracker:
- 'tests/ut/**'
accuracy_tracker:
- '.github/workflows/_e2e_accuracy.yaml'
- 'csrc/**'
- 'vllm_ascend/**'
- 'tests/e2e/models/**'

e2e-test:
name: e2e-full
Expand All @@ -83,3 +89,44 @@ jobs:
runner: linux-aarch64-a2
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
type: full

e2e-accuracy:
needs: [changes]
name: e2e-accuracy
if: ${{ needs.changes.outputs.accuracy_tracker == 'true' }}
strategy:
fail-fast: false
matrix:
test_config:
- os: linux-aarch64-a2-1
model_list:
- Qwen3-8B
- Qwen2-Audio-7B-Instruct
- Qwen3-8B-W8A8
- Qwen3-VL-8B-Instruct
- Qwen2.5-Omni-7B
- Meta-Llama-3.1-8B-Instruct
- os: linux-aarch64-a2-1
model_list:
- ERNIE-4.5-21B-A3B-PT
- gemma-3-4b-it
- internlm-7b
- InternVL3_5-8B-hf
- llava-1.5-7b-hf
- Molmo-7B-D-0924
- os: linux-aarch64-a3-2
model_list:
- Qwen3-30B-A3B
- Qwen3-VL-30B-A3B-Instruct
- Qwen3-30B-A3B-W8A8
- os: linux-aarch64-a3-4
model_list:
- Qwen3-Next-80B-A3B-Instruct
uses: ./.github/workflows/_e2e_accuracy.yaml
with:
vllm: v0.12.0
runner: ${{ matrix.test_config.os }}
model_list: ${{ toJson(matrix.test_config.model_list) }}
image-a2: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
image-a3: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
upload: false
9 changes: 4 additions & 5 deletions .github/workflows/vllm_ascend_test_report.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,17 @@ jobs:
- runner: linux-aarch64-a2-1
model_list:
- Qwen3-8B
- Qwen2.5-VL-7B-Instruct
- Qwen2-Audio-7B-Instruct
- runner: linux-aarch64-a2-2
- runner: linux-aarch64-a3-2
model_list:
- Qwen3-30B-A3B
- Qwen3-VL-30B-A3B-Instruct
- DeepSeek-V2-Lite
uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
uses: ./.github/workflows/_e2e_accuracy.yaml
with:
vllm: v0.12.0
runner: ${{ matrix.runner }}
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
image-a2: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
image-a3: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
model_list: ${{ toJson(matrix.model_list) }}
upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}

Expand Down
1 change: 1 addition & 0 deletions tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ tasks:
value: 0.45
num_fewshot: 5
gpu_memory_utilization: 0.8
enforce_eager: True
5 changes: 3 additions & 2 deletions tests/e2e/models/configs/gemma-3-4b-it.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ tasks:
- name: "gsm8k"
metrics:
- name: "exact_match,strict-match"
value: 0.59
value: 0.56
- name: "exact_match,flexible-extract"
value: 0.59
value: 0.56
num_fewshot: 5
apply_chat_template: False
fewshot_as_multiturn: False
gpu_memory_utilization: 0.7
enforce_eager: True
1 change: 1 addition & 0 deletions tests/e2e/models/configs/llava-1.5-7b-hf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ tasks:
trust_remote_code: True
gpu_memory_utilization: 0.8
dtype: "bfloat16"
enforce_eager: True
Loading