vllm-project · zhangxinyuehfad · Dec 4, 2025
diff --git a/.github/workflows/_e2e_nightly_single_node_models.yaml b/.github/workflows/_e2e_accuracy.yaml
@@ -30,7 +30,10 @@ on:
       runner:
         required: true
         type: string
-      image:
+      image-a2:
+        required: true
+        type: string
+      image-a3:
         required: true
         type: string
       model_list:
@@ -55,11 +58,11 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  e2e-nightly:
+  e2e-accuracy-test:
     name: ${{inputs.model_list}} accuracy test
     runs-on: ${{ inputs.runner }}
     container:
-      image: "${{ inputs.image }}"
+      image: ${{ contains(inputs.runner, 'a2') && inputs.image-a2 || inputs.image-a3 }}  # image-a2 when the runner name contains 'a2', otherwise image-a3
       env:
         VLLM_USE_MODELSCOPE: True
         GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
@@ -101,18 +104,19 @@ jobs:
         env:
           PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
+          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -m)-linux/devlib"  # devlib dir is per-arch; derive it instead of hard-coding x86_64
           pip install -r requirements-dev.txt
           pip install -v -e .
 
       - name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
-        if: ${{ inputs.runner == 'linux-aarch64-a2-4' && contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }}
+        if: ${{ contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }}
         shell: bash -l {0}
         run: |
           . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
           python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
 
       - name: Install tensorflow (for Molmo-7B-D-0924)
-        if: ${{ inputs.runner == 'linux-aarch64-a2-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }}
+        if: ${{ contains(inputs.model_list, 'Molmo-7B-D-0924') }}
         shell: bash -l {0}
         run: |
           pip install tensorflow --no-cache-dir
@@ -180,6 +184,7 @@ jobs:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           HF_DATASETS_OFFLINE: True
           VLLM_USE_MODELSCOPE: True
+          HCCL_BUFFSIZE: 600  # NOTE(review): HCCL communication buffer size, presumably in MB - confirm against the CANN docs
           VLLM_CI_RUNNER: ${{ inputs.runner }}
           VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
           VLLM_COMMIT: ${{ env.VLLM_COMMIT }}

diff --git a/.github/workflows/vllm_ascend_test_nightly_a2.yaml b/.github/workflows/vllm_ascend_test_nightly_a2.yaml
@@ -73,8 +73,7 @@ jobs:
         (
           contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
           contains(github.event.pull_request.labels.*.name, 'ready-for-test')
-        )
-      }}
+        ) }}
     strategy:
       fail-fast: false
       matrix:
@@ -86,29 +85,29 @@ jobs:
               - Qwen3-8B-W8A8
               - Qwen3-VL-8B-Instruct
               - Qwen2.5-Omni-7B
-              - Meta-Llama-3.1-8B-Instruct
           - os: linux-aarch64-a2-1
             model_list:
-              - ERNIE-4.5-21B-A3B-PT
               - gemma-3-4b-it
-              - internlm-7b
               - InternVL3_5-8B-hf
               - llava-1.5-7b-hf
               - Molmo-7B-D-0924
-          - os: linux-aarch64-a2-2
+              - Meta-Llama-3.1-8B-Instruct
+          - os: linux-aarch64-a3-2
             model_list:
               - Qwen3-30B-A3B
               - Qwen3-VL-30B-A3B-Instruct
               - Qwen3-30B-A3B-W8A8
-          - os: linux-aarch64-a2-4
+              - ERNIE-4.5-21B-A3B-PT
+          - os: linux-aarch64-a3-4
             model_list:
               - Qwen3-Next-80B-A3B-Instruct
-    uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
+    uses: ./.github/workflows/_e2e_accuracy.yaml
     with:
       vllm: v0.12.0
       runner: ${{ matrix.test_config.os }}
       model_list: ${{ toJson(matrix.test_config.model_list) }}
-      image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11'
+      image-a2: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      image-a3: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
       upload: false
 
 

diff --git a/.github/workflows/vllm_ascend_test_pr_full.yaml b/.github/workflows/vllm_ascend_test_pr_full.yaml
@@ -43,6 +43,7 @@ jobs:
     outputs:
       e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
       ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
+      accuracy_tracker: ${{ steps.filter.outputs.accuracy_tracker }}
     steps:
       - name: Setup git proxy
         run: |
@@ -69,6 +70,11 @@ jobs:
               - 'packages.txt'
             ut_tracker:
               - 'tests/ut/**'
+            accuracy_tracker:
+              - '.github/workflows/_e2e_accuracy.yaml'
+              - 'csrc/**'
+              - 'vllm_ascend/**'
+              - 'tests/e2e/models/**'
 
   e2e-test:
     name: e2e-full
@@ -83,3 +89,44 @@ jobs:
       runner: linux-aarch64-a2
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       type: full
+
+  e2e-accuracy:
+    needs: [changes]
+    name: e2e-accuracy
+    if: ${{ needs.changes.outputs.accuracy_tracker == 'true' }}
+    strategy:
+      fail-fast: false
+      matrix:
+        test_config:
+          - os: linux-aarch64-a2-1
+            model_list:
+              - Qwen3-8B
+              - Qwen2-Audio-7B-Instruct
+              - Qwen3-8B-W8A8
+              - Qwen3-VL-8B-Instruct
+              - Qwen2.5-Omni-7B
+              - Meta-Llama-3.1-8B-Instruct
+          - os: linux-aarch64-a2-1
+            model_list:
+              - ERNIE-4.5-21B-A3B-PT  # NOTE(review): the nightly a2 matrix moves this model to the a3-2 group in this change - confirm a2-1 is intended here
+              - gemma-3-4b-it
+              - internlm-7b  # NOTE(review): removed from the nightly a2 matrix in this change - confirm it should still run on PRs
+              - InternVL3_5-8B-hf
+              - llava-1.5-7b-hf
+              - Molmo-7B-D-0924
+          - os: linux-aarch64-a3-2
+            model_list:
+              - Qwen3-30B-A3B
+              - Qwen3-VL-30B-A3B-Instruct
+              - Qwen3-30B-A3B-W8A8
+          - os: linux-aarch64-a3-4
+            model_list:
+              - Qwen3-Next-80B-A3B-Instruct
+    uses: ./.github/workflows/_e2e_accuracy.yaml
+    with:
+      vllm: v0.12.0
+      runner: ${{ matrix.test_config.os }}
+      model_list: ${{ toJson(matrix.test_config.model_list) }}
+      image-a2: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      image-a3: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
+      upload: false
diff --git a/.github/workflows/vllm_ascend_test_report.yaml b/.github/workflows/vllm_ascend_test_report.yaml
@@ -63,18 +63,17 @@ jobs:
           - runner: linux-aarch64-a2-1
             model_list:
               - Qwen3-8B
-              - Qwen2.5-VL-7B-Instruct
               - Qwen2-Audio-7B-Instruct
-          - runner: linux-aarch64-a2-2
+          - runner: linux-aarch64-a3-2
             model_list:
               - Qwen3-30B-A3B
               - Qwen3-VL-30B-A3B-Instruct
-              - DeepSeek-V2-Lite
-    uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
+    uses: ./.github/workflows/_e2e_accuracy.yaml
     with:
       vllm: v0.12.0
       runner: ${{ matrix.runner }}
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      image-a2: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
+      image-a3: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
       model_list: ${{ toJson(matrix.model_list) }}
       upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
 

diff --git a/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml b/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml
@@ -9,3 +9,4 @@ tasks:
     value: 0.45
 num_fewshot: 5
 gpu_memory_utilization: 0.8
+enforce_eager: True
diff --git a/tests/e2e/models/configs/gemma-3-4b-it.yaml b/tests/e2e/models/configs/gemma-3-4b-it.yaml
@@ -4,10 +4,11 @@ tasks:
 - name: "gsm8k"
   metrics:
   - name: "exact_match,strict-match"
-    value: 0.59
+    value: 0.56
   - name: "exact_match,flexible-extract"
-    value: 0.59
+    value: 0.56
 num_fewshot: 5
 apply_chat_template: False
 fewshot_as_multiturn: False
 gpu_memory_utilization: 0.7
+enforce_eager: True
diff --git a/tests/e2e/models/configs/llava-1.5-7b-hf.yaml b/tests/e2e/models/configs/llava-1.5-7b-hf.yaml
@@ -9,3 +9,4 @@ tasks:
 trust_remote_code: True
 gpu_memory_utilization: 0.8
 dtype: "bfloat16"
+enforce_eager: True