diff --git a/.github/workflows/_e2e_nightly_single_node_models.yaml b/.github/workflows/_e2e_accuracy.yaml similarity index 94% rename from .github/workflows/_e2e_nightly_single_node_models.yaml rename to .github/workflows/_e2e_accuracy.yaml index 90f5d1633d0..86db0561250 100644 --- a/.github/workflows/_e2e_nightly_single_node_models.yaml +++ b/.github/workflows/_e2e_accuracy.yaml @@ -30,7 +30,10 @@ on: runner: required: true type: string - image: + image-a2: + required: true + type: string + image-a3: required: true type: string model_list: @@ -55,11 +58,11 @@ concurrency: cancel-in-progress: true jobs: - e2e-nightly: + e2e-accuracy-test: name: ${{inputs.model_list}} accuracy test runs-on: ${{ inputs.runner }} container: - image: "${{ inputs.image }}" + image: ${{ contains(inputs.runner, 'a2') && inputs.image-a2 || inputs.image-a3 }} env: VLLM_USE_MODELSCOPE: True GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }} @@ -101,18 +104,19 @@ jobs: env: PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi run: | + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/devlib pip install -r requirements-dev.txt pip install -v -e . - name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct) - if: ${{ inputs.runner == 'linux-aarch64-a2-4' && contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }} + if: ${{ contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }} shell: bash -l {0} run: | . 
/usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl" - name: Install tensorflow (for Molmo-7B-D-0924) - if: ${{ inputs.runner == 'linux-aarch64-a2-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }} + if: ${{ contains(inputs.model_list, 'Molmo-7B-D-0924') }} shell: bash -l {0} run: | pip install tensorflow --no-cache-dir @@ -180,6 +184,7 @@ jobs: VLLM_WORKER_MULTIPROC_METHOD: spawn HF_DATASETS_OFFLINE: True VLLM_USE_MODELSCOPE: True + HCCL_BUFFSIZE: 600 VLLM_CI_RUNNER: ${{ inputs.runner }} VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }} VLLM_COMMIT: ${{ env.VLLM_COMMIT }} diff --git a/.github/workflows/vllm_ascend_test_nightly_a2.yaml b/.github/workflows/vllm_ascend_test_nightly_a2.yaml index d13e79f18a2..cd16ab437a0 100644 --- a/.github/workflows/vllm_ascend_test_nightly_a2.yaml +++ b/.github/workflows/vllm_ascend_test_nightly_a2.yaml @@ -73,8 +73,7 @@ jobs: ( contains(github.event.pull_request.labels.*.name, 'accuracy-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') - ) - }} + ) }} strategy: fail-fast: false matrix: @@ -86,29 +85,29 @@ jobs: - Qwen3-8B-W8A8 - Qwen3-VL-8B-Instruct - Qwen2.5-Omni-7B - - Meta-Llama-3.1-8B-Instruct - os: linux-aarch64-a2-1 model_list: - - ERNIE-4.5-21B-A3B-PT - gemma-3-4b-it - - internlm-7b - InternVL3_5-8B-hf - llava-1.5-7b-hf - Molmo-7B-D-0924 - - os: linux-aarch64-a2-2 + - Meta-Llama-3.1-8B-Instruct + - os: linux-aarch64-a3-2 model_list: - Qwen3-30B-A3B - Qwen3-VL-30B-A3B-Instruct - Qwen3-30B-A3B-W8A8 - - os: linux-aarch64-a2-4 + - ERNIE-4.5-21B-A3B-PT + - os: linux-aarch64-a3-4 model_list: - Qwen3-Next-80B-A3B-Instruct - uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml + uses: ./.github/workflows/_e2e_accuracy.yaml with: vllm: v0.12.0 runner: ${{ matrix.test_config.os }} model_list: ${{ 
toJson(matrix.test_config.model_list) }} - image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11' + image-a2: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a2-ubuntu22.04-py3.11 + image-a3: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11 upload: false diff --git a/.github/workflows/vllm_ascend_test_pr_full.yaml b/.github/workflows/vllm_ascend_test_pr_full.yaml index 308a87128ba..85609214a50 100644 --- a/.github/workflows/vllm_ascend_test_pr_full.yaml +++ b/.github/workflows/vllm_ascend_test_pr_full.yaml @@ -43,6 +43,7 @@ jobs: outputs: e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }} ut_tracker: ${{ steps.filter.outputs.ut_tracker }} + accuracy_tracker: ${{ steps.filter.outputs.accuracy_tracker }} steps: - name: Setup git proxy run: | @@ -69,6 +70,11 @@ jobs: - 'packages.txt' ut_tracker: - 'tests/ut/**' + accuracy_tracker: + - '.github/workflows/_e2e_accuracy.yaml' + - 'csrc/**' + - 'vllm_ascend/**' + - 'tests/e2e/models/**' e2e-test: name: e2e-full @@ -83,3 +89,44 @@ jobs: runner: linux-aarch64-a2 image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 type: full + + e2e-accuracy: + needs: [changes] + name: e2e-accuracy + if: ${{ needs.changes.outputs.accuracy_tracker == 'true' }} + strategy: + fail-fast: false + matrix: + test_config: + - os: linux-aarch64-a2-1 + model_list: + - Qwen3-8B + - Qwen2-Audio-7B-Instruct + - Qwen3-8B-W8A8 + - Qwen3-VL-8B-Instruct + - Qwen2.5-Omni-7B + - Meta-Llama-3.1-8B-Instruct + - os: linux-aarch64-a2-1 + model_list: + - ERNIE-4.5-21B-A3B-PT + - gemma-3-4b-it + - internlm-7b + - InternVL3_5-8B-hf + - llava-1.5-7b-hf + - Molmo-7B-D-0924 + - os: linux-aarch64-a3-2 + model_list: + - Qwen3-30B-A3B + - Qwen3-VL-30B-A3B-Instruct + - Qwen3-30B-A3B-W8A8 + - os: linux-aarch64-a3-4 + model_list: + - Qwen3-Next-80B-A3B-Instruct + uses: ./.github/workflows/_e2e_accuracy.yaml + 
with: + vllm: v0.12.0 + runner: ${{ matrix.test_config.os }} + model_list: ${{ toJson(matrix.test_config.model_list) }} + image-a2: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 + image-a3: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11 + upload: false \ No newline at end of file diff --git a/.github/workflows/vllm_ascend_test_report.yaml b/.github/workflows/vllm_ascend_test_report.yaml index 0c548929d1c..30286040df5 100644 --- a/.github/workflows/vllm_ascend_test_report.yaml +++ b/.github/workflows/vllm_ascend_test_report.yaml @@ -63,18 +63,17 @@ jobs: - runner: linux-aarch64-a2-1 model_list: - Qwen3-8B - - Qwen2.5-VL-7B-Instruct - Qwen2-Audio-7B-Instruct - - runner: linux-aarch64-a2-2 + - runner: linux-aarch64-a3-2 model_list: - Qwen3-30B-A3B - Qwen3-VL-30B-A3B-Instruct - - DeepSeek-V2-Lite - uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml + uses: ./.github/workflows/_e2e_accuracy.yaml with: vllm: v0.12.0 runner: ${{ matrix.runner }} - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 + image-a2: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 + image-a3: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11 model_list: ${{ toJson(matrix.model_list) }} upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }} diff --git a/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml b/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml index 2dd02ab434e..d1d9d421b55 100644 --- a/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +++ b/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml @@ -9,3 +9,4 @@ tasks: value: 0.45 num_fewshot: 5 gpu_memory_utilization: 0.8 +enforce_eager: True diff --git a/tests/e2e/models/configs/gemma-3-4b-it.yaml 
b/tests/e2e/models/configs/gemma-3-4b-it.yaml index 42366800db0..00ef6901833 100644 --- a/tests/e2e/models/configs/gemma-3-4b-it.yaml +++ b/tests/e2e/models/configs/gemma-3-4b-it.yaml @@ -4,10 +4,11 @@ tasks: - name: "gsm8k" metrics: - name: "exact_match,strict-match" - value: 0.59 + value: 0.56 - name: "exact_match,flexible-extract" - value: 0.59 + value: 0.56 num_fewshot: 5 apply_chat_template: False fewshot_as_multiturn: False gpu_memory_utilization: 0.7 +enforce_eager: True diff --git a/tests/e2e/models/configs/llava-1.5-7b-hf.yaml b/tests/e2e/models/configs/llava-1.5-7b-hf.yaml index 7bd69de99f7..95ab52695ea 100644 --- a/tests/e2e/models/configs/llava-1.5-7b-hf.yaml +++ b/tests/e2e/models/configs/llava-1.5-7b-hf.yaml @@ -9,3 +9,4 @@ tasks: trust_remote_code: True gpu_memory_utilization: 0.8 dtype: "bfloat16" +enforce_eager: True