Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,7 @@ jobs:
# ------------------------------------ v1 spec decode test ------------------------------------ #
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
# Fix me: test_eagle_correctness OOM error
#pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py

e2e-2-cards:
name: multicard-2
Expand Down
19 changes: 12 additions & 7 deletions tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

import os
import random
from typing import Any

Expand All @@ -9,6 +10,8 @@

from tests.e2e.conftest import VllmRunner

os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Modifying os.environ directly at the module level can lead to side effects in other tests, as it's a global state change that persists throughout the pytest session. This can make tests flaky and hard to debug.

A safer and more idiomatic pytest approach is to use the monkeypatch fixture to manage environment variables. This ensures that the change is properly scoped and cleaned up after the tests in this module are done.

You could define a module-scoped autouse fixture like this, which would replace the direct modification of os.environ:

import pytest

@pytest.fixture(scope="module", autouse=True)
def set_spawn_method(monkeypatch):
    monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")

This would make the test suite more robust against side-effects.



@pytest.fixture
def test_prompts():
Expand Down Expand Up @@ -61,7 +64,6 @@ def eagle3_model_name():
return "vllm-ascend/EAGLE3-LLaMA3.1-Instruct-8B"


@pytest.mark.skip("TODO: Revert me after ngram oom issue on ci is fixed")
def test_ngram_correctness(
test_prompts: list[list[dict[str, Any]]],
sampling_config: SamplingParams,
Expand All @@ -71,9 +73,11 @@ def test_ngram_correctness(
Compare the outputs of an original LLM and a speculative LLM
should be the same when using ngram speculative decoding.
'''
ref_llm = LLM(model=model_name, max_model_len=1024, enforce_eager=False)
ref_outputs = ref_llm.chat(test_prompts, sampling_config)
del ref_llm

with VllmRunner(model_name, max_model_len=1024,
enforce_eager=False) as ref_llm:
ref_outputs = ref_llm.model.chat(test_prompts, sampling_config)

with VllmRunner(model_name,
speculative_config={
"method": "ngram",
Expand Down Expand Up @@ -156,9 +160,10 @@ def test_suffix_correctness(
Compare the outputs of an original LLM and a speculative LLM
should be the same when using suffix speculative decoding.
'''
ref_llm = LLM(model=model_name, max_model_len=1024, enforce_eager=False)
ref_outputs = ref_llm.chat(test_prompts, sampling_config)
del ref_llm
with VllmRunner(model_name, max_model_len=1024,
enforce_eager=False) as ref_llm:
ref_outputs = ref_llm.model.chat(test_prompts, sampling_config)

with VllmRunner(model_name,
speculative_config={
"method": "suffix",
Expand Down
Loading