Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/configs/ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
platform: ascend

# Docker image for this hardware
ci_image: harbor.baai.ac.cn/flagscale/vllm-plugin-fl:v0.1.0-ascend-ci
ci_image: harbor.baai.ac.cn/flagscale/vllm-plugin-fl:v0.2.0-ascend-ci

# Runner labels for this hardware
runner_labels:
Expand Down
3 changes: 2 additions & 1 deletion .github/configs/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
platform: cuda

# Docker image for this hardware
ci_image: harbor.baai.ac.cn/flagscale/vllm-plugin-fl:v0.1.0-cuda-ci
ci_image: harbor.baai.ac.cn/flagscale/vllm-plugin-fl:v0.2.0-cuda-ci

# Runner labels for this hardware
runner_labels:
Expand All @@ -20,6 +20,7 @@ container_volumes:

# Container options (hardware-specific settings)
container_options: >-
--privileged
--gpus all
--shm-size=500g
--hostname vllm-plugin-fl
Expand Down
2 changes: 2 additions & 0 deletions .github/scripts/ascend/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,7 @@
# Setup script for Ascend NPU CI environment.
set -euo pipefail

git config --global --add safe.directory "$(pwd)"

pip install --upgrade pip "setuptools>=77.0.3"
pip install --no-build-isolation -e ".[test]"
2 changes: 2 additions & 0 deletions .github/scripts/cuda/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,7 @@
# Setup script for CUDA CI environment.
set -euo pipefail

git config --global --add safe.directory "$(pwd)"

uv pip install --system --upgrade pip
uv pip install --system --no-build-isolation -e ".[test]"
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ on:
paths-ignore:
- "**.md"
- "docs/**"
- "examples/**"
- "docker/**"
- "LICENSE"
- ".github/ISSUE_TEMPLATE/**"
- ".github/PULL_REQUEST_TEMPLATE.md"
Expand All @@ -17,6 +19,8 @@ on:
paths-ignore:
- "**.md"
- "docs/**"
- "examples/**"
- "docker/**"
- "LICENSE"
- ".github/ISSUE_TEMPLATE/**"
- ".github/PULL_REQUEST_TEMPLATE.md"
Expand Down
19 changes: 6 additions & 13 deletions docker/ascend/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,12 @@
ARG VLLM_VERSION=0.13.0
ARG VLLM_VERSION=0.18.0

# ---------- base stage ----------
FROM quay.io/ascend/vllm-ascend:v${VLLM_VERSION}rc1-a3 AS base

RUN pip install --upgrade pip setuptools

# CANN Toolkit environment variables (mirrors set_env.sh baked in at build time)
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LD_LIBRARY_PATH="${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64:${LD_LIBRARY_PATH}" \
PYTHONPATH="${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}" \
PATH="${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${ASCEND_TOOLKIT_HOME}/tools/ccec_compiler/bin:${PATH}"

# Set ATB environment variables
ENV ATB_HOME_PATH=/usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_1
ENV LD_LIBRARY_PATH="${ATB_HOME_PATH}/lib:${ATB_HOME_PATH}/examples:${ATB_HOME_PATH}/tests/atbopstest:${LD_LIBRARY_PATH}" \
PATH="${ATB_HOME_PATH}/bin:${PATH}"
# Add BiShengIR compiler to PATH
ENV PATH="${ASCEND_TOOLKIT_HOME}/tools/bishengir/bin:${PATH}"

# ---------- dev stage ----------
FROM base AS dev
Expand Down Expand Up @@ -49,8 +41,9 @@ RUN pip install \
cmake

# Install FlagGems (NPU backend)
ARG FLAGGEMS_VERSION=v5.0.0
RUN pip install -U scikit-build-core==0.11 pybind11 \
&& git clone https://github.com/flagos-ai/FlagGems /workspace/FlagGems \
&& git clone --branch ${FLAGGEMS_VERSION} --depth 1 https://github.com/flagos-ai/FlagGems /workspace/FlagGems \
&& pip install --no-build-isolation \
--config-settings=cmake.define.FLAGGEMS_BACKEND=NPU \
/workspace/FlagGems
Expand All @@ -71,7 +64,7 @@ FROM base AS release

ARG INDEX_URL
ARG EXTRA_INDEX_URL
ARG VLLM_VERSION=0.13.0
ARG VLLM_VERSION=0.18.0

# Install vLLM
# Todo
Expand Down
79 changes: 79 additions & 0 deletions docker/ascend/Dockerfile.v0.1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Version of the upstream vllm-ascend image to build on. Declared before the
# first FROM so it can be interpolated into the base image tag below.
ARG VLLM_VERSION=0.13.0

# ---------- base stage ----------
# Common foundation that the dev, ci and release stages all start from.
FROM quay.io/ascend/vllm-ascend:v${VLLM_VERSION}rc1-a3 AS base

# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip setuptools

# CANN Toolkit environment variables (mirrors set_env.sh baked in at build time)
# ASCEND_TOOLKIT_HOME gets its own ENV instruction because the following
# instruction expands it; a variable set and used within one ENV instruction
# would still expand to its previous value.
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LD_LIBRARY_PATH="${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64:${LD_LIBRARY_PATH}" \
    PYTHONPATH="${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}" \
    PATH="${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${ASCEND_TOOLKIT_HOME}/tools/ccec_compiler/bin:${PATH}"

# Set ATB environment variables
# Prepends the ATB directories to the LD_LIBRARY_PATH/PATH values set above.
ENV ATB_HOME_PATH=/usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_1
ENV LD_LIBRARY_PATH="${ATB_HOME_PATH}/lib:${ATB_HOME_PATH}/examples:${ATB_HOME_PATH}/tests/atbopstest:${LD_LIBRARY_PATH}" \
    PATH="${ATB_HOME_PATH}/bin:${PATH}"

# ---------- dev stage ----------
# Development image: the base stage plus test, lint and native-build tooling.
FROM base AS dev

# Install dev tools
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN pip install --no-cache-dir \
    pytest \
    pytest-cov \
    pytest-json-report \
    ruff \
    pre-commit \
    ninja \
    cmake

# ---------- ci stage ----------
# CI image: the base stage plus test tooling, FlagGems (NPU backend) and FlagTree.
FROM base AS ci

# Install dev/test tools
# --no-cache-dir on every pip call keeps pip's cache out of the image layers.
RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir \
    pytest \
    pytest-cov \
    pytest-timeout \
    pytest-json-report \
    numpy \
    requests \
    decorator \
    "modelscope>=1.18.1" \
    ruff \
    pre-commit \
    ninja \
    cmake

# Install FlagGems (NPU backend)
# NOTE(review): the clone is not pinned to a tag or commit, so a rebuild picks
# up whatever the repository's default branch points at — consider pinning.
RUN pip install --no-cache-dir -U scikit-build-core==0.11 pybind11 \
    && git clone https://github.com/flagos-ai/FlagGems /workspace/FlagGems \
    && pip install --no-cache-dir --no-build-isolation \
        --config-settings=cmake.define.FLAGGEMS_BACKEND=NPU \
        /workspace/FlagGems

# Install FlagTree
# NOTE(review): --trusted-host tells pip to trust this host even without valid
# HTTPS; confirm it is still required for resource.flagos.net.
RUN pip install --no-cache-dir flagtree==0.4.0+ascend3.2 \
    --index-url=https://resource.flagos.net/repository/flagos-pypi-hosted/simple \
    --trusted-host=resource.flagos.net

# Set environment variables for vLLM and Triton
ENV VLLM_PLUGINS=fl
ENV TRITON_ALL_BLOCKS_PARALLEL=1

WORKDIR /workspace

# ---------- release stage ----------
# Placeholder release image: currently just the base stage with /workspace as
# the working directory; no packages are installed here yet.
FROM base AS release

# Build arguments declared for the pending vLLM install step below; no
# instruction in this stage consumes them yet.
ARG INDEX_URL
ARG EXTRA_INDEX_URL
# Redeclared inside the stage: an ARG defined before the first FROM is only
# visible to FROM lines unless it is declared again within the stage.
ARG VLLM_VERSION=0.13.0

# Install vLLM
# TODO: the vLLM installation step has not been implemented for this stage.

WORKDIR /workspace
4 changes: 2 additions & 2 deletions docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ PYTHON_VERSION="${PYTHON_VERSION:-3.12}"
UV_VERSION="${UV_VERSION:-0.7.12}"
CUDA_VERSION="${CUDA_VERSION:-12.8.1}"
UBUNTU_VERSION="${UBUNTU_VERSION:-22.04}"
VLLM_VERSION="${VLLM_VERSION:-0.13.0}"
VLLM_VERSION="${VLLM_VERSION:-0.18.0}"

# ---- Build options ----
PLATFORM="${PLATFORM:-cuda}"
TARGET="dev"
IMAGE_NAME="localhost:5000/vllm-plugin-fl"
IMAGE_NAME="harbor.baai.ac.cn/flagscale/vllm-plugin-fl"
IMAGE_TAG=""
INDEX_URL="${INDEX_URL:-}"
EXTRA_INDEX_URL="${EXTRA_INDEX_URL:-}"
Expand Down
15 changes: 7 additions & 8 deletions docker/cuda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS base

ARG PYTHON_VERSION=3.12
ARG UV_VERSION=0.7.12
ARG VLLM_VERSION=0.13.0
ARG VLLM_VERSION=0.18.0

ENV DEBIAN_FRONTEND=noninteractive

Expand Down Expand Up @@ -47,7 +47,7 @@ FROM base AS dev

ARG INDEX_URL
ARG EXTRA_INDEX_URL
ARG VLLM_VERSION=0.13.0
ARG VLLM_VERSION=0.18.0

# Install vLLM
RUN uv pip install --system \
Expand All @@ -70,7 +70,7 @@ FROM base AS ci

ARG INDEX_URL
ARG EXTRA_INDEX_URL
ARG VLLM_VERSION=0.13.0
ARG VLLM_VERSION=0.18.0

# Install vLLM
RUN uv pip install --system \
Expand All @@ -92,18 +92,17 @@ RUN uv pip install --system \
pre-commit \
ninja \
cmake

ARG FLAGGEMS_VERSION=v5.0.0
ARG FLAGCX_VERSION=v0.9.0

# Install FlagGems
RUN uv pip install --system scikit-build-core==0.11 pybind11 \
&& git clone https://github.com/flagos-ai/FlagGems /workspace/FlagGems \
&& git clone --branch ${FLAGGEMS_VERSION} --depth 1 https://github.com/flagos-ai/FlagGems /workspace/FlagGems \
&& uv pip install --system --no-build-isolation /workspace/FlagGems

# Install FlagCX (NVIDIA)
RUN git clone https://github.com/flagos-ai/FlagCX.git /workspace/FlagCX \
RUN git clone --branch ${FLAGCX_VERSION} --depth 1 https://github.com/flagos-ai/FlagCX.git /workspace/FlagCX \
&& cd /workspace/FlagCX \
&& git checkout ${FLAGCX_VERSION} \
&& git submodule update --init --recursive \
&& make USE_NVIDIA=1 \
&& cd plugin/torch \
Expand All @@ -125,7 +124,7 @@ FROM base AS release

ARG INDEX_URL
ARG EXTRA_INDEX_URL
ARG VLLM_VERSION=0.13.0
ARG VLLM_VERSION=0.18.0

# Install vLLM
RUN uv pip install --system \
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ test = [
"requests",
"openai",
"decorator",
"vllm[audio]==0.13.0",
"vllm[audio]==0.18.0",
"modelscope>=1.18.1",
]

Expand Down
6 changes: 3 additions & 3 deletions tests/models/qwen3/next_tp8.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
llm:
model: "/data/models/Qwen/Qwen3-Next-80B-A3B-Instruct"
tensor_parallel_size: 8
max_model_len: 16384
max_num_batched_tokens: 16384
max_model_len: 8192
max_num_batched_tokens: 8192
max_num_seqs: 512
gpu_memory_utilization: 0.7
gpu_memory_utilization: 0.8
enforce_eager: true
trust_remote_code: true

Expand Down
21 changes: 20 additions & 1 deletion tests/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@
_REPO_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(_REPO_ROOT))

from tests.utils.cleanup import device_cleanup
from tests.utils.cleanup import device_cleanup, wait_for_memory
from tests.utils.model_config import ModelConfig
from tests.utils.platform_config import PlatformConfig
from tests.utils.report import TestReport, TestResult

Expand Down Expand Up @@ -351,6 +352,24 @@ def _run_single(self, tc: TestCase) -> TestResult:
message="dry-run",
)

# Wait for sufficient device memory before e2e tests
if tc.task in ("inference", "serving") and tc.model and tc.case:
gpu_util = ModelConfig.load(tc.model, tc.case).engine.get(
"gpu_memory_utilization", 0.9
)
ok, info = wait_for_memory(self.config.platform, gpu_util)
if not ok:
print("[run] FAILED: timed out waiting for device memory")
return TestResult(
name=tc.name,
passed=False,
duration=0.0,
message=f"OOM: timed out waiting for device memory\n{info}",
task=tc.task,
model=tc.model,
case=tc.case,
)

# Merge extra env vars (e.g. FL_TEST_MODEL/FL_TEST_CASE for inference)
env = None
if tc.extra_env:
Expand Down
Loading
Loading