Using the vLLM nightly Docker image.
Machine: MI355
# Swap the vLLM that is already installed for a development-mode install
# of the hexwang/dsv4_adapt branch from the ROCm fork.
pip uninstall vllm
git clone https://github.com/ROCm/vllm.git
cd vllm
git fetch origin hexwang/dsv4_adapt
git checkout hexwang/dsv4_adapt
python3 setup.py develop
# offline_test.py
"""Offline smoke test: run one prompt through DeepSeek-V4-Flash and print it.

The engine is built with explicit backend choices (fp8 KV cache, eager
mode, unfused Triton MoE, DeepSeek-V4 tokenizer mode) so the run exercises
exactly the configuration under test.
"""

from vllm import LLM, SamplingParams


def main() -> None:
    """Build the engine, generate for a single prompt, and print the output."""
    prompts = ["The capital of France is "]
    # Keep this smoke test short and deterministic while validating
    # the explicit DeepSeek-V4 backend choices.
    sampling_params = SamplingParams(temperature=0, top_p=1, max_tokens=256)
    llm = LLM(
        model="/home/models/DeepSeek-V4-Flash",
        tensor_parallel_size=4,
        kv_cache_dtype="fp8",
        gpu_memory_utilization=0.8,
        async_scheduling=False,
        enable_prefix_caching=False,
        enforce_eager=True,
        disable_log_stats=False,
        tokenizer_mode="deepseek_v4",
        moe_backend="triton_unfused",
        seed=0,
    )
    outputs = llm.generate(prompts, sampling_params)
    for output in outputs:
        # Only the first completion of each request is reported.
        completion = output.outputs[0]
        print(
            f"Prompt: {output.prompt!r}, Generated text: {completion.text!r}, "
            f"Token ids: {completion.token_ids}"
        )


# The guard keeps engine construction out of import time, so the module can
# be imported without starting a run.
if __name__ == "__main__":
    main()
# test_offline.sh
# Export the ROCm AITER / DeepSeek-V4 environment switches used for this
# run, then launch the offline smoke test.
# NOTE(review): the meaning of VLLM_DSV4_WO_A_FP8 is not stated in this
# report; confirm it against the hexwang/dsv4_adapt branch.
export VLLM_ROCM_USE_AITER=1
export VLLM_ROCM_USE_AITER_LINEAR=1
export VLLM_DSV4_WO_A_FP8=0
python offline_test.py
Your current environment
Using the vLLM nightly Docker image.
Machine: MI355
How would you like to use vllm
create offline_test.py:
create test_offline.sh:
run with:
bash test_offline.sh
Before submitting a new issue...