From 8cbbd8af15eb7f9d586e760252c99834fb88e8a9 Mon Sep 17 00:00:00 2001 From: Admin DAIS AICE Team Date: Tue, 16 Dec 2025 12:49:45 +0530 Subject: [PATCH 1/5] Dryrun implementation for generating command line file --- .cd/docker-compose.yml | 4 ++++ .cd/entrypoints/entrypoint_main.py | 2 ++ .cd/entrypoints/script_generator.py | 23 +++++++++++++++++++--- .cd/server/server_output.env | 30 ----------------------------- 4 files changed, 26 insertions(+), 33 deletions(-) diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml index 292f031af..d5605230b 100644 --- a/.cd/docker-compose.yml +++ b/.cd/docker-compose.yml @@ -10,10 +10,12 @@ services: - HF_TOKEN - HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES:-all} - PYTHONUNBUFFERED=1 + - DRYRUN_SERVER=${DRYRUN_SERVER:-0} env_file: - ./server/server_user.env volumes: - /mnt/hf_cache:/mnt/hf_cache + - ${PWD}:/local ports: - "8000:8000" cap_add: @@ -40,8 +42,10 @@ services: - MODEL - HF_TOKEN=${HF_TOKEN} - PYTHONUNBUFFERED=1 + - DRYRUN_BENCHMARK=${DRYRUN_BENCHMARK:-0} env_file: - ./benchmark/benchmark_user.env volumes: - /tmp/logs:/root/scripts/logs + - ${PWD}:/local command: ["benchmark", "--config-file", "${VLLM_BENCHMARK_CONFIG_FILE}", "--config-name", "${VLLM_BENCHMARK_CONFIG_NAME}"] diff --git a/.cd/entrypoints/entrypoint_main.py b/.cd/entrypoints/entrypoint_main.py index babfce32a..fb6b7c792 100644 --- a/.cd/entrypoints/entrypoint_main.py +++ b/.cd/entrypoints/entrypoint_main.py @@ -189,6 +189,7 @@ def run(self): template_script_path="templates/template_vllm_server.sh", output_script_path="vllm_server.sh", variables=variables, + mode=self.mode, log_dir="logs", varlist_conf_path="server/server_output.env", ).create_and_run() @@ -199,6 +200,7 @@ def run(self): template_script_path="templates/template_vllm_benchmark.sh", output_script_path="vllm_benchmark.sh", variables=self.config_envs, + mode=self.mode, log_dir="logs", ).create_and_run() else: diff --git a/.cd/entrypoints/script_generator.py b/.cd/entrypoints/script_generator.py index 684d1e9c7..e0dd4a990 100644 --- a/.cd/entrypoints/script_generator.py +++ b/.cd/entrypoints/script_generator.py @@ -1,15 +1,18 @@ # SPDX-License-Identifier: Apache-2.0 import os - +import shutil +import sys +import time class ScriptGenerator: - def __init__(self, template_script_path, output_script_path, variables, log_dir="logs", varlist_conf_path=None): + def __init__(self, template_script_path, output_script_path, variables, mode, log_dir="logs", varlist_conf_path=None): self.template_script_path = template_script_path self.varlist_conf_path = varlist_conf_path self.output_script_path = output_script_path self.variables = variables self.log_dir = log_dir + self.mode = mode self.log_file = os.path.join(self.log_dir, f"{os.path.splitext(os.path.basename(self.output_script_path))[0]}.log") @@ -57,4 +60,18 @@ def create_and_run(self): # Run the generated script and redirect output to log file print(f"Starting script, logging to {self.log_file}") os.makedirs(self.log_dir, exist_ok=True) - os.execvp("bash", ["bash", self.output_script_path]) + if (os.environ.get("DRYRUN_SERVER")=='1' and self.mode=='server') or \ + (os.environ.get("DRYRUN_BENCHMARK")=='1' and self.mode=='benchmark'): + print(f"[INFO] This is a dry run to save the command line file {self.output_script_path}.") + shutil.copy(self.output_script_path, f"/local/{self.mode}/") + print(f"[INFO] The command line file {self.output_script_path} saved at .cd/{self.mode}/{self.output_script_path}") + try: + while True: + print("[INFO] Press Ctrl+C to exit.") 
+ time.sleep(60) + except KeyboardInterrupt: + print("Exiting cmd mode.") + sys.exit(0) + else: + os.execvp("bash", ["bash", self.output_script_path]) + diff --git a/.cd/server/server_output.env b/.cd/server/server_output.env index 1d288a25f..4532f53f7 100644 --- a/.cd/server/server_output.env +++ b/.cd/server/server_output.env @@ -1,11 +1,7 @@ MODEL DTYPE -DEVICE_NAME TENSOR_PARALLEL_SIZE MAX_MODEL_LEN -TOTAL_GPU_MEM -MODEL_DTYPE -QUANT_DTYPE BLOCK_SIZE VLLM_PROMPT_BS_BUCKET_MIN VLLM_PROMPT_BS_BUCKET_STEP @@ -17,40 +13,14 @@ VLLM_PROMPT_SEQ_BUCKET_STEP VLLM_PROMPT_CTX_BUCKET_STEP VLLM_DECODE_BLOCK_BUCKET_MIN VLLM_DECODE_BLOCK_BUCKET_STEP -NUM_HIDDEN_LAYERS -HIDDEN_SIZE -NUM_KEY_VALUE_HEADS -NUM_ATTENTION_HEADS -CACHE_DTYPE_BYTES -LIMIT_MODEL_LEN PT_HPU_LAZY_MODE VLLM_SKIP_WARMUP VLLM_EXPONENTIAL_BUCKETING MAX_NUM_BATCHED_TOKENS PT_HPU_ENABLE_LAZY_COLLECTIVES -DEVICE_HPU_MEM -MODEL_MEM_IN_GB -USABLE_MEM GPU_MEM_UTILIZATION -KV_CACHE_PER_SEQ -EST_MAX_NUM_SEQS -EST_HPU_BLOCKS -DECODE_BS_RAMP_GRAPHS -DECODE_BS_STEP_GRAPHS -DECODE_BLOCK_RAMP_GRAPHS -DECODE_BLOCK_STEP_GRAPHS -NUM_DECODE_GRAPHS -PROMPT_BS_RAMP_GRAPHS -PROMPT_BS_STEP_GRAPHS -PROMPT_SEQ_RAMP_GRAPHS -PROMPT_SEQ_STEP_GRAPHS -EST_NUM_PROMPT_GRAPHS -EST_GRAPH_PROMPT_RATIO VLLM_GRAPH_PROMPT_RATIO -DECODE_GRAPH_TARGET_GB -EST_GRAPH_RESERVE_MEM VLLM_GRAPH_RESERVED_MEM -KV_CACHE_MEM MAX_NUM_SEQS VLLM_CONTIGUOUS_PA VLLM_DEFRAG From 2050a0fbdff85c7017008e865153a36a31e960ca Mon Sep 17 00:00:00 2001 From: Admin DAIS AICE Team Date: Wed, 17 Dec 2025 12:39:51 +0530 Subject: [PATCH 2/5] Dry-Run implementation - dependency on mode removed Signed-off-by: <> --- .cd/docker-compose.yml | 12 +++++++++--- .cd/entrypoints/entrypoint_main.py | 2 -- .cd/entrypoints/script_generator.py | 16 +++++++--------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml index d5605230b..f725cda8f 100644 --- a/.cd/docker-compose.yml +++ b/.cd/docker-compose.yml @@ -10,7 +10,7 @@ services: - HF_TOKEN - HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES:-all} - PYTHONUNBUFFERED=1 - - DRYRUN_SERVER=${DRYRUN_SERVER:-0} + - DRY_RUN=${DRY_RUN:-0} env_file: - ./server/server_user.env volumes: @@ -25,7 +25,13 @@ services: restart: unless-stopped command: ["server", "--config-file", "${VLLM_SERVER_CONFIG_FILE}", "--config-name", "${VLLM_SERVER_CONFIG_NAME}"] healthcheck: - test: ["CMD", "sh", "-c", "[ -f logs/vllm_server.log ] && grep -q 'Application startup complete' logs/vllm_server.log"] + test: + [ + "CMD", + "sh", + "-c", + "if [ \"$DRY_RUN\" = \"1\" ]; then exit 0; else [ -f logs/vllm_server.log ] && grep -q 'Application startup complete' logs/vllm_server.log; fi" + ] interval: 10s timeout: 2s retries: 500 @@ -42,7 +48,7 @@ services: - MODEL - HF_TOKEN=${HF_TOKEN} - PYTHONUNBUFFERED=1 - - DRYRUN_BENCHMARK=${DRYRUN_BENCHMARK:-0} + - DRY_RUN=${DRY_RUN:-0} env_file: - ./benchmark/benchmark_user.env volumes: diff --git a/.cd/entrypoints/entrypoint_main.py b/.cd/entrypoints/entrypoint_main.py index fb6b7c792..babfce32a 100644 --- a/.cd/entrypoints/entrypoint_main.py +++ b/.cd/entrypoints/entrypoint_main.py @@ -189,7 +189,6 @@ def run(self): template_script_path="templates/template_vllm_server.sh", output_script_path="vllm_server.sh", variables=variables, - mode=self.mode, log_dir="logs", varlist_conf_path="server/server_output.env", ).create_and_run() @@ -200,7 +199,6 @@ def run(self): template_script_path="templates/template_vllm_benchmark.sh", output_script_path="vllm_benchmark.sh", variables=self.config_envs, - 
mode=self.mode,
             log_dir="logs",
         ).create_and_run()
     else:
diff --git a/.cd/entrypoints/script_generator.py b/.cd/entrypoints/script_generator.py
index e0dd4a990..f95b9be2f 100644
--- a/.cd/entrypoints/script_generator.py
+++ b/.cd/entrypoints/script_generator.py
@@ -4,15 +4,15 @@
 import sys
 import time
 
+
 class ScriptGenerator:
 
-    def __init__(self, template_script_path, output_script_path, variables, mode, log_dir="logs", varlist_conf_path=None):
+    def __init__(self, template_script_path, output_script_path, variables, log_dir="logs", varlist_conf_path=None):
         self.template_script_path = template_script_path
         self.varlist_conf_path = varlist_conf_path
         self.output_script_path = output_script_path
         self.variables = variables
         self.log_dir = log_dir
-        self.mode = mode
 
         self.log_file = os.path.join(self.log_dir,
                                      f"{os.path.splitext(os.path.basename(self.output_script_path))[0]}.log")
@@ -60,18 +60,16 @@ def create_and_run(self):
         # Run the generated script and redirect output to log file
         print(f"Starting script, logging to {self.log_file}")
         os.makedirs(self.log_dir, exist_ok=True)
-        if (os.environ.get("DRYRUN_SERVER")=='1' and self.mode=='server') or \
-            (os.environ.get("DRYRUN_BENCHMARK")=='1' and self.mode=='benchmark'):
+        shutil.copy(self.output_script_path, "/local/")
+        print(f"[INFO] The command line file {self.output_script_path} saved at .cd/{self.output_script_path}")
+        if os.environ.get("DRY_RUN") == '1':
             print(f"[INFO] This is a dry run to save the command line file {self.output_script_path}.")
-            shutil.copy(self.output_script_path, f"/local/{self.mode}/")
-            print(f"[INFO] The command line file {self.output_script_path} saved at .cd/{self.mode}/{self.output_script_path}")
             try:
                 while True:
-                    print("[INFO] Press Ctrl+C to exit.") 
+                    print("[INFO] Press Ctrl+C to exit.")
                     time.sleep(60)
             except KeyboardInterrupt:
-                print("Exiting cmd mode.")
+                print("Exiting the DRY_RUN execution.")
                 sys.exit(0)
         else:
             os.execvp("bash", ["bash", self.output_script_path])
-

From dcadb3daab3fae67df8c00c7cbbfee7a8e8df13c Mon Sep 17 00:00:00 2001
From: Rajan Kumar
Date: Thu, 8 Jan 2026 10:13:26 +0000
Subject: [PATCH 3/5] Patch for OS-agnostic code

Signed-off-by: Rajan Kumar
---
 .cd/entrypoints/script_generator.py | 43 ++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/.cd/entrypoints/script_generator.py b/.cd/entrypoints/script_generator.py
index f95b9be2f..8cbd2e8ff 100644
--- a/.cd/entrypoints/script_generator.py
+++ b/.cd/entrypoints/script_generator.py
@@ -3,16 +3,47 @@
 import shutil
 import sys
 import time
+from pathlib import Path
+
+
+def shutil_copy(source_file, destination_dir):
+    try:
+        src_path = Path(source_file)
+        dst_dir_path = Path(destination_dir)
+
+        dst_path = dst_dir_path / src_path.name
+
+        # Ensure the destination directory exists
+        dst_dir_path.mkdir(parents=True, exist_ok=True)
+
+        shutil.copy(src_path, dst_path)
+        print(f"[INFO] File '{source_file}' saved at '{dst_path}'")
+
+    except FileNotFoundError:
+        print(f"Error: The source file '{source_file}' was not found.")
+    except PermissionError:
+        print(f"Error: Permission denied. 
Cannot access '{source_file}' or write to '{destination_dir}'.")
+    except shutil.SameFileError:
+        print("Error: Source and destination files are the same.")
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
 
 
 class ScriptGenerator:
 
-    def __init__(self, template_script_path, output_script_path, variables, log_dir="logs", varlist_conf_path=None):
+    def __init__(self,
+                 template_script_path,
+                 output_script_path,
+                 variables,
+                 log_dir="logs",
+                 dry_run_dir="/local/",
+                 varlist_conf_path=None):
         self.template_script_path = template_script_path
         self.varlist_conf_path = varlist_conf_path
         self.output_script_path = output_script_path
         self.variables = variables
         self.log_dir = log_dir
+        self.dry_run_dir = dry_run_dir
 
         self.log_file = os.path.join(self.log_dir,
                                      f"{os.path.splitext(os.path.basename(self.output_script_path))[0]}.log")
@@ -59,10 +90,14 @@ def create_and_run(self):
 
         # Run the generated script and redirect output to log file
         print(f"Starting script, logging to {self.log_file}")
-        os.makedirs(self.log_dir, exist_ok=True)
-        shutil.copy(self.output_script_path, "/local/")
-        print(f"[INFO] The command line file {self.output_script_path} saved at .cd/{self.output_script_path}")
+        try:
+            os.makedirs(self.log_dir, exist_ok=True)
+        except Exception:
+            print(f"Error: could not create {self.log_dir}.")
+
         if os.environ.get("DRY_RUN") == '1':
+            shutil_copy(self.output_script_path, self.dry_run_dir)
             print(f"[INFO] This is a dry run to save the command line file {self.output_script_path}.")
             try:
                 while True:

From 3579c6311ba078931fc1cc346136cf5d323ddd67 Mon Sep 17 00:00:00 2001
From: Rajan Kumar
Date: Thu, 8 Jan 2026 20:22:19 +0530
Subject: [PATCH 4/5] Option to dry run with docker run command, save log files, and create multiple vLLM services

Signed-off-by: Rajan Kumar
---
 .cd/docker-compose.yml                              | 17 ++++++-----
 .../quickstart/quickstart_configuration.md          | 64 +++++++++++++++++++
 2 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml
index f725cda8f..abd4d353c 100644
--- a/.cd/docker-compose.yml
+++ b/.cd/docker-compose.yml
@@ -10,14 +10,18 @@ services:
       - HF_TOKEN
       - HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES:-all}
       - PYTHONUNBUFFERED=1
-      - DRY_RUN=${DRY_RUN:-0}
+      - DRY_RUN
+      - http_proxy
+      - https_proxy
+      - no_proxy
     env_file:
       - ./server/server_user.env
     volumes:
       - /mnt/hf_cache:/mnt/hf_cache
+      - ${PWD}/logs:/root/scripts/logs
       - ${PWD}:/local
     ports:
-      - "8000:8000"
+      - ${HOST_PORT:-8000}:8000
     cap_add:
       - SYS_NICE
     ipc: host
@@ -46,12 +50,15 @@
         condition: service_healthy
     environment:
       - MODEL
-      - HF_TOKEN=${HF_TOKEN}
+      - HF_TOKEN
      - PYTHONUNBUFFERED=1
-      - DRY_RUN=${DRY_RUN:-0}
+      - DRY_RUN
+      - http_proxy
+      - https_proxy
+      - no_proxy
     env_file:
       - ./benchmark/benchmark_user.env
     volumes:
-      - /tmp/logs:/root/scripts/logs
+      - ${PWD}/logs:/root/scripts/logs
       - ${PWD}:/local
     command: ["benchmark", "--config-file", "${VLLM_BENCHMARK_CONFIG_FILE}", "--config-name", "${VLLM_BENCHMARK_CONFIG_NAME}"]
diff --git a/docs/getting_started/quickstart/quickstart_configuration.md b/docs/getting_started/quickstart/quickstart_configuration.md
index 0f1f92257..7b9a70255 100644
--- a/docs/getting_started/quickstart/quickstart_configuration.md
+++ b/docs/getting_started/quickstart/quickstart_configuration.md
@@ -137,6 +137,70 @@
 
 This method provides full flexibility over how the vLLM server is executed within the container.
 
+## Dry Run to create vLLM server and client command line files
+
+Set the environment variable **DRY_RUN=1**.
+Setting DRY_RUN to 1 creates a copy of the vllm_server.sh or vllm_benchmark.sh command line file on the host machine without launching the server or the client.
+
+Example - Docker Compose
+
+```bash
+MODEL="Qwen/Qwen2.5-14B-Instruct" \
+HF_TOKEN="" \
+DOCKER_IMAGE="vault.habana.ai/gaudi-docker/{{ VERSION }}/ubuntu24.04/habanalabs/vllm-installer-{{ PT_VERSION }}:latest" \
+TENSOR_PARALLEL_SIZE=1 \
+MAX_MODEL_LEN=2048 \
+DRY_RUN=1 \
+docker compose up
+```
+
+Example - Docker Run
+
+```bash
+docker run -it --rm \
+  -e MODEL=$MODEL \
+  -e HF_TOKEN=$HF_TOKEN \
+  -e http_proxy=$http_proxy \
+  -e https_proxy=$https_proxy \
+  -e no_proxy=$no_proxy \
+  --cap-add=sys_nice \
+  --ipc=host \
+  --runtime=habana \
+  -e HABANA_VISIBLE_DEVICES=all \
+  -p 8000:8000 \
+  -v ${PWD}:/local \
+  --name vllm-server \
+
+```
+
+!!! note
+    When launching the vLLM server with the Docker Run command for a dry run, make sure to mount the present working directory as `-v ${PWD}:/local`.
+
+## To save vLLM server and client log files
+
+If the vLLM server is launched with the Docker Compose command, the log files are saved at `vllm-gaudi/.cd/logs/` by default.
+
+If the vLLM server is launched with the Docker Run command, you can save the log files by creating a directory named `logs` and mounting it as `-v ${PWD}/logs:/root/scripts/logs`.
+
+## To create multiple vLLM services using Docker Compose
+
+Set the environment variables **HOST_PORT** and **COMPOSE_PROJECT_NAME**.
+Example:
+
+```bash
+MODEL="Qwen/Qwen2.5-14B-Instruct" \
+HF_TOKEN="" \
+DOCKER_IMAGE="vault.habana.ai/gaudi-docker/{{ VERSION }}/ubuntu24.04/habanalabs/vllm-installer-{{ PT_VERSION }}:latest" \
+TENSOR_PARALLEL_SIZE=1 \
+MAX_MODEL_LEN=2048 \
+HOST_PORT=9000 \
+COMPOSE_PROJECT_NAME=serv1 \
+docker compose up
+```
+
+!!! note
+    The default values, when these variables are not set, are `HOST_PORT=8000` and `COMPOSE_PROJECT_NAME=cd`.
+
 ## Pinning CPU Cores for Memory Access Coherence
 
 To improve memory-access coherence and release CPUs to other CPU-only workloads, such as vLLM serving with Llama3 8B, you can pin CPU cores based on different CPU Non-Uniform Memory Access (NUMA) nodes using the automatically generated `docker-compose.override.yml` file. The following procedure explains the process.
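A minimal sketch of the log-saving setup this patch documents (illustrative only; the image reference, elided in the quickstart example, is assumed here to be supplied via a hypothetical `$DOCKER_IMAGE`):

```bash
# Create a host-side logs directory and mount it at the container path
# where the entrypoint writes its logs (/root/scripts/logs).
mkdir -p logs
docker run -it --rm \
  -e MODEL=$MODEL \
  -e HF_TOKEN=$HF_TOKEN \
  --cap-add=sys_nice \
  --ipc=host \
  --runtime=habana \
  -e HABANA_VISIBLE_DEVICES=all \
  -p 8000:8000 \
  -v ${PWD}/logs:/root/scripts/logs \
  -v ${PWD}:/local \
  --name vllm-server \
  $DOCKER_IMAGE

# The server log then appears on the host as it is written:
tail -f logs/vllm_server.log
```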
From 381f5857d3fe8fc92e1356563b0934cf109cd0bf Mon Sep 17 00:00:00 2001
From: Rajan Kumar
Date: Fri, 9 Jan 2026 20:44:16 +0530
Subject: [PATCH 5/5] Updated README and restart condition

Signed-off-by: Rajan Kumar
---
 .cd/docker-compose.yml                          |  2 +-
 .cd/entrypoints/script_generator.py             | 10 +---------
 .../quickstart/quickstart_configuration.md      |  5 +++--
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml
index abd4d353c..abef40215 100644
--- a/.cd/docker-compose.yml
+++ b/.cd/docker-compose.yml
@@ -26,7 +26,7 @@ cap_add:
       - SYS_NICE
     ipc: host
     runtime: habana
-    restart: unless-stopped
+    restart: on-failure
     command: ["server", "--config-file", "${VLLM_SERVER_CONFIG_FILE}", "--config-name", "${VLLM_SERVER_CONFIG_NAME}"]
     healthcheck:
       test:
diff --git a/.cd/entrypoints/script_generator.py b/.cd/entrypoints/script_generator.py
index 8cbd2e8ff..569b68658 100644
--- a/.cd/entrypoints/script_generator.py
+++ b/.cd/entrypoints/script_generator.py
@@ -2,7 +2,6 @@
 import os
 import shutil
 import sys
-import time
 from pathlib import Path
 
 
@@ -97,14 +96,7 @@ def create_and_run(self):
 
         if os.environ.get("DRY_RUN") == '1':
             shutil_copy(self.output_script_path, self.dry_run_dir)
-            print(f"[INFO] This is a dry run to save the command line file {self.output_script_path}.")
-            try:
-                while True:
-                    print("[INFO] Press Ctrl+C to exit.")
-                    time.sleep(60)
-            except KeyboardInterrupt:
-                print("Exiting the DRY_RUN execution.")
-                sys.exit(0)
+            sys.exit(0)
         else:
             os.execvp("bash", ["bash", self.output_script_path])
 
diff --git a/docs/getting_started/quickstart/quickstart_configuration.md b/docs/getting_started/quickstart/quickstart_configuration.md
index 7b9a70255..cc71c5c0c 100644
--- a/docs/getting_started/quickstart/quickstart_configuration.md
+++ b/docs/getting_started/quickstart/quickstart_configuration.md
@@ -168,6 +168,7 @@ docker run -it --rm \
   --runtime=habana \
   -e HABANA_VISIBLE_DEVICES=all \
   -p 8000:8000 \
+  -e DRY_RUN=1 \
   -v ${PWD}:/local \
   --name vllm-server \
 
@@ -176,13 +177,13 @@ docker run -it --rm \
 !!! note
     When launching the vLLM server with the Docker Run command for a dry run, make sure to mount the present working directory as `-v ${PWD}:/local`.
 
-## To save vLLM server and client log files
+## Save vLLM server and client log files
 
 If the vLLM server is launched with the Docker Compose command, the log files are saved at `vllm-gaudi/.cd/logs/` by default.
 
 If the vLLM server is launched with the Docker Run command, you can save the log files by creating a directory named `logs` and mounting it as `-v ${PWD}/logs:/root/scripts/logs`.
 
-## To create multiple vLLM services using Docker Compose
+## Create multiple vLLM services using Docker Compose
 
 Set the environment variables **HOST_PORT** and **COMPOSE_PROJECT_NAME**.
 Example:
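To exercise the dry-run path end to end once the series is applied, a minimal sketch (run from the `.cd/` directory; the values mirror the quickstart examples above):

```bash
# With DRY_RUN=1 the entrypoint copies the generated command line file to
# /local (bind-mounted to the current directory) and exits cleanly; under
# restart: on-failure the container is not restarted after a clean exit.
MODEL="Qwen/Qwen2.5-14B-Instruct" \
HF_TOKEN="" \
DOCKER_IMAGE="vault.habana.ai/gaudi-docker/{{ VERSION }}/ubuntu24.04/habanalabs/vllm-installer-{{ PT_VERSION }}:latest" \
TENSOR_PARALLEL_SIZE=1 \
MAX_MODEL_LEN=2048 \
DRY_RUN=1 \
docker compose up

# Inspect the generated server command line on the host:
cat vllm_server.sh
```

If the benchmark service also runs in dry-run mode, the corresponding `vllm_benchmark.sh` is saved alongside it.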