From 8cbbd8af15eb7f9d586e760252c99834fb88e8a9 Mon Sep 17 00:00:00 2001 From: Admin DAIS AICE Team Date: Tue, 16 Dec 2025 12:49:45 +0530 Subject: [PATCH 1/5] Dryrun implementation for generating command line file --- .cd/docker-compose.yml | 4 ++++ .cd/entrypoints/entrypoint_main.py | 2 ++ .cd/entrypoints/script_generator.py | 23 +++++++++++++++++++--- .cd/server/server_output.env | 30 ----------------------------- 4 files changed, 26 insertions(+), 33 deletions(-) diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml index 292f031af..d5605230b 100644 --- a/.cd/docker-compose.yml +++ b/.cd/docker-compose.yml @@ -10,10 +10,12 @@ services: - HF_TOKEN - HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES:-all} - PYTHONUNBUFFERED=1 + - DRYRUN_SERVER=${DRYRUN_SERVER:-0} env_file: - ./server/server_user.env volumes: - /mnt/hf_cache:/mnt/hf_cache + - ${PWD}:/local ports: - "8000:8000" cap_add: @@ -40,8 +42,10 @@ services: - MODEL - HF_TOKEN=${HF_TOKEN} - PYTHONUNBUFFERED=1 + - DRYRUN_BENCHMARK=${DRYRUN_BENCHMARK:-0} env_file: - ./benchmark/benchmark_user.env volumes: - /tmp/logs:/root/scripts/logs + - ${PWD}:/local command: ["benchmark", "--config-file", "${VLLM_BENCHMARK_CONFIG_FILE}", "--config-name", "${VLLM_BENCHMARK_CONFIG_NAME}"] diff --git a/.cd/entrypoints/entrypoint_main.py b/.cd/entrypoints/entrypoint_main.py index babfce32a..fb6b7c792 100644 --- a/.cd/entrypoints/entrypoint_main.py +++ b/.cd/entrypoints/entrypoint_main.py @@ -189,6 +189,7 @@ def run(self): template_script_path="templates/template_vllm_server.sh", output_script_path="vllm_server.sh", variables=variables, + mode=self.mode, log_dir="logs", varlist_conf_path="server/server_output.env", ).create_and_run() @@ -199,6 +200,7 @@ def run(self): template_script_path="templates/template_vllm_benchmark.sh", output_script_path="vllm_benchmark.sh", variables=self.config_envs, + mode=self.mode, log_dir="logs", ).create_and_run() else: diff --git a/.cd/entrypoints/script_generator.py b/.cd/entrypoints/script_generator.py index 684d1e9c7..e0dd4a990 100644 --- a/.cd/entrypoints/script_generator.py +++ b/.cd/entrypoints/script_generator.py @@ -1,15 +1,18 @@ # SPDX-License-Identifier: Apache-2.0 import os - +import shutil +import sys +import time class ScriptGenerator: - def __init__(self, template_script_path, output_script_path, variables, log_dir="logs", varlist_conf_path=None): + def __init__(self, template_script_path, output_script_path, variables, mode, log_dir="logs", varlist_conf_path=None): self.template_script_path = template_script_path self.varlist_conf_path = varlist_conf_path self.output_script_path = output_script_path self.variables = variables self.log_dir = log_dir + self.mode = mode self.log_file = os.path.join(self.log_dir, f"{os.path.splitext(os.path.basename(self.output_script_path))[0]}.log") @@ -57,4 +60,18 @@ def create_and_run(self): # Run the generated script and redirect output to log file print(f"Starting script, logging to {self.log_file}") os.makedirs(self.log_dir, exist_ok=True) - os.execvp("bash", ["bash", self.output_script_path]) + if (os.environ.get("DRYRUN_SERVER")=='1' and self.mode=='server') or \ + (os.environ.get("DRYRUN_BENCHMARK")=='1' and self.mode=='benchmark'): + print(f"[INFO] This is a dry run to save the command line file {self.output_script_path}.") + shutil.copy(self.output_script_path, f"/local/{self.mode}/") + print(f"[INFO] The command line file {self.output_script_path} saved at .cd/{self.mode}/{self.output_script_path}") + try: + while True: + print("[INFO] Press Ctrl+C to exit.") 
+ time.sleep(60) + except KeyboardInterrupt: + print("Exiting cmd mode.") + sys.exit(0) + else: + os.execvp("bash", ["bash", self.output_script_path]) + diff --git a/.cd/server/server_output.env b/.cd/server/server_output.env index 1d288a25f..4532f53f7 100644 --- a/.cd/server/server_output.env +++ b/.cd/server/server_output.env @@ -1,11 +1,7 @@ MODEL DTYPE -DEVICE_NAME TENSOR_PARALLEL_SIZE MAX_MODEL_LEN -TOTAL_GPU_MEM -MODEL_DTYPE -QUANT_DTYPE BLOCK_SIZE VLLM_PROMPT_BS_BUCKET_MIN VLLM_PROMPT_BS_BUCKET_STEP @@ -17,40 +13,14 @@ VLLM_PROMPT_SEQ_BUCKET_STEP VLLM_PROMPT_CTX_BUCKET_STEP VLLM_DECODE_BLOCK_BUCKET_MIN VLLM_DECODE_BLOCK_BUCKET_STEP -NUM_HIDDEN_LAYERS -HIDDEN_SIZE -NUM_KEY_VALUE_HEADS -NUM_ATTENTION_HEADS -CACHE_DTYPE_BYTES -LIMIT_MODEL_LEN PT_HPU_LAZY_MODE VLLM_SKIP_WARMUP VLLM_EXPONENTIAL_BUCKETING MAX_NUM_BATCHED_TOKENS PT_HPU_ENABLE_LAZY_COLLECTIVES -DEVICE_HPU_MEM -MODEL_MEM_IN_GB -USABLE_MEM GPU_MEM_UTILIZATION -KV_CACHE_PER_SEQ -EST_MAX_NUM_SEQS -EST_HPU_BLOCKS -DECODE_BS_RAMP_GRAPHS -DECODE_BS_STEP_GRAPHS -DECODE_BLOCK_RAMP_GRAPHS -DECODE_BLOCK_STEP_GRAPHS -NUM_DECODE_GRAPHS -PROMPT_BS_RAMP_GRAPHS -PROMPT_BS_STEP_GRAPHS -PROMPT_SEQ_RAMP_GRAPHS -PROMPT_SEQ_STEP_GRAPHS -EST_NUM_PROMPT_GRAPHS -EST_GRAPH_PROMPT_RATIO VLLM_GRAPH_PROMPT_RATIO -DECODE_GRAPH_TARGET_GB -EST_GRAPH_RESERVE_MEM VLLM_GRAPH_RESERVED_MEM -KV_CACHE_MEM MAX_NUM_SEQS VLLM_CONTIGUOUS_PA VLLM_DEFRAG From 2050a0fbdff85c7017008e865153a36a31e960ca Mon Sep 17 00:00:00 2001 From: Admin DAIS AICE Team Date: Wed, 17 Dec 2025 12:39:51 +0530 Subject: [PATCH 2/5] Dry-Run implementation - dependency on mode removed Signed-off-by: <> --- .cd/docker-compose.yml | 12 +++++++++--- .cd/entrypoints/entrypoint_main.py | 2 -- .cd/entrypoints/script_generator.py | 16 +++++++--------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml index d5605230b..f725cda8f 100644 --- a/.cd/docker-compose.yml +++ b/.cd/docker-compose.yml @@ -10,7 +10,7 @@ services: - HF_TOKEN - HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES:-all} - PYTHONUNBUFFERED=1 - - DRYRUN_SERVER=${DRYRUN_SERVER:-0} + - DRY_RUN=${DRY_RUN:-0} env_file: - ./server/server_user.env volumes: @@ -25,7 +25,13 @@ services: restart: unless-stopped command: ["server", "--config-file", "${VLLM_SERVER_CONFIG_FILE}", "--config-name", "${VLLM_SERVER_CONFIG_NAME}"] healthcheck: - test: ["CMD", "sh", "-c", "[ -f logs/vllm_server.log ] && grep -q 'Application startup complete' logs/vllm_server.log"] + test: + [ + "CMD", + "sh", + "-c", + "if [ \"$DRY_RUN\" = \"1\" ]; then exit 0; else [ -f logs/vllm_server.log ] && grep -q 'Application startup complete' logs/vllm_server.log; fi" + ] interval: 10s timeout: 2s retries: 500 @@ -42,7 +48,7 @@ services: - MODEL - HF_TOKEN=${HF_TOKEN} - PYTHONUNBUFFERED=1 - - DRYRUN_BENCHMARK=${DRYRUN_BENCHMARK:-0} + - DRY_RUN=${DRY_RUN:-0} env_file: - ./benchmark/benchmark_user.env volumes: diff --git a/.cd/entrypoints/entrypoint_main.py b/.cd/entrypoints/entrypoint_main.py index fb6b7c792..babfce32a 100644 --- a/.cd/entrypoints/entrypoint_main.py +++ b/.cd/entrypoints/entrypoint_main.py @@ -189,7 +189,6 @@ def run(self): template_script_path="templates/template_vllm_server.sh", output_script_path="vllm_server.sh", variables=variables, - mode=self.mode, log_dir="logs", varlist_conf_path="server/server_output.env", ).create_and_run() @@ -200,7 +199,6 @@ def run(self): template_script_path="templates/template_vllm_benchmark.sh", output_script_path="vllm_benchmark.sh", variables=self.config_envs, - 
mode=self.mode,
             log_dir="logs",
         ).create_and_run()
     else:
diff --git a/.cd/entrypoints/script_generator.py b/.cd/entrypoints/script_generator.py
index e0dd4a990..f95b9be2f 100644
--- a/.cd/entrypoints/script_generator.py
+++ b/.cd/entrypoints/script_generator.py
@@ -4,15 +4,15 @@
 import sys
 import time
 
+
 class ScriptGenerator:
 
-    def __init__(self, template_script_path, output_script_path, variables, mode, log_dir="logs", varlist_conf_path=None):
+    def __init__(self, template_script_path, output_script_path, variables, log_dir="logs", varlist_conf_path=None):
         self.template_script_path = template_script_path
         self.varlist_conf_path = varlist_conf_path
         self.output_script_path = output_script_path
         self.variables = variables
         self.log_dir = log_dir
-        self.mode = mode
 
         self.log_file = os.path.join(self.log_dir,
                                      f"{os.path.splitext(os.path.basename(self.output_script_path))[0]}.log")
@@ -60,18 +60,16 @@ def create_and_run(self):
         # Run the generated script and redirect output to log file
         print(f"Starting script, logging to {self.log_file}")
         os.makedirs(self.log_dir, exist_ok=True)
-        if (os.environ.get("DRYRUN_SERVER")=='1' and self.mode=='server') or \
-            (os.environ.get("DRYRUN_BENCHMARK")=='1' and self.mode=='benchmark'):
+        shutil.copy(self.output_script_path, "/local/")
+        print(f"[INFO] The command line file {self.output_script_path} saved at .cd/{self.output_script_path}")
+        if os.environ.get("DRY_RUN") == '1':
             print(f"[INFO] This is a dry run to save the command line file {self.output_script_path}.")
-            shutil.copy(self.output_script_path, f"/local/{self.mode}/")
-            print(f"[INFO] The command line file {self.output_script_path} saved at .cd/{self.mode}/{self.output_script_path}")
             try:
                 while True:
-                    print("[INFO] Press Ctrl+C to exit.") 
+                    print("[INFO] Press Ctrl+C to exit.")
                     time.sleep(60)
             except KeyboardInterrupt:
-                print("Exiting cmd mode.")
+                print("Exiting the DRY_RUN execution.")
                 sys.exit(0)
         else:
             os.execvp("bash", ["bash", self.output_script_path])
-

From dcadb3daab3fae67df8c00c7cbbfee7a8e8df13c Mon Sep 17 00:00:00 2001
From: Rajan Kumar
Date: Thu, 8 Jan 2026 10:13:26 +0000
Subject: [PATCH 3/5] Patch for OS-agnostic code

Signed-off-by: Rajan Kumar
---
 .cd/entrypoints/script_generator.py | 43 ++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/.cd/entrypoints/script_generator.py b/.cd/entrypoints/script_generator.py
index f95b9be2f..8cbd2e8ff 100644
--- a/.cd/entrypoints/script_generator.py
+++ b/.cd/entrypoints/script_generator.py
@@ -3,16 +3,47 @@
 import shutil
 import sys
 import time
+from pathlib import Path
+
+
+def shutil_copy(source_file, destination_dir):
+    try:
+        src_path = Path(source_file)
+        dst_dir_path = Path(destination_dir)
+
+        dst_path = dst_dir_path / src_path.name
+
+        # Ensure the destination directory exists
+        dst_dir_path.mkdir(parents=True, exist_ok=True)
+
+        shutil.copy(src_path, dst_path)
+        print(f"[INFO] File '{source_file}' saved at '{dst_path}'")
+
+    except FileNotFoundError:
+        print(f"Error: The source file '{source_file}' was not found.")
+    except PermissionError:
+        print(f"Error: Permission denied. 
Cannot access '{source_file}' or write to '{destination_dir}'.")
+    except shutil.SameFileError:
+        print("Error: Source and destination files are the same.")
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
 
 
 class ScriptGenerator:
 
-    def __init__(self, template_script_path, output_script_path, variables, log_dir="logs", varlist_conf_path=None):
+    def __init__(self,
+                 template_script_path,
+                 output_script_path,
+                 variables,
+                 log_dir="logs",
+                 dry_run_dir="/local/",
+                 varlist_conf_path=None):
         self.template_script_path = template_script_path
         self.varlist_conf_path = varlist_conf_path
         self.output_script_path = output_script_path
         self.variables = variables
         self.log_dir = log_dir
+        self.dry_run_dir = dry_run_dir
 
         self.log_file = os.path.join(self.log_dir,
                                      f"{os.path.splitext(os.path.basename(self.output_script_path))[0]}.log")
@@ -59,10 +90,14 @@ def create_and_run(self):
 
         # Run the generated script and redirect output to log file
         print(f"Starting script, logging to {self.log_file}")
-        os.makedirs(self.log_dir, exist_ok=True)
-        shutil.copy(self.output_script_path, "/local/")
-        print(f"[INFO] The command line file {self.output_script_path} saved at .cd/{self.output_script_path}")
+        try:
+            os.makedirs(self.log_dir, exist_ok=True)
+        except Exception:
+            print(f"Error: could not create {self.log_dir}.")
+
         if os.environ.get("DRY_RUN") == '1':
+            shutil_copy(self.output_script_path, self.dry_run_dir)
             print(f"[INFO] This is a dry run to save the command line file {self.output_script_path}.")
             try:
                 while True:

From 3579c6311ba078931fc1cc346136cf5d323ddd67 Mon Sep 17 00:00:00 2001
From: Rajan Kumar
Date: Thu, 8 Jan 2026 20:22:19 +0530
Subject: [PATCH 4/5] Option to dry run with docker run command, save log files, and create multiple vLLM services

Signed-off-by: Rajan Kumar
---
 .cd/docker-compose.yml                              | 17 ++++++-----
 .../quickstart/quickstart_configuration.md          | 64 +++++++++++++++++++
 2 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml
index f725cda8f..abd4d353c 100644
--- a/.cd/docker-compose.yml
+++ b/.cd/docker-compose.yml
@@ -10,14 +10,18 @@ services:
       - HF_TOKEN
       - HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES:-all}
       - PYTHONUNBUFFERED=1
-      - DRY_RUN=${DRY_RUN:-0}
+      - DRY_RUN
+      - http_proxy
+      - https_proxy
+      - no_proxy
     env_file:
       - ./server/server_user.env
     volumes:
       - /mnt/hf_cache:/mnt/hf_cache
+      - ${PWD}/logs:/root/scripts/logs
       - ${PWD}:/local
     ports:
-      - "8000:8000"
+      - ${HOST_PORT:-8000}:8000
     cap_add:
       - SYS_NICE
     ipc: host
@@ -46,12 +50,15 @@
         condition: service_healthy
     environment:
       - MODEL
-      - HF_TOKEN=${HF_TOKEN}
+      - HF_TOKEN
      - PYTHONUNBUFFERED=1
-      - DRY_RUN=${DRY_RUN:-0}
+      - DRY_RUN
+      - http_proxy
+      - https_proxy
+      - no_proxy
     env_file:
       - ./benchmark/benchmark_user.env
     volumes:
-      - /tmp/logs:/root/scripts/logs
+      - ${PWD}/logs:/root/scripts/logs
       - ${PWD}:/local
     command: ["benchmark", "--config-file", "${VLLM_BENCHMARK_CONFIG_FILE}", "--config-name", "${VLLM_BENCHMARK_CONFIG_NAME}"]
diff --git a/docs/getting_started/quickstart/quickstart_configuration.md b/docs/getting_started/quickstart/quickstart_configuration.md
index 0f1f92257..7b9a70255 100644
--- a/docs/getting_started/quickstart/quickstart_configuration.md
+++ b/docs/getting_started/quickstart/quickstart_configuration.md
@@ -137,6 +137,70 @@
 
 This method provides full flexibility over how the vLLM server is executed within the container.
 
+## Dry Run to create vLLM server and client command line files
+
+Set the environment variable **DRY_RUN=1**.
+Setting DRY_RUN to 1 creates a copy of the vllm_server.sh or vllm_benchmark.sh command line file on the host machine without launching the server or the client.
+
+Example - Docker Compose
+
+```bash
+MODEL="Qwen/Qwen2.5-14B-Instruct" \
+HF_TOKEN="" \
+DOCKER_IMAGE="vault.habana.ai/gaudi-docker/{{ VERSION }}/ubuntu24.04/habanalabs/vllm-installer-{{ PT_VERSION }}:latest" \
+TENSOR_PARALLEL_SIZE=1 \
+MAX_MODEL_LEN=2048 \
+DRY_RUN=1 \
+docker compose up
+```
+
+Example - Docker Run
+
+```bash
+docker run -it --rm \
+  -e MODEL=$MODEL \
+  -e HF_TOKEN=$HF_TOKEN \
+  -e http_proxy=$http_proxy \
+  -e https_proxy=$https_proxy \
+  -e no_proxy=$no_proxy \
+  --cap-add=sys_nice \
+  --ipc=host \
+  --runtime=habana \
+  -e HABANA_VISIBLE_DEVICES=all \
+  -p 8000:8000 \
+  -v ${PWD}:/local \
+  --name vllm-server \
+
+```
+
+!!! note
+    When launching the vLLM server with the Docker Run command for a dry run, make sure to mount the present working directory as `-v ${PWD}:/local`.
+
+## To save vLLM server and client log files
+
+If the vLLM server is launched with the Docker Compose command, the log files are saved at `vllm-gaudi/.cd/logs/` by default.
+
+If the vLLM server is launched with the Docker Run command, you can save the log files by creating a directory named `logs` and mounting it as `-v ${PWD}/logs:/root/scripts/logs`.
+
+## To create multiple vLLM services using Docker Compose
+
+Set the environment variables **HOST_PORT** and **COMPOSE_PROJECT_NAME**.
+Example:
+
+```bash
+MODEL="Qwen/Qwen2.5-14B-Instruct" \
+HF_TOKEN="" \
+DOCKER_IMAGE="vault.habana.ai/gaudi-docker/{{ VERSION }}/ubuntu24.04/habanalabs/vllm-installer-{{ PT_VERSION }}:latest" \
+TENSOR_PARALLEL_SIZE=1 \
+MAX_MODEL_LEN=2048 \
+HOST_PORT=9000 \
+COMPOSE_PROJECT_NAME=serv1 \
+docker compose up
+```
+
+!!! note
+    The default values, when these variables are not set, are `HOST_PORT=8000` and `COMPOSE_PROJECT_NAME=cd`.
+
 ## Pinning CPU Cores for Memory Access Coherence
 
 To improve memory-access coherence and release CPUs to other CPU-only workloads, such as vLLM serving with Llama3 8B, you can pin CPU cores based on different CPU Non-Uniform Memory Access (NUMA) nodes using the automatically generated `docker-compose.override.yml` file. The following procedure explains the process.
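A minimal sketch of the log-saving setup this patch documents (illustrative only; the image reference, elided in the quickstart example, is assumed here to be supplied via a hypothetical `$DOCKER_IMAGE`):

```bash
# Create a host-side logs directory and mount it at the container path
# where the entrypoint writes its logs (/root/scripts/logs).
mkdir -p logs
docker run -it --rm \
  -e MODEL=$MODEL \
  -e HF_TOKEN=$HF_TOKEN \
  --cap-add=sys_nice \
  --ipc=host \
  --runtime=habana \
  -e HABANA_VISIBLE_DEVICES=all \
  -p 8000:8000 \
  -v ${PWD}/logs:/root/scripts/logs \
  -v ${PWD}:/local \
  --name vllm-server \
  $DOCKER_IMAGE

# The server log then appears on the host as it is written:
tail -f logs/vllm_server.log
```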
From 381f5857d3fe8fc92e1356563b0934cf109cd0bf Mon Sep 17 00:00:00 2001
From: Rajan Kumar
Date: Fri, 9 Jan 2026 20:44:16 +0530
Subject: [PATCH 5/5] Updated README and restart condition

Signed-off-by: Rajan Kumar
---
 .cd/docker-compose.yml                          |  2 +-
 .cd/entrypoints/script_generator.py             | 10 +---------
 .../quickstart/quickstart_configuration.md      |  5 +++--
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml
index abd4d353c..abef40215 100644
--- a/.cd/docker-compose.yml
+++ b/.cd/docker-compose.yml
@@ -26,7 +26,7 @@ cap_add:
       - SYS_NICE
     ipc: host
     runtime: habana
-    restart: unless-stopped
+    restart: on-failure
     command: ["server", "--config-file", "${VLLM_SERVER_CONFIG_FILE}", "--config-name", "${VLLM_SERVER_CONFIG_NAME}"]
     healthcheck:
       test:
diff --git a/.cd/entrypoints/script_generator.py b/.cd/entrypoints/script_generator.py
index 8cbd2e8ff..569b68658 100644
--- a/.cd/entrypoints/script_generator.py
+++ b/.cd/entrypoints/script_generator.py
@@ -2,7 +2,6 @@
 import os
 import shutil
 import sys
-import time
 from pathlib import Path
 
 
@@ -97,14 +96,7 @@ def create_and_run(self):
 
         if os.environ.get("DRY_RUN") == '1':
             shutil_copy(self.output_script_path, self.dry_run_dir)
-            print(f"[INFO] This is a dry run to save the command line file {self.output_script_path}.")
-            try:
-                while True:
-                    print("[INFO] Press Ctrl+C to exit.")
-                    time.sleep(60)
-            except KeyboardInterrupt:
-                print("Exiting the DRY_RUN execution.")
-                sys.exit(0)
+            sys.exit(0)
         else:
             os.execvp("bash", ["bash", self.output_script_path])
 
diff --git a/docs/getting_started/quickstart/quickstart_configuration.md b/docs/getting_started/quickstart/quickstart_configuration.md
index 7b9a70255..cc71c5c0c 100644
--- a/docs/getting_started/quickstart/quickstart_configuration.md
+++ b/docs/getting_started/quickstart/quickstart_configuration.md
@@ -168,6 +168,7 @@ docker run -it --rm \
   --runtime=habana \
   -e HABANA_VISIBLE_DEVICES=all \
   -p 8000:8000 \
+  -e DRY_RUN=1 \
   -v ${PWD}:/local \
   --name vllm-server \
 
@@ -176,13 +177,13 @@ docker run -it --rm \
 !!! note
     When launching the vLLM server with the Docker Run command for a dry run, make sure to mount the present working directory as `-v ${PWD}:/local`.
 
-## To save vLLM server and client log files
+## Save vLLM server and client log files
 
 If the vLLM server is launched with the Docker Compose command, the log files are saved at `vllm-gaudi/.cd/logs/` by default.
 
 If the vLLM server is launched with the Docker Run command, you can save the log files by creating a directory named `logs` and mounting it as `-v ${PWD}/logs:/root/scripts/logs`.
 
-## To create multiple vLLM services using Docker Compose
+## Create multiple vLLM services using Docker Compose
 
 Set the environment variables **HOST_PORT** and **COMPOSE_PROJECT_NAME**.
 Example:
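To exercise the dry-run path end to end once the series is applied, a minimal sketch (run from the `.cd/` directory; the values mirror the quickstart examples above):

```bash
# With DRY_RUN=1 the entrypoint copies the generated command line file to
# /local (bind-mounted to the current directory) and exits cleanly; under
# restart: on-failure the container is not restarted after a clean exit.
MODEL="Qwen/Qwen2.5-14B-Instruct" \
HF_TOKEN="" \
DOCKER_IMAGE="vault.habana.ai/gaudi-docker/{{ VERSION }}/ubuntu24.04/habanalabs/vllm-installer-{{ PT_VERSION }}:latest" \
TENSOR_PARALLEL_SIZE=1 \
MAX_MODEL_LEN=2048 \
DRY_RUN=1 \
docker compose up

# Inspect the generated server command line on the host:
cat vllm_server.sh
```

If the benchmark service also runs in dry-run mode, the corresponding `vllm_benchmark.sh` is saved alongside it.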