From d18425b4981c4527a0d7a8250f8ad3c0b485ef30 Mon Sep 17 00:00:00 2001
From: luoyoucai <136922441@qq.com>
Date: Fri, 16 Jan 2026 17:57:14 +0800
Subject: [PATCH] Fix FlagScale PD-disaggregated run on A800

---
 flagscale/runner/backend/backend_vllm.py | 34 +++++++++++++++++++-----
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/flagscale/runner/backend/backend_vllm.py b/flagscale/runner/backend/backend_vllm.py
index 0f78e47e26..1b84f8c8ef 100644
--- a/flagscale/runner/backend/backend_vllm.py
+++ b/flagscale/runner/backend/backend_vllm.py
@@ -784,12 +784,23 @@ def generate_run_script(self, config, host, node_rank, cmd, background=True, wit
                             p_instance_log_path = os.path.join(default_log_dir, f"prefill_{i}.log")
 
                             if update_p_address != master_ip and len(nodes) > 1:
-                                p_kv_config_format_json = p_kv_config_json.replace('"', '\\"')
-                                node_cmd = f"{ids_env} && {vllm_command} --port {http_port} --kv-transfer-config '\\''{p_kv_config_format_json}'\\''"
                                 if docker_name:
+                                    p_kv_config_format_json = p_kv_config_json.replace('"', '\\"')
+                                    node_cmd = f"{ids_env} && {vllm_command} --port {http_port} --kv-transfer-config '\\''{p_kv_config_format_json}'\\''"
                                     ssh_cmd = f"ssh -f -n -p {ssh_port} {update_p_address} \"docker exec {docker_name} /bin/bash -c '{node_cmd} > {p_instance_log_path} 2>&1 &'\""
                                 else:
-                                    ssh_cmd = f'ssh -f -n -p {ssh_port} {update_p_address} "{node_cmd} > {p_instance_log_path} 2>&1 &"'
+                                    p_kv_config_format_json = p_kv_config_json.replace(
+                                        '"', '\\\\\\"'
+                                    )
+                                    vllm_command = vllm_command.replace(
+                                        "vllm serve", "(vllm serve"
+                                    ).replace("((vllm serve", "(vllm serve")
+                                    node_cmd = f'{vllm_command} --port {http_port} --kv-transfer-config \\"{p_kv_config_format_json}\\" > {p_instance_log_path} 2>&1 &) && disown'
+                                    node_cmd = f"{ids_env} && " + node_cmd
+                                    logger.info(f"node_cmd {node_cmd}")
+                                    ssh_cmd = (
+                                        f'ssh -f -n -p {ssh_port} {update_p_address} "{node_cmd}"'
+                                    )
                                 f.write(f"{ssh_cmd}\n\n")
                             else:
                                 p_cmd = f"{ids_env} && {vllm_command} --port {http_port} --kv-transfer-config '\\''{p_kv_config_json}'\\''"
@@ -843,12 +854,23 @@ def generate_run_script(self, config, host, node_rank, cmd, background=True, wit
                             d_instance_log_path = os.path.join(default_log_dir, f"decode_{j}.log")
 
                             if update_d_address != master_ip and len(nodes) > 1:
-                                d_kv_config_format_json = d_kv_config_json.replace('"', '\\"')
-                                node_cmd = f"{ids_env} && {vllm_command} --port {http_port} --gpu-memory-utilization {decode_gpu_memory_utilization} --kv-transfer-config '\\''{d_kv_config_format_json}'\\''"
                                 if docker_name:
+                                    d_kv_config_format_json = d_kv_config_json.replace('"', '\\"')
+                                    node_cmd = f"{ids_env} && {vllm_command} --port {http_port} --gpu-memory-utilization {decode_gpu_memory_utilization} --kv-transfer-config '\\''{d_kv_config_format_json}'\\''"
                                     ssh_cmd = f"ssh -f -n -p {ssh_port} {update_d_address} \"docker exec {docker_name} /bin/bash -c '{node_cmd} > {d_instance_log_path} 2>&1 &'\""
                                 else:
-                                    ssh_cmd = f'ssh -f -n -p {ssh_port} {update_d_address} "{node_cmd} > {d_instance_log_path} 2>&1 &"'
+                                    d_kv_config_format_json = d_kv_config_json.replace(
+                                        '"', '\\\\\\"'
+                                    )
+                                    vllm_command = vllm_command.replace(
+                                        "vllm serve", "(vllm serve"
+                                    ).replace("((vllm serve", "(vllm serve")
+                                    node_cmd = f'{vllm_command} --port {http_port} --gpu-memory-utilization {decode_gpu_memory_utilization} --kv-transfer-config \\"{d_kv_config_format_json}\\" > {d_instance_log_path} 2>&1 &) && disown'
+                                    node_cmd = f"{ids_env} && " + node_cmd
+                                    logger.info(f"node_cmd {node_cmd}")
+                                    ssh_cmd = (
+                                        f'ssh -f -n -p {ssh_port} {update_d_address} "{node_cmd}"'
+                                    )
                                 f.write(f"{ssh_cmd}\n\n")
                             else:
                                 d_cmd = f"{ids_env} && {vllm_command} --port {http_port} --gpu-memory-utilization {decode_gpu_memory_utilization} --kv-transfer-config '\\''{d_kv_config_json}'\\''"