Applied a few suggestions from code-assistant

whitememory · whitememory · commit 49a7cfeb18df · 2025-10-02T11:21:05.000+09:00
diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py
@@ -510,8 +510,10 @@ def _ensure_shmem_initialized(self):
                 logger.debug(
                     "[rank %s] torch process group shmem init failed: %s",
                     self.rank, torch_error)
-
-            self._shmem_initialized = True
+                self._shmem_initialized = True
+                logger.warning(
+                    "[rank %s] Continuing without mori shmem optimization",
+                    self.rank)
 
         except Exception as e:
             logger.error("[rank %s] mori shmem initialization failed: %s",
diff --git a/vllm/model_executor/layers/fused_moe/mori_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/mori_prepare_finalize.py
@@ -88,9 +88,9 @@ def prepare(
 
         Args:
             a1: Input hidden states [num_tokens, hidden_dim]
-            a1_scale: Input activation scales
             topk_weights: Top-k routing weights [num_experts, experts_per_token]
             topk_ids: Top-k expert indices [num_experts, experts_per_token]
+            apply_router_weight_on_input: Whether to apply router weight
             quant_config: Quantization config
 
         Returns: