
Commit 31f25cb

enable moe/mlp fusion
1 parent dcf59c7

File tree

1 file changed (+9, -2 lines)


tensorrt_llm/_torch/models/modeling_llama.py

Lines changed: 9 additions & 2 deletions
@@ -347,8 +347,8 @@ def __init__(
         # self.fusion_config.PRE_MOE_FUSION = model_config.mapping.has_tp(
         # )
         # TODO: re-enable these fusions
-        self.fusion_config.PRE_MOE_FUSION = False
-        self.fusion_config.POST_MLP_FUSION = False
+        # self.fusion_config.PRE_MOE_FUSION = False
+        # self.fusion_config.POST_MLP_FUSION = False
 
         self.self_attn = Llama4Attention(
             model_config,
@@ -374,6 +374,9 @@ def __init__(
 
             # self.fusion_config.POST_MLP_FUSION = model_config.mapping.has_tp(
             # )
+            self.fusion_config.PRE_MLP_FUSION = model_config.mapping.has_tp()
+            self.fusion_config.POST_MLP_FUSION = model_config.mapping.has_tp()
+
         else:
             self.feed_forward = Llama4MoE(
                 num_experts=config.num_local_experts,
@@ -385,6 +388,10 @@ def __init__(
                 aux_stream=aux_stream,
                 dtype=config.torch_dtype)
 
+            self.fusion_config.PRE_MOE_FUSION = model_config.mapping.has_tp()
+            self.fusion_config.POST_MOE_FUSION = model_config.mapping.has_tp()
+
+
             # self.fusion_config.POST_MOE_FUSION = model_config.mapping.has_tp(
             # )
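
For readers skimming the diff: the net effect is that the per-layer fusion flags now follow model_config.mapping.has_tp() instead of being hard-coded to False, so the MLP/MoE fusions are enabled only when tensor parallelism is actually in use. Below is a minimal, standalone sketch of that pattern; FusionConfig, Mapping, and configure_fusion are simplified stand-ins written for illustration, not the actual TensorRT-LLM definitions.

from dataclasses import dataclass


@dataclass
class Mapping:
    # Simplified stand-in for the parallel mapping (assumption, not the TRT-LLM class).
    tp_size: int = 1

    def has_tp(self) -> bool:
        return self.tp_size > 1


@dataclass
class FusionConfig:
    # Per-layer fusion switches mirroring the flags toggled in the diff.
    PRE_MLP_FUSION: bool = False
    POST_MLP_FUSION: bool = False
    PRE_MOE_FUSION: bool = False
    POST_MOE_FUSION: bool = False


def configure_fusion(fusion: FusionConfig, mapping: Mapping, is_mlp_layer: bool) -> None:
    # As in the commit, every flag is gated on has_tp(): the fusions matter on the
    # tensor-parallel path, so they are only turned on when TP is in use.
    if is_mlp_layer:
        fusion.PRE_MLP_FUSION = mapping.has_tp()
        fusion.POST_MLP_FUSION = mapping.has_tp()
    else:
        fusion.PRE_MOE_FUSION = mapping.has_tp()
        fusion.POST_MOE_FUSION = mapping.has_tp()


if __name__ == "__main__":
    cfg = FusionConfig()
    configure_fusion(cfg, Mapping(tp_size=4), is_mlp_layer=False)
    print(cfg)  # MoE flags become True only because tp_size > 1

With tp_size=1 the same call leaves every flag False, which matches the previously hard-disabled behavior on single-GPU runs.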
