File tree: 1 file changed, +2 −9 lines
tensorrt_llm/_torch/models

@@ -347,8 +347,8 @@ def __init__(
         # self.fusion_config.PRE_MOE_FUSION = model_config.mapping.has_tp(
         # )
         # TODO: re-enable these fusions
-        # self.fusion_config.PRE_MOE_FUSION = False
-        # self.fusion_config.POST_MLP_FUSION = False
+        self.fusion_config.PRE_MOE_FUSION = False
+        self.fusion_config.POST_MLP_FUSION = False

         self.self_attn = Llama4Attention(
             model_config,
@@ -374,9 +374,6 @@ def __init__(

             # self.fusion_config.POST_MLP_FUSION = model_config.mapping.has_tp(
             # )
-            self.fusion_config.PRE_MLP_FUSION = model_config.mapping.has_tp()
-            self.fusion_config.POST_MLP_FUSION = model_config.mapping.has_tp()
-
         else:
             self.feed_forward = Llama4MoE(
                 num_experts=config.num_local_experts,
@@ -388,10 +385,6 @@ def __init__(
                 aux_stream=aux_stream,
                 dtype=config.torch_dtype)

-            self.fusion_config.PRE_MOE_FUSION = model_config.mapping.has_tp()
-            self.fusion_config.POST_MOE_FUSION = model_config.mapping.has_tp()
-
-
             # self.fusion_config.POST_MOE_FUSION = model_config.mapping.has_tp(
             # )
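
For context, a minimal sketch of the flag-gating pattern this diff changes. The `FusionConfig` and `Mapping` classes below are hypothetical stand-ins (names taken from the diff; the real types live in the tensorrt_llm codebase, and `Mapping.has_tp()` is the call the old code used). The net effect of the diff: the fusion flags were previously derived from `model_config.mapping.has_tp()`, and are now pinned to False until the fusions are re-enabled per the TODO.

    # Minimal sketch with hypothetical stand-ins, not the TensorRT-LLM classes.
    from dataclasses import dataclass


    @dataclass
    class FusionConfig:
        PRE_MOE_FUSION: bool = False
        POST_MOE_FUSION: bool = False
        PRE_MLP_FUSION: bool = False
        POST_MLP_FUSION: bool = False


    class Mapping:
        """Stand-in for the parallelism mapping; has_tp() mirrors the diff's call."""

        def __init__(self, tp_size: int = 1):
            self.tp_size = tp_size

        def has_tp(self) -> bool:
            return self.tp_size > 1


    fusion_config = FusionConfig()
    mapping = Mapping(tp_size=2)

    # Before this diff: fusion flags followed tensor parallelism.
    fusion_config.PRE_MOE_FUSION = mapping.has_tp()

    # After this diff: fusions are pinned off until re-enabled (see the TODO).
    fusion_config.PRE_MOE_FUSION = False
    fusion_config.POST_MLP_FUSION = False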