Add Llama4 Multi-Modal Support #382

Status: Open. Wants to merge 26 commits into base: main.

Commits (26)
ceee86a  Add Llama4 Multi-Modal Support (vbaddi, Apr 29, 2025)
693e474  nit: modeling changes (vbaddi, Apr 29, 2025)
ed85612  Adding Vision Part and Chunking (#383) (mohiso22, Apr 30, 2025)
77df9d2  nit: update moe implementation and add sample export/compile script (vbaddi, May 2, 2025)
b5eb805  nit: fix linter for example script (vbaddi, May 2, 2025)
eface81  nit: update pytorch transforms to map Llama4TextExperts (vbaddi, May 2, 2025)
b1eae7e  nit: update modeling with new freq apply computation and sample mm ex… (vbaddi, May 4, 2025)
5aca1ef  nit: update llama4 mm example script (vbaddi, May 4, 2025)
91eca48  nit: update modeling to avoid >2GiB issue in Onnx, rope max-position (vbaddi, May 6, 2025)
a661727  Added pytorch transform for the split_gate_up_weights and removed exa… (quic-amitraj, May 8, 2025)
3cc5511  Ruff Check and format (quic-amitraj, May 8, 2025)
6ca555e  Minor fixes-1 (quic-amitraj, May 8, 2025)
7482533  Added logger for new transform (quic-amitraj, May 8, 2025)
a83f23f  fixed Llama4 MOE accuracy bug (ochougul, May 18, 2025)
f98f4af  Updating index method in Wrappers (#410) (mohiso22, May 19, 2025)
1b36e90  nit: add position_ids to attn_scales instead of cache_position in use… (vbaddi, May 20, 2025)
1ebd9a8  Minor Fixes (#421) (mohiso22, May 21, 2025)
63b0b10  Updating Specialization and modeling auto files (mohiso22, May 21, 2025)
8e0a613  Fix for Multi Image Chunking (mohiso22, May 23, 2025)
c91dd9f  Adding SingleQPC (mohiso22, Jun 1, 2025)
aefa2b0  Rebase and Minor Fixes (mohiso22, Jun 9, 2025)
ba30051  Addressed Comments (mohiso22, Jun 9, 2025)
64ec975  Addressed Comments (mohiso22, Jun 10, 2025)
d15fe69  Addressed comments (quic-amitraj, Jun 10, 2025)
b7e1a46  Header and doc update (quic-rishinr, Jun 10, 2025)
d8a947a  Updated get_available_device_id method (quic-rishinr, Jun 10, 2025)
64 changes: 64 additions & 0 deletions QEfficient/base/pytorch_transforms.py
@@ -9,6 +9,8 @@

from torch import nn

from QEfficient.utils.logging_utils import logger


class PytorchTransform:
    """
@@ -110,3 +112,65 @@ def apply(cls, model: nn.Module) -> Tuple[nn.Module, bool]:
                transformed = True

        return model, transformed


class SplitGateUpWeightsTransform(PytorchTransform):
    """
    Split the fused Gate+Up projection weights and copy them into the model.

    For every transformer layer inside `model`:
      • expects <PREFIX>.experts.gate_up_proj in the *source* state dict `sd`, shaped [E, H, 2I]
      • copies the two halves into
            <PREFIX>.experts.gate_proj <-- Gate [E, H, I]
            <PREFIX>.experts.up_proj   <-- Up   [E, H, I]
    """

    @classmethod
    def apply(cls, model: nn.Module) -> Tuple[nn.Module, bool]:
        transformed = False
        model_class = model.__class__.__name__

        if model_class not in VLM_SPLIT_GATE_UP_WEIGHTS:
            return model, transformed

        model_tmp = model.language_model if hasattr(model, "language_model") else model

        num_layers = len(model_tmp.model.layers)
        delete_fused_key = True
        sd = model_tmp.state_dict()
        for layer_idx in range(num_layers):
            # ---- build the textual prefix once per layer ----------
            prefix = f"model.layers.{layer_idx}.feed_forward.experts."

            fused_key = prefix + "gate_up_proj"
            gate_key = prefix + "gate_proj"
            up_key = prefix + "up_proj"

            # ---- split [E, H, 2I] -> two [E, H, I] tensors ----------------
            fused = sd[fused_key]  # [E, H, 2I] (raw parameter, no .weight suffix)
            E, H, two_I = fused.shape
            ffn_dim = two_I // 2
            gate, up = fused.split(ffn_dim, dim=-1)  # views, no copy

            experts = model_tmp.model.layers[layer_idx].feed_forward.experts
            experts.gate_proj.data.copy_(gate)
            experts.up_proj.data.copy_(up)

            # ---- update the state dict so load_state_dict sees the right keys
            sd[gate_key] = gate
            sd[up_key] = up

            if delete_fused_key:
                del sd[fused_key]

            logger.info(f"[layer {layer_idx:02d}] loaded gate_proj & up_proj from fused tensor (shape {fused.shape})")
            transformed = True

        if hasattr(model, "language_model"):
            model.language_model = model_tmp
        else:
            model = model_tmp
        return model, transformed


VLM_SPLIT_GATE_UP_WEIGHTS = {"QEffLlama4ForConditionalGeneration"}
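For context, the core tensor operation in this transform can be checked in isolation. The sketch below (toy sizes and values are illustrative assumptions, not taken from the PR) splits a fused [E, H, 2I] gate_up_proj tensor along its last dimension into the two [E, H, I] projections of a gated MLP, exactly as the loop above does per layer:

import torch

# Toy dimensions: E experts, H hidden size, I intermediate (FFN) size.
E, H, I = 2, 4, 3
gate_up_proj = torch.randn(E, H, 2 * I)  # fused weight, shape [E, H, 2I]

# Same split the transform performs: halve the last dim into two views.
ffn_dim = gate_up_proj.shape[-1] // 2           # 2I // 2 == I
gate, up = gate_up_proj.split(ffn_dim, dim=-1)  # views, no copy

assert gate.shape == (E, H, I) and up.shape == (E, H, I)
# gate feeds the gating branch, up the linear branch of the gated MLP.

In the PR, the split is invoked through the same classmethod interface as the other PytorchTransform subclasses, i.e. model, transformed = SplitGateUpWeightsTransform.apply(model), and it is a no-op unless the model's class name appears in VLM_SPLIT_GATE_UP_WEIGHTS.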
6 changes: 6 additions & 0 deletions QEfficient/transformers/models/llama4/__init__.py
@@ -0,0 +1,6 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------