Commit 5ddbcee

feat: lora manager
1 parent ec17280 commit 5ddbcee

File tree

7 files changed: +1319 -22 lines changed

python/sgl_jax/srt/lora/__init__.py

Lines changed: 0 additions & 20 deletions
This file was deleted.

python/sgl_jax/srt/lora/layers.py

Lines changed: 188 additions & 0 deletions
@@ -0,0 +1,188 @@
# Copyright 2023-2024 SGLang Team
# Modifications copyright 2025 SGLang-JAX Team
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""LoRA layer wrappers using Flax Model Surgery."""

from __future__ import annotations

import jax
import jax.numpy as jnp
from flax import nnx


class LoRALinear(nnx.Module):
    """
    LoRA wrapper for Linear layers using Flax NNX.

    This wraps an existing Linear layer and adds LoRA (Low-Rank Adaptation)
    computation. Uses Model Surgery to preserve the original weights and sharding.

    The forward pass computes:
        output = base_layer(x) + scaling * (x @ lora_A @ lora_B)

    where the LoRA term is only added when `enabled=True`.

    Attributes:
        base_layer: Original Linear layer (preserves weights and sharding)
        lora_A: LoRA A matrix (in_features, lora_rank)
        lora_B: LoRA B matrix (lora_rank, out_features)
        scaling: LoRA scaling factor (typically alpha / rank)
        enabled: Whether LoRA computation is active
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        lora_rank: int,
        base_layer: nnx.Linear | None = None,
        rngs: nnx.Rngs | None = None,
    ):
        """
        Initialize LoRA Linear layer.

        Args:
            in_features: Input dimension
            out_features: Output dimension
            lora_rank: Rank of LoRA matrices
            base_layer: Existing Linear layer to wrap (optional)
            rngs: Random number generators for initialization
        """
        self.in_features = in_features
        self.out_features = out_features
        self.lora_rank = lora_rank

        # Base layer - will be populated via nnx.update() during surgery
        if base_layer is not None:
            self.base_layer = base_layer
        else:
            # Create placeholder base layer
            if rngs is None:
                rngs = nnx.Rngs(0)
            self.base_layer = nnx.Linear(
                in_features,
                out_features,
                use_bias=True,
                rngs=rngs,
            )

        # LoRA parameters (initialized to small random values)
        if rngs is None:
            rngs = nnx.Rngs(0)

        # Initialize lora_A with normal distribution scaled by 1/sqrt(rank)
        self.lora_A = nnx.Param(
            jax.random.normal(rngs(), (in_features, lora_rank)) / jnp.sqrt(lora_rank)
        )

        # Initialize lora_B to zeros (standard LoRA initialization)
        self.lora_B = nnx.Param(jnp.zeros((lora_rank, out_features)))

        # Control variables (not trainable)
        self.scaling = nnx.Variable(1.0)  # Will be set to alpha / rank
        self.enabled = nnx.Variable(False)  # Whether LoRA is active

    def __call__(self, x: jax.Array) -> jax.Array:
        """
        Forward pass with optional LoRA computation.

        Args:
            x: Input tensor

        Returns:
            Output tensor with LoRA delta added (if enabled)
        """
        # Base layer computation (preserves original behavior)
        output = self.base_layer(x)

        # Add LoRA delta if enabled
        if self.enabled.value:
            # Compute: x @ lora_A @ lora_B
            lora_delta = (x @ self.lora_A.value) @ self.lora_B.value
            output = output + self.scaling.value * lora_delta

        return output


class LoRAEmbedding(nnx.Module):
    """
    LoRA wrapper for Embedding layers.

    Similar to LoRALinear but for embedding layers.
    Currently a placeholder for future implementation.
    """

    def __init__(
        self,
        num_embeddings: int,
        features: int,
        lora_rank: int,
        base_layer: nnx.Embed | None = None,
        rngs: nnx.Rngs | None = None,
    ):
        """
        Initialize LoRA Embedding layer.

        Args:
            num_embeddings: Size of vocabulary
            features: Embedding dimension
            lora_rank: Rank of LoRA matrices
            base_layer: Existing Embed layer to wrap (optional)
            rngs: Random number generators
        """
        self.num_embeddings = num_embeddings
        self.features = features
        self.lora_rank = lora_rank

        # Base layer
        if base_layer is not None:
            self.base_layer = base_layer
        else:
            if rngs is None:
                rngs = nnx.Rngs(0)
            self.base_layer = nnx.Embed(
                num_embeddings,
                features,
                rngs=rngs,
            )

        # LoRA parameters for embeddings
        if rngs is None:
            rngs = nnx.Rngs(0)

        self.lora_A = nnx.Param(jax.random.normal(rngs(), (num_embeddings, lora_rank)))
        self.lora_B = nnx.Param(jnp.zeros((lora_rank, features)))

        self.scaling = nnx.Variable(1.0)
        self.enabled = nnx.Variable(False)

    def __call__(self, x: jax.Array) -> jax.Array:
        """
        Forward pass for embedding with LoRA.

        Args:
            x: Input token indices

        Returns:
            Embedded output with LoRA delta
        """
        output = self.base_layer(x)

        if self.enabled.value:
            # Embedding LoRA: lookup lora_A then multiply by lora_B
            lora_a_embed = self.lora_A.value[x]  # Shape: [..., lora_rank]
            lora_delta = lora_a_embed @ self.lora_B.value  # Shape: [..., features]
            output = output + self.scaling.value * lora_delta

        return output
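
For reference, a minimal usage sketch for LoRALinear (not part of the commit; the feature dimensions, rank, alpha value, and variable names below are illustrative assumptions):

# Hypothetical usage of LoRALinear; shapes and alpha are made up for illustration.
import jax.numpy as jnp
from flax import nnx

rank, alpha = 4, 8
rngs = nnx.Rngs(0)
base = nnx.Linear(16, 32, rngs=rngs)  # existing layer whose weights should be preserved
lora = LoRALinear(16, 32, lora_rank=rank, base_layer=base, rngs=rngs)

lora.scaling.value = alpha / rank  # matches the "alpha / rank" convention noted in the class
lora.enabled.value = True          # activate the LoRA delta

x = jnp.ones((2, 16))
y = lora(x)  # base_layer(x) + scaling * (x @ lora_A @ lora_B)
assert y.shape == (2, 32)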
