Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions examples/models/llama/norm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
Expand Down Expand Up @@ -44,9 +44,9 @@
class ScalelessRMSNorm(torch.nn.RMSNorm):
"""RMSNorm with weight hardcoded to ones and not trainable.

Equivalent to a scaleless RMSNorm (no learnable scaling) but implemented as a
torch.nn.RMSNorm so the op composes/decomposes cleanly for backends like QNN
instead of being expressed as a hand-rolled decomposition.
Subclasses torch.nn.RMSNorm so backends (QNN) see a proper RMSNorm op for
lowering, but overrides forward with the explicit fp32 decomposition to
stay numerically identical to the rlformers reference during eager execution.
"""

def __init__(self, dim: int, eps: float = 1e-6):
Expand All @@ -56,6 +56,15 @@
self.weight.fill_(1.0)
self.weight.requires_grad = False

def forward(self, x):
if torch.compiler.is_compiling():
return super().forward(x)
x_float = x.float()
return (
x_float
* torch.rsqrt((x_float * x_float).mean(-1, keepdim=True) + self.eps)
).type_as(x)


class RMSNormCoreML(torch.nn.Module):
def __init__(self, dim: int, eps: float = 1e-6):
Expand Down
Loading