1 parent 73fc080 commit 46670e1
modelopt/torch/quantization/algorithms.py
```diff
@@ -1013,8 +1013,8 @@ def _get_kl_div_loss(
     prob_unquant: torch.Tensor, logits_quant: torch.Tensor, lm_head: nn.Module = None
 ) -> torch.Tensor:
     log_prob_quant = _get_prob_from_logits(logits_quant, return_log_prob=True, lm_head=lm_head)
-    # We dont need to calculate the full kl div loss here, just get p*log_q
-    return _get_p_log_q(prob_unquant, log_prob_quant)
+    # We dont need to calculate the full kl div loss here, just get - p*log_q
+    return -_get_p_log_q(prob_unquant, log_prob_quant)


 def _get_lm_head(model: nn.Module) -> nn.Module:
```
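For context, a minimal sketch (not part of the commit) of why the sign flip matters: KL(p || q) = sum(p * log p) - sum(p * log q), and only the second term depends on the quantized model, so the quantity to minimize is -sum(p * log q), i.e. the cross-entropy term. The `p_log_q` helper and random logits below are illustrative stand-ins, not the repo's actual `_get_p_log_q` / `_get_prob_from_logits`.

```python
# Hedged sketch: toy tensors and a hypothetical p_log_q helper, for illustration only.
import torch
import torch.nn.functional as F


def p_log_q(prob_unquant: torch.Tensor, log_prob_quant: torch.Tensor) -> torch.Tensor:
    # Sum over the vocab dimension, average over batch/sequence positions.
    return (prob_unquant * log_prob_quant).sum(dim=-1).mean()


# Toy (batch, seq, vocab) logits; the quantized logits are a perturbed copy.
logits_unquant = torch.randn(4, 8, 32)
logits_quant = logits_unquant + 0.1 * torch.randn_like(logits_unquant)

prob_unquant = F.softmax(logits_unquant, dim=-1)
log_prob_quant = F.log_softmax(logits_quant, dim=-1)

# Before the fix the returned value was p*log_q, which is <= 0, so minimizing it
# rewards divergence. After the fix, -p*log_q is the cross-entropy term and is a
# proper loss to minimize.
loss = -p_log_q(prob_unquant, log_prob_quant)

# Sanity check: the loss differs from the true KL divergence only by the entropy
# of p, which does not depend on the quantized model.
kl = F.kl_div(log_prob_quant, prob_unquant, reduction="none").sum(dim=-1).mean()
entropy_p = -(prob_unquant * prob_unquant.clamp_min(1e-12).log()).sum(dim=-1).mean()
assert torch.allclose(loss, kl + entropy_p, atol=1e-5)
```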