We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 03654c0, commit 75c5389 (Copy full SHA for 75c5389)
applications/Chat/coati/models/utils.py
@@ -19,7 +19,7 @@ def compute_approx_kl(log_probs: torch.Tensor,
19
action_mask: Mask for actions.
20
"""
21
22
- log_ratio = log_probs - log_probs_base
+ log_ratio = log_probs_base - log_probs
23
approx_kl = (log_ratio.exp() - 1) - log_ratio
24
if action_mask is not None:
25
approx_kl = masked_mean(approx_kl, action_mask, dim=1)
0 commit comments