We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 55b40c8 commit fd9a5d9 (Copy full SHA for fd9a5d9)
1 file changed
verl/trainer/core_algos.py
@@ -469,7 +469,7 @@ def compute_policy_loss(
469
if loss_type == "gspo_token":
470
log_importance_ratio = negative_approx_kl_in_seq.detach().unsqueeze(-1) + log_probs - log_probs.detach()
471
else:
472
- log_importance_ratio = negative_approx_kl_in_seq * response_mask
+ log_importance_ratio = negative_approx_kl_in_seq.unsqueeze(-1) * response_mask
473
474
log_importance_ratio = negative_approx_kl
475
0 commit comments