Fix GSPO bug: concatenate old_log_probs alongside log_probs in policy_loss_function (THUDM#695)

zhuzilin · web-flow · commit 22f4e22f327c · 2025-11-11T11:05:51.000+08:00
diff --git a/slime/backends/megatron_utils/loss.py b/slime/backends/megatron_utils/loss.py
@@ -410,6 +410,7 @@ def policy_loss_function(
         ]
         ppo_kl = [kl.expand_as(log_prob) for kl, log_prob in zip(ppo_kl, log_probs)]
         ppo_kl = torch.cat(ppo_kl, dim=0)
+        old_log_probs = torch.cat(old_log_probs, dim=0)
         log_probs = torch.cat(log_probs, dim=0)
     else:
         old_log_probs = torch.cat(old_log_probs, dim=0)

Original file line number	Diff line number	Diff line change
`@@ -410,6 +410,7 @@ def policy_loss_function(`
`410`	`410`	`]`
`411`	`411`	`ppo_kl = [kl.expand_as(log_prob) for kl, log_prob in zip(ppo_kl, log_probs)]`
`412`	`412`	`ppo_kl = torch.cat(ppo_kl, dim=0)`
	`413`	`+ old_log_probs = torch.cat(old_log_probs, dim=0)`
`413`	`414`	`log_probs = torch.cat(log_probs, dim=0)`
`414`	`415`	`else:`
`415`	`416`	`old_log_probs = torch.cat(old_log_probs, dim=0)`