Skip to content

Commit 100b7de

Browse files
authored
Update actor.py
1 parent dfcf59e commit 100b7de

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

slime/backends/fsdp_utils/actor.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -486,9 +486,9 @@ def train(self, rollout_id: int, rollout_data_ref: Box) -> None:
486 486
pg_clipfrac = sum_of_sample_mean(pg_clipfrac, response_lengths, loss_masks)
487 487
ppo_kl = sum_of_sample_mean(ppo_kl.abs(), response_lengths, loss_masks)
488 488

489-
train_rollout_logprob_diff = (old_log_probs - rollout_log_probs).abs()
490-
train_rollout_logprob_diff = sum_of_sample_mean(
491-
train_rollout_logprob_diff, response_lengths, loss_masks
489+
train_rollout_logprob_abs_diff = (old_log_probs - rollout_log_probs).abs()
490+
train_rollout_logprob_abs_diff = sum_of_sample_mean(
491+
train_rollout_logprob_abs_diff, response_lengths, loss_masks
492 492
).detach()
493 493

494 494
loss = pg_loss
@@ -514,7 +514,7 @@ def train(self, rollout_id: int, rollout_data_ref: Box) -> None:
514 514
"pg_loss": pg_loss.detach(),
515 515
"pg_clipfrac": pg_clipfrac.detach(),
516 516
"ppo_kl": ppo_kl.detach(),
517-
"train_rollout_logprob_diff": train_rollout_logprob_diff,
517+
"train_rollout_logprob_abs_diff": train_rollout_logprob_abs_diff,
518 518
}
519 519

520 520
if self.args.use_kl_loss:

0 commit comments

Comments
 (0)