Skip to content

Commit 0eb6763

Browse files
tianshub, The tunix Authors
authored and committed
fix metric logging step
PiperOrigin-RevId: 876499502
1 parent df627a6 commit 0eb6763

File tree

1 file changed

+2
-4
lines changed

1 file changed

+2
-4
lines changed

tunix/rl/experimental/agentic_rl_learner.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -511,10 +511,8 @@ def _batch_to_train_example(
511511
"""
512512
# Create a merged training_input where each field from the original input
513513
# is repeated G times to align with the G completions.
514-
num_generations = self.algo_config.num_generations
515-
prompt_index = batch_results[0].pair_index // num_generations
516-
if mode == rl_cluster_lib.Mode.TRAIN and self._full_batch_size:
517-
expected_step = prompt_index // self._full_batch_size
514+
if mode == rl_cluster_lib.Mode.TRAIN:
515+
expected_step = batch_results[0].group_id // self._full_batch_size
518516
else:
519517
expected_step = self.rl_cluster.global_steps
520518

0 commit comments

Comments
 (0)