Skip to content

Commit 2d1b736

Browse files
tianshub (The tunix Authors)
authored and committed
fix metric logging step
PiperOrigin-RevId: 876360948
1 parent df627a6 commit 2d1b736

File tree

2 files changed

+2
-24
lines changed

2 files changed

+2
-24
lines changed

tunix/rl/experimental/agentic_rl_learner.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -511,10 +511,8 @@ def _batch_to_train_example(
511511
"""
512512
# Create a merged training_input where each field from the original input
513513
# is repeated G times to align with the G completions.
514-
num_generations = self.algo_config.num_generations
515-
prompt_index = batch_results[0].pair_index // num_generations
516-
if mode == rl_cluster_lib.Mode.TRAIN and self._full_batch_size:
517-
expected_step = prompt_index // self._full_batch_size
514+
if mode == rl_cluster_lib.Mode.TRAIN:
515+
expected_step = batch_results[0].group_id // self._full_batch_size
518516
else:
519517
expected_step = self.rl_cluster.global_steps
520518

tunix/sft/peft_trainer.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
from tunix.sft import profiler
4242
from tunix.sft import progress_bar
4343
from tunix.sft import sharding_utils
44-
from tunix.sft import system_metrics_calculator
4544
from tunix.sft import utils
4645

4746
_ModelInputT = Dict[str, ArrayLike]
@@ -233,7 +232,6 @@ def __init__(
233232
self._mode: sft_metrics_logger.Mode = sft_metrics_logger.Mode.TRAIN
234233
self._has_aux = False
235234
self._pbar = None
236-
self._flops_measured: bool = False
237235

238236
self._train_steps, self._restored_custom_metadata = (
239237
self.checkpoint_manager.maybe_restore(
@@ -659,24 +657,6 @@ def train(
659657
train_example, self.config.data_sharding_axis
660658
)
661659

662-
if not self._flops_measured and not skip_jit:
663-
self._flops_measured = True
664-
665-
tflops_per_step = system_metrics_calculator.measure_tflops_per_step(
666-
train_step_fn=train_step,
667-
model=self.model,
668-
optimizer=self.optimizer,
669-
train_example=train_example,
670-
)
671-
if tflops_per_step is not None:
672-
self.metrics_logger.log(
673-
self.metrics_prefix,
674-
"tflops_per_step",
675-
tflops_per_step,
676-
self._mode,
677-
0,
678-
)
679-
680660
self._throttler.wait_for_next()
681661
if self.training_hooks:
682662
self.training_hooks.on_train_step_start(self)

0 commit comments

Comments (0)