
Commit 4e1f0d4

s-noghabi authored and The tunix Authors committed
[seq packing] move to unreduced loss
PiperOrigin-RevId: 914999211
1 parent 30fec08 · commit 4e1f0d4

12 files changed

Lines changed: 407 additions & 123 deletions
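Editor's note on the pattern the diffs below all follow: loss functions no longer return a reduced (loss, aux) tuple. They return a LossOutput whose primary_loss is an unreduced WeightedMetric, collapsed on demand with .compute(), and whose aux_metrics holds the same kind of object per metric. The real definitions live in tunix.sft.utils; the sketch below is a reconstruction from the test expectations, not the actual implementation, and the field names total and weight are invented here (the tests construct WeightedMetric positionally).

# A minimal sketch, assuming WeightedMetric is a (weighted sum, weight)
# pair whose compute() is the weighted mean. Field names are hypothetical.
import dataclasses

import jax
import jax.numpy as jnp


@dataclasses.dataclass
class WeightedMetric:
  total: jax.Array   # weighted sum of per-token values
  weight: jax.Array  # total weight, e.g. the number of unmasked tokens

  def compute(self) -> jax.Array:
    # Final reduction, done once, after any merging across batches.
    return self.total / self.weight


@dataclasses.dataclass
class LossOutput:
  primary_loss: WeightedMetric
  aux_metrics: dict[str, WeightedMetric]


# Consistent with the peft_trainer test below: WeightedMetric(10.0, 5.0)
# reduces to 2.0.
assert WeightedMetric(jnp.array(10.0), jnp.array(5.0)).compute() == 2.0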

tests/rl/agentic/agentic_grpo_learner_test.py

Lines changed: 10 additions & 3 deletions
@@ -554,13 +554,15 @@ def __call__(self, inputs, positions, cache, attention_mask):
     policy_loss_fn = function_registry.get_policy_loss_fn(
         algo_config.policy_loss_fn
     )
-    loss, aux = policy_loss_fn(
+    loss_output = policy_loss_fn(
         model=MockModel(rngs=nnx.Rngs(0)),
         train_example=train_example,
         algo_config=algo_config,
         pad_id=0,
         eos_id=2,
     )
+    loss = loss_output.primary_loss.compute()
+    aux = loss_output.aux_metrics
     chex.assert_shape(loss, ())
     self.assertIn("kl", aux)
 
@@ -639,7 +641,7 @@ def __call__(self, inputs, positions, cache, attention_mask):
     policy_loss_fn = function_registry.get_policy_loss_fn(config.policy_loss_fn)
 
     model = MockModel(rngs=nnx.Rngs(0))
-    loss, _ = policy_loss_fn(
+    loss_output = policy_loss_fn(
         model=model,
         train_example=train_example,
         algo_config=config,
@@ -671,10 +673,12 @@ def __call__(self, inputs, positions, cache, attention_mask):
     else:
       expected_loss = float(jnp.mean(per_sequence_loss))
 
+    loss = loss_output.primary_loss.compute()
     np.testing.assert_allclose(loss, expected_loss, rtol=1e-6, atol=1e-6)
 
   def test_process_results_extracts_assistant_text(self):
     class MockTraj:
+
       def __init__(self, index):
         self.traj = {
             "conversation_text": [
@@ -695,6 +699,7 @@ def __init__(self, index):
     trajectories = [MockTraj(0), MockTraj(1)]
 
     extracted_completions = []
+
     def mock_compute_rewards(prompts, completions, **kwargs):
       extracted_completions.extend(completions)
       return jnp.ones(len(completions), dtype=jnp.float32)
@@ -748,7 +753,9 @@ def mock_compute_rewards(prompts, completions, **kwargs):
         chat_parser=MockChatParser(),
     )
 
-    with mock.patch.object(learner, "_compute_rewards", side_effect=mock_compute_rewards):
+    with mock.patch.object(
+        learner, "_compute_rewards", side_effect=mock_compute_rewards
+    ):
       with mock.patch.object(
           learner.rl_cluster,
           "get_ref_per_token_logps",

tests/rl/common_test.py

Lines changed: 12 additions & 5 deletions
@@ -24,6 +24,11 @@
 jax.config.update("jax_threefry_partitionable", False)
 
 
+def _compute_loss(*args, **kwargs):
+  out = getattr(common, "aggregate_loss")(*args, **kwargs)
+  return out.compute()
+
+
 class CommonTest(parameterized.TestCase):
 
   @parameterized.named_parameters(
@@ -446,7 +451,9 @@ def test_pad_to_length(self):
           expected_loss=(0.1 + 0.2) / 4.0 / 1.0,
       ),
       dict(
-          testcase_name="sequence_mean_token_sum_norm_partial_zero_mask_default",
+          testcase_name=(
+              "sequence_mean_token_sum_norm_partial_zero_mask_default"
+          ),
          loss_agg_mode="sequence-mean-token-sum-norm",
          per_token_loss_list=[[0.1, 0.2], [0.3, 0.4]],
          completion_mask_list=[[1, 1], [0, 0]],
@@ -496,7 +503,7 @@ def test_aggregate_loss_values(
   ):
     per_token_loss = jnp.array(per_token_loss_list)
     completion_mask = jnp.array(completion_mask_list)
-    actual_loss = common.aggregate_loss(
+    actual_loss = _compute_loss(
         per_token_loss, completion_mask, loss_agg_mode, **kwargs
     )
     np.testing.assert_allclose(actual_loss, expected_loss, rtol=1e-6, atol=1e-6)
@@ -505,7 +512,7 @@ def test_invalid_mode(self):
     with self.assertRaisesRegex(
         ValueError, "Unsupported loss aggregation mode"
     ):
-      common.aggregate_loss(jnp.ones((2, 2)), jnp.ones((2, 2)), "invalid-mode")
+      _compute_loss(jnp.ones((2, 2)), jnp.ones((2, 2)), "invalid-mode")
 
   @parameterized.named_parameters(
       dict(
@@ -541,7 +548,7 @@ def test_invalid_mode(self):
   )
   def test_invalid_norm(self, norm_val, loss_agg_mode):
     with self.assertRaisesRegex(ValueError, "Invalid 'norm' value"):
-      common.aggregate_loss(
+      _compute_loss(
           jnp.ones((2, 2)),
           jnp.ones((2, 2)),
           loss_agg_mode,
@@ -567,7 +574,7 @@ def test_aggregate_loss_bf16(self):
     per_token_loss = jnp.array([1.0, 2.0, 3.0], dtype=jnp.bfloat16)
     completion_mask = jnp.array([1, 1, 0], dtype=jnp.int32)
 
-    loss = common.aggregate_loss(
+    loss = _compute_loss(
         per_token_loss, completion_mask, loss_agg_mode="token-mean"
     )
     self.assertEqual(loss.dtype, jnp.float32)
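Editor's note: the _compute_loss shim above is the test-side view of the move to unreduced loss. The payoff for sequence packing, as the commit title suggests: packed batches carry different numbers of real (unmasked) tokens, so averaging already-reduced per-batch means over-weights the batches with few tokens, whereas merging the unreduced sums and weights first and reducing once recovers the true token mean. A small numeric illustration (not code from the commit):

import jax.numpy as jnp

losses_a = jnp.array([1.0, 2.0, 3.0, 4.0])  # 4 real tokens, sum 10.0
losses_b = jnp.array([10.0])                # 1 real token, sum 10.0

# Mean of per-batch means: (2.5 + 10.0) / 2 = 6.25, biased toward the
# short batch.
biased = (losses_a.mean() + losses_b.mean()) / 2

# Merge unreduced (sum, weight) pairs first, then reduce once:
# 20.0 / 5 = 4.0, the true mean over all 5 tokens.
unbiased = (losses_a.sum() + losses_b.sum()) / (losses_a.size + losses_b.size)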

tests/rl/grpo/dapo_learner_test.py

Lines changed: 13 additions & 6 deletions
@@ -89,22 +89,27 @@ def test_diff_loss(self):
         rngs=nnx.Rngs(0),
     )
 
-    # Call DAPO loss function (DAPO sets ref_per_token_logps to None as it doesn't fetch it)
+    # Call DAPO loss function (DAPO sets ref_per_token_logps to None as it
+    # doesn't fetch it).
     dapo_train_example = self.create_train_example()
     dapo_train_example.ref_per_token_logps = None
-    dapo_loss, dapo_aux = dapo_loss_fn_impl(
+    dapo_loss_output = dapo_loss_fn_impl(
         model, dapo_train_example, dapo_config, pad_id, eos_id
     )
+    dapo_loss = dapo_loss_output.primary_loss.compute()
+    dapo_aux = dapo_loss_output.aux_metrics
 
     # Call GRPO loss function
-    grpo_loss, grpo_aux = grpo_loss_fn_impl(
+    grpo_loss_output = grpo_loss_fn_impl(
         model, train_example, grpo_config, pad_id, eos_id
     )
+    grpo_loss = grpo_loss_output.primary_loss.compute()
+    grpo_aux = grpo_loss_output.aux_metrics
 
     # Assert that the loss values are different
     self.assertNotEqual(
-        dapo_loss.item(),
-        grpo_loss.item(),
+        dapo_loss,
+        grpo_loss,
         msg=(
             "DAPO and GRPO loss values should be different for the same input"
             " due to different loss aggregation logics."
@@ -113,7 +118,9 @@ def test_diff_loss(self):
 
     self.assertIn("kl", dapo_aux)
     self.assertIn("kl", grpo_aux)
-    self.assertEqual(dapo_aux["kl"], 0.0)  # DAPO does not have KL term.
+    self.assertEqual(
+        dapo_aux["kl"].compute(), 0.0
+    )  # DAPO does not have KL term.
 
 
 class TestDAPOConfigPostInit(parameterized.TestCase):

tests/rl/grpo/drgrpo_learner_test.py

Lines changed: 3 additions & 1 deletion
@@ -125,9 +125,11 @@ def test_drgrpo_loss_fn(self):
     )
 
     # Call DrGRPO loss function
-    drgrpo_loss, drgrpo_aux = drgrpo_loss_fn_impl(
+    drgrpo_loss_output = drgrpo_loss_fn_impl(
         model, train_example, drgrpo_config, pad_id, eos_id
     )
+    drgrpo_loss = drgrpo_loss_output.primary_loss.compute()
+    drgrpo_aux = drgrpo_loss_output.aux_metrics
 
     self.assertIn("kl", drgrpo_aux)
     self.assertTrue(jnp.isfinite(drgrpo_loss).all())

tests/sft/dpo/dpo_trainer_test.py

Lines changed: 4 additions & 2 deletions
@@ -270,14 +270,16 @@ def test_dpo_loss_fn(self):
     with mock.patch.object(
         common, "get_per_token_logps", return_value=jnp.array(per_token_logps)
     ):
-      loss, _ = dpo_lib.dpo_loss_fn(
+      loss_output = dpo_lib.dpo_loss_fn(
          model, train_example, beta=0.1, label_smoothing=0
       )
+      loss = loss_output.primary_loss.compute()
       np.testing.assert_allclose(loss, 0.753059, atol=1e-5)
 
-      loss, _ = dpo_lib.dpo_loss_fn(
+      loss_output = dpo_lib.dpo_loss_fn(
          model, train_example, beta=0.1, label_smoothing=0.3
       )
+      loss = loss_output.primary_loss.compute()
      np.testing.assert_allclose(loss, 0.925447, atol=1e-5)
 
   def test_dpo_prepare_inputs_for_strings(self):

tests/sft/dpo/orpo_trainer_test.py

Lines changed: 9 additions & 4 deletions
@@ -161,7 +161,9 @@ def test_orpo_trainer(
         orpo_trainer._train_steps,
     )
     self.assertLen(
-        orpo_trainer.metrics_logger.get_metric_history("", metric_name, "eval"),
+        orpo_trainer.metrics_logger.get_metric_history(
+            "", metric_name, "eval"
+        ),
         3,
     )
 
@@ -253,13 +255,16 @@ def test_orpo_loss_fn(self):
         "compute_logps",
         return_value=(jnp.array(chosen_logps), jnp.array(rejected_logps)),
     ):
-      loss, aux = orpo_lib.dpo_loss_fn(
+      loss_output = orpo_lib.dpo_loss_fn(
          model,
          train_example,
          algorithm="orpo",
          lambda_orpo=0.1,
          label_smoothing=0,
      )
+      loss = loss_output.primary_loss.compute()
+      aux = loss_output.aux_metrics
+
      # Loss should be a scalar and finite
      self.assertEqual(loss.shape, ())
      self.assertTrue(jnp.isfinite(loss))
@@ -274,8 +279,8 @@ def test_orpo_loss_fn(self):
     self.assertIn("odds_ratio", aux)
 
     # Check that accuracy is between 0 and 1
-    self.assertGreaterEqual(aux["rewards/accuracy"], 0.0)
-    self.assertLessEqual(aux["rewards/accuracy"], 1.0)
+    self.assertGreaterEqual(aux["rewards/accuracy"].compute(), 0.0)
+    self.assertLessEqual(aux["rewards/accuracy"].compute(), 1.0)
 
   def test_orpo_prepare_inputs_for_strings(self):
     tokenizer = tc.MockVocab()

tests/sft/peft_trainer_test.py

Lines changed: 61 additions & 0 deletions
@@ -34,6 +34,7 @@
 from tunix.sft import hooks
 from tunix.sft import peft_trainer
 from tunix.sft import profiler
+from tunix.sft import utils
 from tunix.tests import test_common as tc
 from tunix.utils import compat
 
@@ -634,7 +635,67 @@ def _post_process_eval_step(self, aux):
     self.assertEqual(train_invoke, {'foo': 2, 'bar': 4})
     self.assertEqual(eval_invoke, {'foo': 1, 'bar': 16})
 
+  def test_loss_output_format(self):
+    def custom_loss_fn(
+        model: nnx.Module,
+        input_tokens: jax.Array,
+        input_mask: jax.Array,
+        positions: jax.Array,
+        attention_mask: jax.Array,
+        images: jax.Array | None = None,
+    ) -> utils.LossOutput:
+      del model, input_tokens, input_mask, positions, attention_mask, images
+      return utils.LossOutput(
+          primary_loss=utils.WeightedMetric(
+              jnp.array(2.0, dtype=jnp.float32),
+              jnp.array(2.0, dtype=jnp.float32),
+          ),
+          aux_metrics={
+              'foo': utils.WeightedMetric(
+                  jnp.array(10.0, dtype=jnp.float32),
+                  jnp.array(5.0, dtype=jnp.float32),
+              ),
+              'bar': utils.WeightedMetric(
+                  jnp.array(6.0, dtype=jnp.float32),
+                  jnp.array(2.0, dtype=jnp.float32),
+              ),
+          },
+      )
+
+    train_invoke = {'foo': 0.0, 'bar': 0.0}
+    eval_invoke = {'foo': 0.0, 'bar': 0.0}
+
+    class CustomTrainer(peft_trainer.PeftTrainer):
+
+      def _post_process_train_step(self, aux):
+        train_invoke['foo'] += aux['foo']
+        train_invoke['bar'] += aux['bar']
+
+      def _post_process_eval_step(self, aux):
+        eval_invoke['foo'] += aux['foo']
+        eval_invoke['bar'] += aux['bar']
+
+    config = peft_trainer.TrainingConfig(eval_every_n_steps=2, max_steps=100)
+    model = tc.ToyTransformer(config=tc.ModelConfig(), rngs=nnx.Rngs(0))
+
+    trainer = CustomTrainer(model, optax.sgd(1e-3), config)
+    trainer = trainer.with_gen_model_input_fn(
+        dummy_gen_model_input_fn
+    ).with_loss_fn(
+        custom_loss_fn
+    )  # Note: has_aux=False is default but LossOutput returns aux natively
+
+    trainer.train(self.train_ds, self.eval_ds)
+    # The dataset provides 2 training steps.
+    # foo = 10.0 / 5.0 = 2.0 per step.
+    # bar = 6.0 / 2.0 = 3.0 per step.
+    self.assertEqual(train_invoke, {'foo': 4.0, 'bar': 6.0})
+
+    # Since eval_ds is length 2, it evaluates at step 2.
+    self.assertEqual(eval_invoke, {'foo': 8.0, 'bar': 12.0})
+
   def test_injected_params(self):
+
     config = peft_trainer.TrainingConfig(eval_every_n_steps=2, max_steps=100)
     model = tc.ToyTransformer(config=tc.ModelConfig(), rngs=nnx.Rngs(0))
 
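Editor's note on test_loss_output_format above: the trainer evidently calls .compute() on each aux WeightedMetric before invoking the post-process hooks, since the hooks accumulate plain per-step scalars (foo = 10.0 / 5.0 = 2.0, bar = 6.0 / 2.0 = 3.0). The eval totals of 8.0 and 12.0 are consistent with four eval-step hook invocations at those per-step values. A sketch of the assumed hook-side reduction (_reduce_aux is a hypothetical name, not from the diff):

# Hypothetical: collapse each unreduced aux metric to a scalar before the
# _post_process_* hooks see it.
def _reduce_aux(aux_metrics):
  return {name: metric.compute() for name, metric in aux_metrics.items()}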