google
diff --git a/examples/deepscaler/train_deepscaler_nb.py b/examples/deepscaler/train_deepscaler_nb.py
Lines changed: 14 additions & 5 deletions
diff --git a/tests/perf/experimental/export_test.py b/tests/perf/experimental/export_test.py
Lines changed: 30 additions & 0 deletions
diff --git a/tests/perf/experimental/perfetto_test.py b/tests/perf/experimental/perfetto_test.py
Lines changed: 84 additions & 0 deletions
@@ -70,6 +70,8 @@
   from tunix.utils import math_rewards
   from tunix.utils import compat
   from tunix.cli.utils import data as data_lib
+  from tunix import PerfMetricsConfig
+  from tunix.perf.experimental.export import PerfMetricsExport
 
 try:
   import pathwaysutils
@@ -109,7 +111,7 @@
 # The number of times the policy generates multiple responses for a given prompt
 # within a single training step. This corresponds to `G` in Algorithm 1 in the
 # paper. The "group" in GRPO comes from here.
-NUM_GENERATIONS = 8
+NUM_GENERATIONS = 2
 
 # === other GRPO configs ===
 # The number of iterations per batch (𝜇 in GRPO algo 1).
@@ -125,15 +127,15 @@
 
 # ====== Training ======
 ENABLE_REMAT = True
-BATCH_SIZE = 128
-MINI_BATCH_SIZE = 64
+BATCH_SIZE = 4
+MINI_BATCH_SIZE = 2
 NUM_BATCHES = 100
 # Keep `NUM_TEST_BATCHES` low so that evaluation runs quickly. It can be
 # increased to a max. of 330 (if batch size is 4).
 NUM_TEST_BATCHES = 50
 
-EVAL_EVERY_N_STEPS = 1000  # this doesn't matter if `TRAIN_FRACTION = 1.0`.
-NUM_EPOCHS = 100  # can potentially train for more epochs
+EVAL_EVERY_N_STEPS = 50  # this doesn't matter if `TRAIN_FRACTION = 1.0`.
+NUM_EPOCHS = 10  # can potentially train for more epochs
 
 # Number of training steps.
 MAX_STEPS = int(NUM_BATCHES * NUM_ITERATIONS * TRAIN_FRACTION * NUM_EPOCHS)
@@ -529,13 +531,20 @@ def get_lora_model(base_model, model_mesh):
     max_concurrency=MAX_CONCURRENCY,
 )
 
+# Perf Metrics logging
+# Build a default PerfMetricsConfig and install the `export_metrics` method of
+# a PerfMetricsExport instance as its v2 export hook. The exporter is created
+# with "/tmp/agentic_perf" (presumably the directory traces are written to —
+# TODO confirm against PerfMetricsExport's signature).
+perf_metrics_config = PerfMetricsConfig()
+perf_metrics_config.custom_export_fn_v2 = PerfMetricsExport(
+    "/tmp/agentic_perf"
+).export_metrics
+
 # %%
 # RL cluster
 rl_cluster = rl_cluster_lib.RLCluster(
     actor=qwen2_actor,
     reference=qwen2_ref,
     tokenizer=tokenizer,
     cluster_config=cluster_config,
+    # Pass the perf metrics config (with its export hook) to the cluster.
+    perf_config=perf_metrics_config,
 )
 
 show_hbm_usage("after RLCluster creation")
 
@@ -0,0 +1,30 @@
+"""Tests for export."""
+
+import os
+import pathlib
+import time
+from absl.testing import absltest
+from tunix.perf.experimental import export
+from tunix.perf.experimental import tracer
+
+
+class ExportTest(absltest.TestCase):
+  """Tests for `export.PerfMetricsExport` used as a tracer export function."""
+
+  def test_perf_metrics_export(self):
+    """Exporting one traced span leaves a single trace file in the directory."""
+    # Backward compatibility check
+    trace_dir = pathlib.Path(self.create_tempdir().full_path)
+    metrics_exporter = export.PerfMetricsExport(trace_dir=trace_dir)
+
+    # Record one short span, then flush it through the exporter.
+    perf_tracer = tracer.PerfTracer(export_fn=metrics_exporter.export_metrics)
+    with perf_tracer.span("test_span"):
+      time.sleep(0.001)
+    perf_tracer.export()
+
+    # Exactly one file is expected, named with the v2 trace prefix.
+    trace_files = os.listdir(trace_dir)
+    self.assertLen(trace_files, 1)
+    self.assertTrue(trace_files[0].startswith("perfetto_trace_v2_"))
+
+
+if __name__ == "__main__":
+  absltest.main()
@@ -0,0 +1,84 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for perfetto."""
+
+import os
+import tempfile
+import time
+
+from absl.testing import absltest
+from tunix.perf.experimental import perfetto
+from tunix.perf.experimental import tracer
+
+
+class PerfettoTest(absltest.TestCase):
+  """Tests for span naming and trace-file output in the perfetto module."""
+
+  def test_create_span_name(self):
+    """Checks the display name built from a span name and its tags."""
+    # Test basic span name with global_step
+    name = perfetto._create_span_name("my_span", {"global_step": 10})
+    self.assertEqual(name, "my_span (step=10)")
+
+    # Test peft_train_step with role
+    name = perfetto._create_span_name(
+        "peft_train_step", {"global_step": 20, "role": "actor"}
+    )
+    self.assertEqual(name, "peft_train_step (step=20, role=actor)")
+
+    # Test rollout with group_id and pair_index
+    name = perfetto._create_span_name(
+        "rollout", {"group_id": 5, "pair_index": 3, "global_step": 100}
+    )
+    self.assertEqual(name, "rollout (step=100, group_id=5, pair_index=3)")
+
+    # Test rollout with missing pair_index
+    name = perfetto._create_span_name("rollout", {"group_id": 5})
+    self.assertEqual(name, "rollout (group_id=5)")
+
+    # Test unknown name with extra tags (should ignore specific logic but keep
+    # step)
+    name = perfetto._create_span_name(
+        "unknown_span", {"role": "actor", "global_step": 50}
+    )
+    self.assertEqual(name, "unknown_span (step=50)")
+
+    # Test no tags
+    name = perfetto._create_span_name("simple_span", {})
+    self.assertEqual(name, "simple_span")
+
+  # TODO(noghabi): Add more tests for PerfettoTraceWriter.
+  def test_perfetto_trace_writer(self):
+    """Checks that writing one timeline produces a single `.pb` trace file."""
+    with tempfile.TemporaryDirectory() as tmp_dir:
+      writer = perfetto.PerfettoTraceWriter(trace_dir=tmp_dir)
+
+      # Create a dummy timeline holding one short span. The value returned by
+      # start_span is not needed by this test, so it is not bound to a name.
+      t = tracer.Timeline("test_timeline", time.perf_counter())
+      t.start_span("test_span", time.perf_counter())
+      time.sleep(0.001)
+      t.stop_span(time.perf_counter())
+
+      timelines = {"test_timeline": t}
+
+      writer.write_timelines(timelines)
+
+      # Check if file was created
+      files = os.listdir(tmp_dir)
+      self.assertLen(files, 1)
+      self.assertTrue(files[0].startswith("perfetto_trace_v2_"))
+      self.assertTrue(files[0].endswith(".pb"))
+
+      # We could parse the proto back to verify content, but just existence is
+      # good for now.
+
+
+if __name__ == "__main__":
+  absltest.main()