Skip to content

Commit 3b55852

Browse files
jiangyangmu authored and The tunix Authors committed
Add metric first_micro_batch_rollout_time in fully disaggregated mode.
PiperOrigin-RevId: 848314187
1 parent 9c82596 commit 3b55852

File tree

2 files changed

+6
-0
lines changed

2 files changed

+6
-0
lines changed

tests/perf/export_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@ def test_export_grpo_metrics_fully_disaggregated(self, mock_perf_counter):
260260
"perf/global_step_time": 1.3,
261261
"perf/weight_sync_time": 0.1,
262262
"perf/rollout_idle_time": 0.69,
263+
"perf/first_micro_batch_rollout_time": 0.41,
263264
"perf/sum/rollout_time": 0.11,
264265
"perf/sum/refer_inference_time": 0.11,
265266
"perf/sum/actor_train_time": 0.11,

tunix/perf/export.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,10 @@ def _grpo_metrics_fully_disaggregated(
288288
span.duration for span in actor_train_step_spans
289289
]
290290

291+
first_micro_batch_rollout_time: float = (
292+
rollout_spans[0].end - global_step_group.begin
293+
)
294+
291295
# append [0.0] to make size equal to micro batch
292296
actor_gap_time: list[float] = [
293297
b.end - a.begin
@@ -299,6 +303,7 @@ def _grpo_metrics_fully_disaggregated(
299303
"perf/global_step_time": (global_step_time, None),
300304
"perf/weight_sync_time": (weight_sync_time, None),
301305
"perf/rollout_idle_time": (rollout_idle_time, None),
306+
"perf/first_micro_batch_rollout_time": (first_micro_batch_rollout_time, None),
302307
"perf/sum/rollout_time": (np.sum(rollout_time), None),
303308
"perf/sum/refer_inference_time": (np.sum(refer_inference_time), None),
304309
"perf/sum/refer_gap_time": (np.sum(refer_gap_time), None),

0 commit comments

Comments
 (0)