Skip to content

Commit 2f14c03

Browse files
committed
feat: add tid to WorkerParkEvent and WorkerUnparkEvent
Add `tid: u32` to both events so the trace is self-describing for thread-role attribution. Previously, analysis tools had to query runtime state at flush time to recover the tid→worker_id mapping, which only works while the runtime is alive. The field is stamped via `events::current_tid()` (one gettid syscall on Linux, ~5 ns) in the existing park/unpark hooks. This is an additive schema change: old JS/Rust decoders ignore unknown fields in the schema-driven format.
1 parent 6f0fd96 commit 2f14c03

12 files changed

Lines changed: 116 additions & 0 deletions

File tree

dial9-tokio-telemetry/benches/threadlocal_encode_iai.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,15 @@ fn encode_batch(encoder: &mut Encoder<Vec<u8>>, batch: &[(u64, WorkerId, TaskId)
6666
worker_id: batch[0].1,
6767
local_queue: 0,
6868
cpu_time_ns: 600_000,
69+
tid: 0,
6970
});
7071
encoder.write_infallible(&WorkerUnparkEvent {
7172
timestamp_ns: batch[0].0,
7273
worker_id: batch[0].1,
7374
local_queue: 5,
7475
cpu_time_ns: 500_000,
7576
sched_wait_ns: 1_000,
77+
tid: 0,
7678
});
7779
}
7880

dial9-tokio-telemetry/benches/writer_encode_iai.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ fn encode(workers: Vec<usize>) -> Vec<u8> {
4444
local_queue: 5,
4545
cpu_time_ns: 500_000,
4646
sched_wait_ns: 1_000,
47+
tid: 0,
4748
});
4849

4950
for i in 0..170u64 {
@@ -82,6 +83,7 @@ fn encode(workers: Vec<usize>) -> Vec<u8> {
8283
worker_id: wid,
8384
local_queue: 0,
8485
cpu_time_ns: 600_000,
86+
tid: 0,
8587
});
8688
}
8789
}

dial9-tokio-telemetry/benches/writer_write_encoded_iai.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ fn make_encoded_batch(worker: usize) -> Batch {
4040
local_queue: 5,
4141
cpu_time_ns: 500_000,
4242
sched_wait_ns: 1_000,
43+
tid: 0,
4344
});
4445

4546
for i in 0..170u64 {
@@ -78,6 +79,7 @@ fn make_encoded_batch(worker: usize) -> Batch {
7879
worker_id: wid,
7980
local_queue: 0,
8081
cpu_time_ns: 600_000,
82+
tid: 0,
8183
});
8284
}
8385

dial9-tokio-telemetry/examples/trace_to_fat_jsonl.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@ enum FatEvent {
2828
worker: u64,
2929
local_q: usize,
3030
cpu_ns: u64,
31+
tid: u32,
3132
},
3233
WorkerUnpark {
3334
timestamp_ns: u64,
3435
worker: u64,
3536
local_q: usize,
3637
cpu_ns: u64,
3738
sched_wait_ns: u64,
39+
tid: u32,
3840
},
3941
QueueSample {
4042
timestamp_ns: u64,
@@ -117,24 +119,28 @@ fn to_fat_event(event: &TelemetryEvent, reader: &TraceReader) -> Option<FatEvent
117119
worker_id,
118120
worker_local_queue_depth,
119121
cpu_time_nanos,
122+
tid,
120123
} => Some(FatEvent::WorkerPark {
121124
timestamp_ns: *timestamp_nanos,
122125
worker: worker_id.as_u64(),
123126
local_q: *worker_local_queue_depth,
124127
cpu_ns: *cpu_time_nanos,
128+
tid: *tid,
125129
}),
126130
TelemetryEvent::WorkerUnpark {
127131
timestamp_nanos,
128132
worker_id,
129133
worker_local_queue_depth,
130134
cpu_time_nanos,
131135
sched_wait_delta_nanos,
136+
tid,
132137
} => Some(FatEvent::WorkerUnpark {
133138
timestamp_ns: *timestamp_nanos,
134139
worker: worker_id.as_u64(),
135140
local_q: *worker_local_queue_depth,
136141
cpu_ns: *cpu_time_nanos,
137142
sched_wait_ns: *sched_wait_delta_nanos,
143+
tid: *tid,
138144
}),
139145
TelemetryEvent::QueueSample {
140146
timestamp_nanos,

dial9-tokio-telemetry/src/background_task/sealed.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ mod tests {
253253
worker_id: crate::telemetry::format::WorkerId::from(0usize),
254254
local_queue: 0,
255255
cpu_time_ns: 0,
256+
tid: 0,
256257
});
257258
writer
258259
.write_encoded_batch(&crate::telemetry::collector::Batch {
@@ -292,6 +293,7 @@ mod tests {
292293
worker_id: crate::telemetry::format::WorkerId::from(0usize),
293294
local_queue: 0,
294295
cpu_time_ns: 0,
296+
tid: 0,
295297
});
296298
writer
297299
.write_encoded_batch(&crate::telemetry::collector::Batch {
@@ -349,6 +351,7 @@ mod tests {
349351
worker_id: crate::telemetry::format::WorkerId::from(0usize),
350352
local_queue: 0,
351353
cpu_time_ns: 0,
354+
tid: 0,
352355
});
353356
writer
354357
.write_encoded_batch(&crate::telemetry::collector::Batch {
@@ -382,6 +385,7 @@ mod tests {
382385
worker_id: crate::telemetry::format::WorkerId::from(0usize),
383386
local_queue: 0,
384387
cpu_time_ns: 0,
388+
tid: 0,
385389
});
386390
enc.into_inner()
387391
}

dial9-tokio-telemetry/src/telemetry/analysis.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,7 @@ mod tests {
877877
worker_id: WorkerId::from(7usize),
878878
local_queue: 3,
879879
cpu_time_ns: 11,
880+
tid: 0,
880881
}),
881882
1,
882883
);
@@ -903,6 +904,7 @@ mod tests {
903904
worker_id,
904905
worker_local_queue_depth: 3,
905906
cpu_time_nanos: 11,
907+
..
906908
} if worker_id == WorkerId::from(7usize)
907909
));
908910
}
@@ -987,6 +989,7 @@ mod tests {
987989
worker_id: WorkerId::from(0usize),
988990
worker_local_queue_depth: 0,
989991
cpu_time_nanos: 0,
992+
tid: 0,
990993
},
991994
TelemetryEvent::QueueSample {
992995
timestamp_nanos: 5_000_000,
@@ -998,6 +1001,7 @@ mod tests {
9981001
worker_local_queue_depth: 0,
9991002
cpu_time_nanos: 0,
10001003
sched_wait_delta_nanos: 0,
1004+
tid: 0,
10011005
},
10021006
];
10031007
let idle = detect_idle_workers(&events);
@@ -1101,13 +1105,15 @@ mod tests {
11011105
worker_id: WorkerId::from(0usize),
11021106
worker_local_queue_depth: 0,
11031107
cpu_time_nanos: 0,
1108+
tid: 0,
11041109
},
11051110
TelemetryEvent::WorkerUnpark {
11061111
timestamp_nanos: 5_000_000,
11071112
worker_id: WorkerId::from(0usize),
11081113
worker_local_queue_depth: 0,
11091114
cpu_time_nanos: 0,
11101115
sched_wait_delta_nanos: 200_000, // 200us
1116+
tid: 0,
11111117
},
11121118
];
11131119
let delays = detect_sched_delays(&events, 100_000); // 100us threshold
@@ -1126,13 +1132,15 @@ mod tests {
11261132
worker_id: WorkerId::from(0usize),
11271133
worker_local_queue_depth: 0,
11281134
cpu_time_nanos: 0,
1135+
tid: 0,
11291136
},
11301137
TelemetryEvent::WorkerUnpark {
11311138
timestamp_nanos: 2_000_000,
11321139
worker_id: WorkerId::from(0usize),
11331140
worker_local_queue_depth: 0,
11341141
cpu_time_nanos: 0,
11351142
sched_wait_delta_nanos: 50_000, // 50us
1143+
tid: 0,
11361144
},
11371145
];
11381146
let delays = detect_sched_delays(&events, 100_000);
@@ -1147,26 +1155,30 @@ mod tests {
11471155
worker_id: WorkerId::from(0usize),
11481156
worker_local_queue_depth: 0,
11491157
cpu_time_nanos: 0,
1158+
tid: 0,
11501159
},
11511160
TelemetryEvent::WorkerPark {
11521161
timestamp_nanos: 1_000_000,
11531162
worker_id: WorkerId::from(1usize),
11541163
worker_local_queue_depth: 0,
11551164
cpu_time_nanos: 0,
1165+
tid: 0,
11561166
},
11571167
TelemetryEvent::WorkerUnpark {
11581168
timestamp_nanos: 3_000_000,
11591169
worker_id: WorkerId::from(0usize),
11601170
worker_local_queue_depth: 0,
11611171
cpu_time_nanos: 0,
11621172
sched_wait_delta_nanos: 500_000, // 500us
1173+
tid: 0,
11631174
},
11641175
TelemetryEvent::WorkerUnpark {
11651176
timestamp_nanos: 4_000_000,
11661177
worker_id: WorkerId::from(1usize),
11671178
worker_local_queue_depth: 0,
11681179
cpu_time_nanos: 0,
11691180
sched_wait_delta_nanos: 10_000, // 10us - below threshold
1181+
tid: 0,
11701182
},
11711183
];
11721184
let delays = detect_sched_delays(&events, 100_000);
@@ -1354,13 +1366,15 @@ mod tests {
13541366
worker_id: WorkerId::from(0usize),
13551367
worker_local_queue_depth: 0,
13561368
cpu_time_nanos: 0,
1369+
tid: 0,
13571370
},
13581371
TelemetryEvent::WorkerUnpark {
13591372
timestamp_nanos: 6_000_000,
13601373
worker_id: WorkerId::from(0usize),
13611374
worker_local_queue_depth: 0,
13621375
cpu_time_nanos: 0,
13631376
sched_wait_delta_nanos: 0,
1377+
tid: 0,
13641378
},
13651379
];
13661380
let idle = detect_idle_workers(&events);

dial9-tokio-telemetry/src/telemetry/events.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ pub enum TelemetryEvent {
9999
/// Thread CPU time (nanos) from CLOCK_THREAD_CPUTIME_ID.
100100
#[serde(rename = "cpu_ns")]
101101
cpu_time_nanos: u64,
102+
/// OS thread ID of the parking thread.
103+
tid: u32,
102104
},
103105
/// A worker thread unparked (resumed).
104106
WorkerUnpark {
@@ -117,6 +119,8 @@ pub enum TelemetryEvent {
117119
/// Scheduling wait delta (nanos) from schedstat.
118120
#[serde(rename = "sched_wait_ns")]
119121
sched_wait_delta_nanos: u64,
122+
/// OS thread ID of the unparking thread.
123+
tid: u32,
120124
},
121125
/// Periodic sample of the global task queue depth.
122126
QueueSample {

dial9-tokio-telemetry/src/telemetry/format.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,9 @@ pub struct WorkerParkEvent {
141141
pub local_queue: u8,
142142
/// Thread CPU time in nanoseconds.
143143
pub cpu_time_ns: u64,
144+
/// OS thread ID of the parking thread. On Linux/Android, the result of gettid();
145+
/// on other platforms, a synthetic per-process counter — see `events::current_tid`.
146+
pub tid: u32,
144147
}
145148

146149
/// Wire-format event for a worker unpark.
@@ -157,6 +160,9 @@ pub struct WorkerUnparkEvent {
157160
pub cpu_time_ns: u64,
158161
/// Scheduling wait delta in nanoseconds.
159162
pub sched_wait_ns: u64,
163+
/// OS thread ID of the unparking thread. On Linux/Android, the result of gettid();
164+
/// on other platforms, a synthetic per-process counter — see `events::current_tid`.
165+
pub tid: u32,
160166
}
161167

162168
#[derive(TraceEvent)]
@@ -400,13 +406,15 @@ pub(crate) fn to_owned_event(
400406
worker_id: e.worker_id,
401407
worker_local_queue_depth: e.local_queue as usize,
402408
cpu_time_nanos: e.cpu_time_ns,
409+
tid: e.tid,
403410
},
404411
TelemetryEventRef::WorkerUnpark(e) => TelemetryEvent::WorkerUnpark {
405412
timestamp_nanos: e.timestamp_ns,
406413
worker_id: e.worker_id,
407414
worker_local_queue_depth: e.local_queue as usize,
408415
cpu_time_nanos: e.cpu_time_ns,
409416
sched_wait_delta_nanos: e.sched_wait_ns,
417+
tid: e.tid,
410418
},
411419
TelemetryEventRef::QueueSample(e) => TelemetryEvent::QueueSample {
412420
timestamp_nanos: e.timestamp_ns,

dial9-tokio-telemetry/src/telemetry/recorder/runtime_context.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,7 @@ pub(super) fn make_worker_park(ctx: &RuntimeContext, shared: &SharedState) -> Wo
262262
worker_id: resolved.map(|(id, _)| id).unwrap_or(WorkerId::UNKNOWN),
263263
local_queue: worker_local_queue_depth as u8,
264264
cpu_time_ns: cpu_time_nanos,
265+
tid: crate::telemetry::events::current_tid(),
265266
}
266267
}
267268

@@ -283,5 +284,6 @@ pub(super) fn make_worker_unpark(ctx: &RuntimeContext, shared: &SharedState) ->
283284
local_queue: worker_local_queue_depth as u8,
284285
cpu_time_ns: cpu_time_nanos,
285286
sched_wait_ns: sched_wait_delta_nanos,
287+
tid: crate::telemetry::events::current_tid(),
286288
}
287289
}

dial9-tokio-telemetry/src/telemetry/writer.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,7 @@ mod tests {
751751
worker_id: crate::telemetry::format::WorkerId::from(0usize),
752752
local_queue: 2,
753753
cpu_time_ns: 0,
754+
tid: 0,
754755
});
755756
Batch {
756757
encoded_bytes: enc.into_inner(),
@@ -1885,6 +1886,7 @@ mod tests {
18851886
worker_id: crate::telemetry::format::WorkerId::from(0usize),
18861887
local_queue: 0,
18871888
cpu_time_ns: 0,
1889+
tid: 0,
18881890
});
18891891
let buf = enc.into_inner();
18901892

@@ -1922,6 +1924,7 @@ mod tests {
19221924
worker_id: crate::telemetry::format::WorkerId::from(0usize),
19231925
local_queue: 0,
19241926
cpu_time_ns: 0,
1927+
tid: 0,
19251928
});
19261929
writer
19271930
.write_encoded_batch(&Batch {

0 commit comments

Comments
 (0)