Skip to content

Commit b956998

Browse files
authored
feat!: Include CPU id in CPU profile samples (#338)
* feat: Include CPU id in CPU profile samples

  perf-self-profile (breaking change):
  - Sample.cpu is now Option<u32> rather than u32. Perf sampling always returns Some(cpu) via PERF_SAMPLE_CPU; the ctimer fallback returns None when SYS_getcpu fails instead of silently reporting cpu 0.
  - SampleData/DrainedSample/SlotWriter::write in the lock-free ring buffer thread Option<u32> through the signal-handler-safe path.

  dial9-tokio-telemetry:
  - CpuSampleData.cpu: Option<u32> (in-memory).
  - CpuSampleEvent.cpu: Option<u64> on the wire. Widened to u64 so the field encodes as OptionalVarint: 1 byte when absent, 2 bytes total for typical small CPU ids. Narrowed back to Option<u32> on decode.
  - TelemetryEvent::CpuSample gains cpu: Option<u32>. Older traces without the field decode as None (backward-compatible).
  - RawCpuSample.cpu threaded through CpuProfiler::drain and SchedProfiler::drain.

  JS parser: CpuSample objects now expose cpu: number|null.

  Tests:
  - perf-self-profile: multithread.rs asserts every perf sample carries Some(cpu) (Linux-only).
  - dial9-tokio-telemetry: two round-trip tests in buffer.rs exercising encode+decode with cpu=Some(7) and cpu=None through ThreadLocalBuffer.

  Drive-by: narrow cfg(test) on SchedStat.fd to cfg(all(test, target_os = "linux")) to match its only reader, fixing a dead_code warning on non-Linux builds.

  Demo trace not yet regenerated (pending Linux environment).

* Regenerate demo trace with cpu id in CpuSample events

  Verified 7411 / 7411 CpuSample events carry a cpu id (10 distinct CPU ids observed: 0-9). Generated via scripts/regenerate_demo_trace_docker.sh on the aarch64 docker builder.
1 parent 7103006 commit b956998

15 files changed

Lines changed: 140 additions & 34 deletions

File tree

dial9-tokio-telemetry/src/telemetry/analysis.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,6 +1271,7 @@ mod tests {
12711271
thread_name: None,
12721272
source: CpuSampleSource::CpuProfile,
12731273
callchain: vec![],
1274+
cpu: None,
12741275
},
12751276
TelemetryEvent::CpuSample {
12761277
timestamp_nanos: 1_800_000,
@@ -1279,6 +1280,7 @@ mod tests {
12791280
thread_name: None,
12801281
source: CpuSampleSource::SchedEvent,
12811282
callchain: vec![],
1283+
cpu: None,
12821284
},
12831285
TelemetryEvent::PollEnd {
12841286
timestamp_nanos: 2_000_000,
@@ -1332,6 +1334,7 @@ mod tests {
13321334
thread_name: None,
13331335
source: CpuSampleSource::CpuProfile,
13341336
callchain: vec![],
1337+
cpu: None,
13351338
},
13361339
];
13371340
let sampled = detect_sampled_polls(&events);

dial9-tokio-telemetry/src/telemetry/buffer.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ impl Encodable for RawEvent {
267267
source: data.source,
268268
thread_name,
269269
callchain,
270+
cpu: data.cpu.map(u64::from),
270271
});
271272
}
272273
}
@@ -607,4 +608,55 @@ mod tests {
607608
let guard = buf.lock().unwrap();
608609
assert_eq!(guard.event_count, 1);
609610
}
611+
612+
/// Encode a single `RawEvent::CpuSample` through a real thread-local buffer
613+
/// and decode it back via the public `decode_events` path, asserting that
614+
/// the `cpu` field round-trips.
615+
fn cpu_sample_round_trip(cpu: Option<u32>) -> crate::telemetry::events::TelemetryEvent {
616+
use crate::telemetry::events::{CpuSampleData, CpuSampleSource, RawEvent};
617+
use crate::telemetry::format::{WorkerId, decode_events};
618+
619+
let data = CpuSampleData {
620+
timestamp_nanos: 12_345,
621+
worker_id: WorkerId::from(0usize),
622+
tid: 4242,
623+
thread_name: None,
624+
source: CpuSampleSource::CpuProfile,
625+
callchain: vec![0xdead_beef, 0xcafe_babe],
626+
cpu,
627+
};
628+
let encoded = ThreadLocalBuffer::encode_single(&RawEvent::CpuSample(Box::new(data)));
629+
let events = decode_events(&encoded).expect("decode");
630+
assert_eq!(events.len(), 1);
631+
events.into_iter().next().unwrap()
632+
}
633+
634+
#[test]
635+
fn cpu_sample_event_round_trips_with_cpu() {
636+
use crate::telemetry::events::TelemetryEvent;
637+
match cpu_sample_round_trip(Some(7)) {
638+
TelemetryEvent::CpuSample {
639+
tid,
640+
cpu,
641+
callchain,
642+
..
643+
} => {
644+
assert_eq!(tid, 4242);
645+
assert_eq!(cpu, Some(7));
646+
assert_eq!(callchain, vec![0xdead_beef, 0xcafe_babe]);
647+
}
648+
other => panic!("expected CpuSample, got {other:?}"),
649+
}
650+
}
651+
652+
#[test]
653+
fn cpu_sample_event_round_trips_without_cpu() {
654+
use crate::telemetry::events::TelemetryEvent;
655+
match cpu_sample_round_trip(None) {
656+
TelemetryEvent::CpuSample { cpu, .. } => {
657+
assert_eq!(cpu, None);
658+
}
659+
other => panic!("expected CpuSample, got {other:?}"),
660+
}
661+
}
610662
}

dial9-tokio-telemetry/src/telemetry/cpu_profile.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ pub(crate) struct RawCpuSample {
8686
pub timestamp_nanos: u64,
8787
pub callchain: Vec<u64>,
8888
pub source: CpuSampleSource,
89+
pub cpu: Option<u32>,
8990
}
9091

9192
/// Manages the process-wide perf sampler. Yields raw samples without worker IDs.
@@ -134,6 +135,7 @@ impl CpuProfiler {
134135
timestamp_nanos: sample.time,
135136
callchain: sample.callchain.clone(),
136137
source: CpuSampleSource::CpuProfile,
138+
cpu: sample.cpu,
137139
},
138140
thread_name,
139141
);
@@ -172,6 +174,7 @@ impl SchedProfiler {
172174
timestamp_nanos: sample.time,
173175
callchain: sample.callchain.clone(),
174176
source: CpuSampleSource::SchedEvent,
177+
cpu: sample.cpu,
175178
});
176179
});
177180
}

dial9-tokio-telemetry/src/telemetry/events.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,10 @@ pub enum TelemetryEvent {
161161
source: CpuSampleSource,
162162
/// Raw instruction pointer addresses (leaf first). Symbolized offline.
163163
callchain: Vec<u64>,
164+
/// CPU the sample was taken on, if the backend could determine it.
165+
/// Perf sampling fills this in; ctimer may report `None` if `getcpu`
166+
/// fails. Older traces recorded before this field existed decode as `None`.
167+
cpu: Option<u32>,
164168
},
165169
/// Maps an OS thread ID to its name (from `/proc/self/task/<tid>/comm`).
166170
/// Emitted before the first CpuSample referencing this tid in each file.
@@ -349,6 +353,8 @@ pub(crate) struct CpuSampleData {
349353
pub thread_name: Option<ThreadName>,
350354
pub source: CpuSampleSource,
351355
pub callchain: Vec<u64>,
356+
/// CPU the sample was taken on, if the backend could determine it.
357+
pub cpu: Option<u32>,
352358
}
353359

354360
/// Get the OS thread ID (tid) of the calling thread via `gettid()`.
@@ -450,7 +456,7 @@ pub(crate) fn clock_pair() -> (u64, u64) {
450456
pub(crate) struct SchedStat {
451457
pub wait_time_ns: u64,
452458
/// Raw fd backing this read, exposed for FD-lifecycle tests. Not used in production.
453-
#[cfg(test)]
459+
#[cfg(all(test, target_os = "linux"))]
454460
fd: std::os::fd::RawFd,
455461
}
456462

@@ -509,7 +515,7 @@ impl SchedStat {
509515
.ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidData, "bad schedstat"))?;
510516
Ok(Self {
511517
wait_time_ns,
512-
#[cfg(test)]
518+
#[cfg(all(test, target_os = "linux"))]
513519
fd,
514520
})
515521
}

dial9-tokio-telemetry/src/telemetry/format.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,11 @@ pub(crate) struct CpuSampleEvent {
196196
pub source: CpuSampleSource,
197197
pub thread_name: Option<InternedString>,
198198
pub callchain: InternedStackFrames,
199+
/// CPU the sample was taken on, if the backend could determine it.
200+
///
201+
/// Widened to `u64` on the wire so the field encodes as `OptionalVarint`:
202+
/// 1 byte when absent, typically 2 bytes (tag + small-varint) when present.
203+
pub cpu: Option<u64>,
199204
}
200205

201206
/// Wire-format event for a wake notification.
@@ -414,6 +419,8 @@ pub(crate) fn to_owned_event(
414419
.get(e.callchain)
415420
.expect("stack pool entry must exist for CpuSample callchain")
416421
.to_vec(),
422+
// CPU id is varint-encoded as u64 on the wire; real CPU ids fit in u32.
423+
cpu: e.cpu.map(|v| v as u32),
417424
},
418425
TelemetryEventRef::WakeEvent(e) => TelemetryEvent::WakeEvent {
419426
timestamp_nanos: e.timestamp_ns,

dial9-tokio-telemetry/src/telemetry/recorder/event_writer.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ impl EventWriter {
8787
source: raw.source,
8888
callchain: raw.callchain,
8989
thread_name: thread_name.cloned(),
90+
cpu: raw.cpu,
9091
};
9192
record_event(
9293
RawEvent::CpuSample(Box::new(data)),
@@ -112,6 +113,7 @@ impl EventWriter {
112113
// TODO: we should be able to also track thread name here.
113114
// sampler is running on worker threads so no thread name
114115
thread_name: None,
116+
cpu: raw.cpu,
115117
};
116118
record_event(
117119
RawEvent::CpuSample(Box::new(data)),

dial9-tokio-telemetry/src/telemetry/recorder/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2393,6 +2393,7 @@ mod tests {
23932393
source: CpuSampleSource::CpuProfile,
23942394
thread_name: None,
23952395
callchain: callchain.clone(),
2396+
cpu: None,
23962397
};
23972398
*timestamp += 1;
23982399
ew.write_raw_event(RawEvent::CpuSample(Box::new(data)))

dial9-viewer/ui/demo-trace.bin

1.63 MB
Binary file not shown.

dial9-viewer/ui/trace_parser.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
* tid: number,
7474
* source: number,
7575
* callchain: string[],
76+
* cpu: number|null,
7677
* }} CpuSample
7778
*/
7879

@@ -356,12 +357,17 @@
356357
const chain = (v.callchain || []).map(
357358
(addr) => "0x" + BigInt(addr).toString(16)
358359
);
360+
// `cpu` is encoded as OptionalVarint: null when the backend could
361+
// not determine the CPU. Varints decode as strings for BigInt safety;
362+
// CPU ids always fit in a Number.
363+
const cpu = v.cpu == null ? null : Number(v.cpu);
359364
cpuSamples.push({
360365
timestamp: ts,
361366
workerId: num(v.worker_id),
362367
tid: num(v.tid),
363368
source: num(v.source),
364369
callchain: chain,
370+
cpu,
365371
});
366372
const tn = v.thread_name;
367373
if (tn) {

perf-self-profile/examples/basic.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,13 @@ fn main() {
4848
// --- Print a few raw samples ---
4949
eprintln!("=== First 3 samples ===");
5050
for (i, sample) in samples.iter().take(3).enumerate() {
51+
let cpu = sample
52+
.cpu
53+
.map(|c| c.to_string())
54+
.unwrap_or_else(|| "?".into());
5155
eprintln!(
52-
"Sample {i}: ip={:#x}, tid={}, cpu={}, frames:",
53-
sample.ip, sample.tid, sample.cpu
56+
"Sample {i}: ip={:#x}, tid={}, cpu={cpu}, frames:",
57+
sample.ip, sample.tid
5458
);
5559
for (j, &addr) in sample.callchain.iter().enumerate() {
5660
let sym = resolve_symbol(addr);

0 commit comments

Comments
 (0)