Skip to content

Commit cd4bc43

Browse files
authored
Merge branch 'main' into refactor-resolve-borrow
2 parents a801fb0 + cb945af commit cd4bc43

42 files changed

Lines changed: 2101 additions & 263 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

AGENTS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,7 @@ The demo trace is used for:
2626
- Testing the viewer with real data
2727

2828
Failing to update it will cause the viewer to fail when loading the demo.
29+
30+
## Meta
31+
32+
- Never declare done after pushing or opening a PR until CI is green. Check CI status and fix any failures before moving on.

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dial9-tokio-telemetry/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ serde = { version = "1", features = ["derive"] }
2121
serde_json = "1"
2222
smallvec = "1"
2323
dial9-perf-self-profile = { workspace = true, optional = true }
24+
tracing = "0.1.44"
2425

2526
[features]
2627
cpu-profiling = ["dep:dial9-perf-self-profile"]

dial9-tokio-telemetry/README.md

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ fn main() -> std::io::Result<()> {
1919
let mut builder = tokio::runtime::Builder::new_multi_thread();
2020
builder.worker_threads(4).enable_all();
2121

22-
let (runtime, _guard) = TracedRuntime::build_and_start(builder, Box::new(writer))?;
22+
let (runtime, _guard) = TracedRuntime::build_and_start(builder, writer)?;
2323

2424
runtime.block_on(async {
2525
// your async code here
@@ -70,7 +70,7 @@ Use `handle.spawn()` instead of `tokio::spawn()`:
7070
# fn main() -> std::io::Result<()> {
7171
# let writer = RotatingWriter::new("/tmp/t.bin", 1024, 4096)?;
7272
# let builder = tokio::runtime::Builder::new_multi_thread();
73-
let (runtime, guard) = TracedRuntime::build_and_start(builder, Box::new(writer))?;
73+
let (runtime, guard) = TracedRuntime::build_and_start(builder, writer)?;
7474
let handle = guard.handle();
7575
7676
runtime.block_on(async {
@@ -100,7 +100,7 @@ On non-Linux platforms these fields are zero.
100100

101101
With the `cpu-profiling` feature, you can enable `perf_event_open`-based CPU sampling. This gives two key pieces of data:
102102
1. Stack traces when code was running on the CPU — aka flamegraphs
103-
2. 2. Stack traces when the kernel _descheduled_ your thread. For example, if you use `std::thread::sleep` in your future or are seeing `std::sync::Mutex` contention, this will allow you to see precisely where this is happening in async code.
103+
2. Stack traces when the kernel _descheduled_ your thread. For example, if you use `std::thread::sleep` in your future or are seeing `std::sync::Mutex` contention, this will allow you to see precisely where this is happening in async code.
104104

105105
Both of these events are tied to the precise instant and thread that they happened on, so you can compare what was different between degraded and normal performance.
106106

@@ -117,7 +117,7 @@ let (runtime, guard) = TracedRuntime::builder()
117117
.with_cpu_profiling(CpuProfilingConfig::default())
118118
.with_sched_events(SchedEventConfig { include_kernel: true })
119119
.with_inline_callframe_symbols(true)
120-
.build(builder, Box::new(writer))?;
120+
.build_and_start(builder, writer)?;
121121
# Ok(())
122122
# }
123123
# #[cfg(not(feature = "cpu-profiling"))]
@@ -126,6 +126,30 @@ let (runtime, guard) = TracedRuntime::builder()
126126

127127
This pulls in [`dial9-perf-self-profile`](/perf-self-profile) for `perf_event_open` access. It records `CpuSample` events with full callchains and `CallframeDef` / `ThreadNameDef` metadata for offline symbolization.
128128

129+
#### Requirements
130+
131+
**Frame pointers**: CPU profile stack traces rely on frame-pointer-based unwinding. Compile your application with frame pointers enabled, otherwise stack traces will be truncated or missing:
132+
133+
```toml
134+
# .cargo/config.toml
135+
[build]
136+
rustflags = ["-C", "force-frame-pointers=yes"]
137+
```
138+
139+
**`perf_event_paranoid`**: CPU profiling features require `perf_event_paranoid` ≤ 2 for sampling, and ≤ 1 for scheduler event tracking (`with_sched_events`):
140+
141+
```bash
142+
# check current value
143+
cat /proc/sys/kernel/perf_event_paranoid
144+
145+
# allow CPU sampling and scheduler event tracking
146+
sudo sysctl kernel.perf_event_paranoid=1
147+
```
148+
149+
#### Diagnosing long polls with CPU samples
150+
151+
Because CPU samples are tagged with the worker thread they were collected on, and the trace records which task is being polled on each worker at each instant, the viewer can correlate samples with individual polls. When a poll takes an unusually long time (a "long poll"), the CPU samples collected during that poll show you exactly what code was running — expensive serialization, accidental blocking I/O, lock contention, etc. In the trace viewer, click on a long poll to see its flamegraph, or shift+drag to aggregate CPU samples across a time range.
152+
129153
## Getting started
130154

131155
`TracedRuntime::build` returns a `(Runtime, TelemetryGuard)`. The guard owns the flush thread and provides a `TelemetryHandle` for enabling/disabling recording at runtime:
@@ -137,7 +161,7 @@ This pulls in [`dial9-perf-self-profile`](/perf-self-profile) for `perf_event_op
137161
# let builder = tokio::runtime::Builder::new_multi_thread();
138162
let (runtime, guard) = TracedRuntime::builder()
139163
.with_task_tracking(true)
140-
.build(builder, Box::new(writer))?;
164+
.build(builder, writer)?;
141165
142166
// start disabled, enable later
143167
guard.enable();
@@ -151,7 +175,7 @@ handle.disable();
151175

152176
### Writers
153177

154-
`RotatingWriter` rotates files and evicts old ones to stay within a total size budget. `SimpleBinaryWriter` writes a single file with no size management, useful for quick experiments.
178+
`RotatingWriter` rotates files and evicts old ones to stay within a total size budget. For quick experiments, `RotatingWriter::single_file(path)` writes a single file with no rotation.
155179

156180
### Analyzing traces
157181

dial9-tokio-telemetry/benches/overhead_bench.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#[cfg(target_os = "linux")]
1717
use dial9_tokio_telemetry::telemetry::CpuProfilingConfig;
1818
use dial9_tokio_telemetry::telemetry::{
19-
NullWriter, SimpleBinaryWriter, TelemetryGuard, TelemetryHandle, TracedRuntime,
19+
NullWriter, RotatingWriter, TelemetryGuard, TelemetryHandle, TracedRuntime,
2020
};
2121
use hdrhistogram::Histogram;
2222
use std::sync::Arc;
@@ -172,8 +172,7 @@ fn main() {
172172

173173
let (server_rt, _guard): (tokio::runtime::Runtime, Option<TelemetryGuard>) = match mode {
174174
"telemetry" => {
175-
let writer =
176-
Box::new(SimpleBinaryWriter::new("/tmp/overhead_bench_trace.bin").unwrap());
175+
let writer = RotatingWriter::single_file("/tmp/overhead_bench_trace.bin").unwrap();
177176
let mut tb = TracedRuntime::builder().with_task_tracking(true);
178177
#[cfg(target_os = "linux")]
179178
{
@@ -186,7 +185,7 @@ fn main() {
186185
}
187186
"noop" => {
188187
let (rt, g) = TracedRuntime::builder()
189-
.build_and_start(builder, Box::new(NullWriter))
188+
.build_and_start(builder, NullWriter)
190189
.unwrap();
191190
(rt, Some(g))
192191
}

dial9-tokio-telemetry/design/tokio-telemetry-system.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ Simple wrapper combining event type with metrics snapshot.
167167
- `Send` bound for thread safety
168168
- Batch write method for efficiency
169169
- Explicit flush control
170-
- Implementations: `SimpleBinaryWriter`, `RotatingWriter`, `NullWriter` (for benchmarking)
170+
- Implementations: `RotatingWriter`, `NullWriter` (for benchmarking)
171171

172172
**Binary Format (v5):**
173173
Variable-size records optimize for the common case:
@@ -238,7 +238,7 @@ Variable-size records optimize for the common case:
238238
**Usage Pattern:**
239239
```rust
240240
let writer = RotatingWriter::new("trace.bin", 1_MB, 5_MB)?;
241-
let (runtime, guard) = TracedRuntime::build(builder, Box::new(writer))?;
241+
let (runtime, guard) = TracedRuntime::build(builder, writer)?;
242242

243243
// Use runtime normally
244244
runtime.block_on(async { /* ... */ });
@@ -387,7 +387,7 @@ src/
387387
├── events.rs # EventType, MetricsSnapshot, TelemetryEvent, CPU time helpers
388388
├── buffer.rs # ThreadLocalBuffer
389389
├── collector.rs # CentralCollector (Mutex-based)
390-
├── writer.rs # TraceWriter trait, SimpleBinaryWriter, RotatingWriter, NullWriter
390+
├── writer.rs # TraceWriter trait, RotatingWriter, NullWriter
391391
├── recorder.rs # TelemetryRecorder, TracedRuntime, TelemetryGuard
392392
├── format.rs # Binary format v5 (variable-size records)
393393
└── analysis.rs # TraceReader, analyze_trace, detect_idle_workers, compute_active_periods
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
//! Example demonstrating blocking pool tracking.
2+
//!
3+
//! Spawns work on tokio's blocking pool via `spawn_blocking`, then writes a
4+
//! trace. Blocking pool samples should appear with `worker_id = 254`
5+
//! (BLOCKING_WORKER) instead of 255 (UNKNOWN_WORKER).
6+
//!
7+
//! Usage:
8+
//! cargo run --release --features cpu-profiling --example blocking_pool_tracking
9+
10+
use dial9_tokio_telemetry::telemetry::{CpuProfilingConfig, RotatingWriter, TracedRuntime};
11+
use std::time::Duration;
12+
13+
fn burn_cpu(duration: Duration) {
14+
let start = std::time::Instant::now();
15+
let mut x: u64 = 1;
16+
while start.elapsed() < duration {
17+
for _ in 0..1000 {
18+
x = x.wrapping_mul(6364136223846793005).wrapping_add(1);
19+
}
20+
std::hint::black_box(x);
21+
}
22+
}
23+
24+
fn main() {
25+
let mut builder = tokio::runtime::Builder::new_multi_thread();
26+
builder.worker_threads(2).enable_all();
27+
28+
let writer = RotatingWriter::single_file("blocking_pool_trace.bin").unwrap();
29+
let (runtime, _guard) = TracedRuntime::builder()
30+
.with_task_tracking(true)
31+
.with_cpu_profiling(CpuProfilingConfig {
32+
frequency_hz: 999,
33+
..Default::default()
34+
})
35+
.with_inline_callframe_symbols(true)
36+
.build_and_start(builder, writer)
37+
.unwrap();
38+
39+
runtime.block_on(async {
40+
// Spawn CPU-intensive work on the blocking pool
41+
let handles: Vec<_> = (0..3)
42+
.map(|i| {
43+
tokio::task::spawn_blocking(move || {
44+
eprintln!("blocking task {i} starting");
45+
burn_cpu(Duration::from_millis(500));
46+
eprintln!("blocking task {i} done");
47+
})
48+
})
49+
.collect();
50+
51+
for h in handles {
52+
h.await.unwrap();
53+
}
54+
55+
// Let flush cycle capture everything
56+
tokio::time::sleep(Duration::from_millis(500)).await;
57+
});
58+
59+
eprintln!("Trace written to blocking_pool_trace.bin");
60+
}

dial9-tokio-telemetry/examples/blocking_sleep.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use dial9_tokio_telemetry::telemetry::{SimpleBinaryWriter, TracedRuntime};
1+
use dial9_tokio_telemetry::telemetry::{RotatingWriter, TracedRuntime};
22
use std::time::Duration;
33

44
async fn blocking_task(id: usize) {
@@ -14,7 +14,7 @@ fn main() {
1414
let mut builder = tokio::runtime::Builder::new_multi_thread();
1515
builder.worker_threads(2).enable_all();
1616

17-
let writer = Box::new(SimpleBinaryWriter::new("blocking_sleep_trace.bin").unwrap());
17+
let writer = RotatingWriter::single_file("blocking_sleep_trace.bin").unwrap();
1818
let (runtime, _guard) = TracedRuntime::builder()
1919
.with_task_tracking(true)
2020
.with_cpu_profiling(Default::default())

dial9-tokio-telemetry/examples/cpu_profile_workload.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
//! echo 2 | sudo tee /proc/sys/kernel/perf_event_paranoid
1111
1212
use dial9_tokio_telemetry::telemetry::{
13-
CpuProfilingConfig, SimpleBinaryWriter, TelemetryEvent, TracedRuntime,
13+
CpuProfilingConfig, RotatingWriter, TelemetryEvent, TracedRuntime,
1414
};
1515
use std::time::Duration;
1616

@@ -38,7 +38,7 @@ fn main() {
3838
let mut builder = tokio::runtime::Builder::new_multi_thread();
3939
builder.worker_threads(4).enable_all();
4040

41-
let writer = Box::new(SimpleBinaryWriter::new(trace_path).unwrap());
41+
let writer = RotatingWriter::single_file(trace_path).unwrap();
4242
let (runtime, guard) = TracedRuntime::builder()
4343
.with_task_tracking(true)
4444
.with_cpu_profiling(CpuProfilingConfig::default())

dial9-tokio-telemetry/examples/long_workload.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use dial9_tokio_telemetry::telemetry::{SimpleBinaryWriter, TracedRuntime};
1+
use dial9_tokio_telemetry::telemetry::{RotatingWriter, TracedRuntime};
22
use std::time::Duration;
33
use tokio::io::{AsyncReadExt, AsyncWriteExt};
44
use tokio::net::TcpListener;
@@ -83,7 +83,7 @@ fn main() {
8383
.and_then(|s| s.parse().ok())
8484
.unwrap_or(30u64);
8585

86-
let writer = Box::new(SimpleBinaryWriter::new("long_trace.bin").unwrap());
86+
let writer = RotatingWriter::single_file("long_trace.bin").unwrap();
8787
let (runtime, _guard) = TracedRuntime::builder()
8888
.build_and_start(builder, writer)
8989
.unwrap();

0 commit comments

Comments
 (0)