Skip to content

Commit 0e1e8fd

Browse files
authored
Add task dump capture behind taskdump feature (#354)
* Add task dump capture behind taskdump feature

  Wraps spawned futures in `TaskDumped<F>` when the `taskdump` feature is enabled. On each poll, if the previous idle gap exceeded the configured threshold, the frames captured at the last yield point are emitted as `TaskDumpEvent`.

  Capture itself runs inside `tokio::runtime::dump::trace_with` with a no-op waker on a diagnostic re-poll, so it doesn't produce duplicate wake or poll events.

  Configured via `TracedRuntimeBuilder::with_task_dumps(TaskDumpConfig)` or `TelemetryCoreBuilder::task_dump_config`. Capture short-circuits when the guard is disabled, so a paused guard skips `trace_with` entirely.

  Bumps tokio to 1.52 for the `taskdump` feature.

* switch ci to use private feature

* cache the timestamp on poll_start
1 parent 00dc9bf commit 0e1e8fd

18 files changed

Lines changed: 667 additions & 8 deletions

File tree

.github/actions/rust-build/action.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,12 @@ runs:
3232
if [ "${{ inputs.toolchain }}" = nightly ]; then
3333
rm -fv Cargo.lock
3434
fi
35+
# tokio's `taskdump` feature only compiles on linux aarch64/x86/x86_64.
36+
# On non-Linux runners (e.g. macOS) drop it from --all-features; leave everything else intact.
3537
if [ "$RUNNER_OS" = "Linux" ]; then
3638
# Raise locked-memory limit for large perf ring buffers
3739
sudo prlimit --pid $$ --memlock=unlimited:unlimited
40+
cargo test --all-targets --all-features
41+
else
42+
cargo test --all-targets --features __nonlinux_all_features
3843
fi
39-
cargo test --all-targets --all-features

Cargo.lock

Lines changed: 37 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dial9-tokio-telemetry/Cargo.toml

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ all-features = true
1919
unexpected_cfgs = { level = "warn", check-cfg = ["cfg(shuttle)"] }
2020

2121
[dependencies]
22-
tokio = { version = "1.51.0", features = ["rt", "macros", "time", "rt-multi-thread", "sync", "net", "io-util"] }
22+
tokio = { version = "1.52.0", features = ["rt", "macros", "time", "rt-multi-thread", "sync", "net", "io-util"] }
2323
tokio-util = "0.7"
2424
arc-swap = "1"
2525
crossbeam-queue = "0.3"
@@ -29,6 +29,7 @@ pin-project-lite = "0.2"
2929
serde = { version = "1", features = ["derive"] }
3030
serde_json = "1"
3131
smallvec = "1"
32+
backtrace = { version = "0.3", optional = true }
3233
dial9-perf-self-profile = { workspace = true, optional = true }
3334
dial9-trace-format = { workspace = true, features = ["serde"] }
3435
tracing = "0.1.44"
@@ -50,8 +51,15 @@ shuttle = { version = "0.9.1", optional = true }
5051
analysis = []
5152
cpu-profiling = ["dep:dial9-perf-self-profile"]
5253
_shuttle = ["dep:shuttle", "metrique-timesource/custom-timesource", "metrique-timesource/test-util"]
54+
## Capture async backtraces at tokio yield points. Linux-only
55+
## (aarch64/x86/x86_64): tokio's upstream `taskdump` feature refuses to
56+
## compile on other targets. Enabling this feature on an unsupported target
57+
## is a hard compile error from tokio.
58+
taskdump = ["tokio/taskdump", "dep:backtrace"]
5359
tracing-layer = ["dep:tracing-subscriber"]
5460
worker-s3 = ["dep:aws-sdk-s3-transfer-manager", "dep:aws-sdk-s3", "dep:aws-config", "dep:time"]
61+
## All features except platform-specific ones (taskdump). Used in CI for non-Linux targets.
62+
__nonlinux_all_features = ["analysis", "cpu-profiling", "tracing-layer", "worker-s3"]
5563

5664
[dev-dependencies]
5765
dial9-tokio-telemetry = { path = ".", features = ["analysis", "tracing-layer", "worker-s3"] }
@@ -61,7 +69,7 @@ clap = { version = "4", features = ["derive", "env"] }
6169
hdrhistogram = "7"
6270
metrique-timesource = { version = "0.1", features = ["custom-timesource", "tokio"] }
6371
metrique-writer = { version = "0.1", features = ["test-util"] }
64-
tokio = { version = "1.51.0", features = ["test-util"] }
72+
tokio = { version = "1.52.0", features = ["test-util"] }
6573
proptest = "1"
6674
tempfile = "3"
6775
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
@@ -74,7 +82,7 @@ async-trait = "0.1.89"
7482
uuid = { version = "1", features = ["v4"] }
7583

7684
[target.'cfg(target_os = "linux")'.dev-dependencies]
77-
dial9-tokio-telemetry = { path = ".", features = ["cpu-profiling", "worker-s3", "analysis", "tracing-layer"] }
85+
dial9-tokio-telemetry = { path = ".", features = ["cpu-profiling", "worker-s3", "analysis", "taskdump", "tracing-layer"] }
7886
nix = { version = "0.29", features = ["process"] }
7987

8088
[[bench]]

dial9-tokio-telemetry/examples/trace_to_fat_jsonl.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ fn to_fat_event(event: &TelemetryEvent, reader: &TraceReader) -> Option<FatEvent
180180
}),
181181
TelemetryEvent::TaskSpawn { .. }
182182
| TelemetryEvent::TaskTerminate { .. }
183+
| TelemetryEvent::TaskDump { .. }
183184
| TelemetryEvent::ThreadNameDef { .. }
184185
| TelemetryEvent::SegmentMetadata { .. }
185186
| TelemetryEvent::ClockSync { .. } => None,

dial9-tokio-telemetry/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ pub mod background_task;
1616
pub(crate) mod metrics;
1717
pub(crate) mod primitives;
1818
pub(crate) mod rate_limit;
19+
#[cfg(feature = "taskdump")]
20+
pub(crate) mod task_dumped;
1921
/// Core telemetry types, recording, and trace I/O.
2022
pub mod telemetry;
2123
pub(crate) mod traced;

0 commit comments

Comments
 (0)