diff --git a/.github/actions/rust-build/action.yml b/.github/actions/rust-build/action.yml index 30a654fc..b5c7c46a 100644 --- a/.github/actions/rust-build/action.yml +++ b/.github/actions/rust-build/action.yml @@ -20,7 +20,7 @@ runs: if [ "${{ inputs.toolchain }}" != stable ]; then rm -fv Cargo.lock fi - cargo build + cargo build --all-targets --all-features - name: Enable perf_event_open and kallsyms for tests shell: bash run: | @@ -33,4 +33,4 @@ runs: run: | # Raise locked-memory limit for large perf ring buffers sudo prlimit --pid $$ --memlock=unlimited:unlimited - cargo test + cargo test --all-features diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 552ea11a..e67e870a 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -25,4 +25,4 @@ jobs: shell: bash env: RUSTFLAGS: "--cfg tokio_unstable" - run: cargo clippy --workspace -- -D warnings + run: cargo clippy --all-targets --all-features -- -D warnings diff --git a/Cargo.lock b/Cargo.lock index 46351896..b317929e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12,15 +12,6 @@ dependencies = [ "regex", ] -[[package]] -name = "addr2line" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" version = "2.0.1" @@ -797,21 +788,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "backtrace" -version = "0.3.76" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide 0.8.9", - "object", - "rustc-demangle", - "windows-link", -] - [[package]] name = "base16ct" version = "0.1.1" @@ -2899,15 +2875,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3c00a0c9600379bd32f8972de90676a7672cba3bf4886986bc05902afc1e093" -[[package]] -name = "object" -version = "0.37.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" -dependencies = [ - "memchr", -] - [[package]] name = "once_cell" version = "1.21.4" @@ -4093,7 +4060,6 @@ version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2bd1c4c0fc4a7ab90fc15ef6daaa3ec3b893f004f915f2392557ed23237820cd" dependencies = [ - "backtrace", "bytes", "libc", "mio", diff --git a/dial9-tokio-telemetry/Cargo.toml b/dial9-tokio-telemetry/Cargo.toml index dfe165be..8210e42d 100644 --- a/dial9-tokio-telemetry/Cargo.toml +++ b/dial9-tokio-telemetry/Cargo.toml @@ -42,7 +42,6 @@ metrique = { version = "0.1.23", features = ["local-format"] } [features] analysis = [] cpu-profiling = ["dep:dial9-perf-self-profile"] -task-dump = ["tokio/taskdump"] worker-s3 = ["dep:aws-sdk-s3-transfer-manager", "dep:aws-sdk-s3", "dep:aws-config", "dep:time"] [dev-dependencies] @@ -67,11 +66,6 @@ uuid = { version = "1", features = ["v4"] } dial9-tokio-telemetry = { path = ".", features = ["cpu-profiling", "worker-s3", "analysis"] } nix = { version = "0.29", features = ["process"] } -[[bench]] -name = "poll_overhead" -harness = false -required-features = ["task-dump"] - [[bench]] name = "overhead_bench" harness = false @@ -80,22 +74,6 @@ harness = false name = "e2e_workload" harness = false -[[example]] -name = "long_sleep" -required-features = ["task-dump"] - -[[example]] -name = "completing_task" -required-features = ["task-dump"] - -[[example]] -name = "cancelled_task" -required-features = ["task-dump"] - -[[example]] -name = "debug_timing" -required-features = ["task-dump"] - [[example]] name = "blocking_sleep" required-features = ["cpu-profiling"] diff --git a/dial9-tokio-telemetry/README.md b/dial9-tokio-telemetry/README.md index 028e27c5..484e93af 100644 --- a/dial9-tokio-telemetry/README.md +++ b/dial9-tokio-telemetry/README.md @@ -221,7 +221,6 @@ See [TRACE_ANALYSIS_GUIDE.md](/dial9-tokio-telemetry/TRACE_ANALYSIS_GUIDE.md) fo ## Features - **`cpu-profiling`** — Linux only. Enables `perf_event_open`-based CPU sampling and scheduler event capture via `dial9-perf-self-profile`. -- **`task-dump`** — Enables Tokio's `taskdump` feature for async stack traces. Required for the `long_sleep`, `completing_task`, `cancelled_task`, and `debug_timing` examples. - **`worker-s3`** — Enables S3 upload support. Adds `aws-sdk-s3`, `aws-sdk-s3-transfer-manager`, `aws-config`, and `flate2`. ## S3 upload diff --git a/dial9-tokio-telemetry/benches/e2e_workload.rs b/dial9-tokio-telemetry/benches/e2e_workload.rs index 25c91a37..de69da0e 100644 --- a/dial9-tokio-telemetry/benches/e2e_workload.rs +++ b/dial9-tokio-telemetry/benches/e2e_workload.rs @@ -4,7 +4,7 @@ mod bmf; #[cfg(target_os = "linux")] -use dial9_tokio_telemetry::telemetry::CpuProfilingConfig; +use dial9_tokio_telemetry::telemetry::cpu_profile::CpuProfilingConfig; use dial9_tokio_telemetry::telemetry::{RotatingWriter, TracedRuntime}; use std::time::Instant; use tokio::io::{AsyncReadExt, AsyncWriteExt}; diff --git a/dial9-tokio-telemetry/benches/overhead_bench.rs b/dial9-tokio-telemetry/benches/overhead_bench.rs index b6c7eb5e..1f8e8130 100644 --- a/dial9-tokio-telemetry/benches/overhead_bench.rs +++ b/dial9-tokio-telemetry/benches/overhead_bench.rs @@ -169,7 +169,7 @@ fn run_bench(mode: &str, duration_secs: u64) -> BenchResult { let mut merged = Histogram::::new_with_bounds(1_000, 60_000_000_000, 3).unwrap(); for h in handles { - merged.add(&h.await.unwrap()).unwrap(); + merged.add(h.await.unwrap()).unwrap(); } (merged, wall) }); diff --git a/dial9-tokio-telemetry/benches/poll_overhead.rs b/dial9-tokio-telemetry/benches/poll_overhead.rs deleted file mode 100644 index aafc5d0e..00000000 --- a/dial9-tokio-telemetry/benches/poll_overhead.rs +++ /dev/null @@ -1,64 +0,0 @@ -use criterion::{Criterion, black_box, criterion_group, criterion_main}; -use dial9_tokio_telemetry::task_dump::DetectLongWait; -use std::future::Future; -use std::pin::Pin; -use std::task::{Context, Poll, Waker}; -use tokio::runtime::dump::Trace; -use tokio::sync::mpsc; - -struct AlwaysPending; - -impl Future for AlwaysPending { - type Output = (); - - fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll { - Poll::Pending - } -} - -fn noop_waker() -> Waker { - use std::task::{RawWaker, RawWakerVTable}; - - unsafe fn clone(_: *const ()) -> RawWaker { - RawWaker::new(std::ptr::null(), &VTABLE) - } - unsafe fn wake(_: *const ()) {} - unsafe fn wake_by_ref(_: *const ()) {} - unsafe fn drop(_: *const ()) {} - - static VTABLE: RawWakerVTable = RawWakerVTable::new(clone, wake, wake_by_ref, drop); - - unsafe { Waker::from_raw(RawWaker::new(std::ptr::null(), &VTABLE)) } -} - -fn bench_poll_overhead(c: &mut Criterion) { - let rt = tokio::runtime::Runtime::new().unwrap(); - - c.bench_function("trace_capture", |b| { - b.iter(|| { - rt.block_on(async { - let mut future = AlwaysPending; - let waker = noop_waker(); - let mut cx = Context::from_waker(&waker); - let (_result, trace) = Trace::capture(|| Pin::new(&mut future).poll(&mut cx)); - black_box(trace); - }); - }); - }); - - c.bench_function("first_poll_with_capture", |b| { - b.iter(|| { - rt.block_on(async { - let (tx, _rx) = mpsc::unbounded_channel(); - let future = AlwaysPending; - let mut wrapped = DetectLongWait::new(future, tx); - let waker = noop_waker(); - let mut cx = Context::from_waker(&waker); - let _ = black_box(Pin::new(&mut wrapped).poll(&mut cx)); - }); - }); - }); -} - -criterion_group!(benches, bench_poll_overhead); -criterion_main!(benches); diff --git a/dial9-tokio-telemetry/examples/cancelled_task.rs b/dial9-tokio-telemetry/examples/cancelled_task.rs deleted file mode 100644 index 7f486ae6..00000000 --- a/dial9-tokio-telemetry/examples/cancelled_task.rs +++ /dev/null @@ -1,54 +0,0 @@ -use dial9_tokio_telemetry::task_dump::{DetectLongWait, LongPollTracker, SentinelStatus}; -use tokio::time::Duration; - -#[tokio::main] -async fn main() { - let (tracker, mut handle) = LongPollTracker::new(); - tracker.spawn(); - - let sleep_future = tokio::time::sleep(Duration::from_secs(20)); - let wrapped = DetectLongWait::new(sleep_future, handle.sentinel_tx.clone()); - - let task = tokio::spawn(async move { - wrapped.await; - println!("Sleep completed"); - }); - - println!("Waiting for long poll detection (7 seconds to ensure bucket rotation)..."); - tokio::time::sleep(Duration::from_secs(7)).await; - - // Check if we got a trace - if let Ok(event) = handle.rx.try_recv() { - println!("\n=== LONG POLL DETECTED ==="); - println!( - "Status: {:?}", - match &event.status { - SentinelStatus::Pending(_) => "Pending", - SentinelStatus::Completed => "Completed", - SentinelStatus::Cancelled => "Cancelled", - } - ); - println!("Send count: {}", event.send_count.0); - - if let SentinelStatus::Pending(traces) = &event.status { - println!("\nNumber of traces captured: {}", traces.len()); - if let Some((_, trace)) = traces.last() { - println!( - "\nTrace (first 10 lines):\n{}", - trace - .to_string() - .lines() - .take(10) - .collect::>() - .join("\n") - ); - } - } - } - - println!("\nNow cancelling task..."); - task.abort(); - println!("Task cancelled - sentinel should be marked as Cancelled"); - - std::process::exit(0); -} diff --git a/dial9-tokio-telemetry/examples/completing_task.rs b/dial9-tokio-telemetry/examples/completing_task.rs deleted file mode 100644 index 6caa0e28..00000000 --- a/dial9-tokio-telemetry/examples/completing_task.rs +++ /dev/null @@ -1,35 +0,0 @@ -use dial9_tokio_telemetry::task_dump::{DetectLongWait, LongPollTracker, SentinelStatus}; -use tokio::time::Duration; - -#[tokio::main] -async fn main() { - let (tracker, mut handle) = LongPollTracker::new(); - tracker.spawn(); - - println!("Starting short sleep (2s) that completes before threshold..."); - let sleep_future = tokio::time::sleep(Duration::from_secs(2)); - let wrapped = DetectLongWait::new(sleep_future, handle.sentinel_tx.clone()); - - wrapped.await; - println!("Sleep completed successfully"); - - println!("Waiting 5 more seconds to see if any trace is sent..."); - tokio::time::sleep(Duration::from_secs(5)).await; - - if let Ok(event) = handle.rx.try_recv() { - println!("\n=== UNEXPECTED: TRACE RECEIVED ==="); - println!( - "Status: {:?}", - match &event.status { - SentinelStatus::Pending(_) => "Pending", - SentinelStatus::Completed => "Completed", - SentinelStatus::Cancelled => "Cancelled", - } - ); - println!("Send count: {}", event.send_count.0); - } else { - println!("\nāœ“ SUCCESS: No trace sent for task that completed before threshold"); - } - - std::process::exit(0); -} diff --git a/dial9-tokio-telemetry/examples/debug_timing.rs b/dial9-tokio-telemetry/examples/debug_timing.rs deleted file mode 100644 index bf34e8f8..00000000 --- a/dial9-tokio-telemetry/examples/debug_timing.rs +++ /dev/null @@ -1,55 +0,0 @@ -use dial9_tokio_telemetry::task_dump::{DetectLongWait, LongPollTracker, SentinelStatus}; -use tokio::time::Duration; - -#[tokio::main] -async fn main() { - let (tracker, mut handle) = LongPollTracker::new(); - tracker.spawn(); - - println!("Starting sleep future..."); - let sleep_future = tokio::time::sleep(Duration::from_secs(20)); - let wrapped = DetectLongWait::new(sleep_future, handle.sentinel_tx.clone()); - - let task = tokio::spawn(async move { - wrapped.await; - println!("Sleep completed"); - }); - - println!("Waiting for long poll detection..."); - - for i in 1..=10 { - tokio::time::sleep(Duration::from_secs(1)).await; - println!("{}s elapsed...", i); - - if let Ok(event) = handle.rx.try_recv() { - println!("\n=== LONG POLL DETECTED at {}s ===", i); - println!( - "Status: {:?}", - match &event.status { - SentinelStatus::Pending(_) => "Pending", - SentinelStatus::Completed => "Completed", - SentinelStatus::Cancelled => "Cancelled", - } - ); - println!("Send count: {}", event.send_count.0); - - if let SentinelStatus::Pending(traces) = &event.status { - println!("\nNumber of traces captured: {}", traces.len()); - if let Some((_, trace)) = traces.last() { - println!( - "\nTrace (first 8 lines):\n{}", - trace - .to_string() - .lines() - .take(8) - .collect::>() - .join("\n") - ); - } - } - break; - } - } - - task.abort(); -} diff --git a/dial9-tokio-telemetry/examples/long_sleep.rs b/dial9-tokio-telemetry/examples/long_sleep.rs deleted file mode 100644 index d9434b38..00000000 --- a/dial9-tokio-telemetry/examples/long_sleep.rs +++ /dev/null @@ -1,41 +0,0 @@ -use dial9_tokio_telemetry::task_dump::{DetectLongWait, LongPollTracker, SentinelStatus}; -use tokio::time::Duration; - -#[tokio::main] -async fn main() { - let (tracker, mut handle) = LongPollTracker::new(); - tracker.spawn(); - - let sleep_future = tokio::time::sleep(Duration::from_secs(10)); - let wrapped = DetectLongWait::new(sleep_future, handle.sentinel_tx.clone()); - - tokio::spawn(async move { - wrapped.await; - println!("Sleep completed"); - }); - - println!("Waiting for long poll detection (6 seconds)..."); - - if let Some(event) = handle.rx.recv().await { - println!("\n=== LONG POLL DETECTED ==="); - println!( - "Status: {:?}", - match &event.status { - SentinelStatus::Pending(_) => "Pending", - SentinelStatus::Completed => "Completed", - SentinelStatus::Cancelled => "Cancelled", - } - ); - println!("Send count: {}", event.send_count.0); - - if let SentinelStatus::Pending(traces) = &event.status { - println!("\nNumber of traces captured: {}", traces.len()); - if let Some((time, trace)) = traces.last() { - println!("Latest trace captured at: {:?}", time); - println!("\nTrace:\n{}", trace); - } - } - } - - std::process::exit(0); -}