dial9-rs
diff --git a/‎.github/workflows/benchmarks.yml‎
Lines changed: 6 additions & 0 deletions b/‎.github/workflows/benchmarks.yml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎dial9-tokio-telemetry/README.md‎
Lines changed: 13 additions & 13 deletions b/‎dial9-tokio-telemetry/README.md‎
Lines changed: 13 additions & 13 deletions
diff --git a/‎dial9-tokio-telemetry/benches/overhead_bench.rs‎
Lines changed: 1 addition & 0 deletions b/‎dial9-tokio-telemetry/benches/overhead_bench.rs‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎dial9-tokio-telemetry/examples/many_workers.rs‎
Lines changed: 57 additions & 0 deletions b/‎dial9-tokio-telemetry/examples/many_workers.rs‎
Lines changed: 57 additions & 0 deletions
diff --git a/‎dial9-viewer/src/storage.rs‎
Lines changed: 2 additions & 65 deletions b/‎dial9-viewer/src/storage.rs‎
Lines changed: 2 additions & 65 deletions
@@ -43,6 +43,9 @@ jobs:
           - name: overhead_bench
             adapter: json
             command: cargo bench --bench overhead_bench -- --bmf 10
+          - name: overhead_bench_ctimer
+            adapter: json
+            command: DIAL9_FORCE_CTIMER=1 cargo bench --bench overhead_bench -- --bmf 10
           - name: e2e_workload
             adapter: json
             command: cargo bench --bench e2e_workload -- --bmf 10
@@ -90,6 +93,9 @@ jobs:
           - name: overhead_bench
             adapter: json
             command: cargo bench --bench overhead_bench -- --bmf 10
+          - name: overhead_bench_ctimer
+            adapter: json
+            command: DIAL9_FORCE_CTIMER=1 cargo bench --bench overhead_bench -- --bmf 10
           - name: e2e_workload
             adapter: json
             command: cargo bench --bench e2e_workload -- --bmf 10
 
@@ -101,7 +101,7 @@ dial9-tokio-telemetry is designed for always-on production use, but it's still e
 
 Yes, check out this [quick walkthrough (YouTube)](https://www.youtube.com/watch?v=zJOzU_6Mf7Q)!
 
-The [viewer](https://dial9-tokio-telemetry.netlify.app/) (autodeployed from code in `main`) is hosted on Netlify for convenience. You can [load the demo trace](https://dial9-tokio-telemetry.netlify.app/?trace=demo-trace.bin) directly, or use [serve.py](/dial9-tokio-telemetry/serve.py) to run it locally (pure HTML and JS, client side only).
+The [viewer](https://dial9-tokio-telemetry.netlify.app/) (autodeployed from code in `main`) is hosted on Netlify for convenience. You can [load the demo trace](https://dial9-tokio-telemetry.netlify.app/?trace=demo-trace.bin) directly, or use [serve.py](https://github.com/dial9-rs/dial9-tokio-telemetry/blob/main/dial9-tokio-telemetry/serve.py) to run it locally (pure HTML and JS, client side only).
 
 <img width="1288" height="659" alt="Screenshot 2026-03-01 at 3 52 59 PM" src="https://github.com/user-attachments/assets/77225801-70b1-4aef-b064-32bc2326b1ef" />
 
@@ -185,7 +185,7 @@ runtime.block_on(async {
 # }
 ```
 
-For frameworks like Axum where you don't control the spawn call, you need to wrap the accept loop. See [`examples/metrics-service/src/axum_traced.rs`](/examples/metrics-service/src/axum_traced.rs) for a working example that wraps both the accept loop and per-connection futures.
+For frameworks like Axum where you don't control the spawn call, you need to wrap the accept loop. See [`examples/metrics-service/src/axum_traced.rs`](https://github.com/dial9-rs/dial9-tokio-telemetry/blob/main/examples/metrics-service/src/axum_traced.rs) for a working example that wraps both the accept loop and per-connection futures.
 
 ## Custom events
 
@@ -219,7 +219,7 @@ record_event(
 # }
 ```
 
-For events with repeated string values (HTTP methods, endpoint paths, etc.), implement `Encodable` manually to use string interning — see [`examples/custom_events.rs`](/dial9-tokio-telemetry/examples/custom_events.rs) for a complete example showing both patterns.
+For events with repeated string values (HTTP methods, endpoint paths, etc.), implement `Encodable` manually to use string interning — see [`examples/custom_events.rs`](https://github.com/dial9-rs/dial9-tokio-telemetry/blob/main/dial9-tokio-telemetry/examples/custom_events.rs) for a complete example showing both patterns.
 
 Custom events are encoded into the same thread-local buffer as built-in events (~100–200 ns per call) and appear in the trace viewer alongside poll/park/wake events.
 
@@ -263,7 +263,7 @@ let (runtime, guard) = TracedRuntime::builder()
 # fn main() {}
 ```
 
-This pulls in [`dial9-perf-self-profile`](/perf-self-profile) for `perf_event_open` access. It records `CpuSample` events with raw stack frame addresses. When a `trace_path` is set, the background worker automatically symbolizes sealed segments (resolving addresses to function names via `/proc/self/maps` and blazesym) and gzip-compresses them on disk.
+This pulls in [`dial9-perf-self-profile`](https://github.com/dial9-rs/dial9-tokio-telemetry/tree/main/perf-self-profile) for `perf_event_open` access. It records `CpuSample` events with raw stack frame addresses. When a `trace_path` is set, the background worker automatically symbolizes sealed segments (resolving addresses to function names via `/proc/self/maps` and blazesym) and gzip-compresses them on disk.
 
 #### Requirements
 
@@ -351,7 +351,7 @@ let (io_rt, io_handle) = guard.trace_runtime("io").build(io_builder)?;
 # }
 ```
 
-See [`examples/thread_per_core.rs`](/dial9-tokio-telemetry/examples/thread_per_core.rs) and [`examples/multi_runtime.rs`](/dial9-tokio-telemetry/examples/multi_runtime.rs) for complete examples.
+See [`examples/thread_per_core.rs`](https://github.com/dial9-rs/dial9-tokio-telemetry/blob/main/dial9-tokio-telemetry/examples/thread_per_core.rs) and [`examples/multi_runtime.rs`](https://github.com/dial9-rs/dial9-tokio-telemetry/blob/main/dial9-tokio-telemetry/examples/multi_runtime.rs) for complete examples.
 
 **Shutdown**: Drop all runtimes before the `TelemetryGuard` so worker threads exit and flush their thread-local buffers. For a clean shutdown that waits for the background worker (e.g. S3 uploads) to drain, call `guard.graceful_shutdown(timeout)` instead of dropping the guard.
 
@@ -361,7 +361,7 @@ See [`examples/thread_per_core.rs`](/dial9-tokio-telemetry/examples/thread_per_c
 
 ### Analyzing traces
 
-[`dial9-viewer`](/dial9-viewer) is an interactive trace viewer and S3 browser. Point it at a local directory or an S3 bucket to browse and visualize traces in the browser. [Here's a demo.](https://www.youtube.com/watch?v=zJOzU_6Mf7Q)
+[`dial9-viewer`](https://github.com/dial9-rs/dial9-tokio-telemetry/tree/main/dial9-viewer) is an interactive trace viewer and S3 browser. Point it at a local directory or an S3 bucket to browse and visualize traces in the browser. [Here's a demo.](https://www.youtube.com/watch?v=zJOzU_6Mf7Q)
 
 ```bash
 # Install
@@ -388,7 +388,7 @@ cargo run --example analyze_trace --features analysis -- /tmp/my_traces/trace.0.
 cargo run --example trace_to_jsonl --features analysis -- /tmp/my_traces/trace.0.bin.gz output.jsonl
 ```
 
-See [TRACE_ANALYSIS_GUIDE.md](/dial9-tokio-telemetry/TRACE_ANALYSIS_GUIDE.md) for a walkthrough of diagnosing scheduling delays and CPU hotspots from trace data.
+See [TRACE_ANALYSIS_GUIDE.md](https://github.com/dial9-rs/dial9-tokio-telemetry/blob/main/dial9-tokio-telemetry/TRACE_ANALYSIS_GUIDE.md) for a walkthrough of diagnosing scheduling delays and CPU hotspots from trace data.
 
 ## Features
 
@@ -456,7 +456,7 @@ cargo run --example telemetry_rotating     # manual setup + rotating writer conf
 cargo run --example multi_runtime          # multiple runtimes, manual TelemetryCore
 ```
 
-The [`examples/metrics-service`](/examples/metrics-service) directory has a full Axum service with DynamoDB persistence, a load-generating client, and telemetry wired up end-to-end.
+The [`examples/metrics-service`](https://github.com/dial9-rs/dial9-tokio-telemetry/tree/main/examples/metrics-service) directory has a full Axum service with DynamoDB persistence, a load-generating client, and telemetry wired up end-to-end.
 
 ## Overhead
 
@@ -476,11 +476,11 @@ Overhead:   3.2%
 
 This repo is a Cargo workspace with five members:
 
-- [`dial9-tokio-telemetry`](/dial9-tokio-telemetry) — the main crate
-- [`dial9-viewer`](/dial9-viewer) — CLI and web UI for browsing traces in S3 or on the local filesystem
-- [`dial9-macro`](/dial9-macro) — the `#[dial9_tokio_telemetry::main]` attribute macro
-- [`dial9-perf-self-profile`](/perf-self-profile) — minimal Linux `perf_event_open` wrapper for CPU profiling and scheduler events
-- [`examples/metrics-service`](/examples/metrics-service) — end-to-end example service
+- [`dial9-tokio-telemetry`](https://github.com/dial9-rs/dial9-tokio-telemetry/tree/main/dial9-tokio-telemetry) — the main crate
+- [`dial9-viewer`](https://github.com/dial9-rs/dial9-tokio-telemetry/tree/main/dial9-viewer) — CLI and web UI for browsing traces in S3 or on the local filesystem
+- [`dial9-macro`](https://github.com/dial9-rs/dial9-tokio-telemetry/tree/main/dial9-macro) — the `#[dial9_tokio_telemetry::main]` attribute macro
+- [`dial9-perf-self-profile`](https://github.com/dial9-rs/dial9-tokio-telemetry/tree/main/perf-self-profile) — minimal Linux `perf_event_open` wrapper for CPU profiling and scheduler events
+- [`examples/metrics-service`](https://github.com/dial9-rs/dial9-tokio-telemetry/tree/main/examples/metrics-service) — end-to-end example service
 
 ## Future work
 
 
@@ -110,6 +110,7 @@ fn run_bench(mode: &str, duration_secs: u64) -> BenchResult {
     let (server_rt, guard): (tokio::runtime::Runtime, Option<TelemetryGuard>) = match mode {
         "telemetry" => {
             let writer = RotatingWriter::single_file("/tmp/overhead_bench_trace.bin").unwrap();
+            #[allow(unused_mut)]
             let mut tb = TracedRuntime::builder().with_task_tracking(true);
             #[cfg(target_os = "linux")]
             {
 
@@ -0,0 +1,57 @@
+//! Generate a trace with 48 workers for testing the viewer with many lanes.
+//!
+//! Usage:
+//!   cargo run --example many_workers
+//!
+//! Then open the trace in the viewer:
+//!   cargo run -p dial9-viewer -- serve --local-dir .
+
+use std::time::Duration;
+
+use dial9_tokio_telemetry::config::{Dial9Config, Dial9ConfigBuilder};
+use dial9_tokio_telemetry::telemetry::TelemetryHandle;
+
+fn my_config() -> Dial9Config { // Builds the telemetry config this example hands to the `main` attribute macro.
+    Dial9ConfigBuilder::new(
+        "many_workers_trace.bin", // trace file path passed to the builder
+        64 * 1024 * 1024, // 64 MiB — NOTE(review): presumably the per-file size limit; confirm against `Dial9ConfigBuilder::new`
+        256 * 1024 * 1024, // 256 MiB — NOTE(review): presumably the total size cap; confirm against `Dial9ConfigBuilder::new`
+    )
+    .with_tokio(|t| {
+        t.worker_threads(48); // 48 workers so the viewer has many lanes to render
+    })
+    .with_runtime(|r| r.with_task_tracking(true)) // turn on task tracking on the runtime builder
+    .build()
+}
+
+#[dial9_tokio_telemetry::main(config = my_config)] // NOTE(review): the macro presumably builds the runtime from `my_config()` — confirm
+async fn main() { // Spawns 500 short tasks through the telemetry handle, then waits for all of them.
+    println!("Running workload with 48 workers...");
+
+    let handle = TelemetryHandle::current();
+    let tasks: Vec<_> = (0..500)
+        .map(|i| {
+            handle.spawn(async move {
+                for _ in 0..5 { // five rounds of sleep -> CPU work -> yield per task
+                    tokio::time::sleep(Duration::from_millis(5)).await;
+                    // Small CPU work to generate poll events
+                    let mut v = 0u64;
+                    for j in 0..50_000u64 {
+                        v = v.wrapping_add(j.wrapping_mul(j)); // wrapping ops: overflow is expected and harmless here
+                    }
+                    std::hint::black_box(v); // keep the optimizer from deleting the loop above
+                    tokio::task::yield_now().await;
+                }
+                if i % 100 == 0 { // progress output for every 100th task only
+                    println!("Task {i} done");
+                }
+            })
+        })
+        .collect();
+
+    for task in tasks {
+        let _ = task.await; // result is discarded, so a failed task does not abort the example
+    }
+
+    println!("Trace written to many_workers_trace.*.bin"); // NOTE(review): the `*` presumably stands for a rotation index added by the writer — confirm
+}
@@ -220,7 +220,7 @@ impl StorageBackend for LocalBackend {
             let prefix2 = prefix.clone();
             tokio::task::spawn_blocking(move || {
                 let mut objects = Vec::new();
-                collect_files(&root, &root, &prefix2, &mut objects, 0, &mut 0)?;
+                collect_files(&root, &root, &prefix2, &mut objects)?;
                 objects.sort_by(|a, b| a.key.cmp(&b.key));
                 Ok(objects)
             })
@@ -313,49 +313,18 @@ impl StorageBackend for LocalBackend {
     }
 }
 
-/// Maximum directory depth to recurse into when listing local files.
-const MAX_COLLECT_DEPTH: u32 = 10;
-
-/// Maximum number of files to return from a local directory listing.
-const MAX_COLLECT_FILES: usize = 50;
-
-/// Maximum number of directory entries to visit (files + dirs) across the
-/// entire recursive walk. This bounds the number of syscalls (`canonicalize`,
-/// `metadata`) so a huge directory tree cannot hang the listing.
-const MAX_ENTRIES_VISITED: usize = 500;
-
-/// Directory names to skip during recursive file collection.
-fn is_skipped_dir(name: &str) -> bool {
-    name.starts_with('.') || matches!(name, "target" | "node_modules")
-}
-
 fn collect_files(
     root: &Path,
     dir: &Path,
     prefix: &str,
     out: &mut Vec<ObjectInfo>,
-    depth: u32,
-    visited: &mut usize,
 ) -> Result<(), StorageError> {
-    if depth > MAX_COLLECT_DEPTH
-        || out.len() >= MAX_COLLECT_FILES
-        || *visited >= MAX_ENTRIES_VISITED
-    {
-        return Ok(());
-    }
     let entries = match std::fs::read_dir(dir) {
         Ok(e) => e,
         Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
-        Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
-            return Err(StorageError::Other("permission denied".into()));
-        }
         Err(e) => return Err(StorageError::Other(e.to_string())),
     };
     for entry in entries {
-        *visited += 1;
-        if out.len() >= MAX_COLLECT_FILES || *visited >= MAX_ENTRIES_VISITED {
-            break;
-        }
         let entry = entry.map_err(|e| StorageError::Other(e.to_string()))?;
         let path = entry.path();
         // Resolve symlinks and verify the target stays within root.
@@ -364,11 +333,7 @@ fn collect_files(
             _ => continue,
         };
         if canonical.is_dir() {
-            let name = entry.file_name();
-            let name = name.to_string_lossy();
-            if !is_skipped_dir(&name) {
-                collect_files(root, &canonical, prefix, out, depth + 1, visited)?;
-            }
+            collect_files(root, &canonical, prefix, out)?;
         } else if canonical.is_file() {
             let key = path
                 .strip_prefix(root)
@@ -392,31 +357,3 @@ fn collect_files(
     }
     Ok(())
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn collect_files_caps_entries_visited() {
-        let dir = tempfile::tempdir().unwrap();
-        // Create more files than MAX_ENTRIES_VISITED to prove we stop early.
-        let n = MAX_ENTRIES_VISITED + 500;
-        for i in 0..n {
-            std::fs::write(dir.path().join(format!("file_{i:05}.bin")), b"x").unwrap();
-        }
-        let mut out = Vec::new();
-        let mut visited = 0;
-        collect_files(dir.path(), dir.path(), "", &mut out, 0, &mut visited).unwrap();
-        // visited must be capped — we should NOT have iterated all n files.
-        assert!(
-            visited <= MAX_ENTRIES_VISITED,
-            "visited {visited} entries, expected at most {MAX_ENTRIES_VISITED}"
-        );
-        assert!(
-            out.len() <= MAX_COLLECT_FILES,
-            "collected {} files, expected at most {MAX_COLLECT_FILES}",
-            out.len()
-        );
-    }
-}
Original file line number	Diff line number	Diff line change
`@@ -110,6 +110,7 @@ fn run_bench(mode: &str, duration_secs: u64) -> BenchResult {`
`110`	`110`	`let (server_rt, guard): (tokio::runtime::Runtime, Option<TelemetryGuard>) = match mode {`
`111`	`111`	`"telemetry" => {`
`112`	`112`	`let writer = RotatingWriter::single_file("/tmp/overhead_bench_trace.bin").unwrap();`
	`113`	`+ #[allow(unused_mut)]`
`113`	`114`	`let mut tb = TracedRuntime::builder().with_task_tracking(true);`
`114`	`115`	`#[cfg(target_os = "linux")]`
`115`	`116`	`{`