dial9-rs
diff --git a/‎Cargo.lock‎
Lines changed: 2321 additions & 246 deletions b/‎Cargo.lock‎
Lines changed: 2321 additions & 246 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 1 addition & 0 deletions b/‎Cargo.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/metrics-service/Cargo.toml‎
Lines changed: 16 additions & 0 deletions b/‎examples/metrics-service/Cargo.toml‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎examples/metrics-service/README.md‎
Lines changed: 77 additions & 0 deletions b/‎examples/metrics-service/README.md‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎examples/metrics-service/src/buffer.rs‎
Lines changed: 68 additions & 0 deletions b/‎examples/metrics-service/src/buffer.rs‎
Lines changed: 68 additions & 0 deletions
diff --git a/‎examples/metrics-service/src/client.rs‎
Lines changed: 113 additions & 0 deletions b/‎examples/metrics-service/src/client.rs‎
Lines changed: 113 additions & 0 deletions
@@ -2,6 +2,7 @@
 resolver = "3"
 members = [
     "dial9-tokio-telemetry",
+    "examples/metrics-service",
 ]
 
 [workspace.dependencies]
 
@@ -0,0 +1,16 @@
+[package]
+name = "metrics-service"
+edition = "2024"
+version = "0.1.0"
+
+[dependencies]
+tokio = { version = "1", features = ["rt-multi-thread", "macros", "time", "sync"] }
+tokio-util = { version = "0.7", features = ["rt"] }
+axum = "0.8"
+aws-config = "1"
+aws-sdk-dynamodb = "1"
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+dial9-tokio-telemetry = { path = "../../dial9-tokio-telemetry" }
+reqwest = { version = "0.12", features = ["json"] }
+clap = { version = "4", features = ["derive"] }
@@ -0,0 +1,77 @@
+# Metrics Service Example
+
+A demonstration service that collects, aggregates, and stores metrics using Tokio, Axum, and DynamoDB, instrumented with `dial9-tokio-telemetry` for runtime tracing.
+
+## What It Does
+
+- **HTTP API**: Accepts metric submissions via POST and queries aggregated metrics via GET
+- **In-memory buffering**: Collects metrics in memory before periodic flushing
+- **DynamoDB persistence**: Stores aggregated metrics with timestamp-based partitioning
+- **Load testing**: Built-in client that simulates variable load patterns
+- **Telemetry**: Captures Tokio runtime traces to disk for performance analysis
+
+The service runs for 55 seconds with a load profile that ramps up, sustains, ramps down, and includes a thundering herd spike.
+
+## Usage
+
+Run the example from anywhere in the workspace:
+
+```bash
+cargo run -p metrics-service
+```
+
+The service will:
+1. Start an HTTP server on `0.0.0.0:3001`
+2. Create a DynamoDB table named `metrics-service` (requires AWS credentials)
+3. Launch a background flush worker (10-second intervals)
+4. Run a load-generating client with varying concurrency
+5. Write telemetry traces to `/tmp/metrics-service-traces/`
+6. Shut down automatically after 55 seconds
+
+### API Endpoints
+
+**Record a metric:**
+```bash
+curl -X POST http://localhost:3001/metrics \
+  -H "Content-Type: application/json" \
+  -d '{"name": "cpu", "value": 42.5}'
+```
+
+**Query aggregated metrics:**
+```bash
+curl http://localhost:3001/metrics/cpu
+```
+
+Returns a JSON array with the timestamp, sum, count, min, and max for each time window.
+
+## Configuration
+
+Edit constants in `src/main.rs`:
+
+| Constant | Default | Description |
+|----------|---------|-------------|
+| `FLUSH_INTERVAL` | 10s | How often to flush buffered metrics to DynamoDB |
+| `TABLE_NAME` | `"metrics-service"` | DynamoDB table name |
+| `SERVER_ADDR` | `"0.0.0.0:3001"` | HTTP server bind address |
+| `RUN_DURATION` | 55s | Total runtime before shutdown |
+
+### Load Profile
+
+Edit `src/client.rs` to adjust:
+- `MAX_WORKERS`: Concurrency at the top of the ramp (default: 40)
+- `THUNDERING_HERD`: Spike concurrency, which is also the size of the worker pool (default: 200)
+- `BASELINE`: Steady-state concurrency (default: 4)
+- `METRICS`: Metric names to cycle through
+
+### Telemetry
+
+Traces are written to `/tmp/metrics-service-traces/trace.bin` with:
+- Max file size: 1 MB (rotates automatically)
+- Max total size: 30 MB
+
+Change the path or limits in the `RotatingWriter::new()` call in `main.rs`.
+
+## Requirements
+
+- AWS credentials configured (for DynamoDB access)
+- Write permissions to `/tmp/metrics-service-traces/`
@@ -0,0 +1,68 @@
+use std::collections::HashMap;
+use tokio::sync::Mutex;
+
+use crate::ddb::DdbClient;
+
+/// Running summary statistics for the values recorded under one metric name.
+#[derive(Default)]
+struct Aggregate {
+    /// Total of every recorded value.
+    sum: f64,
+    /// Number of values recorded so far.
+    count: u64,
+    /// Smallest recorded value; only meaningful once `count > 0`.
+    min: f64,
+    /// Largest recorded value; only meaningful once `count > 0`.
+    max: f64,
+}
+
+impl Aggregate {
+    /// Folds `value` into the running sum, count, minimum, and maximum.
+    fn record(&mut self, value: f64) {
+        // The derived `Default` leaves both bounds at 0.0, so the very first
+        // sample has to seed them instead of being compared against them.
+        let (low, high) = match self.count {
+            0 => (value, value),
+            _ => (self.min.min(value), self.max.max(value)),
+        };
+        self.min = low;
+        self.max = high;
+        self.sum += value;
+        self.count += 1;
+    }
+}
+
+/// In-memory accumulator of per-metric aggregates waiting to be flushed.
+#[derive(Default)]
+pub struct MetricsBuffer {
+    /// Aggregates keyed by metric name, guarded by an async mutex.
+    inner: Mutex<HashMap<String, Aggregate>>,
+}
+
+impl MetricsBuffer {
+    /// Creates an empty buffer.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Folds `value` into the aggregate stored under `name`, creating the
+    /// aggregate on first use.
+    pub async fn record(&self, name: String, value: f64) {
+        self.inner
+            .lock()
+            .await
+            .entry(name)
+            .or_default()
+            .record(value);
+    }
+
+    /// Empties the buffer and writes each aggregate to `ddb`, stamped with the
+    /// current Unix time in seconds.
+    ///
+    /// A failed write is logged to stderr and that aggregate is dropped; it is
+    /// not put back into the buffer.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the system clock reports a time before the Unix epoch.
+    pub async fn flush_to_ddb(&self, ddb: &DdbClient) {
+        // Read the clock before touching the buffer so a clock failure cannot
+        // discard data that has already been drained.
+        let ts = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .expect("system clock is set before the Unix epoch")
+            .as_secs();
+
+        // Swap the whole map out in a single statement: the lock guard is a
+        // temporary, so it is released before any network call is awaited.
+        let drained = std::mem::take(&mut *self.inner.lock().await);
+
+        for (name, agg) in drained {
+            if let Err(e) = ddb
+                .put_aggregate(&name, ts, agg.sum, agg.count, agg.min, agg.max)
+                .await
+            {
+                eprintln!("flush error for {name}: {e}");
+            }
+        }
+    }
+}
@@ -0,0 +1,113 @@
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+use reqwest::Client;
+use serde_json::json;
+use tokio::sync::Semaphore;
+use tokio::time::sleep;
+use tokio_util::sync::CancellationToken;
+
+/// Metric names the load generator cycles through, one per request.
+const METRICS: &[&str] = &["cpu", "memory", "latency", "error_rate", "queue_depth"];
+/// Target concurrency at the top of the ramp.
+const MAX_WORKERS: usize = 40;
+/// Target concurrency during the spike; also the number of worker tasks spawned.
+const THUNDERING_HERD: usize = 200;
+/// Target concurrency outside the ramp and the spike.
+const BASELINE: usize = 4;
+
+/// Maps seconds since start to the number of workers that should be active.
+///
+/// | elapsed (s) | target                                   |
+/// |-------------|------------------------------------------|
+/// | 0-10        | linear ramp `BASELINE` -> `MAX_WORKERS`  |
+/// | 10-20       | hold at `MAX_WORKERS`                    |
+/// | 20-30       | linear ramp `MAX_WORKERS` -> `BASELINE`  |
+/// | 30-40       | hold at `BASELINE`                       |
+/// | 40-45       | `THUNDERING_HERD` spike                  |
+/// | 45+         | back to `BASELINE`                       |
+fn target_concurrency(elapsed: f64) -> usize {
+    // Height of both ramps, shared by the ramp-up and ramp-down phases.
+    let span = (MAX_WORKERS - BASELINE) as f64;
+    match elapsed {
+        e if e < 10.0 => (BASELINE as f64 + (e / 10.0) * span) as usize,
+        e if e < 20.0 => MAX_WORKERS,
+        e if e < 30.0 => (MAX_WORKERS as f64 - ((e - 20.0) / 10.0) * span) as usize,
+        e if e < 40.0 => BASELINE,
+        e if e < 45.0 => THUNDERING_HERD,
+        _ => BASELINE,
+    }
+}
+
+/// Generates load against `base_url` until `shutdown` is cancelled.
+///
+/// Spawns `THUNDERING_HERD` worker tasks that each need a semaphore permit to
+/// send a request, then acts as coordinator: roughly every 500 ms it grows or
+/// shrinks the number of permits to match `target_concurrency`. Returns as soon
+/// as cancellation is observed; the worker tasks are detached and stop on their
+/// own once they see the cancelled token.
+pub async fn run(base_url: &str, shutdown: CancellationToken) {
+    // `reqwest::Client` is reference-counted internally, so plain clones share
+    // one connection pool without an extra `Arc`.
+    let client = Client::new();
+    // Starts with zero permits: every worker parks until the coordinator
+    // releases some.
+    let sem = Arc::new(Semaphore::new(0));
+    let start = Instant::now();
+
+    // Spawn the full pool up front; the semaphore decides how many are active.
+    for i in 0..THUNDERING_HERD {
+        let client = client.clone();
+        let sem = Arc::clone(&sem);
+        let base_url = base_url.to_string();
+        let shutdown = shutdown.clone();
+        tokio::spawn(async move {
+            let mut tick = i as u64;
+            loop {
+                tokio::select! {
+                    _ = shutdown.cancelled() => break,
+                    permit = sem.acquire() => {
+                        // Held for the duration of one request, then returned.
+                        let _permit = permit.expect("load semaphore is never closed");
+                        do_work(&client, &base_url, i, tick).await;
+                        tick += 1;
+                    }
+                }
+            }
+        });
+    }
+
+    // Coordinator: adjusts semaphore permits to match the target concurrency.
+    let mut current = 0usize;
+    while !shutdown.is_cancelled() {
+        let target = target_concurrency(start.elapsed().as_secs_f64());
+        if target != current {
+            if target > current {
+                sem.add_permits(target - current);
+            } else {
+                // Permits may be held by in-flight workers, so retire them in
+                // the background: acquire each one and forget it.
+                let to_remove = current - target;
+                let sem = Arc::clone(&sem);
+                tokio::spawn(async move {
+                    for _ in 0..to_remove {
+                        sem.acquire()
+                            .await
+                            .expect("load semaphore is never closed")
+                            .forget();
+                    }
+                });
+            }
+            println!("concurrency -> {target}");
+            current = target;
+        }
+
+        // Wake early on cancellation instead of sleeping out the full interval.
+        tokio::select! {
+            _ = shutdown.cancelled() => break,
+            _ = sleep(Duration::from_millis(500)) => {}
+        }
+    }
+}
+
+/// Performs one unit of load: posts a metric sample and, on every tenth tick,
+/// also queries that metric back.
+///
+/// The metric name is chosen by `tick`, and the value is derived from `tick`
+/// and `worker`. Request errors are ignored.
+async fn do_work(client: &Client, base_url: &str, worker: usize, tick: u64) {
+    let metric = METRICS[tick as usize % METRICS.len()];
+
+    // |sin| keeps the synthetic value within 0..=100.
+    let phase = tick as f64 * 1.3 + worker as f64 * 7.7;
+    let value = phase.sin().abs() * 100.0;
+
+    let submit_url = format!("{base_url}/metrics");
+    let payload = json!({"name": metric, "value": value});
+    let _ = client.post(submit_url).json(&payload).send().await;
+
+    if !tick.is_multiple_of(10) {
+        return;
+    }
+
+    let query_url = format!("{base_url}/metrics/{metric}");
+    let _ = client.get(query_url).send().await;
+}
Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,7 @@`
`2`	`2`	`resolver = "3"`
`3`	`3`	`members = [`
`4`	`4`	`"dial9-tokio-telemetry",`
	`5`	`+ "examples/metrics-service",`
`5`	`6`	`]`
`6`	`7`
`7`	`8`	`[workspace.dependencies]`