Test the runtime metrics reporter as well

Ariel Ben-Yehuda · Ariel Ben-Yehuda · commit 054745a11a32 · 2025-03-04T15:49:00.000Z
diff --git a/Cargo.toml b/Cargo.toml
@@ -37,6 +37,8 @@ num_cpus = "1.13.1"
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = "1.0.79"
 tokio = { version = "1.41.0", features = ["full", "rt", "time", "macros", "test-util"] }
+metrics-util = { version = "0.19", features = ["debugging"] }
+metrics = { version = "0.24" }
 
 [[example]]
 name = "runtime"
diff --git a/src/runtime/metrics_integration.rs b/src/runtime/metrics_integration.rs
@@ -1,11 +1,13 @@
 use std::{fmt, time::Duration};
 
+use tokio::runtime::Handle;
+
 use super::{RuntimeIntervals, RuntimeMetrics, RuntimeMonitor};
 
 /// A reporter builder
 pub struct RuntimeMetricsReporterBuilder {
     interval: Duration,
-    metrics_transformer: Box<dyn FnMut(&'static str) -> metrics::Key>,
+    metrics_transformer: Box<dyn FnMut(&'static str) -> metrics::Key + Send>,
 }
 
 impl fmt::Debug for RuntimeMetricsReporterBuilder {
@@ -35,7 +37,12 @@ impl RuntimeMetricsReporterBuilder {
     }
 
     /// Build the reporter
-    pub fn build(mut self, monitor: RuntimeMonitor) -> RuntimeMetricsReporter {
+    pub fn build(self) -> RuntimeMetricsReporter {
+        self.build_with_monitor(RuntimeMonitor::new(&Handle::current()))
+    }
+
+    /// Build the reporter with a specific [`RuntimeMonitor`]
+    pub fn build_with_monitor(mut self, monitor: RuntimeMonitor) -> RuntimeMetricsReporter {
         RuntimeMetricsReporter {
             interval: self.interval,
             intervals: monitor.intervals(),
@@ -50,13 +57,13 @@ impl RuntimeMetricsReporterBuilder {
     }
 
     /// Run the reporter, describing the metrics beforehand
-    pub async fn describe_and_run(self, monitor: RuntimeMonitor) {
-        self.describe().build(monitor).run().await;
+    pub async fn describe_and_run(self) {
+        self.describe().build().run().await;
     }
 
     /// Run the reporter, not describing the metrics beforehand
-    pub async fn run_without_describing(self, monitor: RuntimeMonitor) {
-        self.build(monitor).run().await;
+    pub async fn run_without_describing(self) {
+        self.build().run().await;
     }
 }
 
@@ -77,15 +84,16 @@ macro_rules! metric_key {
     ($transform_fn:ident, $name:ident) => ($transform_fn(concat!("tokio_", stringify!($name))))
 }
 
+// Call `trim` on the doc string: a `///` doc comment keeps the space that follows the slashes.
 macro_rules! describe_metric_ref {
     ($transform_fn:ident, $doc:expr, $name:ident: Counter<$unit:ident> []) => (
-        metrics::describe_counter!(metric_key!($transform_fn, $name).name().to_owned(), metrics::Unit::$unit, $doc)
+        metrics::describe_counter!(metric_key!($transform_fn, $name).name().to_owned(), metrics::Unit::$unit, $doc.trim())
     );
     ($transform_fn:ident, $doc:expr, $name:ident: Gauge<$unit:ident> []) => (
-        metrics::describe_gauge!(metric_key!($transform_fn, $name).name().to_owned(), metrics::Unit::$unit, $doc)
+        metrics::describe_gauge!(metric_key!($transform_fn, $name).name().to_owned(), metrics::Unit::$unit, $doc.trim())
     );
     ($transform_fn:ident, $doc:expr, $name:ident: Histogram<$unit:ident> []) => (
-        metrics::describe_histogram!(metric_key!($transform_fn, $name).name().to_owned(), metrics::Unit::$unit, $doc)
+        metrics::describe_histogram!(metric_key!($transform_fn, $name).name().to_owned(), metrics::Unit::$unit, $doc.trim())
     );
 }
 
@@ -260,7 +268,11 @@ impl MyMetricOp for (&metrics::Histogram, Vec<u64>) {
     fn op(self, tokio: &tokio::runtime::RuntimeMetrics) {
         for (i, bucket) in self.1.iter().enumerate() {
             let range = tokio.poll_time_histogram_bucket_range(i);
-            self.0.record_many(((range.start + range.end).as_micros() / 2) as f64, *bucket as usize);
+            if *bucket > 0 {
+                // Emit `range.start` (not the midpoint) to avoid very large values for the open-ended bucket.
+                // FIXME: decide whether a different representative value should be emitted here.
+                self.0.record_many(range.start.as_micros() as f64, *bucket as usize);
+            }
         }
     }
 }
diff --git a/tests/auto_metrics.rs b/tests/auto_metrics.rs
@@ -0,0 +1,100 @@
+macro_rules! cfg_rt {
+    ($($item:item)*) => {
+        $(
+            #[cfg(all(tokio_unstable, feature = "rt"))]
+            #[cfg_attr(docsrs, doc(cfg(all(tokio_unstable, feature = "rt"))))]
+            $item
+        )*
+    };
+}
+
+cfg_rt! {
+    #[cfg(feature = "metrics-integration")]
+    #[test]
+    fn main() {
+        use metrics_util::debugging::DebugValue;
+        use std::{sync::Arc, time::Duration};
+        use tokio::runtime::{HistogramConfiguration, LogHistogram};
+        use tokio_metrics::RuntimeMetricsReporterBuilder;
+
+        let worker_threads = 10;
+
+        let config = HistogramConfiguration::log(LogHistogram::default());
+
+        let rt = tokio::runtime::Builder::new_multi_thread()
+            .enable_time()
+            .enable_metrics_poll_time_histogram()
+            .metrics_poll_time_histogram_configuration(config)
+            .worker_threads(worker_threads)
+            .build()
+            .unwrap();
+
+        rt.block_on(async {
+            let recorder = Arc::new(metrics_util::debugging::DebuggingRecorder::new());
+            metrics::set_global_recorder(recorder.clone()).unwrap();
+            tokio::task::spawn(RuntimeMetricsReporterBuilder::default().with_interval(Duration::from_millis(100)).describe_and_run());
+            let mut done = false;
+            for _ in 0..1000 {
+                tokio::time::sleep(Duration::from_millis(10)).await;
+                let snapshot = recorder.snapshotter().snapshot().into_vec();
+                if let Some(metric) = snapshot.iter().find(|metrics| {
+                    metrics.0.key().name() == "tokio_workers_count"
+                }) {
+                    done = true;
+                    match metric {
+                        (_, Some(metrics::Unit::Count), Some(s), DebugValue::Gauge(count))
+                            if &s[..] == "The number of worker threads used by the runtime" =>
+                        {
+                            assert_eq!(count.into_inner() as usize, worker_threads);
+                        }
+                        _ => panic!("bad {metric:?}"),
+                    }
+                    break;
+                }
+            }
+            assert!(done, "metric not found");
+            tokio::task::spawn(async {
+                // spawn a task with a long poll time (it blocks its worker), then check that we can find it
+                std::thread::sleep(std::time::Duration::from_millis(100));
+            }).await.unwrap();
+            let mut long_polls_found = 0;
+            for _ in 0..15 {
+                tokio::time::sleep(Duration::from_millis(100)).await;
+                let snapshot = recorder.snapshotter().snapshot().into_vec();
+                if let Some(metric) = snapshot.iter().find(|metrics| {
+                    metrics.0.key().name() == "tokio_poll_time_histogram"
+                }) {
+                    match metric {
+                        (_, Some(metrics::Unit::Microseconds), Some(s), DebugValue::Histogram(hist))
+                            if &s[..] == "A histogram of task polls since the previous probe grouped by poll times" =>
+                        {
+                            for entry in hist {
+                                // look for a poll of around 100 milliseconds
+                                // the default bucket holding a 100 ms poll starts between 100/1.25 = 80 ms and 100 ms (values are in microseconds)
+                                if entry.into_inner() >= 80e3 && entry.into_inner() <= 250e3 {
+                                    long_polls_found += 1;
+                                }
+                            }
+                        }
+                        _ => panic!("bad {metric:?}"),
+                    }
+                }
+                let metric = snapshot.iter().find(|metrics| {
+                    metrics.0.key().name() == "tokio_total_polls_count"
+                }).unwrap();
+                match metric {
+                    (_, Some(metrics::Unit::Count), Some(s), DebugValue::Counter(count))
+                        if &s[..] == "The number of tasks that have been polled across all worker threads" && *count > 0 =>
+                    {
+                    }
+                    _ => panic!("bad {metric:?}"),
+                }
+                if long_polls_found > 0 {
+                    break
+                }
+            }
+            // check that we found exactly 1 poll in the 100ms region
+            assert_eq!(long_polls_found, 1);
+        });
+    }
+}