Skip to content

Commit b365645

Browse files
authored
chore: add metrics (#40)
* chore: add metrics * nit
1 parent 484e424 commit b365645

File tree

7 files changed

+505
-1
lines changed

7 files changed

+505
-1
lines changed

Cargo.toml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ members = [
1717
"crates/artifacts",
1818
"crates/calibrator",
1919
"crates/rpc",
20+
"crates/metrics",
2021
"programs/fibonacci",
2122
"programs/is-prime",
2223
]
@@ -74,6 +75,7 @@ tonic-build = "0.12.0"
7475
# misc
7576
anyhow = "1.0.71"
7677
async-trait = "0.1.68"
78+
axum = "0.7.4"
7779
backoff = { version = "0.4", features = ["futures", "tokio"] }
7880
bytes = "1.6.0"
7981
config = "0.14.0"
@@ -95,4 +97,13 @@ sqlx = { version = "0.8", features = [
9597
thiserror = "1.0.63"
9698
tokio = { version = "1.0", features = ["full"] }
9799
url = "2.5.0"
98-
reqwest = "0.12.0"
100+
reqwest = "0.12.0"
101+
102+
# metrics
103+
metrics = "0.24.0"
104+
metrics-derive = "0.1"
105+
metrics-exporter-prometheus = { version = "0.16.0", default-features = false }
106+
metrics-process = "2.1.0"
107+
metrics-util = { default-features = false, version = "0.18.0" }
108+
once_cell = "1.18.0"
109+
socket2 = { version = "0.5", default-features = false }

crates/metrics/Cargo.toml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
[package]
2+
name = "spn-metrics"
3+
description = ""
4+
readme = "README.md"
5+
version = { workspace = true }
6+
edition = { workspace = true }
7+
license = { workspace = true }
8+
repository = { workspace = true }
9+
keywords = { workspace = true }
10+
categories = { workspace = true }
11+
12+
[dependencies]
13+
metrics = { workspace = true }
14+
eyre = { workspace = true }
15+
metrics-exporter-prometheus = { workspace = true }
16+
metrics-process = { workspace = true }
17+
metrics-util = { workspace = true }
18+
metrics-derive = { workspace = true }
19+
tokio = { workspace = true }
20+
tracing = { workspace = true }
21+
once_cell = { workspace = true }
22+
23+
[target.'cfg(target_os = "linux")'.dependencies]
24+
procfs = "0.16.0"
25+
26+
[dev-dependencies]
27+
reqwest = { workspace = true }
28+
socket2 = { workspace = true }

crates/metrics/src/hooks.rs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
use metrics_process::Collector;
2+
use once_cell::sync::Lazy;
3+
use std::{fmt, sync::Arc, time::SystemTime};
4+
5+
pub(crate) trait Hook: Fn() + Send + Sync {}
6+
impl<T: Fn() + Send + Sync> Hook for T {}
7+
8+
impl fmt::Debug for Hooks {
9+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
10+
let hooks_len = self.inner.len();
11+
f.debug_struct("Hooks")
12+
.field("inner", &format!("Arc<Vec<Box<dyn Hook>>>, len: {}", hooks_len))
13+
.finish()
14+
}
15+
}
16+
17+
/// Helper type for managing hooks.
18+
#[derive(Clone)]
19+
pub struct Hooks {
20+
inner: Arc<Vec<Box<dyn Hook<Output = ()>>>>,
21+
}
22+
23+
impl Hooks {
24+
/// Create a new set of hooks.
25+
pub fn new() -> Self {
26+
let collector = Collector::default();
27+
let hooks: Vec<Box<dyn Hook<Output = ()>>> = vec![
28+
Box::new(move || collector.collect()),
29+
Box::new(collect_memory_stats),
30+
Box::new(collect_io_stats),
31+
Box::new(collect_uptime_seconds),
32+
];
33+
Self { inner: Arc::new(hooks) }
34+
}
35+
36+
pub(crate) fn iter(&self) -> impl Iterator<Item = &Box<dyn Hook<Output = ()>>> {
37+
self.inner.iter()
38+
}
39+
}
40+
41+
impl Default for Hooks {
42+
fn default() -> Self {
43+
Self::new()
44+
}
45+
}
46+
47+
fn collect_memory_stats() {}
48+
49+
#[cfg(target_os = "linux")]
50+
fn collect_io_stats() {
51+
use metrics::counter;
52+
use tracing::error;
53+
54+
let Ok(process) = procfs::process::Process::myself()
55+
.map_err(|error| error!(%error, "failed to get currently running process"))
56+
else {
57+
return;
58+
};
59+
60+
let Ok(io) = process.io().map_err(
61+
|error| error!(%error, "failed to get io stats for the currently running process"),
62+
) else {
63+
return;
64+
};
65+
66+
counter!("io.rchar").absolute(io.rchar);
67+
counter!("io.wchar").absolute(io.wchar);
68+
counter!("io.syscr").absolute(io.syscr);
69+
counter!("io.syscw").absolute(io.syscw);
70+
counter!("io.read_bytes").absolute(io.read_bytes);
71+
counter!("io.write_bytes").absolute(io.write_bytes);
72+
counter!("io.cancelled_write_bytes").absolute(io.cancelled_write_bytes);
73+
}
74+
75+
#[cfg(not(target_os = "linux"))]
76+
const fn collect_io_stats() {}
77+
78+
/// Global start time of the process.
79+
static START_TIME: Lazy<SystemTime> = Lazy::new(SystemTime::now);
80+
81+
/// Collects and records the process uptime in seconds.
82+
fn collect_uptime_seconds() {
83+
use metrics::gauge;
84+
let uptime = START_TIME.elapsed().unwrap_or_default().as_secs();
85+
let gauge = gauge!("process_uptime_seconds");
86+
gauge.set(uptime as f64);
87+
}

crates/metrics/src/lib.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/// The metrics hooks for prometheus.
2+
pub mod hooks;
3+
pub mod recorder;
4+
/// The metric server serving the metrics.
5+
pub mod server;
6+
pub mod version;
7+
8+
pub use metrics_exporter_prometheus::*;
9+
pub use metrics_process::*;
10+
11+
pub use metrics_derive::Metrics;
12+
13+
pub use metrics;

crates/metrics/src/recorder.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
//! Prometheus recorder
2+
3+
use metrics_exporter_prometheus::{PrometheusBuilder, PrometheusHandle};
4+
use metrics_util::layers::{PrefixLayer, Stack};
5+
use std::sync::OnceLock;
6+
7+
/// Installs the Prometheus recorder as the global recorder.
8+
pub fn get_or_init_prometheus(service_name: &str) -> &'static PrometheusHandle {
9+
PROMETHEUS_RECORDER_HANDLE.get_or_init(|| PrometheusRecorder::install(service_name))
10+
}
11+
12+
/// The default Prometheus recorder handle. We use a global static to ensure that it is only
13+
/// installed once.
14+
static PROMETHEUS_RECORDER_HANDLE: OnceLock<PrometheusHandle> = OnceLock::new();
15+
16+
/// Prometheus recorder installer.
17+
#[derive(Debug)]
18+
pub struct PrometheusRecorder;
19+
20+
impl PrometheusRecorder {
21+
/// Installs Prometheus as the metrics recorder.
22+
pub fn install(service_name: &str) -> PrometheusHandle {
23+
let builder = PrometheusBuilder::new().add_global_label("service", service_name);
24+
let recorder = builder.build_recorder();
25+
let handle = recorder.handle();
26+
27+
// Build metrics stack
28+
Stack::new(recorder)
29+
.push(PrefixLayer::new("spn"))
30+
.install()
31+
.expect("Couldn't set metrics recorder.");
32+
33+
handle
34+
}
35+
}
36+
37+
#[cfg(test)]
38+
mod tests {
39+
use super::*;
40+
// Dependencies using different version of the `metrics` crate (to be exact, 0.21 vs 0.22)
41+
// may not be able to communicate with each other through the global recorder.
42+
//
43+
// This test ensures that `metrics-process` dependency plays well with the current
44+
// `metrics-exporter-prometheus` dependency version.
45+
#[test]
46+
fn process_metrics() {
47+
// initialize the lazy handle
48+
let _ = PROMETHEUS_RECORDER_HANDLE.get_or_init(|| PrometheusRecorder::install("spn"));
49+
50+
let process = metrics_process::Collector::default();
51+
process.describe();
52+
process.collect();
53+
54+
let metrics =
55+
PROMETHEUS_RECORDER_HANDLE.get_or_init(|| PrometheusRecorder::install("spn")).render();
56+
assert!(metrics.contains("process_cpu_seconds_total"), "{metrics:?}");
57+
}
58+
}

0 commit comments

Comments
 (0)