Skip to content

Commit efb67b9

Browse files
committed
Add granular parameters
1 parent 77e82ea commit efb67b9

10 files changed

Lines changed: 1999 additions & 2899 deletions

lustrefs-exporter/src/metrics.rs

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -54,19 +54,11 @@ pub fn build_lustre_stats(output: &Vec<Record>, metrics: &mut Metrics) {
5454

5555
for x in output {
5656
match x {
57-
lustre_collector::Record::Host(x) => {
58-
build_host_stats(x, &mut metrics.host);
59-
}
60-
lustre_collector::Record::LNetStat(x) => {
61-
build_lnet_stats(x, &mut metrics.lnet);
62-
}
63-
lustre_collector::Record::Target(x) => {
64-
build_target_stats(x, metrics, &mut set);
65-
}
66-
lustre_collector::Record::LustreService(x) => {
67-
build_service_stats(x, &mut metrics.service);
68-
}
69-
_ => {}
57+
Record::Host(x) => build_host_stats(x, &mut metrics.host),
58+
Record::LNetStat(x) => build_lnet_stats(x, &mut metrics.lnet),
59+
Record::Target(x) => build_target_stats(x, metrics, &mut set),
60+
Record::LustreService(x) => build_service_stats(x, &mut metrics.service),
61+
_ => (),
7062
}
7163
}
7264
}

lustrefs-exporter/src/routes.rs

Lines changed: 64 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use prometheus_client::{encoding::text::encode, registry::Registry};
2121
use serde::Deserialize;
2222
use std::{
2323
borrow::Cow,
24+
collections::BTreeMap,
2425
io::{self, BufRead as _, BufReader},
2526
};
2627
use tokio::process::Command;
@@ -30,11 +31,31 @@ use tower::{
3031
};
3132
use tower_http::compression::CompressionLayer;
3233

33-
#[derive(Debug, Deserialize)]
34-
pub struct Params {
35-
// Only enable jobstats if "jobstats=true"
36-
#[serde(default)]
37-
jobstats: bool,
34+
#[derive(Debug, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash, Copy, Clone)]
35+
#[serde(rename_all = "snake_case")]
36+
pub enum Dimension {
37+
Jobstats,
38+
Lnet,
39+
Lustre,
40+
LnetStats,
41+
}
42+
43+
pub type Params = BTreeMap<Dimension, bool>;
44+
45+
static DEFAULT_PARAMS: [(Dimension, bool); 3] = [
46+
(Dimension::Lnet, true),
47+
(Dimension::Lustre, true),
48+
(Dimension::LnetStats, true),
49+
];
50+
51+
trait EnableConvenienceExt {
52+
fn enabled(&self, param: &Dimension) -> bool;
53+
}
54+
55+
impl EnableConvenienceExt for Params {
56+
fn enabled(&self, param: &Dimension) -> bool {
57+
self.get(param).copied().unwrap_or_default()
58+
}
3859
}
3960

4061
const TIMEOUT_DURATION_SECS: u64 = 120;
@@ -149,11 +170,18 @@ pub fn lnet_stats_output() -> Command {
149170
pub async fn scrape(Query(params): Query<Params>) -> Result<Response<Body>, Error> {
150171
let mut registry = Registry::default();
151172

152-
if params.jobstats {
153-
let child = tokio::task::spawn_blocking(move || jobstats_metrics_cmd().spawn()).await?;
173+
let mut targets = BTreeMap::from(DEFAULT_PARAMS);
174+
for (param, value) in params {
175+
targets
176+
.entry(param)
177+
.and_modify(|v| *v = value)
178+
.or_insert(value);
179+
}
154180

155-
if let Ok(mut child) =
156-
child.inspect_err(|e| tracing::debug!("Error while spawning lctl jobstats: {e}"))
181+
if targets.enabled(&Dimension::Jobstats) {
182+
if let Ok(mut child) = tokio::task::spawn_blocking(move || jobstats_metrics_cmd().spawn())
183+
.await?
184+
.inspect_err(|e| tracing::debug!("Error while spawning lctl jobstats: {e}"))
157185
{
158186
let reader = BufReader::with_capacity(
159187
128 * 1_024,
@@ -184,23 +212,28 @@ pub async fn scrape(Query(params): Query<Params>) -> Result<Response<Body>, Erro
184212
.await?
185213
.register_metric(&mut registry);
186214
}
187-
} else {
215+
}
216+
217+
if targets.enabled(&Dimension::Lustre) {
188218
let mut output = vec![];
189219

190220
let lctl = lustre_metrics_output().output().await?;
191-
192221
let mut lctl_output = parse_lctl_output(&lctl.stdout)?;
193222

194223
output.append(&mut lctl_output);
195224

196-
let lnetctl = net_show_output().output().await?;
225+
// Build the lustre stats
226+
let mut opentelemetry_metrics = Metrics::default();
197227

198-
let mut lnetctl_output = parse_lnetctl_output(&lnetctl.stdout)?;
228+
// Build and register Lustre metrics
229+
metrics::build_lustre_stats(&output, &mut opentelemetry_metrics);
230+
opentelemetry_metrics.register_metric(&mut registry);
231+
}
199232

200-
output.append(&mut lnetctl_output);
233+
if targets.enabled(&Dimension::LnetStats) {
234+
let mut output = vec![];
201235

202236
let lnetctl_stats_output = lnet_stats_output().output().await?;
203-
204237
let mut lnetctl_stats_record = parse_lnetctl_stats(&lnetctl_stats_output.stdout)?;
205238

206239
output.append(&mut lnetctl_stats_record);
@@ -213,6 +246,22 @@ pub async fn scrape(Query(params): Query<Params>) -> Result<Response<Body>, Erro
213246
opentelemetry_metrics.register_metric(&mut registry);
214247
}
215248

249+
if targets.enabled(&Dimension::Lnet) {
250+
let mut output = vec![];
251+
252+
let lnetctl = net_show_output().output().await?;
253+
let mut lnetctl_output = parse_lnetctl_output(&lnetctl.stdout)?;
254+
255+
output.append(&mut lnetctl_output);
256+
257+
// Build the lustre stats
258+
let mut opentelemetry_metrics = Metrics::default();
259+
260+
// Build and register Lustre metrics
261+
metrics::build_lustre_stats(&output, &mut opentelemetry_metrics);
262+
opentelemetry_metrics.register_metric(&mut registry);
263+
}
264+
216265
let mut buffer = String::new();
217266
encode(&mut buffer, &registry)?;
218267

lustrefs-exporter/src/snapshots/lustrefs_exporter__routes__tests__default.snap

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -847,30 +847,6 @@ lustre_oss_ost_create_stats{operation="req_qdepth",units="reqs"} 285063
847847
lustre_oss_ost_create_stats{operation="req_timeout",units="secs"} 285063
848848
lustre_oss_ost_create_stats{operation="req_waittime",units="usecs"} 285063
849849
lustre_oss_ost_create_stats{operation="reqbuf_avail",units="bufs"} 581187
850-
# HELP lustre_send_count_total Total number of messages that have been sent.
851-
# TYPE lustre_send_count_total counter
852-
lustre_send_count_total{nid="0@lo"} 180076
853-
lustre_send_count_total{nid="172.16.0.24@tcp"} 464970
854-
lustre_send_count_total{nid="172.16.0.25@tcp"} 464886
855-
# HELP lustre_receive_count_total Total number of messages that have been received.
856-
# TYPE lustre_receive_count_total counter
857-
lustre_receive_count_total{nid="0@lo"} 180072
858-
lustre_receive_count_total{nid="172.16.0.24@tcp"} 464963
859-
lustre_receive_count_total{nid="172.16.0.25@tcp"} 464877
860-
# HELP lustre_drop_count_total Total number of messages that have been dropped.
861-
# TYPE lustre_drop_count_total counter
862-
lustre_drop_count_total{nid="0@lo"} 4
863-
lustre_drop_count_total{nid="172.16.0.24@tcp"} 4
864-
lustre_drop_count_total{nid="172.16.0.25@tcp"} 3
865-
# HELP lustre_send_bytes_total Total number of bytes that have been sent.
866-
# TYPE lustre_send_bytes_total counter
867-
lustre_send_bytes_total{} 261865376
868-
# HELP lustre_receive_bytes_total Total number of bytes that have been received.
869-
# TYPE lustre_receive_bytes_total counter
870-
lustre_receive_bytes_total{} 254091008
871-
# HELP lustre_drop_bytes_total Total number of bytes that have been dropped.
872-
# TYPE lustre_drop_bytes_total counter
873-
lustre_drop_bytes_total{} 4832
874850
# HELP lustre_read_samples_total Total number of reads that have been recorded.
875851
# TYPE lustre_read_samples_total counter
876852
lustre_read_samples_total{component="ost",operation="read",target="ai400x2-OST0000"} 71482249
@@ -943,4 +919,28 @@ lustre_stats_time_total{component="mdt",operation="open",target="ai400x2-MDT0000
943919
lustre_stats_time_total{component="mdt",operation="setattr",target="ai400x2-MDT0000"} 22544
944920
lustre_stats_time_total{component="mdt",operation="statfs",target="ai400x2-MDT0000"} 1292933
945921
lustre_stats_time_total{component="mdt",operation="sync",target="ai400x2-MDT0000"} 7767
922+
# HELP lustre_send_bytes_total Total number of bytes that have been sent.
923+
# TYPE lustre_send_bytes_total counter
924+
lustre_send_bytes_total{} 261865376
925+
# HELP lustre_receive_bytes_total Total number of bytes that have been received.
926+
# TYPE lustre_receive_bytes_total counter
927+
lustre_receive_bytes_total{} 254091008
928+
# HELP lustre_drop_bytes_total Total number of bytes that have been dropped.
929+
# TYPE lustre_drop_bytes_total counter
930+
lustre_drop_bytes_total{} 4832
931+
# HELP lustre_send_count_total Total number of messages that have been sent.
932+
# TYPE lustre_send_count_total counter
933+
lustre_send_count_total{nid="0@lo"} 180076
934+
lustre_send_count_total{nid="172.16.0.24@tcp"} 464970
935+
lustre_send_count_total{nid="172.16.0.25@tcp"} 464886
936+
# HELP lustre_receive_count_total Total number of messages that have been received.
937+
# TYPE lustre_receive_count_total counter
938+
lustre_receive_count_total{nid="0@lo"} 180072
939+
lustre_receive_count_total{nid="172.16.0.24@tcp"} 464963
940+
lustre_receive_count_total{nid="172.16.0.25@tcp"} 464877
941+
# HELP lustre_drop_count_total Total number of messages that have been dropped.
942+
# TYPE lustre_drop_count_total counter
943+
lustre_drop_count_total{nid="0@lo"} 4
944+
lustre_drop_count_total{nid="172.16.0.24@tcp"} 4
945+
lustre_drop_count_total{nid="172.16.0.25@tcp"} 3
946946
# EOF

lustrefs-exporter/src/snapshots/lustrefs_exporter__routes__tests__jobstats=true.snap

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,30 +1153,6 @@ lustre_oss_ost_create_stats{operation="req_qdepth",units="reqs"} 285063
11531153
lustre_oss_ost_create_stats{operation="req_timeout",units="secs"} 285063
11541154
lustre_oss_ost_create_stats{operation="req_waittime",units="usecs"} 285063
11551155
lustre_oss_ost_create_stats{operation="reqbuf_avail",units="bufs"} 581187
1156-
# HELP lustre_send_count_total Total number of messages that have been sent.
1157-
# TYPE lustre_send_count_total counter
1158-
lustre_send_count_total{nid="0@lo"} 180076
1159-
lustre_send_count_total{nid="172.16.0.24@tcp"} 464970
1160-
lustre_send_count_total{nid="172.16.0.25@tcp"} 464886
1161-
# HELP lustre_receive_count_total Total number of messages that have been received.
1162-
# TYPE lustre_receive_count_total counter
1163-
lustre_receive_count_total{nid="0@lo"} 180072
1164-
lustre_receive_count_total{nid="172.16.0.24@tcp"} 464963
1165-
lustre_receive_count_total{nid="172.16.0.25@tcp"} 464877
1166-
# HELP lustre_drop_count_total Total number of messages that have been dropped.
1167-
# TYPE lustre_drop_count_total counter
1168-
lustre_drop_count_total{nid="0@lo"} 4
1169-
lustre_drop_count_total{nid="172.16.0.24@tcp"} 4
1170-
lustre_drop_count_total{nid="172.16.0.25@tcp"} 3
1171-
# HELP lustre_send_bytes_total Total number of bytes that have been sent.
1172-
# TYPE lustre_send_bytes_total counter
1173-
lustre_send_bytes_total{} 261865376
1174-
# HELP lustre_receive_bytes_total Total number of bytes that have been received.
1175-
# TYPE lustre_receive_bytes_total counter
1176-
lustre_receive_bytes_total{} 254091008
1177-
# HELP lustre_drop_bytes_total Total number of bytes that have been dropped.
1178-
# TYPE lustre_drop_bytes_total counter
1179-
lustre_drop_bytes_total{} 4832
11801156
# HELP lustre_read_samples_total Total number of reads that have been recorded.
11811157
# TYPE lustre_read_samples_total counter
11821158
lustre_read_samples_total{component="ost",operation="read",target="ai400x2-OST0000"} 71482249
@@ -1249,4 +1225,28 @@ lustre_stats_time_total{component="mdt",operation="open",target="ai400x2-MDT0000
12491225
lustre_stats_time_total{component="mdt",operation="setattr",target="ai400x2-MDT0000"} 22544
12501226
lustre_stats_time_total{component="mdt",operation="statfs",target="ai400x2-MDT0000"} 1292933
12511227
lustre_stats_time_total{component="mdt",operation="sync",target="ai400x2-MDT0000"} 7767
1228+
# HELP lustre_send_bytes_total Total number of bytes that have been sent.
1229+
# TYPE lustre_send_bytes_total counter
1230+
lustre_send_bytes_total{} 261865376
1231+
# HELP lustre_receive_bytes_total Total number of bytes that have been received.
1232+
# TYPE lustre_receive_bytes_total counter
1233+
lustre_receive_bytes_total{} 254091008
1234+
# HELP lustre_drop_bytes_total Total number of bytes that have been dropped.
1235+
# TYPE lustre_drop_bytes_total counter
1236+
lustre_drop_bytes_total{} 4832
1237+
# HELP lustre_send_count_total Total number of messages that have been sent.
1238+
# TYPE lustre_send_count_total counter
1239+
lustre_send_count_total{nid="0@lo"} 180076
1240+
lustre_send_count_total{nid="172.16.0.24@tcp"} 464970
1241+
lustre_send_count_total{nid="172.16.0.25@tcp"} 464886
1242+
# HELP lustre_receive_count_total Total number of messages that have been received.
1243+
# TYPE lustre_receive_count_total counter
1244+
lustre_receive_count_total{nid="0@lo"} 180072
1245+
lustre_receive_count_total{nid="172.16.0.24@tcp"} 464963
1246+
lustre_receive_count_total{nid="172.16.0.25@tcp"} 464877
1247+
# HELP lustre_drop_count_total Total number of messages that have been dropped.
1248+
# TYPE lustre_drop_count_total counter
1249+
lustre_drop_count_total{nid="0@lo"} 4
1250+
lustre_drop_count_total{nid="172.16.0.24@tcp"} 4
1251+
lustre_drop_count_total{nid="172.16.0.25@tcp"} 3
12521252
# EOF

0 commit comments

Comments
 (0)