Skip to content

Commit 5319358

Browse files
authored
Emit SUT metrics in real time (#161)
* disk metrics should be counters
  Entire-Checkpoint: 2b1195615a61
* make SUT metrics streamable
  Entire-Checkpoint: 4d2e6e04b1b1
* streaming SUT metrics
* simplify
  Entire-Checkpoint: 80f190a205d4
1 parent 6a61b50 commit 5319358

4 files changed

Lines changed: 224 additions & 121 deletions

File tree

crates/system-adapter-protocol/src/lib.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -222,22 +222,22 @@ pub struct MetricsRequest {
222222
/// Resource utilization snapshot from the system under test
///
/// Every field is optional; a field that is `None` is omitted from the serialized
/// output (see the `skip_serializing_if` attribute on each field).
223223
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
224224
pub struct ResourceMetrics {
225-
/// CPU utilization as a percentage (0.0–100.0)
225+
/// Cumulative CPU seconds used
/// NOTE(review): the field name still ends in `_percent` although the value is now
/// documented as cumulative seconds — confirm that consumers interpret it as seconds.
226226
#[serde(skip_serializing_if = "Option::is_none")]
227227
pub cpu_usage_percent: Option<f64>,
228228
/// Resident memory usage in bytes
229229
#[serde(skip_serializing_if = "Option::is_none")]
230230
pub memory_usage_bytes: Option<u64>,
231-
/// Disk bytes read since last scrape
231+
/// Cumulative disk bytes read
232232
#[serde(skip_serializing_if = "Option::is_none")]
233233
pub disk_read_bytes: Option<u64>,
234-
/// Disk bytes written since last scrape
234+
/// Cumulative disk bytes written
235235
#[serde(skip_serializing_if = "Option::is_none")]
236236
pub disk_write_bytes: Option<u64>,
237-
/// Disk read IOPS since last scrape
237+
/// Cumulative disk read operations
/// NOTE(review): the field name says `iops`, but the value is documented as a
/// cumulative operation count rather than a per-second rate — confirm with callers.
238238
#[serde(skip_serializing_if = "Option::is_none")]
239239
pub disk_read_iops: Option<u64>,
240-
/// Disk write IOPS since last scrape
240+
/// Cumulative disk write operations
/// NOTE(review): the field name says `iops`, but the value is documented as a
/// cumulative operation count rather than a per-second rate — confirm with callers.
241241
#[serde(skip_serializing_if = "Option::is_none")]
242242
pub disk_write_iops: Option<u64>,
243243
}

crates/test-framework/src/telemetry/mod.rs

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use anyhow::Result;
2323

2424
use opentelemetry::metrics::{Meter, MeterProvider};
2525

26+
use opentelemetry_sdk::metrics::PeriodicReader;
2627
use opentelemetry_sdk::metrics::exporter::PushMetricExporter;
2728
use opentelemetry_sdk::metrics::reader::MetricReader;
2829
use opentelemetry_sdk::{
@@ -229,3 +230,99 @@ impl Telemetry {
229230
Ok(())
230231
}
231232
}
233+
234+
/// A dedicated periodic metrics pipeline for SUT resource metrics.
235+
///
236+
/// Exports to the Arrow backend when the API-key environment variable whose name is
/// passed to [`SutMetricsPipeline::new`] is set (e.g. `SPICEAI_BENCHMARK_METRICS_KEY`).
237+
/// Additionally exports to an OTLP endpoint when configured.
238+
/// Instruments created on the [`Meter`] returned by [`SutMetricsPipeline::meter`]
/// are exported every 5 seconds by each configured reader.
239+
pub struct SutMetricsPipeline {
240+
// Meter provider holding the configured periodic readers; flushed and shut down
// by `shutdown`.
provider: SdkMeterProvider,
241+
// Meter named "spicebench-sut", obtained from `provider` in `new`.
meter: Meter,
242+
}
243+
244+
impl SutMetricsPipeline {
245+
/// Create the pipeline.
246+
///
247+
/// - `api_key_name`: env var name for the Arrow backend API key (e.g. `"SPICEAI_BENCHMARK_METRICS_KEY"`).
248+
/// - `otlp_endpoint`: optional OTLP gRPC endpoint to also export to.
249+
/// - `resource`: OTel resource attributes for exported metrics.
///
/// Every reader that gets configured is a `PeriodicReader` with a 5-second interval.
/// If reading `api_key_name` from the environment fails, or the OTLP exporter cannot
/// be built, a message is written to stderr and that reader is skipped; the provider
/// is still built with whatever readers were added before that point.
///
/// # Errors
///
/// Returns an error if building the Arrow telemetry exporter fails (propagated
/// with `?`). The other failure paths are logged and do not abort construction.
250+
pub async fn new(
251+
api_key_name: &str,
252+
otlp_endpoint: Option<&str>,
253+
resource: Resource,
254+
) -> Result<Self> {
255+
let mut builder = SdkMeterProvider::builder().with_resource(resource);
256+
257+
match std::env::var(api_key_name) {
258+
Ok(key) => {
259+
let token = Arc::new(SecretString::new(key.into()));
260+
// Arrow periodic reader (always, when API key is present)
261+
let arrow_exporter = otel_arrow::OtelArrowExporter::new(
262+
TelemetryExporterBuilder::new()
263+
.with_credentials(flight_client::Credentials::Bearer {
264+
token,
265+
prefix: false,
266+
})
267+
.with_service_name("benchmarks_telemetry".into())
268+
.with_endpoint(Arc::clone(&ENDPOINT))
269+
.build()
270+
.await?,
271+
);
272+
let reader = PeriodicReader::builder(arrow_exporter)
273+
.with_interval(Duration::from_secs(5))
274+
.build();
275+
builder = builder.with_reader(reader);
276+
println!(
277+
"SUT metrics: Arrow periodic exporter enabled (endpoint: {})",
278+
*ENDPOINT
279+
)
280+
}
281+
// Reached when the environment variable lookup fails, so no Arrow reader is
// added. Note that the message below names the exporter, while `e` is the
// `std::env::var` error.
Err(e) => {
282+
eprintln!("Failed to create Arrow exporter for SUT metrics: {e}");
283+
}
284+
};
285+
286+
// OTLP periodic reader (when --otlp-endpoint is configured)
287+
if let Some(endpoint) = otlp_endpoint {
288+
match MetricExporter::builder()
289+
.with_tonic()
290+
.with_timeout(Duration::from_secs(10))
291+
.with_endpoint(endpoint)
292+
.build()
293+
{
294+
Ok(otlp_exporter) => {
295+
let reader = PeriodicReader::builder(otlp_exporter)
296+
.with_interval(Duration::from_secs(5))
297+
.build();
298+
builder = builder.with_reader(reader);
299+
println!("SUT metrics: OTLP periodic exporter enabled (endpoint: {endpoint})");
300+
}
301+
// Best-effort: an OTLP exporter build failure is logged and construction continues.
Err(e) => {
302+
eprintln!("Failed to create OTLP exporter for SUT metrics: {e}");
303+
}
304+
}
305+
}
306+
307+
// The provider is built even if no reader was added above.
let provider = builder.build();
308+
let meter = provider.meter("spicebench-sut");
309+
310+
Ok(Self { provider, meter })
311+
}
312+
313+
/// Get the meter for creating SUT instruments.
///
/// Returns a clone of the `"spicebench-sut"` meter created in [`SutMetricsPipeline::new`].
314+
#[must_use]
315+
pub fn meter(&self) -> Meter {
316+
self.meter.clone()
317+
}
318+
319+
/// Flush and shut down the pipeline.
///
/// Calls `force_flush` and then `shutdown` on the provider. A failure in either
/// step is written to stderr and not returned; `shutdown` is attempted even when
/// the flush fails.
320+
pub fn shutdown(&self) {
321+
if let Err(e) = self.provider.force_flush() {
322+
eprintln!("Failed to flush SUT metrics pipeline: {e}");
323+
}
324+
if let Err(e) = self.provider.shutdown() {
325+
eprintln!("Failed to shutdown SUT metrics pipeline: {e}");
326+
}
327+
}
328+
}

0 commit comments

Comments
 (0)