Skip to content

Commit 1021bd7

Browse files
committed
feat: add Prometheus metrics foundation with /metrics admin endpoint
- Add /metrics scrape endpoint to the admin listener alongside /healthy and /ready.
- Install metrics + metrics-exporter-prometheus as workspace dependencies.
- Emit praxis_http_requests_total counter and praxis_http_request_duration_seconds histogram from both HTTP handler logging hooks.
- Labels: method, status_class (2xx/3xx/4xx/5xx), route (placeholder unknown), cluster (from router or none).

Signed-off-by: Brent Salisbury <bsalisbu@redhat.com>
1 parent 771bcaa commit 1021bd7

13 files changed

Lines changed: 404 additions & 25 deletions

File tree

Cargo.lock

Lines changed: 87 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ criterion = { version = "0.8.2", features = ["async_tokio"] }
4545
futures = "0.3.32"
4646
h2 = "0.4.13"
4747
http = "1.4.0"
48+
metrics = "0.24.2"
49+
metrics-exporter-prometheus = { version = "0.16.2", default-features = false }
4850
# Pinned to match Pingora's dependency.
4951
nix = "0.24.3"
5052
notify = "7.0.0"

docs/configuration.md

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,22 @@ insecure_options: # Optional. Dev/test overrides. See development.md.
2020
## Admin
2121
2222
`admin.address` binds a separate HTTP listener that serves
23-
`/ready` and `/healthy`. `/healthy` returns `200 OK` with
24-
`{"status":"ok"}` once the server is accepting
25-
connections (liveness). `/ready` returns per-cluster
26-
health status with healthy/unhealthy/total counts when
27-
active health checks are configured; it returns 503
28-
when any cluster has zero healthy endpoints. Without
29-
health checks, `/ready` returns `{"status":"ok"}`. Any
30-
other path returns 404. Useful for orchestrator health
31-
checks without exposing them on the main listeners.
23+
`/healthy`, `/ready`, and `/metrics`.
24+
25+
- `/healthy` returns `200 OK` with `{"status":"ok"}`
26+
once the server is accepting connections (liveness).
27+
- `/ready` returns per-cluster health status with
28+
healthy/unhealthy/total counts when active health
29+
checks are configured; it returns 503 when any
30+
cluster has zero healthy endpoints. Without health
31+
checks, `/ready` returns `{"status":"ok"}`.
32+
- `/metrics` returns Prometheus text exposition format
33+
with HTTP request metrics (`praxis_http_requests_total`,
34+
`praxis_http_request_duration_seconds`).
35+
36+
Any other path returns 404. Useful for orchestrator
37+
health checks and monitoring without exposing them on
38+
the main listeners.
3239

3340
```yaml
3441
admin:

examples/configs/operations/admin-interface.yaml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
11
# Admin Interface
22
#
3-
# Exposes an admin endpoint for operational health checks
4-
# and readiness probes. The admin listener is separate from
5-
# data-plane traffic and serves:
3+
# Exposes an admin endpoint for operational health checks,
4+
# readiness probes, and Prometheus metrics. The admin listener
5+
# is separate from data-plane traffic and serves:
66
#
7-
# GET /health liveness probe (always 200)
7+
# GET /healthy liveness probe (always 200)
88
# GET /ready readiness probe (checks upstream clusters)
9+
# GET /metrics Prometheus text exposition format
910
#
1011
# With `verbose: true`, the /ready response includes
1112
# per-cluster health detail in the body.
1213
#
1314
# Usage:
1415
# cargo run -p praxis -- -c examples/configs/operations/admin-interface.yaml
15-
# curl http://localhost:9901/health # liveness
16+
# curl http://localhost:9901/healthy # liveness
1617
# curl http://localhost:9901/ready # readiness (verbose)
18+
# curl http://localhost:9901/metrics # prometheus scrape
1719

1820
admin:
1921
address: "127.0.0.1:9901"

protocol/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ async-trait = { workspace = true }
2121
bytes = { workspace = true }
2222
futures = { workspace = true }
2323
http = { workspace = true }
24+
metrics = { workspace = true }
25+
metrics-exporter-prometheus = { workspace = true }
2426
pingora-core = { workspace = true }
2527
pingora-http = { workspace = true }
2628
pingora-proxy = { workspace = true }

protocol/src/http/pingora/context.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ pub struct PingoraRequestCtx {
5353
/// bytes are raw protocol frames (e.g. `WebSocket`), not HTTP bodies.
5454
pub connection_upgraded: bool,
5555

56+
/// Cluster name snapshot retained for metrics emission in the
57+
/// `logging()` hook, after `cluster` has been consumed by filter
58+
/// context construction.
59+
pub metrics_cluster: Option<Arc<str>>,
60+
5661
/// Pre-read body chunks (`StreamBuffer` mode). When `StreamBuffer` is
5762
/// active, the body is read during `request_filter` (before upstream
5863
/// selection) so that body-based routing can influence `upstream_peer`.
@@ -237,6 +242,7 @@ impl Default for PingoraRequestCtx {
237242
client_http_version: None,
238243
cluster: None,
239244
connection_upgraded: false,
245+
metrics_cluster: None,
240246
pre_read_body: None,
241247
request_body_buffer: None,
242248
request_body_bytes: 0,

protocol/src/http/pingora/handler/mod.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use praxis_filter::{CompressionConfig, FilterPipeline};
1111
use tokio::sync::Semaphore;
1212
use tracing::{debug, warn};
1313

14-
use super::context::PingoraRequestCtx;
14+
use super::{context::PingoraRequestCtx, metrics};
1515

1616
/// Shared hop-by-hop header stripping logic.
1717
mod hop_by_hop;
@@ -213,6 +213,27 @@ async fn logging_cleanup(pipeline: &FilterPipeline, ctx: &mut PingoraRequestCtx)
213213
}
214214
}
215215

216+
/// Emit Prometheus metrics for a completed HTTP request.
217+
fn emit_request_metrics(session: &Session, ctx: &PingoraRequestCtx) {
218+
let status_code = session.response_written().map_or(0, |resp| resp.status.as_u16());
219+
let status_class = metrics::status_class(status_code);
220+
221+
let raw_method = ctx.request_snapshot.as_ref().map_or("UNKNOWN", |r| r.method.as_str());
222+
let method = metrics::method_label(raw_method);
223+
224+
let cluster_name: &str = ctx.metrics_cluster.as_deref().unwrap_or("none");
225+
226+
let labels = metrics::RequestMetricLabels {
227+
method,
228+
status_class,
229+
route: "unknown",
230+
cluster: cluster_name,
231+
};
232+
233+
let duration_secs = ctx.request_start.elapsed().as_secs_f64();
234+
metrics::record_request_metrics(&labels, duration_secs);
235+
}
236+
216237
/// Build [`HttpServerOptions`] with h2c enabled.
217238
///
218239
/// [`HttpServerOptions`]: pingora_core::apps::HttpServerOptions

protocol/src/http/pingora/handler/no_body.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ use tokio::sync::Semaphore;
1717
use tracing::{debug, warn};
1818

1919
use super::{
20-
adjust_compression, handle_connect_failure, logging_cleanup, request_filter, response_filter, upstream_peer,
21-
upstream_request, via,
20+
adjust_compression, emit_request_metrics, handle_connect_failure, logging_cleanup, request_filter, response_filter,
21+
upstream_peer, upstream_request, via,
2222
};
2323
use crate::http::pingora::context::PingoraRequestCtx;
2424

@@ -164,7 +164,8 @@ impl ProxyHttp for PingoraHttpHandlerNoBody {
164164
upstream_peer::execute(ctx)
165165
}
166166

167-
async fn logging(&self, _session: &mut Session, _e: Option<&pingora_core::Error>, ctx: &mut Self::CTX) {
167+
async fn logging(&self, session: &mut Session, _e: Option<&pingora_core::Error>, ctx: &mut Self::CTX) {
168+
emit_request_metrics(session, ctx);
168169
logging_cleanup(&self.pipeline, ctx).await;
169170
}
170171
}

protocol/src/http/pingora/handler/request_filter/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ async fn run_pipeline(
148148
};
149149

150150
ctx.request_snapshot = Some(request);
151+
ctx.metrics_cluster = cluster.clone();
151152

152153
match action {
153154
Ok(FilterAction::Continue | FilterAction::Release) => {

protocol/src/http/pingora/handler/with_body.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ use tokio::sync::Semaphore;
1818
use tracing::{debug, warn};
1919

2020
use super::{
21-
adjust_compression, handle_connect_failure, logging_cleanup, request_body_filter, request_filter,
22-
response_body_filter, response_filter, upstream_peer, upstream_request, via,
21+
adjust_compression, emit_request_metrics, handle_connect_failure, logging_cleanup, request_body_filter,
22+
request_filter, response_body_filter, response_filter, upstream_peer, upstream_request, via,
2323
};
2424
use crate::http::pingora::context::PingoraRequestCtx;
2525

@@ -199,7 +199,8 @@ impl ProxyHttp for PingoraHttpHandler {
199199
upstream_peer::execute(ctx)
200200
}
201201

202-
async fn logging(&self, _session: &mut Session, _e: Option<&pingora_core::Error>, ctx: &mut Self::CTX) {
202+
async fn logging(&self, session: &mut Session, _e: Option<&pingora_core::Error>, ctx: &mut Self::CTX) {
203+
emit_request_metrics(session, ctx);
203204
logging_cleanup(&self.pipeline, ctx).await;
204205
}
205206
}

0 commit comments

Comments
 (0)