Skip to content

Commit db503b7

Browse files
avi-starkware and claude committed
starknet_transaction_prover: bump prover_panics_total on panic hook fire
Bumps the new `prover_panics_total` counter from the panic hook before constructing the backtrace, so dashboards can alert on panic rate without log search. The pre-registered zero observation in `install_exporter` keeps the series visible at scrape time even before the first panic.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 2af08ad commit db503b7

3 files changed

Lines changed: 38 additions & 5 deletions

File tree

crates/starknet_transaction_prover/src/server/metrics.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ pub mod names {
2828
pub const BUILD_INFO: &str = "prover_build_info";
2929
/// Requests rejected because the concurrency semaphore was full.
3030
pub const CONCURRENCY_REJECTED_TOTAL: &str = "prover_concurrency_rejected_total";
31+
/// Unhandled panics caught by the global panic hook.
32+
pub const PANICS_TOTAL: &str = "prover_panics_total";
3133
/// Wall-clock duration of `prove_transaction` end-to-end. Bucketed.
3234
pub const PROVE_TRANSACTION_DURATION_SECONDS: &str =
3335
"prover_prove_transaction_duration_seconds";
@@ -72,6 +74,7 @@ pub fn install_exporter(version: &str, git_sha: &str) -> anyhow::Result<Promethe
7274
// before the first request — dashboards relying on `rate(...) > 0`
7375
// need the series to exist.
7476
metrics::counter!(names::CONCURRENCY_REJECTED_TOTAL).increment(0);
77+
metrics::counter!(names::PANICS_TOTAL).increment(0);
7578
super::http_metrics::preregister_http_metrics();
7679
Ok(handle)
7780
}

crates/starknet_transaction_prover/src/server/panic.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,17 @@
33
//! Without an explicit hook, panics in `tokio::spawn`ed work hit the runtime's
44
//! default handler and print to stderr in an ad-hoc format. We want one
55
//! structured `tracing` event with location + backtrace so log aggregators
6-
//! can index it. The hook only emits a log line — runtime abort-on-panic
7-
//! behavior is preserved.
6+
//! can index it, plus a `prover_panics_total` bump so dashboards/alerts can
7+
//! fire on panic rate rather than relying on log search. The hook does not
8+
//! abort — runtime abort-on-panic behavior is preserved.
89
910
use std::backtrace::Backtrace;
1011
use std::panic::PanicHookInfo;
1112

1213
use tracing::error;
1314

15+
use crate::server::metrics::names::PANICS_TOTAL;
16+
1417
#[cfg(test)]
1518
#[path = "panic_test.rs"]
1619
mod panic_test;
@@ -20,6 +23,9 @@ pub fn install_panic_hook() {
2023
}
2124

2225
fn panic_hook(info: &PanicHookInfo<'_>) {
26+
// Increment first — if `Backtrace::force_capture` or the `error!` macro
27+
// panic recursively, the counter still reflects the original panic.
28+
metrics::counter!(PANICS_TOTAL).increment(1);
2329
let message = extract_payload(info);
2430
let location = info
2531
.location()

crates/starknet_transaction_prover/src/server/panic_test.rs

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use std::sync::{Arc, Mutex};
22

3-
use crate::server::panic::extract_payload;
3+
use crate::server::metrics::names::PANICS_TOTAL;
4+
use crate::server::panic::{extract_payload, install_panic_hook};
5+
use crate::server::test_recorder::shared_handle;
46

57
fn capture_payload<F: FnOnce() + std::panic::UnwindSafe>(f: F) -> String {
68
let captured: Arc<Mutex<Option<String>>> = Arc::new(Mutex::new(None));
@@ -15,10 +17,32 @@ fn capture_payload<F: FnOnce() + std::panic::UnwindSafe>(f: F) -> String {
1517
value
1618
}
1719

18-
// Panic-capturing tests share global state (the panic hook), so they must
19-
// run serially. Keep as a single `#[test]` so ordering is explicit.
20+
// Panic-capturing tests share global state (the panic hook); both tests in
21+
// this module install/restore the hook around a single `catch_unwind`.
2022
#[test]
fn extracts_static_str_and_formatted_payloads() {
    // Panic raised with a plain literal message.
    let from_literal = capture_payload(|| panic!("static literal"));
    assert_eq!(from_literal, "static literal");

    // Panic raised with a message built from format arguments.
    let from_format = capture_payload(|| panic!("formatted {}", 42));
    assert_eq!(from_format, "formatted 42");
}
27+
28+
/// Checks that one panic caught while the production hook is installed
/// raises the rendered `PANICS_TOTAL` value by exactly one.
#[test]
fn panic_hook_bumps_panics_total_counter() {
    let recorder = shared_handle();
    let baseline = counter_value(&recorder.render(), PANICS_TOTAL);

    // Swap in the production hook for exactly one caught panic, then put the
    // previously installed hook back.
    // NOTE(review): the panic hook is process-global; presumably another test
    // swapping hooks concurrently could interfere with this one — confirm how
    // these tests are scheduled.
    let saved_hook = std::panic::take_hook();
    install_panic_hook();
    let _ = std::panic::catch_unwind(|| panic!("counter-test panic"));
    std::panic::set_hook(saved_hook);

    let observed = counter_value(&recorder.render(), PANICS_TOTAL);
    assert_eq!(observed - baseline, 1.0);
}
41+
42+
/// Reads the value of the sample named `name` out of a Prometheus text scrape.
///
/// The first line whose metric name is exactly `name` (either bare or
/// followed by a `{...}` label set) is selected, and the text after its last
/// space is parsed as the value. Returns `0.0` when no such line exists or
/// when that value does not parse as a number.
fn counter_value(scrape: &str, name: &str) -> f64 {
    scrape
        .lines()
        .filter(|line| !line.starts_with("# "))
        .find(|line| {
            // A bare prefix match would also select a different metric that
            // merely begins with `name` (e.g. `<name>_by_kind`), so require
            // the name to be terminated by the value separator or a label set.
            line.strip_prefix(name)
                .is_some_and(|rest| rest.starts_with([' ', '{']))
        })
        .and_then(|line| line.rsplit_once(' '))
        .and_then(|(_, value)| value.parse().ok())
        .unwrap_or(0.0)
}

0 commit comments

Comments
 (0)