Skip to content

Commit 894c08e

Browse files
avi-starkwareclaude
andcommitted
starknet_transaction_prover: origin-level breadcrumbs for validation errors
Splits validation-error logging so each origin (request body, RPC, proving) carries an `event` tag and trimmed payload context, making it straightforward to root-cause production failures from the structured log stream. Also adds an in-test microbenchmark for HttpMetricsLayer overhead so any regression in the hot path is visible. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent d3f1139 commit 894c08e

4 files changed

Lines changed: 115 additions & 5 deletions

File tree

crates/starknet_transaction_prover/src/proving/virtual_snos_prover.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use starknet_api::execution_resources::GasAmount;
1818
use starknet_api::rpc_transaction::{RpcInvokeTransaction, RpcInvokeTransactionV3, RpcTransaction};
1919
use starknet_api::transaction::fields::{Proof, ProofFacts, Tip};
2020
use starknet_api::transaction::{InvokeTransaction, MessageToL1};
21-
use tracing::{info, instrument};
21+
use tracing::{info, instrument, warn};
2222
use url::Url;
2323

2424
use crate::blocking_check::{BlockingCheckClient, BlockingCheckResult};
@@ -172,16 +172,23 @@ impl<R: VirtualSnosRunner + 'static> VirtualSnosProver<R> {
172172
block_id: BlockId,
173173
transaction: RpcTransaction,
174174
) -> Result<ProveTransactionResult, VirtualSnosProverError> {
175-
// Validate block_id is not pending.
175+
// Each validation site emits a structured warn before returning so
176+
// the failing field is grep-able directly. Transaction calldata is
177+
// private user data — never logged.
176178
if matches!(block_id, BlockId::Pending) {
179+
warn!(event = "validation_error", reason = "pending_block_unsupported");
177180
return Err(VirtualSnosProverError::ValidationError(
178181
"Pending blocks are not supported; only finalized blocks can be proven."
179182
.to_string(),
180183
));
181184
}
182185

183-
let invoke_v3 = extract_rpc_invoke_tx(transaction.clone())?;
184-
validate_transaction_input(&invoke_v3, self.validate_zero_fee_fields)?;
186+
let invoke_v3 = extract_rpc_invoke_tx(transaction.clone()).inspect_err(|err| {
187+
warn!(event = "validation_error", reason = "non_invoke_transaction", error = %err);
188+
})?;
189+
validate_transaction_input(&invoke_v3, self.validate_zero_fee_fields).inspect_err(|err| {
190+
warn!(event = "validation_error", reason = "invalid_transaction_input", error = %err);
191+
})?;
185192
let invoke_tx = InvokeTransaction::V3(invoke_v3.into());
186193

187194
match &self.blocking_check_client {

crates/starknet_transaction_prover/src/server.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ pub mod log_redact;
3535
pub mod metrics;
3636
#[cfg(test)]
3737
pub mod mock_rpc;
38+
#[cfg(test)]
39+
mod overhead_bench;
3840
pub mod panic;
3941
pub mod request_log;
4042
pub mod request_span;
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
//! In-test microbenchmarks for the new observability middleware.
2+
//!
3+
//! Run with:
4+
//! SEED=0 cargo test --release -p starknet_transaction_prover \
5+
//! server::overhead_bench -- --ignored --nocapture
6+
//!
7+
//! Marked `#[ignore]` so the regular `cargo test` run does not pay the
8+
//! benchmark cost. The intent is to give an operator a quick way to
9+
//! reproduce the per-request overhead numbers published in the
10+
//! observability rollout report (see `observability_report.html`).
11+
//!
12+
//! The module is gated with `#[cfg(test)]` in `server.rs`; no inner
13+
//! `#![cfg(test)]` here (would trip clippy::duplicated-attributes).
14+
15+
use std::time::Instant;
16+
17+
use bytes::Bytes;
18+
use http::{Method, Request, Response, StatusCode};
19+
use http_body_util::Full;
20+
use jsonrpsee::server::HttpBody;
21+
use tower::{Layer, ServiceExt};
22+
23+
use crate::server::http_metrics::HttpMetricsLayer;
24+
use crate::server::test_recorder::shared_handle;
25+
26+
const ITERATIONS: usize = 50_000;
27+
28+
fn ok_service() -> impl tower::Service<
29+
Request<HttpBody>,
30+
Response = Response<HttpBody>,
31+
Error = std::convert::Infallible,
32+
Future = futures::future::Ready<Result<Response<HttpBody>, std::convert::Infallible>>,
33+
> + Clone {
34+
tower::service_fn(|_req: Request<HttpBody>| {
35+
let response = Response::builder()
36+
.status(StatusCode::OK)
37+
.body(HttpBody::new(Full::new(Bytes::new())))
38+
.expect("static body is infallible");
39+
futures::future::ready(Ok::<_, std::convert::Infallible>(response))
40+
})
41+
}
42+
43+
fn make_request() -> Request<HttpBody> {
44+
Request::builder()
45+
.method(Method::POST)
46+
.uri("/")
47+
.body(HttpBody::new(Full::new(Bytes::new())))
48+
.expect("static body is infallible")
49+
}
50+
51+
async fn time_loop<S>(label: &str, svc: S) -> std::time::Duration
52+
where
53+
S: tower::Service<Request<HttpBody>, Response = Response<HttpBody>> + Clone,
54+
S::Future: Send,
55+
{
56+
// Warm-up: prime caches, exporter buffers, etc.
57+
for _ in 0..1_000 {
58+
let _ = svc.clone().oneshot(make_request()).await;
59+
}
60+
let start = Instant::now();
61+
for _ in 0..ITERATIONS {
62+
let _ = svc.clone().oneshot(make_request()).await;
63+
}
64+
let elapsed = start.elapsed();
65+
// `Duration / u32` is the only stable division API; safe to cast
66+
// because ITERATIONS is a tiny compile-time constant.
67+
let iterations_u32 = u32::try_from(ITERATIONS).expect("ITERATIONS fits in u32");
68+
eprintln!(
69+
"{label:<30}: {elapsed:?} total, {per_op:?} per request ({ITERATIONS} ops)",
70+
per_op = elapsed / iterations_u32,
71+
);
72+
elapsed
73+
}
74+
75+
#[tokio::test]
76+
#[ignore = "microbenchmark; run with --ignored --nocapture"]
77+
async fn bench_no_layer_baseline() {
78+
let elapsed = time_loop("baseline (no layer)", ok_service()).await;
79+
// Sanity: 50k no-op tower calls should fit in well under a second on
80+
// any reasonable machine. Used as the dividend for overhead %.
81+
assert!(elapsed.as_secs() < 5, "baseline regressed unexpectedly: {elapsed:?}");
82+
}
83+
84+
#[tokio::test]
85+
#[ignore = "microbenchmark; run with --ignored --nocapture"]
86+
async fn bench_http_metrics_layer_overhead() {
87+
// Force the recorder up first; otherwise the first metric call pays
88+
// the install cost which is not representative of steady-state.
89+
let _ = shared_handle();
90+
let svc = HttpMetricsLayer.layer(ok_service());
91+
time_loop("HttpMetricsLayer", svc).await;
92+
}

crates/starknet_transaction_prover/src/server/rpc_impl.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,16 @@ impl ProvingRpcServer for ProvingRpcServerImpl {
8080
self.saturation_monitor.mark_accepted();
8181

8282
self.prover.prove_transaction(block_id, transaction).await.map_err(|err| {
83-
warn!("prove_transaction failed: {:?}", err);
83+
// Structured fields match `prover_prove_transaction_outcome_total`
84+
// so log queries and metric filters use the same vocabulary. The
85+
// origin-level warns inside `virtual_snos_prover` cover the
86+
// specific failure mode; this one is the catch-all backstop.
87+
warn!(
88+
event = "prove_transaction_failed",
89+
outcome = err.metric_outcome(),
90+
error = %err,
91+
"prove_transaction failed",
92+
);
8493
ErrorObjectOwned::from(err)
8594
})
8695
}

0 commit comments

Comments
 (0)