diff --git a/crates/starknet_transaction_prover/src/errors.rs b/crates/starknet_transaction_prover/src/errors.rs
index 09bdb6b6a24..6775326ee9c 100644
--- a/crates/starknet_transaction_prover/src/errors.rs
+++ b/crates/starknet_transaction_prover/src/errors.rs
@@ -131,6 +131,27 @@ pub enum VirtualSnosProverError {
     TransactionBlocked,
 }
 
+impl VirtualSnosProverError {
+    /// Maps the variant to one of the bounded label values declared in
+    /// `crate::server::metrics::outcomes`. The single match keeps the
+    /// cardinality of the `outcome` label on the `prove_transaction` metrics
+    /// fixed — adding a variant requires a dashboard update at the same time.
+    pub fn metric_outcome(&self) -> &'static str {
+        use crate::server::metrics::outcomes;
+        match self {
+            VirtualSnosProverError::InvalidTransactionType(_)
+            | VirtualSnosProverError::InvalidTransactionInput(_)
+            | VirtualSnosProverError::ValidationError(_) => outcomes::VALIDATION,
+            VirtualSnosProverError::TransactionBlocked => outcomes::BLOCKED,
+            VirtualSnosProverError::RunnerError(_) => outcomes::RUNNER,
+            VirtualSnosProverError::OutputParseError(_)
+            | VirtualSnosProverError::ProgramOutputError(_) => outcomes::OUTPUT_PARSE,
+            #[cfg(feature = "stwo_proving")]
+            VirtualSnosProverError::ProvingError(_) => outcomes::PROVING,
+        }
+    }
+}
+
 /// Errors that can occur during configuration.
 #[derive(Debug, Error)]
 pub enum ConfigError {
diff --git a/crates/starknet_transaction_prover/src/proving/virtual_snos_prover.rs b/crates/starknet_transaction_prover/src/proving/virtual_snos_prover.rs
index c2a7cca3a44..0d64eef26f3 100644
--- a/crates/starknet_transaction_prover/src/proving/virtual_snos_prover.rs
+++ b/crates/starknet_transaction_prover/src/proving/virtual_snos_prover.rs
@@ -25,6 +25,7 @@ use crate::blocking_check::{BlockingCheckClient, BlockingCheckResult};
 use crate::config::ProverConfig;
 use crate::errors::VirtualSnosProverError;
 use crate::running::runner::{RpcRunnerFactory, RunnerOutput, VirtualSnosRunner};
+use crate::server::metrics::{names as metric_names, outcomes};
 
 /// Result of a successful prove transaction operation.
 ///
@@ -149,7 +150,30 @@ impl VirtualSnosProver {
         transaction: RpcTransaction,
     ) -> Result {
         let start_time = Instant::now();
+        let result = self.prove_transaction_inner(block_id, transaction).await;
+        let total = start_time.elapsed();
+        let outcome = match &result {
+            Ok(_) => outcomes::SUCCESS,
+            Err(err) => err.metric_outcome(),
+        };
+        // One histogram observation per request, recorded regardless of outcome.
+        // The `outcome` label is what lets the query layer compute SLOs over a
+        // subset of requests (e.g. p99 over success-only).
+        metrics::histogram!(metric_names::PROVE_TRANSACTION_DURATION_SECONDS, "outcome" => outcome)
+            .record(total.as_secs_f64());
+        metrics::counter!(metric_names::PROVE_TRANSACTION_OUTCOME_TOTAL, "outcome" => outcome)
+            .increment(1);
+        info!(total_duration_ms = %total.as_millis(), outcome, "prove_transaction completed");
+        result
+    }
 
+    /// Validates the request and runs the proving pipeline. End-to-end latency and outcome
+    /// metrics are recorded by the `prove_transaction` wrapper, not here.
+    async fn prove_transaction_inner(
+        &self,
+        block_id: BlockId,
+        transaction: RpcTransaction,
+    ) -> Result {
         // Validate block_id is not pending.
         if matches!(block_id, BlockId::Pending) {
             return Err(VirtualSnosProverError::ValidationError(
@@ -162,15 +186,12 @@ impl VirtualSnosProver {
         validate_transaction_input(&invoke_v3, self.validate_zero_fee_fields)?;
         let invoke_tx = InvokeTransaction::V3(invoke_v3.into());
 
-        let result = match &self.blocking_check_client {
-            None => self.run_and_prove(block_id, vec![invoke_tx]).await?,
+        match &self.blocking_check_client {
+            None => self.run_and_prove(block_id, vec![invoke_tx]).await,
             Some(client) => {
-                self.prove_with_blocking_check(client, block_id, transaction, invoke_tx).await?
+                self.prove_with_blocking_check(client, block_id, transaction, invoke_tx).await
             }
-        };
-
-        info!(total_duration_ms = %start_time.elapsed().as_millis(), "prove_transaction completed");
-        Ok(result)
+        }
     }
 
     /// Runs the OS and generates a proof. This is the core proving pipeline.
@@ -186,18 +207,20 @@ impl VirtualSnosProver {
             .await
             .map_err(|err| VirtualSnosProverError::RunnerError(Box::new(err)))?;
 
-        info!(
-            os_duration_ms = %os_start.elapsed().as_millis(),
-            "OS execution completed"
-        );
+        // Sub-step durations are only observed on success: a failing step returns via `?`
+        // before its histogram is recorded.
+        let os_duration = os_start.elapsed();
+        metrics::histogram!(metric_names::OS_RUN_DURATION_SECONDS)
+            .record(os_duration.as_secs_f64());
+        info!(os_duration_ms = %os_duration.as_millis(), "OS execution completed");
 
         let prove_start = Instant::now();
         let result = self.prove_virtual_snos_run(runner_output).await?;
 
-        info!(
-            prove_duration_ms = %prove_start.elapsed().as_millis(),
-            "Proving completed"
-        );
+        let prove_duration = prove_start.elapsed();
+        metrics::histogram!(metric_names::STWO_PROVE_DURATION_SECONDS)
+            .record(prove_duration.as_secs_f64());
+        info!(prove_duration_ms = %prove_duration.as_millis(), "Proving completed");
 
         Ok(result)
     }
diff --git a/crates/starknet_transaction_prover/src/server/metrics.rs b/crates/starknet_transaction_prover/src/server/metrics.rs
index 580530ae915..992cd1122b3 100644
--- a/crates/starknet_transaction_prover/src/server/metrics.rs
+++ b/crates/starknet_transaction_prover/src/server/metrics.rs
@@ -28,6 +28,36 @@ pub mod names {
     pub const BUILD_INFO: &str = "prover_build_info";
     /// Requests rejected because the concurrency semaphore was full.
     pub const CONCURRENCY_REJECTED_TOTAL: &str = "prover_concurrency_rejected_total";
+    /// Wall-clock duration of `prove_transaction` end-to-end, labeled by
+    /// `outcome` (see [`super::outcomes`]). Bucketed.
+    pub const PROVE_TRANSACTION_DURATION_SECONDS: &str =
+        "prover_prove_transaction_duration_seconds";
+    /// `prove_transaction` outcomes by category. See [`super::outcomes`] for
+    /// the fixed set of label values.
+    pub const PROVE_TRANSACTION_OUTCOME_TOTAL: &str = "prover_prove_transaction_outcome_total";
+    /// Virtual SNOS run sub-step duration; observed for successful runs only. Bucketed.
+    pub const OS_RUN_DURATION_SECONDS: &str = "prover_os_run_duration_seconds";
+    /// Stwo proving sub-step duration; observed for successful proofs only. Bucketed.
+    pub const STWO_PROVE_DURATION_SECONDS: &str = "prover_stwo_prove_duration_seconds";
+}
+
+/// Fixed, bounded set of values for the `outcome` label on
+/// [`names::PROVE_TRANSACTION_OUTCOME_TOTAL`] and
+/// [`names::PROVE_TRANSACTION_DURATION_SECONDS`]. Adding a variant requires a
+/// dashboard update.
+pub mod outcomes {
+    /// `prove_transaction` returned `Ok`.
+    pub const SUCCESS: &str = "success";
+    /// Failed with `InvalidTransactionType`, `InvalidTransactionInput` or `ValidationError`.
+    pub const VALIDATION: &str = "failure_validation";
+    /// Failed with `TransactionBlocked`.
+    pub const BLOCKED: &str = "failure_blocked";
+    /// Failed with `RunnerError`.
+    pub const RUNNER: &str = "failure_runner";
+    /// Failed with `OutputParseError` or `ProgramOutputError`.
+    pub const OUTPUT_PARSE: &str = "failure_output_parse";
+    /// Failed with `ProvingError` (mapped only when the `stwo_proving` feature is enabled).
+    pub const PROVING: &str = "failure_proving";
 }
 
 /// Initializes the global Prometheus exporter and emits the `build_info`