Skip to content

Commit b859861

Browse files
iulianbarbugithub-actions[bot]bkchr
authored
fatxpool: add labels for mempool_revalidation_invalid_txs metric (#9003)
# Description This PR considers errors propagated from mempool revalidation in the context of fork-aware transaction pool and groups them under unique labels to count them under `mempool_revalidation_invalid_txs` prometheus metric. Closes #7973 ## Integration Node developers/operators can use labels like `category` & `reason` to differentiate between invalid transactions, as reported by mempool revalidation. ## Review Notes Labeling mempool revalidation invalid txs is done based on `category` & `reason`: - the transaction validity invalid and unknown are mapped to category = "invalid" or "unknown", while the underlying variant name is the reason, converted to snake case. - `sc_transaction_pool::common::error::Error` returned from `sc_transaction_pool_api::api::validate_transaction_blocking` function is mapped to category = "validation_failed" and reason is variant name in snake_case. - txs part of subtrees of invalid txs are considered invalid, and they are mapped to category = "subtree" and reason = "invalid"|"unknown"|"validation_failed", based on the category of the invalid root tx that started the invalidation of the subtree the tx is part of. --------- Signed-off-by: Iulian Barbu <[email protected]> Co-authored-by: cmd[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Bastian Köcher <[email protected]>
1 parent 05fd451 commit b859861

File tree

13 files changed

+199
-61
lines changed

13 files changed

+199
-61
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

prdoc/pr_9003.prdoc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
title: '`fatxpool`: add labels for mempool revalidation invalid txs counting'
2+
doc:
3+
- audience: [ Node Dev, Node Operator ]
4+
description: |-
5+
This PR considers errors propagated from mempool revalidation in the context of fork-aware transaction pool and groups them under unique labels to count them under `mempool_revalidation_invalid_txs` prometheus metric. This results in a breakdown of the type of root causes for why a transaction is considered invalid, helping with debugging.
6+
crates:
7+
- name: sc-transaction-pool
8+
bump: minor
9+
- name: sc-transaction-pool-api
10+
bump: minor
11+
- name: sp-runtime
12+
bump: minor

substrate/client/transaction-pool/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ sp-crypto-hashing = { workspace = true, default-features = true }
4040
sp-runtime = { workspace = true, default-features = true }
4141
sp-tracing = { workspace = true, default-features = true }
4242
sp-transaction-pool = { workspace = true, default-features = true }
43+
strum = { workspace = true, features = ["derive"] }
4344
thiserror = { workspace = true }
4445
tokio = { workspace = true, default-features = true, features = ["macros", "time"] }
4546
tokio-stream = { workspace = true }

substrate/client/transaction-pool/api/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ serde = { features = ["derive"], workspace = true, default-features = true }
2121
sp-blockchain = { workspace = true, default-features = true }
2222
sp-core = { workspace = true }
2323
sp-runtime = { workspace = true }
24+
strum = { workspace = true, features = ["derive"] }
2425
thiserror = { workspace = true }
2526

2627
[dev-dependencies]

substrate/client/transaction-pool/api/src/error.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use sp_runtime::transaction_validity::{
2626
pub type Result<T> = std::result::Result<T, Error>;
2727

2828
/// Transaction pool error type.
29-
#[derive(Debug, thiserror::Error)]
29+
#[derive(Debug, thiserror::Error, strum::AsRefStr)]
3030
#[allow(missing_docs)]
3131
pub enum Error {
3232
#[error("Unknown transaction validity: {0:?}")]
@@ -112,3 +112,19 @@ impl IntoPoolError for Error {
112112
Ok(self)
113113
}
114114
}
115+
116+
/// Provide a representation of the underlying instance as a prometheus metric label.
117+
pub trait IntoMetricsLabel {
118+
/// Short string representation of the underlying instance.
119+
///
120+
/// This is intended to be used for a prometheus metric that tracks this instance
121+
/// with the goal of distributing the observations over multiple groups, where an unique
122+
/// label represents a group of same instance, observed many times.
123+
fn label(&self) -> String;
124+
}
125+
126+
impl IntoMetricsLabel for Error {
127+
fn label(&self) -> String {
128+
self.as_ref().to_string()
129+
}
130+
}

substrate/client/transaction-pool/src/common/error.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ use sc_transaction_pool_api::error::Error as TxPoolError;
2424
pub type Result<T> = std::result::Result<T, Error>;
2525

2626
/// Transaction pool error type.
27-
#[derive(Debug, thiserror::Error)]
27+
#[derive(Debug, thiserror::Error, strum::AsRefStr)]
28+
#[strum(serialize_all = "snake_case")]
2829
#[allow(missing_docs)]
2930
pub enum Error {
3031
#[error("Transaction pool error: {0}")]
@@ -48,3 +49,9 @@ impl sc_transaction_pool_api::error::IntoPoolError for Error {
4849
}
4950
}
5051
}
52+
53+
impl sc_transaction_pool_api::error::IntoMetricsLabel for Error {
54+
fn label(&self) -> String {
55+
self.as_ref().to_string()
56+
}
57+
}

substrate/client/transaction-pool/src/fork_aware_txpool/metrics.rs

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,16 @@
1818

1919
//! Prometheus's metrics for a fork-aware transaction pool.
2020
21-
use super::tx_mem_pool::InsertionInfo;
21+
use super::tx_mem_pool::{InsertionInfo, InvalidTxReason};
2222
use crate::{
2323
common::metrics::{GenericMetricsLink, MetricsRegistrant},
2424
graph::{self, BlockHash, ExtrinsicHash},
2525
LOG_TARGET,
2626
};
2727
use futures::{FutureExt, StreamExt};
2828
use prometheus_endpoint::{
29-
exponential_buckets, histogram_opts, linear_buckets, register, Counter, Gauge, Histogram,
30-
PrometheusError, Registry, U64,
29+
exponential_buckets, histogram_opts, linear_buckets, register, Counter, CounterVec, Gauge,
30+
Histogram, Opts, PrometheusError, Registry, U64,
3131
};
3232
#[cfg(doc)]
3333
use sc_transaction_pool_api::TransactionPool;
@@ -74,7 +74,7 @@ pub struct Metrics {
7474
/// Total number of transactions submitted from mempool to views.
7575
pub submitted_from_mempool_txs: Counter<U64>,
7676
/// Total number of transactions found as invalid during mempool revalidation.
77-
pub mempool_revalidation_invalid_txs: Counter<U64>,
77+
pub mempool_revalidation_invalid_txs: MempoolInvalidTxReasonCounter,
7878
/// Total number of transactions found as invalid during view revalidation.
7979
pub view_revalidation_invalid_txs: Counter<U64>,
8080
/// Total number of valid transactions processed during view revalidation.
@@ -292,6 +292,39 @@ impl EventsHistograms {
292292
}
293293
}
294294

295+
/// Represents a labeled counter of invalid tx reasons.
296+
pub struct MempoolInvalidTxReasonCounter {
297+
inner: CounterVec<U64>,
298+
}
299+
300+
impl MempoolInvalidTxReasonCounter {
301+
fn register(registry: &Registry) -> Result<Self, PrometheusError> {
302+
Ok(Self {
303+
inner: register(
304+
CounterVec::new(
305+
Opts::new(
306+
"substrate_sub_txpool_mempool_revalidation_invalid_txs_total",
307+
r#"Total number of transactions found as invalid during mempool revalidation.
308+
They are broken down into `category` and `reason` labels.
309+
- `category` can be `invalid`, `unknown`, `subtree` or `validation_failed`.
310+
- `reason` is more nuanced, but is worth mentioning that for `subtree` category,
311+
the underlying reason can be one of the other categories."#,
312+
),
313+
&["category", "reason"],
314+
)?,
315+
registry,
316+
)?,
317+
})
318+
}
319+
320+
// Increments the mempool invalid txs metrics based on a custom category & reason.
321+
pub fn observe(&self, label: &InvalidTxReason, count: u64) {
322+
self.inner
323+
.with_label_values(&[label.category().as_str(), label.reason().as_str()])
324+
.inc_by(count)
325+
}
326+
}
327+
295328
impl MetricsRegistrant for Metrics {
296329
fn register(registry: &Registry) -> Result<Box<Self>, PrometheusError> {
297330
Ok(Box::from(Self {
@@ -380,13 +413,7 @@ impl MetricsRegistrant for Metrics {
380413
)?,
381414
registry,
382415
)?,
383-
mempool_revalidation_invalid_txs: register(
384-
Counter::new(
385-
"substrate_sub_txpool_mempool_revalidation_invalid_txs_total",
386-
"Total number of transactions found as invalid during mempool revalidation.",
387-
)?,
388-
registry,
389-
)?,
416+
mempool_revalidation_invalid_txs: MempoolInvalidTxReasonCounter::register(registry)?,
390417
view_revalidation_invalid_txs: register(
391418
Counter::new(
392419
"substrate_sub_txpool_view_revalidation_invalid_txs_total",

substrate/client/transaction-pool/src/fork_aware_txpool/tx_mem_pool.rs

Lines changed: 91 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
use futures::{future::join_all, FutureExt};
3535
use itertools::Itertools;
3636
use parking_lot::RwLock;
37-
use sc_transaction_pool_api::{TransactionPriority, TransactionSource};
37+
use sc_transaction_pool_api::{error::IntoMetricsLabel, TransactionPriority, TransactionSource};
3838
use sp_blockchain::HashAndNumber;
3939
use sp_runtime::{
4040
traits::Block as BlockT,
@@ -58,8 +58,7 @@ use tracing::{debug, trace};
5858

5959
use crate::{
6060
common::tracing_log_xt::log_xt_trace,
61-
graph,
62-
graph::{base_pool::TimedTransactionSource, ExtrinsicFor, ExtrinsicHash},
61+
graph::{self, base_pool::TimedTransactionSource, ExtrinsicFor, ExtrinsicHash},
6362
ValidateTransactionPriority, LOG_TARGET,
6463
};
6564

@@ -78,6 +77,39 @@ pub(crate) const TXMEMPOOL_MAX_REVALIDATION_BATCH_SIZE: usize = 1000;
7877

7978
const SYNC_BRIDGE_EXPECT: &str = "The mempool blocking task shall not be terminated. qed.";
8079

80+
#[derive(strum::Display)]
81+
#[strum(serialize_all = "snake_case")]
82+
/// Provides a type safe way of determining the category and reason of why
83+
/// a transaction is marked as invalid, useful to extract labels in the context of
84+
/// `mempool_revalidation_invalid_txs` metric.
85+
pub(super) enum InvalidTxReason {
86+
/// Coresponds to invalid validity.
87+
Invalid(String),
88+
/// Corresponds to unknown validity.
89+
Unknown(String),
90+
/// Corresponds to a transaction which is marked as invalid because it is part of a subtree of
91+
/// a transaction with invalid or unknown validities.
92+
Subtree(String),
93+
/// Corresponds to a transaction for which a validity couldn't be determined due to a failure
94+
/// during the validation, or because of a missing runtime prerequisite.
95+
ValidationFailed(String),
96+
}
97+
98+
impl InvalidTxReason {
99+
pub(super) fn reason(&self) -> &String {
100+
match self {
101+
InvalidTxReason::Invalid(inner) |
102+
InvalidTxReason::Unknown(inner) |
103+
InvalidTxReason::Subtree(inner) |
104+
InvalidTxReason::ValidationFailed(inner) => inner,
105+
}
106+
}
107+
108+
pub(super) fn category(&self) -> String {
109+
self.to_string()
110+
}
111+
}
112+
81113
/// Represents the transaction in the intermediary buffer.
82114
pub(crate) struct TxInMemPool<ChainApi, Block>
83115
where
@@ -584,7 +616,11 @@ where
584616
/// Revalidates a batch of transactions against the provided finalized block.
585617
///
586618
/// Returns a vector of invalid transaction hashes.
587-
async fn revalidate_inner(&self, finalized_block: HashAndNumber<Block>) -> Vec<Block::Hash> {
619+
async fn revalidate_inner(
620+
&self,
621+
view_store: Arc<ViewStore<ChainApi, Block>>,
622+
finalized_block: HashAndNumber<Block>,
623+
) -> HashSet<ExtrinsicHash<ChainApi>> {
588624
trace!(
589625
target: LOG_TARGET,
590626
?finalized_block,
@@ -629,32 +665,69 @@ where
629665
let validated_count = validation_results.len();
630666

631667
let duration = start.elapsed();
632-
633668
let invalid_hashes = validation_results
634669
.into_iter()
635670
.filter_map(|(tx_hash, validation_result)| match validation_result {
636-
Ok(Ok(_)) |
637-
Ok(Err(TransactionValidityError::Invalid(InvalidTransaction::Future))) => None,
638-
Err(_) |
639-
Ok(Err(TransactionValidityError::Unknown(_))) |
640-
Ok(Err(TransactionValidityError::Invalid(_))) => {
671+
Ok(Ok(_) | Err(TransactionValidityError::Invalid(InvalidTransaction::Future))) =>
672+
None,
673+
Err(ref error) => {
674+
trace!(
675+
target: LOG_TARGET,
676+
?tx_hash,
677+
?validation_result,
678+
"mempool::revalidate_inner error during revalidation"
679+
);
680+
Some((tx_hash, InvalidTxReason::ValidationFailed(error.label())))
681+
},
682+
Ok(Err(TransactionValidityError::Unknown(error))) => {
641683
trace!(
642684
target: LOG_TARGET,
643685
?tx_hash,
644686
?validation_result,
645-
"mempool::revalidate_inner invalid"
687+
"mempool::revalidate_inner cannot determine transaction validity"
646688
);
647-
Some(tx_hash)
689+
Some((tx_hash, InvalidTxReason::Unknown(error.as_ref().to_string())))
690+
},
691+
Ok(Err(TransactionValidityError::Invalid(error))) => {
692+
trace!(
693+
target: LOG_TARGET,
694+
?tx_hash,
695+
?validation_result,
696+
"mempool::revalidate_inner transaction is invalid"
697+
);
698+
Some((tx_hash, InvalidTxReason::Invalid(error.as_ref().to_string())))
648699
},
649700
})
650701
.collect::<Vec<_>>();
651702

703+
let mut invalid_hashes_subtrees = Vec::new();
704+
// Include also subtree txs.
705+
for (tx, reason) in &invalid_hashes {
706+
let txs_in_subtree = view_store
707+
.remove_transaction_subtree(*tx, |_, _| {})
708+
.into_iter()
709+
.map(|tx| (tx.hash, InvalidTxReason::Subtree(reason.to_string())));
710+
invalid_hashes_subtrees.extend(txs_in_subtree);
711+
}
712+
713+
let revalidated_invalid_hashes_len = invalid_hashes.len();
714+
let invalid_hashes = invalid_hashes
715+
.into_iter()
716+
.chain(invalid_hashes_subtrees)
717+
.map(|(tx, reason)| {
718+
self.metrics
719+
.report(|metrics| metrics.mempool_revalidation_invalid_txs.observe(&reason, 1));
720+
tx
721+
})
722+
.collect::<HashSet<_>>();
723+
652724
debug!(
653725
target: LOG_TARGET,
654726
?finalized_block,
655727
validated_count,
656728
total_count,
657-
invalid_hashes = invalid_hashes.len(),
729+
invalid_hashes_subtrees_len = invalid_hashes.len(),
730+
revalidated_invalid_hashes_len,
658731
?duration,
659732
"mempool::revalidate_inner"
660733
);
@@ -686,53 +759,30 @@ where
686759
view_store: Arc<ViewStore<ChainApi, Block>>,
687760
finalized_block: HashAndNumber<Block>,
688761
) {
689-
let revalidated_invalid_hashes = self.revalidate_inner(finalized_block.clone()).await;
690-
691-
let mut invalid_hashes_subtrees =
692-
revalidated_invalid_hashes.clone().into_iter().collect::<HashSet<_>>();
693-
for tx in &revalidated_invalid_hashes {
694-
invalid_hashes_subtrees.extend(
695-
view_store
696-
.remove_transaction_subtree(*tx, |_, _| {})
697-
.into_iter()
698-
.map(|tx| tx.hash),
699-
);
700-
}
701-
762+
let invalid_hashes_subtrees =
763+
self.revalidate_inner(view_store.clone(), finalized_block.clone()).await;
702764
{
703765
let mut transactions = self.transactions.write().await;
704766
invalid_hashes_subtrees.iter().for_each(|tx_hash| {
705767
transactions.remove(&tx_hash);
706768
});
707769
};
708770

709-
self.metrics.report(|metrics| {
710-
metrics
711-
.mempool_revalidation_invalid_txs
712-
.inc_by(invalid_hashes_subtrees.len() as _)
713-
});
714-
715-
let revalidated_invalid_hashes_len = revalidated_invalid_hashes.len();
716-
let invalid_hashes_subtrees_len = invalid_hashes_subtrees.len();
717-
718-
let invalid_hashes_subtrees = invalid_hashes_subtrees.into_iter().collect::<Vec<_>>();
719-
720771
//note: here the consistency is assumed: it is expected that transaction will be
721772
// actually removed from the listener with Invalid event. This means assumption that no view
722773
// is referencing tx as ready.
723-
self.listener.transactions_invalidated(&invalid_hashes_subtrees);
774+
let invalid_hashes_subtrees = invalid_hashes_subtrees.into_iter().collect::<Vec<_>>();
775+
self.listener.transactions_invalidated(invalid_hashes_subtrees.as_slice());
724776
view_store
725777
.import_notification_sink
726-
.clean_notified_items(&invalid_hashes_subtrees);
778+
.clean_notified_items(invalid_hashes_subtrees.as_slice());
727779
view_store
728780
.dropped_stream_controller
729781
.remove_transactions(invalid_hashes_subtrees);
730782

731783
trace!(
732784
target: LOG_TARGET,
733785
?finalized_block,
734-
revalidated_invalid_hashes_len,
735-
invalid_hashes_subtrees_len,
736786
"mempool::revalidate"
737787
);
738788
}

0 commit comments

Comments
 (0)