Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,41 @@ pub struct BscParliaGethMetrics {
pub doublesign: Counter,
}

/// Metrics for the BSC block import service (`block_import/service.rs`).
///
/// Exposes the per-outcome distribution of `engine.new_payload` results so the
/// reorg-race vs. structural-invalid ratio is visible at the Prometheus layer.
/// This is the diagnostic surface that lets us tune the race-shaped error
/// classifier in `service.rs` without grepping logs, and tells us whether a
/// peer-drop incident is correlated with a spike of structural rejects (which
/// trigger the BadBlock penalty path) versus race rejects (which do not).
#[derive(Metrics, Clone)]
#[metrics(scope = "bsc.block_import")]
pub struct BscBlockImportMetrics {
/// `Invalid` outcomes whose error string matched a reorg-race heuristic
/// (`state root`, `parent block`, `unknown parent`, `missing trie`,
/// `receipts root`, `insert_canonical`, `reorg`). These do NOT emit an Err
/// outcome to the network manager and therefore do NOT trigger the
/// `BadBlock` reputation penalty.
pub invalid_race: Counter,

/// `Invalid` outcomes whose error string did NOT match any race heuristic.
/// These are treated as structural failures and DO emit an Err outcome,
/// which means reth's `NetworkManager::on_block_imported` applies a
/// `BadBlock` reputation change to the source peer.
pub invalid_structural: Counter,

/// `Valid` outcomes — block accepted by the engine.
pub valid: Counter,

/// `Syncing` outcomes — engine couldn't validate yet, FCU was forced.
pub syncing: Counter,

/// Payloads rejected before reaching the engine because the
/// announced-hash and computed-hash diverged.
pub hash_mismatch: Counter,
}

#[cfg(test)]
mod tests {
use super::*;
Expand All @@ -310,5 +345,6 @@ mod tests {
let _miner_metrics = BscMinerMetrics::default();
let _finality_metrics = BscFinalityMetrics::default();
let _blockchain_metrics = BscBlockchainMetrics::default();
let _block_import_metrics = BscBlockImportMetrics::default();
}
}
155 changes: 117 additions & 38 deletions src/node/network/block_import/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ where
failed_heads: crate::node::network::block_import::fork_recover::FailedHeadsCooler,
/// Periodic timer for head announcement.
announce_interval: tokio::time::Interval,
/// Per-outcome counters for `engine.new_payload` results. Cloned into each
/// import future since `Counter` is a cheap handle into the global registry.
metrics: crate::metrics::BscBlockImportMetrics,
}

fn resolve_bsc_peer_static(announcer: PeerId) -> Option<PeerId> {
Expand Down Expand Up @@ -174,13 +177,15 @@ where
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
interval
},
metrics: crate::metrics::BscBlockImportMetrics::default(),
}
}

/// Process a new payload and return the outcome
fn new_payload(&self, block: BlockMsg, peer_id: PeerId) -> ImportFut {
let engine = self.engine.clone();
let forkchoice_engine = self.forkchoice_engine.clone();
let metrics = self.metrics.clone();
let recovering_heads = self.recovering_heads.clone();
let failed_heads = self.failed_heads.clone();

Expand All @@ -189,6 +194,7 @@ where
tracing::debug!(target: "bsc::block_import", "New payload: block = ({:?}, {:?}), peer_id = {:?}", block.block.0.block.header.number, block_hash, peer_id);
Box::pin(async move {
if announced_hash != block_hash {
metrics.hash_mismatch.increment(1);
tracing::warn!(
target: "bsc::block_import",
number = block.block.0.block.header.number,
Expand All @@ -215,6 +221,7 @@ where
match engine.new_payload(payload).await {
Ok(payload_status) => match payload_status.status {
PayloadStatusEnum::Valid => {
metrics.valid.increment(1);
tracing::debug!(target: "bsc::block_import", "New payload is valid, block_hash = {:?}, block_number = {}, peer_id = {:?}", block.hash, header.number, peer_id);
// handle fork choice update with valid payload
if let Err(e) = forkchoice_engine.update_forkchoice(&header).await {
Expand All @@ -226,32 +233,81 @@ where
.into()
}
PayloadStatusEnum::Invalid { validation_error } => {
// Do NOT penalize the peer for Invalid blocks.
// Issue #320 / PR #297 context: on BSC's fast block time
// (post-Lorentz down to 0.45s) concurrent reorgs are
// routine. When two peers relay racing siblings, the
// loser returns Invalid from new_payload — but the block
// itself is legitimate, just executed against the wrong
// parent state. Reth's NetworkManager::on_block_imported
// penalizes EVERY Err with BadBlock (-16384 reputation),
// regardless of variant; 4 hits = peer banned (threshold
// -51200), recovery +1/sec = 4.5h to re-earn. Under BSC
// tip-follow, races arrive faster than that recovery, so
// the peer pool drains to zero (#320).
//
// In BSC's PoSA with devp2p block propagation, Invalid
// frequently results from timing issues during concurrent
// reorgs — the block itself is legitimate but was executed
// against the wrong state. Penalizing the peer with BadBlock
// (-16384 reputation) for this drains peers rapidly,
// especially under BSC's fast block time (0.45s), where
// concurrent forks are routine.
// PR #297 suppressed the penalty for EVERY Invalid.
// That's too blunt: a truly malformed block from a bad
// peer (wrong Parlia signature, structural hash
// mismatch, etc.) now also escapes the BadBlock
// mechanism. We classify the error string instead —
// race-shaped failures skip the penalty, structural
// failures keep the original behaviour so BadBlock
// still catches real offenders.
//
// This aligns with geth's behavior: geth's fetcher only
// drops a peer when header verification fails
// (verifyHeader), never when block execution fails
// (insertChain). Truly malicious peers are still caught by
// the network layer's BadMessage / BadProtocol penalties.
tracing::debug!(
target: "bsc::block_import",
block_hash = %header.hash_slow(),
block_number = header.number,
%validation_error,
peer = %peer_id,
"New payload returned Invalid - not penalizing peer"
);
None
// Aligns with geth's fetcher semantics: drop on header
// verification failure, not on execution failure.
let msg = validation_error.to_string();
let lower = msg.to_ascii_lowercase();
let looks_like_reorg_race = [
// EVM state root mismatch — classic "executed
// against wrong parent" race.
"state root",
"receipts root",
// Parent resolution failures — the engine didn't
// have the parent tip by the time we executed.
"parent block",
"unknown parent",
"missing trie",
// Canonical insertion races.
"insert_canonical",
"reorg",
]
.iter()
.any(|needle| lower.contains(needle));

if looks_like_reorg_race {
metrics.invalid_race.increment(1);
tracing::debug!(
target: "bsc::block_import",
block_hash = %header.hash_slow(),
block_number = header.number,
peer = %peer_id,
error = %msg,
"Invalid attributed to reorg race; not penalizing peer",
);
// No Outcome → NetworkManager never sees an Err →
// no BadBlock penalty applied. Early ValidHeader
// announcement from on_new_block still stands.
None
} else {
metrics.invalid_structural.increment(1);
tracing::info!(
target: "bsc::block_import",
block_hash = %header.hash_slow(),
block_number = header.number,
peer = %peer_id,
error = %msg,
"Invalid treated as structural; peer will be penalized",
);
Outcome {
peer: peer_id,
result: Err(BlockImportError::Other(msg.into())),
}
.into()
}
}
PayloadStatusEnum::Syncing => {
metrics.syncing.increment(1);
// Parent block is missing. Launch fork-aware ancestor
// recovery rather than a naive range fetch + premature
// FCU. The recovery task also owns the final FCU.
Expand Down Expand Up @@ -925,10 +981,37 @@ mod tests {

#[tokio::test]
async fn can_handle_invalid_new_payload() {
// When new_payload returns Invalid, the peer should NOT be penalized.
// The only event emitted is the early ValidHeader announcement from
// on_new_block; no BlockImportOutcome error should follow.
// "test error" doesn't hit any of the race-shaped heuristic needles,
// so it flows through the "structural" branch and must still emit an
// Err outcome — the BadBlock penalty path is preserved for genuine
// structural invalidity (bad signature, wrong turn difficulty, etc.).
let mut fixture = TestFixture::new(EngineResponses::invalid_new_payload()).await;
fixture
.assert_block_import(|outcome| {
matches!(
outcome,
BlockImportEvent::Outcome(BlockImportOutcome {
peer: _,
result: Err(BlockImportError::Other(_))
})
)
})
.await;
}

#[tokio::test]
async fn reorg_race_invalid_does_not_penalize() {
// Error string contains "state root" — our heuristic classifies this
// as a reorg race, so NO Err outcome should be emitted. Only the
// early ValidHeader announcement (from on_new_block) is observed.
let mut fixture = TestFixture::new(EngineResponses {
new_payload: PayloadStatusEnum::Invalid {
validation_error: "invalid state root (local != remote)".into(),
},
fcu: PayloadStatusEnum::Syncing,
})
.await;

fixture
.assert_block_import(|outcome| {
matches!(
Expand All @@ -938,29 +1021,25 @@ mod tests {
})
.await;

// Verify no error outcome was emitted
// Give the service time to process and confirm no follow-up Err
// outcome is emitted.
let waker = futures::task::noop_waker();
let mut cx = Context::from_waker(&waker);
let mut extra = Vec::new();
let deadline = tokio::time::Instant::now() + tokio::time::Duration::from_millis(200);
loop {
let mut extras = Vec::new();
while tokio::time::Instant::now() < deadline {
match fixture.handle.poll_outcome(&mut cx) {
Poll::Ready(Some(event)) => extra.push(event),
Poll::Ready(Some(ev)) => extras.push(ev),
Poll::Ready(None) => break,
Poll::Pending => {
if tokio::time::Instant::now() >= deadline {
break;
}
tokio::task::yield_now().await;
}
Poll::Pending => tokio::task::yield_now().await,
}
}
assert!(
!extra.iter().any(|e| matches!(
e,
!extras.iter().any(|ev| matches!(
ev,
BlockImportEvent::Outcome(BlockImportOutcome { result: Err(_), .. })
)),
"Should not penalize peer for Invalid new_payload. Extra events: {extra:?}"
"race-shaped Invalid must not emit an Err outcome (would drain peer reputation). extras = {extras:?}",
);
}

Expand Down
Loading