Skip to content

Commit a5db735

Browse files
[consensus] Address Last Round of Feedback (#3634)
1 parent 826c503 commit a5db735

7 files changed

Lines changed: 592 additions & 153 deletions

File tree

consensus/src/marshal/coding/marshaled.rs

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -525,12 +525,30 @@ where
525525
.with_label("propose")
526526
.with_attribute("round", consensus_context.round)
527527
.spawn(move |runtime_context| async move {
528-
// On leader recovery, marshal may already hold a verified
529-
// block for this round (persisted before voting in consensus). Building
530-
// a fresh block would land on the same view index in the
531-
// prunable archive and be silently dropped, so reuse the
532-
// stored block instead.
528+
// On leader recovery, marshal may already hold a verified block
529+
// for this round (persisted before voting in consensus).
530+
//
531+
// Building a fresh block would land on the same prunable
532+
// archive index and be silently dropped, so the stored block
533+
// is the only proposal we can broadcast for this round.
534+
//
535+
// The recovered block is safe to reuse only if its embedded
536+
// context matches the context simplex just recovered.
537+
// Otherwise the cached block was built against a different
538+
// parent and cannot be broadcast under the current header, so
539+
// drop the receiver and let the voter nullify the view via
540+
// timeout.
533541
if let Some(block) = marshal.get_verified(consensus_context.round).await {
542+
let block_context = block.context();
543+
if block_context != consensus_context {
544+
debug!(
545+
round = ?consensus_context.round,
546+
?consensus_context,
547+
?block_context,
548+
"skipping proposal: cached verified block context no longer matches"
549+
);
550+
return;
551+
}
534552
let commitment = block.commitment();
535553
let round = consensus_context.round;
536554
let success = tx.send_lossy(commitment);

consensus/src/marshal/coding/mod.rs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,11 @@ mod tests {
177177
harness::verified_success_implies_recoverable_after_restart::<CodingHarness>(0..16);
178178
}
179179

180+
#[test_traced("WARN")]
181+
fn test_coding_certified_success_implies_recoverable_after_restart() {
182+
harness::certified_success_implies_recoverable_after_restart::<CodingHarness>(0..16);
183+
}
184+
180185
#[test_traced("WARN")]
181186
fn test_coding_delivery_visibility_implies_recoverable_after_restart() {
182187
harness::delivery_visibility_implies_recoverable_after_restart::<CodingHarness>(0..16);
@@ -2052,4 +2057,90 @@ mod tests {
20522057
);
20532058
});
20542059
}
2060+
2061+
/// Regression: if a pre-crash leader persisted a verified block for a
2062+
/// round but the simplex `Notarize` never reached the journal, replay
2063+
/// can recover a `consensus_context` whose parent differs from the one
2064+
/// the cached block was built against. The restarted leader must then
2065+
/// drop the receiver so the voter nullifies the view via
2066+
/// `MissingProposal`, rather than broadcasting the stale cached block
2067+
/// under a header that peers will reject.
2068+
#[test_traced("WARN")]
2069+
fn test_propose_skips_when_verified_block_context_changed() {
2070+
let runner = deterministic::Runner::timed(Duration::from_secs(60));
2071+
runner.start(|mut context| async move {
2072+
let Fixture {
2073+
participants,
2074+
schemes,
2075+
..
2076+
} = bls12381_threshold_vrf::fixture::<V, _>(&mut context, NAMESPACE, NUM_VALIDATORS);
2077+
let mut oracle =
2078+
setup_network_with_participants(context.clone(), NZUsize!(1), participants.clone())
2079+
.await;
2080+
2081+
let me = participants[0].clone();
2082+
let coding_config = coding_config_for_participants(NUM_VALIDATORS as u16);
2083+
2084+
let setup = CodingHarness::setup_validator(
2085+
context.with_label("validator_0"),
2086+
&mut oracle,
2087+
me.clone(),
2088+
ConstantProvider::new(schemes[0].clone()),
2089+
)
2090+
.await;
2091+
let marshal = setup.mailbox;
2092+
let shards = setup.extra;
2093+
2094+
let genesis_ctx = CodingCtx {
2095+
round: Round::zero(),
2096+
leader: default_leader(),
2097+
parent: (View::zero(), genesis_commitment()),
2098+
};
2099+
let genesis = make_coding_block(genesis_ctx, Sha256::hash(b""), Height::zero(), 0);
2100+
let genesis_parent_commitment = genesis_coding_commitment::<Sha256, _>(&genesis);
2101+
2102+
// Stash a stale block built against genesis as its parent at round V=2.
2103+
let round = Round::new(Epoch::zero(), View::new(2));
2104+
let stale_ctx = CodingCtx {
2105+
round,
2106+
leader: me.clone(),
2107+
parent: (View::zero(), genesis_parent_commitment),
2108+
};
2109+
let stale_block = make_coding_block(stale_ctx, genesis.digest(), Height::new(1), 100);
2110+
let stale_coded: CodedBlock<_, ReedSolomon<Sha256>, Sha256> =
2111+
CodedBlock::new(stale_block, coding_config, &Sequential);
2112+
assert!(marshal.verified(round, stale_coded).await);
2113+
2114+
// Simulate a replay where parent selection now points to a
2115+
// different parent commitment than the cached block was built for.
2116+
let new_parent_commitment = Commitment::from((
2117+
Sha256::hash(b"different-parent-block"),
2118+
Sha256::hash(b"different-parent-inner"),
2119+
Sha256::hash(b"different-parent-ctx"),
2120+
coding_config,
2121+
));
2122+
let new_ctx = CodingCtx {
2123+
round,
2124+
leader: me.clone(),
2125+
parent: (View::new(1), new_parent_commitment),
2126+
};
2127+
2128+
let mock_app: MockVerifyingApp<CodingB, S> = MockVerifyingApp::new(genesis);
2129+
let cfg = MarshaledConfig {
2130+
application: mock_app,
2131+
marshal: marshal.clone(),
2132+
shards: shards.clone(),
2133+
scheme_provider: ConstantProvider::new(schemes[0].clone()),
2134+
epocher: FixedEpocher::new(BLOCKS_PER_EPOCH),
2135+
strategy: Sequential,
2136+
};
2137+
let mut marshaled = Marshaled::new(context.clone(), cfg);
2138+
2139+
let commitment_rx = marshaled.propose(new_ctx).await;
2140+
assert!(
2141+
commitment_rx.await.is_err(),
2142+
"propose must drop the receiver when the cached block's context no longer matches"
2143+
);
2144+
});
2145+
}
20552146
}

0 commit comments

Comments
 (0)