diff --git a/consensus/src/simplex/actors/voter/actor.rs b/consensus/src/simplex/actors/voter/actor.rs index 73ed8e9387a..ee2ced67f72 100644 --- a/consensus/src/simplex/actors/voter/actor.rs +++ b/consensus/src/simplex/actors/voter/actor.rs @@ -499,10 +499,10 @@ impl< certificate_sender: &mut WrappedSender>, view: View, resolved: Resolved, - ) { + ) -> Vec { // Construct a notarization certificate let Some(notarization) = self.state.broadcast_notarization(view) else { - return; + return Vec::new(); }; // Only the leader sees an unbiased latency sample, so record it now. @@ -517,10 +517,11 @@ impl< .updated(Certificate::Notarization(notarization.clone())) .await; } - // Update our local round with the certificate. self.handle_notarization(notarization.clone()).await; - // Persist the certificate before informing others. + // The notarization must be durable before we derive ancestor certifications from it. self.sync_journal(view).await; + let inferred = self.apply_inferred_certifications(resolver, view).await; + // Broadcast the notarization certificate debug!(proposal=?notarization.proposal, "broadcasting notarization"); self.broadcast_certificate( @@ -532,6 +533,52 @@ impl< self.reporter .report(Activity::Notarization(notarization)) .await; + inferred + } + + /// Journals the certification decision and signals the resolver and reporter. + /// + /// Idempotent: returns `false` if the view was already concluded. + async fn conclude_certification( + &mut self, + resolver: &mut resolver::Mailbox, + view: View, + success: bool, + ) -> bool { + let Some(notarization) = self.handle_certification(view, success).await else { + return false; + }; + resolver.certified(view, success).await; + if success { + self.reporter + .report(Activity::Certification(notarization)) + .await; + } + true + } + + /// Derives certifications for rounds whose notarizations prove certification without + /// waiting on the automaton, then journals and signals each one. + /// + /// Important nuance: inferred certification establishes the protocol fact that the + /// round is certified, but it does not retroactively mean this replica locally verified + /// the proposal. Callers should therefore revisit the normal notify path for the inferred + /// view and rely on each vote/certificate constructor to enforce its own local gates. + async fn apply_inferred_certifications( + &mut self, + resolver: &mut resolver::Mailbox, + view: View, + ) -> Vec { + let mut inferred = Vec::new(); + for inferred_view in self.state.infer_certifications(view) { + if self + .conclude_certification(resolver, inferred_view, true) + .await + { + inferred.push(inferred_view); + } + } + inferred } /// Broadcast a nullify vote for `view` if the state machine allows it. @@ -670,17 +717,24 @@ impl< view: View, resolved: Resolved, ) { - self.try_broadcast_notarize(batcher, vote_sender, view) - .await; - self.try_broadcast_notarization(resolver, certificate_sender, view, resolved) - .await; - // We handle broadcast of `Nullify` votes in `timeout`, so this only emits certificates. - self.try_broadcast_nullification(resolver, certificate_sender, view, resolved) - .await; - self.try_broadcast_finalize(batcher, vote_sender, view) - .await; - self.try_broadcast_finalization(resolver, certificate_sender, view, resolved) - .await; + // Use an explicit worklist so inferred certification can revisit the same notify flow + // without building a recursive async future. + let mut pending = vec![(view, resolved)]; + while let Some((view, resolved)) = pending.pop() { + self.try_broadcast_notarize(batcher, vote_sender, view) + .await; + let inferred = self + .try_broadcast_notarization(resolver, certificate_sender, view, resolved) + .await; + // We handle broadcast of `Nullify` votes in `timeout`, so this only emits certificates. + self.try_broadcast_nullification(resolver, certificate_sender, view, resolved) + .await; + self.try_broadcast_finalize(batcher, vote_sender, view) + .await; + self.try_broadcast_finalization(resolver, certificate_sender, view, resolved) + .await; + pending.extend(inferred.into_iter().rev().map(|view| (view, Resolved::None))); + } } /// Spawns the actor event loop with the provided channels. @@ -756,17 +810,8 @@ impl< .await; } Artifact::Certification(round, success) => { - let Some(notarization) = - self.handle_certification(round.view(), success).await - else { - continue; - }; - resolver.certified(round.view(), success).await; - if success { - self.reporter - .report(Activity::Certification(notarization)) - .await; - } + self.conclude_certification(&mut resolver, round.view(), success) + .await; } Artifact::Nullify(nullify) => { self.handle_nullify(nullify.clone()).await; @@ -807,6 +852,13 @@ impl< } self.journal = Some(journal); + // Recover inferred certifications that were not journaled before a crash. + let recovery_views: Vec = self.state.notarized_views_desc().collect(); + let mut recovered_inferred = Vec::new(); + for view in recovery_views { + recovered_inferred.extend(self.apply_inferred_certifications(&mut resolver, view).await); + } + // Log current view after recovery let end = self.context.current(); let elapsed = end.duration_since(start).unwrap_or_default(); @@ -829,6 +881,17 @@ impl< debug!(%observed_view, %leader, ?reason, "nullifying round"); self.state.trigger_timeout(observed_view, reason); } + for view in recovered_inferred { + self.notify( + &mut batcher, + &mut resolver, + &mut vote_sender, + &mut certificate_sender, + view, + Resolved::None, + ) + .await; + } // Process messages let mut pending_propose: Option, D>> = None; @@ -968,20 +1031,12 @@ impl< if !certified { warn!(?round, "proposal failed certification"); } - let Some(notarization) = self.handle_certification(view, certified).await - else { - continue; - }; - // Always forward certification outcomes to resolver. - // This can happen after a nullification for the same view because - // certification is asynchronous; finalization is the boundary that - // cancels in-flight certification and suppresses late reporting. - resolver.certified(view, certified).await; - if certified { - self.reporter - .report(Activity::Certification(notarization)) - .await; - } + // Always forward certification outcomes to resolver. This can happen + // after a nullification for the same view because certification is + // asynchronous; finalization is the boundary that cancels in-flight + // certification and suppresses late reporting. + self.conclude_certification(&mut resolver, view, certified) + .await; } Err(err) => { // Unlike propose/verify (where failing to act will lead to a timeout diff --git a/consensus/src/simplex/actors/voter/mod.rs b/consensus/src/simplex/actors/voter/mod.rs index 52a251e6ef6..020338c5efc 100644 --- a/consensus/src/simplex/actors/voter/mod.rs +++ b/consensus/src/simplex/actors/voter/mod.rs @@ -63,8 +63,8 @@ mod tests { secp256r1, Scheme, }, types::{ - Certificate, Finalization, Finalize, Notarization, Notarize, Nullification, - Nullify, Proposal, Vote, + Artifact, Certificate, Finalization, Finalize, Notarization, Notarize, + Nullification, Nullify, Proposal, Vote, }, }, types::{Participant, Round, View}, @@ -84,6 +84,7 @@ mod tests { use commonware_runtime::{ deterministic, telemetry::traces::collector::TraceStorage, Clock, Metrics, Quota, Runner, }; + use commonware_storage::journal::segmented::variable::{Config as JConfig, Journal}; use commonware_utils::{channel::mpsc, sync::Mutex, NZUsize, NZU16}; use futures::FutureExt; use std::{ @@ -224,6 +225,44 @@ mod tests { Arc>, mocks::reporter::Reporter, ) + where + S: Scheme, + L: ElectorConfig, + { + setup_voter_with_partition_and_certifier( + context, + oracle, + participants, + schemes, + elector, + format!("voter_test_{}", participants[0]), + leader_timeout, + certification_timeout, + timeout_retry, + should_certify, + ) + .await + } + + #[allow(clippy::too_many_arguments)] + async fn setup_voter_with_partition_and_certifier( + context: &mut deterministic::Context, + oracle: &commonware_p2p::simulated::Oracle, + participants: &[S::PublicKey], + schemes: &[S], + elector: L, + partition: String, + leader_timeout: Duration, + certification_timeout: Duration, + timeout_retry: Duration, + should_certify: mocks::application::Certifier, + ) -> ( + Mailbox, + mpsc::Receiver>, + mpsc::Receiver>, + Arc>, + mocks::reporter::Reporter, + ) where S: Scheme, L: ElectorConfig, @@ -258,7 +297,7 @@ mod tests { automaton: application.clone(), relay: application.clone(), reporter: reporter.clone(), - partition: format!("voter_test_{me}"), + partition, epoch: Epoch::new(333), mailbox_size: 128, leader_timeout, @@ -6707,16 +6746,9 @@ mod tests { cancelled_certification_recertifies_after_restart::<_, _>(secp256r1::fixture); } - /// Demonstrates that validators in future views cannot retroactively help - /// stuck validators escape via nullification. - /// - /// This test extends the previous scenario to show that: - /// 1. A stuck validator (view 3) cannot be rescued by notarizations from future views - /// 2. The only escape route is a finalization certificate (which requires Byzantine cooperation) - /// - /// Once the f+1 honest validators certify view 3 and advance to view 4, - /// they can only vote to nullify view 4 (their current view) without equivocating. - /// The `timeout` function only votes to nullify `self.view` (current view). + /// A validator stuck on certifying view V because its automaton cannot + /// complete the request is rescued as soon as a notarization for view V+1 arrives, + /// because that notarization proves f+1 honest peers had already certified V. fn only_finalization_rescues_validator(mut fixture: F) where S: Scheme, @@ -6810,72 +6842,14 @@ mod tests { .resolved(Certificate::Notarization(notarization_5)) .await; - // The stuck validator should still not advance. - // - // Receiving a notarization for view 5 doesn't help because: - // 1. add_notarization() does not call enter_view() - it only adds to certification_candidates - // 2. To advance past view 4, the validator needs EITHER: - // a. Certification of view 4 to succeed (impossible - no context) - // b. A nullification certificate for view 4 (impossible - only f votes) - // c. A finalization certificate (requires Byzantine to vote finalize) - let advanced = loop { - select! { - msg = batcher_receiver.recv() => { - match msg.unwrap() { - batcher::Message::Update { - current, response, .. - } => { - response.send(None).unwrap(); - if current > view_4 { - break true; - } - } - batcher::Message::Constructed(Vote::Nullify(n)) => { - // Still voting nullify for view 4 - expected - assert_eq!( - n.view(), - view_4, - "should only vote nullify for stuck view" - ); - } - _ => {} - } - }, - _ = context.sleep(Duration::from_secs(5)) => { - break false; - }, - } - }; - - assert!( - !advanced, - "receiving a notarization for view 5 should NOT rescue the stuck validator - \ - they still can't certify view 4 (no context) and can't form a nullification \ - (not enough votes). The f+1 honest validators who advanced to view 5 cannot \ - retroactively help because they can only vote nullify for their current view (5), \ - not for view 4." - ); - - // HOWEVER: A finalization certificate WOULD rescue the stuck validator. - // If the Byzantine validators eventually cooperate and vote finalize, - // the finalization would abort the stuck certification and advance the view. - // - // Let's demonstrate this escape route works (if Byzantine cooperate): - let (_, finalization_5) = build_finalization(&schemes, &proposal_5, quorum); - mailbox - .resolved(Certificate::Finalization(finalization_5)) - .await; - - // Now the validator SHOULD advance (finalization aborts stuck certification) + // Inference from notarization_5 (parent = view 4) must rescue the stuck + // validator: f+1 honest voters on view 5 had already certified view 4. let rescued = loop { select! { msg = batcher_receiver.recv() => { - if let batcher::Message::Update { - current, response, .. - } = msg.unwrap() - { + if let batcher::Message::Update { current, response, .. } = msg.unwrap() { response.send(None).unwrap(); - if current > view_5 { + if current > view_4 { break true; } } @@ -6888,16 +6862,15 @@ mod tests { assert!( rescued, - "a finalization certificate SHOULD rescue the stuck validator - \ - this is the ONLY escape route, but it requires Byzantine cooperation \ - (they must vote finalize). If Byzantine permanently withhold finalize votes, \ - the stuck validators are permanently excluded from consensus." + "a notarization for view 5 must rescue the validator stuck on view 4: the \ + notarization proves f+1 honest peers already certified view 4, so the \ + validator can infer certification without running its own automaton." ); }); } #[test_traced] - fn test_only_finalization_rescues_validator() { + fn test_notarization_rescues_stuck_validator_via_inference() { only_finalization_rescues_validator::<_, _>(bls12381_threshold_vrf::fixture::); only_finalization_rescues_validator::<_, _>(bls12381_threshold_vrf::fixture::); only_finalization_rescues_validator::<_, _>(bls12381_multisig::fixture::); @@ -8770,4 +8743,345 @@ mod tests { batcher_update_triggers_timeout(ed25519::fixture); batcher_update_triggers_timeout(secp256r1::fixture); } + + /// A notarization for view v+1 proves f+1 honest voters already certified v. Even when + /// the local automaton never resolves `certify(v)` (Certifier::Pending), receiving the + /// next notarization must certify v through inference, unlock `Finalize(v)`, and signal + /// the resolver/reporter. + fn notarization_infers_parent_certification(mut fixture: F) + where + S: Scheme, + F: FnMut(&mut deterministic::Context, &[u8], u32) -> Fixture, + { + let n = 5; + let quorum = quorum(n); + let namespace = b"notarization_infers_parent_certification".to_vec(); + let executor = deterministic::Runner::timed(Duration::from_secs(30)); + executor.start(|mut context| async move { + let Fixture { + participants, + schemes, + .. + } = fixture(&mut context, &namespace, n); + let oracle = + start_test_network_with_peers(context.clone(), participants.clone(), true).await; + + // Automaton will never respond to certify requests. Inference is the only way + // to certify view 3 here. + let elector = RoundRobin::::default(); + let (mut mailbox, mut batcher_receiver, mut resolver_receiver, _relay, reporter) = + setup_voter_with_certifier( + &mut context, + &oracle, + &participants, + &schemes, + elector, + Duration::from_secs(600), + Duration::from_secs(600), + Duration::from_mins(60), + mocks::application::Certifier::Pending, + ) + .await; + + // Land on view 3 as a follower so we never drive local propose/verify. + let target_view = View::new(3); + advance_to_view( + &mut mailbox, + &mut batcher_receiver, + &schemes, + quorum, + target_view, + ) + .await; + + // Deliver a notarization for view 3. Automaton is Pending; certification cannot + // complete normally. + let proposal_3 = Proposal::new( + Round::new(Epoch::new(333), target_view), + target_view.previous().unwrap(), + Sha256::hash(b"view_3_proposal"), + ); + let (_, notarization_3) = build_notarization(&schemes, &proposal_3, quorum); + mailbox + .resolved(Certificate::Notarization(notarization_3.clone())) + .await; + + // Deliver a notarization for view 4 whose proposal.parent points at view 3. This + // is the evidence that certifies view 3 through inference. + let next_view = target_view.next(); + let proposal_4 = Proposal::new( + Round::new(Epoch::new(333), next_view), + target_view, + Sha256::hash(b"view_4_proposal"), + ); + let (_, notarization_4) = build_notarization(&schemes, &proposal_4, quorum); + mailbox + .resolved(Certificate::Notarization(notarization_4)) + .await; + + // Expect MailboxMessage::Certified{ view=3, success=true } and a local + // Finalize(v) vote for the inferred view. Inference proves certification and + // therefore should unlock finalization, but it must not be confused with + // retroactively satisfying the local precondition for notarization. + let mut certified = false; + let mut finalized = false; + loop { + select! { + msg = resolver_receiver.recv() => match msg.unwrap() { + MailboxMessage::Certified { view, success } if view == target_view => { + assert!(success, "inferred certification must be a success"); + certified = true; + } + _ => {} + }, + msg = batcher_receiver.recv() => match msg.unwrap() { + batcher::Message::Constructed(Vote::Finalize(finalize)) + if finalize.view() == target_view => + { + finalized = true; + } + batcher::Message::Constructed(Vote::Notarize(notarize)) + if notarize.view() == target_view => + { + panic!( + "inference must not retroactively justify notarize for view {target_view}" + ); + } + batcher::Message::Update { response, .. } => { + response.send(None).unwrap(); + } + _ => {} + }, + _ = context.sleep(Duration::from_secs(10)) => { + panic!( + "inferred certification/finalize for view {target_view} never completed" + ); + }, + } + if certified && finalized { + break; + } + } + + // The reporter must have recorded the inferred notarization under view 3 via + // Activity::Certification (stored in the notarizations map). + loop { + { + let notarizations = reporter.notarizations.lock(); + if matches!( + notarizations.get(&target_view), + Some(stored) if stored == ¬arization_3 + ) { + break; + } + } + context.sleep(Duration::from_millis(10)).await; + } + + // State must have advanced past view 3 since certification succeeded. + loop { + select! { + msg = batcher_receiver.recv() => { + if let batcher::Message::Update { current, response, .. } = msg.unwrap() { + response.send(None).unwrap(); + if current > target_view { + break; + } + } + }, + _ = context.sleep(Duration::from_secs(5)) => { + panic!("voter never advanced past view {target_view} after inference"); + }, + } + } + + reporter.assert_no_faults(); + reporter.assert_no_invalid(); + }); + } + + #[test_traced] + fn test_notarization_infers_parent_certification() { + notarization_infers_parent_certification::<_, _>( + bls12381_threshold_vrf::fixture::, + ); + notarization_infers_parent_certification::<_, _>( + bls12381_threshold_vrf::fixture::, + ); + notarization_infers_parent_certification::<_, _>(bls12381_multisig::fixture::); + notarization_infers_parent_certification::<_, _>(bls12381_multisig::fixture::); + notarization_infers_parent_certification::<_, _>(ed25519::fixture); + notarization_infers_parent_certification::<_, _>(secp256r1::fixture); + } + + /// Seeds a journal with notarizations for two adjacent views but no certifications, + /// mimicking the on-disk state left behind when a node crashes between syncing a + /// notarization and journaling its inferred ancestor certification. A fresh voter + /// must replay the notarizations and derive the missing certification through the + /// post-replay inference pass. + fn post_replay_pass_infers_ancestor_certification(mut fixture: F) + where + S: Scheme, + F: FnMut(&mut deterministic::Context, &[u8], u32) -> Fixture, + { + let n = 5; + let quorum = quorum(n); + let namespace = b"post_replay_pass_inference".to_vec(); + let executor = deterministic::Runner::timed(Duration::from_secs(30)); + executor.start(|mut context| async move { + let Fixture { + participants, + schemes, + .. + } = fixture(&mut context, &namespace, n); + let oracle = + start_test_network_with_peers(context.clone(), participants.clone(), true).await; + + let partition = "post_replay_pass_inference".to_string(); + let epoch = Epoch::new(333); + let view_3 = View::new(3); + let view_4 = View::new(4); + + let proposal_3 = Proposal::new( + Round::new(epoch, view_3), + view_3.previous().unwrap(), + Sha256::hash(b"post_replay_pass_view_3"), + ); + let (_, notarization_3) = build_notarization(&schemes, &proposal_3, quorum); + + let proposal_4 = Proposal::new( + Round::new(epoch, view_4), + view_3, + Sha256::hash(b"post_replay_pass_view_4"), + ); + let (_, notarization_4) = build_notarization(&schemes, &proposal_4, quorum); + + // Pre-populate the journal with both notarizations but no Certification(V3). + // This represents the state after a crash between syncing notarization_4 and + // journaling the inferred Certification(V3, true). + let scheme = schemes[0].clone(); + { + let mut journal = Journal::<_, Artifact>::init( + context.with_label("journal_seed"), + JConfig { + partition: partition.clone(), + compression: None, + codec_config: scheme.certificate_codec_config(), + page_cache: CacheRef::from_pooler(&context, PAGE_SIZE, PAGE_CACHE_SIZE), + write_buffer: NZUsize!(1024 * 1024), + }, + ) + .await + .expect("unable to open seed journal"); + journal + .append( + view_3.get(), + &Artifact::Notarization(notarization_3.clone()), + ) + .await + .expect("append notarization_3"); + journal + .append( + view_4.get(), + &Artifact::Notarization(notarization_4.clone()), + ) + .await + .expect("append notarization_4"); + journal.sync_all().await.expect("sync seed journal"); + } + + // Start a fresh voter with the same partition. Certifier::Cancel guarantees the + // automaton cannot certify, so any Certified{view=3,...} emission must come from + // the post-replay inference pass. + let (_, mut batcher_receiver, mut resolver_receiver, _relay, reporter) = + setup_voter_with_partition_and_certifier( + &mut context, + &oracle, + &participants, + &schemes, + RoundRobin::::default(), + partition, + Duration::from_secs(600), + Duration::from_secs(600), + Duration::from_mins(60), + mocks::application::Certifier::Cancel, + ) + .await; + + // Observe Certified{view=3, success=true} and the replay-triggered Finalize(v) + // vote. Replay inference should unlock finalization just like the live path, + // but only after batcher initialization makes the historical view interesting. + let mut certified = false; + let mut finalized = false; + loop { + select! { + msg = resolver_receiver.recv() => match msg.unwrap() { + MailboxMessage::Certified { view, success } if view == view_3 => { + assert!( + success, + "post-replay inference must certify view {view_3} as success" + ); + certified = true; + } + _ => {} + }, + msg = batcher_receiver.recv() => match msg.unwrap() { + batcher::Message::Constructed(Vote::Finalize(finalize)) + if finalize.view() == view_3 => + { + finalized = true; + } + batcher::Message::Update { response, .. } => { + response.send(None).unwrap(); + } + _ => {} + }, + _ = context.sleep(Duration::from_secs(10)) => { + panic!( + "post-replay pass never completed certification/finalize for view {view_3}" + ); + }, + } + if certified && finalized { + break; + } + } + + // The reporter should have observed Activity::Certification(view_3) via the + // reinstated notarization. + loop { + { + let notarizations = reporter.notarizations.lock(); + if matches!( + notarizations.get(&view_3), + Some(stored) if stored == ¬arization_3 + ) { + break; + } + } + context.sleep(Duration::from_millis(10)).await; + } + + reporter.assert_no_faults(); + reporter.assert_no_invalid(); + }); + } + + #[test_traced] + fn test_post_replay_pass_infers_ancestor_certification() { + post_replay_pass_infers_ancestor_certification::<_, _>( + bls12381_threshold_vrf::fixture::, + ); + post_replay_pass_infers_ancestor_certification::<_, _>( + bls12381_threshold_vrf::fixture::, + ); + post_replay_pass_infers_ancestor_certification::<_, _>( + bls12381_multisig::fixture::, + ); + post_replay_pass_infers_ancestor_certification::<_, _>( + bls12381_multisig::fixture::, + ); + post_replay_pass_infers_ancestor_certification::<_, _>(ed25519::fixture); + post_replay_pass_infers_ancestor_certification::<_, _>(secp256r1::fixture); + } } diff --git a/consensus/src/simplex/actors/voter/round.rs b/consensus/src/simplex/actors/voter/round.rs index 6f9dab5c1a6..b1b68f7aa99 100644 --- a/consensus/src/simplex/actors/voter/round.rs +++ b/consensus/src/simplex/actors/voter/round.rs @@ -33,6 +33,17 @@ enum CertifyState { Aborted, } +/// Public view of a round's certification state. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Certify { + /// No decision yet; inference or an automaton response can still resolve the round. + Open, + /// Certification concluded with the given outcome (`true` = certified, `false` = declined). + Decided(bool), + /// Certification was abandoned because a finalization superseded the view. + Aborted, +} + /// Per-[Rnd] state machine. pub struct Round { start: SystemTime, @@ -218,15 +229,13 @@ impl Round { self.finalization.as_ref() } - /// Returns true if we have explicitly certified the proposal. - pub const fn is_certified(&self) -> bool { - matches!(self.certify, CertifyState::Certified(true)) - } - - /// Returns true if certification was aborted due to finalization. - #[cfg(test)] - pub const fn is_certify_aborted(&self) -> bool { - matches!(self.certify, CertifyState::Aborted) + /// Returns a snapshot of the round's certification state. + pub const fn certify(&self) -> Certify { + match self.certify { + CertifyState::Ready | CertifyState::Outstanding(_) => Certify::Open, + CertifyState::Certified(result) => Certify::Decided(result), + CertifyState::Aborted => Certify::Aborted, + } } /// Returns how much time elapsed since the round started, if the clock monotonicity holds. @@ -521,7 +530,7 @@ impl Round { // If we haven't certified the proposal, return None. // // Note, this does not require verification. - if !self.is_certified() { + if !matches!(self.certify(), Certify::Decided(true)) { return None; } @@ -581,9 +590,10 @@ impl Round { Artifact::Finalization(_) => { self.broadcast_finalization = true; } - Artifact::Certification(_, success) => { - self.certified(*success); - } + // Certification transitions happen through state.certified during replay, so + // that the view advances and timeouts fire; those side effects cannot be + // expressed at the per-round level. + Artifact::Certification(_, _) => {} } } } diff --git a/consensus/src/simplex/actors/voter/state.rs b/consensus/src/simplex/actors/voter/state.rs index 2bb2b9dd201..c9dec0135de 100644 --- a/consensus/src/simplex/actors/voter/state.rs +++ b/consensus/src/simplex/actors/voter/state.rs @@ -1,4 +1,4 @@ -use super::round::Round; +use super::round::{Certify, Round}; use crate::{ simplex::{ elector::{Config as ElectorConfig, Elector}, @@ -418,7 +418,7 @@ impl, L: ElectorConfig, D: D /// Returns the proposal for `view` if it is eligible for forwarding. pub fn forwardable_proposal(&self, view: View) -> Option> { let round = self.views.get(&view)?; - if round.finalization().is_some() || round.is_certified() { + if round.finalization().is_some() || matches!(round.certify(), Certify::Decided(true)) { return round.proposal().cloned(); } None @@ -608,10 +608,22 @@ impl, L: ElectorConfig, D: D /// Marks proposal certification as complete and returns the notarization. /// - /// Returns `None` if the view was already pruned. Otherwise returns the notarization - /// regardless of success/failure. + /// Returns `None` if the view was already pruned or already concluded (so callers do + /// not re-journal or re-emit). Panics on a conflicting outcome. pub fn certified(&mut self, view: View, is_success: bool) -> Option> { let round = self.views.get_mut(&view)?; + match round.certify() { + Certify::Open => {} + Certify::Decided(previous) => { + assert_eq!( + previous, is_success, + "certification should not conflict: view {view}" + ); + return None; + } + Certify::Aborted => return None, + } + round.certified(is_success); if is_success { // Clear deadlines if the certification was successful @@ -636,6 +648,65 @@ impl, L: ElectorConfig, D: D Some(notarization) } + /// Returns views whose certifications can be inferred from `view`'s notarization. + /// + /// An honest voter only signs a notarize for a view after certifying its parent, so a + /// notarization for `view` proves f+1 peers already certified `view.parent`. Walks + /// `proposal.parent` pointers backward and also includes `view` itself if a notarized + /// descendant already points at it (out-of-order arrival). + pub fn infer_certifications(&self, view: View) -> Vec { + let mut inferred = Vec::new(); + + // Infer `view` itself when a notarized descendant already points at it as parent. + if self.views.get(&view).is_some_and(|round| { + matches!(round.certify(), Certify::Open) && round.notarization().is_some() + }) && self.has_notarized_child_with_parent(view) + { + inferred.push(view); + } + + let Some(notarization) = self.notarization(view) else { + return inferred; + }; + let mut cursor = notarization.proposal.parent; + loop { + if cursor <= self.last_finalized { + break; + } + let Some(round) = self.views.get(&cursor) else { + break; + }; + let Some(notarization) = round.notarization() else { + break; + }; + if !matches!(round.certify(), Certify::Open) { + break; + } + inferred.push(cursor); + cursor = notarization.proposal.parent; + } + + inferred + } + + /// Returns true if any tracked view greater than `view` has a notarization whose + /// proposal.parent equals `view`. + fn has_notarized_child_with_parent(&self, view: View) -> bool { + self.views.range(view.next()..).any(|(_, round)| { + round + .notarization() + .is_some_and(|n| n.proposal.parent == view) + }) + } + + /// Iterates views that currently hold a notarization, from highest to lowest. + pub fn notarized_views_desc(&self) -> impl Iterator + '_ { + self.views + .iter() + .rev() + .filter_map(|(view, round)| round.notarization().map(|_| *view)) + } + /// Drops any views that fall below the activity horizon and returns them for logging. pub fn prune(&mut self) -> Vec { let min = self.min_active(); @@ -656,7 +727,7 @@ impl, L: ElectorConfig, D: D // Check for explicit certification let round = self.views.get(&view)?; - if round.finalization().is_some() || round.is_certified() { + if round.finalization().is_some() || matches!(round.certify(), Certify::Decided(true)) { return Some(&round.proposal().expect("proposal must exist").payload); } None @@ -681,7 +752,7 @@ impl, L: ElectorConfig, D: D pub fn is_certify_aborted(&self, view: View) -> bool { self.views .get(&view) - .is_some_and(|round| round.is_certify_aborted()) + .is_some_and(|round| matches!(round.certify(), Certify::Aborted)) } /// Finds the parent payload for a given view by walking backwards through @@ -786,6 +857,62 @@ mod tests { Sha256Digest::from([0u8; 32]) } + fn make_state( + context: deterministic::Context, + verifier: ed25519::Scheme, + ) -> State { + let mut state = State::new( + context, + Config { + scheme: verifier, + elector: ::default(), + epoch: Epoch::new(1), + activity_timeout: ViewDelta::new(20), + leader_timeout: Duration::from_secs(1), + certification_timeout: Duration::from_secs(2), + timeout_retry: Duration::from_secs(3), + }, + ); + state.set_genesis(test_genesis()); + state + } + + fn make_notarization( + schemes: &[ed25519::Scheme], + verifier: &ed25519::Scheme, + view: View, + parent: View, + ) -> Notarization { + let proposal = Proposal::new( + Rnd::new(Epoch::new(1), view), + parent, + Sha256Digest::from([view.get() as u8; 32]), + ); + let votes: Vec<_> = schemes + .iter() + .map(|s| Notarize::sign(s, proposal.clone()).unwrap()) + .collect(); + Notarization::from_notarizes(verifier, votes.iter(), &Sequential).unwrap() + } + + fn make_finalization( + schemes: &[ed25519::Scheme], + verifier: &ed25519::Scheme, + view: View, + parent: View, + ) -> Finalization { + let proposal = Proposal::new( + Rnd::new(Epoch::new(1), view), + parent, + Sha256Digest::from([view.get() as u8; 32]), + ); + let votes: Vec<_> = schemes + .iter() + .map(|s| Finalize::sign(s, proposal.clone()).unwrap()) + .collect(); + Finalization::from_finalizes(verifier, votes.iter(), &Sequential).unwrap() + } + #[test] fn certificate_candidates_respect_force_flag() { let runtime = deterministic::Runner::default(); @@ -1985,39 +2112,16 @@ mod tests { let mut state = State::new(context, cfg); state.set_genesis(test_genesis()); - // Helper to create notarization for a view - let make_notarization = |view: View| { - let proposal = Proposal::new( - Rnd::new(Epoch::new(1), view), - GENESIS_VIEW, - Sha256Digest::from([view.get() as u8; 32]), - ); - let votes: Vec<_> = schemes - .iter() - .map(|s| Notarize::sign(s, proposal.clone()).unwrap()) - .collect(); - Notarization::from_notarizes(&verifier, votes.iter(), &Sequential).unwrap() - }; - - // Helper to create finalization for a view - let make_finalization = |view: View| { - let proposal = Proposal::new( - Rnd::new(Epoch::new(1), view), - GENESIS_VIEW, - Sha256Digest::from([view.get() as u8; 32]), - ); - let votes: Vec<_> = schemes - .iter() - .map(|s| Finalize::sign(s, proposal.clone()).unwrap()) - .collect(); - Finalization::from_finalizes(&verifier, votes.iter(), &Sequential).unwrap() - }; - let mut pool = AbortablePool::<()>::default(); // Add notarizations for views 3-8 for i in 3..=8u64 { - state.add_notarization(make_notarization(View::new(i))); + state.add_notarization(make_notarization( + &schemes, + &verifier, + View::new(i), + GENESIS_VIEW, + )); } // All 6 views should be candidates @@ -2042,7 +2146,12 @@ mod tests { assert!(!state.is_certify_aborted(View::new(7))); // Add finalization for view 5 - aborts handles for views 3, 4, 5 - state.add_finalization(make_finalization(View::new(5))); + state.add_finalization(make_finalization( + &schemes, + &verifier, + View::new(5), + GENESIS_VIEW, + )); // Verify views 3, 4, 5 had their certification aborted assert!(state.is_certify_aborted(View::new(3))); @@ -2060,7 +2169,12 @@ mod tests { assert!(state.certify_candidates().is_empty()); // Add view 9, should be returned as candidate - state.add_notarization(make_notarization(View::new(9))); + state.add_notarization(make_notarization( + &schemes, + &verifier, + View::new(9), + GENESIS_VIEW, + )); let candidates = state.certify_candidates(); assert_eq!(candidates.len(), 1); assert_eq!(candidates[0].0.round.view(), View::new(9)); @@ -2069,7 +2183,12 @@ mod tests { // Set handle for view 9, add view 10 let handle9 = pool.push(futures::future::pending()); state.set_certify_handle(View::new(9), handle9); - state.add_notarization(make_notarization(View::new(10))); + state.add_notarization(make_notarization( + &schemes, + &verifier, + View::new(10), + GENESIS_VIEW, + )); // View 10 returned (view 9 has handle) let candidates = state.certify_candidates(); @@ -2078,11 +2197,21 @@ mod tests { assert!(!candidates[0].1); // Finalize view 9 - aborts view 9's handle - state.add_finalization(make_finalization(View::new(9))); + state.add_finalization(make_finalization( + &schemes, + &verifier, + View::new(9), + GENESIS_VIEW, + )); assert!(state.is_certify_aborted(View::new(9))); // Add view 11, should be returned - state.add_notarization(make_notarization(View::new(11))); + state.add_notarization(make_notarization( + &schemes, + &verifier, + View::new(11), + GENESIS_VIEW, + )); let candidates = state.certify_candidates(); assert_eq!(candidates.len(), 1); assert_eq!(candidates[0].0.round.view(), View::new(11)); @@ -2111,38 +2240,27 @@ mod tests { let mut state = State::new(context, cfg); state.set_genesis(test_genesis()); - let make_notarization = |view: View| { - let proposal = Proposal::new( - Rnd::new(Epoch::new(1), view), - GENESIS_VIEW, - Sha256Digest::from([view.get() as u8; 32]), - ); - let votes: Vec<_> = schemes - .iter() - .map(|scheme| Notarize::sign(scheme, proposal.clone()).unwrap()) - .collect(); - Notarization::from_notarizes(&verifier, votes.iter(), &Sequential).unwrap() - }; - - let make_finalization = |view: View| { - let proposal = Proposal::new( - Rnd::new(Epoch::new(1), view), - GENESIS_VIEW, - Sha256Digest::from([view.get() as u8; 32]), - ); - let votes: Vec<_> = schemes - .iter() - .map(|scheme| Finalize::sign(scheme, proposal.clone()).unwrap()) - .collect(); - Finalization::from_finalizes(&verifier, votes.iter(), &Sequential).unwrap() - }; - let stale_view = View::new(2); let live_view = View::new(3); - state.add_notarization(make_notarization(stale_view)); - state.add_notarization(make_notarization(live_view)); - state.add_finalization(make_finalization(stale_view)); + state.add_notarization(make_notarization( + &schemes, + &verifier, + stale_view, + GENESIS_VIEW, + )); + state.add_notarization(make_notarization( + &schemes, + &verifier, + live_view, + GENESIS_VIEW, + )); + state.add_finalization(make_finalization( + &schemes, + &verifier, + stale_view, + GENESIS_VIEW, + )); // Reinsert a stale candidate to exercise the defensive finalized-view guard. state.certification_candidates.insert(stale_view); @@ -2557,4 +2675,250 @@ mod tests { assert!(state.construct_notarize(view).is_none()); }); } + + fn apply_inferred_certifications( + state: &mut State, + view: View, + ) -> Vec { + let inferred = state.infer_certifications(view); + for inferred_view in &inferred { + assert!( + state.certified(*inferred_view, true).is_some(), + "expected inferred certification for view {inferred_view}" + ); + } + inferred + } + + #[test] + fn infer_certifications_marks_parent_certified() { + let runtime = deterministic::Runner::default(); + runtime.start(|mut context| async move { + let namespace = b"ns".to_vec(); + let Fixture { + schemes, verifier, .. + } = ed25519::fixture(&mut context, &namespace, 4); + let mut state = make_state(context, verifier.clone()); + + // Parent view v has a notarization but is uncertified. + let parent_view = View::new(1); + state.add_notarization(make_notarization( + &schemes, + &verifier, + parent_view, + GENESIS_VIEW, + )); + + // Child view v+1 arrives with proposal.parent == v. + let child_view = View::new(2); + state.add_notarization(make_notarization( + &schemes, + &verifier, + child_view, + parent_view, + )); + + let inferred = apply_inferred_certifications(&mut state, child_view); + assert_eq!(inferred, vec![parent_view]); + assert!(matches!(state.views.get(&parent_view).unwrap().certify(), Certify::Decided(true))); + }); + } + + #[test] + fn infer_certifications_walks_full_ancestry() { + let runtime = deterministic::Runner::default(); + runtime.start(|mut context| async move { + let namespace = b"ns".to_vec(); + let Fixture { + schemes, verifier, .. + } = ed25519::fixture(&mut context, &namespace, 4); + let mut state = make_state(context, verifier.clone()); + + // Chain: v1 -> v2 -> v3 -> v4. + let mut parent = GENESIS_VIEW; + for raw in 1u64..=4 { + let view = View::new(raw); + state.add_notarization(make_notarization(&schemes, &verifier, view, parent)); + parent = view; + } + + // Inference from v4 should certify v3, v2, v1 (v4 itself has no child yet). + let inferred = apply_inferred_certifications(&mut state, View::new(4)); + assert_eq!(inferred, vec![View::new(3), View::new(2), View::new(1)]); + for raw in 1u64..=3 { + assert!(matches!(state.views.get(&View::new(raw)).unwrap().certify(), Certify::Decided(true))); + } + assert!(!matches!(state.views.get(&View::new(4)).unwrap().certify(), Certify::Decided(true))); + }); + } + + #[test] + fn infer_certifications_stops_at_last_finalized() { + let runtime = deterministic::Runner::default(); + runtime.start(|mut context| async move { + let namespace = b"ns".to_vec(); + let Fixture { + schemes, verifier, .. + } = ed25519::fixture(&mut context, &namespace, 4); + let mut state = make_state(context, verifier.clone()); + + // Chain of notarizations v1..=v5. + let mut parent = GENESIS_VIEW; + for raw in 1u64..=5 { + let view = View::new(raw); + state.add_notarization(make_notarization(&schemes, &verifier, view, parent)); + parent = view; + } + + // Finalizing v2 promotes last_finalized and should block inference for v1 and v2. + state.add_finalization(make_finalization( + &schemes, + &verifier, + View::new(2), + View::new(1), + )); + + let inferred = state.infer_certifications(View::new(5)); + assert_eq!(inferred, vec![View::new(4), View::new(3)]); + }); + } + + #[test] + fn infer_certifications_stops_at_missing_notarization() { + let runtime = deterministic::Runner::default(); + runtime.start(|mut context| async move { + let namespace = b"ns".to_vec(); + let Fixture { + schemes, verifier, .. + } = ed25519::fixture(&mut context, &namespace, 4); + let mut state = make_state(context, verifier.clone()); + + // v1 notarized; v2 missing; v3 notarized with parent v2. + state.add_notarization(make_notarization( + &schemes, + &verifier, + View::new(1), + GENESIS_VIEW, + )); + state.add_notarization(make_notarization( + &schemes, + &verifier, + View::new(3), + View::new(2), + )); + + let inferred = state.infer_certifications(View::new(3)); + assert!(inferred.is_empty()); + assert!(!matches!(state.views.get(&View::new(1)).unwrap().certify(), Certify::Decided(true))); + }); + } + + #[test] + fn infer_certifications_skips_already_certified_ancestor() { + let runtime = deterministic::Runner::default(); + runtime.start(|mut context| async move { + let namespace = b"ns".to_vec(); + let Fixture { + schemes, verifier, .. + } = ed25519::fixture(&mut context, &namespace, 4); + let mut state = make_state(context, verifier.clone()); + + // Chain v1 -> v2 -> v3. + let mut parent = GENESIS_VIEW; + for raw in 1u64..=3 { + let view = View::new(raw); + state.add_notarization(make_notarization(&schemes, &verifier, view, parent)); + parent = view; + } + + // Mark v2 as explicitly certified first; walk should stop there. + assert!(state.certified(View::new(2), true).is_some()); + + let inferred = state.infer_certifications(View::new(3)); + assert!(inferred.is_empty()); + assert!(!matches!(state.views.get(&View::new(1)).unwrap().certify(), Certify::Decided(true))); + }); + } + + #[test] + fn infer_certifications_infers_view_from_notarized_child() { + let runtime = deterministic::Runner::default(); + runtime.start(|mut context| async move { + let namespace = b"ns".to_vec(); + let Fixture { + schemes, verifier, .. + } = ed25519::fixture(&mut context, &namespace, 4); + let mut state = make_state(context, verifier.clone()); + + // v2 arrives first, pointing back to v1 as parent. + state.add_notarization(make_notarization( + &schemes, + &verifier, + View::new(2), + View::new(1), + )); + + // Only now does v1's notarization arrive. + state.add_notarization(make_notarization( + &schemes, + &verifier, + View::new(1), + GENESIS_VIEW, + )); + + // Inference triggered by v1's arrival should certify v1 via v2's evidence. + let inferred = apply_inferred_certifications(&mut state, View::new(1)); + assert_eq!(inferred, vec![View::new(1)]); + assert!(matches!(state.views.get(&View::new(1)).unwrap().certify(), Certify::Decided(true))); + }); + } + + #[test] + fn certified_is_idempotent_after_decision() { + let runtime = deterministic::Runner::default(); + runtime.start(|mut context| async move { + let namespace = b"ns".to_vec(); + let Fixture { + schemes, verifier, .. + } = ed25519::fixture(&mut context, &namespace, 4); + let mut state = make_state(context, verifier.clone()); + + state.add_notarization(make_notarization( + &schemes, + &verifier, + View::new(1), + GENESIS_VIEW, + )); + + // First decision returns the notarization. + assert!(state.certified(View::new(1), true).is_some()); + // Repeating with the same outcome returns None so callers don't re-emit. + assert!(state.certified(View::new(1), true).is_none()); + }); + } + + #[test] + #[should_panic(expected = "certification should not conflict")] + fn certified_conflicting_outcome_panics() { + let runtime = deterministic::Runner::default(); + runtime.start(|mut context| async move { + let namespace = b"ns".to_vec(); + let Fixture { + schemes, verifier, .. + } = ed25519::fixture(&mut context, &namespace, 4); + let mut state = make_state(context, verifier.clone()); + + state.add_notarization(make_notarization( + &schemes, + &verifier, + View::new(1), + GENESIS_VIEW, + )); + assert!(state.certified(View::new(1), false).is_some()); + + // Contradicting a failed certification signals a protocol/automaton fault. + let _ = state.certified(View::new(1), true); + }); + } + }