From 707ceef24c2ce9a0d185ab57e04b33cc43c223ba Mon Sep 17 00:00:00 2001 From: clabby Date: Wed, 11 Mar 2026 22:56:37 -0700 Subject: [PATCH 1/4] [glue] Add `glue` crate --- .github/workflows/publish.yml | 5 + Cargo.lock | 29 + Cargo.toml | 2 + README.md | 1 + glue/Cargo.toml | 61 + glue/README.md | 9 + glue/conformance.toml | 7 + glue/src/lib.rs | 12 + glue/src/simulate/action.rs | 84 + glue/src/simulate/engine.rs | 78 + glue/src/simulate/exit.rs | 103 + glue/src/simulate/mod.rs | 23 + glue/src/simulate/plan.rs | 1020 ++++++ glue/src/simulate/processed.rs | 15 + glue/src/simulate/property.rs | 34 + glue/src/simulate/reporter.rs | 56 + glue/src/simulate/team.rs | 179 + glue/src/simulate/tracker.rs | 163 + glue/src/stateful/actor/bootstrap.rs | 454 +++ glue/src/stateful/actor/core.rs | 696 ++++ glue/src/stateful/actor/mailbox.rs | 290 ++ glue/src/stateful/actor/metrics.rs | 102 + glue/src/stateful/actor/mod.rs | 11 + glue/src/stateful/actor/processor.rs | 1557 +++++++++ glue/src/stateful/db/any.rs | 612 ++++ glue/src/stateful/db/compact_p2p/actor.rs | 448 +++ glue/src/stateful/db/compact_p2p/handler.rs | 213 ++ glue/src/stateful/db/compact_p2p/mailbox.rs | 188 + glue/src/stateful/db/compact_p2p/mod.rs | 13 + glue/src/stateful/db/current.rs | 675 ++++ glue/src/stateful/db/immutable.rs | 493 +++ glue/src/stateful/db/immutable_unjournaled.rs | 621 ++++ glue/src/stateful/db/keyless.rs | 549 +++ glue/src/stateful/db/keyless_unjournaled.rs | 837 +++++ glue/src/stateful/db/mod.rs | 3038 +++++++++++++++++ glue/src/stateful/db/p2p/actor.rs | 704 ++++ glue/src/stateful/db/p2p/handler.rs | 448 +++ glue/src/stateful/db/p2p/mailbox.rs | 244 ++ glue/src/stateful/db/p2p/metrics.rs | 66 + glue/src/stateful/db/p2p/mod.rs | 22 + glue/src/stateful/mod.rs | 219 ++ glue/src/stateful/tests/common.rs | 212 ++ glue/src/stateful/tests/mod.rs | 637 ++++ glue/src/stateful/tests/multi_db_app.rs | 671 ++++ glue/src/stateful/tests/properties.rs | 96 + glue/src/stateful/tests/single_db_app.rs | 561 
+++ storage/src/journal/contiguous/mod.rs | 2 +- storage/src/merkle/mod.rs | 2 +- storage/src/qmdb/sync/compact.rs | 19 +- storage/src/qmdb/sync/engine.rs | 6 +- utils/src/channel/ring.rs | 30 + 51 files changed, 16611 insertions(+), 6 deletions(-) create mode 100644 glue/Cargo.toml create mode 100644 glue/README.md create mode 100644 glue/conformance.toml create mode 100644 glue/src/lib.rs create mode 100644 glue/src/simulate/action.rs create mode 100644 glue/src/simulate/engine.rs create mode 100644 glue/src/simulate/exit.rs create mode 100644 glue/src/simulate/mod.rs create mode 100644 glue/src/simulate/plan.rs create mode 100644 glue/src/simulate/processed.rs create mode 100644 glue/src/simulate/property.rs create mode 100644 glue/src/simulate/reporter.rs create mode 100644 glue/src/simulate/team.rs create mode 100644 glue/src/simulate/tracker.rs create mode 100644 glue/src/stateful/actor/bootstrap.rs create mode 100644 glue/src/stateful/actor/core.rs create mode 100644 glue/src/stateful/actor/mailbox.rs create mode 100644 glue/src/stateful/actor/metrics.rs create mode 100644 glue/src/stateful/actor/mod.rs create mode 100644 glue/src/stateful/actor/processor.rs create mode 100644 glue/src/stateful/db/any.rs create mode 100644 glue/src/stateful/db/compact_p2p/actor.rs create mode 100644 glue/src/stateful/db/compact_p2p/handler.rs create mode 100644 glue/src/stateful/db/compact_p2p/mailbox.rs create mode 100644 glue/src/stateful/db/compact_p2p/mod.rs create mode 100644 glue/src/stateful/db/current.rs create mode 100644 glue/src/stateful/db/immutable.rs create mode 100644 glue/src/stateful/db/immutable_unjournaled.rs create mode 100644 glue/src/stateful/db/keyless.rs create mode 100644 glue/src/stateful/db/keyless_unjournaled.rs create mode 100644 glue/src/stateful/db/mod.rs create mode 100644 glue/src/stateful/db/p2p/actor.rs create mode 100644 glue/src/stateful/db/p2p/handler.rs create mode 100644 glue/src/stateful/db/p2p/mailbox.rs create mode 100644 
glue/src/stateful/db/p2p/metrics.rs create mode 100644 glue/src/stateful/db/p2p/mod.rs create mode 100644 glue/src/stateful/mod.rs create mode 100644 glue/src/stateful/tests/common.rs create mode 100644 glue/src/stateful/tests/mod.rs create mode 100644 glue/src/stateful/tests/multi_db_app.rs create mode 100644 glue/src/stateful/tests/properties.rs create mode 100644 glue/src/stateful/tests/single_db_app.rs diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index a18d6d48702..b77572065b1 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -130,6 +130,11 @@ jobs: continue-on-error: true env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} + - name: Publish glue + run: cargo publish --manifest-path glue/Cargo.toml + continue-on-error: true + env: + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} - name: Publish chat run: cargo publish --manifest-path examples/chat/Cargo.toml continue-on-error: true diff --git a/Cargo.lock b/Cargo.lock index f6fad92ae86..0124723007e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1591,6 +1591,35 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "commonware-glue" +version = "2026.4.0" +dependencies = [ + "arbitrary", + "bytes", + "commonware-actor", + "commonware-broadcast", + "commonware-codec", + "commonware-conformance", + "commonware-consensus", + "commonware-cryptography", + "commonware-formatting", + "commonware-macros", + "commonware-p2p", + "commonware-parallel", + "commonware-resolver", + "commonware-runtime", + "commonware-storage", + "commonware-utils", + "futures", + "prometheus-client", + "rand 0.8.5", + "rand_core 0.6.4", + "thiserror 2.0.17", + "tracing", + "tracing-subscriber 0.3.20", +] + [[package]] name = "commonware-invariants" version = "2026.4.0" diff --git a/Cargo.toml b/Cargo.toml index 7b8857288f7..4eb908f23d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "cryptography", "deployer", "formatting", + "glue", 
"macros", "macros/impl", "math", @@ -114,6 +115,7 @@ commonware-consensus = { version = "2026.4.0", path = "consensus" } commonware-cryptography = { version = "2026.4.0", path = "cryptography", default-features = false } commonware-deployer = { version = "2026.4.0", path = "deployer", default-features = false } commonware-formatting = { version = "2026.4.0", path = "formatting", default-features = false } +commonware-glue = { version = "2026.4.0", path = "glue" } commonware-invariants = { version = "2026.4.0", path = "invariants" } commonware-macros = { version = "2026.4.0", path = "macros", default-features = false } commonware-macros-impl = { version = "2026.4.0", path = "macros/impl" } diff --git a/README.md b/README.md index c9809e1a826..73289d4e46b 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ _Primitives are designed for deployment in adversarial environments. If you find * [consensus](./consensus/README.md): Order opaque messages in a Byzantine environment. * [cryptography](./cryptography/README.md): Generate keys, sign arbitrary messages, and deterministically verify signatures. * [deployer](./deployer/README.md): Deploy infrastructure across cloud providers. +* [glue](./glue/README.md): Bootstrap applications with commonware primitive compositions. * [math](./math/README.md): Create and manipulate mathematical objects. * [p2p](./p2p/README.md): Communicate with authenticated peers over encrypted connections. * [parallel](./parallel/README.md): Parallelize fold operations with pluggable execution strategies. diff --git a/glue/Cargo.toml b/glue/Cargo.toml new file mode 100644 index 00000000000..4e7278925d5 --- /dev/null +++ b/glue/Cargo.toml @@ -0,0 +1,61 @@ +[package] +name = "commonware-glue" +edition.workspace = true +publish = true +version.workspace = true +license.workspace = true +description = "Bootstrap applications with commonware primitive compositions." 
+readme = "README.md" +homepage.workspace = true +repository = "https://github.com/commonwarexyz/monorepo/tree/main/glue" + +[lints] +workspace = true + +[dependencies] +arbitrary = { workspace = true, optional = true, features = ["derive"] } +bytes.workspace = true +commonware-actor.workspace = true +commonware-codec.workspace = true +commonware-consensus.workspace = true +commonware-cryptography.workspace = true +commonware-macros.workspace = true +commonware-p2p.workspace = true +commonware-parallel.workspace = true +commonware-resolver.workspace = true +commonware-runtime.workspace = true +commonware-storage.workspace = true +commonware-utils.workspace = true +futures.workspace = true +prometheus-client.workspace = true +rand.workspace = true +rand_core.workspace = true +thiserror.workspace = true +tracing.workspace = true + +[dev-dependencies] +commonware-broadcast.workspace = true +commonware-codec.workspace = true +commonware-conformance.workspace = true +commonware-consensus = { workspace = true, features = ["mocks"] } +commonware-cryptography = { workspace = true, features = ["mocks"] } +commonware-formatting.workspace = true +commonware-p2p.workspace = true +commonware-parallel.workspace = true +commonware-resolver.workspace = true +commonware-storage.workspace = true +tracing-subscriber.workspace = true + +[features] +test-utils = [] +arbitrary = [ + "commonware-codec/arbitrary", + "commonware-consensus/arbitrary", + "commonware-cryptography/arbitrary", + "commonware-p2p/arbitrary", + "commonware-resolver/arbitrary", + "commonware-runtime/arbitrary", + "commonware-storage/arbitrary", + "commonware-utils/arbitrary", + "dep:arbitrary", +] diff --git a/glue/README.md b/glue/README.md new file mode 100644 index 00000000000..37f2a9d321a --- /dev/null +++ b/glue/README.md @@ -0,0 +1,9 @@ +# commonware-glue + +[![Crates.io](https://img.shields.io/crates/v/commonware-glue.svg)](https://crates.io/crates/commonware-glue) + +Bootstrap applications with commonware 
primitive compositions. + +## Status + +Stability varies by primitive. See [README](https://github.com/commonwarexyz/monorepo#stability) for details. diff --git a/glue/conformance.toml b/glue/conformance.toml new file mode 100644 index 00000000000..ee42d57e72e --- /dev/null +++ b/glue/conformance.toml @@ -0,0 +1,7 @@ +["commonware_glue::stateful::db::p2p::handler::tests::conformance::CodecConformance>"] +n_cases = 65536 +hash = "44c363dc52376203a66269a71e13f7271c1a9beb1f8ee9dd9864a3ce7ae2c281" + +["commonware_glue::stateful::db::p2p::handler::tests::conformance::CodecConformance>"] +n_cases = 65536 +hash = "7ca1ff124e5835f5be5d9b915233b9df088db121a0f5c3baa54b9ab8daf8bd78" diff --git a/glue/src/lib.rs b/glue/src/lib.rs new file mode 100644 index 00000000000..62fe1eca6db --- /dev/null +++ b/glue/src/lib.rs @@ -0,0 +1,12 @@ +#![doc = include_str!("../README.md")] +#![doc( + html_logo_url = "https://commonware.xyz/imgs/rustdoc_logo.svg", + html_favicon_url = "https://commonware.xyz/favicon.ico" +)] + +commonware_macros::stability_scope!(ALPHA { + pub mod stateful; + + #[cfg(any(test, feature = "test-utils"))] + pub mod simulate; +}); diff --git a/glue/src/simulate/action.rs b/glue/src/simulate/action.rs new file mode 100644 index 00000000000..cd71ccfcefb --- /dev/null +++ b/glue/src/simulate/action.rs @@ -0,0 +1,84 @@ +//! Simulation action types for testing. + +use commonware_cryptography::PublicKey; +use commonware_p2p::simulated::Link; +use commonware_runtime::deterministic; +use std::time::Duration; + +/// Crash strategy for a simulation run. +#[derive(Clone)] +pub enum Crash { + /// Periodically crash random validators and restart them after + /// a downtime period. + Random { + /// How often to trigger crashes. + frequency: Duration, + /// How long crashed validators stay offline. + downtime: Duration, + /// Number of validators to crash each time. + count: usize, + }, + + /// Delay some validators from starting until after N finalizations. 
+ Delay { + /// Number of validators to delay. + count: usize, + /// Number of finalizations before starting delayed validators. + after: u64, + }, + + /// Time-indexed action schedule for precise control. + Schedule(Schedule

), +} + +/// A time-ordered sequence of simulation actions. +#[derive(Clone)] +pub struct Schedule { + /// Time-indexed actions. + pub events: Vec<(Duration, Action

)>, +} + +impl Schedule

{ + /// Create an empty schedule. + pub const fn new() -> Self { + Self { events: vec![] } + } + + /// Add an action at the given simulation time. + pub fn at(mut self, time: Duration, action: Action

) -> Self { + self.events.push((time, action)); + self + } +} + +impl Default for Schedule

{ + fn default() -> Self { + Self::new() + } +} + +/// A single simulation action to apply at a specific time. +#[derive(Clone)] +pub enum Action { + /// Update deterministic storage fault injection. + SetStorageFault(deterministic::FaultConfig), + + /// Reset all directed links, restoring full connectivity with the given link. + Heal(Link), + + /// Update a specific directed link by removing and re-adding it. + UpdateLink { + /// Source peer. + from: P, + /// Destination peer. + to: P, + /// New link configuration. + link: Link, + }, + + /// Crash a specific validator. + Crash(P), + + /// Restart a previously crashed validator. + Restart(P), +} diff --git a/glue/src/simulate/engine.rs b/glue/src/simulate/engine.rs new file mode 100644 index 00000000000..1ff7e439d5a --- /dev/null +++ b/glue/src/simulate/engine.rs @@ -0,0 +1,78 @@ +//! Engine definition trait and supporting types. + +use super::tracker::FinalizationUpdate; +use commonware_cryptography::PublicKey; +use commonware_p2p::simulated::{self, Oracle}; +use commonware_runtime::{deterministic, Handle, Quota}; +use commonware_utils::channel::mpsc; +use std::future::Future; + +/// A registered p2p channel pair (sender, receiver). +pub type ChannelPair

= ( + simulated::Sender, + simulated::Receiver

, +); + +/// Arguments passed to [`EngineDefinition::init`]. +pub struct InitContext<'a, P: PublicKey> { + /// Labeled runtime context for this validator. + pub context: deterministic::Context, + /// Index of this validator in the participant list. + pub index: usize, + /// This validator's public key. + pub public_key: &'a P, + /// Network oracle for peer management. + pub oracle: &'a Oracle, + /// Registered p2p channel pairs (same order as `channels()`). + pub channels: Vec>, + /// All participants in the simulation. + pub participants: &'a [P], + /// Channel for reporting finalization events to the harness. + pub monitor: mpsc::Sender>, +} + +/// Defines how to construct and start one validator's service stack. +/// +/// The harness calls these methods for each validator in the simulation. +/// The lifecycle is: +/// 1. `channels()` -- declare which p2p channels are needed. +/// 2. `init()` -- construct the engine (actors, archives, mailboxes). +/// 3. `start()` -- start all actors, return a joinable handle. +/// +/// On restart after a crash, `init()` and `start()` are called again +/// with the same validator identity but a fresh runtime context (storage +/// state is preserved by the deterministic runtime). +pub trait EngineDefinition: Clone + Send + 'static { + /// The public key type used by this engine. + type PublicKey: PublicKey; + + /// The constructed engine, passed from `init` to `start`. + type Engine: Send + 'static; + + /// Per-validator state inspectable by property checkers. + type State: Send + Sync + 'static; + + /// The participants for this simulation. + /// + /// Called once by the harness to determine the validator set. The engine + /// is responsible for generating keys and any associated state (signing + /// schemes, databases, etc.) during construction. + fn participants(&self) -> Vec; + + /// Which p2p channels to register for each validator. + /// + /// Returns `(channel_id, quota)` pairs. 
The harness registers each + /// on the simulated oracle and passes sender/receiver pairs to + /// `init` in the same order. + fn channels(&self) -> Vec<(u64, Quota)>; + + /// Construct the engine for a single validator. + fn init( + &self, + ctx: InitContext<'_, Self::PublicKey>, + ) -> impl Future + Send; + + /// Start all actors in the engine. Returns a handle the harness + /// can join on (or abort on crash). + fn start(engine: Self::Engine) -> Handle<()>; +} diff --git a/glue/src/simulate/exit.rs b/glue/src/simulate/exit.rs new file mode 100644 index 00000000000..94d232a1ae8 --- /dev/null +++ b/glue/src/simulate/exit.rs @@ -0,0 +1,103 @@ +//! Exit conditions for simulation completion. + +use super::{processed::ProcessedHeight, tracker::ProgressTracker}; +use commonware_cryptography::PublicKey; +use std::{future::Future, pin::Pin}; + +/// Simulation termination condition. +/// +/// The simulator evaluates this condition against the current tracker and +/// active validator states. Returning `Ok(true)` ends the simulation. +pub trait ExitCondition: Send + Sync { + /// Human-readable name for logging and errors. + fn name(&self) -> &str; + + /// Whether this condition should also be checked on periodic polls. + /// + /// Conditions driven entirely by finalization updates can return `false`. + /// Conditions based on state that may advance between updates can return + /// `true`. + fn requires_polling(&self) -> bool { + false + } + + /// Check whether the condition has been satisfied. + /// + /// `target_count` is the number of validators that must satisfy the + /// condition in the current run state (for example, before delayed + /// validators start this is the count of currently active validators). + fn reached<'a>( + &'a self, + tracker: &'a ProgressTracker

, + states: &'a [&'a S], + target_count: usize, + ) -> Pin> + Send + 'a>>; +} + +/// Exit once at least `target_count` validators have finalized `required` +/// views. +#[derive(Clone)] +pub struct MinimumFinalizations { + required: u64, +} + +impl MinimumFinalizations { + pub const fn new(required: u64) -> Self { + Self { required } + } +} + +impl ExitCondition for MinimumFinalizations { + fn name(&self) -> &str { + "minimum_finalizations" + } + + fn reached<'a>( + &'a self, + tracker: &'a ProgressTracker

, + _states: &'a [&'a S], + target_count: usize, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { Ok(tracker.all_reached(target_count, self.required)) }) + } +} + +/// Exit once at least `target_count` validators have processed up to +/// `required` application height. +#[derive(Clone)] +pub struct ProcessedHeightAtLeast { + required: u64, +} + +impl ProcessedHeightAtLeast { + pub const fn new(required: u64) -> Self { + Self { required } + } +} + +impl ExitCondition for ProcessedHeightAtLeast { + fn name(&self) -> &str { + "processed_height_at_least" + } + + fn requires_polling(&self) -> bool { + true + } + + fn reached<'a>( + &'a self, + _tracker: &'a ProgressTracker

, + states: &'a [&'a S], + target_count: usize, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + let mut reached = 0usize; + for state in states { + if state.processed_height().await >= self.required { + reached += 1; + } + } + Ok(reached >= target_count) + }) + } +} diff --git a/glue/src/simulate/mod.rs b/glue/src/simulate/mod.rs new file mode 100644 index 00000000000..50e1b4a5888 --- /dev/null +++ b/glue/src/simulate/mod.rs @@ -0,0 +1,23 @@ +//! Simulation harness for testing commonware primitive compositions. +//! +//! Provides a configurable test framework that composes the core consensus +//! stack (e.g. p2p, simplex, marshal, broadcast, application) with action injection, +//! progress tracking, and property checking. +//! +//! # Components +//! +//! - [`EngineDefinition`]: Trait for defining how to wire up a validator's +//! service stack. +//! - [`Plan`]: Declarative test configuration with action injection. +//! - [`Team`]: Manages running validators (start, crash, restart). +//! - [`ProgressTracker`]: Monitors finalization progress and agreement. + +pub mod action; +pub mod engine; +pub mod exit; +pub mod plan; +pub mod processed; +pub mod property; +pub mod reporter; +pub mod team; +pub mod tracker; diff --git a/glue/src/simulate/plan.rs b/glue/src/simulate/plan.rs new file mode 100644 index 00000000000..e6e854284d5 --- /dev/null +++ b/glue/src/simulate/plan.rs @@ -0,0 +1,1020 @@ +//! Simulation plan: declarative test configuration with select-loop orchestration. 
+ +use super::{ + action::{Action, Crash, Schedule}, + engine::EngineDefinition, + exit::{ExitCondition, MinimumFinalizations}, + property::{FinalizationProperty, Property}, + team::Team, + tracker::{FinalizationUpdate, ProgressTracker}, +}; +use commonware_cryptography::PublicKey; +use commonware_macros::select_loop; +use commonware_p2p::{ + simulated::{self, Link, Network}, + Manager as _, +}; +use commonware_runtime::{deterministic, Clock, Runner as _, Spawner, Supervisor as _}; +use commonware_utils::{channel::mpsc, ordered::Set, NZUsize, TryCollect}; +use rand::seq::SliceRandom; +use std::{ + collections::HashSet, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + time::Duration, +}; +use tracing::{error, info}; + +/// Command sent from the action scheduler to the select loop. +enum ScheduleCmd { + Crash(P), + Restart(P), +} + +/// Result of a simulation run. +pub struct PlanResult { + /// Auditor state (deterministic hash) at simulation end. + pub state: String, + + /// Final progress tracker state. + pub tracker: ProgressTracker, + + /// Number of validator crashes that occurred during the simulation. + pub crashes: u64, + + /// Number of scheduled actions that were applied. + pub scheduled_actions: u64, + + /// Whether delayed validators were started (if Delay was configured). + pub delayed_started: bool, +} + +/// Declarative configuration for a simulation run. +/// +/// All parameters needed to reproduce a test deterministically. +pub struct Plan { + /// Deterministic seed. Same seed produces identical execution. + pub seed: u64, + + /// Participant public keys in order. The caller is responsible for + /// generating these (e.g. via `PrivateKey::from_seed`). + pub participants: Vec, + + /// Network link configuration. + pub link: Link, + + /// Maximum size of a p2p message (bytes). + pub max_message_size: u32, + + /// Engine definition (how to wire up each validator). + pub engine: D, + + /// Crash/action injection strategies. 
+ pub crashes: Vec>, + + /// Number of finalizations required before the simulation stops. + /// + /// Used by the default exit condition when no custom condition is set. + pub required_finalizations: u64, + + /// Exit condition that determines when the simulation should terminate. + pub exit_condition: Box>, + + /// Maximum simulation wall-clock time (deterministic time). + pub timeout: Option, + + /// Optional storage fault injection configuration. + pub storage_fault: Option, + + /// Properties checked after each finalization. + pub finalization_property: Vec>>, + + /// Properties checked once at simulation end with state and tracker access. + pub property: Vec>>, +} + +/// Builder for constructing a [`Plan`] with sensible defaults. +/// +/// Only the engine is required. Everything else has defaults suitable +/// for quick tests. +pub struct PlanBuilder { + seeds: Vec, + participants: Vec, + link: Link, + max_message_size: u32, + engine: D, + crashes: Vec>, + required_finalizations: u64, + exit_condition: Option>, + timeout: Option, + storage_fault: Option, + finalization_property: Vec>, + property: Vec>, +} + +type ExitConditionFactory = Box< + dyn Fn() -> Box< + dyn ExitCondition<::PublicKey, ::State>, + >, +>; + +type FinalizationPropertyFactory = + Box Box::State>>>; + +type PropertyFactory = Box< + dyn Fn() + -> Box::PublicKey, ::State>>, +>; + +impl PlanBuilder { + /// Create a builder with the required engine and sensible defaults. + /// + /// Participants are derived from the engine via + /// [`EngineDefinition::participants`]. + /// + /// Defaults: seed 0, 1MB max message size, good links (10ms latency, + /// 5ms jitter, 100% success), no crashes, 10 required finalizations, + /// no timeout. 
+ pub fn new(engine: D) -> Self { + let participants = engine.participants(); + Self { + seeds: vec![0], + participants, + link: Link { + latency: Duration::from_millis(10), + jitter: Duration::from_millis(5), + success_rate: 1.0, + }, + max_message_size: 1024 * 1024, + engine, + crashes: vec![], + required_finalizations: 10, + exit_condition: None, + timeout: None, + storage_fault: None, + finalization_property: vec![], + property: vec![], + } + } + + /// Set the deterministic seeds used by [`Self::run`]. + /// + /// At least one seed must be provided. + pub fn seeds(mut self, seeds: impl IntoIterator) -> Self { + let seeds: Vec = seeds.into_iter().collect(); + assert!(!seeds.is_empty(), "at least one seed must be configured"); + self.seeds = seeds; + self + } + + /// Convenience method for configuring a single seed. + pub fn seed(self, seed: u64) -> Self { + self.seeds([seed]) + } + + pub const fn link(mut self, link: Link) -> Self { + self.link = link; + self + } + + pub const fn max_message_size(mut self, size: u32) -> Self { + self.max_message_size = size; + self + } + + pub fn crash(mut self, crash: Crash) -> Self { + match crash { + Crash::Delay { .. } => assert!( + !self + .crashes + .iter() + .any(|crash| matches!(crash, Crash::Delay { .. })), + "only one Crash::Delay strategy may be configured" + ), + Crash::Random { .. } => assert!( + !self + .crashes + .iter() + .any(|crash| matches!(crash, Crash::Random { .. })), + "only one Crash::Random strategy may be configured" + ), + Crash::Schedule(_) => {} + } + self.crashes.push(crash); + self + } + + pub const fn required_finalizations(mut self, n: u64) -> Self { + self.required_finalizations = n; + self + } + + /// Override the default exit condition. 
+ pub fn exit_condition( + mut self, + condition: impl ExitCondition + Clone + 'static, + ) -> Self { + self.exit_condition = Some(Box::new(move || Box::new(condition.clone()))); + self + } + + pub const fn timeout(mut self, timeout: Duration) -> Self { + self.timeout = Some(timeout); + self + } + + /// Enable deterministic storage fault injection for the simulation. + pub const fn with_storage_fault(mut self, faults: deterministic::FaultConfig) -> Self { + self.storage_fault = Some(faults); + self + } + + pub fn finalization_property( + mut self, + property: impl FinalizationProperty + Clone + 'static, + ) -> Self { + self.finalization_property + .push(Box::new(move || Box::new(property.clone()))); + self + } + + pub fn property( + mut self, + property: impl Property + Clone + 'static, + ) -> Self { + self.property + .push(Box::new(move || Box::new(property.clone()))); + self + } + + /// Build the [`Plan`]. + pub fn build(self) -> Plan { + let seed = self + .seeds + .first() + .copied() + .expect("at least one seed must be configured"); + self.build_with_seed(seed) + } + + fn build_with_seed(&self, seed: u64) -> Plan { + let exit_condition = self.exit_condition.as_ref().map_or_else( + || Box::new(MinimumFinalizations::new(self.required_finalizations)) as _, + |factory| factory(), + ); + let finalization_property = self + .finalization_property + .iter() + .map(|factory| factory()) + .collect(); + let property = self.property.iter().map(|factory| factory()).collect(); + Plan { + seed, + participants: self.participants.clone(), + link: self.link.clone(), + max_message_size: self.max_message_size, + engine: self.engine.clone(), + crashes: self.crashes.clone(), + required_finalizations: self.required_finalizations, + exit_condition, + timeout: self.timeout, + storage_fault: self.storage_fault.clone(), + finalization_property, + property, + } + } + + /// Build a fresh plan per seed and run each simulation. 
+ pub fn run(self) -> Result>, String> { + let mut results = Vec::with_capacity(self.seeds.len()); + for &seed in &self.seeds { + let plan = self.build_with_seed(seed); + let result = plan.run().map_err(|e| format!("seed {seed}: {e}"))?; + results.push(result); + } + Ok(results) + } +} + +impl Plan { + fn uses_storage_faults(&self) -> bool { + self.storage_fault.is_some() + || self.schedules().any(|schedule| { + schedule + .events + .iter() + .any(|(_, action)| matches!(action, Action::SetStorageFault(_))) + }) + } + + fn delay_crash(&self) -> Option<(usize, u64)> { + self.crashes.iter().find_map(|crash| match crash { + Crash::Delay { count, after } => Some((*count, *after)), + _ => None, + }) + } + + fn random_crash(&self) -> Option<(Duration, Duration, usize)> { + self.crashes.iter().find_map(|crash| match crash { + Crash::Random { + frequency, + downtime, + count, + } => Some((*frequency, *downtime, *count)), + _ => None, + }) + } + + fn schedules(&self) -> impl Iterator> { + self.crashes.iter().filter_map(|crash| match crash { + Crash::Schedule(schedule) => Some(schedule), + _ => None, + }) + } + + /// Determine which participants should be delayed at startup. + fn delayed_participants(&self) -> HashSet { + if let Some((count, _)) = self.delay_crash() { + self.participants.iter().take(count).cloned().collect() + } else { + HashSet::new() + } + } + + /// Check post-run properties, log completion, and build the result. 
+ async fn finish( + &self, + ctx: &deterministic::Context, + tracker: ProgressTracker, + team: &Team, + crashes: u64, + scheduled_actions: &AtomicU64, + delayed_started: bool, + ) -> Result, String> { + let states = team.active_states(); + for prop in &self.property { + match prop.check(&tracker, &states).await { + Ok(()) => { + info!( + target: "simulator", + property = prop.name(), + "post-run property passed" + ); + } + Err(e) => { + error!( + target: "simulator", + property = prop.name(), + error = %e, + "post-run property failed" + ); + return Err(format!( + "post-run property violation ({}): {e}", + prop.name() + )); + } + } + } + let scheduled_actions_applied = scheduled_actions.load(Ordering::Relaxed); + info!( + target: "simulator", + required = self.required_finalizations, + exit_condition = self.exit_condition.name(), + crashes, + scheduled_actions = scheduled_actions_applied, + delayed_started, + "all validators reached required progress" + ); + Ok(PlanResult { + state: ctx.auditor().state(), + tracker, + crashes, + scheduled_actions: scheduled_actions_applied, + delayed_started, + }) + } + + /// Run the simulation. This is the main async entry point. + /// + /// Starts the simulated network and the team, spawns the optional crash + /// ticker and the schedule actors, then drives the select loop until the + /// exit condition is reached or the run fails. + async fn run_inner(&self, mut ctx: deterministic::Context) -> Result, String> { + let (network, oracle) = Network::<_, D::PublicKey>::new( + ctx.child("network"), + simulated::Config { + max_size: self.max_message_size, + disconnect_on_block: true, + tracked_peer_sets: NZUsize!(3), + }, + ); + network.start(); + + // Seed initial peers so resolver subscriptions can reconcile immediately. 
+ let mut manager = oracle.manager(); + manager.track( + 0, + self.participants + .iter() + .cloned() + .try_collect::>() + .expect("participants must be unique"), + ); + + let total = self.participants.len(); + let mut team = Team::new(self.engine.clone(), self.participants.clone()); + let (monitor_tx, mut monitor_rx) = mpsc::channel::>(1024); + let (restart_tx, mut restart_rx) = mpsc::channel::(10); + let (crash_tx, mut crash_rx) = mpsc::channel::<()>(1); + let (schedule_tx, mut schedule_rx) = mpsc::channel::>(10); + let scheduled_actions = Arc::new(AtomicU64::new(0)); + + let delayed = self.delayed_participants(); + team.start( + &ctx, + &oracle, + self.link.clone(), + monitor_tx.clone(), + &delayed, + ) + .await; + + if let Some(storage_fault) = &self.storage_fault { + *ctx.storage_fault_config().write() = storage_fault.clone(); + info!( + target: "simulator", + ?storage_fault, + "enabled storage fault injection" + ); + } + + // Spawn crash ticker for Random crashes: it signals the select loop every + // `frequency` and exits once the crash channel is closed. + if let Some((frequency, _, _)) = self.random_crash() { + let crash_tx = crash_tx.clone(); + ctx.child("crash_ticker").spawn(move |ctx| async move { + loop { + ctx.sleep(frequency).await; + if crash_tx.send(()).await.is_err() { + break; + } + } + }); + } + + // Spawn action schedule actors. 
+ for schedule in self.schedules() { + let schedule = schedule.clone(); + let fault_ctx = ctx.child("scheduler_fault"); + let oracle_clone = oracle.clone(); + let participants = self.participants.clone(); + let schedule_tx_clone = schedule_tx.clone(); + let scheduled_actions_clone = scheduled_actions.clone(); + ctx.child("scheduler").spawn(move |ctx| async move { + Self::run_action_scheduler( + ctx, + fault_ctx, + schedule, + &oracle_clone, + &participants, + schedule_tx_clone, + scheduled_actions_clone, + ) + .await; + }); + } + + let mut tracker = ProgressTracker::default(); + let mut delayed_started = false; + let active_count = total - delayed.len(); + let mut crashes: u64 = 0; + let mut result: Result, String> = + Err("simulation stopped before completion".into()); + const EXIT_POLL: Duration = Duration::from_millis(25); + + select_loop! { + ctx, + on_stopped => { + result = Err("simulation stopped".into()); + }, + Some(update) = monitor_rx.recv() else { + result = Err("monitor channel closed".into()); + break; + } => { + tracker.observe(update)?; + + // Check finalization properties against the states of currently active validators + let states = team.active_states(); + for prop in &self.finalization_property { + match prop.check(&states).await { + Ok(()) => { + info!( + target: "simulator", + property = prop.name(), + "finalization property passed" + ); + } + Err(e) => { + error!( + target: "simulator", + property = prop.name(), + error = %e, + "finalization property failed" + ); + return Err(format!( + "finalization property violation ({}): {e}", + prop.name() + )); + } + } + } + + // Check termination. 
+ let target_count = if delayed_started { total } else { active_count }; + let states = team.active_states(); + let done = self + .exit_condition + .reached(&tracker, &states, target_count) + .await + .map_err(|e| { + format!( + "exit condition evaluation failed ({}): {e}", + self.exit_condition.name() + ) + })?; + if done { + result = self + .finish( + &ctx, + tracker, + &team, + crashes, + &scheduled_actions, + delayed_started, + ) + .await; + break; + } + + // Start delayed validators once the minimum tracked view reaches `after` + if !delayed_started { + if let Some((_, after)) = self.delay_crash() { + if tracker.min_view() >= after { + info!(target: "simulator", "starting delayed participants"); + for pk in &delayed { + team.start_one(&ctx, &oracle, pk.clone(), monitor_tx.clone()) + .await; + } + delayed_started = true; + } + } + } + }, + _ = ctx.sleep(EXIT_POLL) => { + if !self.exit_condition.requires_polling() { + continue; + } + let target_count = if delayed_started { total } else { active_count }; + let states = team.active_states(); + let done = self + .exit_condition + .reached(&tracker, &states, target_count) + .await + .map_err(|e| { + format!( + "exit condition evaluation failed ({}): {e}", + self.exit_condition.name() + ) + })?; + if !done { + continue; + } + + result = self + .finish( + &ctx, + tracker, + &team, + crashes, + &scheduled_actions, + delayed_started, + ) + .await; + break; + }, + Some(pk) = restart_rx.recv() else break => { + team.restart(&ctx, &oracle, pk, monitor_tx.clone()).await; + }, + Some(cmd) = schedule_rx.recv() else break => match cmd { + ScheduleCmd::Crash(pk) => { + if team.crash(&pk) { + crashes += 1; + } + } + ScheduleCmd::Restart(pk) => { + team.restart(&ctx, &oracle, pk, monitor_tx.clone()).await; + } + }, + _ = crash_rx.recv() => { + let Some((_, downtime, count)) = self.random_crash() else { + continue; + }; + let active = team.active_keys(); + let crash_count = count.min(active.len()); + let to_crash: Vec = active + 
.choose_multiple(&mut 
ctx, crash_count) + .cloned() + .collect(); + for pk in to_crash { + if !team.crash(&pk) { + continue; + } + crashes += 1; + let restart_tx = restart_tx.clone(); + ctx.child("restart_delay").spawn(move |ctx| async move { + if downtime > Duration::ZERO { + ctx.sleep(downtime).await; + } + let _ = restart_tx.send(pk).await; + }); + } + }, + } + + // Assert that configured crashes were actually exercised. + if let Ok(ref r) = result { + if self.random_crash().is_some() { + assert!( + r.crashes > 0, + "Crash::Random configured but no crashes occurred. \ + Increase required_finalizations or decrease crash frequency." + ); + } + + let scheduled_events: usize = + self.schedules().map(|schedule| schedule.events.len()).sum(); + if scheduled_events > 0 { + assert!( + r.scheduled_actions > 0, + "Crash::Schedule configured with {} events but none were applied. \ + Schedule events may be timed after consensus completes.", + scheduled_events + ); + } + + if self.delay_crash().is_some() { + assert!( + r.delayed_started, + "Crash::Delay configured but delayed validators were never started. \ + Increase required_finalizations or decrease the `after` threshold." + ); + } + } + + result + } + + /// Schedule executor -- sleeps until each scheduled time and + /// applies the action. Network actions are applied directly via the + /// oracle; node actions (crash/restart) are sent as commands to the + /// select loop which owns the team. + /// + /// Storage-fault updates are written through `fault_ctx`. Event times are + /// offsets from the moment the executor starts, and the executor stops + /// early if the command channel to the select loop is closed. 
+ async fn run_action_scheduler( + ctx: deterministic::Context, + fault_ctx: deterministic::Context, + schedule: Schedule, + oracle: &simulated::Oracle, + participants: &[D::PublicKey], + cmd_tx: mpsc::Sender>, + actions_applied: Arc, + ) { + let start = ctx.current(); + for (time, action) in schedule.events { + let elapsed = ctx + .current() + .duration_since(start) + .unwrap_or(Duration::ZERO); + if time > elapsed { + ctx.sleep(time - elapsed).await; + } + match action { + Action::SetStorageFault(storage_fault) => { + *fault_ctx.storage_fault_config().write() = storage_fault.clone(); + actions_applied.fetch_add(1, Ordering::Relaxed); + info!(target: "simulator", ?storage_fault, "storage faults updated"); + } + Action::Heal(ref link) => { + for v1 in participants { + for v2 in participants { + if v1 == v2 { + continue; + } + let _ = oracle.remove_link(v1.clone(), v2.clone()).await; + let _ = oracle.add_link(v1.clone(), v2.clone(), link.clone()).await; + } + } + actions_applied.fetch_add(1, Ordering::Relaxed); + info!(target: "simulator", "links reset"); + } + Action::UpdateLink { + ref from, + ref to, + ref link, + } => { + let _ = oracle.remove_link(from.clone(), to.clone()).await; + let _ = oracle + .add_link(from.clone(), to.clone(), link.clone()) + .await; + actions_applied.fetch_add(1, Ordering::Relaxed); + info!(target: "simulator", ?from, ?to, "link updated"); + } + Action::Crash(ref pk) => { + if cmd_tx.send(ScheduleCmd::Crash(pk.clone())).await.is_err() { + break; + } + actions_applied.fetch_add(1, Ordering::Relaxed); + } + Action::Restart(ref pk) => { + if cmd_tx.send(ScheduleCmd::Restart(pk.clone())).await.is_err() { + break; + } + actions_applied.fetch_add(1, Ordering::Relaxed); + } + } + } + } + + /// Run the simulation synchronously using [`Self::seed`]. + /// + /// Creates a deterministic runner with the plan's seed and timeout, + /// then executes the simulation. 
+ pub fn run(&self) -> Result, String> { + self.run_with_seed(self.seed) + } + + /// Run the simulation synchronously with an explicit seed. + /// + /// The runner is configured with `seed`, the plan's timeout, and panic + /// catching that is enabled only when `uses_storage_faults()` returns true. + pub fn run_with_seed(&self, seed: u64) -> Result, String> { + let cfg = deterministic::Config::new() + .with_seed(seed) + .with_catch_panics(self.uses_storage_faults()) + .with_timeout(self.timeout); + let runner = deterministic::Runner::new(cfg); + runner.start(|ctx| self.run_inner(ctx)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use commonware_consensus::types::View; + use commonware_cryptography::{ed25519, Signer as _}; + use commonware_runtime::{Clock, Handle, Quota, Spawner}; + use std::{ + future::Future, + pin::Pin, + sync::atomic::{AtomicUsize, Ordering}, + }; + + #[derive(Clone)] + struct FinalizingEngine { + participants: Vec, + finalize_after: Duration, + finalizations: u64, + } + + struct FinalizingNode { + context: deterministic::Context, + monitor: mpsc::Sender>, + pk: ed25519::PublicKey, + finalize_after: Duration, + finalizations: u64, + } + + impl FinalizingEngine { + fn new(num_validators: u64, finalize_after: Duration, finalizations: u64) -> Self { + let participants = (0..num_validators) + .map(|seed| ed25519::PrivateKey::from_seed(seed).public_key()) + .collect(); + Self { + participants, + finalize_after, + finalizations, + } + } + } + + impl EngineDefinition for FinalizingEngine { + type PublicKey = ed25519::PublicKey; + type Engine = FinalizingNode; + type State = (); + + fn participants(&self) -> Vec { + self.participants.clone() + } + + fn channels(&self) -> Vec<(u64, Quota)> { + vec![] + } + + fn init( + &self, + ctx: super::super::engine::InitContext<'_, Self::PublicKey>, + ) -> impl Future + Send { + let finalize_after = self.finalize_after; + let finalizations = self.finalizations; + async move { + ( + FinalizingNode { + 
context: ctx.context, + monitor: ctx.monitor, + pk: ctx.public_key.clone(), + finalize_after, + finalizations, + }, + (), + ) + } + } + + fn start(engine: 
Self::Engine) -> Handle<()> { + let pk = engine.pk; + let monitor = engine.monitor; + let finalize_after = engine.finalize_after; + let finalizations = engine.finalizations; + engine.context.spawn(move |ctx| async move { + if finalize_after > Duration::ZERO { + ctx.sleep(finalize_after).await; + } + for view in 1..=finalizations { + let _ = monitor + .send(FinalizationUpdate { + pk: pk.clone(), + view: View::new(view), + block_digest: vec![view as u8], + }) + .await; + } + }) + } + } + + #[derive(Clone)] + struct AtLeastTrackedValidators { + min: usize, + } + + impl ExitCondition for AtLeastTrackedValidators { + fn name(&self) -> &str { + "at_least_tracked_validators" + } + + fn reached<'a>( + &'a self, + tracker: &'a ProgressTracker, + _states: &'a [&'a ()], + _target_count: usize, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { Ok(tracker.tracked_count() >= self.min) }) + } + } + + #[derive(Default)] + struct SingleUseProperty { + calls: AtomicUsize, + } + + impl Clone for SingleUseProperty { + fn clone(&self) -> Self { + Self::default() + } + } + + impl Property for SingleUseProperty { + fn name(&self) -> &str { + "single_use_property" + } + + fn check<'a>( + &'a self, + _tracker: &'a ProgressTracker, + _states: &'a [&'a ()], + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + let previous = self.calls.fetch_add(1, Ordering::Relaxed); + if previous == 0 { + return Ok(()); + } + Err(format!( + "property reused across runs: call {}", + previous + 1 + )) + }) + } + } + + #[test] + fn schedule_action_applied_before_completion_is_counted() { + let link = Link { + latency: Duration::from_millis(10), + jitter: Duration::from_millis(0), + success_rate: 1.0, + }; + let result = PlanBuilder::new(FinalizingEngine::new(1, Duration::from_millis(100), 1)) + .required_finalizations(1) + .timeout(Duration::from_secs(2)) + .crash(Crash::Schedule( + Schedule::new() + .at(Duration::from_millis(1), Action::Heal(link.clone())) + .at(Duration::from_secs(5), Action::Heal(link)), 
+ )) + .run() + .expect("simulation should complete") + .into_iter() + .next() + .expect("expected one result for the default seed"); + assert!( + result.scheduled_actions >= 1, + "expected at least one applied action before completion, got {}", + result.scheduled_actions + ); + } + + #[test] + fn delay_and_schedule_actions_compose() { + let link = Link { + latency: Duration::from_millis(10), + jitter: Duration::from_millis(0), + success_rate: 1.0, + }; + let result = PlanBuilder::new(FinalizingEngine::new(2, Duration::from_millis(100), 2)) + .required_finalizations(2) + .timeout(Duration::from_secs(2)) + .crash(Crash::Delay { count: 1, after: 1 }) + .crash(Crash::Schedule( + Schedule::new().at(Duration::from_millis(1), Action::Heal(link)), + )) + .run() + .expect("simulation should complete") + .into_iter() + .next() + .expect("expected one result for the default seed"); + assert!( + result.delayed_started, + "delayed validator should still start when schedule crashes are also configured" + ); + assert!( + result.scheduled_actions >= 1, + "scheduled crashes should still run when delay crashes are also configured" + ); + } + + #[test] + fn schedule_double_crash_before_restart_counts_one_crash() { + let pk = ed25519::PrivateKey::from_seed(0).public_key(); + let result = PlanBuilder::new(FinalizingEngine::new(1, Duration::from_millis(50), 1)) + .required_finalizations(1) + .timeout(Duration::from_secs(2)) + .crash(Crash::Schedule( + Schedule::new() + .at(Duration::from_millis(1), Action::Crash(pk.clone())) + .at(Duration::from_millis(2), Action::Crash(pk.clone())) + .at(Duration::from_millis(3), Action::Restart(pk)), + )) + .run() + .expect("simulation should complete") + .into_iter() + .next() + .expect("expected one result for the default seed"); + + assert_eq!( + result.crashes, 1, + "second crash before restart should be a no-op and not counted" + ); + } + + #[test] + fn custom_exit_condition_overrides_required_finalizations() { + let result = 
PlanBuilder::new(FinalizingEngine::new(2, Duration::from_millis(10), 1)) + .required_finalizations(100) + .exit_condition(AtLeastTrackedValidators { min: 2 }) + .timeout(Duration::from_secs(2)) + .run() + .expect("simulation should complete with custom exit condition") + .into_iter() + .next() + .expect("expected one result for the default seed"); + + assert_eq!( + result.tracker.tracked_count(), + 2, + "custom exit condition should see both validators" + ); + } + + #[test] + fn multi_seed_run_reconstructs_properties_per_seed() { + PlanBuilder::new(FinalizingEngine::new(1, Duration::from_millis(10), 1)) + .seeds([0, 1]) + .timeout(Duration::from_secs(1)) + .required_finalizations(1) + .property(SingleUseProperty::default()) + .run() + .expect("stateful properties should not be reused across seed runs"); + } +} diff --git a/glue/src/simulate/processed.rs b/glue/src/simulate/processed.rs new file mode 100644 index 00000000000..611b7948786 --- /dev/null +++ b/glue/src/simulate/processed.rs @@ -0,0 +1,15 @@ +//! Processed-height introspection for simulation state. + +use std::future::Future; + +/// Access the latest application-acknowledged processed height. +pub trait ProcessedHeight: Send + Sync { + /// Returns the latest processed height. + fn processed_height(&self) -> impl Future + Send; +} + +/// The unit type always reports a processed height of 0. +impl ProcessedHeight for () { + async fn processed_height(&self) -> u64 { + 0 + } +} diff --git a/glue/src/simulate/property.rs b/glue/src/simulate/property.rs new file mode 100644 index 00000000000..0516369fa25 --- /dev/null +++ b/glue/src/simulate/property.rs @@ -0,0 +1,34 @@ +//! Property traits for simulation testing. + +use super::tracker::ProgressTracker; +use commonware_cryptography::PublicKey; +use std::{future::Future, pin::Pin}; + +/// A property checked after each finalization. +/// +/// Receives the inspectable states of all active (non-crashed) validators. 
+pub trait FinalizationProperty: Send + Sync { + /// Human-readable name for error reporting. 
+ fn name(&self) -> &str; + + /// Check the property. Returns `Err` with a description if violated. + fn check<'a>( + &'a self, + states: &'a [&'a S], + ) -> Pin> + Send + 'a>>; +} + +/// A property checked once at simulation end with access to both the +/// progress tracker and all validator states (inside the runtime, +/// before it shuts down). +pub trait Property: Send + Sync { + /// Human-readable name for error reporting. + fn name(&self) -> &str; + + /// Check the property. Returns `Err` with a description if violated. + fn check<'a>( + &'a self, + tracker: &'a ProgressTracker

, + states: &'a [&'a S], + ) -> Pin> + Send + 'a>>; +} diff --git a/glue/src/simulate/reporter.rs b/glue/src/simulate/reporter.rs new file mode 100644 index 00000000000..289a49b01d3 --- /dev/null +++ b/glue/src/simulate/reporter.rs @@ -0,0 +1,56 @@ +//! Reporter wrapper that forwards finalization events to the +//! simulation harness. +//! +//! Custom [`EngineDefinition`](super::engine::EngineDefinition) +//! implementations should include a [`MonitorReporter`] in their +//! reporter chain so the harness can track finalization progress. + +use super::tracker::FinalizationUpdate; +use commonware_actor::Feedback; +use commonware_consensus::{marshal::Update, Block, Reporter}; +use commonware_cryptography::{Digest, Digestible, PublicKey}; +use commonware_utils::channel::mpsc; + +/// Wraps another [`Reporter`] and forwards marshal [`Update`] +/// finalization events to the simulation harness via a monitor channel. +/// +/// Place this in the marshal reporter chain so it intercepts +/// [`Update::Tip`] events before delegation. +/// +/// Forwarding is best-effort: the result of `try_send` is discarded, so an +/// update that cannot be enqueued is dropped and the activity is still passed +/// to the wrapped reporter. +#[derive(Clone)] +pub struct MonitorReporter { + inner: R, + monitor: mpsc::Sender>, + pk: P, +} + +impl MonitorReporter { + /// Create a new monitor reporter. + /// + /// - `pk`: the public key of the validator this reporter belongs to. + /// - `monitor`: channel for sending finalization updates to the harness. + /// - `inner`: the wrapped reporter to delegate to after interception. 
+ pub const fn new(pk: P, monitor: mpsc::Sender>, inner: R) -> Self { + Self { inner, monitor, pk } + } +} + +impl Reporter for MonitorReporter +where + P: PublicKey, + B: Block + Digestible, + ::Digest: Digest, + R: Reporter>, +{ + type Activity = Update; + + fn report(&mut self, activity: Self::Activity) -> Feedback { + if let Update::Tip(round, _, ref digest) = activity { + let _ = self.monitor.try_send(FinalizationUpdate { + pk: self.pk.clone(), + view: round.view(), + block_digest: digest.as_ref().to_vec(), + }); + } + self.inner.report(activity) + } +} diff --git a/glue/src/simulate/team.rs b/glue/src/simulate/team.rs new file mode 100644 index 00000000000..f8d450841ef --- /dev/null +++ b/glue/src/simulate/team.rs @@ -0,0 +1,179 @@ +//! Validator set management for simulation testing. +//! +//! Manages starting, crashing, and restarting validators. + +use super::{ + engine::{ChannelPair, EngineDefinition, InitContext}, + tracker::FinalizationUpdate, +}; +use commonware_p2p::simulated::{Link, Oracle}; +use commonware_runtime::{deterministic, Handle, Supervisor as _}; +use commonware_utils::channel::mpsc; +use std::collections::{BTreeMap, HashSet}; +use tracing::info; + +/// Manages running validators and their engines. +/// +/// Handles starting, crashing, and restarting individual nodes. +pub struct Team { + /// Engine definition (cloned per validator init). + definition: D, + + /// All participant public keys in order. + participants: Vec, + + /// Running task handles, keyed by public key. + handles: BTreeMap>, + + /// Inspectable state per validator. Entries are inserted on start and are + /// not removed by a crash. + states: BTreeMap, + + /// Restart count per validator (monotonically increasing). + restart_counts: BTreeMap, +} + +impl Team { + /// Create a new team with the given participants. 
+ pub const fn new(definition: D, participants: Vec) -> Self { + Self { + definition, + participants, + handles: BTreeMap::new(), + states: BTreeMap::new(), + restart_counts: BTreeMap::new(), + } + } + + /// Start a single validator. Registers channels, calls init, start. + /// + /// If the validator is already running, aborts its existing handle first. + pub async fn start_one( + &mut self, + ctx: &deterministic::Context, + oracle: &Oracle, + pk: D::PublicKey, + monitor: mpsc::Sender>, + ) { + // Abort existing handle if present + if let Some(handle) = self.handles.remove(&pk) { + handle.abort(); + } + + // Tag the child context with the participant index and the number of + // previous starts, then bump the counter for the next start. + let restart_count = self.restart_counts.entry(pk.clone()).or_insert(0); + let index = self + .participants + .iter() + .position(|p| p == &pk) + .expect("participant not found"); + let validator_ctx = ctx + .child("validator") + .with_attribute("index", index) + .with_attribute("restart", *restart_count); + *restart_count += 1; + + // Register channels + let control = oracle.control(pk.clone()); + let channel_specs = self.definition.channels(); + let mut channels: Vec> = Vec::with_capacity(channel_specs.len()); + for (channel_id, quota) in &channel_specs { + let pair = control + .register(*channel_id, *quota) + .await + .expect("channel registration failed"); + channels.push(pair); + } + + // Init engine + let (engine, state) = self + .definition + .init(InitContext { + context: validator_ctx, + index, + public_key: &pk, + oracle, + channels, + participants: &self.participants, + monitor, + }) + .await; + + // Start engine + let handle = D::start(engine); + self.handles.insert(pk.clone(), handle); + self.states.insert(pk, state); + } + + /// Start all non-delayed validators and link all peers. 
+ pub async fn start( + &mut self, + ctx: &deterministic::Context, + oracle: &Oracle, + link: Link, + monitor: mpsc::Sender>, + delayed: &HashSet, + ) { + // Link every ordered pair of distinct participants with the same link + let participants = self.participants.clone(); + for v1 in &participants { + for v2 in &participants { + if v1 == v2 { + continue; + } + oracle + .add_link(v1.clone(), v2.clone(), link.clone()) + .await + .unwrap(); + } + } + + // Start non-delayed participants + for pk in participants { + if delayed.contains(&pk) { + info!(target: "simulator", ?pk, "delayed participant"); + continue; + } + self.start_one(ctx, oracle, pk, monitor.clone()).await; + } + } + + /// Crash a validator by aborting its task handle. + /// + /// Returns `true` if the validator was running and is now crashed. + /// Only the handle is removed; the validator's entry in `states` is kept + /// but is no longer returned by `active_states`. + pub fn crash(&mut self, pk: &D::PublicKey) -> bool { + self.handles.remove(pk).is_some_and(|handle| { + handle.abort(); + info!(target: "simulator", ?pk, "crashed validator"); + true + }) + } + + /// Restart a previously crashed validator. + /// + /// Delegates to `start_one`, which also aborts any handle still registered + /// for `pk`. + pub async fn restart( + &mut self, + ctx: &deterministic::Context, + oracle: &Oracle, + pk: D::PublicKey, + monitor: mpsc::Sender>, + ) { + info!(target: "simulator", ?pk, "restarting validator"); + self.start_one(ctx, oracle, pk, monitor).await; + } + + /// Collect references to all active (non-crashed) validator states. + pub fn active_states(&self) -> Vec<&D::State> { + self.handles + .keys() + .filter_map(|pk| self.states.get(pk)) + .collect() + } + + /// Get the public keys of all currently active validators. + pub fn active_keys(&self) -> Vec { + self.handles.keys().cloned().collect() + } + + /// All participants (including crashed ones). 
+ pub fn participants(&self) -> &[D::PublicKey] { + &self.participants + } +} diff --git a/glue/src/simulate/tracker.rs b/glue/src/simulate/tracker.rs new file mode 100644 index 00000000000..9a15cc09931 --- /dev/null +++ b/glue/src/simulate/tracker.rs @@ -0,0 +1,163 @@ +//! Finalization progress tracking and agreement checking. 
+ +use commonware_consensus::types::View; +use commonware_cryptography::PublicKey; +use std::collections::{BTreeMap, HashSet}; + +/// A finalization event from a validator. +pub struct FinalizationUpdate { + /// Which validator reported this finalization. + pub pk: P, + /// The finalized view. + pub view: View, + /// The digest of the finalized block (encoded as bytes). + pub block_digest: Vec, +} + +/// Tracks finalization progress across all validators. +/// +/// Validates safety invariants (agreement / no forks) and tracks +/// liveness (progress toward a finalization target). +pub struct ProgressTracker { + /// Latest finalized view per validator. + status: BTreeMap, + + /// Block digests seen at each view (for fork detection). + digests_by_view: BTreeMap>>, +} + +impl Default for ProgressTracker

{ + fn default() -> Self { + Self { + status: BTreeMap::new(), + digests_by_view: BTreeMap::new(), + } + } +} + +impl ProgressTracker

{ + /// Record a finalization update from a validator. + /// + /// Returns an error if a different block digest was already seen at + /// the same view (fork detected). + /// + /// Strictly lower views are silently ignored: after a crash/restart, + /// the consensus engine may replay finalizations a validator has + /// already advanced past. Same-view replays are still checked for + /// agreement so conflicting digests remain detectable. + pub fn observe(&mut self, update: FinalizationUpdate

) -> Result<(), String> { + let FinalizationUpdate { + pk, + view, + block_digest, + } = update; + + // Skip strictly stale replays after crash/restart. Same-view repeats + // still go through agreement tracking so conflicting digests remain + // detectable. + if let Some(prev) = self.status.get(&pk) { + if *prev > view { + return Ok(()); + } + } + + // Check agreement (fork detection). The digest is recorded before the + // check, so once a view holds conflicting digests every later non-stale + // update at that view is rejected as well. + let digests = self.digests_by_view.entry(view).or_default(); + digests.insert(block_digest); + if digests.len() > 1 { + return Err(format!("fork detected at view {:?}", view)); + } + + self.status.insert(pk, view); + Ok(()) + } + + /// Check if at least `total` validators have finalized at or past the required view. + pub fn all_reached(&self, total: usize, required: u64) -> bool { + let required_view = View::new(required); + self.status + .values() + .filter(|v| **v >= required_view) + .count() + >= total + } + + /// Minimum finalized view across all tracked validators. + /// + /// Returns 0 when no validator is tracked yet. + pub fn min_view(&self) -> u64 { + self.status.values().map(|v| v.get()).min().unwrap_or(0) + } + + /// Number of validators currently being tracked. + pub fn tracked_count(&self) -> usize { + self.status.len() + } + + /// Number of unique finalized block digests observed at `view`. 
+ pub fn unique_digests_at(&self, view: u64) -> usize { + self.digests_by_view + .get(&View::new(view)) + .map_or(0, HashSet::len) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use commonware_cryptography::{ed25519, Signer as _}; + + #[test] + fn conflicting_same_view_from_same_validator_is_rejected() { + let pk = ed25519::PrivateKey::from_seed(7).public_key(); + let mut tracker = ProgressTracker::default(); + + tracker + .observe(FinalizationUpdate { + pk: pk.clone(), + view: View::new(3), + block_digest: vec![1, 2, 3], + }) + .expect("first update should be accepted"); + + let err = tracker + .observe(FinalizationUpdate { + pk, + view: View::new(3), + block_digest: vec![9, 9, 9], + }) + .expect_err("conflicting digest at same view should be rejected"); + assert!(err.contains("fork detected"), "unexpected error: {err}"); + } + + #[test] + fn stale_replay_does_not_poison_agreement_tracking() { + let pk1 = ed25519::PrivateKey::from_seed(1).public_key(); + let pk2 = ed25519::PrivateKey::from_seed(2).public_key(); + let mut tracker = ProgressTracker::default(); + + tracker + .observe(FinalizationUpdate { + pk: pk1.clone(), + view: View::new(5), + block_digest: vec![5, 5, 5], + }) + .expect("high-watermark update should be accepted"); + + // A stale replay from pk1 should be ignored and must not influence + // fork detection for that old view. + tracker + .observe(FinalizationUpdate { + pk: pk1, + view: View::new(3), + block_digest: vec![1, 1, 1], + }) + .expect("stale replay should be ignored"); + + tracker + .observe(FinalizationUpdate { + pk: pk2, + view: View::new(3), + block_digest: vec![2, 2, 2], + }) + .expect("stale replay from another validator should not trigger a fork"); + } +} diff --git a/glue/src/stateful/actor/bootstrap.rs b/glue/src/stateful/actor/bootstrap.rs new file mode 100644 index 00000000000..2d55e6034fc --- /dev/null +++ b/glue/src/stateful/actor/bootstrap.rs @@ -0,0 +1,454 @@ +//! 
Startup bootstrap for the [`Stateful`](crate::stateful::Stateful) actor. +//! +//! The [`bootstrap`] function runs on every startup before the actor can +//! process blocks. It initializes the databases, optionally runs state sync +//! (at most once), and transitions the actor into processing mode. +//! +//! A durable `sync_done` flag in a +//! [`Metadata`] store tracks whether +//! state sync has already completed. The combination of this flag and the +//! [`Mode`] in [`BootstrapConfig`] determines which path is taken: +//! +//! ## Already synced (`sync_done = true`, [`Mode::MarshalSync`]) +//! +//! A previous run already completed state sync. The databases are opened from +//! their existing on-disk state and reconciled with marshal's processed block: +//! +//! - Bootstrap loads sync targets for marshal's processed block and compares +//! them with the databases' committed targets. +//! - If they differ, bootstrap rewinds every database in the set to those +//! processed-block targets. +//! - This reconciliation assumes databases were not manually rolled back or +//! replaced out-of-band. +//! - Any rewind failure is fatal and causes a panic. +//! - Bootstrap then transitions to processing mode via +//! [`ApplicationMailbox::sync_complete`] at marshal's processed anchor. +//! +//! The marshal only advances its processed height after it has durably stored +//! the floor block, so reconciliation can read the processed block directly. +//! +//! ## Fresh start (`sync_done = false`, [`Mode::MarshalSync`]) +//! +//! No sync target was provided. Databases are initialized, the genesis block +//! digest is used as the last processed digest, `sync_done` is persisted, and +//! the actor transitions. +//! +//! ## State sync (`sync_done = false`, [`Mode::StateSync`]) +//! +//! A sync target block and a channel of anchored target updates are provided. +//! State sync runs at most once; subsequent boots take the "already synced" +//! path above. The procedure is: +//! +//! 1. 
Extract the initial anchor and sync targets from the +//! seed block. +//! 2. Run [`StateSyncSet::sync`], +//! which initializes and populates all databases via the provided +//! resolvers. Tip updates stream in via the `target_updates` channel as +//! new blocks finalize during the sync, so the final synced height is +//! determined by the sync routine itself, not pre-determined. +//! 3. Persist `sync_done = true` so subsequent boots skip state sync. +//! 4. Raise the marshal floor to the synced height via +//! [`MarshalMailbox::set_floor`], then assert that the marshal's processed +//! height is at that floor. +//! 5. Call [`ApplicationMailbox::sync_complete`] with the constructed databases +//! and the synced digest, transitioning the actor into block-processing +//! mode. +//! +//! ## Crash during state sync +//! +//! If the node crashes while state sync is in progress (before `sync_done` is +//! persisted), the database partitions may contain partial sync data that is +//! incompatible with a fresh [`ManagedDb::init`](crate::stateful::db::ManagedDb::init). +//! The operator must delete the database storage directory before restarting. +//! A future version may automate this cleanup. 
+ +use crate::stateful::{ + db::{Anchor, DatabaseSet, StateSyncSet, SyncEngineConfig}, + Application, Mailbox as ApplicationMailbox, +}; +use commonware_consensus::{ + marshal::{ + core::{CommitmentFallback, Mailbox as MarshalMailbox, Variant as MarshalVariant}, + Identifier, + }, + simplex::types::Finalization, + types::{Height, Round}, + CertifiableBlock, Epochable, Heightable, Viewable, +}; +use commonware_cryptography::{certificate::Scheme, Digest, Digestible}; +use commonware_runtime::{ + telemetry::metrics::{MetricsExt, Registered}, + Clock, Metrics, Spawner, Storage, +}; +use commonware_storage::metadata::{Config as MetadataConfig, Metadata}; +use commonware_utils::{channel::ring, sequence::U64}; +use prometheus_client::metrics::gauge::Gauge; +use rand::Rng; + +/// Durable metadata key for "state sync completed". +const SYNC_DONE_KEY: U64 = U64::new(0); + +/// Sync targets type of the application's database set. +type SyncTargets = <>::Databases as DatabaseSet>::SyncTargets; +/// Digest type of the application's block. +type BlockDigest = <>::Block as Digestible>::Digest; +/// An [`Anchor`] paired with the sync targets that accompany it. +type AnchoredUpdate = (Anchor>, SyncTargets); + +/// Bootstrap outcome before durable metadata is finalized. +enum BootstrapState +where + G: Digest, +{ + /// Databases are ready with no marshal floor update. + Ready { + databases: D, + last_processed: Anchor, + }, + /// Databases were state-synced and require marshal floor update. + Synced { + databases: D, + last_processed: Anchor, + floor: F, + }, +} + +/// Startup inputs for bootstrap. +pub(super) enum Mode +where + E: Rng + Spawner + Metrics + Clock + Storage, + A: Application, +{ + /// Initialize databases without running startup state sync, transitioning directly + /// to marshal sync + MarshalSync, + + /// Run startup state sync from initial targets and follow target updates. + StateSync { + /// The block whose embedded targets seed the initial sync pass. 
+ block: A::Block, + /// The finalization for `block`. + finalization: F, + /// Channel of anchored target updates as new blocks finalize during sync. 
+ target_updates: ring::Receiver>, + }, +} + +/// Configuration for startup bootstrap. +pub(super) struct BootstrapConfig +where + E: Rng + Spawner + Metrics + Clock + Storage, + A: Application, + A::Databases: StateSyncSet>, +{ + /// Runtime context used for metadata and database initialization. + pub(super) context: E, + + /// Database configuration for the managed set. + pub(super) db_config: >::Config, + + /// Metadata partition that stores the durable "state sync done" bit. + pub(super) metadata_partition: String, + + /// Per-database sync engine parameters. + pub(super) sync_config: SyncEngineConfig, + + /// Per-database resolvers used to fetch state from peers. + pub(super) resolvers: R, + + /// Startup mode and required inputs for that mode. + pub(super) mode: Mode, +} + +/// Initialize databases and transition the actor into processing mode. +/// +/// See the [module documentation](self) for the full procedure. +/// +/// # Panics +/// +/// Every failure in this function is intentionally a panic. A node that +/// cannot complete bootstrap has no valid state to operate on; continuing +/// with partial or corrupt databases risks consensus violations or silently +/// diverging from the network. Crashing is the safest response. +/// +/// - Metadata store unreachable. The durable `sync_done` flag lives in +/// a [`Metadata`] store. If it +/// cannot be opened or written, the node has no way to track whether sync +/// already ran, so it cannot start safely. +/// - `sync_done = true` with [`Mode::StateSync`]. This is a +/// configuration contradiction: the caller is requesting state sync for a +/// node that already completed it. This indicates a bug in the caller. +/// - State sync fails. The sync engine validates every batch of +/// operations against MMR proofs rooted at the target. 
Errors that reach +/// this point are not retryable: root mismatches after full sync +/// (operations do not reconstruct the expected root), journal or storage +/// I/O failures (disk full, corruption), invalid target updates (target +/// moved backward or stalled), and resolver errors that the engine could +/// not recover from internally. The sync engine already retries individual +/// fetch failures; errors that propagate here are terminal. +/// - Rewind to marshal-processed targets fails. Bootstrap recovery rewinds all +/// databases to marshal's processed block targets. Rewind errors indicate +/// unrecoverable local history loss/corruption (for example pruned rewind +/// boundaries or invalid commit targets), so startup must stop. +/// - Marshal unreachable after `set_floor`. After state sync the marshal +/// floor must be raised so that the node does not attempt to re-process +/// blocks below the synced height. If the marshal does not respond, or +/// reports a processed height that does not equal the floor, the node +/// cannot safely determine where to resume. 
+pub(super) async fn bootstrap( + marshal: MarshalMailbox, + application: ApplicationMailbox, + config: BootstrapConfig>, +) where + E: Rng + Spawner + Metrics + Clock + Storage, + A: Application, + A::Databases: StateSyncSet>, + S: Scheme, + V: MarshalVariant, +{ + let BootstrapConfig { + context, + db_config, + metadata_partition, + sync_config, + resolvers, + mode, + } = config; + + let state_sync_done: Registered = + context.gauge("state_sync_done", "Whether state sync has completed"); + state_sync_done.set(0); + + let mut metadata = Metadata::::init( + context.child("state_sync_metadata"), + MetadataConfig { + partition: metadata_partition, + codec_config: (), + }, + ) + .await + .expect("failed to initialize state sync metadata store"); + + if metadata.get(&SYNC_DONE_KEY).copied().unwrap_or(false) { + state_sync_done.set(1); + assert!( + matches!(mode, Mode::MarshalSync), + "state sync bootstrap received a sync startup target after state sync was already marked complete", + ); + + let genesis = application.genesis().await; + let databases = A::Databases::init(context.child("db_set"), db_config).await; + + // Recover the anchor and targets of marshal's processed block. Per the + // contract documented on `processed_anchor_targets`, `set_floor` does + // not advance the processed height until the floor block is stored, so + // a crash after state sync should not leave that block missing. There + // is no fetch hint or polling here: a missing block is treated as + // inconsistent marshal storage and panics on the `expect` below.
+ let (processed_anchor, processed_targets) = + processed_anchor_targets::(&marshal, &genesis) + .await + .expect("marshal processed block must be available"); + + let db_targets = databases.committed_targets().await; + if db_targets != processed_targets { + databases.rewind_to_targets(processed_targets.clone()).await; + let rewound_targets = databases.committed_targets().await; + assert!( + rewound_targets == processed_targets, + "database targets must match marshal processed targets after rewind", + ); + } + + application.sync_complete(databases, processed_anchor); + return; + } + + let state = match mode { + Mode::MarshalSync => { + let databases = A::Databases::init(context.child("db_set"), db_config).await; + let genesis = application.genesis().await; + let genesis_context = genesis.context(); + let last_processed = Anchor { + height: Height::zero(), + round: Round::new(genesis_context.epoch(), genesis_context.view()), + digest: genesis.digest(), + }; + BootstrapState::Ready { + databases, + last_processed, + } + } + Mode::StateSync { + block, + finalization, + target_updates, + } => { + let block_context = block.context(); + let initial_anchor = Anchor { + height: block.height(), + round: Round::new(block_context.epoch(), block_context.view()), + digest: block.digest(), + }; + let initial_targets = A::sync_targets(&block); + let (databases, last_processed) = A::Databases::sync( + context.child("state_sync"), + db_config, + resolvers, + initial_anchor, + initial_targets, + target_updates, + sync_config, + ) + .await + .unwrap_or_else(|err| panic!("state sync failed: {err:?}")); + let floor = if last_processed.height == initial_anchor.height { + finalization + } else { + marshal + .get_finalization(last_processed.height) + .await + .expect("marshal must respond with finalization after state sync") + }; + BootstrapState::Synced { + databases, + last_processed, + floor, + } + } + }; + + let (databases, last_processed) = match state { + BootstrapState::Ready { +
databases, + last_processed, + } => { + metadata + .put_sync(SYNC_DONE_KEY, true) + .await + .expect("must persist state sync completion metadata"); + state_sync_done.set(1); + (databases, last_processed) + } + BootstrapState::Synced { + databases, + last_processed, + floor: finalization, + } => { + let floor = last_processed.height; + metadata + .put_sync(SYNC_DONE_KEY, true) + .await + .expect("must persist state sync completion metadata"); + state_sync_done.set(1); + // Marshal fetches and stores the floor block before advancing its + // processed height, which also clears pending acknowledgements + // below that floor. + let floor_commitment = finalization.proposal.payload; + marshal.set_floor(finalization); + wait_for_floor(&marshal, floor, floor_commitment).await; + (databases, last_processed) + } + }; + + application.sync_complete(databases, last_processed); +} + +/// Load marshal's current processed anchor and derived sync targets. +/// +/// Returns `None` when the marshal's processed height is non-zero but the +/// block is missing from the archive. With current floor semantics this should +/// only happen if marshal storage is inconsistent: [`MarshalMailbox::set_floor`] +/// does not advance the processed height until the floor block is stored. +/// +/// # Panics +/// +/// - Marshal does not return its processed height.
+async fn processed_anchor_targets( + marshal: &MarshalMailbox, + genesis: &A::Block, +) -> Option<(Anchor>, SyncTargets)> +where + E: Rng + Spawner + Metrics + Clock, + A: Application, + S: Scheme, + V: MarshalVariant, +{ + let processed_height = marshal + .get_processed_height() + .await + .expect("state sync bootstrap must fetch marshal processed height"); + if processed_height.is_zero() { + let genesis_context = genesis.context(); + return Some(( + Anchor { + height: Height::zero(), + round: Round::new(genesis_context.epoch(), genesis_context.view()), + digest: genesis.digest(), + }, + A::sync_targets(genesis), + )); + } + + let block = marshal + .get_block(Identifier::Height(processed_height)) + .await + .map(V::into_inner)?; + + Some(( + Anchor { + height: processed_height, + round: Round::new(block.context().epoch(), block.context().view()), + digest: block.digest(), + }, + A::sync_targets(&block), + )) +} + +async fn wait_for_floor( + marshal: &MarshalMailbox, + floor: Height, + commitment: V::Commitment, +) where + S: Scheme, + V: MarshalVariant, +{ + let block = marshal + .subscribe_by_commitment(commitment, CommitmentFallback::Wait) + .await + .expect("marshal floor block subscription cancelled"); + assert_eq!( + block.height(), + floor, + "marshal returned unexpected state sync floor block height", + ); + + let processed_height = marshal + .get_processed_height() + .await + .expect("marshal must respond with processed height after set_floor"); + assert_eq!( + processed_height, floor, + "marshal processed height must match updated floor after state sync", + ); +} + +#[cfg(test)] +mod tests { + #[test] + fn synced_bootstrap_persists_sync_done_before_advancing_floor() { + let source = include_str!("bootstrap.rs"); + let synced_arm = source + .split("BootstrapState::Synced") + .nth(2) + .expect("synced arm should exist"); + let set_floor = synced_arm + .find("marshal.set_floor") + .expect("synced bootstrap should advance marshal floor"); + let put_sync = 
synced_arm + .find("put_sync(SYNC_DONE_KEY, true)") + .expect("synced bootstrap should persist sync_done"); + + assert!( + put_sync < set_floor, + "sync_done must be durable before the marshal floor advances", + ); + } +} diff --git a/glue/src/stateful/actor/core.rs b/glue/src/stateful/actor/core.rs new file mode 100644 index 00000000000..2f6af0f7e41 --- /dev/null +++ b/glue/src/stateful/actor/core.rs @@ -0,0 +1,696 @@ +//! Consensus-facing stateful application that manages pending state on behalf of +//! an inner application. + +use crate::stateful::{ + actor::{ + bootstrap::{bootstrap, BootstrapConfig, Mode as BootstrapMode}, + mailbox::{ErasedAncestorStream, Message}, + metrics::Metrics as ProcessorMetrics, + processor::{FinalizeStatus, Processor}, + Mailbox, + }, + db::{Anchor, AttachableResolverSet, DatabaseSet, StateSyncSet, SyncEngineConfig}, + Application, +}; +use commonware_actor::mailbox as actor_mailbox; +use commonware_consensus::{ + marshal::{ + ancestry::BlockProvider, + core::{DigestFallback, Mailbox as MarshalMailbox, Variant as MarshalVariant}, + }, + simplex::types::Finalization, + types::{Height, Round}, + CertifiableBlock, Epochable, Heightable, Viewable, +}; +use commonware_cryptography::{certificate::Scheme, Digestible}; +use commonware_macros::select_loop; +use commonware_runtime::{spawn_cell, Clock, ContextCell, Handle, Metrics, Spawner, Storage}; +use commonware_utils::{ + acknowledgement::Exact, + channel::{fallible::OneshotExt, oneshot, ring}, + Acknowledgement, +}; +use futures::SinkExt; +use rand::Rng; +use std::num::NonZeroUsize; +use tracing::{debug, info}; + +type BlockDigest = <>::Block as Digestible>::Digest; +type AnchoredUpdate = ( + Anchor>, + <>::Databases as DatabaseSet>::SyncTargets, +); + +/// Buffered verify request while startup sync is in progress. 
+struct HeldVerify +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + context: (E, A::Context), + ancestry: ErasedAncestorStream, + response: oneshot::Sender, +} + +/// Buffered finalization while startup sync is in progress. +struct HeldFinalization { + block: B, + acknowledgement: Exact, +} + +/// Tracks the attached database set and pending subscribers. +struct DatabaseAttachment { + databases: Option, + subscribers: Vec>, +} + +impl DatabaseAttachment { + const fn new() -> Self { + Self { + databases: None, + subscribers: Vec::new(), + } + } + + fn prune_closed_subscribers(&mut self) { + self.subscribers + .retain(|subscriber| !subscriber.is_closed()); + } + + fn subscribe(&mut self, response: oneshot::Sender) { + let Some(databases) = self.databases.clone() else { + self.subscribers.push(response); + return; + }; + response.send_lossy(databases); + } + + fn attach(&mut self, databases: D) { + self.databases = Some(databases.clone()); + for subscriber in self.subscribers.drain(..) { + subscriber.send_lossy(databases.clone()); + } + } +} + +const STATE_SYNC_METADATA_SUFFIX: &str = "_state_sync_metadata"; + +/// Startup mode for the [`Stateful`] application. +pub enum StartupMode { + /// Initialize databases and let marshal backfill. + MarshalSync, + /// State sync the databases, starting at the given block's embedded targets. + /// + /// It is up to the user to determine whether or not this block is a valid member + /// of the canonical chain. The finalization is used to advance marshal's + /// floor after state sync completes. + StateSync { block: B, finalization: F }, +} + +/// Configuration for constructing a [`Stateful`] application. +pub struct Config +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + /// The inner application that drives state transitions. + pub app: A, + + /// Configuration used to construct the database set. + pub db_config: >::Config, + + /// Source of input (e.g. 
transactions) passed to the application on propose. + pub input_provider: A::InputProvider, + + /// Marshal mailbox used for startup anchoring and lazy recovery. + pub marshal: P, + + /// Capacity of the stateful actor mailbox channel. + pub mailbox_size: usize, + + /// Partition prefix used to derive the durable state-sync metadata partition. + pub partition_prefix: String, + + /// Explicit startup mode. + pub startup: StartupMode, + + /// Resolver(s) for startup sync fetches and post-bootstrap serving. + pub resolvers: R, + + /// Sync engine tuning knobs. + pub sync_config: SyncEngineConfig, +} + +/// Actor state while state sync is in progress. +struct SyncingState +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + /// The inner application that drives state transitions. + app: A, + + /// Anchored target updates forwarded to the bootstrap sync task. + tip_sender: ring::Sender>, + + /// Resolver set attached once sync completes. + sync_resolvers: R, + + /// Verify requests held while syncing. + /// + /// The simplex voter keeps at most one in-flight verify request, so this + /// list is bounded by protocol behavior. + held_verify_requests: Vec>, + + /// Finalizations held while syncing. + /// + /// Marshal bounds in-flight application updates by `max_pending_acks`, + /// so this list is also bounded by protocol behavior. + held_finalizations: Vec>, +} + +impl SyncingState +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + const fn new( + app: A, + tip_sender: ring::Sender>, + sync_resolvers: R, + ) -> Self { + Self { + app, + tip_sender, + sync_resolvers, + held_verify_requests: Vec::new(), + held_finalizations: Vec::new(), + } + } +} + +/// Runtime actor mode. +enum Mode +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + /// Actor is syncing startup state. + Syncing(SyncingState), + + /// Actor is processing consensus-driven execution with marshal backfill sync. 
+ Processing(Processor), +} + +/// Application dependencies shared across both modes. +struct Shared +where + E: Rng + Spawner + Metrics + Clock, + A: Application, + S: Scheme, + V: MarshalVariant, +{ + /// Runtime context providing RNG, task spawning, metrics, and clock. + context: ContextCell, + + /// Source of input (e.g. transactions) passed to the application on propose. + input_provider: A::InputProvider, + + /// Marshal mailbox used for lazy block lookup. + marshal: MarshalMailbox, + + /// Attached database set and pending subscribers. + database_attachment: DatabaseAttachment, +} + +/// Stateful application that manages the pending-tip DAG of merkleized +/// batches on behalf of an [`Application`], implementing the consensus +/// application and verifying traits. +pub struct Stateful +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + /// Sender half of the actor mailbox channel. + sender: actor_mailbox::Sender>, + + /// Runtime context providing RNG, task spawning, metrics, and clock. + context: ContextCell, + + /// The receiver for messages. + mailbox: actor_mailbox::Receiver>, + + /// The inner application that drives state transitions. + inner: A, + + /// Source of input (e.g. transactions) passed to the application on propose. + input_provider: A::InputProvider, + + /// Marshal mailbox used for startup anchoring and lazy recovery. + marshal: P, + + /// Configuration used to initialize the database set at startup. + db_config: >::Config, + + /// Partition prefix used to derive the durable state-sync metadata partition. + partition_prefix: String, + + /// Explicit startup mode. + startup: StartupMode, + + /// Resolver(s) for startup sync fetches and post-bootstrap serving. + resolvers: R, + + /// Sync engine tuning knobs. + sync_config: SyncEngineConfig, +} + +impl Stateful +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + /// Construct a [`Stateful`] actor and its [`Mailbox`]. 
+ /// + /// This only wires dependencies and allocates the mailbox. The actor does + /// not process messages until [`Stateful::start`] is called. + pub fn init(context: E, config: Config) -> (Self, Mailbox) { + let mailbox_size = + NonZeroUsize::new(config.mailbox_size).expect("mailbox_size must be non-zero"); + let (sender, mailbox) = actor_mailbox::new(context.child("mailbox"), mailbox_size); + ( + Self { + sender: sender.clone(), + context: ContextCell::new(context), + mailbox, + inner: config.app, + input_provider: config.input_provider, + marshal: config.marshal, + db_config: config.db_config, + partition_prefix: config.partition_prefix, + startup: config.startup, + resolvers: config.resolvers, + sync_config: config.sync_config, + }, + Mailbox::new(sender), + ) + } + + /// Start the actor and run startup bootstrap in the background. + /// + /// This is the single startup entrypoint for both modes: + /// - [`StartupMode::MarshalSync`]: initialize databases and backfill from marshal. + /// - [`StartupMode::StateSync`]: run one-time startup state sync. 
+ pub fn start(self) -> Handle<()> + where + E: Rng + Spawner + Metrics + Clock + Storage, + A: Application, + A::Databases: StateSyncSet>, + S: Scheme, + V: MarshalVariant, + P: BlockProvider + Clone + Into>, + F: Into>, + MarshalMailbox: BlockProvider, + R: AttachableResolverSet, + { + let (tip_sender, target_updates) = ring::channel(self.sync_config.update_channel_size); + let bootstrap_mode = match self.startup { + StartupMode::MarshalSync => BootstrapMode::MarshalSync, + StartupMode::StateSync { + block, + finalization, + } => BootstrapMode::StateSync { + block, + finalization: finalization.into(), + target_updates, + }, + }; + let bootstrap_resolvers = self.resolvers.clone(); + let bootstrap_context = self.context.as_present().child("state_sync"); + let bootstrap_task_context = self.context.as_present().child("state_sync_bootstrap"); + let marshal: MarshalMailbox = self.marshal.clone().into(); + let mut service = Service { + mailbox: self.mailbox, + shared: Shared { + context: self.context, + input_provider: self.input_provider, + marshal: marshal.clone(), + database_attachment: DatabaseAttachment::new(), + }, + mode: Mode::Syncing(SyncingState::new(self.inner, tip_sender, self.resolvers)), + }; + let bootstrap_config = BootstrapConfig { + context: bootstrap_context, + db_config: self.db_config, + metadata_partition: format!("{}{STATE_SYNC_METADATA_SUFFIX}", self.partition_prefix), + sync_config: self.sync_config, + resolvers: bootstrap_resolvers, + mode: bootstrap_mode, + }; + let mailbox = Mailbox::new(self.sender); + bootstrap_task_context.spawn(move |_| bootstrap(marshal, mailbox, bootstrap_config)); + spawn_cell!(service.shared.context, service.run()) + } +} + +/// Stateful application service. 
+struct Service +where + E: Rng + Spawner + Metrics + Clock, + A: Application, + S: Scheme, + V: MarshalVariant, + MarshalMailbox: BlockProvider, +{ + mailbox: actor_mailbox::Receiver>, + shared: Shared, + mode: Mode, +} + +impl Service +where + E: Rng + Spawner + Metrics + Clock, + A: Application, + S: Scheme, + V: MarshalVariant, + MarshalMailbox: BlockProvider, +{ + async fn run(mut self) + where + R: AttachableResolverSet, + { + select_loop! { + self.shared.context, + on_start => { + self.shared.database_attachment.prune_closed_subscribers(); + }, + on_stopped => { + debug!("context shutdown, stopping stateful application"); + }, + Some(message) = self.mailbox.recv() else { + debug!("mailbox closed, shutting down"); + break; + } => { + match (&mut self.mode, message) { + // Shared + (_, Message::Genesis { response }) => { + let genesis = match &mut self.mode { + Mode::Syncing(syncing) => syncing.app.genesis().await, + Mode::Processing(processor) => processor.genesis().await, + }; + response.send_lossy(genesis); + } + (_, Message::SubscribeDatabases { response }) => { + self.shared.database_attachment.subscribe(response); + } + + // Syncing Mode + (Mode::Syncing(_), Message::Propose { response, .. 
}) => { + debug!("proposal rejected: state sync in progress"); + response.send_lossy(None); + } + ( + Mode::Syncing(syncing), + Message::Verify { + context, + ancestry, + response, + }, + ) => { + syncing + .held_verify_requests + .retain(|request| !request.response.is_closed()); + syncing.held_verify_requests.push(HeldVerify { + context, + ancestry, + response, + }); + debug!( + held_verify_requests = syncing.held_verify_requests.len(), + "verify held: state sync in progress" + ); + } + ( + Mode::Syncing(syncing), + Message::Finalized { + block, + acknowledgement, + }, + ) => { + debug!( + height = block.height().get(), + "finalization held during sync" + ); + syncing.held_finalizations.push(HeldFinalization { + block, + acknowledgement, + }); + } + (Mode::Syncing(syncing), Message::Tip { height, digest }) => { + handle_tip(&mut self.shared, syncing, height, digest).await; + } + ( + Mode::Syncing(syncing), + Message::SyncComplete { + databases, + last_processed, + }, + ) => { + let attached_databases = databases.clone(); + let processor = handle_sync_complete( + self.shared.context.as_present(), + self.shared.marshal.clone(), + syncing, + databases, + last_processed, + ) + .await; + self.shared.database_attachment.attach(attached_databases); + self.mode = Mode::Processing(processor); + } + + // Processing mode + ( + Mode::Processing(processor), + Message::Propose { + context, + ancestry, + response, + }, + ) => { + processor + .propose( + self.shared.context.as_present(), + self.shared.marshal.clone(), + context, + ancestry, + &mut self.shared.input_provider, + response, + ) + .await; + } + ( + Mode::Processing(processor), + Message::Verify { + context, + ancestry, + response, + }, + ) => { + processor + .verify( + self.shared.context.as_present(), + self.shared.marshal.clone(), + context, + ancestry, + response, + ) + .await; + } + ( + Mode::Processing(processor), + Message::Finalized { + block, + acknowledgement, + }, + ) => { + if let FinalizeStatus::Persisted 
{ height } = processor + .finalize(self.shared.context.as_present(), block) + .await + { + debug!(height = height.get(), "persisted finalized database batch"); + } + acknowledgement.acknowledge(); + } + (Mode::Processing(_), Message::Tip { .. }) => {} + (Mode::Processing(_), Message::SyncComplete { .. }) => {} + } + }, + } + } +} + +/// Handles a [`Message::Tip`]. +/// +/// In [`Mode::Syncing`], fetches the block from marshal, extracts +/// per-database sync targets via [`Application::sync_targets`], and +/// forwards them to the background sync engines. +async fn handle_tip( + shared: &mut Shared, + syncing: &mut SyncingState, + height: Height, + digest: ::Digest, +) where + E: Rng + Spawner + Metrics + Clock, + A: Application, + S: Scheme, + V: MarshalVariant, +{ + let Some(block) = shared + .marshal + .subscribe_by_digest(digest, DigestFallback::Wait) + .await + .ok() + .map(V::into_inner) + else { + debug!( + height = height.get(), + "tip block not available from provider, skipping target update" + ); + return; + }; + + let block_context = block.context(); + let anchored_update = ( + Anchor { + height, + round: Round::new(block_context.epoch(), block_context.view()), + digest, + }, + A::sync_targets(&block), + ); + if syncing.tip_sender.send(anchored_update).await.is_err() { + debug!( + height = height.get(), + "tip update channel unavailable, skipping target update" + ); + } +} + +/// Handles a [`Message::SyncComplete`]. +/// +/// Attaches resolvers to the databases and returns a [`Processor`] ready for +/// consensus execution. 
+async fn handle_sync_complete( + context: &E, + marshal: MarshalMailbox, + syncing: &mut SyncingState, + databases: A::Databases, + last_processed: Anchor<::Digest>, +) -> Processor +where + E: Rng + Spawner + Metrics + Clock, + A: Application, + S: Scheme, + V: MarshalVariant, + MarshalMailbox: BlockProvider, + R: AttachableResolverSet, +{ + let app = syncing.app.clone(); + syncing + .sync_resolvers + .attach_databases(databases.clone()) + .await; + let metrics = ProcessorMetrics::new(context.child("processor_metrics")); + let mut processor = Processor::new(app, databases, last_processed, metrics); + + // In case any verification requests were delivered after the floor was updated, + // process them now to ensure we progress consensus. + for HeldVerify { + context: request_context, + ancestry, + response, + } in syncing.held_verify_requests.drain(..) + { + processor + .verify( + context, + marshal.clone(), + request_context, + ancestry, + response, + ) + .await; + } + + // In case any finalizations were delivered after the floor was updated, + // process them now to ensure we progress marshal. + for HeldFinalization { + block, + acknowledgement, + } in syncing.held_finalizations.drain(..) + { + if block.height() <= last_processed.height { + // Block is already persisted at or below the reconciled floor. + // The acknowledgement can be dropped, since marshal cancels + // pending acks when the floor is updated. 
+ continue; + } + processor.finalize(context, block).await; + acknowledgement.acknowledge(); + } + + info!("sync complete, database attached to processor"); + processor +} + +#[cfg(test)] +mod tests { + use super::DatabaseAttachment; + use commonware_utils::channel::oneshot; + + #[test] + fn database_attachment_notifies_pending_subscribers() { + let mut attachment = DatabaseAttachment::new(); + let (tx1, rx1) = oneshot::channel(); + let (tx2, rx2) = oneshot::channel(); + + attachment.subscribe(tx1); + attachment.subscribe(tx2); + attachment.attach(7u64); + + assert_eq!(rx1.blocking_recv(), Ok(7)); + assert_eq!(rx2.blocking_recv(), Ok(7)); + } + + #[test] + fn database_attachment_replays_to_late_subscribers() { + let mut attachment = DatabaseAttachment::new(); + attachment.attach(11u64); + + let (tx, rx) = oneshot::channel(); + attachment.subscribe(tx); + + assert_eq!(rx.blocking_recv(), Ok(11)); + } + + #[test] + fn database_attachment_prunes_closed_subscribers() { + let mut attachment = DatabaseAttachment::new(); + let (closed_tx, closed_rx) = oneshot::channel::(); + let (open_tx, open_rx) = oneshot::channel(); + + drop(closed_rx); + attachment.subscribe(closed_tx); + attachment.subscribe(open_tx); + + assert_eq!(attachment.subscribers.len(), 2); + + attachment.prune_closed_subscribers(); + + assert_eq!(attachment.subscribers.len(), 1); + + attachment.attach(13u64); + assert_eq!(open_rx.blocking_recv(), Ok(13)); + } +} diff --git a/glue/src/stateful/actor/mailbox.rs b/glue/src/stateful/actor/mailbox.rs new file mode 100644 index 00000000000..86841348f5f --- /dev/null +++ b/glue/src/stateful/actor/mailbox.rs @@ -0,0 +1,290 @@ +//! Mailbox for the [`super::Stateful`] actor. 
+ +use crate::stateful::{db::Anchor, Application}; +use commonware_actor::{ + mailbox::{Overflow, Policy, Sender}, + Feedback, +}; +use commonware_consensus::{ + marshal::Update, types::Height, Application as ConsensusApplication, Reporter, +}; +use commonware_cryptography::Digestible; +use commonware_runtime::{Clock, Metrics, Spawner}; +use commonware_utils::{acknowledgement::Exact, channel::oneshot}; +use futures::Stream; +use rand::Rng; +use std::{collections::VecDeque, pin::Pin}; + +/// Type alias for an ancestor stream sent through the actor mailbox. +pub(crate) type ErasedAncestorStream = Pin + Send>>; + +/// Messages processed by the actor loop. +pub(crate) enum Message +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + /// A request for the genesis block. + Genesis { response: oneshot::Sender }, + + /// A request to propose a block. + Propose { + context: (E, A::Context), + ancestry: ErasedAncestorStream, + response: oneshot::Sender>, + }, + + /// A request to verify a block. + Verify { + context: (E, A::Context), + ancestry: ErasedAncestorStream, + response: oneshot::Sender, + }, + + /// A reporting of a new finalized block. + Finalized { + block: A::Block, + acknowledgement: Exact, + }, + + /// A new finalized tip observed by marshal. + /// + /// During state sync, the actor uses this to fetch the block and + /// extract updated sync targets. In processing mode, this is a no-op. + Tip { + height: Height, + digest: ::Digest, + }, + + /// Signals that state sync is complete and the actor should transition + /// to `Mode::Processing`. + SyncComplete { + databases: A::Databases, + last_processed: Anchor<::Digest>, + }, + + /// Requests the attached database set. + /// + /// The actor replies once the database set is attached, or immediately if + /// it is already available. 
+ SubscribeDatabases { + response: oneshot::Sender, + }, +} + +impl Message +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + fn response_closed(&self) -> bool { + match self { + Self::Genesis { response } => response.is_closed(), + Self::Propose { response, .. } => response.is_closed(), + Self::Verify { response, .. } => response.is_closed(), + Self::SubscribeDatabases { response } => response.is_closed(), + Self::Finalized { .. } | Self::Tip { .. } | Self::SyncComplete { .. } => false, + } + } +} + +pub(crate) struct Pending(VecDeque>) +where + E: Rng + Spawner + Metrics + Clock, + A: Application; + +impl Default for Pending +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + fn default() -> Self { + Self(VecDeque::new()) + } +} + +impl Overflow> for Pending +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + fn is_empty(&self) -> bool { + self.0.is_empty() + } + + fn drain(&mut self, mut push: F) + where + F: FnMut(Message) -> Option>, + { + while let Some(message) = self.0.pop_front() { + if message.response_closed() { + continue; + } + + if let Some(message) = push(message) { + self.0.push_front(message); + break; + } + } + } +} + +impl Policy for Message +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + type Overflow = Pending; + + fn handle(overflow: &mut Self::Overflow, message: Self) -> bool { + if message.response_closed() { + return true; + } + overflow.0.push_back(message); + true + } +} + +/// Channel-based proxy to the [`Stateful`](super::Stateful) actor. +/// +/// Implements the consensus application and verifying traits by forwarding +/// each call to the actor via a message and awaiting the response. 
+pub struct Mailbox +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + sender: Sender>, +} + +impl Clone for Mailbox +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + fn clone(&self) -> Self { + Self { + sender: self.sender.clone(), + } + } +} + +impl Mailbox +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + /// Create a mailbox from the send half of the actor's message channel. + pub(crate) const fn new(sender: Sender>) -> Self { + Self { sender } + } +} + +impl Mailbox +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + /// Fetch the application's genesis block from the actor. + pub(crate) async fn genesis(&self) -> A::Block { + let (response, receiver) = oneshot::channel(); + let _ = self.sender.enqueue(Message::Genesis { response }); + receiver + .await + .expect("stateful actor dropped during genesis") + } + + /// Signal that state sync is complete, providing the constructed databases + /// and the finalized digest to transition the actor to processing mode. + pub fn sync_complete( + &self, + databases: A::Databases, + last_processed: Anchor<::Digest>, + ) { + let feedback = self.sender.enqueue(Message::SyncComplete { + databases, + last_processed, + }); + assert!( + feedback.accepted(), + "stateful actor dropped during sync_complete" + ); + } + + /// Wait for the attached database set. + /// + /// This resolves when startup bootstrap finishes and the actor has + /// attached the database set. Late callers receive the current database + /// set immediately. 
+ pub async fn subscribe_databases(&self) -> A::Databases { + let (response, receiver) = oneshot::channel(); + let _ = self + .sender + .enqueue(Message::SubscribeDatabases { response }); + receiver + .await + .expect("stateful actor dropped during subscribe_databases") + } +} + +impl ConsensusApplication for Mailbox +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + type SigningScheme = A::SigningScheme; + type Context = A::Context; + type Block = A::Block; + + async fn propose( + &mut self, + context: (E, Self::Context), + ancestry: impl Stream + Send + 'static, + ) -> Option { + let (response, receiver) = oneshot::channel(); + let _ = self.sender.enqueue(Message::Propose { + context, + ancestry: Box::pin(ancestry), + response, + }); + receiver.await.ok().flatten() + } + + async fn verify( + &mut self, + context: (E, Self::Context), + ancestry: impl Stream + Send + 'static, + ) -> bool { + // We must panic if we don't get a response; we cannot override the decision + // of the application based on the availability of the actor. + let (response, receiver) = oneshot::channel(); + let _ = self.sender.enqueue(Message::Verify { + context, + ancestry: Box::pin(ancestry), + response, + }); + receiver + .await + .expect("stateful actor dropped during verify") + } +} + +impl Reporter for Mailbox +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + type Activity = Update; + + fn report(&mut self, activity: Self::Activity) -> Feedback { + let message = match activity { + Update::Tip(_, height, digest) => Message::Tip { height, digest }, + Update::Block(block, acknowledgement) => Message::Finalized { + block, + acknowledgement, + }, + }; + + self.sender.enqueue(message) + } +} diff --git a/glue/src/stateful/actor/metrics.rs b/glue/src/stateful/actor/metrics.rs new file mode 100644 index 00000000000..5ef67842b09 --- /dev/null +++ b/glue/src/stateful/actor/metrics.rs @@ -0,0 +1,102 @@ +//! 
Metrics for the [`Processor`](super::processor::Processor). + +use commonware_runtime::{ + telemetry::metrics::{histogram::Timed, Registered}, + Metrics as MetricsTrait, +}; +use prometheus_client::metrics::{counter::Counter, gauge::Gauge, histogram::Histogram}; + +/// Buckets for histograms. +/// +/// These buckets are much less coarse than [`Buckets::LOCAL`]. +/// +/// [`Buckets::LOCAL`]: commonware_runtime::telemetry::metrics::histogram::Buckets::LOCAL +const BUCKETS: [f64; 10] = [0.001, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0]; + +/// Metrics for the stateful processor. +/// +/// All duration histograms use [`Timed`] wrappers for automatic recording via +/// [`Timer`](commonware_runtime::telemetry::metrics::histogram::Timer). +#[derive(Clone)] +pub(crate) struct Metrics { + /// Current number of entries in the in-memory pending map. + pub pending_blocks: Registered, + + /// Total pending entries pruned after finalizations. + pub pruned_forks: Registered, + + /// Wall-clock duration of a full propose cycle. + pub propose_duration: Timed, + + /// Wall-clock duration of a full verify cycle. + pub verify_duration: Timed, + + /// Wall-clock duration of a finalization. + pub finalize_duration: Timed, + + /// Wall-clock duration of lazy-recovery replays via `rebuild_pending`. + pub rebuild_pending_duration: Timed, + + /// Number of blocks replayed during the most recent `rebuild_pending` call. + pub rebuild_pending_depth: Registered, +} + +impl Metrics { + /// Create and register all processor metrics. + /// + /// Metrics are registered directly on the provided `context` rather than on a child + /// context, which avoids further nesting the label hierarchy. 
+ pub fn new(context: E) -> Self { + let pending_blocks = context.register( + "pending_blocks", + "Current entries in the in-memory pending map", + Gauge::default(), + ); + + let pruned_forks = context.register( + "pruned_forks", + "Total pending entries pruned after finalizations", + Counter::default(), + ); + + let propose_hist = context.register( + "propose_duration", + "Wall-clock duration of a full propose cycle", + Histogram::new(BUCKETS), + ); + + let verify_hist = context.register( + "verify_duration", + "Wall-clock duration of a full verify cycle", + Histogram::new(BUCKETS), + ); + + let finalize_hist = context.register( + "finalize_duration", + "Wall-clock duration of a finalization", + Histogram::new(BUCKETS), + ); + + let rebuild_hist = context.register( + "rebuild_pending_duration", + "Wall-clock duration of lazy-recovery replays", + Histogram::new(BUCKETS), + ); + + let rebuild_pending_depth = context.register( + "rebuild_pending_depth", + "Blocks replayed during the most recent rebuild_pending", + Gauge::default(), + ); + + Self { + pending_blocks, + pruned_forks, + propose_duration: Timed::new(propose_hist), + verify_duration: Timed::new(verify_hist), + finalize_duration: Timed::new(finalize_hist), + rebuild_pending_duration: Timed::new(rebuild_hist), + rebuild_pending_depth, + } + } +} diff --git a/glue/src/stateful/actor/mod.rs b/glue/src/stateful/actor/mod.rs new file mode 100644 index 00000000000..cd86a825d75 --- /dev/null +++ b/glue/src/stateful/actor/mod.rs @@ -0,0 +1,11 @@ +mod core; +pub use core::{Config, StartupMode, Stateful}; + +mod mailbox; +pub use mailbox::Mailbox; + +mod bootstrap; + +mod metrics; + +mod processor; diff --git a/glue/src/stateful/actor/processor.rs b/glue/src/stateful/actor/processor.rs new file mode 100644 index 00000000000..ac3cc953c97 --- /dev/null +++ b/glue/src/stateful/actor/processor.rs @@ -0,0 +1,1557 @@ +//! Speculative execution engine for the [`Stateful`](super::Stateful) actor. +//! +//! 
The [`Processor`] owns the in-memory pending-tip DAG and the committed +//! database set. It is the workhorse behind the actor's `Processing` mode, +//! handling three operations: +//! +//! - Propose/Verify: fork unmerkleized batches from a parent's pending +//! state (or from committed state), delegate to the [`Application`], and +//! cache the resulting merkleized batches keyed by block digest. +//! +//! - Lazy recovery: when a parent's pending state is missing (e.g. after +//! restart), [`Processor::rebuild_pending`] walks the block DAG backward +//! via marshal to the nearest known anchor, then replays +//! forward via [`Application::apply`], inserting each intermediate result +//! into the pending map. +//! +//! - Finalization: apply the winning fork's merkleized batches to the +//! committed databases, then prune all pending entries at or below the +//! finalized round. +//! +//! All propose/verify paths are cancellation-aware: if the caller drops the +//! response channel, in-progress work stops at the next await point via +//! [`await_or_cancel`]. 
+ +use super::metrics::Metrics as ProcessorMetrics; +use crate::stateful::{ + db::{Anchor, DatabaseSet}, + Application, Proposed, +}; +use commonware_consensus::{ + marshal::{ + ancestry::BlockProvider, + core::{DigestFallback, Mailbox as MarshalMailbox, Variant as MarshalVariant}, + }, + types::{Height, Round}, + Block, CertifiableBlock, Epochable, Heightable, Viewable, +}; +use commonware_cryptography::{certificate::Scheme, Digestible}; +use commonware_macros::select; +use commonware_runtime::{telemetry::metrics::GaugeExt, Clock, Metrics, Spawner}; +use commonware_utils::channel::{fallible::OneshotExt, oneshot}; +use futures::{stream, Stream, StreamExt}; +use rand::Rng; +use std::{ + collections::{BTreeMap, HashSet, VecDeque}, + future::Future, +}; +use tracing::{debug, warn}; + +type PendingDigest = <>::Block as Digestible>::Digest; +type PendingBatches = <>::Databases as DatabaseSet>::Merkleized; + +/// Cached speculative state for a block digest. +struct PendingEntry +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + round: Round, + parent: PendingDigest, + merkleized: PendingBatches, +} + +type PendingMap = BTreeMap, PendingEntry>; + +/// Errors while preparing parent-relative batches for propose/verify. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(super) enum PrepareBatchesError { + /// Parent ancestry is provably invalid. + Invalid, + /// Caller dropped the response while waiting. + Cancelled, +} + +/// Finalization result for a finalized block report. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(super) enum FinalizeStatus { + /// The finalized digest was already processed. + Duplicate, + + /// The finalized state was persisted and in-memory forks were pruned. + Persisted { height: Height }, +} + +/// Owns speculative execution and state persistence for a running stateful actor. 
+pub(super) struct Processor +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + app: A, + databases: A::Databases, + pending: PendingMap, + last_processed: Anchor>, + metrics: ProcessorMetrics, +} + +impl Processor +where + E: Rng + Spawner + Metrics + Clock, + A: Application, +{ + /// Create a new processor with the given application, databases, and + /// the last finalized block's anchor. + pub(super) const fn new( + app: A, + databases: A::Databases, + last_processed: Anchor>, + metrics: ProcessorMetrics, + ) -> Self { + Self { + app, + databases, + pending: BTreeMap::new(), + last_processed, + metrics, + } + } + + /// Delegate to the application to produce the genesis block. + pub(super) async fn genesis(&mut self) -> A::Block { + self.app.genesis().await + } + + /// Prepare parent-relative batches and delegate to the application to + /// build a new block proposal. The resulting block and its merkleized + /// state are cached in `pending`. Sends `None` on `response` if the + /// ancestry is invalid or the application declines to propose. 
+ pub(super) async fn propose( + &mut self, + context: &E, + marshal: MarshalMailbox, + (runtime_context, consensus_context): (E, A::Context), + ancestry: impl Stream + Send + 'static, + input_provider: &mut A::InputProvider, + mut response: oneshot::Sender>, + ) where + S: Scheme, + V: MarshalVariant, + MarshalMailbox: BlockProvider, + { + let timer = self.metrics.propose_duration.timer(context); + + let mut ancestry = Box::pin(ancestry); + let Some(parent) = ancestry.next().await else { + response.send_lossy(None); + return; + }; + let parent_digest = parent.digest(); + let ancestry = stream::once(std::future::ready(parent.clone())).chain(ancestry); + + let round = Round::new(consensus_context.epoch(), consensus_context.view()); + let batches = match self + .prepare_batches(context, marshal, parent, &mut response) + .await + { + Ok(batches) => batches, + Err(PrepareBatchesError::Invalid) => { + response.send_lossy(None); + return; + } + Err(PrepareBatchesError::Cancelled) => { + debug!( + ?parent_digest, + "proposal request cancelled during prepare_batches" + ); + return; + } + }; + + let proposed = match await_or_cancel( + &mut response, + self.app.propose( + (runtime_context, consensus_context), + ancestry, + batches, + input_provider, + ), + ) + .await + { + Some(result) => result, + None => { + debug!(?parent_digest, "proposal request cancelled during propose"); + return; + } + }; + + let Some(Proposed { block, merkleized }) = proposed else { + response.send_lossy(None); + return; + }; + self.cache_pending(block.digest(), parent_digest, round, merkleized); + let _ = self.metrics.pending_blocks.try_set(self.pending.len()); + timer.observe(context); + response.send_lossy(Some(block)); + } + + /// Prepare parent-relative batches and delegate to the application to + /// verify a received block. On success the block's merkleized state is + /// cached in `pending` and `true` is sent on `response`. 
+ pub(super) async fn verify( + &mut self, + context: &E, + marshal: MarshalMailbox, + (runtime_context, consensus_context): (E, A::Context), + ancestry: impl Stream + Send + 'static, + mut response: oneshot::Sender, + ) where + S: Scheme, + V: MarshalVariant, + MarshalMailbox: BlockProvider, + { + let timer = self.metrics.verify_duration.timer(context); + + let mut ancestry = Box::pin(ancestry); + let Some(block) = ancestry.next().await else { + response.send_lossy(false); + return; + }; + let block_digest = block.digest(); + + // If the block has already been executed, don't execute again. + if self.pending.contains_key(&block_digest) { + timer.observe(context); + response.send_lossy(true); + return; + } + + // The voter may ask us to verify blocks that are at or below the + // already-processed height. This happens because marshal/state sync and + // simplex advance on different message streams. + // + // Re-execution is impossible because databases already contain state at + // or beyond that height, but we still need to prove the block matches + // the canonical finalized chain before short-circuiting. + // + // `last_processed.height` is only advanced from finalized state + // (genesis, startup reconciliation, or finalize/ack path). 
+ match is_already_processed(self.last_processed, marshal.clone(), &block, &mut response) + .await + { + Ok(true) => { + timer.observe(context); + response.send_lossy(true); + return; + } + Ok(false) => { + if block.height() <= self.last_processed.height { + response.send_lossy(false); + return; + } + } + Err(PrepareBatchesError::Cancelled) => { + debug!( + ?block_digest, + "verification request cancelled during processed-block check" + ); + return; + } + Err(PrepareBatchesError::Invalid) => { + unreachable!("processed-block check cannot return Invalid") + } + } + + let round = Round::new(consensus_context.epoch(), consensus_context.view()); + let Some(parent) = ancestry.next().await else { + response.send_lossy(false); + return; + }; + let parent_digest = parent.digest(); + let batches = match self + .prepare_batches(context, marshal, parent.clone(), &mut response) + .await + { + Ok(batches) => batches, + Err(PrepareBatchesError::Invalid) => { + warn!( + ?parent_digest, + ?block_digest, + pending_keys = self.pending.len(), + last_processed = ?self.last_processed.digest, + "verification rejected: prepare_batches returned Invalid" + ); + response.send_lossy(false); + return; + } + Err(PrepareBatchesError::Cancelled) => { + debug!( + ?parent_digest, + "verification request cancelled during prepare_batches" + ); + return; + } + }; + + let ancestry = stream::iter([block.clone(), parent]).chain(ancestry); + let verified = match await_or_cancel( + &mut response, + self.app + .verify((runtime_context, consensus_context), ancestry, batches), + ) + .await + { + Some(result) => result, + None => { + debug!( + ?parent_digest, + "verification request cancelled during verify" + ); + return; + } + }; + + let Some(merkleized) = verified else { + warn!( + ?parent_digest, + ?block_digest, + "verification rejected: app.verify returned None" + ); + response.send_lossy(false); + return; + }; + self.cache_pending(block_digest, parent_digest, round, merkleized); + let _ = 
self.metrics.pending_blocks.try_set(self.pending.len()); + timer.observe(context); + response.send_lossy(true); + } + + /// Ensure parent state exists, then prepare unmerkleized batches for execution. + pub(super) async fn prepare_batches( + &mut self, + context: &E, + marshal: MarshalMailbox, + parent: A::Block, + response: &mut oneshot::Sender, + ) -> Result<>::Unmerkleized, PrepareBatchesError> + where + S: Scheme, + V: MarshalVariant, + MarshalMailbox: BlockProvider, + { + let parent_digest = parent.digest(); + // Rebuild pending state if no pending state exists for the parent and the + // parent is not the processed tip. + if self.last_processed.digest != parent_digest && !self.pending.contains_key(&parent_digest) + { + self.rebuild_pending(context, marshal, parent, response) + .await?; + } + + await_or_cancel(response, self.fork_batches(&parent_digest)) + .await + .unwrap_or(Err(PrepareBatchesError::Cancelled)) + } + + /// Fork unmerkleized batches from known parent state. + pub(super) async fn fork_batches( + &mut self, + parent: &::Digest, + ) -> Result<>::Unmerkleized, PrepareBatchesError> { + if let Some(entry) = self.pending.get(parent) { + return Ok(>::fork_batches( + &entry.merkleized, + )); + } + if &self.last_processed.digest == parent { + return Ok(self.databases.new_batches().await); + } + Err(PrepareBatchesError::Invalid) + } + + /// Rebuild missing pending ancestry up to `target` lazily from marshal. + pub(super) async fn rebuild_pending( + &mut self, + context: &E, + marshal: MarshalMailbox, + target: A::Block, + response: &mut oneshot::Sender, + ) -> Result<(), PrepareBatchesError> + where + S: Scheme, + V: MarshalVariant, + MarshalMailbox: BlockProvider, + { + let timer = self.metrics.rebuild_pending_duration.timer(context); + let target_digest = target.digest(); + + // Walk backward until we hit a known safe anchor. 
+ let mut replay_path = Vec::new(); + let mut cursor = target; + while cursor.digest() != self.last_processed.digest + && !self.pending.contains_key(&cursor.digest()) + { + let Some(parent) = + await_or_cancel(response, marshal.clone().subscribe_parent(&cursor)).await + else { + return Err(PrepareBatchesError::Cancelled); + }; + + let Some(parent) = parent else { + // A dropped subscription is not proof of invalidity, so retry. + // + // This loop is cancellation-bound by consensus timeouts: the + // caller drops `response` when propose/verify expires, and every + // await in this method is wrapped with `await_or_cancel`. So, + // this will never deadlock. + debug!( + ?target_digest, + cursor = ?cursor.digest(), + "ancestor subscription ended before delivery, retrying" + ); + continue; + }; + + let cursor_height = cursor.height(); + if cursor_height <= self.last_processed.height { + warn!( + ?target_digest, + cursor = ?cursor.digest(), + current_height = cursor_height.get(), + last_processed_height = self.last_processed.height.get(), + last_processed = ?self.last_processed.digest, + "rebuild_pending reached stale ancestry below processed height" + ); + return Err(PrepareBatchesError::Invalid); + } + + // By definition, there are no blocks below height 0. + if cursor_height.previous().is_none() { + warn!( + ?target_digest, + cursor = ?cursor.digest(), + reached_height = %cursor_height, + last_processed = ?self.last_processed.digest, + pending_keys = self.pending.len(), + "rebuild reached ancestry boundary without known anchor" + ); + return Err(PrepareBatchesError::Invalid); + } + + replay_path.push(cursor); + cursor = parent; + } + + let depth = replay_path.len(); + + // Replay from oldest to newest and cache intermediate tips. 
+ for block in replay_path.into_iter().rev() { + let (digest, parent_digest) = (block.digest(), block.parent()); + let consensus_context = block.context(); + let round = Round::new(consensus_context.epoch(), consensus_context.view()); + + let Some(batches) = await_or_cancel(response, self.fork_batches(&parent_digest)).await + else { + return Err(PrepareBatchesError::Cancelled); + }; + let batches = batches.expect("rebuild replay parent must be available"); + + let Some(merkleized) = await_or_cancel( + response, + self.app.apply( + (context.child("rebuild_pending_apply"), consensus_context), + &block, + batches, + ), + ) + .await + else { + return Err(PrepareBatchesError::Cancelled); + }; + + self.cache_pending(digest, parent_digest, round, merkleized); + } + + let _ = self.metrics.pending_blocks.try_set(self.pending.len()); + let _ = self.metrics.rebuild_pending_depth.try_set(depth); + timer.observe(context); + Ok(()) + } + + /// Persist finalized state and prune dead in-memory forks. + pub(super) async fn finalize(&mut self, context: &E, block: A::Block) -> FinalizeStatus { + let (height, digest) = (block.height(), block.digest()); + if height < self.last_processed.height { + panic!( + "received finalized block below processed height: finalized={} processed={}", + height.get(), + self.last_processed.height.get(), + ); + } + if height == self.last_processed.height { + assert_eq!( + digest, self.last_processed.digest, + "received conflicting finalized block at processed height", + ); + return FinalizeStatus::Duplicate; + } + + let timer = self.metrics.finalize_duration.timer(context); + let block_context = block.context(); + let round = Round::new(block_context.epoch(), block_context.view()); + + // Marshal finalization is ordered. A pending miss means we can replay + // this block on top of finalized state. + // + // Safety contract: replayed `Application::apply` output must match the + // block commitments previously enforced by `Application::verify`. 
+ let batch = match self.pending.remove(&digest) { + Some(entry) => entry.merkleized, + None => { + let batches = self.databases.new_batches().await; + let batch = self + .app + .apply( + (context.child("finalize_replay"), block_context), + &block, + batches, + ) + .await; + assert!( + A::Databases::matches_sync_targets(&batch, &A::sync_targets(&block)), + "finalize replay state root must match block commitments", + ); + batch + } + }; + + self.databases.finalize(batch).await; + self.app + .finalized( + (context.child("finalized"), block.context()), + &block, + &self.databases, + ) + .await; + self.prune_pending_after_finalize(&digest, round); + self.last_processed = Anchor { + height, + round, + digest, + }; + timer.observe(context); + + FinalizeStatus::Persisted { height } + } + + /// Remove pending state that is not compatible with the finalized winner. + /// + /// A pending block is kept only when: + /// - it is a descendant of `finalized_digest`, and + /// - it was created after `finalized_round`. 
+ fn prune_pending_after_finalize( + &mut self, + finalized_digest: &::Digest, + finalized_round: Round, + ) { + let mut children_by_parent = BTreeMap::new(); + for (candidate_digest, entry) in &self.pending { + children_by_parent + .entry(entry.parent) + .or_insert_with(Vec::new) + .push(*candidate_digest); + } + + let mut compatible = HashSet::new(); + compatible.insert(*finalized_digest); + + let mut to_visit = VecDeque::new(); + to_visit.push_back(*finalized_digest); + while let Some(parent) = to_visit.pop_front() { + let Some(children) = children_by_parent.get(&parent) else { + continue; + }; + + for &child in children { + if compatible.insert(child) { + to_visit.push_back(child); + } + } + } + + let before = self.pending.len(); + self.pending.retain(|candidate_digest, entry| { + entry.round > finalized_round && compatible.contains(candidate_digest) + }); + let pruned = before - self.pending.len(); + self.metrics.pruned_forks.inc_by(pruned as u64); + let _ = self.metrics.pending_blocks.try_set(self.pending.len()); + } + + /// Cache merkleized pending state for a block digest. + fn cache_pending( + &mut self, + digest: PendingDigest, + parent: PendingDigest, + round: Round, + merkleized: PendingBatches, + ) { + if let Some(existing) = self.pending.get(&digest) { + debug_assert_eq!(existing.parent, parent, "pending parent changed for digest"); + debug_assert_eq!(existing.round, round, "pending round changed for digest"); + return; + } + self.pending.insert( + digest, + PendingEntry { + round, + parent, + merkleized, + }, + ); + } +} + +/// Returns true when `block` is already covered by committed state. 
+async fn is_already_processed(
+    last_processed: Anchor<::Digest>,
+    marshal: MarshalMailbox,
+    block: &V::ApplicationBlock,
+    response: &mut oneshot::Sender,
+) -> Result
+where
+    S: Scheme,
+    V: MarshalVariant,
+    V::ApplicationBlock: Block + Clone,
+    MarshalMailbox: BlockProvider,
+{
+    // Decide whether `block` lies on the ancestry of `last_processed`:
+    // - above the processed height: `Ok(false)`;
+    // - at the processed height: digest equality with `last_processed`;
+    // - below it: walk parents from the processed block down to `block`'s
+    //   height and compare digests there.
+    // Yields `Err(PrepareBatchesError::Cancelled)` when `await_or_cancel`
+    // reports that `response` was closed while a fetch was in flight.
+    let target_height = block.height();
+    if target_height > last_processed.height {
+        return Ok(false);
+    }
+    if target_height == last_processed.height {
+        return Ok(block.digest() == last_processed.digest);
+    }
+
+    // Start the walk from the last processed block itself.
+    let Some(fetched) = await_or_cancel(
+        response,
+        marshal.clone().subscribe_by_digest(
+            last_processed.digest,
+            DigestFallback::FetchByRound {
+                round: last_processed.round,
+            },
+        ),
+    )
+    .await
+    else {
+        return Err(PrepareBatchesError::Cancelled);
+    };
+    // A failed fetch is logged and treated as "not already processed".
+    let Some(mut cursor) = fetched.ok().map(V::into_inner) else {
+        warn!(
+            last_processed = ?last_processed.digest,
+            target_height = target_height.get(),
+            processed_height = last_processed.height.get(),
+            "failed to fetch canonical processed ancestry for stale-block check"
+        );
+        return Ok(false);
+    };
+
+    loop {
+        let cursor_height = cursor.height();
+        if cursor_height == target_height {
+            return Ok(cursor.digest() == block.digest());
+        }
+        // The walk stepped past the target height without landing on it.
+        if cursor_height < target_height {
+            return Ok(false);
+        }
+
+        let Some(canonical) =
+            await_or_cancel(response, marshal.clone().subscribe_parent(&cursor)).await
+        else {
+            return Err(PrepareBatchesError::Cancelled);
+        };
+        let Some(canonical) = canonical else {
+            warn!(
+                cursor = ?cursor.digest(),
+                target_height = target_height.get(),
+                processed_height = last_processed.height.get(),
+                "failed to fetch canonical processed ancestry for stale-block check"
+            );
+            return Ok(false);
+        };
+
+        cursor = canonical;
+    }
+}
+
+/// Wait for `future` unless the response receiver is dropped.
+///
+/// Returns `None` when `response.closed()` resolves first, otherwise
+/// `Some(output)` with the value produced by `future`.
+pub(super) async fn await_or_cancel(
+    response: &mut oneshot::Sender,
+    future: F,
+) -> Option
+where
+    F: Future,
+{
+    select! {
+        _ = response.closed() => None,
+        output = future => Some(output),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{await_or_cancel, FinalizeStatus, PrepareBatchesError, Processor};
+    use crate::stateful::{
+        actor::metrics::Metrics as ProcessorMetrics,
+        db::{Anchor, DatabaseSet, Merkleized as _, Unmerkleized as _},
+        Application, Proposed,
+    };
+    use commonware_codec::{Encode, EncodeSize, Error as CodecError, Read, ReadExt as _, Write};
+    use commonware_consensus::{
+        marshal::ancestry::BlockProvider,
+        simplex::{mocks::scheme::Scheme as MockScheme, types::Context as ConsensusContext},
+        types::{Epoch, Height, Round, View},
+        Block as ConsensusBlock, CertifiableBlock, Epochable, Heightable, Viewable,
+    };
+    use commonware_cryptography::{
+        ed25519, sha256::Digest, Digest as _, Digestible, Hasher, Sha256, Signer as _,
+    };
+    use commonware_parallel::Sequential;
+    use commonware_runtime::{
+        buffer::paged::CacheRef, deterministic, ContextCell, Runner as _, Supervisor as _,
+    };
+    use commonware_storage::{
+        journal::contiguous::fixed::Config as FixedLogConfig,
+        mmr::{self, full::Config as MmrJournalConfig, Location},
+        qmdb::{any, any::sync::Target},
+        translator::TwoCap,
+    };
+    use commonware_utils::{
+        channel::oneshot,
+        non_empty_range,
+        range::NonEmptyRange,
+        sync::{AsyncRwLock, Mutex},
+        NZUsize, NZU16, NZU64,
+    };
+    use futures::{Stream, StreamExt};
+    use std::{
+        collections::BTreeMap,
+        future::Future,
+        num::NonZeroUsize,
+        sync::{
+            atomic::{AtomicUsize, Ordering},
+            Arc,
+        },
+    };
+
+    type TestContext = ConsensusContext;
+
+    const PAGE_SIZE: std::num::NonZeroU16 = NZU16!(1024);
+    const PAGE_CACHE_SIZE: NonZeroUsize = NZUsize!(8);
+    const IO_BUFFER_SIZE: NonZeroUsize = NZUsize!(2048);
+
+    type Qmdb =
+        any::unordered::fixed::Db;
+    type DbSet = Arc>>;
+
+    /// Minimal block used by the processor tests.
+    #[derive(Clone, Debug, PartialEq, Eq)]
+    struct Block {
+        context: TestContext,
+        parent: Digest,
+        height: Height,
+        state_root: Digest,
+        range: NonEmptyRange,
+    }
+
+    // Fields are written in declaration order; `read_cfg` reads them back in
+    // the same order.
+    impl Write for Block {
+        fn write(&self, buf: &mut impl commonware_runtime::BufMut) {
+            self.context.write(buf);
+            self.parent.write(buf);
+            self.height.write(buf);
+            self.state_root.write(buf);
+            self.range.write(buf);
+        }
+    }
+
+    impl EncodeSize for Block {
+        fn encode_size(&self) -> usize {
+            self.context.encode_size()
+                + self.parent.encode_size()
+                + self.height.encode_size()
+                + self.state_root.encode_size()
+                + self.range.encode_size()
+        }
+    }
+
+    impl Read for Block {
+        type Cfg = ();
+
+        fn read_cfg(
+            buf: &mut impl commonware_runtime::Buf,
+            _: &Self::Cfg,
+        ) -> Result {
+            Ok(Self {
+                context: TestContext::read(buf)?,
+                parent: Digest::read(buf)?,
+                height: Height::read(buf)?,
+                state_root: Digest::read(buf)?,
+                range: commonware_utils::range::NonEmptyRange::read(buf)?,
+            })
+        }
+    }
+
+    impl Digestible for Block {
+        type Digest = Digest;
+
+        // The digest is the SHA-256 hash of the block's encoding.
+        fn digest(&self) -> Digest {
+            Sha256::hash(&self.encode())
+        }
+    }
+
+    impl Heightable for Block {
+        fn height(&self) -> Height {
+            self.height
+        }
+    }
+
+    impl ConsensusBlock for Block {
+        fn parent(&self) -> Digest {
+            self.parent
+        }
+    }
+
+    impl CertifiableBlock for Block {
+        type Context = TestContext;
+
+        fn context(&self) -> Self::Context {
+            self.context.clone()
+        }
+    }
+
+    impl Block {
+        /// Height-zero block with empty parent and state root.
+        fn genesis() -> Self {
+            Self {
+                context: consensus_context(Digest::EMPTY, View::zero()),
+                parent: Digest::EMPTY,
+                height: Height::zero(),
+                state_root: Digest::EMPTY,
+                range: non_empty_range!(Location::new(0), Location::new(1)),
+            }
+        }
+    }
+
+    /// Build a consensus context for `view` in epoch zero whose parent view
+    /// is `view - 1` (or zero when `view` is zero).
+    fn consensus_context(parent: Digest, view: View) -> TestContext {
+        TestContext {
+            round: Round::new(Epoch::zero(), view),
+            leader: ed25519::PrivateKey::from_seed(0).public_key(),
+            parent: (
+                if view.is_zero() {
+                    View::zero()
+                } else {
+                    View::new(view.get() - 1)
+                },
+                parent,
+            ),
+        }
+    }
+
+    /// Store `value` big-endian in the first 8 bytes of a zeroed 32-byte digest.
+    fn u64_to_digest(value: u64) -> Digest {
+        let mut bytes = [0u8; 32];
+        bytes[..8].copy_from_slice(&value.to_be_bytes());
+        Digest::from(bytes)
+    }
+
+    /// Inverse of `u64_to_digest`: read the first 8 bytes as a big-endian `u64`.
+    fn digest_to_u64(value: &Digest) -> u64 {
+        let bytes: &[u8] = value.as_ref();
+        u64::from_be_bytes(
+            bytes[..8]
+                .try_into()
+                .expect("digest prefix should be 8 bytes"),
+        )
+    }
+
+    /// Database key under which `execute` records the view for `height`.
+    fn height_key(height: Height) -> Digest {
+        Sha256::hash(&height.get().to_be_bytes())
+    }
+
+    /// Database key of the counter that `execute` increments on every call.
+    fn counter_key() -> Digest {
+        Sha256::hash(b"processor_harness_counter")
+    }
+
+    /// State used by the `finalized` hook to reopen the database and record
+    /// the counter value it observes.
+    #[derive(Clone)]
+    struct FinalizedObserver {
+        db_config: any::FixedConfig,
+        reopened_counters: Arc>>,
+    }
+
+    /// Test application whose state transition is `ExecutionApp::execute`.
+    #[derive(Clone)]
+    struct ExecutionApp {
+        genesis: Block,
+        finalized_observer: Option,
+    }
+
+    impl ExecutionApp {
+        fn new() -> Self {
+            Self {
+                genesis: Block::genesis(),
+                finalized_observer: None,
+            }
+        }
+
+        /// Build an app with a finalized observer, returning the shared list
+        /// the observer appends to.
+        fn with_finalized_observer(
+            db_config: any::FixedConfig,
+        ) -> (Self, Arc>>) {
+            let reopened_counters = Arc::new(Mutex::new(Vec::new()));
+            let observer = FinalizedObserver {
+                db_config,
+                reopened_counters: reopened_counters.clone(),
+            };
+            (
+                Self {
+                    genesis: Block::genesis(),
+                    finalized_observer: Some(observer),
+                },
+                reopened_counters,
+            )
+        }
+
+        /// Increment the counter key, record `view` under `height_key(height)`,
+        /// and merkleize the batch.
+        async fn execute(
+            height: Height,
+            view: View,
+            mut batches: as DatabaseSet>::Unmerkleized,
+        ) -> as DatabaseSet>::Merkleized
+        {
+            // A missing counter is treated as zero.
+            let current_counter = batches
+                .get(&counter_key())
+                .await
+                .expect("counter read should succeed")
+                .map_or(0, |digest| digest_to_u64(&digest));
+            batches = batches.write(counter_key(), Some(u64_to_digest(current_counter + 1)));
+            batches = batches.write(height_key(height), Some(u64_to_digest(view.get())));
+            batches.merkleize().await.expect("merkleize should succeed")
+        }
+    }
+
+    impl Application for ExecutionApp {
+        type SigningScheme = MockScheme;
+        type Context = TestContext;
+        type Block = Block;
+        type Databases = DbSet;
+        type InputProvider = ();
+
+        async fn genesis(&mut self) -> Self::Block {
+            self.genesis.clone()
+        }
+
+        // Builds a child of the first ancestry item and commits to the state
+        // root produced by `execute`.
+        async fn propose(
+            &mut self,
+            context: (deterministic::Context, Self::Context),
+            ancestry: impl Stream + Send,
+            batches: >::Unmerkleized,
+            _input: &mut Self::InputProvider,
+        ) -> Option> {
+            let mut ancestry = Box::pin(ancestry);
+            let parent = ancestry.next().await?;
+            let context = context.1.clone();
+            let view = context.round.view();
+            let height = parent.height().next();
+            let merkleized = Self::execute(height, view, batches).await;
+            let block = Block {
+                context,
+                parent: parent.digest(),
+                height,
+                state_root: merkleized.root(),
+                range: non_empty_range!(Location::new(0), Location::new(1)),
+            };
+            Some(Proposed { block, merkleized })
+        }
+
+        // Re-executes the first ancestry item and rejects it (`None`) when the
+        // resulting root differs from the block's `state_root`.
+        async fn verify(
+            &mut self,
+            _context: (deterministic::Context, Self::Context),
+            ancestry: impl Stream + Send,
+            batches: >::Unmerkleized,
+        ) -> Option<>::Merkleized> {
+            let mut ancestry = Box::pin(ancestry);
+            let block = ancestry.next().await?;
+            let merkleized =
+                Self::execute(block.height(), block.context.round.view(), batches).await;
+            if merkleized.root() != block.state_root {
+                return None;
+            }
+            Some(merkleized)
+        }
+
+        async fn apply(
+            &mut self,
+            _context: (deterministic::Context, Self::Context),
+            block: &Self::Block,
+            batches: >::Unmerkleized,
+        ) -> >::Merkleized {
+            Self::execute(block.height(), block.context.round.view(), batches).await
+        }
+
+        fn sync_targets(
+            block: &Self::Block,
+        ) -> >::SyncTargets {
+            Target::new(block.state_root, block.range.clone())
+        }
+
+        // When an observer is configured, reopen the database from its config
+        // and record the counter value read from the reopened instance.
+        async fn finalized(
+            &mut self,
+            context: (deterministic::Context, Self::Context),
+            _block: &Self::Block,
+            _databases: &Self::Databases,
+        ) {
+            let Some(observer) = &self.finalized_observer else {
+                return;
+            };
+
+            let reopened = Qmdb::init(
+                context.0.child("finalized_observer_reopen"),
+                observer.db_config.clone(),
+            )
+            .await
+            .expect("database reopen inside finalized hook should succeed");
+            let counter = reopened
+                .get(&counter_key())
+                .await
+                .expect("reopened counter read should succeed")
+                .map(|value| digest_to_u64(&value))
+                .unwrap_or(0);
+            observer.reopened_counters.lock().push(counter);
+        }
+    }
+
+    /// In-memory block store keyed by digest that counts lookups.
+    #[derive(Clone, Default)]
+    struct MapProvider {
+        blocks: Arc>>,
+        fetches: Arc,
+    }
+
+    impl MapProvider {
+        fn insert(&self, block: Block) {
+            self.blocks.lock().insert(block.digest(), block);
+        }
+
+        // Every call is counted, whether or not the digest is present.
+        fn fetch_by_digest(&self, digest: Digest) -> Option {
+            self.fetches.fetch_add(1, Ordering::SeqCst);
+            self.blocks.lock().get(&digest).cloned()
+        }
+
+        fn fetches(&self) -> usize {
+            self.fetches.load(Ordering::SeqCst)
+        }
+    }
+
+    impl BlockProvider for MapProvider {
+        type Block = Block;
+
+        fn subscribe_parent(
+            &self,
+            block: &Self::Block,
+        ) -> impl Future> + Send + 'static {
+            let provider = self.clone();
+            let parent = block.parent();
+            async move { provider.fetch_by_digest(parent) }
+        }
+    }
+
+    /// Bundles a `Processor`, its block provider, and the database config so
+    /// tests can stage, finalize, and inspect blocks.
+    struct Harness {
+        context_cell: ContextCell,
+        processor: Processor,
+        provider: MapProvider,
+        db_config: any::FixedConfig,
+        finalized_reopened_counters: Option>>>,
+    }
+
+    impl Harness {
+        async fn new(context: deterministic::Context) -> Self {
+            let provider = MapProvider::default();
+            let config = qmdb_config(&next_partition_prefix(), &context);
+            Self::with_app(context, provider, config.clone(), ExecutionApp::new(), None).await
+        }
+
+        async fn new_with_finalized_observer(context: deterministic::Context) -> Self {
+            let provider = MapProvider::default();
+            let config = qmdb_config(&next_partition_prefix(), &context);
+            let (app, finalized_reopened_counters) =
+                ExecutionApp::with_finalized_observer(config.clone());
+            Self::with_app(
+                context,
+                provider,
+                config,
+                app,
+                Some(finalized_reopened_counters),
+            )
+            .await
+        }
+
+        // Shared constructor: initializes the database set and a processor
+        // anchored at the genesis block.
+        async fn with_app(
+            context: deterministic::Context,
+            provider: MapProvider,
+            config: any::FixedConfig,
+            app: ExecutionApp,
+            finalized_reopened_counters: Option>>>,
+        ) -> Self {
+            let databases = as DatabaseSet<
+                deterministic::Context,
+            >>::init(context.child("db_set"), config.clone())
+            .await;
+            let metrics = ProcessorMetrics::new(context.child("processor_metrics"));
+            Self {
+                context_cell: ContextCell::new(context),
+                processor: Processor::new(
+                    app,
+                    databases,
+                    Anchor {
+                        height: Height::zero(),
+                        round: Block::genesis().context().round,
+                        digest: Block::genesis().digest(),
+                    },
+                    metrics,
+                ),
+                provider,
+                db_config: config,
+                finalized_reopened_counters,
+            }
+        }
+
+        /// Execute a child of `parent` at `view`, cache its merkleized batch
+        /// as pending, register the block with the provider, and return it.
+        async fn stage_pending_child(&mut self, parent: &Block, view: View) -> Block {
+            let context = consensus_context(parent.digest(), view);
+            let height = Height::new(parent.height().get() + 1);
+            let batches = self
+                .processor
+                .fork_batches(&parent.digest())
+                .await
+                .expect("parent should be available");
+            let merkleized = ExecutionApp::execute(height, view, batches).await;
+            let block = Block {
+                context,
+                parent: parent.digest(),
+                height,
+                state_root: merkleized.root(),
+                range: non_empty_range!(Location::new(0), Location::new(1)),
+            };
+            let round = Round::new(Epoch::zero(), view);
+            self.processor
+                .cache_pending(block.digest(), parent.digest(), round, merkleized);
+            self.provider.insert(block.clone());
+            block
+        }
+
+        /// Walk back from `target` to the nearest known state (the last
+        /// processed digest or a pending entry), then re-apply the collected
+        /// blocks oldest-first and cache each result as pending.
+        async fn rebuild_pending(
+            &mut self,
+            target: Digest,
+            response: &mut oneshot::Sender,
+        ) -> Result<(), PrepareBatchesError> {
+            let mut replay_path = Vec::new();
+            let mut cursor = target;
+            while cursor != self.processor.last_processed.digest
+                && !self.processor.pending.contains_key(&cursor)
+            {
+                let Some(block) =
+                    await_or_cancel(response, async { self.provider.fetch_by_digest(cursor) })
+                        .await
+                else {
+                    return Err(PrepareBatchesError::Cancelled);
+                };
+                // NOTE(review): `cursor` is not advanced before `continue`, so a
+                // digest missing from the provider is looked up again on every
+                // iteration; with this in-memory provider that looks like it
+                // would spin until `response` is closed. Confirm the tests
+                // never reach this path.
+                let Some(block) = block else {
+                    continue;
+                };
+                // Blocks at or below the processed height are stale ancestry.
+                if block.height() <= self.processor.last_processed.height {
+                    return Err(PrepareBatchesError::Invalid);
+                }
+                if block.height().previous().is_none() {
+                    return Err(PrepareBatchesError::Invalid);
+                }
+
+                cursor = block.parent();
+                replay_path.push(block);
+            }
+
+            // `replay_path` was collected newest-first; replay it in reverse.
+            for block in replay_path.into_iter().rev() {
+                let (digest, parent_digest) = (block.digest(), block.parent());
+                let consensus_context = block.context();
+                let round = Round::new(consensus_context.epoch(), consensus_context.view());
+                let batches = self
+                    .processor
+                    .fork_batches(&parent_digest)
+                    .await
+                    .expect("rebuild replay parent must be available");
+                let merkleized = self
+                    .processor
+                    .app
+                    .apply(
+                        (
+                            self.context_cell
+                                .as_present()
+                                .child("rebuild_pending_apply"),
+                            consensus_context,
+                        ),
+                        &block,
+                        batches,
+                    )
+                    .await;
+                self.processor
+                    .cache_pending(digest, parent_digest, round, merkleized);
+            }
+
+            Ok(())
+        }
+
+        /// Synchronous counterpart of the stale-block check that walks parents
+        /// through the in-memory provider.
+        fn is_canonical_processed(&self, block: &Block) -> bool {
+            let target_height = block.height();
+            if target_height > self.processor.last_processed.height {
+                return false;
+            }
+            if target_height == self.processor.last_processed.height {
+                return block.digest() == self.processor.last_processed.digest;
+            }
+
+            let mut cursor = self.processor.last_processed.digest;
+            while let Some(canonical) = self.provider.fetch_by_digest(cursor) {
+                let canonical_height = canonical.height();
+                if canonical_height == target_height {
+                    return canonical.digest() == block.digest();
+                }
+                if canonical_height < target_height {
+                    return false;
+                }
+                cursor = canonical.parent();
+            }
+
+            // The provider ran out of ancestors before reaching the target height.
+            false
+        }
+
+        async fn finalize(&mut self, block: Block) -> FinalizeStatus {
+            self.processor
+                .finalize(self.context_cell.as_present(), block)
+                .await
+        }
+
+        /// Value stored under `height_key(height)` in the live database.
+        async fn height_value(&self, height: Height) -> Option {
+            let db = self.processor.databases.read().await;
+            db.get(&height_key(height))
+                .await
+                .expect("database read should succeed")
+                .map(|value| digest_to_u64(&value))
+        }
+
+        /// Value stored under `counter_key()` in the live database.
+        async fn counter_value(&self) -> Option {
+            let db = self.processor.databases.read().await;
+            db.get(&counter_key())
+                .await
+                .expect("database read should succeed")
+                .map(|value| digest_to_u64(&value))
+        }
+
+        /// Like `height_value`, but reads from a database freshly initialized
+        /// with the harness config.
+        async fn reopen_height_value(
+            &self,
+            context: deterministic::Context,
+            height: Height,
+        ) -> Option {
+            let reopened: Qmdb =
+                Qmdb::init(context.child("reopen_db"), self.db_config.clone())
+                    .await
+                    .expect("database reopen should succeed");
+            reopened
+                .get(&height_key(height))
+                .await
+                .expect("reopened db read should succeed")
+                .map(|value| digest_to_u64(&value))
+        }
+
+        /// Snapshot of the counters recorded by the finalized observer;
+        /// panics when the harness was built without one.
+        fn finalized_reopened_counters(&self) -> Vec {
+            self.finalized_reopened_counters
+                .as_ref()
+                .expect("finalized observer should be configured")
+                .lock()
+                .clone()
+        }
+    }
+
+    /// Return a partition prefix that is unique per call within the process.
+    fn next_partition_prefix() -> String {
+        static NEXT_ID: AtomicUsize = AtomicUsize::new(0);
+        let id = NEXT_ID.fetch_add(1, Ordering::SeqCst);
+        format!("processor_harness_{id}")
+    }
+
+    /// Build a fixed QMDB config whose partitions are namespaced by `prefix`.
+    fn qmdb_config(
+        prefix: &str,
+        context: &deterministic::Context,
+    ) -> any::FixedConfig {
+        let page_cache = CacheRef::from_pooler(context, PAGE_SIZE, PAGE_CACHE_SIZE);
+        any::FixedConfig {
+            merkle_config: MmrJournalConfig {
+                journal_partition: format!("{prefix}_mmr_journal"),
+                metadata_partition: format!("{prefix}_mmr_metadata"),
+                items_per_blob: NZU64!(11),
+                write_buffer: IO_BUFFER_SIZE,
+                strategy: Sequential,
+                page_cache: page_cache.clone(),
+            },
+            journal_config: FixedLogConfig {
+                partition: format!("{prefix}_log_journal"),
+                items_per_blob: NZU64!(7),
+                page_cache,
+                write_buffer: IO_BUFFER_SIZE,
+            },
+            translator: TwoCap,
+        }
+    }
+
+    #[test]
+    fn execution_finalization_prunes_losing_fork() {
+        deterministic::Runner::default().start(|context| async move {
+            let mut harness = Harness::new(context).await;
+            let genesis = Block::genesis();
+            let block1 = harness.stage_pending_child(&genesis, View::new(1)).await;
+            // Two siblings at height 2; only `winner` is finalized.
+            let winner = harness.stage_pending_child(&block1, View::new(3)).await;
+            let loser = harness.stage_pending_child(&block1, View::new(2)).await;
+
+            assert!(harness.processor.pending.contains_key(&winner.digest()));
+            assert!(harness.processor.pending.contains_key(&loser.digest()));
+
+            let status = harness.finalize(winner.clone()).await;
+            assert_eq!(
+                status,
+                FinalizeStatus::Persisted {
+                    height: Height::new(2)
+                },
+                "finalization should persist winner state",
+            );
+            assert!(
+                !harness.processor.pending.contains_key(&loser.digest()),
+                "losing fork at finalized round should be pruned",
+            );
+            assert_eq!(harness.processor.last_processed.digest, winner.digest());
+            assert_eq!(harness.height_value(Height::new(2)).await, Some(3));
+        });
+    }
+
+    #[test]
+    fn execution_finalization_prunes_losing_fork_descendants() {
+        deterministic::Runner::default().start(|context| async move {
+            let mut harness = Harness::new(context).await;
+            let genesis = Block::genesis();
+            let block1 = harness.stage_pending_child(&genesis, View::new(1)).await;
+            let loser = harness.stage_pending_child(&block1, View::new(2)).await;
+            let winner = harness.stage_pending_child(&block1, View::new(3)).await;
+            let loser_child = harness.stage_pending_child(&loser, View::new(4)).await;
+
+            assert!(harness.processor.pending.contains_key(&winner.digest()));
+            assert!(harness.processor.pending.contains_key(&loser.digest()));
+            assert!(harness
+                .processor
+                .pending
+                .contains_key(&loser_child.digest()));
+
+            let status = harness.finalize(winner.clone()).await;
+            assert_eq!(
+                status,
+                FinalizeStatus::Persisted {
+                    height: Height::new(2)
+                },
+                "finalization should persist winner state",
+            );
+            assert!(
+                !harness.processor.pending.contains_key(&loser.digest()),
+                "losing fork at finalized round should be pruned",
+            );
+            assert!(
+                !harness
+                    .processor
+                    .pending
+                    .contains_key(&loser_child.digest()),
+                "descendants of the losing fork should also be pruned",
+            );
+        });
+    }
+
+    #[test]
+    fn execution_rebuild_pending_restores_missing_chain() {
+        deterministic::Runner::default().start(|context| async move {
+            let mut harness = Harness::new(context).await;
+            let genesis = Block::genesis();
+            let block1 = harness.stage_pending_child(&genesis, View::new(1)).await;
+            let status = harness.finalize(block1.clone()).await;
+            assert_eq!(
+                status,
+                FinalizeStatus::Persisted {
+                    height: Height::new(1)
+                }
+            );
+
+            let block2 = harness.stage_pending_child(&block1, View::new(2)).await;
+            let block3 = harness.stage_pending_child(&block2, View::new(3)).await;
+            // Drop the cached pending state so it has to be rebuilt from blocks.
+            harness.processor.pending.clear();
+            harness.provider.insert(block2.clone());
+            harness.provider.insert(block3.clone());
+
+            let (mut response, _rx) = oneshot::channel::();
+            let result = harness
+                .rebuild_pending(block3.digest(), &mut response)
+                .await;
+            assert_eq!(result, Ok(()), "rebuild should succeed");
+            assert!(
+                harness.processor.pending.contains_key(&block2.digest()),
+                "first missing descendant should be reconstructed",
+            );
+            assert!(
+                harness.processor.pending.contains_key(&block3.digest()),
+                "target block should be reconstructed",
+            );
+        });
+    }
+
+    #[test]
+    fn execution_rebuild_pending_rejects_stale_ancestor_quickly() {
+        deterministic::Runner::default().start(|context| async move {
+            let mut harness = Harness::new(context).await;
+            let genesis = Block::genesis();
+
+            // Finalize a five-block chain, one block per view.
+            let mut chain = Vec::new();
+            let mut parent = genesis;
+            for view in 1..=5 {
+                let block = harness.stage_pending_child(&parent, View::new(view)).await;
+                let status = harness.finalize(block.clone()).await;
+                assert_eq!(
+                    status,
+                    FinalizeStatus::Persisted {
+                        height: Height::new(view),
+                    }
+                );
+                parent = block.clone();
+                chain.push(block);
+            }
+
+            harness.processor.pending.clear();
+            let stale_parent = chain[1].digest(); // height 2, below processed height 5
+            let fetches_before = harness.provider.fetches();
+
+            let (mut response, _rx) = oneshot::channel::();
+            let result = harness.rebuild_pending(stale_parent, &mut response).await;
+            assert_eq!(
+                result,
+                Err(PrepareBatchesError::Invalid),
+                "stale ancestry should be rejected",
+            );
+
+            let fetches_after = harness.provider.fetches();
+            assert_eq!(
+                fetches_after.saturating_sub(fetches_before),
+                1,
+                "stale ancestry should be rejected after a single fetch",
+            );
+        });
+    }
+
+    #[test]
+    fn execution_verify_rejects_conflicting_stale_block() {
+        deterministic::Runner::default().start(|context| async move {
+            let mut harness = Harness::new(context).await;
+            let genesis = Block::genesis();
+
+            let canonical = harness.stage_pending_child(&genesis, View::new(1)).await;
+            let conflicting = harness.stage_pending_child(&genesis, View::new(2)).await;
+
+            let status = harness.finalize(canonical).await;
+            assert_eq!(
+                status,
+                FinalizeStatus::Persisted {
+                    height: Height::new(1),
+                }
+            );
+
+            assert!(
+                !harness.is_canonical_processed(&conflicting),
+                "conflicting stale block must not be accepted as already processed",
+            );
+        });
+    }
+
+    #[test]
+    #[should_panic(expected = "received conflicting finalized block at processed height")]
+    fn execution_finalize_panics_on_conflicting_duplicate_height() {
+        deterministic::Runner::default().start(|context| async move {
+            let mut harness = Harness::new(context).await;
+            let genesis = Block::genesis();
+
+            let canonical = harness.stage_pending_child(&genesis, View::new(1)).await;
+            let conflicting = harness.stage_pending_child(&genesis, View::new(2)).await;
+
+            let status = harness.finalize(canonical).await;
+            assert_eq!(
+                status,
+                FinalizeStatus::Persisted {
+                    height: Height::new(1),
+                }
+            );
+
+            let _ = harness.finalize(conflicting).await;
+        });
+    }
+
+    #[test]
+    fn execution_finalization_persists_state_to_db() {
+        deterministic::Runner::default().start(|context| async move {
+            let mut harness = Harness::new(context.child("harness")).await;
+            let genesis = Block::genesis();
+            let block1 = harness.stage_pending_child(&genesis, View::new(1)).await;
+
+            let status = harness.finalize(block1).await;
+            assert_eq!(
+                status,
+                FinalizeStatus::Persisted {
+                    height: Height::new(1)
+                }
+            );
+            assert_eq!(harness.counter_value().await, Some(1));
+            assert_eq!(
+                harness
+                    .reopen_height_value(context.child("reopen"), Height::new(1))
+                    .await,
+                Some(1),
+                "height state should survive reopen after finalization",
+            );
+        });
+    }
+
+    #[test]
+    #[should_panic(expected = "finalize replay state root must match block commitments")]
+    fn execution_finalize_replay_rejects_state_root_mismatch() {
+        deterministic::Runner::default().start(|context| async move {
+            let mut harness = Harness::new(context).await;
+            let genesis = Block::genesis();
+            let mut block1 = harness.stage_pending_child(&genesis, View::new(1)).await;
+            // Corrupt the commitment and drop the cached batch before finalizing.
+            block1.state_root = u64_to_digest(999);
+            harness.processor.pending.clear();
+
+            let _ = harness.finalize(block1.clone()).await;
+        });
+    }
+
+    #[test]
+    fn execution_finalized_hook_runs_after_durable_finalize() {
+        deterministic::Runner::default().start(|context| async move {
+            let mut harness = Harness::new_with_finalized_observer(context).await;
+            let genesis = Block::genesis();
+            let block1 = harness.stage_pending_child(&genesis, View::new(1)).await;
+
+            let status = harness.finalize(block1).await;
+            assert_eq!(
+                status,
+                FinalizeStatus::Persisted {
+                    height: Height::new(1)
+                }
+            );
+            assert_eq!(
+                harness.finalized_reopened_counters(),
+                vec![1],
+                "finalized hook should observe the durably committed state",
+            );
+        });
+    }
+}
diff --git a/glue/src/stateful/db/any.rs b/glue/src/stateful/db/any.rs
new file mode 100644
index 00000000000..35962fd4b6f
--- /dev/null
+++ b/glue/src/stateful/db/any.rs
@@ -0,0 +1,612 @@
+//! [`ManagedDb`] implementation for QMDB [`any`](commonware_storage::qmdb::any) databases.
+//!
+//! The QMDB batch API passes `&db` to `get()` and `merkleize()` for
+//! read-through to committed state. This module provides wrapper types
+//! that capture `Arc>` alongside the raw batch so the
+//! [`Unmerkleized`](super::Unmerkleized) and [`Merkleized`](super::Merkleized)
+//! traits can be implemented without a DB parameter.
+
+use crate::stateful::db::{
+    ManagedDb, Merkleized as MerkleizedTrait, StateSyncDb, SyncEngineConfig,
+    Unmerkleized as UnmerkleizedTrait,
+};
+use commonware_codec::{Codec, Read as CodecRead};
+use commonware_cryptography::Hasher;
+use commonware_parallel::Strategy;
+use commonware_runtime::{Clock, Metrics, Storage};
+use commonware_storage::{
+    index::{
+        unordered::Index as UnorderedIdx, Ordered as OrderedIndex, Unordered as UnorderedIndex,
+    },
+    journal::contiguous::{
+        fixed::Journal as FixedJournal, variable::Journal as VariableJournal, Contiguous, Mutable,
+    },
+    merkle::{Family, Location},
+    qmdb::{
+        any::{
+            batch::{MerkleizedBatch, UnmerkleizedBatch},
+            db::Db,
+            operation::{Operation, Update},
+            ordered,
+            sync::Target as AnySyncTarget,
+            unordered,
+            value::{self, FixedEncoding, ValueEncoding, VariableEncoding},
+            FixedConfig, VariableConfig,
+        },
+        operation::Key,
+        sync::{self, resolver::Resolver},
+        Error,
+    },
+    translator::Translator,
+    Persistable,
+};
+use commonware_utils::{channel::mpsc, non_empty_range, sync::AsyncRwLock, Array};
+use std::{ops::Deref, sync::Arc};
+
+// Matches commonware_storage::qmdb::any::BITMAP_CHUNK_BYTES, which is crate-private.
+// NOTE(review): this is a hand-copied value; it must be updated if the
+// upstream constant ever changes.
+const ANY_BITMAP_CHUNK_BYTES: usize = 64;
+
+// Shared handle to the parent database, held by both batch wrappers below.
+type AnyDbHandle =
+    Arc>>;
+
+/// Wraps a QMDB [`UnmerkleizedBatch`] with a reference to the parent
+/// database, implementing the [`Unmerkleized`](super::Unmerkleized) trait.
+pub struct AnyUnmerkleized
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    U: Update,
+    C: Contiguous>,
+    I: UnorderedIndex>,
+    H: Hasher,
+    S: Strategy,
+    Operation: Codec,
+{
+    // Pending, not-yet-merkleized mutations.
+    batch: UnmerkleizedBatch,
+    // Parent database used for read-through and merkleization.
+    db: AnyDbHandle,
+    // Optional commit metadata passed to `merkleize`.
+    metadata: Option,
+}
+
+/// Key-value operations for the `any` unordered update kind.
+impl AnyUnmerkleized, S>
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    K: Key,
+    V: ValueEncoding + 'static,
+    C: Mutable>>
+        + Persistable,
+    I: UnorderedIndex> + 'static,
+    H: Hasher,
+    S: Strategy,
+    Operation>: Codec,
+{
+    /// Set commit metadata included in the next
+    /// [`merkleize`](UnmerkleizedTrait::merkleize) call.
+    ///
+    /// Replaces any metadata set by an earlier call.
+    pub fn with_metadata(mut self, metadata: V::Value) -> Self {
+        self.metadata = Some(metadata);
+        self
+    }
+
+    /// Read a value by key, falling back to committed state.
+    ///
+    /// Takes a read lock on the parent database for the duration of the read.
+    pub async fn get(&self, key: &K) -> Result, Error> {
+        let db = self.db.read().await;
+        self.batch.get(key, &*db).await
+    }
+
+    /// Read multiple values by key, falling back to committed state.
+    ///
+    /// Returns results in the same order as the input keys.
+    pub async fn get_many(&self, keys: &[&K]) -> Result>, Error> {
+        let db = self.db.read().await;
+        self.batch.get_many(keys, &*db).await
+    }
+
+    /// Record a mutation. `Some(value)` for upsert, `None` for delete.
+    ///
+    /// Consumes and returns the wrapper so calls can be chained.
+    pub fn write(mut self, key: K, value: Option) -> Self {
+        self.batch = self.batch.write(key, value);
+        self
+    }
+}
+
+/// Wraps a QMDB [`MerkleizedBatch`] with a reference to the parent
+/// database, implementing the [`Merkleized`](super::Merkleized) trait.
+pub struct AnyMerkleized
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    U: Update,
+    C: Contiguous>,
+    I: UnorderedIndex>,
+    H: Hasher,
+    S: Strategy,
+    Operation: Codec,
+{
+    // Merkleized batch, shared behind an `Arc`.
+    inner: Arc>,
+    // Parent database used for read-through.
+    db: AnyDbHandle,
+}
+
+// Dereferences to the wrapped unmerkleized batch.
+impl Deref for AnyUnmerkleized
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    U: Update,
+    C: Contiguous>,
+    I: UnorderedIndex>,
+    H: Hasher,
+    S: Strategy,
+    Operation: Codec,
+{
+    type Target = UnmerkleizedBatch;
+
+    fn deref(&self) -> &Self::Target {
+        &self.batch
+    }
+}
+
+// Dereferences to the wrapped merkleized batch.
+impl Deref for AnyMerkleized
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    U: Update,
+    C: Contiguous>,
+    I: UnorderedIndex>,
+    H: Hasher,
+    S: Strategy,
+    Operation: Codec,
+{
+    type Target = MerkleizedBatch;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+/// Key-value operations for the `any` ordered update kind.
+impl AnyUnmerkleized, S>
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    K: Key,
+    V: ValueEncoding + 'static,
+    C: Mutable>>
+        + Persistable,
+    I: OrderedIndex> + 'static,
+    H: Hasher,
+    S: Strategy,
+    Operation>: Codec,
+{
+    /// Set commit metadata included in the next
+    /// [`merkleize`](UnmerkleizedTrait::merkleize) call.
+    ///
+    /// Replaces any metadata set by an earlier call.
+    pub fn with_metadata(mut self, metadata: V::Value) -> Self {
+        self.metadata = Some(metadata);
+        self
+    }
+
+    /// Read a value by key, falling back to committed state.
+    ///
+    /// Takes a read lock on the parent database for the duration of the read.
+    pub async fn get(&self, key: &K) -> Result, Error> {
+        let db = self.db.read().await;
+        self.batch.get(key, &*db).await
+    }
+
+    /// Read multiple values by key, falling back to committed state.
+    ///
+    /// Returns results in the same order as the input keys.
+    pub async fn get_many(&self, keys: &[&K]) -> Result>, Error> {
+        let db = self.db.read().await;
+        self.batch.get_many(keys, &*db).await
+    }
+
+    /// Record a mutation. `Some(value)` for upsert, `None` for delete.
+    ///
+    /// Consumes and returns the wrapper so calls can be chained.
+    pub fn write(mut self, key: K, value: Option) -> Self {
+        self.batch = self.batch.write(key, value);
+        self
+    }
+}
+
+/// Read-through operations for the `any` merkleized batch.
+impl AnyMerkleized
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    U: Update,
+    C: Contiguous>,
+    I: UnorderedIndex> + 'static,
+    H: Hasher,
+    S: Strategy,
+    Operation: Codec,
+{
+    /// Read a value by key, falling back to committed state.
+    pub async fn get(&self, key: &U::Key) -> Result, Error> {
+        let db = self.db.read().await;
+        self.inner.get(key, &*db).await
+    }
+
+    /// Read multiple values by key, falling back to committed state.
+    ///
+    /// Returns results in the same order as the input keys.
+    pub async fn get_many(&self, keys: &[&U::Key]) -> Result>, Error> {
+        let db = self.db.read().await;
+        self.inner.get_many(keys, &*db).await
+    }
+}
+
+/// Implement [`Unmerkleized`](UnmerkleizedTrait) for the `any` unordered update kind.
+impl UnmerkleizedTrait
+    for AnyUnmerkleized, S>
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    K: Key,
+    V: ValueEncoding + 'static,
+    C: Mutable>>
+        + Persistable,
+    I: UnorderedIndex> + 'static,
+    H: Hasher,
+    S: Strategy,
+    Operation>: Codec,
+{
+    type Merkleized = AnyMerkleized, S>;
+    type Error = Error;
+
+    // Merkleizes under a read lock on the parent database and hands the same
+    // database handle to the resulting wrapper.
+    async fn merkleize(self) -> Result> {
+        let db = self.db.read().await;
+        let merkleized = self.batch.merkleize(&*db, self.metadata).await?;
+        Ok(AnyMerkleized {
+            inner: merkleized,
+            db: self.db.clone(),
+        })
+    }
+}
+
+/// Implement [`Unmerkleized`](UnmerkleizedTrait) for the `any` ordered update kind.
+impl UnmerkleizedTrait
+    for AnyUnmerkleized, S>
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    K: Key,
+    V: ValueEncoding + 'static,
+    C: Mutable>>
+        + Persistable,
+    I: OrderedIndex> + 'static,
+    H: Hasher,
+    S: Strategy,
+    Operation>: Codec,
+{
+    type Merkleized = AnyMerkleized, S>;
+    type Error = Error;
+
+    // Merkleizes under a read lock on the parent database and hands the same
+    // database handle to the resulting wrapper.
+    async fn merkleize(self) -> Result> {
+        let db = self.db.read().await;
+        let merkleized = self.batch.merkleize(&*db, self.metadata).await?;
+        Ok(AnyMerkleized {
+            inner: merkleized,
+            db: self.db.clone(),
+        })
+    }
+}
+
+/// Implement [`Merkleized`](MerkleizedTrait) for all supported `any` update kinds.
+impl MerkleizedTrait for AnyMerkleized
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    U: Update,
+    C: Mutable> + Persistable,
+    I: UnorderedIndex> + 'static,
+    H: Hasher,
+    S: Strategy,
+    Operation: Codec,
+    AnyUnmerkleized: UnmerkleizedTrait,
+{
+    type Digest = H::Digest;
+    type Unmerkleized = AnyUnmerkleized;
+
+    fn root(&self) -> H::Digest {
+        self.inner.root()
+    }
+
+    // Starts a child batch on top of this merkleized batch; the child shares
+    // the database handle and starts with no metadata.
+    fn new_batch(&self) -> Self::Unmerkleized {
+        AnyUnmerkleized {
+            batch: self.inner.new_batch::(),
+            db: self.db.clone(),
+            metadata: None,
+        }
+    }
+}
+
+/// Implement [`ManagedDb`] for unordered QMDB databases with fixed-size values.
+///
+/// `new_batch` captures the `Arc>` in the returned
+/// wrapper so that `get()` and `merkleize()` can read through to
+/// committed state.
+///
+/// `finalize` applies the merkleized batch's changeset and durably
+/// commits it to disk.
+impl ManagedDb
+    for Db<
+        F,
+        E,
+        FixedJournal>>>,
+        UnorderedIdx>,
+        H,
+        unordered::Update>,
+        ANY_BITMAP_CHUNK_BYTES,
+        S,
+    >
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    K: Array,
+    V: value::FixedValue + 'static,
+    H: Hasher + 'static,
+    T: Translator,
+    S: Strategy,
+{
+    type Unmerkleized = AnyUnmerkleized<
+        F,
+        E,
+        FixedJournal>>>,
+        UnorderedIdx>,
+        H,
+        unordered::Update>,
+        S,
+    >;
+    type Merkleized = AnyMerkleized<
+        F,
+        E,
+        FixedJournal>>>,
+        UnorderedIdx>,
+        H,
+        unordered::Update>,
+        S,
+    >;
+    type Error = Error;
+    type Config = FixedConfig;
+    type SyncTarget = AnySyncTarget;
+
+    async fn init(context: E, config: Self::Config) -> Result> {
+        ::init(context, config).await
+    }
+
+    // The read guard is only needed to create the batch; the wrapper keeps its
+    // own clone of the handle for later reads.
+    async fn new_batch(db: &Arc>) -> Self::Unmerkleized {
+        let inner = db.read().await;
+        AnyUnmerkleized {
+            batch: inner.new_batch(),
+            db: db.clone(),
+            metadata: None,
+        }
+    }
+
+    // Only the root is compared; the target's range is not consulted here.
+    fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool {
+        batch.root() == target.root
+    }
+
+    async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> {
+        self.apply_batch(batch.inner).await?;
+        self.sync().await?;
+        Ok(())
+    }
+
+    // Target is the current root over the range from the sync boundary to the
+    // end of the database bounds.
+    async fn sync_target(&self) -> Self::SyncTarget {
+        let bounds = self.bounds().await;
+        AnySyncTarget::new(
+            self.root(),
+            non_empty_range!(self.sync_boundary(), bounds.end),
+        )
+    }
+
+    // Rewinds to the end of the target range, syncs, then recomputes the sync
+    // target; panics if it does not equal the requested one.
+    async fn rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> {
+        self.rewind(target.range.end()).await?;
+        self.sync().await?;
+
+        let rewound_target = self.sync_target().await;
+        assert_eq!(
+            rewound_target, target,
+            "rewound database target mismatch after rewind",
+        );
+        Ok(())
+    }
+}
+
+/// Implement [`ManagedDb`] for unordered QMDB databases with variable-size values.
+impl ManagedDb
+    for Db<
+        F,
+        E,
+        VariableJournal>>>,
+        UnorderedIdx>,
+        H,
+        unordered::Update>,
+        ANY_BITMAP_CHUNK_BYTES,
+        S,
+    >
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    K: Key,
+    V: value::VariableValue + 'static,
+    H: Hasher,
+    T: Translator,
+    S: Strategy,
+    Operation>>: Codec,
+{
+    type Unmerkleized = AnyUnmerkleized<
+        F,
+        E,
+        VariableJournal>>>,
+        UnorderedIdx>,
+        H,
+        unordered::Update>,
+        S,
+    >;
+    type Merkleized = AnyMerkleized<
+        F,
+        E,
+        VariableJournal>>>,
+        UnorderedIdx>,
+        H,
+        unordered::Update>,
+        S,
+    >;
+    type Error = Error;
+    type Config = VariableConfig<
+        T,
+        >> as CodecRead>::Cfg,
+        S,
+    >;
+    type SyncTarget = AnySyncTarget;
+
+    async fn init(context: E, config: Self::Config) -> Result> {
+        ::init(context, config).await
+    }
+
+    // The read guard is only needed to create the batch; the wrapper keeps its
+    // own clone of the handle for later reads.
+    async fn new_batch(db: &Arc>) -> Self::Unmerkleized {
+        let inner = db.read().await;
+        AnyUnmerkleized {
+            batch: inner.new_batch(),
+            db: db.clone(),
+            metadata: None,
+        }
+    }
+
+    // Only the root is compared; the target's range is not consulted here.
+    fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool {
+        batch.root() == target.root
+    }
+
+    // Applies the batch, then syncs before reporting success.
+    async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> {
+        self.apply_batch(batch.inner).await?;
+        self.sync().await?;
+        Ok(())
+    }
+
+    // Target is the current root over the range from the sync boundary to the
+    // end of the database bounds.
+    async fn sync_target(&self) -> Self::SyncTarget {
+        let bounds = self.bounds().await;
+        AnySyncTarget::new(
+            self.root(),
+            non_empty_range!(self.sync_boundary(), bounds.end),
+        )
+    }
+
+    // Rewinds to the end of the target range, syncs, then recomputes the sync
+    // target; panics if it does not equal the requested one.
+    async fn rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> {
+        self.rewind(target.range.end()).await?;
+        self.sync().await?;
+
+        let rewound_target = self.sync_target().await;
+        assert_eq!(
+            rewound_target, target,
+            "rewound database target mismatch after rewind",
+        );
+        Ok(())
+    }
+}
+
+// State sync for the fixed-size-value database: forwards to `sync::sync`.
+impl StateSyncDb
+    for Db<
+        F,
+        E,
+        FixedJournal>>>,
+        UnorderedIdx>,
+        H,
+        unordered::Update>,
+        ANY_BITMAP_CHUNK_BYTES,
+        S,
+    >
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    K: Array,
+    V: value::FixedValue + 'static,
+    H: Hasher,
+    T: Translator,
+    S: Strategy,
+    R: Resolver<
+        Family = F,
+        Op = Operation>>,
+        Digest = H::Digest,
+    >,
+{
+    type SyncError = sync::Error;
+
+    // Assembles the sync engine config from the arguments and `sync_config`;
+    // `tip_updates` is always supplied, `finish` and `reached_target` are
+    // passed through as given.
+    async fn sync_db(
+        context: E,
+        config: Self::Config,
+        resolver: R,
+        target: Self::SyncTarget,
+        tip_updates: mpsc::Receiver,
+        finish: Option>,
+        reached_target: Option>,
+        sync_config: SyncEngineConfig,
+    ) -> Result {
+        sync::sync(sync::engine::Config {
+            context,
+            resolver,
+            target,
+            max_outstanding_requests: sync_config.max_outstanding_requests,
+            fetch_batch_size: sync_config.fetch_batch_size,
+            apply_batch_size: sync_config.apply_batch_size,
+            db_config: config,
+            update_rx: Some(tip_updates),
+            finish_rx: finish,
+            reached_target_tx: reached_target,
+            max_retained_roots: sync_config.max_retained_roots,
+        })
+        .await
+    }
+}
+
+// State sync for the variable-size-value database: forwards to `sync::sync`.
+impl StateSyncDb
+    for Db<
+        F,
+        E,
+        VariableJournal>>>,
+        UnorderedIdx>,
+        H,
+        unordered::Update>,
+        ANY_BITMAP_CHUNK_BYTES,
+        S,
+    >
+where
+    F: Family,
+    E: Storage + Clock + Metrics,
+    K: Key,
+    V: value::VariableValue + 'static,
+    H: Hasher,
+    T: Translator,
+    S: Strategy,
+    Operation>>: Codec,
+    R: Resolver<
+        Family = F,
+        Op = Operation>>,
+        Digest = H::Digest,
+    >,
+{
+    type SyncError = sync::Error;
+
+    // Assembles the sync engine config from the arguments and `sync_config`;
+    // `tip_updates` is always supplied, `finish` and `reached_target` are
+    // passed through as given.
+    async fn sync_db(
+        context: E,
+        config: Self::Config,
+        resolver: R,
+        target: Self::SyncTarget,
+        tip_updates: mpsc::Receiver,
+        finish: Option>,
+        reached_target: Option>,
+        sync_config: SyncEngineConfig,
+    ) -> Result {
+        sync::sync(sync::engine::Config {
+            context,
+            resolver,
+            target,
+            max_outstanding_requests: sync_config.max_outstanding_requests,
+            fetch_batch_size: sync_config.fetch_batch_size,
+            apply_batch_size: sync_config.apply_batch_size,
+            db_config: config,
+            update_rx: Some(tip_updates),
+            finish_rx: finish,
+            reached_target_tx: reached_target,
+            max_retained_roots: sync_config.max_retained_roots,
+        })
+        .await
+    }
+}
diff --git a/glue/src/stateful/db/compact_p2p/actor.rs b/glue/src/stateful/db/compact_p2p/actor.rs
new file mode 100644
index 
00000000000..b64de0f5ab0 --- /dev/null +++ b/glue/src/stateful/db/compact_p2p/actor.rs @@ -0,0 +1,448 @@ +//! Actor for compact QMDB sync over P2P. + +use super::{handler, mailbox, Mailbox}; +use commonware_actor::mailbox as actor_mailbox; +use commonware_codec::{Codec, Decode as _, Encode}; +use commonware_cryptography::{Hasher, PublicKey}; +use commonware_macros::select_loop; +use commonware_p2p::{Blocker, Provider, Receiver, Sender}; +use commonware_resolver::{p2p, Resolver as _}; +use commonware_runtime::{spawn_cell, BufferPooler, Clock, ContextCell, Handle, Metrics, Spawner}; +use commonware_storage::{ + merkle::{Family, Location, MAX_PROOF_DIGESTS_PER_ELEMENT}, + qmdb::{self, sync::compact}, +}; +use commonware_utils::{ + channel::{fallible::OneshotExt, oneshot}, + sync::AsyncRwLock, +}; +use futures::future; +use rand::Rng; +use std::{collections::BTreeMap, num::NonZeroUsize, sync::Arc, time::Duration}; +use tracing::info; + +const MAX_PINNED_NODES: usize = 64; + +type DbResolver = Arc>; +type DbOp = as compact::Resolver>::Op; +type Pending = + oneshot::Sender, mailbox::ResponseDropped>>; +type PendingSubs = BTreeMap, Vec>>; + +/// Configuration for [`Actor`]. +pub struct Config +where + P: PublicKey, + D: Provider, + B: Blocker, +{ + /// Provider for the current peer set. + pub peer_provider: D, + + /// Blocker used when peers send invalid data. + pub blocker: B, + + /// Local database used to serve incoming requests when available. + pub database: Option>, + + /// Maximum size of resolver mailbox backlogs. + pub mailbox_size: NonZeroUsize, + + /// Local node identity if available. + pub me: Option

, + + /// Initial expected performance for new peers. + pub initial: Duration, + + /// Request timeout. + pub timeout: Duration, + + /// Retry cadence for pending fetches. + pub fetch_retry_timeout: Duration, + + /// Send fetch requests with network priority. + pub priority_requests: bool, + + /// Send responses with network priority. + pub priority_responses: bool, +} + +enum State { + NoDb, + HasDb(DbResolver), +} + +enum MailboxAction { + None, + Fetch(handler::Request), +} + +/// Runs a compact QMDB sync resolver service over P2P. +pub struct Actor +where + E: BufferPooler + Clock + Spawner + Rng + Metrics, + P: PublicKey, + D: Provider, + B: Blocker, + F: Family, + H: Hasher, + DbResolver: compact::Resolver, + DbOp: Codec + Clone + Send + Sync + 'static, +{ + context: ContextCell, + config: Config, + mailbox_rx: actor_mailbox::Receiver, H::Digest>>, + state: State, + pending: PendingSubs, H::Digest>, +} + +impl Actor +where + E: BufferPooler + Clock + Spawner + Rng + Metrics, + P: PublicKey, + D: Provider, + B: Blocker, + F: Family, + H: Hasher, + DbResolver: compact::Resolver, + DbOp: Codec + Clone + Send + Sync + 'static, +{ + /// Create a new compact resolver actor and mailbox. + pub fn new(context: E, mut config: Config) -> (Self, Mailbox, H>) { + let state = config.database.take().map_or(State::NoDb, State::HasDb); + let (mailbox_tx, mailbox_rx) = + actor_mailbox::new(context.child("mailbox"), config.mailbox_size); + let mailbox = Mailbox::new(mailbox_tx); + let actor = Self { + context: ContextCell::new(context), + config, + mailbox_rx, + state, + pending: BTreeMap::new(), + }; + (actor, mailbox) + } + + /// Start the resolver service. 
+ pub fn start( + mut self, + net: (impl Sender, impl Receiver), + ) -> Handle<()> { + spawn_cell!(self.context, self.run(net)) + } + + async fn run( + mut self, + (sender, receiver): (impl Sender, impl Receiver), + ) { + let (handler_tx, mut handler_rx) = + actor_mailbox::new(self.context.child("handler"), self.config.mailbox_size); + let handler = handler::Handler::::new(handler_tx); + let (engine, mut resolver_mailbox) = p2p::Engine::new( + self.context.as_present().child("resolver"), + p2p::Config { + peer_provider: self.config.peer_provider.clone(), + blocker: self.config.blocker.clone(), + consumer: handler.clone(), + producer: handler, + mailbox_size: self.config.mailbox_size, + me: self.config.me.clone(), + initial: self.config.initial, + timeout: self.config.timeout, + fetch_retry_timeout: self.config.fetch_retry_timeout, + priority_requests: self.config.priority_requests, + priority_responses: self.config.priority_responses, + }, + ); + let mut resolver_task = engine.start((sender, receiver)); + + select_loop! 
{ + self.context, + on_start => { + self.pending.retain(|_, subscribers| { + subscribers.retain(|subscriber| !subscriber.is_closed()); + !subscribers.is_empty() + }); + let mailbox_message = async { + match self.mailbox_rx.recv().await { + Some(message) => Some(message), + None => future::pending().await, + } + }; + }, + on_stopped => { + return; + }, + _ = &mut resolver_task => { + return; + }, + Some(message) = mailbox_message else continue => { + match self.handle_mailbox_message(message) { + MailboxAction::None => {} + MailboxAction::Fetch(request) => { + resolver_mailbox.fetch(request); + } + } + }, + Some(message) = handler_rx.recv() else { + return; + } => match message { + handler::EngineMessage::Deliver { + key, + value, + response, + } => { + self.handle_deliver(key, value, response); + } + handler::EngineMessage::Produce { key, response } => { + self.handle_produce(key, response).await; + } + }, + } + } + + fn handle_mailbox_message( + &mut self, + message: mailbox::Message, H::Digest>, + ) -> MailboxAction { + match message { + mailbox::Message::AttachDatabase(db) => { + let replacing_existing = matches!(self.state, State::HasDb(_)); + info!(replacing_existing, "attached compact resolver database"); + self.state = State::HasDb(db); + MailboxAction::None + } + mailbox::Message::GetState { request, response } => { + if let Some(subscribers) = self.pending.get_mut(&request) { + subscribers.retain(|subscriber| !subscriber.is_closed()); + if !subscribers.is_empty() { + subscribers.push(response); + return MailboxAction::None; + } + } + self.pending.insert(request.clone(), vec![response]); + MailboxAction::Fetch(request) + } + } + } + + fn handle_deliver( + &mut self, + key: handler::Request, + value: bytes::Bytes, + response: oneshot::Sender, + ) { + let Some(subscribers) = self.pending.remove(&key) else { + response.send_lossy(true); + return; + }; + + let cfg = ( + (..=MAX_PINNED_NODES).into(), + (), + MAX_PROOF_DIGESTS_PER_ELEMENT, + ); + let state = 
match compact::State::, H::Digest>::decode_cfg(value, &cfg) { + Ok(state) => state, + Err(_) => { + self.pending.insert(key, subscribers); + response.send_lossy(false); + return; + } + }; + + if !Self::valid_state_response(&key, &state) { + self.pending.insert(key, subscribers); + response.send_lossy(false); + return; + } + + for subscriber in subscribers { + let _ = subscriber.send(Ok(state.clone())); + } + response.send_lossy(true); + } + + fn valid_state_response( + key: &handler::Request, + state: &compact::State, H::Digest>, + ) -> bool { + let target = key.to_target(); + if state.leaf_count != target.leaf_count || state.leaf_count == Location::new(0) { + return false; + } + + let hasher = qmdb::hasher::(); + qmdb::verify_proof( + &hasher, + &state.last_commit_proof, + Location::new(*state.leaf_count - 1), + std::slice::from_ref(&state.last_commit_op), + &target.root, + ) + } + + async fn handle_produce( + &mut self, + key: handler::Request, + response: oneshot::Sender, + ) { + let State::HasDb(database) = &self.state else { + return; + }; + let Ok(state) = compact::Resolver::get_compact_state(database, key.to_target()).await + else { + return; + }; + response.send_lossy(state.encode()); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use commonware_cryptography::{ed25519, sha256, Sha256}; + use commonware_p2p::{Provider, TrackedPeers}; + use commonware_parallel::Sequential; + use commonware_runtime::{deterministic, Runner as _, Supervisor as _}; + use commonware_storage::{ + merkle::Proof, + mmr, + qmdb::keyless::fixed::{self as keyless_fixed, Operation as KeylessOp}, + }; + use commonware_utils::{ + channel::{mpsc, oneshot}, + sequence::U64, + sync::AsyncRwLock, + NZUsize, + }; + use std::{sync::Arc, time::Duration}; + + #[derive(Clone, Debug)] + struct DummyProvider; + + impl Provider for DummyProvider { + type PublicKey = ed25519::PublicKey; + + async fn peer_set(&mut self, _id: u64) -> Option> { + None + } + + async fn subscribe(&mut self) -> 
commonware_p2p::PeerSetSubscription { + let (_tx, rx) = mpsc::unbounded_channel(); + rx + } + } + + #[derive(Clone)] + struct DummyBlocker; + + impl commonware_p2p::Blocker for DummyBlocker { + type PublicKey = ed25519::PublicKey; + + fn block(&mut self, _peer: Self::PublicKey) -> commonware_actor::Feedback { + commonware_actor::Feedback::Ok + } + } + + type TestDb = + keyless_fixed::CompactDb; + type TestActor = Actor< + deterministic::Context, + ed25519::PublicKey, + DummyProvider, + DummyBlocker, + mmr::Family, + TestDb, + Sha256, + >; + type TestOp = KeylessOp; + + fn test_config( + database: Option>>, + ) -> Config { + Config { + peer_provider: DummyProvider, + blocker: DummyBlocker, + database, + mailbox_size: NZUsize!(16), + me: None, + initial: Duration::from_millis(10), + timeout: Duration::from_millis(10), + fetch_retry_timeout: Duration::from_millis(10), + priority_requests: false, + priority_responses: false, + } + } + + async fn init_db(context: deterministic::Context) -> TestDb { + TestDb::init( + context, + keyless_fixed::CompactConfig { + merkle: commonware_storage::merkle::compact::Config { + partition: "compact-p2p-test".into(), + strategy: Sequential, + }, + commit_codec_config: (), + }, + ) + .await + .expect("db init should succeed") + } + + #[test] + fn invalid_proof_is_rejected() { + deterministic::Runner::default().start(|context| async move { + let (mut actor, _mailbox) = TestActor::new(context, test_config(None)); + let target = compact::Target { + root: sha256::Digest::from([7; 32]), + leaf_count: mmr::Location::new(1), + }; + let request = handler::Request::from_target(target); + let (pending_tx, _pending_rx) = oneshot::channel(); + actor.pending.insert(request.clone(), vec![pending_tx]); + + let bad_state = compact::State:: { + leaf_count: mmr::Location::new(1), + pinned_nodes: Vec::new(), + last_commit_op: TestOp::Commit(None, mmr::Location::new(0)), + last_commit_proof: Proof { + leaves: mmr::Location::new(1), + inactive_peaks: 0, + 
digests: Vec::new(), + }, + }; + + let (valid_tx, valid_rx) = oneshot::channel(); + actor.handle_deliver(request.clone(), bad_state.encode(), valid_tx); + + assert!(!valid_rx.await.expect("validation response should arrive")); + assert!(actor.pending.contains_key(&request)); + }); + } + + #[test] + fn produce_serves_attached_database() { + deterministic::Runner::default().start(|context| async move { + let db = init_db(context.child("db")).await; + let target = db.current_target(); + let db = Arc::new(AsyncRwLock::new(db)); + let (mut actor, _mailbox) = TestActor::new(context, test_config(Some(db))); + let request = handler::Request::from_target(target.clone()); + let (response_tx, response_rx) = oneshot::channel(); + + actor.handle_produce(request, response_tx).await; + + let encoded = response_rx.await.expect("response should be served"); + let cfg = ( + (..=MAX_PINNED_NODES).into(), + (), + MAX_PROOF_DIGESTS_PER_ELEMENT, + ); + let state = + compact::State::::decode_cfg(encoded, &cfg) + .expect("served state should decode"); + assert_eq!(state.leaf_count, target.leaf_count); + }); + } +} diff --git a/glue/src/stateful/db/compact_p2p/handler.rs b/glue/src/stateful/db/compact_p2p/handler.rs new file mode 100644 index 00000000000..1bbfb4ed0ec --- /dev/null +++ b/glue/src/stateful/db/compact_p2p/handler.rs @@ -0,0 +1,213 @@ +//! Handler types for compact resolver actor coordination. 
+ +use bytes::{Buf, BufMut, Bytes}; +use commonware_actor::mailbox::{Overflow, Policy, Sender}; +use commonware_codec::{EncodeSize, Error as CodecError, Read, ReadExt as _, Write}; +use commonware_cryptography::Digest; +use commonware_resolver::{self as resolver, p2p::Producer, Delivery}; +use commonware_storage::{merkle::Family, qmdb::sync::compact}; +use commonware_utils::{channel::oneshot, Span}; +use std::{ + collections::VecDeque, + fmt, + hash::{Hash, Hasher}, +}; + +#[derive(Clone, Debug)] +pub(super) struct Request { + root: D, + leaf_count: commonware_storage::merkle::Location, +} + +impl Request { + pub(super) const fn from_target(target: compact::Target) -> Self { + Self { + root: target.root, + leaf_count: target.leaf_count, + } + } + + pub(super) const fn to_target(&self) -> compact::Target { + compact::Target { + root: self.root, + leaf_count: self.leaf_count, + } + } +} + +impl PartialEq for Request { + fn eq(&self, other: &Self) -> bool { + self.root == other.root && self.leaf_count == other.leaf_count + } +} + +impl Eq for Request {} + +impl PartialOrd for Request { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Request { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.root + .cmp(&other.root) + .then_with(|| self.leaf_count.cmp(&other.leaf_count)) + } +} + +impl Hash for Request { + fn hash(&self, state: &mut H) { + self.root.hash(state); + self.leaf_count.hash(state); + } +} + +impl fmt::Display for Request { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "CompactRequest(root={}, leaf_count={})", + self.root, self.leaf_count + ) + } +} + +impl Write for Request { + fn write(&self, buf: &mut impl BufMut) { + self.root.write(buf); + self.leaf_count.write(buf); + } +} + +impl EncodeSize for Request { + fn encode_size(&self) -> usize { + self.root.encode_size() + self.leaf_count.encode_size() + } +} + +impl Read for Request { + type Cfg = (); + + fn 
read_cfg(buf: &mut impl Buf, _: &()) -> Result { + let root = D::read(buf)?; + let leaf_count = commonware_storage::merkle::Location::::read(buf)?; + let target = compact::Target { root, leaf_count }; + target.validate().map_err(|reason| { + CodecError::Invalid( + "commonware_glue::stateful::db::compact_p2p::Request", + reason, + ) + })?; + Ok(Self::from_target(target)) + } +} + +impl Span for Request {} + +pub(super) enum EngineMessage { + Deliver { + key: Request, + value: Bytes, + response: oneshot::Sender, + }, + Produce { + key: Request, + response: oneshot::Sender, + }, +} + +impl EngineMessage { + fn response_closed(&self) -> bool { + match self { + Self::Deliver { response, .. } => response.is_closed(), + Self::Produce { response, .. } => response.is_closed(), + } + } +} + +pub(super) struct EnginePending(VecDeque>); + +impl Default for EnginePending { + fn default() -> Self { + Self(VecDeque::new()) + } +} + +impl Overflow> for EnginePending { + fn is_empty(&self) -> bool { + self.0.is_empty() + } + + fn drain

(&mut self, mut push: P) + where + P: FnMut(EngineMessage) -> Option>, + { + while let Some(message) = self.0.pop_front() { + if message.response_closed() { + continue; + } + + if let Some(message) = push(message) { + self.0.push_front(message); + break; + } + } + } +} + +impl Policy for EngineMessage { + type Overflow = EnginePending; + + fn handle(overflow: &mut Self::Overflow, message: Self) -> bool { + if message.response_closed() { + return true; + } + overflow.0.push_back(message); + true + } +} + +#[derive(Clone)] +pub(super) struct Handler { + sender: Sender>, +} + +impl Handler { + pub(super) const fn new(sender: Sender>) -> Self { + Self { sender } + } +} + +impl resolver::Consumer for Handler { + type Key = Request; + type Value = Bytes; + type Subscriber = (); + + fn deliver( + &mut self, + delivery: Delivery, + value: Self::Value, + ) -> oneshot::Receiver { + let (response, receiver) = oneshot::channel(); + let _ = self.sender.enqueue(EngineMessage::Deliver { + key: delivery.key, + value, + response, + }); + receiver + } +} + +impl Producer for Handler { + type Key = Request; + + fn produce(&mut self, key: Self::Key) -> oneshot::Receiver { + let (response, receiver) = oneshot::channel(); + let _ = self + .sender + .enqueue(EngineMessage::Produce { key, response }); + receiver + } +} diff --git a/glue/src/stateful/db/compact_p2p/mailbox.rs b/glue/src/stateful/db/compact_p2p/mailbox.rs new file mode 100644 index 00000000000..9bf2092c432 --- /dev/null +++ b/glue/src/stateful/db/compact_p2p/mailbox.rs @@ -0,0 +1,188 @@ +//! Mailbox for the compact QMDB P2P resolver. 
+ +use super::handler; +use crate::stateful::db::AttachableResolver; +use commonware_actor::mailbox::{Overflow, Policy, Sender}; +use commonware_cryptography::{Digest, Hasher}; +use commonware_storage::{merkle::Family, qmdb::sync::compact}; +use commonware_utils::{channel::oneshot, sync::AsyncRwLock}; +use std::{collections::VecDeque, future::Future, sync::Arc}; + +/// The resolver actor dropped the response before completion. +#[derive(Debug, thiserror::Error)] +#[error("response dropped before completion")] +pub struct ResponseDropped; + +pub(super) enum Message { + AttachDatabase(Arc>), + GetState { + request: handler::Request, + response: oneshot::Sender, ResponseDropped>>, + }, +} + +impl Message { + fn response_closed(&self) -> bool { + match self { + Self::AttachDatabase(_) => false, + Self::GetState { response, .. } => response.is_closed(), + } + } +} + +pub(super) struct Pending { + database: Option>>, + messages: VecDeque>, +} + +impl Default for Pending { + fn default() -> Self { + Self { + database: None, + messages: VecDeque::new(), + } + } +} + +impl Overflow> for Pending { + fn is_empty(&self) -> bool { + self.database.is_none() && self.messages.is_empty() + } + + fn drain

(&mut self, mut push: P) + where + P: FnMut(Message) -> Option>, + { + if let Some(database) = self.database.take() { + if let Some(Message::AttachDatabase(database)) = push(Message::AttachDatabase(database)) + { + self.database = Some(database); + return; + } + } + + while let Some(message) = self.messages.pop_front() { + if message.response_closed() { + continue; + } + + if let Some(message) = push(message) { + self.messages.push_front(message); + break; + } + } + } +} + +impl Policy for Message { + type Overflow = Pending; + + fn handle(overflow: &mut Self::Overflow, message: Self) -> bool { + if message.response_closed() { + return true; + } + + match message { + Self::AttachDatabase(database) => { + overflow.database = Some(database); + } + message => overflow.messages.push_back(message), + } + true + } +} + +/// Client-facing resolver mailbox used by compact QMDB sync. +pub struct Mailbox { + sender: Sender>, +} + +impl Clone for Mailbox { + fn clone(&self) -> Self { + Self { + sender: self.sender.clone(), + } + } +} + +impl Mailbox { + pub(super) const fn new(sender: Sender>) -> Self { + Self { sender } + } +} + +impl Mailbox { + pub fn attach_database(&self, db: Arc>) { + let _ = self.sender.enqueue(Message::AttachDatabase(db)); + } +} + +impl compact::Resolver for Mailbox +where + DB: Send + Sync + 'static, + F: Family, + Op: Send + Sync + Clone + 'static, + H: Hasher, +{ + type Digest = H::Digest; + type Error = ResponseDropped; + type Family = F; + type Op = Op; + + async fn get_compact_state( + &self, + target: compact::Target, + ) -> Result, Self::Error> { + let request = handler::Request::from_target(target); + let (response, receiver) = oneshot::channel(); + let _ = self.sender.enqueue(Message::GetState { request, response }); + receiver.await.map_err(|_| ResponseDropped)? 
+ } +} + +impl AttachableResolver for Mailbox +where + DB: Send + Sync + 'static, + F: Family, + Op: Send + Sync + Clone + 'static, + H: Hasher, +{ + fn attach_database(&self, db: Arc>) -> impl Future + Send { + Self::attach_database(self, db); + std::future::ready(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use commonware_cryptography::sha256::Sha256; + use commonware_runtime::{deterministic, Runner as _}; + use commonware_storage::{mmr, qmdb::sync::compact::Resolver as _}; + use commonware_utils::NZUsize; + + #[test] + fn get_compact_state_sends_request() { + deterministic::Runner::default().start(|context| async move { + let (sender, mut receiver) = commonware_actor::mailbox::new(context, NZUsize!(4)); + let mailbox = Mailbox::<(), mmr::Family, u64, Sha256>::new(sender); + let target = compact::Target { + root: [1u8; 32].into(), + leaf_count: mmr::Location::new(7), + }; + + let get = mailbox.get_compact_state(target.clone()); + let observe = async move { + let message = receiver.recv().await.expect("request should be queued"); + let Message::GetState { request, response } = message else { + panic!("unexpected attach message"); + }; + assert_eq!(request.to_target(), target); + drop(response); + }; + + let (result, _) = futures::join!(get, observe); + assert!(matches!(result, Err(ResponseDropped))); + }); + } +} diff --git a/glue/src/stateful/db/compact_p2p/mod.rs b/glue/src/stateful/db/compact_p2p/mod.rs new file mode 100644 index 00000000000..eed8caffb44 --- /dev/null +++ b/glue/src/stateful/db/compact_p2p/mod.rs @@ -0,0 +1,13 @@ +//! P2P implementation of the compact QMDB sync resolver. +//! +//! Implements [`commonware_storage::qmdb::sync::compact::Resolver`] over +//! [`commonware_resolver::p2p::Engine`]. Use this for compact-storage QMDBs +//! that fetch one authenticated frontier state instead of replaying operations. 
+ +mod actor; +pub use actor::{Actor, Config}; + +mod handler; + +mod mailbox; +pub use mailbox::{Mailbox, ResponseDropped}; diff --git a/glue/src/stateful/db/current.rs b/glue/src/stateful/db/current.rs new file mode 100644 index 00000000000..4f63469c4c4 --- /dev/null +++ b/glue/src/stateful/db/current.rs @@ -0,0 +1,675 @@ +//! [`ManagedDb`] implementation for QMDB [`current`](commonware_storage::qmdb::current) databases. +//! +//! The QMDB batch API passes `&db` to `get()` and `merkleize()` for +//! read-through to committed state. This module provides wrapper types +//! that capture `Arc>` alongside the raw batch so the +//! [`Unmerkleized`](super::Unmerkleized) and [`Merkleized`](super::Merkleized) +//! traits can be implemented without a DB parameter. + +use crate::stateful::db::{ + ManagedDb, Merkleized as MerkleizedTrait, StateSyncDb, SyncEngineConfig, + Unmerkleized as UnmerkleizedTrait, +}; +use commonware_codec::{Codec, Read as CodecRead}; +use commonware_cryptography::Hasher; +use commonware_parallel::Strategy; +use commonware_runtime::{Clock, Metrics, Storage}; +use commonware_storage::{ + index::{ + unordered::Index as UnorderedIdx, Ordered as OrderedIndex, Unordered as UnorderedIndex, + }, + journal::contiguous::{ + fixed::Journal as FixedJournal, variable::Journal as VariableJournal, Contiguous, Mutable, + }, + merkle::{Graftable, Location}, + qmdb::{ + any::{ + operation::{Operation, Update}, + ordered, unordered, + value::{self, FixedEncoding, ValueEncoding, VariableEncoding}, + }, + current::{ + batch::{MerkleizedBatch, UnmerkleizedBatch}, + db::Db, + sync::Target as CurrentSyncTarget, + FixedConfig, VariableConfig, + }, + operation::Key, + sync::{self, resolver::Resolver}, + Error, + }, + translator::Translator, + Persistable, +}; +use commonware_utils::{channel::mpsc, non_empty_range, sync::AsyncRwLock, Array}; +use std::{ops::Deref, sync::Arc}; + +type CurrentDbHandle = + Arc>>; + +/// Wraps a QMDB [`UnmerkleizedBatch`] with a reference to the 
parent +/// database, implementing the [`Unmerkleized`](super::Unmerkleized) trait. +pub struct CurrentUnmerkleized +where + F: Graftable, + E: Storage + Clock + Metrics, + U: Update, + C: Contiguous>, + I: UnorderedIndex>, + H: Hasher, + S: Strategy, + Operation: Codec, +{ + batch: UnmerkleizedBatch, + db: CurrentDbHandle, + metadata: Option, +} + +/// Key-value operations for the `current` unordered update kind. +impl + CurrentUnmerkleized, N, S> +where + F: Graftable, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding + 'static, + C: Mutable>> + + Persistable, + I: UnorderedIndex> + 'static, + H: Hasher, + S: Strategy, + Operation>: Codec, +{ + /// Set commit metadata included in the next + /// [`merkleize`](UnmerkleizedTrait::merkleize) call. + pub fn with_metadata(mut self, metadata: V::Value) -> Self { + self.metadata = Some(metadata); + self + } + + /// Read a value by key, falling back to committed state. + pub async fn get(&self, key: &K) -> Result, Error> { + let db = self.db.read().await; + self.batch.get(key, &*db).await + } + + /// Read multiple values by key, falling back to committed state. + /// + /// Returns results in the same order as the input keys. + pub async fn get_many(&self, keys: &[&K]) -> Result>, Error> { + let db = self.db.read().await; + self.batch.get_many(keys, &*db).await + } + + /// Record a mutation. `Some(value)` for upsert, `None` for delete. + pub fn write(mut self, key: K, value: Option) -> Self { + self.batch = self.batch.write(key, value); + self + } +} + +/// Wraps a QMDB [`MerkleizedBatch`] with a reference to the parent +/// database, implementing the [`Merkleized`](super::Merkleized) trait. 
+pub struct CurrentMerkleized +where + F: Graftable, + E: Storage + Clock + Metrics, + U: Update, + C: Contiguous>, + I: UnorderedIndex>, + H: Hasher, + S: Strategy, + Operation: Codec, +{ + inner: Arc>, + db: CurrentDbHandle, +} + +impl Deref for CurrentUnmerkleized +where + F: Graftable, + E: Storage + Clock + Metrics, + U: Update, + C: Contiguous>, + I: UnorderedIndex>, + H: Hasher, + S: Strategy, + Operation: Codec, +{ + type Target = UnmerkleizedBatch; + + fn deref(&self) -> &Self::Target { + &self.batch + } +} + +impl Deref for CurrentMerkleized +where + F: Graftable, + E: Storage + Clock + Metrics, + U: Update, + C: Contiguous>, + I: UnorderedIndex>, + H: Hasher, + S: Strategy, + Operation: Codec, +{ + type Target = MerkleizedBatch; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +/// Key-value operations for the `current` ordered update kind. +impl + CurrentUnmerkleized, N, S> +where + F: Graftable, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding + 'static, + C: Mutable>> + + Persistable, + I: OrderedIndex> + 'static, + H: Hasher, + S: Strategy, + Operation>: Codec, +{ + /// Set commit metadata included in the next + /// [`merkleize`](UnmerkleizedTrait::merkleize) call. + pub fn with_metadata(mut self, metadata: V::Value) -> Self { + self.metadata = Some(metadata); + self + } + + /// Read a value by key, falling back to committed state. + pub async fn get(&self, key: &K) -> Result, Error> { + let db = self.db.read().await; + self.batch.get(key, &*db).await + } + + /// Read multiple values by key, falling back to committed state. + /// + /// Returns results in the same order as the input keys. + pub async fn get_many(&self, keys: &[&K]) -> Result>, Error> { + let db = self.db.read().await; + self.batch.get_many(keys, &*db).await + } + + /// Record a mutation. `Some(value)` for upsert, `None` for delete. 
+ pub fn write(mut self, key: K, value: Option) -> Self { + self.batch = self.batch.write(key, value); + self + } +} + +/// Read-through operations for the `current` merkleized batch. +impl CurrentMerkleized +where + F: Graftable, + E: Storage + Clock + Metrics, + U: Update, + C: Contiguous>, + I: UnorderedIndex> + 'static, + H: Hasher, + S: Strategy, + Operation: Codec, +{ + /// Read a value by key, falling back to committed state. + pub async fn get(&self, key: &U::Key) -> Result, Error> { + let db = self.db.read().await; + self.inner.get(key, &*db).await + } + + /// Read multiple values by key, falling back to committed state. + /// + /// Returns results in the same order as the input keys. + pub async fn get_many(&self, keys: &[&U::Key]) -> Result>, Error> { + let db = self.db.read().await; + self.inner.get_many(keys, &*db).await + } +} + +/// Implement [`Unmerkleized`](UnmerkleizedTrait) for the `current` unordered update kind. +impl UnmerkleizedTrait + for CurrentUnmerkleized, N, S> +where + F: Graftable, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding + 'static, + C: Mutable>> + + Persistable, + I: UnorderedIndex> + 'static, + H: Hasher, + S: Strategy, + Operation>: Codec, +{ + type Merkleized = CurrentMerkleized, N, S>; + type Error = Error; + + async fn merkleize(self) -> Result> { + let db = self.db.read().await; + let merkleized = self.batch.merkleize(&*db, self.metadata).await?; + Ok(CurrentMerkleized { + inner: merkleized, + db: self.db.clone(), + }) + } +} + +/// Implement [`Unmerkleized`](UnmerkleizedTrait) for the `current` ordered update kind. 
+impl UnmerkleizedTrait + for CurrentUnmerkleized, N, S> +where + F: Graftable, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding + 'static, + C: Mutable>> + + Persistable, + I: OrderedIndex> + 'static, + H: Hasher, + S: Strategy, + Operation>: Codec, +{ + type Merkleized = CurrentMerkleized, N, S>; + type Error = Error; + + async fn merkleize(self) -> Result> { + let db = self.db.read().await; + let merkleized = self.batch.merkleize(&*db, self.metadata).await?; + Ok(CurrentMerkleized { + inner: merkleized, + db: self.db.clone(), + }) + } +} + +/// Implement [`Merkleized`](MerkleizedTrait) for all supported `current` update kinds. +impl MerkleizedTrait + for CurrentMerkleized +where + F: Graftable, + E: Storage + Clock + Metrics, + U: Update, + C: Mutable> + Persistable, + I: UnorderedIndex> + 'static, + H: Hasher, + S: Strategy, + Operation: Codec, + CurrentUnmerkleized: UnmerkleizedTrait, +{ + type Digest = H::Digest; + type Unmerkleized = CurrentUnmerkleized; + + fn root(&self) -> H::Digest { + self.inner.root() + } + + fn new_batch(&self) -> Self::Unmerkleized { + CurrentUnmerkleized { + batch: self.inner.new_batch::(), + db: self.db.clone(), + metadata: None, + } + } +} + +/// Implement [`ManagedDb`] for unordered current QMDB databases with fixed-size values. 
+impl ManagedDb + for Db< + F, + E, + FixedJournal>>>, + UnorderedIdx>, + H, + unordered::Update>, + N, + S, + > +where + F: Graftable, + E: Storage + Clock + Metrics, + K: Array, + V: value::FixedValue + 'static, + H: Hasher + 'static, + T: Translator, + S: Strategy, +{ + type Unmerkleized = CurrentUnmerkleized< + F, + E, + FixedJournal>>>, + UnorderedIdx>, + H, + unordered::Update>, + N, + S, + >; + type Merkleized = CurrentMerkleized< + F, + E, + FixedJournal>>>, + UnorderedIdx>, + H, + unordered::Update>, + N, + S, + >; + type Error = Error; + type Config = FixedConfig; + type SyncTarget = CurrentSyncTarget; + + async fn init(context: E, config: Self::Config) -> Result> { + ::init(context, config).await + } + + async fn new_batch(db: &Arc>) -> Self::Unmerkleized { + let inner = db.read().await; + CurrentUnmerkleized { + batch: inner.new_batch(), + db: db.clone(), + metadata: None, + } + } + + fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool { + batch.root() == target.root + } + + async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> { + self.apply_batch(batch.inner).await?; + self.sync().await?; + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + let bounds = self.bounds().await; + let hasher = commonware_storage::qmdb::hasher::(); + let witness = self + .ops_root_witness(&hasher) + .await + .expect("failed to build ops root witness for sync target"); + CurrentSyncTarget::new( + self.root(), + self.ops_root(), + witness, + non_empty_range!(self.sync_boundary(), bounds.end), + ) + } + + async fn rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> { + self.rewind(target.range.end()).await?; + self.sync().await?; + + let rewound_target = self.sync_target().await; + assert_eq!( + rewound_target, target, + "rewound database target mismatch after rewind", + ); + Ok(()) + } +} + +/// Workaround for . 
+/// +/// Inside a `ManagedDb` trait impl, `::init(...)` in a non-async `fn` +/// resolves to the *trait* method (infinite recursion), while in an +/// `async fn` it resolves correctly to the inherent method but the compiler +/// cannot verify the RPITIT future is `Send`. By placing the call in this +/// module -- which does not import `ManagedDb` -- the compiler +/// unambiguously picks the inherent `Db::init`. +mod open { + use commonware_codec::{Codec, Read}; + use commonware_cryptography::Hasher; + use commonware_parallel::Strategy; + use commonware_runtime::{Clock, Metrics, Storage}; + use commonware_storage::{ + merkle::Graftable, + qmdb::{ + any::{ + operation::Operation, + unordered, + value::{VariableEncoding, VariableValue}, + }, + current::{unordered::variable::Db, VariableConfig}, + Error, + }, + }; + use commonware_utils::Array; + + type VConfig = VariableConfig< + T, + >> as Read>::Cfg, + S, + >; + + pub(super) async fn variable( + context: E, + config: VConfig, + ) -> Result, Error> + where + F: Graftable, + E: Storage + Clock + Metrics, + K: Array, + V: VariableValue + 'static, + H: Hasher, + T: commonware_storage::translator::Translator, + S: Strategy, + Operation>>: Codec, + { + Db::init(context, config).await + } +} + +/// Implement [`ManagedDb`] for unordered current QMDB databases with variable-size values. 
+impl ManagedDb + for Db< + F, + E, + VariableJournal>>>, + UnorderedIdx>, + H, + unordered::Update>, + N, + S, + > +where + F: Graftable, + E: Storage + Clock + Metrics, + K: Key + Array, + V: value::VariableValue + 'static, + H: Hasher, + T: Translator, + S: Strategy, + Operation>>: Codec, +{ + type Unmerkleized = CurrentUnmerkleized< + F, + E, + VariableJournal>>>, + UnorderedIdx>, + H, + unordered::Update>, + N, + S, + >; + type Merkleized = CurrentMerkleized< + F, + E, + VariableJournal>>>, + UnorderedIdx>, + H, + unordered::Update>, + N, + S, + >; + type Error = Error; + type Config = VariableConfig< + T, + >> as CodecRead>::Cfg, + S, + >; + type SyncTarget = CurrentSyncTarget; + + async fn init(context: E, config: Self::Config) -> Result> { + open::variable(context, config).await + } + + async fn new_batch(db: &Arc>) -> Self::Unmerkleized { + let inner = db.read().await; + CurrentUnmerkleized { + batch: inner.new_batch(), + db: db.clone(), + metadata: None, + } + } + + fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool { + batch.root() == target.root + } + + async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> { + self.apply_batch(batch.inner).await?; + self.sync().await?; + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + let bounds = self.bounds().await; + let hasher = commonware_storage::qmdb::hasher::(); + let witness = self + .ops_root_witness(&hasher) + .await + .expect("failed to build ops root witness for sync target"); + CurrentSyncTarget::new( + self.root(), + self.ops_root(), + witness, + non_empty_range!(self.sync_boundary(), bounds.end), + ) + } + + async fn rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> { + self.rewind(target.range.end()).await?; + self.sync().await?; + + let rewound_target = self.sync_target().await; + assert_eq!( + rewound_target, target, + "rewound database target mismatch after rewind", + ); + Ok(()) + } +} + +impl 
StateSyncDb + for Db< + F, + E, + FixedJournal>>>, + UnorderedIdx>, + H, + unordered::Update>, + N, + S, + > +where + F: Graftable, + E: Storage + Clock + Metrics, + K: Array, + V: value::FixedValue + 'static, + H: Hasher, + T: Translator, + S: Strategy, + R: Resolver< + Family = F, + Op = Operation>>, + Digest = H::Digest, + >, +{ + type SyncError = sync::Error; + + async fn sync_db( + context: E, + config: Self::Config, + resolver: R, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + finish: Option>, + reached_target: Option>, + sync_config: SyncEngineConfig, + ) -> Result { + sync::sync(sync::engine::Config { + context, + resolver, + target, + max_outstanding_requests: sync_config.max_outstanding_requests, + fetch_batch_size: sync_config.fetch_batch_size, + apply_batch_size: sync_config.apply_batch_size, + db_config: config, + update_rx: Some(tip_updates), + finish_rx: finish, + reached_target_tx: reached_target, + max_retained_roots: sync_config.max_retained_roots, + }) + .await + } +} + +impl StateSyncDb + for Db< + F, + E, + VariableJournal>>>, + UnorderedIdx>, + H, + unordered::Update>, + N, + S, + > +where + F: Graftable, + E: Storage + Clock + Metrics, + K: Key + Array, + V: value::VariableValue + 'static, + H: Hasher, + T: Translator, + S: Strategy, + Operation>>: Codec, + R: Resolver< + Family = F, + Op = Operation>>, + Digest = H::Digest, + >, +{ + type SyncError = sync::Error; + + async fn sync_db( + context: E, + config: Self::Config, + resolver: R, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + finish: Option>, + reached_target: Option>, + sync_config: SyncEngineConfig, + ) -> Result { + sync::sync(sync::engine::Config { + context, + resolver, + target, + max_outstanding_requests: sync_config.max_outstanding_requests, + fetch_batch_size: sync_config.fetch_batch_size, + apply_batch_size: sync_config.apply_batch_size, + db_config: config, + update_rx: Some(tip_updates), + finish_rx: finish, + reached_target_tx: reached_target, 
+ max_retained_roots: sync_config.max_retained_roots, + }) + .await + } +} diff --git a/glue/src/stateful/db/immutable.rs b/glue/src/stateful/db/immutable.rs new file mode 100644 index 00000000000..102b532b7c1 --- /dev/null +++ b/glue/src/stateful/db/immutable.rs @@ -0,0 +1,493 @@ +//! [`ManagedDb`] implementation for QMDB [`immutable`](commonware_storage::qmdb::immutable) +//! databases. +//! +//! Immutable databases support adding new keyed values but not updates or +//! deletions. The wrapper types here capture `Arc>` +//! so the batch API can read through to committed state. + +use crate::stateful::db::{ + ManagedDb, Merkleized as MerkleizedTrait, StateSyncDb, SyncEngineConfig, + Unmerkleized as UnmerkleizedTrait, +}; +use commonware_codec::{Codec, EncodeShared, Read as CodecRead}; +use commonware_cryptography::Hasher; +use commonware_parallel::Strategy; +use commonware_runtime::{Clock, Metrics, Storage}; +use commonware_storage::{ + journal::{ + contiguous::{ + fixed::Journal as FixedJournal, variable::Journal as VariableJournal, Mutable, + }, + Error as JournalError, + }, + merkle::{Family, Location}, + qmdb::{ + any::{ + sync::Target as AnySyncTarget, + value::{FixedEncoding, FixedValue, ValueEncoding, VariableEncoding, VariableValue}, + }, + immutable::{ + batch::{MerkleizedBatch, UnmerkleizedBatch}, + fixed, variable, Immutable, Operation, + }, + operation::Key, + sync::{self, resolver::Resolver}, + Error, + }, + translator::Translator, + Persistable, +}; +use commonware_utils::{channel::mpsc, non_empty_range, sync::AsyncRwLock, Array}; +use std::{ops::Deref, sync::Arc}; + +type ImmutableDbHandle = + Arc>>; + +/// Wraps an immutable [`UnmerkleizedBatch`] with a reference to the parent +/// database, implementing the [`Unmerkleized`](super::Unmerkleized) trait. 
+pub struct ImmutableUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + T: Translator, + S: Strategy, + Operation: EncodeShared, +{ + batch: UnmerkleizedBatch, + db: ImmutableDbHandle, + metadata: Option, + inactivity_floor: Option>, +} + +impl Deref for ImmutableUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + T: Translator, + S: Strategy, + Operation: EncodeShared, +{ + type Target = UnmerkleizedBatch; + + fn deref(&self) -> &Self::Target { + &self.batch + } +} + +impl ImmutableUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + T: Translator, + S: Strategy, + Operation: EncodeShared, +{ + /// Set commit metadata included in the next + /// [`merkleize`](UnmerkleizedTrait::merkleize) call. + pub fn with_metadata(mut self, metadata: V::Value) -> Self { + self.metadata = Some(metadata); + self + } + + /// Set the inactivity floor to include within the next [`merkleize`](UnmerkleizedTrait::merkleize) call. + /// + /// If unset, [`merkleize`](UnmerkleizedTrait::merkleize) will use the [`Default`] of [`Location`]. + pub const fn with_inactivity_floor(mut self, floor: Location) -> Self { + self.inactivity_floor = Some(floor); + self + } + + /// Read a value by key, falling back to committed state. + pub async fn get(&self, key: &K) -> Result, Error> { + let db = self.db.read().await; + self.batch.get(key, &*db).await + } + + /// Read multiple values by key, falling back to committed state. + /// + /// Returns results in the same order as the input keys. + pub async fn get_many(&self, keys: &[&K]) -> Result>, Error> { + let db = self.db.read().await; + self.batch.get_many(keys, &*db).await + } + + /// Set `key` to `value` in the speculative batch. 
+ pub fn set(mut self, key: K, value: V::Value) -> Self { + self.batch = self.batch.set(key, value); + self + } +} + +/// Wraps an immutable [`MerkleizedBatch`] with a reference to the parent +/// database, implementing the [`Merkleized`](super::Merkleized) trait. +pub struct ImmutableMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + T: Translator, + S: Strategy, + Operation: EncodeShared, +{ + inner: Arc>, + db: ImmutableDbHandle, +} + +impl Deref for ImmutableMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + T: Translator, + S: Strategy, + Operation: EncodeShared, +{ + type Target = MerkleizedBatch; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl ImmutableMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + T: Translator, + S: Strategy, + Operation: EncodeShared, +{ + /// Read a value by key, falling back to committed state. + pub async fn get(&self, key: &K) -> Result, Error> { + let db = self.db.read().await; + self.inner.get(key, &*db).await + } + + /// Read multiple values by key, falling back to committed state. + /// + /// Returns results in the same order as the input keys. 
+ pub async fn get_many(&self, keys: &[&K]) -> Result>, Error> { + let db = self.db.read().await; + self.inner.get_many(keys, &*db).await + } +} + +impl UnmerkleizedTrait for ImmutableUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + T: Translator, + S: Strategy, + Operation: EncodeShared, +{ + type Merkleized = ImmutableMerkleized; + type Error = Error; + + async fn merkleize(self) -> Result> { + let db = self.db.read().await; + let merkleized = self.batch.merkleize( + &*db, + self.metadata, + self.inactivity_floor.unwrap_or_default(), + ); + Ok(ImmutableMerkleized { + inner: merkleized, + db: self.db.clone(), + }) + } +} + +impl MerkleizedTrait for ImmutableMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + T: Translator, + S: Strategy, + Operation: EncodeShared, +{ + type Digest = H::Digest; + type Unmerkleized = ImmutableUnmerkleized; + + fn root(&self) -> H::Digest { + self.inner.root() + } + + fn new_batch(&self) -> Self::Unmerkleized { + ImmutableUnmerkleized { + batch: self.inner.new_batch::(), + db: self.db.clone(), + metadata: None, + inactivity_floor: None, + } + } +} + +impl ManagedDb for fixed::Db +where + F: Family, + E: Storage + Clock + Metrics, + K: Array, + V: FixedValue + 'static, + H: Hasher + 'static, + T: Translator, + S: Strategy, +{ + type Unmerkleized = ImmutableUnmerkleized< + F, + E, + K, + FixedEncoding, + FixedJournal>, + H, + T, + S, + >; + type Merkleized = ImmutableMerkleized< + F, + E, + K, + FixedEncoding, + FixedJournal>, + H, + T, + S, + >; + type Error = Error; + type Config = fixed::Config; + type SyncTarget = AnySyncTarget; + + async fn init(context: E, config: Self::Config) -> Result> { + ::init(context, config).await + } + + async fn new_batch(db: &Arc>) -> Self::Unmerkleized { + let inner = db.read().await; + ImmutableUnmerkleized { + batch: 
inner.new_batch(), + db: db.clone(), + metadata: None, + inactivity_floor: None, + } + } + + fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool { + batch.root() == target.root + } + + async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> { + self.apply_batch(batch.inner).await?; + self.sync().await?; + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + let bounds = self.bounds().await; + AnySyncTarget::new( + self.root(), + non_empty_range!(self.sync_boundary(), bounds.end), + ) + } + + async fn rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> { + self.rewind(target.range.end()).await?; + self.sync().await?; + + let rewound_target = self.sync_target().await; + assert_eq!( + rewound_target, target, + "rewound database target mismatch after rewind", + ); + Ok(()) + } +} + +impl ManagedDb for variable::Db +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: VariableValue + 'static, + H: Hasher + 'static, + T: Translator, + S: Strategy, + variable::Operation: Codec, +{ + type Unmerkleized = ImmutableUnmerkleized< + F, + E, + K, + VariableEncoding, + VariableJournal>, + H, + T, + S, + >; + type Merkleized = ImmutableMerkleized< + F, + E, + K, + VariableEncoding, + VariableJournal>, + H, + T, + S, + >; + type Error = Error; + type Config = variable::Config as CodecRead>::Cfg, S>; + type SyncTarget = AnySyncTarget; + + async fn init(context: E, config: Self::Config) -> Result> { + ::init(context, config).await + } + + async fn new_batch(db: &Arc>) -> Self::Unmerkleized { + let inner = db.read().await; + ImmutableUnmerkleized { + batch: inner.new_batch(), + db: db.clone(), + metadata: None, + inactivity_floor: None, + } + } + + fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool { + batch.root() == target.root + } + + async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> { + self.apply_batch(batch.inner).await?; 
+ self.sync().await?; + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + let bounds = self.bounds().await; + AnySyncTarget::new( + self.root(), + non_empty_range!(self.sync_boundary(), bounds.end), + ) + } + + async fn rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> { + self.rewind(target.range.end()).await?; + self.sync().await?; + + let rewound_target = self.sync_target().await; + assert_eq!( + rewound_target, target, + "rewound database target mismatch after rewind", + ); + Ok(()) + } +} + +impl StateSyncDb for fixed::Db +where + F: Family, + E: Storage + Clock + Metrics, + K: Array, + V: FixedValue + 'static, + H: Hasher + 'static, + T: Translator, + S: Strategy, + R: Resolver, Digest = H::Digest>, +{ + type SyncError = sync::Error; + + async fn sync_db( + context: E, + config: Self::Config, + resolver: R, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + finish: Option>, + reached_target: Option>, + sync_config: SyncEngineConfig, + ) -> Result { + sync::sync(sync::engine::Config { + context, + resolver, + target, + max_outstanding_requests: sync_config.max_outstanding_requests, + fetch_batch_size: sync_config.fetch_batch_size, + apply_batch_size: sync_config.apply_batch_size, + db_config: config, + update_rx: Some(tip_updates), + finish_rx: finish, + reached_target_tx: reached_target, + max_retained_roots: sync_config.max_retained_roots, + }) + .await + } +} + +impl StateSyncDb for variable::Db +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: VariableValue + 'static, + H: Hasher + 'static, + T: Translator, + S: Strategy, + variable::Operation: Codec, + R: Resolver, Digest = H::Digest>, +{ + type SyncError = sync::Error; + + async fn sync_db( + context: E, + config: Self::Config, + resolver: R, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + finish: Option>, + reached_target: Option>, + sync_config: SyncEngineConfig, + ) -> Result { + sync::sync(sync::engine::Config { + 
context, + resolver, + target, + max_outstanding_requests: sync_config.max_outstanding_requests, + fetch_batch_size: sync_config.fetch_batch_size, + apply_batch_size: sync_config.apply_batch_size, + db_config: config, + update_rx: Some(tip_updates), + finish_rx: finish, + reached_target_tx: reached_target, + max_retained_roots: sync_config.max_retained_roots, + }) + .await + } +} diff --git a/glue/src/stateful/db/immutable_unjournaled.rs b/glue/src/stateful/db/immutable_unjournaled.rs new file mode 100644 index 00000000000..fdf4e2ba5a2 --- /dev/null +++ b/glue/src/stateful/db/immutable_unjournaled.rs @@ -0,0 +1,621 @@ +//! [`ManagedDb`] implementation for unjournaled QMDB +//! [`immutable`](commonware_storage::qmdb::immutable) databases. +//! +//! These compact databases retain only the current Merkle peaks, so the glue +//! adapters expose set and merkleization operations but no historical reads. + +use crate::stateful::db::{ + ManagedDb, Merkleized as MerkleizedTrait, StateSyncDb, SyncEngineConfig, + Unmerkleized as UnmerkleizedTrait, +}; +use commonware_codec::{EncodeShared, Read as CodecRead}; +use commonware_cryptography::Hasher; +use commonware_macros::select; +use commonware_parallel::Strategy; +use commonware_runtime::{Clock, Metrics, Storage}; +use commonware_storage::{ + merkle::{Family, Location}, + qmdb::{ + any::value::{FixedEncoding, FixedValue, ValueEncoding, VariableEncoding, VariableValue}, + immutable::{ + fixed, variable, CompactDb, CompactMerkleizedBatch, CompactUnmerkleizedBatch, Operation, + }, + operation::Key, + sync::{self}, + Error, + }, +}; +use commonware_utils::{channel::mpsc, sync::AsyncRwLock, Array}; +use std::{ops::Deref, sync::Arc}; + +type ImmutableUnjournaledDbHandle = + Arc>>; + +fn drain_latest_target(tip_updates: &mut mpsc::Receiver) -> Option { + let mut latest = None; + loop { + match tip_updates.try_recv() { + Ok(update) => latest = Some(update), + Err(mpsc::error::TryRecvError::Empty | 
mpsc::error::TryRecvError::Disconnected) => { + return latest; + } + } + } +} + +/// Wraps an unjournaled immutable batch before merkleization. +pub struct ImmutableUnjournaledUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + batch: CompactUnmerkleizedBatch, + db: ImmutableUnjournaledDbHandle, + metadata: Option, + inactivity_floor: Option>, +} + +impl Deref for ImmutableUnjournaledUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + type Target = CompactUnmerkleizedBatch; + + fn deref(&self) -> &Self::Target { + &self.batch + } +} + +impl ImmutableUnjournaledUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + /// Set commit metadata included in the next merkleization. + pub fn with_metadata(mut self, metadata: V::Value) -> Self { + self.metadata = Some(metadata); + self + } + + /// Set the inactivity floor included in the next merkleization. + pub const fn with_inactivity_floor(mut self, floor: Location) -> Self { + self.inactivity_floor = Some(floor); + self + } + + /// Set `key` to `value` in the speculative batch. + pub fn set(mut self, key: K, value: V::Value) -> Self { + self.batch = self.batch.set(key, value); + self + } +} + +/// Wraps an unjournaled immutable batch after merkleization. 
+pub struct ImmutableUnjournaledMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + inner: Arc>, + db: ImmutableUnjournaledDbHandle, +} + +impl Deref for ImmutableUnjournaledMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + type Target = CompactMerkleizedBatch; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl UnmerkleizedTrait + for ImmutableUnjournaledUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + type Merkleized = ImmutableUnjournaledMerkleized; + type Error = Error; + + async fn merkleize(self) -> Result> { + let db = self.db.read().await; + let merkleized = self.batch.merkleize( + &*db, + self.metadata, + self.inactivity_floor.unwrap_or_default(), + ); + Ok(ImmutableUnjournaledMerkleized { + inner: merkleized, + db: self.db.clone(), + }) + } +} + +impl MerkleizedTrait for ImmutableUnjournaledMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + type Digest = H::Digest; + type Unmerkleized = ImmutableUnjournaledUnmerkleized; + + fn root(&self) -> H::Digest { + self.inner.root() + } + + fn new_batch(&self) -> Self::Unmerkleized { + ImmutableUnjournaledUnmerkleized { + batch: self.inner.new_batch::(), + db: self.db.clone(), + metadata: None, + inactivity_floor: None, + } + } +} + +impl ManagedDb for fixed::CompactDb +where + F: Family, + E: Storage + Clock + Metrics, + K: Array, + V: FixedValue + 'static, + 
H: Hasher + 'static, + S: Strategy, + Operation>: EncodeShared + CodecRead, +{ + type Unmerkleized = ImmutableUnjournaledUnmerkleized, H, S, ()>; + type Merkleized = ImmutableUnjournaledMerkleized, H, S, ()>; + type Error = Error; + type Config = fixed::CompactConfig; + type SyncTarget = sync::compact::Target; + + async fn init(context: E, config: Self::Config) -> Result> { + ::init(context, config).await + } + + async fn new_batch(db: &Arc>) -> Self::Unmerkleized { + let inner = db.read().await; + ImmutableUnjournaledUnmerkleized { + batch: inner.new_batch(), + db: db.clone(), + metadata: None, + inactivity_floor: None, + } + } + + fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool { + batch.root() == target.root + } + + async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> { + self.apply_batch(batch.inner)?; + self.sync().await?; + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + self.current_target() + } + + async fn rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> { + // Compact storage only retains the previous logical commit range. 
+ self.rewind().await?; + + let rewound_target = self.sync_target().await; + assert_eq!( + rewound_target, target, + "rewound database target mismatch after one-step rewind", + ); + Ok(()) + } +} + +impl ManagedDb for variable::CompactDb +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: VariableValue + 'static, + H: Hasher + 'static, + Operation>: EncodeShared + CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + type Unmerkleized = ImmutableUnjournaledUnmerkleized, H, S, C>; + type Merkleized = ImmutableUnjournaledMerkleized, H, S, C>; + type Error = Error; + type Config = variable::CompactConfig; + type SyncTarget = sync::compact::Target; + + async fn init(context: E, config: Self::Config) -> Result> { + ::init(context, config).await + } + + async fn new_batch(db: &Arc>) -> Self::Unmerkleized { + let inner = db.read().await; + ImmutableUnjournaledUnmerkleized { + batch: inner.new_batch(), + db: db.clone(), + metadata: None, + inactivity_floor: None, + } + } + + fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool { + batch.root() == target.root + } + + async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> { + self.apply_batch(batch.inner)?; + self.sync().await?; + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + self.current_target() + } + + async fn rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> { + // Compact storage only retains the previous logical commit range. 
+ self.rewind().await?; + + let rewound_target = self.sync_target().await; + assert_eq!( + rewound_target, target, + "rewound database target mismatch after one-step rewind", + ); + Ok(()) + } +} + +impl StateSyncDb for fixed::CompactDb +where + F: Family, + E: Storage + Clock + Metrics, + K: Array, + V: FixedValue + 'static, + H: Hasher + 'static, + S: Strategy, + Operation>: EncodeShared + CodecRead, + R: sync::compact::Resolver< + Family = F, + Op = Operation>, + Digest = H::Digest, + >, +{ + type SyncError = sync::Error; + + async fn sync_db( + context: E, + config: Self::Config, + resolver: R, + mut target: Self::SyncTarget, + mut tip_updates: mpsc::Receiver, + mut finish: Option>, + reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + let mut attempt = 0u64; + loop { + let context = context.child("sync").with_attribute("attempt", attempt); + attempt += 1; + let db = sync::compact::sync(sync::compact::Config:: { + context, + resolver: resolver.clone(), + target: target.clone(), + db_config: config.clone(), + }) + .await?; + + if let Some(update) = drain_latest_target(&mut tip_updates) { + target = update; + continue; + } + + if let Some(reached_target) = reached_target.as_ref() { + if reached_target.send(target.clone()).await.is_err() { + return Ok(db); + } + } + + let Some(finish) = finish.as_mut() else { + return Ok(db); + }; + select! 
{ + _ = finish.recv() => return Ok(db), + update = tip_updates.recv() => { + let Some(update) = update else { + return Ok(db); + }; + target = update; + }, + } + } + } +} + +impl StateSyncDb for variable::CompactDb +where + F: Family, + E: Storage + Clock + Metrics, + K: Key, + V: VariableValue + 'static, + H: Hasher + 'static, + Operation>: EncodeShared + CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, + R: sync::compact::Resolver< + Family = F, + Op = Operation>, + Digest = H::Digest, + >, +{ + type SyncError = sync::Error; + + async fn sync_db( + context: E, + config: Self::Config, + resolver: R, + mut target: Self::SyncTarget, + mut tip_updates: mpsc::Receiver, + mut finish: Option>, + reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + let mut attempt = 0u64; + loop { + let context = context.child("sync").with_attribute("attempt", attempt); + attempt += 1; + let db = sync::compact::sync(sync::compact::Config:: { + context, + resolver: resolver.clone(), + target: target.clone(), + db_config: config.clone(), + }) + .await?; + + if let Some(update) = drain_latest_target(&mut tip_updates) { + target = update; + continue; + } + + if let Some(reached_target) = reached_target.as_ref() { + if reached_target.send(target.clone()).await.is_err() { + return Ok(db); + } + } + + let Some(finish) = finish.as_mut() else { + return Ok(db); + }; + select! 
{ + _ = finish.recv() => return Ok(db), + update = tip_updates.recv() => { + let Some(update) = update else { + return Ok(db); + }; + target = update; + }, + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use commonware_cryptography::{sha256::Digest, Sha256}; + use commonware_parallel::Sequential; + use commonware_runtime::{deterministic, Runner as _, Supervisor as _}; + use commonware_storage::merkle::{compact::Config as MerkleConfig, mmr}; + use commonware_utils::{NZUsize, NZU64}; + + type FixedDb = + fixed::CompactDb; + type VariableDb = variable::CompactDb< + mmr::Family, + deterministic::Context, + Digest, + Vec, + Sha256, + ((), (commonware_codec::RangeCfg, ())), + Sequential, + >; + + fn fixed_config(suffix: &str) -> fixed::CompactConfig { + fixed::CompactConfig { + merkle: MerkleConfig { + partition: format!("stateful-immutable-unjournaled-{suffix}"), + strategy: Sequential, + }, + commit_codec_config: (), + } + } + + const fn sync_config() -> SyncEngineConfig { + SyncEngineConfig { + fetch_batch_size: NZU64!(1), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NZUsize!(1), + max_retained_roots: 0, + } + } + + fn assert_managed_db>() {} + + fn assert_state_sync_db() + where + T: StateSyncDb, + { + } + + #[test] + fn immutable_unjournaled_trait_impls_compile() { + assert_managed_db::(); + assert_managed_db::(); + assert_state_sync_db::>(); + assert_state_sync_db::>(); + } + + #[test] + fn managed_db_finalize_commits_fixed_immutable_unjournaled_batches() { + deterministic::Runner::default().start(|context| async move { + let config = fixed_config("managed-db"); + let db = FixedDb::init(context.child("db"), config).await.unwrap(); + let db = Arc::new(AsyncRwLock::new(db)); + let key = Sha256::hash(&[1]); + let value = Sha256::hash(&[2]); + let metadata = Sha256::hash(&[3]); + + let batch = >::new_batch(&db) + .await + .set(key, value) + .with_inactivity_floor(mmr::Location::new(1)) + .with_metadata(metadata); + let 
merkleized = crate::stateful::db::Unmerkleized::merkleize(batch) + .await + .unwrap(); + let expected_root = merkleized.root(); + + { + let mut guard = db.write().await; + >::finalize(&mut *guard, merkleized) + .await + .unwrap(); + } + + let guard = db.read().await; + assert_eq!(guard.root(), expected_root); + assert_eq!(guard.get_metadata(), Some(metadata)); + + let target = >::sync_target(&*guard).await; + assert_eq!(target.root, guard.root()); + assert_eq!(target.leaf_count, mmr::Location::new(3)); + }); + } + + #[test] + fn state_sync_fetches_fixed_immutable_compact_state() { + deterministic::Runner::default().start(|context| async move { + let mut source = FixedDb::init(context.child("source"), fixed_config("source")) + .await + .unwrap(); + let metadata = Sha256::hash(&[3]); + let floor = source.inactivity_floor_loc(); + let batch = source + .new_batch() + .set(Sha256::hash(&[1]), Sha256::hash(&[2])) + .merkleize(&source, Some(metadata), floor); + source.apply_batch(batch).unwrap(); + source.sync().await.unwrap(); + + let target = source.current_target(); + let (_update_tx, update_rx) = mpsc::channel(1); + let synced = >>::sync_db( + context.child("target"), + fixed_config("target"), + Arc::new(source), + target.clone(), + update_rx, + None, + None, + sync_config(), + ) + .await + .unwrap(); + + assert_eq!(synced.current_target(), target); + assert_eq!(synced.get_metadata(), Some(metadata)); + }); + } + + #[test] + fn managed_db_rewinds_fixed_immutable_unjournaled_one_commit_range() { + deterministic::Runner::default().start(|context| async move { + let config = fixed_config("rewind"); + let mut db = FixedDb::init(context.child("db"), config).await.unwrap(); + + let floor = db.inactivity_floor_loc(); + let batch = db + .new_batch() + .set(Sha256::hash(&[1]), Sha256::hash(&[2])) + .merkleize(&db, Some(Sha256::hash(&[11])), floor); + db.apply_batch(batch).unwrap(); + db.sync().await.unwrap(); + let first_target = >::sync_target(&db).await; + + let floor = 
db.inactivity_floor_loc(); + let batch = db + .new_batch() + .set(Sha256::hash(&[3]), Sha256::hash(&[4])) + .merkleize(&db, Some(Sha256::hash(&[22])), floor); + db.apply_batch(batch).unwrap(); + db.sync().await.unwrap(); + let second_target = >::sync_target(&db).await; + assert_ne!(second_target, first_target); + + >::rewind_to_target(&mut db, first_target.clone()) + .await + .unwrap(); + + let rewound_target = >::sync_target(&db).await; + assert_eq!(rewound_target, first_target); + assert_eq!(db.get_metadata(), Some(Sha256::hash(&[11]))); + }); + } +} diff --git a/glue/src/stateful/db/keyless.rs b/glue/src/stateful/db/keyless.rs new file mode 100644 index 00000000000..6fdfdfe2b85 --- /dev/null +++ b/glue/src/stateful/db/keyless.rs @@ -0,0 +1,549 @@ +//! [`ManagedDb`] implementation for QMDB [`keyless`](commonware_storage::qmdb::keyless) +//! databases. +//! +//! Keyless databases are append-only. Operations are addressed by +//! [`Location`] rather than by key. +//! The wrapper types here capture `Arc>` so the batch API +//! can read through to committed state. 
+ +use crate::stateful::db::{ + ManagedDb, Merkleized as MerkleizedTrait, StateSyncDb, SyncEngineConfig, + Unmerkleized as UnmerkleizedTrait, +}; +use commonware_codec::{EncodeShared, Read as CodecRead}; +use commonware_cryptography::Hasher; +use commonware_parallel::Strategy; +use commonware_runtime::{Clock, Metrics, Storage}; +use commonware_storage::{ + journal::{ + contiguous::{ + fixed::Journal as FixedJournal, variable::Journal as VariableJournal, Mutable, + }, + Error as JournalError, + }, + merkle::{Family, Location}, + qmdb::{ + any::{ + sync::Target as AnySyncTarget, + value::{FixedEncoding, FixedValue, ValueEncoding, VariableEncoding, VariableValue}, + }, + keyless::{ + batch::{MerkleizedBatch, UnmerkleizedBatch}, + fixed, variable, Keyless, Operation, + }, + sync::{self, resolver::Resolver}, + Error, + }, + Persistable, +}; +use commonware_utils::{channel::mpsc, non_empty_range, sync::AsyncRwLock}; +use std::{ops::Deref, sync::Arc}; + +type KeylessDbHandle = Arc>>; + +/// Wraps a keyless [`UnmerkleizedBatch`] with a reference to the parent +/// database, implementing the [`Unmerkleized`](super::Unmerkleized) trait. 
+pub struct KeylessUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + S: Strategy, + Operation: EncodeShared, +{ + batch: UnmerkleizedBatch, + db: KeylessDbHandle, + metadata: Option, + inactivity_floor: Option>, +} + +impl Deref for KeylessUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + S: Strategy, + Operation: EncodeShared, +{ + type Target = UnmerkleizedBatch; + + fn deref(&self) -> &Self::Target { + &self.batch + } +} + +impl KeylessUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + S: Strategy, + Operation: EncodeShared, +{ + /// Set commit metadata included in the next + /// [`merkleize`](UnmerkleizedTrait::merkleize) call. + pub fn with_metadata(mut self, metadata: V::Value) -> Self { + self.metadata = Some(metadata); + self + } + + /// Set the inactivity floor to include within the next [`merkleize`](UnmerkleizedTrait::merkleize) call. + /// + /// If unset, [`merkleize`](UnmerkleizedTrait::merkleize) will use the [`Default`] of [`Location`]. + pub const fn with_inactivity_floor(mut self, floor: Location) -> Self { + self.inactivity_floor = Some(floor); + self + } + + /// Read a value by location, falling back to committed state. + pub async fn get(&self, location: Location) -> Result, Error> { + let db = self.db.read().await; + self.batch.get(location, &*db).await + } + + /// Read multiple values by location, falling back to committed state. + /// + /// Locations must be sorted in ascending order. Returns results in the same + /// order as the input locations. + pub async fn get_many( + &self, + locations: &[Location], + ) -> Result>, Error> { + let db = self.db.read().await; + self.batch.get_many(locations, &*db).await + } + + /// Append a value to the speculative batch. 
+ pub fn append(mut self, value: V::Value) -> Self { + self.batch = self.batch.append(value); + self + } +} + +/// Wraps a keyless [`MerkleizedBatch`] with a reference to the parent +/// database, implementing the [`Merkleized`](super::Merkleized) trait. +pub struct KeylessMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + S: Strategy, + Operation: EncodeShared, +{ + inner: Arc>, + db: KeylessDbHandle, +} + +impl Deref for KeylessMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + S: Strategy, + Operation: EncodeShared, +{ + type Target = MerkleizedBatch; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl KeylessMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + S: Strategy, + Operation: EncodeShared, +{ + /// Read a value by location, falling back to committed state. + pub async fn get(&self, location: Location) -> Result, Error> { + let db = self.db.read().await; + self.inner.get(location, &*db).await + } + + /// Read multiple values by location, falling back to committed state. + /// + /// Locations must be sorted in ascending order. Returns results in the same + /// order as the input locations. 
+ pub async fn get_many( + &self, + locations: &[Location], + ) -> Result>, Error> { + let db = self.db.read().await; + self.inner.get_many(locations, &*db).await + } +} + +impl UnmerkleizedTrait for KeylessUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + S: Strategy, + Operation: EncodeShared, +{ + type Merkleized = KeylessMerkleized; + type Error = Error; + + async fn merkleize(self) -> Result> { + let db = self.db.read().await; + let merkleized = self.batch.merkleize( + &*db, + self.metadata, + self.inactivity_floor.unwrap_or_default(), + ); + Ok(KeylessMerkleized { + inner: merkleized, + db: self.db.clone(), + }) + } +} + +impl MerkleizedTrait for KeylessMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + C: Mutable> + Persistable, + H: Hasher, + S: Strategy, + Operation: EncodeShared, +{ + type Digest = H::Digest; + type Unmerkleized = KeylessUnmerkleized; + + fn root(&self) -> H::Digest { + self.inner.root() + } + + fn new_batch(&self) -> Self::Unmerkleized { + KeylessUnmerkleized { + batch: self.inner.new_batch::(), + db: self.db.clone(), + metadata: None, + inactivity_floor: None, + } + } +} + +impl ManagedDb for fixed::Db +where + F: Family, + E: Storage + Clock + Metrics, + V: FixedValue + 'static, + H: Hasher + 'static, + S: Strategy, +{ + type Unmerkleized = + KeylessUnmerkleized, FixedJournal>, H, S>; + type Merkleized = + KeylessMerkleized, FixedJournal>, H, S>; + type Error = Error; + type Config = fixed::Config; + type SyncTarget = AnySyncTarget; + + async fn init(context: E, config: Self::Config) -> Result> { + ::init(context, config).await + } + + async fn new_batch(db: &Arc>) -> Self::Unmerkleized { + let inner = db.read().await; + KeylessUnmerkleized { + batch: inner.new_batch(), + db: db.clone(), + metadata: None, + inactivity_floor: None, + } + } + + fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool 
{ + batch.root() == target.root + } + + async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> { + self.apply_batch(batch.inner).await?; + self.commit().await?; + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + let bounds = self.bounds().await; + AnySyncTarget::new( + self.root(), + non_empty_range!(self.sync_boundary(), bounds.end), + ) + } + + async fn rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> { + self.rewind(target.range.end()).await?; + self.commit().await?; + + let rewound_target = self.sync_target().await; + assert_eq!( + rewound_target, target, + "rewound database target mismatch after rewind", + ); + Ok(()) + } +} + +impl ManagedDb for variable::Db +where + F: Family, + E: Storage + Clock + Metrics, + V: VariableValue + 'static, + H: Hasher + 'static, + S: Strategy, +{ + type Unmerkleized = KeylessUnmerkleized< + F, + E, + VariableEncoding, + VariableJournal>, + H, + S, + >; + type Merkleized = KeylessMerkleized< + F, + E, + VariableEncoding, + VariableJournal>, + H, + S, + >; + type Error = Error; + type Config = variable::Config< as CodecRead>::Cfg, S>; + type SyncTarget = AnySyncTarget; + + async fn init(context: E, config: Self::Config) -> Result> { + ::init(context, config).await + } + + async fn new_batch(db: &Arc>) -> Self::Unmerkleized { + let inner = db.read().await; + KeylessUnmerkleized { + batch: inner.new_batch(), + db: db.clone(), + metadata: None, + inactivity_floor: None, + } + } + + fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool { + batch.root() == target.root + } + + async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> { + self.apply_batch(batch.inner).await?; + self.commit().await?; + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + let bounds = self.bounds().await; + AnySyncTarget::new( + self.root(), + non_empty_range!(self.sync_boundary(), bounds.end), + ) + } + + async fn 
rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> { + self.rewind(target.range.end()).await?; + self.commit().await?; + + let rewound_target = self.sync_target().await; + assert_eq!( + rewound_target, target, + "rewound database target mismatch after rewind", + ); + Ok(()) + } +} + +impl StateSyncDb for fixed::Db +where + F: Family, + E: Storage + Clock + Metrics, + V: FixedValue + 'static, + H: Hasher + 'static, + S: Strategy, + R: Resolver, Digest = H::Digest>, +{ + type SyncError = sync::Error; + + async fn sync_db( + context: E, + config: Self::Config, + resolver: R, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + finish: Option>, + reached_target: Option>, + sync_config: SyncEngineConfig, + ) -> Result { + sync::sync(sync::engine::Config { + context, + resolver, + target, + max_outstanding_requests: sync_config.max_outstanding_requests, + fetch_batch_size: sync_config.fetch_batch_size, + apply_batch_size: sync_config.apply_batch_size, + db_config: config, + update_rx: Some(tip_updates), + finish_rx: finish, + reached_target_tx: reached_target, + max_retained_roots: sync_config.max_retained_roots, + }) + .await + } +} + +impl StateSyncDb for variable::Db +where + F: Family, + E: Storage + Clock + Metrics, + V: VariableValue + 'static, + H: Hasher + 'static, + S: Strategy, + R: Resolver, Digest = H::Digest>, +{ + type SyncError = sync::Error; + + async fn sync_db( + context: E, + config: Self::Config, + resolver: R, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + finish: Option>, + reached_target: Option>, + sync_config: SyncEngineConfig, + ) -> Result { + sync::sync(sync::engine::Config { + context, + resolver, + target, + max_outstanding_requests: sync_config.max_outstanding_requests, + fetch_batch_size: sync_config.fetch_batch_size, + apply_batch_size: sync_config.apply_batch_size, + db_config: config, + update_rx: Some(tip_updates), + finish_rx: finish, + reached_target_tx: reached_target, + 
max_retained_roots: sync_config.max_retained_roots, + }) + .await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use commonware_cryptography::Sha256; + use commonware_parallel::Sequential; + use commonware_runtime::{ + buffer::paged::CacheRef, deterministic, BufferPooler, Runner as _, Supervisor as _, + }; + use commonware_storage::{ + journal::contiguous::fixed::Config as FixedJournalConfig, + merkle::full::Config as MerkleConfig, mmr, qmdb::keyless as storage_keyless, + }; + use commonware_utils::{sequence::U64, NZUsize, NZU16, NZU64}; + use std::num::{NonZeroU16, NonZeroUsize}; + + type FixedDb = fixed::Db; + type VariableDb = + variable::Db, Sha256, Sequential>; + + const PAGE_SIZE: NonZeroU16 = NZU16!(101); + const PAGE_CACHE_SIZE: NonZeroUsize = NZUsize!(11); + + fn fixed_config(suffix: &str, pooler: &impl BufferPooler) -> fixed::Config { + let page_cache = CacheRef::from_pooler(pooler, PAGE_SIZE, PAGE_CACHE_SIZE); + storage_keyless::Config { + merkle: MerkleConfig { + journal_partition: format!("journal-{suffix}"), + metadata_partition: format!("metadata-{suffix}"), + items_per_blob: NZU64!(11), + write_buffer: NZUsize!(1024), + strategy: Sequential, + page_cache: page_cache.clone(), + }, + log: FixedJournalConfig { + partition: format!("log-{suffix}"), + items_per_blob: NZU64!(7), + page_cache, + write_buffer: NZUsize!(1024), + }, + } + } + + fn assert_managed_db>() {} + + fn assert_state_sync_db() + where + T: StateSyncDb, + { + } + + #[test] + fn keyless_trait_impls_compile() { + assert_managed_db::(); + assert_managed_db::(); + assert_state_sync_db::>(); + assert_state_sync_db::>(); + } + + #[test] + fn managed_db_finalize_commits_fixed_keyless_batches() { + deterministic::Runner::default().start(|context| async move { + let config = fixed_config("stateful-keyless-managed-db", &context); + let db = FixedDb::init(context.child("db"), config).await.unwrap(); + let db = Arc::new(AsyncRwLock::new(db)); + + let batch = >::new_batch(&db) + .await + 
.append(U64::new(7)) + .with_inactivity_floor(mmr::Location::new(1)) + .with_metadata(U64::new(9)); + let merkleized = crate::stateful::db::Unmerkleized::merkleize(batch) + .await + .unwrap(); + + { + let mut guard = db.write().await; + >::finalize(&mut *guard, merkleized) + .await + .unwrap(); + } + + let guard = db.read().await; + assert_eq!( + guard.get(mmr::Location::new(1)).await.unwrap(), + Some(U64::new(7)) + ); + assert_eq!(guard.get_metadata().await.unwrap(), Some(U64::new(9))); + + let target = >::sync_target(&*guard).await; + assert_eq!(target.root, guard.root()); + assert_eq!(target.range.start(), mmr::Location::new(1)); + assert_eq!(target.range.end(), mmr::Location::new(3)); + }); + } +} diff --git a/glue/src/stateful/db/keyless_unjournaled.rs b/glue/src/stateful/db/keyless_unjournaled.rs new file mode 100644 index 00000000000..db9d9996bcc --- /dev/null +++ b/glue/src/stateful/db/keyless_unjournaled.rs @@ -0,0 +1,837 @@ +//! [`ManagedDb`] implementation for unjournaled QMDB +//! [`keyless`](commonware_storage::qmdb::keyless) databases. +//! +//! These compact databases retain only the current Merkle peaks, so the glue +//! adapters expose append and merkleization operations but no historical reads. 
+ +use crate::stateful::db::{ + ManagedDb, Merkleized as MerkleizedTrait, StateSyncDb, SyncEngineConfig, + Unmerkleized as UnmerkleizedTrait, +}; +use commonware_codec::{EncodeShared, Read as CodecRead}; +use commonware_cryptography::Hasher; +use commonware_macros::select; +use commonware_parallel::Strategy; +use commonware_runtime::{Clock, Metrics, Storage}; +use commonware_storage::{ + merkle::{Family, Location}, + qmdb::{ + any::value::{FixedEncoding, FixedValue, ValueEncoding, VariableEncoding, VariableValue}, + keyless::{ + fixed, variable, CompactDb, CompactMerkleizedBatch, CompactUnmerkleizedBatch, Operation, + }, + sync::{self}, + Error, + }, +}; +use commonware_utils::{channel::mpsc, sync::AsyncRwLock}; +use futures::future::{pending, Either}; +use std::{ops::Deref, sync::Arc}; + +type KeylessUnjournaledDbHandle = Arc>>; + +fn drain_latest_target(tip_updates: &mut mpsc::Receiver) -> Option { + let mut latest = None; + loop { + match tip_updates.try_recv() { + Ok(update) => latest = Some(update), + Err(mpsc::error::TryRecvError::Empty | mpsc::error::TryRecvError::Disconnected) => { + return latest; + } + } + } +} + +/// Wraps an unjournaled keyless batch before merkleization. 
+pub struct KeylessUnjournaledUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + batch: CompactUnmerkleizedBatch, + db: KeylessUnjournaledDbHandle, + metadata: Option, + inactivity_floor: Option>, +} + +impl Deref for KeylessUnjournaledUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + type Target = CompactUnmerkleizedBatch; + + fn deref(&self) -> &Self::Target { + &self.batch + } +} + +impl KeylessUnjournaledUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + /// Set commit metadata included in the next merkleization. + pub fn with_metadata(mut self, metadata: V::Value) -> Self { + self.metadata = Some(metadata); + self + } + + /// Set the inactivity floor included in the next merkleization. + pub const fn with_inactivity_floor(mut self, floor: Location) -> Self { + self.inactivity_floor = Some(floor); + self + } + + /// Append a value to the speculative batch. + pub fn append(mut self, value: V::Value) -> Self { + self.batch = self.batch.append(value); + self + } +} + +/// Wraps an unjournaled keyless batch after merkleization. 
+pub struct KeylessUnjournaledMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + inner: Arc>, + db: KeylessUnjournaledDbHandle, +} + +impl Deref for KeylessUnjournaledMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + type Target = CompactMerkleizedBatch; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl UnmerkleizedTrait for KeylessUnjournaledUnmerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + type Merkleized = KeylessUnjournaledMerkleized; + type Error = Error; + + async fn merkleize(self) -> Result> { + let db = self.db.read().await; + let merkleized = self.batch.merkleize( + &*db, + self.metadata, + self.inactivity_floor.unwrap_or_default(), + ); + Ok(KeylessUnjournaledMerkleized { + inner: merkleized, + db: self.db.clone(), + }) + } +} + +impl MerkleizedTrait for KeylessUnjournaledMerkleized +where + F: Family, + E: Storage + Clock + Metrics, + V: ValueEncoding, + H: Hasher, + Operation: EncodeShared, + Operation: CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + type Digest = H::Digest; + type Unmerkleized = KeylessUnjournaledUnmerkleized; + + fn root(&self) -> H::Digest { + self.inner.root() + } + + fn new_batch(&self) -> Self::Unmerkleized { + KeylessUnjournaledUnmerkleized { + batch: self.inner.new_batch::(), + db: self.db.clone(), + metadata: None, + inactivity_floor: None, + } + } +} + +impl ManagedDb for fixed::CompactDb +where + F: Family, + E: Storage + Clock + Metrics, + V: FixedValue + 'static, + H: Hasher + 'static, + S: Strategy, + Operation>: EncodeShared + 
CodecRead, +{ + type Unmerkleized = KeylessUnjournaledUnmerkleized, H, S, ()>; + type Merkleized = KeylessUnjournaledMerkleized, H, S, ()>; + type Error = Error; + type Config = fixed::CompactConfig; + type SyncTarget = sync::compact::Target; + + async fn init(context: E, config: Self::Config) -> Result> { + ::init(context, config).await + } + + async fn new_batch(db: &Arc>) -> Self::Unmerkleized { + let inner = db.read().await; + KeylessUnjournaledUnmerkleized { + batch: inner.new_batch(), + db: db.clone(), + metadata: None, + inactivity_floor: None, + } + } + + fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool { + batch.root() == target.root + } + + async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> { + self.apply_batch(batch.inner)?; + self.sync().await?; + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + self.current_target() + } + + async fn rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> { + // Compact storage only retains the previous logical commit range. 
+ self.rewind().await?; + + let rewound_target = self.sync_target().await; + assert_eq!( + rewound_target, target, + "rewound database target mismatch after one-step rewind", + ); + Ok(()) + } +} + +impl ManagedDb for variable::CompactDb +where + F: Family, + E: Storage + Clock + Metrics, + V: VariableValue + 'static, + H: Hasher + 'static, + Operation>: EncodeShared + CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, +{ + type Unmerkleized = KeylessUnjournaledUnmerkleized, H, S, C>; + type Merkleized = KeylessUnjournaledMerkleized, H, S, C>; + type Error = Error; + type Config = variable::CompactConfig; + type SyncTarget = sync::compact::Target; + + async fn init(context: E, config: Self::Config) -> Result> { + ::init(context, config).await + } + + async fn new_batch(db: &Arc>) -> Self::Unmerkleized { + let inner = db.read().await; + KeylessUnjournaledUnmerkleized { + batch: inner.new_batch(), + db: db.clone(), + metadata: None, + inactivity_floor: None, + } + } + + fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool { + batch.root() == target.root + } + + async fn finalize(&mut self, batch: Self::Merkleized) -> Result<(), Error> { + self.apply_batch(batch.inner)?; + self.sync().await?; + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + self.current_target() + } + + async fn rewind_to_target(&mut self, target: Self::SyncTarget) -> Result<(), Error> { + // Compact storage only retains the previous logical commit range. 
+ self.rewind().await?; + + let rewound_target = self.sync_target().await; + assert_eq!( + rewound_target, target, + "rewound database target mismatch after one-step rewind", + ); + Ok(()) + } +} + +impl StateSyncDb for fixed::CompactDb +where + F: Family, + E: Storage + Clock + Metrics, + V: FixedValue + 'static, + H: Hasher + 'static, + S: Strategy, + Operation>: EncodeShared + CodecRead, + R: sync::compact::Resolver>, Digest = H::Digest>, +{ + type SyncError = sync::Error; + + async fn sync_db( + context: E, + config: Self::Config, + resolver: R, + mut target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + mut finish: Option>, + reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + let mut attempt = 0u64; + let mut tip_updates = Some(tip_updates); + loop { + if let Some(tip_updates) = tip_updates.as_mut() { + if let Some(update) = drain_latest_target(tip_updates) { + target = update; + } + } + + let context = context.child("sync").with_attribute("attempt", attempt); + attempt += 1; + let update_future = tip_updates.as_mut().map_or_else( + || Either::Right(pending()), + |updates| Either::Left(updates.recv()), + ); + let db = select! { + update = update_future => { + let Some(update) = update else { + tip_updates = None; + continue; + }; + target = update; + continue; + }, + db = sync::compact::sync(sync::compact::Config:: { + context, + resolver: resolver.clone(), + target: target.clone(), + db_config: config.clone(), + }) => db?, + }; + + if let Some(tip_updates) = tip_updates.as_mut() { + if let Some(update) = drain_latest_target(tip_updates) { + target = update; + continue; + } + } + + if let Some(reached_target) = reached_target.as_ref() { + if reached_target.send(target.clone()).await.is_err() { + return Ok(db); + } + } + + let Some(finish) = finish.as_mut() else { + return Ok(db); + }; + let Some(tip_updates) = tip_updates.as_mut() else { + return Ok(db); + }; + select! 
{ + _ = finish.recv() => return Ok(db), + update = tip_updates.recv() => { + let Some(update) = update else { + return Ok(db); + }; + target = update; + }, + } + } + } +} + +impl StateSyncDb for variable::CompactDb +where + F: Family, + E: Storage + Clock + Metrics, + V: VariableValue + 'static, + H: Hasher + 'static, + Operation>: EncodeShared + CodecRead, + C: Clone + Send + Sync + 'static, + S: Strategy, + R: sync::compact::Resolver< + Family = F, + Op = Operation>, + Digest = H::Digest, + >, +{ + type SyncError = sync::Error; + + async fn sync_db( + context: E, + config: Self::Config, + resolver: R, + mut target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + mut finish: Option>, + reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + let mut attempt = 0u64; + let mut tip_updates = Some(tip_updates); + loop { + if let Some(tip_updates) = tip_updates.as_mut() { + if let Some(update) = drain_latest_target(tip_updates) { + target = update; + } + } + + let context = context.child("sync").with_attribute("attempt", attempt); + attempt += 1; + let update_future = tip_updates.as_mut().map_or_else( + || Either::Right(pending()), + |updates| Either::Left(updates.recv()), + ); + let db = select! { + update = update_future => { + let Some(update) = update else { + tip_updates = None; + continue; + }; + target = update; + continue; + }, + db = sync::compact::sync(sync::compact::Config:: { + context, + resolver: resolver.clone(), + target: target.clone(), + db_config: config.clone(), + }) => db?, + }; + + if let Some(tip_updates) = tip_updates.as_mut() { + if let Some(update) = drain_latest_target(tip_updates) { + target = update; + continue; + } + } + + if let Some(reached_target) = reached_target.as_ref() { + if reached_target.send(target.clone()).await.is_err() { + return Ok(db); + } + } + + let Some(finish) = finish.as_mut() else { + return Ok(db); + }; + let Some(tip_updates) = tip_updates.as_mut() else { + return Ok(db); + }; + select! 
{ + _ = finish.recv() => return Ok(db), + update = tip_updates.recv() => { + let Some(update) = update else { + return Ok(db); + }; + target = update; + }, + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use commonware_cryptography::{sha256::Digest, Sha256}; + use commonware_parallel::Sequential; + use commonware_runtime::{ + buffer::paged::CacheRef, deterministic, BufferPooler, Runner as _, Spawner as _, + Supervisor as _, + }; + use commonware_storage::{ + journal::contiguous::fixed::Config as FixedJournalConfig, + merkle::{compact::Config as MerkleConfig, full::Config as FullMerkleConfig, mmr}, + qmdb::keyless as storage_keyless, + }; + use commonware_utils::{sequence::U64, NZUsize, NZU16, NZU64}; + use std::time::Duration; + + type FixedDb = fixed::CompactDb; + type FullFixedDb = + storage_keyless::fixed::Db; + type VariableDb = variable::CompactDb< + mmr::Family, + deterministic::Context, + Vec, + Sha256, + (commonware_codec::RangeCfg, ()), + Sequential, + >; + + #[derive(Clone)] + struct SupersedingCompactResolver { + source: Arc, + stale_target: sync::compact::Target, + stale_request_tx: mpsc::Sender<()>, + } + + impl sync::compact::Resolver for SupersedingCompactResolver { + type Family = mmr::Family; + type Digest = Digest; + type Op = storage_keyless::fixed::Operation; + type Error = sync::compact::ServeError; + + async fn get_compact_state( + &self, + target: sync::compact::Target, + ) -> Result, Self::Error> + { + if target == self.stale_target { + let _ = self.stale_request_tx.send(()).await; + return futures::future::pending().await; + } + + sync::compact::Resolver::get_compact_state(&self.source, target).await + } + } + + fn fixed_config(suffix: &str) -> fixed::CompactConfig { + fixed::CompactConfig { + merkle: MerkleConfig { + partition: format!("stateful-keyless-unjournaled-{suffix}"), + strategy: Sequential, + }, + commit_codec_config: (), + } + } + + fn full_fixed_config( + suffix: &str, + pooler: &impl BufferPooler, + ) -> 
storage_keyless::fixed::Config { + let page_cache = CacheRef::from_pooler(pooler, NZU16!(101), NZUsize!(11)); + storage_keyless::fixed::Config { + merkle: FullMerkleConfig { + journal_partition: format!("stateful-keyless-full-journal-{suffix}"), + metadata_partition: format!("stateful-keyless-full-metadata-{suffix}"), + items_per_blob: NZU64!(11), + write_buffer: NZUsize!(1024), + strategy: Sequential, + page_cache: page_cache.clone(), + }, + log: FixedJournalConfig { + partition: format!("stateful-keyless-full-log-{suffix}"), + items_per_blob: NZU64!(7), + page_cache, + write_buffer: NZUsize!(1024), + }, + } + } + + const fn sync_config() -> SyncEngineConfig { + SyncEngineConfig { + fetch_batch_size: NZU64!(1), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NZUsize!(1), + max_retained_roots: 0, + } + } + + fn assert_managed_db>() {} + + fn assert_state_sync_db() + where + T: StateSyncDb, + { + } + + #[test] + fn keyless_unjournaled_trait_impls_compile() { + assert_managed_db::(); + assert_managed_db::(); + assert_state_sync_db::>(); + assert_state_sync_db::>(); + } + + #[test] + fn managed_db_finalize_commits_fixed_keyless_unjournaled_batches() { + deterministic::Runner::default().start(|context| async move { + let config = fixed_config("managed-db"); + let db = FixedDb::init(context.child("db"), config).await.unwrap(); + let db = Arc::new(AsyncRwLock::new(db)); + + let batch = >::new_batch(&db) + .await + .append(U64::new(7)) + .with_inactivity_floor(mmr::Location::new(1)) + .with_metadata(U64::new(9)); + let merkleized = crate::stateful::db::Unmerkleized::merkleize(batch) + .await + .unwrap(); + let expected_root = merkleized.root(); + + { + let mut guard = db.write().await; + >::finalize(&mut *guard, merkleized) + .await + .unwrap(); + } + + let guard = db.read().await; + assert_eq!(guard.root(), expected_root); + assert_eq!(guard.get_metadata(), Some(U64::new(9))); + + let target = >::sync_target(&*guard).await; + 
assert_eq!(target.root, guard.root()); + assert_eq!(target.leaf_count, mmr::Location::new(3)); + }); + } + + #[test] + fn state_sync_fetches_fixed_keyless_compact_state() { + deterministic::Runner::default().start(|context| async move { + let mut source = FixedDb::init(context.child("source"), fixed_config("source")) + .await + .unwrap(); + let floor = source.inactivity_floor_loc(); + let batch = + source + .new_batch() + .append(U64::new(7)) + .merkleize(&source, Some(U64::new(9)), floor); + source.apply_batch(batch).unwrap(); + source.sync().await.unwrap(); + + let target = source.current_target(); + let (_update_tx, update_rx) = mpsc::channel(1); + let synced = >>::sync_db( + context.child("target"), + fixed_config("target"), + Arc::new(source), + target.clone(), + update_rx, + None, + None, + sync_config(), + ) + .await + .unwrap(); + + assert_eq!(synced.current_target(), target); + assert_eq!(synced.get_metadata(), Some(U64::new(9))); + }); + } + + #[test] + fn state_sync_drains_queued_target_before_reporting_reached() { + deterministic::Runner::default().start(|context| async move { + let mut source = FullFixedDb::init( + context.child("source"), + full_fixed_config("source", &context), + ) + .await + .unwrap(); + + let floor = source.inactivity_floor_loc(); + let batch = + source + .new_batch() + .append(U64::new(7)) + .merkleize(&source, Some(U64::new(9)), floor); + source.apply_batch(batch).await.unwrap(); + source.sync().await.unwrap(); + let first_target = sync::compact::Target { + root: source.root(), + leaf_count: source.bounds().await.end, + }; + + let floor = source.inactivity_floor_loc(); + let batch = source.new_batch().append(U64::new(8)).merkleize( + &source, + Some(U64::new(10)), + floor, + ); + source.apply_batch(batch).await.unwrap(); + source.sync().await.unwrap(); + let second_target = sync::compact::Target { + root: source.root(), + leaf_count: source.bounds().await.end, + }; + + let (update_tx, update_rx) = mpsc::channel(1); + 
update_tx.send(second_target.clone()).await.unwrap(); + let (reached_tx, mut reached_rx) = mpsc::channel(1); + let synced = >>::sync_db( + context.child("target"), + fixed_config("target"), + Arc::new(source), + first_target, + update_rx, + None, + Some(reached_tx), + sync_config(), + ) + .await + .unwrap(); + + assert_eq!(reached_rx.recv().await, Some(second_target.clone())); + assert_eq!(synced.current_target(), second_target); + assert_eq!(synced.get_metadata(), Some(U64::new(10))); + }); + } + + #[test] + fn state_sync_supersedes_in_flight_stale_compact_target() { + deterministic::Runner::default().start(|context| async move { + let mut source = + FixedDb::init(context.child("source"), fixed_config("supersede-source")) + .await + .unwrap(); + + let floor = source.inactivity_floor_loc(); + let batch = + source + .new_batch() + .append(U64::new(7)) + .merkleize(&source, Some(U64::new(9)), floor); + source.apply_batch(batch).unwrap(); + source.sync().await.unwrap(); + let stale_target = source.current_target(); + + let floor = source.inactivity_floor_loc(); + let batch = source.new_batch().append(U64::new(8)).merkleize( + &source, + Some(U64::new(10)), + floor, + ); + source.apply_batch(batch).unwrap(); + source.sync().await.unwrap(); + let latest_target = source.current_target(); + + let (stale_request_tx, mut stale_request_rx) = mpsc::channel(1); + let resolver = SupersedingCompactResolver { + source: Arc::new(source), + stale_target: stale_target.clone(), + stale_request_tx, + }; + + let (update_tx, update_rx) = mpsc::channel(1); + let sync_handle = context.child("sync").spawn(move |context| async move { + >::sync_db( + context.child("target"), + fixed_config("supersede-target"), + resolver, + stale_target, + update_rx, + None, + None, + sync_config(), + ) + .await + }); + + context + .timeout(Duration::from_secs(1), async move { + stale_request_rx.recv().await.unwrap(); + }) + .await + .expect("sync should request the stale target first"); + 
update_tx.send(latest_target.clone()).await.unwrap(); + + let synced = context + .timeout(Duration::from_secs(1), sync_handle) + .await + .expect("sync should switch to the latest target") + .expect("spawned sync task should complete") + .unwrap(); + + assert_eq!(synced.current_target(), latest_target); + assert_eq!(synced.get_metadata(), Some(U64::new(10))); + }); + } + + #[test] + fn managed_db_rewinds_fixed_keyless_unjournaled_one_commit_range() { + deterministic::Runner::default().start(|context| async move { + let config = fixed_config("rewind"); + let mut db = FixedDb::init(context.child("db"), config).await.unwrap(); + + let floor = db.inactivity_floor_loc(); + let batch = + db.new_batch() + .append(U64::new(1)) + .merkleize(&db, Some(U64::new(11)), floor); + db.apply_batch(batch).unwrap(); + db.sync().await.unwrap(); + let first_target = >::sync_target(&db).await; + + let floor = db.inactivity_floor_loc(); + let batch = + db.new_batch() + .append(U64::new(2)) + .merkleize(&db, Some(U64::new(22)), floor); + db.apply_batch(batch).unwrap(); + db.sync().await.unwrap(); + let second_target = >::sync_target(&db).await; + assert_ne!(second_target, first_target); + + >::rewind_to_target(&mut db, first_target.clone()) + .await + .unwrap(); + + let rewound_target = >::sync_target(&db).await; + assert_eq!(rewound_target, first_target); + assert_eq!(db.get_metadata(), Some(U64::new(11))); + }); + } +} diff --git a/glue/src/stateful/db/mod.rs b/glue/src/stateful/db/mod.rs new file mode 100644 index 00000000000..499c4d55622 --- /dev/null +++ b/glue/src/stateful/db/mod.rs @@ -0,0 +1,3038 @@ +//! Traits for database batch lifecycle and startup sync in [`Stateful`](super::Stateful). +//! +//! This module defines the boundary between stateful application logic and +//! storage backends (QMDB variants). +//! +//! # Batch Lifecycle +//! +//! Normal execution has three stages: +//! 1. [`Unmerkleized`]: mutable, in-progress batch (concrete types expose reads and writes). +//! 2. 
[`Merkleized`]: a sealed batch with a computed root. +//! 3. Finalization: persist the sealed batch via [`ManagedDb::finalize`]. +//! +//! [`DatabaseSet`] groups one or more [`ManagedDb`] instances into one logical +//! unit for execution and commit. +//! +//! # Startup State Sync +//! +//! Startup sync is expressed by two traits: +//! - [`StateSyncDb`]: per-database sync entrypoint. +//! - [`StateSyncSet`]: set-level orchestration. +//! +//! ## Anchors +//! +//! Each set of sync targets is paired with an anchor `(Height, Round, D)` where +//! `D` is the block digest. The db layer never interprets the anchor; it +//! only tracks which anchor each database converged on. +//! +//! On completion, [`StateSyncSet::sync`] returns the anchor that all databases +//! agreed on. The caller uses this to set the marshal floor and the +//! last-processed digest, ensuring they match the actual convergence point +//! rather than whatever marshal's head happens to be (which may have advanced +//! during sync). +//! +//! ## Convergence Algorithm (tuple sets) +//! +//! Tuple [`StateSyncSet`] implementations assign each `(anchor, targets)` +//! pair a *generation* number and use this algorithm: +//! +//! 1. Forward tip updates only to databases that have not yet reported +//! "reached target". Reached databases are frozen to prevent them from +//! running ahead to a newer anchor. +//! 2. When all databases report reached, compare the generation each was +//! assigned when it reported. +//! 3. If all generations match, every database synced to targets from the +//! same anchor. Return that anchor. +//! 4. If generations differ, *regroup*: re-send the highest-reached +//! generation's targets to the behind databases, clear their reached +//! state, and repeat from step 1. +//! +//! The coordinator continuously drains tip updates and keeps only the latest +//! value before forwarding, which avoids target-channel backpressure buildup. +//! 
The `generation_state` map is pruned after every dispatch to only retain +//! generations currently assigned to at least one database, so memory usage +//! is bounded by the number of databases regardless of how long sync runs. + +use commonware_consensus::types::{Height, Round}; +use commonware_cryptography::Digest; +use commonware_macros::select; +use commonware_runtime::{Metrics, Spawner}; +use commonware_utils::{ + channel::{fallible::AsyncFallibleExt, mpsc, ring}, + sync::AsyncRwLock, +}; +use futures::{ + future::{pending, Either}, + join, +}; +use std::{ + collections::BTreeMap, + fmt::Debug, + future::Future, + num::{NonZeroU64, NonZeroUsize}, + sync::Arc, +}; + +pub mod any; +pub mod compact_p2p; +pub mod current; +pub mod immutable; +pub mod immutable_unjournaled; +pub mod keyless; +pub mod keyless_unjournaled; +pub mod p2p; + +/// Mutable batch state before merkleization. +/// +/// Concrete types provide key-value operations (`get`, `write`, `set`, +/// `append`, etc.) as inherent methods; the generic wrapper only needs +/// [`merkleize`](Self::merkleize). +pub trait Unmerkleized: Sized + Send { + /// The merkleized batch produced by [`merkleize`](Self::merkleize). + type Merkleized: Merkleized; + + /// The error type returned by fallible operations. + type Error: Send; + + /// Resolve all mutations, compute the new state root, and produce a + /// merkleized batch. + fn merkleize(self) -> impl Future> + Send; +} + +/// Sealed batch state with a computed root. +/// +/// The application uses [`root`](Self::root) in block headers, and the wrapper +/// later finalizes this batch. +pub trait Merkleized: Sized + Send + Sync { + /// The digest type used for the state root. + type Digest: Digest; + + /// The unmerkleized batch type produced by [`new_batch`](Self::new_batch). + type Unmerkleized: Unmerkleized; + + /// The canonical state root committed in block headers. 
+ fn root(&self) -> Self::Digest; + + /// Create a child unmerkleized batch that reads through this batch's + /// pending changes before falling back to the committed database state. + /// + /// In QMDB, this maps to `merkleized_batch.new_batch()`. + fn new_batch(&self) -> Self::Unmerkleized; +} + +/// One database managed by the [`Stateful`](super::Stateful) wrapper. +/// +/// Implementations create new batches from committed state and persist finalized +/// batches back to storage. +/// +/// [`new_batch`](Self::new_batch) receives `Arc>` so batch +/// types can keep read-through access to committed state. +/// +/// `E` is a trait generic (not an associated type), so one database type can +/// work across runtimes that satisfy the bounds. +pub trait ManagedDb: Send + Sync + Sized { + /// An in-progress batch of mutations that has not yet been merkleized. + type Unmerkleized: Unmerkleized; + + /// A batch whose root has been computed but has not yet been applied to + /// the underlying database. + /// + /// Constrained so that [`Merkleized::new_batch`] produces the same + /// [`Unmerkleized`] type as [`ManagedDb::new_batch`](Self::new_batch). + type Merkleized: Merkleized; + + /// The error type returned by fallible operations. + type Error: Debug + Send; + + /// Configuration needed to construct a new database instance. + type Config: Send; + + /// Sync target type for state sync of this database. + /// + /// Typically a database-specific state commitment plus the operation range needed to reach it. + type SyncTarget: Clone + PartialEq + Send + Sync; + + /// Construct a new database from its configuration. + fn init( + context: E, + config: Self::Config, + ) -> impl Future> + Send; + + /// Create a new unmerkleized batch rooted at the database's committed + /// state. + /// + /// The `db` parameter is the `Arc>` that wraps this + /// database, allowing batch types to capture a shared reference for + /// read-through to committed state. 
+ fn new_batch(db: &Arc>) -> impl Future + Send; + + /// Return true if a merkleized batch matches a committed sync target. + fn matches_sync_target(batch: &Self::Merkleized, target: &Self::SyncTarget) -> bool; + + /// Apply a merkleized batch's changeset to the underlying database. + /// + /// In QMDB, this encapsulates calling `merkleized.finalize()` to produce + /// a `Changeset`, then `db.apply_batch(changeset)` and `db.commit()`. + fn finalize( + &mut self, + batch: Self::Merkleized, + ) -> impl Future> + Send; + + /// Return the sync target for this database's current committed state. + fn sync_target(&self) -> impl Future + Send; + + /// Rewind committed state to `target`. + /// + /// Implementations must ensure rewind effects are durable before returning + /// `Ok(())` (for example by committing after rewind). + fn rewind_to_target( + &mut self, + target: Self::SyncTarget, + ) -> impl Future> + Send; +} + +/// A collection of individually locked [`ManagedDb`] instances. +/// +/// Each database is wrapped in `Arc>`, so the set is cheap to +/// clone and each database can be shared without a global lock. +/// +/// `E` is a trait generic (not an associated type), so one set type can work +/// across runtimes that satisfy the bounds. +pub trait DatabaseSet: Clone + Send + Sync + 'static { + /// Tuple of [`ManagedDb::Unmerkleized`] for every database in the set. + type Unmerkleized: Send; + + /// Tuple of [`ManagedDb::Merkleized`] for every database in the set. + type Merkleized: Send + Sync; + + /// Configuration needed to construct every database in the set. + /// + /// - Single database sets use that database's [`ManagedDb::Config`]. + /// - Multi-database tuple sets use a tuple of per-database configs + /// `(Db1::Config, Db2::Config, ...)`. + type Config: Send; + + /// Per-database sync targets extracted from a finalized block. + /// + /// For a single-database set this is one target. For multi-database sets it is a tuple of + /// targets, one per database. 
+ type SyncTargets: Clone + PartialEq + Send + Sync; + + /// Construct the database set from its configuration. + fn init(context: E, config: Self::Config) -> impl Future + Send; + + /// Create unmerkleized batches from each database's committed state. + /// + /// Acquires a read lock on each database. + fn new_batches(&self) -> impl Future + Send; + + /// Create child unmerkleized batches from a pending merkleized parent. + /// + /// No lock is needed; reads come from the in-memory merkleized state. + fn fork_batches(parent: &Self::Merkleized) -> Self::Unmerkleized; + + /// Return true if merkleized batches match the committed sync targets. + fn matches_sync_targets(batches: &Self::Merkleized, targets: &Self::SyncTargets) -> bool; + + /// Apply each merkleized batch's changeset to its underlying database. + /// + /// Acquires a write lock on each database. + fn finalize(&self, batches: Self::Merkleized) -> impl Future + Send; + + /// Return sync targets for the set's current committed state. + fn committed_targets(&self) -> impl Future + Send; + + /// Rewind the set to the provided per-database targets. + /// + /// Rewind failures are fatal for startup recovery and therefore panic. + fn rewind_to_targets(&self, targets: Self::SyncTargets) -> impl Future + Send; +} + +/// Parameters for a one-time state-sync pass. +#[derive(Clone, Copy, Debug)] +pub struct SyncEngineConfig { + /// Maximum operations fetched per resolver request. + pub fetch_batch_size: NonZeroU64, + + /// Number of operations applied per local apply step. + pub apply_batch_size: usize, + + /// Maximum number of outstanding resolver requests. + pub max_outstanding_requests: usize, + + /// Capacity of per-database target-update channels. + pub update_channel_size: NonZeroUsize, + + /// Number of historical roots to retain for proof verification across + /// target updates. + pub max_retained_roots: usize, +} + +/// A [`ManagedDb`] with a startup state-sync entrypoint. 
+pub trait StateSyncDb: ManagedDb { + /// Error returned by the state-sync engine for this database. + type SyncError: Debug + Send; + + /// Run state-sync for this database and return a fully-initialized instance. + #[allow(clippy::too_many_arguments)] + fn sync_db( + context: E, + config: Self::Config, + resolver: R, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + finish: Option>, + reached_target: Option>, + sync_config: SyncEngineConfig, + ) -> impl Future> + Send; +} + +/// Block metadata identifying the block that produced a set +/// of sync targets. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Anchor { + /// Height of the anchoring block. + pub height: Height, + /// Consensus round of the anchoring block. + pub round: Round, + /// Digest of the anchoring block. + pub digest: D, +} + +/// A [`DatabaseSet`] that can run one-time startup state sync. +/// +/// `D` is the block digest type. Each set of sync targets is paired +/// with an [`Anchor`] identifying the block that produced those targets. +/// On convergence, `sync` returns the anchor that all databases agreed on. +pub trait StateSyncSet: DatabaseSet +where + D: Digest, +{ + /// Error returned if any database in the set fails startup state-sync. + type Error: Debug + Send; + + /// Run one-time startup state-sync and return the initialized set + /// together with the anchor all databases converged on. + fn sync( + context: E, + config: Self::Config, + resolvers: R, + anchor: Anchor, + targets: Self::SyncTargets, + tip_updates: ring::Receiver<(Anchor, Self::SyncTargets)>, + sync_config: SyncEngineConfig, + ) -> impl Future), Self::Error>> + Send; +} + +/// Implement [`DatabaseSet`] for a single [`ManagedDb`] behind a lock. 
+impl + 'static> DatabaseSet for Arc> { + type Unmerkleized = T::Unmerkleized; + type Merkleized = T::Merkleized; + type Config = T::Config; + type SyncTargets = T::SyncTarget; + + async fn init(context: E, config: Self::Config) -> Self { + let db = T::init(context, config) + .await + .expect("database init failed"); + Self::new(AsyncRwLock::new(db)) + } + + async fn new_batches(&self) -> Self::Unmerkleized { + T::new_batch(self).await + } + + fn fork_batches(parent: &Self::Merkleized) -> Self::Unmerkleized { + parent.new_batch() + } + + fn matches_sync_targets(batches: &Self::Merkleized, targets: &Self::SyncTargets) -> bool { + T::matches_sync_target(batches, targets) + } + + async fn finalize(&self, batches: Self::Merkleized) { + let mut database = self.write().await; + finalize_or_panic(&mut *database, batches, None).await; + } + + async fn committed_targets(&self) -> Self::SyncTargets { + let database = self.read().await; + T::sync_target(&*database).await + } + + async fn rewind_to_targets(&self, target: Self::SyncTargets) { + let mut database = self.write().await; + rewind_or_panic(&mut *database, target, None).await; + } +} + +impl StateSyncSet for Arc> +where + E: Send + Sync + Metrics, + T: StateSyncDb + 'static, + R: Send + 'static, + D: Digest, +{ + type Error = T::SyncError; + + async fn sync( + context: E, + config: Self::Config, + resolver: R, + anchor: Anchor, + target: Self::SyncTargets, + tip_updates: ring::Receiver<(Anchor, Self::SyncTargets)>, + sync_config: SyncEngineConfig, + ) -> Result<(Self, Anchor), Self::Error> { + let (target_tx, target_rx) = mpsc::channel(sync_config.update_channel_size.get()); + let (finish_tx, finish_rx) = mpsc::channel(1); + let (reached_tx, mut reached_rx) = mpsc::channel(1); + + let mut current_target = target.clone(); + let sync = T::sync_db( + context, + config, + resolver, + target, + target_rx, + Some(finish_rx), + Some(reached_tx), + sync_config, + ); + + let coordinator = async { + let mut current_anchor = 
anchor; + let mut tip_updates = Some(tip_updates); + loop { + let update_future = tip_updates.as_mut().map_or_else( + || Either::Right(pending()), + |updates| Either::Left(updates.recv()), + ); + select! { + reached = reached_rx.recv() => { + let Some(reached) = reached else { + return current_anchor; + }; + if reached != current_target { + continue; + } + let _ = finish_tx.send_lossy(()).await; + return current_anchor; + }, + update = update_future => { + let Some((new_anchor, new_target)) = update else { + tip_updates = None; + continue; + }; + // Sync targets must only move forward. + if new_anchor.height <= current_anchor.height { + continue; + } + current_anchor = new_anchor; + current_target = new_target.clone(); + if !target_tx.send_lossy(new_target).await { + return current_anchor; + } + }, + } + } + }; + + let (db_result, converged_anchor) = join!(sync, coordinator); + let database = db_result?; + Ok((Self::new(AsyncRwLock::new(database)), converged_anchor)) + } +} + +/// Implement [`DatabaseSet`] for a tuple of individually-locked +/// [`ManagedDb`] instances. +macro_rules! 
impl_database_set { + ($($T:ident : $idx:tt),+) => { + impl + 'static),+> DatabaseSet + for ($(Arc>,)+) + { + type Unmerkleized = ($($T::Unmerkleized,)+); + type Merkleized = ($($T::Merkleized,)+); + type Config = ($($T::Config,)+); + type SyncTargets = ($($T::SyncTarget,)+); + + async fn init(context: E, config: Self::Config) -> Self { + let result = join!($( + async { + let db = $T::init( + context.child(concat!("db_", stringify!($idx))), + config.$idx, + ) + .await + .expect(concat!( + "database init failed (index ", + stringify!($idx), + ", type ", + stringify!($T), + ")", + )); + Arc::new(AsyncRwLock::new(db)) + }, + )+); + result + } + + async fn new_batches(&self) -> Self::Unmerkleized { + join!($($T::new_batch(&self.$idx),)+) + } + + fn fork_batches(parent: &Self::Merkleized) -> Self::Unmerkleized { + ($(parent.$idx.new_batch(),)+) + } + + fn matches_sync_targets(batches: &Self::Merkleized, targets: &Self::SyncTargets) -> bool { + $($T::matches_sync_target(&batches.$idx, &targets.$idx))&&+ + } + + async fn finalize(&self, batches: Self::Merkleized) { + join!($( + async { + let mut database = self.$idx.write().await; + finalize_or_panic(&mut *database, batches.$idx, Some($idx)).await; + }, + )+); + } + + async fn committed_targets(&self) -> Self::SyncTargets { + join!($( + async { + let database = self.$idx.read().await; + $T::sync_target(&*database).await + }, + )+) + } + + async fn rewind_to_targets(&self, targets: Self::SyncTargets) { + join!($( + async { + let mut database = self.$idx.write().await; + rewind_or_panic(&mut *database, targets.$idx, Some($idx)).await; + }, + )+); + } + } + }; +} + +impl_database_set!(DB1: 0); +impl_database_set!(DB1: 0, DB2: 1); +impl_database_set!(DB1: 0, DB2: 1, DB3: 2); +impl_database_set!(DB1: 0, DB2: 1, DB3: 2, DB4: 3); +impl_database_set!(DB1: 0, DB2: 1, DB3: 2, DB4: 3, DB5: 4); +impl_database_set!(DB1: 0, DB2: 1, DB3: 2, DB4: 3, DB5: 4, DB6: 5); +impl_database_set!(DB1: 0, DB2: 1, DB3: 2, DB4: 3, DB5: 4, DB6: 5, DB7: 
6); +impl_database_set!(DB1: 0, DB2: 1, DB3: 2, DB4: 3, DB5: 4, DB6: 5, DB7: 6, DB8: 7); + +struct DbSyncChannels { + target_tx: mpsc::Sender, + target_rx: mpsc::Receiver, + finish_tx: mpsc::Sender<()>, + finish_rx: mpsc::Receiver<()>, + generation_tx: mpsc::Sender<(usize, T)>, + generation_rx: mpsc::Receiver<(usize, T)>, + reached_tx: mpsc::Sender, + reached_rx: mpsc::Receiver, +} + +impl DbSyncChannels { + fn new(update_channel_size: usize) -> Self { + let (target_tx, target_rx) = mpsc::channel(update_channel_size); + let (finish_tx, finish_rx) = mpsc::channel(1); + let (generation_tx, generation_rx) = mpsc::channel(update_channel_size); + let (reached_tx, reached_rx) = mpsc::channel(1); + Self { + target_tx, + target_rx, + finish_tx, + finish_rx, + generation_tx, + generation_rx, + reached_tx, + reached_rx, + } + } +} + +struct CoordinatorSyncSenders { + target_tx: mpsc::Sender, + finish_tx: mpsc::Sender<()>, + generation_tx: mpsc::Sender<(usize, T)>, +} + +macro_rules! impl_state_sync_set { + ($($T:ident : $R:ident : $idx:tt),+) => { + impl StateSyncSet for ($(Arc>,)+) + where + E: Send + Sync + Spawner + Metrics, + D: Digest, + $( + $T: StateSyncDb + 'static, + $R: Send + 'static, + )+ + { + type Error = String; + + async fn sync( + context: E, + config: Self::Config, + resolvers: ($($R,)+), + anchor: Anchor, + targets: Self::SyncTargets, + tip_updates: ring::Receiver<(Anchor, Self::SyncTargets)>, + sync_config: SyncEngineConfig, + ) -> Result<(Self, Anchor), Self::Error> { + let db_channels = ($( + DbSyncChannels::<<$T as ManagedDb>::SyncTarget>::new( + sync_config.update_channel_size.get(), + ), + )+); + let coordinator_senders = ($( + CoordinatorSyncSenders { + target_tx: db_channels.$idx.target_tx.clone(), + finish_tx: db_channels.$idx.finish_tx.clone(), + generation_tx: db_channels.$idx.generation_tx.clone(), + }, + )+); + let coordinator_owned_senders = ($( + CoordinatorSyncSenders { + target_tx: db_channels.$idx.target_tx, + finish_tx: 
db_channels.$idx.finish_tx, + generation_tx: db_channels.$idx.generation_tx, + }, + )+); + let (reached_event_tx, mut reached_event_rx) = mpsc::channel(16); + let (completion_tx, mut completion_rx) = mpsc::channel(1); + let db_count = [$($idx,)+].len(); + let coordinator_targets = targets.clone(); + let first_db_error: Arc>> = + Arc::new(commonware_utils::sync::Mutex::new(None)); + let coordinator_result: Arc>>> = + Arc::new(commonware_utils::sync::Mutex::new(None)); + let finish_coordinator = { + let coordinator_result = coordinator_result.clone(); + async move { + // Keep ownership of the original per-database senders inside this task so + // they are dropped as soon as the coordinator exits. + let coordinator_owned_senders = coordinator_owned_senders; + let mut tip_updates = Some(tip_updates); + let mut state = CoordinatorState::new(db_count, anchor, coordinator_targets); + + loop { + // Phase 1: Drain reached events. + loop { + match reached_event_rx.try_recv() { + Ok((idx, generation)) => state.record_reached(idx, generation), + Err(mpsc::error::TryRecvError::Empty) => break, + Err(mpsc::error::TryRecvError::Disconnected) => return, + } + } + + // Phase 2: Drain tip updates; keep only the latest. + if let Some(updates) = tip_updates.as_mut() { + loop { + match updates.try_recv() { + Ok((a, t)) => state.record_tip_update(a, t), + Err(ring::TryRecvError::Empty) => break, + Err(ring::TryRecvError::Disconnected) => { + tip_updates = None; + break; + } + } + } + } + + // Phase 3: Decide what to do. 
+ match state.next_action() { + CoordinatorAction::Converged(anchor) => { + $( + let _ = coordinator_senders.$idx.finish_tx.send_lossy(()).await; + )+ + *coordinator_result.lock() = Some(anchor); + return; + } + CoordinatorAction::Dispatch { + generation, + targets: dispatch_targets, + } => { + $( + if state.should_dispatch($idx) { + let dispatch_target = dispatch_targets.$idx.clone(); + if !coordinator_senders.$idx + .generation_tx + .send_lossy((generation, dispatch_target.clone())) + .await + { + return; + } + if !coordinator_senders.$idx + .target_tx + .send_lossy(dispatch_target) + .await + { + return; + } + } + )+ + continue; + } + CoordinatorAction::Wait => {} + } + + // Phase 4: Block until the next event. + let update_future = tip_updates.as_mut().map_or_else( + || Either::Right(pending()), + |updates| Either::Left(updates.recv()), + ); + select! { + reached_event = reached_event_rx.recv() => { + let Some((idx, generation)) = reached_event else { + return; + }; + state.record_reached(idx, generation); + }, + _ = completion_rx.recv() => { + // A database task completed (success or failure). Close all + // outstanding per-database channels immediately so peers + // waiting on `finish_rx` or `target_rx` can terminate. 
+ drop(coordinator_owned_senders); + return; + }, + update = update_future => { + let Some((a, t)) = update else { + tip_updates = None; + continue; + }; + state.record_tip_update(a, t); + }, + }; + } + } + }; + let synced = join!( + $( + async { + let first_db_error = first_db_error.clone(); + let mut reached_target_rx = db_channels.$idx.reached_rx; + let mut generation_rx = Some(db_channels.$idx.generation_rx); + let mut current_generation = 0usize; + let mut current_target = targets.$idx.clone(); + let mut last_reached_target = None; + let mut last_reported_generation = None; + let reached_event_sender = reached_event_tx.clone(); + let completion_signal = completion_tx.clone(); + let sync = $T::sync_db( + context.child(concat!("db_", stringify!($idx))), + config.$idx, + resolvers.$idx, + targets.$idx, + db_channels.$idx.target_rx, + Some(db_channels.$idx.finish_rx), + Some(db_channels.$idx.reached_tx), + sync_config, + ); + let forward_reached = async move { + loop { + drain_generation_updates( + &mut generation_rx, + &mut current_generation, + &mut current_target, + &last_reached_target, + &mut last_reported_generation, + &reached_event_sender, + $idx, + ) + .await; + + let update_future = generation_rx.as_mut().map_or_else( + || Either::Right(pending()), + |updates| Either::Left(updates.recv()), + ); + select! 
{ + reached_target = reached_target_rx.recv() => { + let Some(reached_target) = reached_target else { + return; + }; + + last_reached_target = Some(reached_target.clone()); + drain_generation_updates( + &mut generation_rx, + &mut current_generation, + &mut current_target, + &last_reached_target, + &mut last_reported_generation, + &reached_event_sender, + $idx, + ) + .await; + + if reached_target != current_target { + continue; + } + + if last_reported_generation != Some(current_generation) { + if !reached_event_sender + .send_lossy(($idx, current_generation)) + .await + { + return; + } + last_reported_generation = Some(current_generation); + } + }, + update = update_future => { + let Some((generation, target)) = update else { + generation_rx = None; + continue; + }; + current_generation = generation; + current_target = target; + if last_reached_target.as_ref() == Some(¤t_target) + && last_reported_generation != Some(current_generation) + { + if !reached_event_sender + .send_lossy(($idx, current_generation)) + .await + { + return; + } + last_reported_generation = Some(current_generation); + } + }, + }; + } + }; + let (sync_result, _) = join!(sync, forward_reached); + let result = sync_result + .map(|database| Arc::new(AsyncRwLock::new(database))) + .map_err(|err| { + format!( + "state sync failed (index {}, db {}): {err:?}", + $idx, + core::any::type_name::<$T>(), + ) + }); + if let Err(err) = &result { + let mut first = first_db_error.lock(); + if first.is_none() { + *first = Some(err.clone()); + } + } + let _ = completion_signal.send_lossy(()).await; + result + }, + )+ + finish_coordinator, + ); + + if let Some(err) = first_db_error.lock().take() { + return Err(err); + } + + let synced = ($(synced.$idx?,)+); + let Some(converged_anchor) = coordinator_result.lock().take() else { + return Err("state sync coordinator did not report a converged anchor".into()); + }; + + Ok((synced, converged_anchor)) + } + } + }; +} + +impl_state_sync_set!(DB1: R1: 0, DB2: R2: 1); 
+impl_state_sync_set!(DB1: R1: 0, DB2: R2: 1, DB3: R3: 2); +impl_state_sync_set!(DB1: R1: 0, DB2: R2: 1, DB3: R3: 2, DB4: R4: 3); +impl_state_sync_set!(DB1: R1: 0, DB2: R2: 1, DB3: R3: 2, DB4: R4: 3, DB5: R5: 4); +impl_state_sync_set!(DB1: R1: 0, DB2: R2: 1, DB3: R3: 2, DB4: R4: 3, DB5: R5: 4, DB6: R6: 5); +impl_state_sync_set!( + DB1: R1: 0, + DB2: R2: 1, + DB3: R3: 2, + DB4: R4: 3, + DB5: R5: 4, + DB6: R6: 5, + DB7: R7: 6 +); +impl_state_sync_set!( + DB1: R1: 0, + DB2: R2: 1, + DB3: R3: 2, + DB4: R4: 3, + DB5: R5: 4, + DB6: R6: 5, + DB7: R7: 6, + DB8: R8: 7 +); + +async fn drain_generation_updates( + generation_rx: &mut Option>, + current_generation: &mut usize, + current_target: &mut T, + last_reached_target: &Option, + last_reported_generation: &mut Option, + reached_event_sender: &mpsc::Sender<(usize, usize)>, + idx: usize, +) where + T: Clone + PartialEq, +{ + if let Some(updates) = generation_rx.as_mut() { + loop { + match updates.try_recv() { + Ok((generation, target)) => { + *current_generation = generation; + *current_target = target; + + if last_reached_target.as_ref() == Some(current_target) + && *last_reported_generation != Some(*current_generation) + { + if !reached_event_sender + .send_lossy((idx, *current_generation)) + .await + { + return; + } + *last_reported_generation = Some(*current_generation); + } + } + Err(mpsc::error::TryRecvError::Empty) => break, + Err(mpsc::error::TryRecvError::Disconnected) => { + *generation_rx = None; + break; + } + } + } + } +} + +/// Per-database sync tracking state. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum DbSyncState { + /// Database is still syncing toward its assigned generation's targets. + Seeking { generation: usize }, + /// Database reported it reached its assigned generation's targets. 
+ Reached { generation: usize }, +} + +impl DbSyncState { + const fn generation(self) -> usize { + match self { + Self::Seeking { generation } | Self::Reached { generation } => generation, + } + } + + const fn is_reached(self) -> bool { + matches!(self, Self::Reached { .. }) + } +} + +/// What the coordinator should do after processing events. +enum CoordinatorAction { + /// Nothing to do; wait for the next event. + Wait, + /// Dispatch targets to non-reached databases for `generation`. + Dispatch { generation: usize, targets: T }, + /// All databases converged on the same generation. + Converged(Anchor), +} + +/// Pure state machine for multi-database sync convergence. +/// +/// Tracks which generation each database is assigned to, which have +/// reported "reached", and decides when to regroup or declare +/// convergence. +struct CoordinatorState { + dbs: Vec, + generation_state: BTreeMap, T)>, + current_generation: usize, + latest_tip: Option<(Anchor, T)>, + last_dispatched_anchor: Anchor, +} + +impl CoordinatorState { + fn new(db_count: usize, anchor: Anchor, targets: T) -> Self { + let dbs = vec![DbSyncState::Seeking { generation: 0 }; db_count]; + let mut generation_state = BTreeMap::new(); + generation_state.insert(0, (anchor, targets)); + Self { + dbs, + generation_state, + current_generation: 0, + latest_tip: None, + last_dispatched_anchor: anchor, + } + } + + /// Record that database `idx` reached `generation`. + /// + /// Reached events can arrive late. If the database has already been + /// re-assigned to a newer generation, stale events are ignored. + fn record_reached(&mut self, idx: usize, generation: usize) { + if self.dbs[idx].generation() != generation { + return; + } + if self.dbs[idx].is_reached() { + return; + } + self.dbs[idx] = DbSyncState::Reached { generation }; + } + + /// Record a new tip update. + /// + /// Sync targets must move strictly forward. Ignore stale and duplicate + /// anchors to avoid dispatching backward targets. 
+    fn record_tip_update(&mut self, anchor: Anchor, targets: T) {
+        // Compare against the pending tip when there is one; otherwise against
+        // the anchor of the most recently started generation.
+        let current_height = self
+            .latest_tip
+            .as_ref()
+            .map_or(self.last_dispatched_anchor.height, |(latest_anchor, _)| {
+                latest_anchor.height
+            });
+        if anchor.height <= current_height {
+            return;
+        }
+        self.latest_tip = Some((anchor, targets));
+    }
+
+    /// Determine the next action. Mutates internal state for regroup/dispatch.
+    ///
+    /// Returns which database indices should receive targets via
+    /// `dbs[idx].is_reached() == false` after a `Dispatch` action.
+    ///
+    /// Outcomes:
+    /// - every database reached the same generation and a tip is pending:
+    ///   start a new generation for all databases and dispatch it;
+    /// - every database reached the same generation and no tip is pending:
+    ///   `Converged` with that generation's anchor;
+    /// - every database reached, but on different generations: re-dispatch
+    ///   the highest generation to the databases that are behind;
+    /// - some database is still seeking: start a new generation for the
+    ///   seeking databases if a tip is pending, otherwise `Wait`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the coordinator tracks no databases, or if the state for a
+    /// generation that a database is assigned to is missing.
+    fn next_action(&mut self) -> CoordinatorAction {
+        let all_reached = self.dbs.iter().all(|db| db.is_reached());
+
+        if all_reached {
+            // `all` is vacuously true for an empty set, so state the invariant
+            // instead of failing on a bare `unwrap`.
+            let min_gen = self
+                .dbs
+                .iter()
+                .map(|db| db.generation())
+                .min()
+                .expect("coordinator must track at least one database");
+            let max_gen = self
+                .dbs
+                .iter()
+                .map(|db| db.generation())
+                .max()
+                .expect("coordinator must track at least one database");
+
+            if min_gen == max_gen {
+                if let Some((anchor, targets)) = self.latest_tip.take() {
+                    let generation = self.current_generation + 1;
+                    self.current_generation = generation;
+                    for db in &mut self.dbs {
+                        *db = DbSyncState::Seeking { generation };
+                    }
+                    self.generation_state
+                        .insert(generation, (anchor, targets.clone()));
+                    self.last_dispatched_anchor = anchor;
+                    self.prune_generations();
+                    return CoordinatorAction::Dispatch {
+                        generation,
+                        targets,
+                    };
+                }
+
+                // Only the anchor is needed here; copy it out rather than
+                // cloning the stored targets just to drop them.
+                let anchor = self
+                    .generation_state
+                    .get(&min_gen)
+                    .expect("missing state for converged generation")
+                    .0;
+                return CoordinatorAction::Converged(anchor);
+            }
+
+            // Regroup: reset behind databases to seek the highest generation.
+            let targets = self
+                .generation_state
+                .get(&max_gen)
+                .expect("missing state for regroup generation")
+                .1
+                .clone();
+            for db in &mut self.dbs {
+                if db.generation() != max_gen {
+                    *db = DbSyncState::Seeking {
+                        generation: max_gen,
+                    };
+                }
+            }
+            self.prune_generations();
+            return CoordinatorAction::Dispatch {
+                generation: max_gen,
+                targets,
+            };
+        }
+
+        // Not all reached. If there's a pending tip, dispatch it.
+        let Some((anchor, targets)) = self.latest_tip.take() else {
+            return CoordinatorAction::Wait;
+        };
+
+        let generation = self.current_generation + 1;
+        self.current_generation = generation;
+        // Databases that already reported "reached" keep their generation; only
+        // the ones still seeking are moved to the new one.
+        for db in &mut self.dbs {
+            if !db.is_reached() {
+                *db = DbSyncState::Seeking { generation };
+            }
+        }
+        self.generation_state
+            .insert(generation, (anchor, targets.clone()));
+        self.last_dispatched_anchor = anchor;
+
+        self.prune_generations();
+        CoordinatorAction::Dispatch {
+            generation,
+            targets,
+        }
+    }
+
+    /// Retain only generations referenced by at least one database.
+    fn prune_generations(&mut self) {
+        // Borrow `dbs` separately so the closure does not capture `self`, and
+        // avoid `gen` as an identifier: it is reserved from Rust 2024 onwards.
+        let dbs = &self.dbs;
+        self.generation_state
+            .retain(|generation, _| dbs.iter().any(|db| db.generation() == *generation));
+    }
+
+    /// Whether database `idx` is a non-reached recipient for dispatch.
+    fn should_dispatch(&self, idx: usize) -> bool {
+        !self.dbs[idx].is_reached()
+    }
+}
+
+/// Finalize `batch` on `database`, panicking if finalization fails.
+///
+/// The panic message contains the database type name and the error; when
+/// `index` is `Some`, it is included as well.
+async fn finalize_or_panic>(
+    database: &mut T,
+    batch: T::Merkleized,
+    index: Option,
+) {
+    // Mutable finalize failures are fatal by design because other databases in
+    // the same set may already have committed, leaving partially applied state.
+    if let Err(err) = database.finalize(batch).await {
+        match index {
+            Some(index) => panic!(
+                "database finalize failed (index {index}, type {}): {err:?}",
+                core::any::type_name::(),
+            ),
+            None => panic!(
+                "database finalize failed (type {}): {err:?}",
+                core::any::type_name::(),
+            ),
+        }
+    }
+}
+
+/// Rewind `database` to `target`, panicking if the rewind fails.
+///
+/// The panic message contains the database type name and the error; when
+/// `index` is `Some`, it is included as well.
+async fn rewind_or_panic>(
+    database: &mut T,
+    target: T::SyncTarget,
+    index: Option,
+) {
+    // Mutable rewind failures are fatal by design because the database handle
+    // may be internally diverged after a failed rewind.
+    if let Err(err) = database.rewind_to_target(target).await {
+        // Mention the index in the message only when the caller supplied one.
+        match index {
+            Some(index) => panic!(
+                "database rewind failed (index {index}, type {}): {err:?}",
+                core::any::type_name::(),
+            ),
+            None => panic!(
+                "database rewind failed (type {}): {err:?}",
+                core::any::type_name::(),
+            ),
+        }
+    }
+}
+
+/// A resolver that can attach a database at runtime.
+///
+/// Implementations receive a database handle after startup so they can
+/// serve incoming sync requests once the database is initialized.
+pub trait AttachableResolver: Clone + Send + Sync + 'static {
+    /// Attach a database for serving incoming requests.
+    fn attach_database(&self, db: Arc>) -> impl Future + Send;
+}
+
+/// Attach a database set to a resolver set with matching shape.
+pub trait AttachableResolverSet: Clone + Send + Sync + 'static {
+    /// Attach all databases to their corresponding resolvers.
+    fn attach_databases(&self, databases: DBs) -> impl Future + Send;
+}
+
+// Single-database case: one resolver is itself a resolver set, and attaching
+// the set simply forwards to `attach_database`.
+impl AttachableResolverSet>> for R
+where
+    R: AttachableResolver,
+    DB: Send + Sync + 'static,
+{
+    async fn attach_databases(&self, db: Arc>) {
+        self.attach_database(db).await;
+    }
+}
+
+// Implements `AttachableResolverSet` for a tuple of resolvers paired
+// element-by-element with a tuple of databases. Each `$R: $DB: $idx` triple
+// names a resolver type, its database type, and their shared tuple index.
+// The per-element `attach_database` futures are awaited together through
+// `futures::join!`.
+macro_rules!
impl_attachable_resolver_set {
+    ($($R:ident : $DB:ident : $idx:tt),+) => {
+        impl<$($R, $DB),+> AttachableResolverSet<($(Arc>,)+)> for ($($R,)+)
+        where
+            $(
+                $R: AttachableResolver<$DB>,
+                $DB: Send + Sync + 'static,
+            )+
+        {
+            async fn attach_databases(&self, databases: ($(Arc>,)+)) {
+                futures::join!($(
+                    self.$idx.attach_database(databases.$idx),
+                )+);
+            }
+        }
+    };
+}
+
+// Tuple arities 2 through 8.
+impl_attachable_resolver_set!(R1: DB1: 0, R2: DB2: 1);
+impl_attachable_resolver_set!(R1: DB1: 0, R2: DB2: 1, R3: DB3: 2);
+impl_attachable_resolver_set!(R1: DB1: 0, R2: DB2: 1, R3: DB3: 2, R4: DB4: 3);
+impl_attachable_resolver_set!(R1: DB1: 0, R2: DB2: 1, R3: DB3: 2, R4: DB4: 3, R5: DB5: 4);
+impl_attachable_resolver_set!(
+    R1: DB1: 0,
+    R2: DB2: 1,
+    R3: DB3: 2,
+    R4: DB4: 3,
+    R5: DB5: 4,
+    R6: DB6: 5
+);
+impl_attachable_resolver_set!(
+    R1: DB1: 0,
+    R2: DB2: 1,
+    R3: DB3: 2,
+    R4: DB4: 3,
+    R5: DB5: 4,
+    R6: DB6: 5,
+    R7: DB7: 6
+);
+impl_attachable_resolver_set!(
+    R1: DB1: 0,
+    R2: DB2: 1,
+    R3: DB3: 2,
+    R4: DB4: 3,
+    R5: DB5: 4,
+    R6: DB6: 5,
+    R7: DB7: 6,
+    R8: DB8: 7
+);
+
+#[cfg(test)]
+mod tests {
+    use super::{
+        Anchor, AttachableResolver, AttachableResolverSet, CoordinatorAction, CoordinatorState,
+        DatabaseSet, ManagedDb, Merkleized, StateSyncDb, StateSyncSet, SyncEngineConfig,
+        Unmerkleized,
+    };
+    use commonware_consensus::types::{Epoch, Height, Round, View};
+    use commonware_cryptography::sha256;
+    use commonware_macros::select;
+    use commonware_runtime::{deterministic, Clock, Runner as _, Spawner as _, Supervisor as _};
+    use commonware_utils::{
+        channel::{mpsc, oneshot, ring},
+        sync::AsyncRwLock,
+    };
+    use futures::{pin_mut, FutureExt, SinkExt};
+    use std::{
+        convert::Infallible,
+        num::{NonZeroU64, NonZeroUsize},
+        sync::{
+            atomic::{AtomicBool, AtomicUsize, Ordering},
+            Arc,
+        },
+        time::Duration,
+    };
+
+    // Field-less batch fixtures shared by the test databases in this module.
+    #[derive(Clone, Copy)]
+    struct TestUnmerkleized;
+
+    struct TestMerkleized;
+
+    impl Unmerkleized for TestUnmerkleized {
+        type Merkleized =
TestMerkleized; + type Error = Infallible; + + async fn merkleize(self) -> Result { + Ok(TestMerkleized) + } + } + + impl Merkleized for TestMerkleized { + type Digest = sha256::Digest; + type Unmerkleized = TestUnmerkleized; + + fn root(&self) -> Self::Digest { + sha256::Digest::from([0; 32]) + } + + fn new_batch(&self) -> Self::Unmerkleized { + TestUnmerkleized + } + } + + #[derive(Default)] + struct TestDb; + + impl ManagedDb for TestDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = TestMerkleized; + type Error = Infallible; + type Config = (); + type SyncTarget = (); + + async fn init(_context: E, _config: Self::Config) -> Result { + Ok(Self) + } + + async fn new_batch(db: &Arc>) -> Self::Unmerkleized { + let _guard = db.read().await; + TestUnmerkleized + } + + fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget {} + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + struct BlockingFinalizeDb { + started: Option>, + release: Option>, + } + + impl BlockingFinalizeDb { + fn new(started: oneshot::Sender<()>, release: oneshot::Receiver<()>) -> Self { + Self { + started: Some(started), + release: Some(release), + } + } + } + + #[derive(Debug)] + struct TestFinalizeError; + + struct FailingFinalizeDb; + + struct SlowSyncDb { + final_target: u64, + } + + struct StaleReachedSyncDb { + final_target: u64, + } + + struct FastSyncDb { + final_target: u64, + } + + struct ImmediateStateSyncDb; + + struct FailingStateSyncDb; + + struct FinishClosedSyncDb { + final_target: u64, + } + + struct ObservedSlowSyncDb { + final_target: u64, + } + + struct ObservedFastSyncDb { + final_target: u64, + } + + struct DistinctObservedFastSyncDb { + final_target: u64, + } + + #[derive(Clone)] + struct 
SlowSyncController { + release: Arc, + } + + #[derive(Clone)] + struct FastSyncObserver { + ready: Arc, + update_count: Arc, + } + + impl ManagedDb for FailingFinalizeDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = TestMerkleized; + type Error = TestFinalizeError; + type Config = (); + type SyncTarget = (); + + async fn init(_context: E, _config: Self::Config) -> Result { + Ok(Self) + } + + async fn new_batch(_db: &Arc>) -> Self::Unmerkleized { + TestUnmerkleized + } + + fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + Err(TestFinalizeError) + } + + async fn sync_target(&self) -> Self::SyncTarget {} + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + impl ManagedDb for BlockingFinalizeDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = TestMerkleized; + type Error = Infallible; + type Config = (); + type SyncTarget = (); + + async fn init(_context: E, _config: Self::Config) -> Result { + unreachable!("BlockingFinalizeDb is constructed directly in tests") + } + + async fn new_batch(_db: &Arc>) -> Self::Unmerkleized { + TestUnmerkleized + } + + fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + if let Some(started) = self.started.take() { + let _ = started.send(()); + } + if let Some(release) = self.release.take() { + let _ = release.await; + } + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget {} + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + impl ManagedDb for SlowSyncDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = TestMerkleized; + type Error = Infallible; + type Config = (); + type SyncTarget = 
u64; + + async fn init(_context: E, _config: Self::Config) -> Result { + unreachable!("SlowSyncDb is only constructed through state sync in tests") + } + + async fn new_batch(_db: &Arc>) -> Self::Unmerkleized { + TestUnmerkleized + } + + fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + self.final_target + } + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + impl ManagedDb for FastSyncDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = TestMerkleized; + type Error = Infallible; + type Config = (); + type SyncTarget = u64; + + async fn init(_context: E, _config: Self::Config) -> Result { + unreachable!("FastSyncDb is only constructed through state sync in tests") + } + + async fn new_batch(_db: &Arc>) -> Self::Unmerkleized { + TestUnmerkleized + } + + fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + self.final_target + } + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + impl ManagedDb for FailingStateSyncDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = TestMerkleized; + type Error = Infallible; + type Config = (); + type SyncTarget = u64; + + async fn init(_context: E, _config: Self::Config) -> Result { + unreachable!("FailingStateSyncDb is only constructed through state sync in tests") + } + + async fn new_batch(_db: &Arc>) -> Self::Unmerkleized { + TestUnmerkleized + } + + fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut 
self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + 0 + } + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + impl ManagedDb for ImmediateStateSyncDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = TestMerkleized; + type Error = Infallible; + type Config = (); + type SyncTarget = u64; + + async fn init(_context: E, _config: Self::Config) -> Result { + unreachable!("ImmediateStateSyncDb is only constructed through state sync in tests") + } + + async fn new_batch(_db: &Arc>) -> Self::Unmerkleized { + TestUnmerkleized + } + + fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + 0 + } + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + impl ManagedDb for FinishClosedSyncDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = TestMerkleized; + type Error = Infallible; + type Config = (); + type SyncTarget = u64; + + async fn init(_context: E, _config: Self::Config) -> Result { + unreachable!("FinishClosedSyncDb is only constructed through state sync in tests") + } + + async fn new_batch(_db: &Arc>) -> Self::Unmerkleized { + TestUnmerkleized + } + + fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + self.final_target + } + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + impl ManagedDb for ObservedSlowSyncDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = 
TestMerkleized; + type Error = Infallible; + type Config = (); + type SyncTarget = u64; + + async fn init(_context: E, _config: Self::Config) -> Result { + unreachable!("ObservedSlowSyncDb is only constructed through state sync in tests") + } + + async fn new_batch(_db: &Arc>) -> Self::Unmerkleized { + TestUnmerkleized + } + + fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + self.final_target + } + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + impl ManagedDb for ObservedFastSyncDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = TestMerkleized; + type Error = Infallible; + type Config = (); + type SyncTarget = u64; + + async fn init(_context: E, _config: Self::Config) -> Result { + unreachable!("ObservedFastSyncDb is only constructed through state sync in tests") + } + + async fn new_batch(_db: &Arc>) -> Self::Unmerkleized { + TestUnmerkleized + } + + fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + self.final_target + } + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + impl ManagedDb for DistinctObservedFastSyncDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = TestMerkleized; + type Error = Infallible; + type Config = (); + type SyncTarget = u64; + + async fn init(_context: E, _config: Self::Config) -> Result { + unreachable!( + "DistinctObservedFastSyncDb is only constructed through state sync in tests" + ) + } + + async fn new_batch(_db: &Arc>) -> Self::Unmerkleized { + TestUnmerkleized + } + 
+ fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + self.final_target + } + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + impl StateSyncDb> for SlowSyncDb + where + E: Send + Clock, + { + type SyncError = Infallible; + + async fn sync_db( + context: E, + _config: Self::Config, + release: Arc, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + mut finish: Option>, + reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + while !release.load(Ordering::SeqCst) { + context.sleep(Duration::from_millis(1)).await; + } + let mut final_target = target; + let mut tip_updates = Some(tip_updates); + + loop { + if let Some(reached_target) = reached_target.as_ref() { + if reached_target.send(final_target).await.is_err() { + break; + } + } + + context.sleep(Duration::from_millis(1)).await; + + if finish.is_none() && tip_updates.is_none() { + break; + } + + let finish_signal = finish.as_mut().map_or_else( + || futures::future::Either::Right(futures::future::pending()), + |finish_rx| futures::future::Either::Left(finish_rx.recv()), + ); + let update_signal = tip_updates.as_mut().map_or_else( + || futures::future::Either::Right(futures::future::pending()), + |update_rx| futures::future::Either::Left(update_rx.recv()), + ); + + select! 
{ + _ = finish_signal => { + break; + }, + update = update_signal => match update { + Some(update) => { + final_target = update; + } + None => { + tip_updates = None; + if finish.is_none() { + break; + } + } + }, + } + } + + Ok(Self { final_target }) + } + } + + impl ManagedDb for StaleReachedSyncDb { + type Unmerkleized = TestUnmerkleized; + type Merkleized = TestMerkleized; + type Error = Infallible; + type Config = (); + type SyncTarget = u64; + + async fn init(_context: E, _config: Self::Config) -> Result { + unreachable!("StaleReachedSyncDb is only constructed through state sync in tests") + } + + async fn new_batch(_db: &Arc>) -> Self::Unmerkleized { + TestUnmerkleized + } + + fn matches_sync_target(_batch: &Self::Merkleized, _target: &Self::SyncTarget) -> bool { + true + } + + async fn finalize(&mut self, _batch: Self::Merkleized) -> Result<(), Self::Error> { + Ok(()) + } + + async fn sync_target(&self) -> Self::SyncTarget { + self.final_target + } + + async fn rewind_to_target(&mut self, _target: Self::SyncTarget) -> Result<(), Self::Error> { + Ok(()) + } + } + + impl StateSyncDb for StaleReachedSyncDb + where + E: Send + Clock, + { + type SyncError = Infallible; + + async fn sync_db( + context: E, + _config: Self::Config, + _resolver: (), + target: Self::SyncTarget, + mut tip_updates: mpsc::Receiver, + mut finish: Option>, + reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + let update = tip_updates.recv().await.expect("expected forwarded tip"); + if let Some(reached_target) = reached_target.as_ref() { + let _ = reached_target.send(target).await; + } + + let finish_signal = finish.as_mut().map_or_else( + || futures::future::Either::Right(futures::future::pending()), + |finish_rx| futures::future::Either::Left(finish_rx.recv()), + ); + select! 
{ + _ = finish_signal => { + Ok(Self { final_target: target }) + }, + _ = context.sleep(Duration::from_millis(10)) => { + if let Some(reached_target) = reached_target.as_ref() { + let _ = reached_target.send(update).await; + } + if let Some(finish_rx) = finish.as_mut() { + let _ = finish_rx.recv().await; + } + Ok(Self { final_target: update }) + }, + } + } + } + + impl StateSyncDb> for FastSyncDb { + type SyncError = Infallible; + + async fn sync_db( + _context: E, + _config: Self::Config, + done: Arc, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + mut finish: Option>, + reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + done.store(true, Ordering::SeqCst); + let mut final_target = target; + let mut tip_updates = Some(tip_updates); + + loop { + if let Some(reached_target) = reached_target.as_ref() { + if reached_target.send(final_target).await.is_err() { + break; + } + } + + if finish.is_none() && tip_updates.is_none() { + break; + } + + let finish_signal = finish.as_mut().map_or_else( + || futures::future::Either::Right(futures::future::pending()), + |finish_rx| futures::future::Either::Left(finish_rx.recv()), + ); + let update_signal = tip_updates.as_mut().map_or_else( + || futures::future::Either::Right(futures::future::pending()), + |update_rx| futures::future::Either::Left(update_rx.recv()), + ); + + select! 
{ + _ = finish_signal => { + break; + }, + update = update_signal => match update { + Some(update) => { + final_target = update; + } + None => { + tip_updates = None; + if finish.is_none() { + break; + } + } + }, + } + } + + Ok(Self { final_target }) + } + } + + #[derive(Debug)] + struct TestSyncError; + + #[derive(Debug)] + struct FinishClosedSyncError; + + impl StateSyncDb for FailingStateSyncDb { + type SyncError = TestSyncError; + + async fn sync_db( + _context: E, + _config: Self::Config, + _resolver: (), + _target: Self::SyncTarget, + _tip_updates: mpsc::Receiver, + _finish: Option>, + _reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + Err(TestSyncError) + } + } + + impl StateSyncDb for ImmediateStateSyncDb { + type SyncError = Infallible; + + async fn sync_db( + _context: E, + _config: Self::Config, + _resolver: (), + _target: Self::SyncTarget, + _tip_updates: mpsc::Receiver, + _finish: Option>, + _reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + Ok(Self) + } + } + + impl StateSyncDb for FinishClosedSyncDb { + type SyncError = FinishClosedSyncError; + + async fn sync_db( + _context: E, + _config: Self::Config, + _resolver: (), + target: Self::SyncTarget, + _tip_updates: mpsc::Receiver, + mut finish: Option>, + _reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + let Some(finish_rx) = finish.as_mut() else { + panic!("finish receiver should be provided"); + }; + match finish_rx.recv().await { + Some(()) => Ok(Self { + final_target: target, + }), + None => Err(FinishClosedSyncError), + } + } + } + + impl StateSyncDb for ObservedSlowSyncDb + where + E: Send + Clock, + { + type SyncError = Infallible; + + async fn sync_db( + context: E, + _config: Self::Config, + controller: SlowSyncController, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + mut finish: Option>, + reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + while 
!controller.release.load(Ordering::SeqCst) { + context.sleep(Duration::from_millis(1)).await; + } + + let mut final_target = target; + let mut tip_updates = Some(tip_updates); + let mut reported_target = None; + let mut observed_update = false; + loop { + if let Some(update_rx) = tip_updates.as_mut() { + loop { + match update_rx.try_recv() { + Ok(update) => { + final_target = update; + observed_update = true; + reported_target = None; + } + Err(mpsc::error::TryRecvError::Empty) => { + break; + } + Err(mpsc::error::TryRecvError::Disconnected) => { + tip_updates = None; + break; + } + } + } + } + + if observed_update && reported_target != Some(final_target) { + if let Some(reached_target) = reached_target.as_ref() { + if reached_target.send(final_target).await.is_err() { + break; + } + } + reported_target = Some(final_target); + } + + if finish.is_none() && tip_updates.is_none() { + break; + } + + let finish_signal = finish.as_mut().map_or_else( + || futures::future::Either::Right(futures::future::pending()), + |finish_rx| futures::future::Either::Left(finish_rx.recv()), + ); + let update_signal = tip_updates.as_mut().map_or_else( + || futures::future::Either::Right(futures::future::pending()), + |update_rx| futures::future::Either::Left(update_rx.recv()), + ); + + select! 
{ + _ = finish_signal => { + break; + }, + update = update_signal => match update { + Some(update) => { + final_target = update; + observed_update = true; + reported_target = None; + } + None => { + tip_updates = None; + if finish.is_none() { + break; + } + } + }, + } + } + + Ok(Self { final_target }) + } + } + + impl StateSyncDb for ObservedFastSyncDb { + type SyncError = Infallible; + + async fn sync_db( + _context: E, + _config: Self::Config, + observer: FastSyncObserver, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + mut finish: Option>, + reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + let mut final_target = target; + let mut tip_updates = Some(tip_updates); + let mut reported_target = None; + observer.ready.store(true, Ordering::SeqCst); + + loop { + if reported_target != Some(final_target) { + if let Some(reached_target) = reached_target.as_ref() { + if reached_target.send(final_target).await.is_err() { + break; + } + } + reported_target = Some(final_target); + } + + if finish.is_none() && tip_updates.is_none() { + break; + } + + let finish_signal = finish.as_mut().map_or_else( + || futures::future::Either::Right(futures::future::pending()), + |finish_rx| futures::future::Either::Left(finish_rx.recv()), + ); + let update_signal = tip_updates.as_mut().map_or_else( + || futures::future::Either::Right(futures::future::pending()), + |update_rx| futures::future::Either::Left(update_rx.recv()), + ); + + select! 
{ + _ = finish_signal => { + break; + }, + update = update_signal => match update { + Some(update) => { + observer.update_count.fetch_add(1, Ordering::SeqCst); + final_target = update; + reported_target = None; + } + None => { + tip_updates = None; + if finish.is_none() { + break; + } + } + }, + } + } + + Ok(Self { final_target }) + } + } + + impl StateSyncDb for DistinctObservedFastSyncDb { + type SyncError = Infallible; + + async fn sync_db( + _context: E, + _config: Self::Config, + observer: FastSyncObserver, + target: Self::SyncTarget, + tip_updates: mpsc::Receiver, + mut finish: Option>, + reached_target: Option>, + _sync_config: SyncEngineConfig, + ) -> Result { + let mut final_target = target; + let mut tip_updates = Some(tip_updates); + let mut reported_target = None; + observer.ready.store(true, Ordering::SeqCst); + + loop { + if reported_target != Some(final_target) { + if let Some(reached_target) = reached_target.as_ref() { + if reached_target.send(final_target).await.is_err() { + break; + } + } + reported_target = Some(final_target); + } + + if finish.is_none() && tip_updates.is_none() { + break; + } + + let finish_signal = finish.as_mut().map_or_else( + || futures::future::Either::Right(futures::future::pending()), + |finish_rx| futures::future::Either::Left(finish_rx.recv()), + ); + let update_signal = tip_updates.as_mut().map_or_else( + || futures::future::Either::Right(futures::future::pending()), + |update_rx| futures::future::Either::Left(update_rx.recv()), + ); + + select! 
{ + _ = finish_signal => { + break; + }, + update = update_signal => match update { + Some(update) => { + observer.update_count.fetch_add(1, Ordering::SeqCst); + if update != final_target { + final_target = update; + reported_target = None; + } + } + None => { + tip_updates = None; + if finish.is_none() { + break; + } + } + }, + } + } + + Ok(Self { final_target }) + } + } + + #[test] + fn tuple_new_batches_queues_reads_concurrently() { + deterministic::Runner::default().start(|_context| async move { + let db1 = Arc::new(AsyncRwLock::new(TestDb)); + let db2 = Arc::new(AsyncRwLock::new(TestDb)); + let databases = (db1.clone(), db2.clone()); + + let writer1 = db1.write().await; + let writer2 = db2.write().await; + + let new_batches = + <(Arc>, Arc>) as DatabaseSet< + deterministic::Context, + >>::new_batches(&databases); + pin_mut!(new_batches); + assert!(new_batches.as_mut().now_or_never().is_none()); + + drop(writer2); + { + let writer2_again = db2.write(); + pin_mut!(writer2_again); + assert!( + writer2_again.as_mut().now_or_never().is_none(), + "tuple new_batches should queue reads for all databases concurrently" + ); + } + + drop(writer1); + let _ = new_batches.await; + }); + } + + #[test] + fn tuple_finalize_runs_databases_in_parallel() { + deterministic::Runner::default().start(|_context| async move { + let (started1_tx, started1_rx) = oneshot::channel(); + let (started2_tx, started2_rx) = oneshot::channel(); + let (release1_tx, release1_rx) = oneshot::channel(); + let (release2_tx, release2_rx) = oneshot::channel(); + + let databases = ( + Arc::new(AsyncRwLock::new(BlockingFinalizeDb::new( + started1_tx, + release1_rx, + ))), + Arc::new(AsyncRwLock::new(BlockingFinalizeDb::new( + started2_tx, + release2_rx, + ))), + ); + + let finalize = <( + Arc>, + Arc>, + ) as DatabaseSet>::finalize( + &databases, + (TestMerkleized, TestMerkleized), + ); + pin_mut!(finalize); + assert!(finalize.as_mut().now_or_never().is_none()); + + let started1 = started1_rx; + let 
started2 = started2_rx; + pin_mut!(started1); + pin_mut!(started2); + assert!(matches!(started1.as_mut().now_or_never(), Some(Ok(())))); + assert!( + matches!(started2.as_mut().now_or_never(), Some(Ok(()))), + "tuple finalize should start all database finalizations concurrently" + ); + + let _ = release1_tx.send(()); + let _ = release2_tx.send(()); + finalize.await; + }); + } + + #[test] + fn tuple_finalize_panic_identifies_failing_database() { + let panic = std::panic::catch_unwind(|| { + deterministic::Runner::default().start(|_context| async move { + let databases = ( + Arc::new(AsyncRwLock::new(TestDb)), + Arc::new(AsyncRwLock::new(FailingFinalizeDb)), + ); + <( + Arc>, + Arc>, + ) as DatabaseSet>::finalize( + &databases, + (TestMerkleized, TestMerkleized), + ) + .await; + }); + }) + .expect_err("tuple finalize should panic when a database finalize fails"); + + let panic = panic + .downcast_ref::() + .map(String::as_str) + .or_else(|| panic.downcast_ref::<&'static str>().copied()) + .expect("panic should be a string"); + assert!( + panic.contains("index 1"), + "panic should identify the failing database index: {panic}" + ); + assert!( + panic.contains("FailingFinalizeDb"), + "panic should identify the failing database type: {panic}" + ); + } + + type TestAnchor = Anchor; + + fn anchor(n: u64) -> TestAnchor { + Anchor { + height: Height::new(n), + round: Round::new(Epoch::zero(), View::new(n)), + digest: sha256::Digest::from([n as u8; 32]), + } + } + + #[test] + fn single_state_sync_handles_closed_tip_updates_channel() { + deterministic::Runner::timed(Duration::from_secs(5)).start(|context| async move { + let (tip_tx, tip_rx) = ring::channel(NonZeroUsize::new(1).unwrap()); + let release = Arc::new(AtomicBool::new(false)); + let release_for_sync = release.clone(); + + let sync = context.child("single_state_sync_closed_tip_updates").spawn( + move |context| async move { + > as StateSyncSet< + deterministic::Context, + Arc, + sha256::Digest, + >>::sync( + context, + 
(), + release_for_sync, + anchor(0), + 0, + tip_rx, + SyncEngineConfig { + fetch_batch_size: NonZeroU64::new(1).unwrap(), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NonZeroUsize::new(1).unwrap(), + max_retained_roots: 0, + }, + ) + .await + .expect("single state sync should succeed") + }, + ); + + drop(tip_tx); + context.sleep(Duration::from_millis(1)).await; + release.store(true, Ordering::SeqCst); + + let (_database, converged_anchor) = sync.await.expect("sync task should complete"); + assert_eq!(converged_anchor, anchor(0)); + }); + } + + #[test] + fn single_state_sync_ignores_backward_tip_updates() { + deterministic::Runner::timed(Duration::from_secs(5)).start(|context| async move { + let (mut tip_tx, tip_rx) = ring::channel(NonZeroUsize::new(4).unwrap()); + let release = Arc::new(AtomicBool::new(true)); + let resolver = SlowSyncController { + release: release.clone(), + }; + + let sync = context + .child("single_state_sync_ignores_backward_tip_updates") + .spawn(move |context| async move { + > as StateSyncSet< + deterministic::Context, + SlowSyncController, + sha256::Digest, + >>::sync( + context, + (), + resolver, + anchor(0), + 0, + tip_rx, + SyncEngineConfig { + fetch_batch_size: NonZeroU64::new(1).unwrap(), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NonZeroUsize::new(4).unwrap(), + max_retained_roots: 0, + }, + ) + .await + .expect("single state sync should succeed") + }); + + let _ = tip_tx.send((anchor(2), 2)).await; + let _ = tip_tx.send((anchor(1), 1)).await; + drop(tip_tx); + + let (database, converged_anchor) = sync.await.expect("sync task should complete"); + let final_target = database.read().await.final_target; + assert_eq!( + final_target, 2, + "single-db sync target must never move backward" + ); + assert_eq!( + converged_anchor, + anchor(2), + "converged anchor must remain on the highest seen tip" + ); + }); + } + + #[test] + fn 
single_state_sync_ignores_stale_reached_after_forwarded_tip() { + deterministic::Runner::timed(Duration::from_secs(5)).start(|context| async move { + let (mut tip_tx, tip_rx) = ring::channel(NonZeroUsize::new(4).unwrap()); + + let sync = + context + .child("single_state_sync_stale_reached") + .spawn(move |context| async move { + > as StateSyncSet< + deterministic::Context, + (), + sha256::Digest, + >>::sync( + context, + (), + (), + anchor(0), + 0, + tip_rx, + SyncEngineConfig { + fetch_batch_size: NonZeroU64::new(1).unwrap(), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NonZeroUsize::new(4).unwrap(), + max_retained_roots: 0, + }, + ) + .await + .expect("single state sync should succeed") + }); + + let _ = tip_tx.send((anchor(2), 2)).await; + + let (database, converged_anchor) = sync.await.expect("sync task should complete"); + let final_target = database.read().await.final_target; + assert_eq!( + final_target, 2, + "single-db sync must not finish on a stale reached target", + ); + assert_eq!( + converged_anchor, + anchor(2), + "converged anchor must match the target the database reached", + ); + }); + } + + #[test] + fn tuple_state_sync_converges_before_finish() { + deterministic::Runner::default().start(|context| async move { + let (mut tip_tx, tip_rx) = ring::channel(NonZeroUsize::new(4).unwrap()); + let slow_release = Arc::new(AtomicBool::new(false)); + let fast_done = Arc::new(AtomicBool::new(false)); + + let slow_release_for_sync = slow_release.clone(); + let fast_done_for_sync = fast_done.clone(); + let sync = context + .child("tuple_state_sync") + .spawn(move |context| async move { + <(Arc>, Arc>) as StateSyncSet< + deterministic::Context, + (Arc, Arc), + sha256::Digest, + >>::sync( + context, + ((), ()), + (slow_release_for_sync, fast_done_for_sync), + anchor(0), + (0, 0), + tip_rx, + SyncEngineConfig { + fetch_batch_size: NonZeroU64::new(1).unwrap(), + apply_batch_size: 1, + max_outstanding_requests: 1, + 
update_channel_size: NonZeroUsize::new(4).unwrap(), + max_retained_roots: 0, + }, + ) + .await + .expect("tuple state sync should succeed") + }); + + while !fast_done.load(Ordering::SeqCst) { + context.sleep(Duration::from_millis(1)).await; + } + let _ = tip_tx.send((anchor(1), (1, 1))).await; + let _ = tip_tx.send((anchor(2), (2, 2))).await; + slow_release.store(true, Ordering::SeqCst); + drop(tip_tx); + + let (synced, converged_anchor) = sync.await.expect("sync task should complete"); + let slow_target = synced.0.read().await.final_target; + let fast_target = synced.1.read().await.final_target; + + assert_eq!( + slow_target, fast_target, + "all databases should finish on the same converged target set" + ); + assert_eq!( + converged_anchor.height.get(), + slow_target, + "returned anchor height should match the converged generation" + ); + }); + } + + #[test] + fn tuple_state_sync_ignores_backward_tip_updates() { + deterministic::Runner::timed(Duration::from_secs(5)).start(|context| async move { + let (mut tip_tx, tip_rx) = ring::channel(NonZeroUsize::new(8).unwrap()); + let slow_release = Arc::new(AtomicBool::new(false)); + let fast_done = Arc::new(AtomicBool::new(false)); + + let slow_release_for_sync = slow_release.clone(); + let fast_done_for_sync = fast_done.clone(); + let sync = context + .child("tuple_state_sync_ignores_backward_tip_updates") + .spawn(move |context| async move { + <(Arc>, Arc>) as StateSyncSet< + deterministic::Context, + (Arc, Arc), + sha256::Digest, + >>::sync( + context, + ((), ()), + (slow_release_for_sync, fast_done_for_sync), + anchor(0), + (0, 0), + tip_rx, + SyncEngineConfig { + fetch_batch_size: NonZeroU64::new(1).unwrap(), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NonZeroUsize::new(8).unwrap(), + max_retained_roots: 0, + }, + ) + .await + .expect("tuple state sync should succeed") + }); + + while !fast_done.load(Ordering::SeqCst) { + context.sleep(Duration::from_millis(1)).await; + } + + let _ = 
tip_tx.send((anchor(2), (2, 2))).await; + let _ = tip_tx.send((anchor(1), (1, 1))).await; + drop(tip_tx); + context.sleep(Duration::from_millis(1)).await; + slow_release.store(true, Ordering::SeqCst); + + let (synced, converged_anchor) = sync.await.expect("sync task should complete"); + let slow_target = synced.0.read().await.final_target; + let fast_target = synced.1.read().await.final_target; + assert_eq!( + slow_target, 2, + "slow database target must never move backward" + ); + assert_eq!( + fast_target, 2, + "fast database target must never move backward" + ); + assert_eq!( + converged_anchor, + anchor(2), + "converged anchor must remain on the highest seen tip" + ); + }); + } + + #[test] + fn tuple_state_sync_returns_db_error_instead_of_panicking_when_anchor_missing() { + deterministic::Runner::timed(Duration::from_secs(5)).start(|context| async move { + let (_tip_tx, tip_rx) = ring::channel(NonZeroUsize::new(1).unwrap()); + + let result = <( + Arc>, + Arc>, + ) as StateSyncSet>::sync( + context, + ((), ()), + ((), ()), + anchor(0), + (0, 0), + tip_rx, + SyncEngineConfig { + fetch_batch_size: NonZeroU64::new(1).unwrap(), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NonZeroUsize::new(1).unwrap(), + max_retained_roots: 0, + }, + ) + .await; + + let err = match result { + Ok(_) => panic!("tuple state sync should return the database sync error"), + Err(err) => err, + }; + assert!( + err.contains("state sync failed (index 1, db"), + "error should include failing database index: {err}" + ); + assert!( + err.contains("FailingStateSyncDb"), + "error should include failing database type: {err}" + ); + }); + } + + #[test] + fn tuple_state_sync_returns_db_error_when_other_database_waits_for_finish() { + deterministic::Runner::timed(Duration::from_secs(1)).start(|context| async move { + let (_tip_tx, tip_rx) = ring::channel(NonZeroUsize::new(1).unwrap()); + let release = Arc::new(AtomicBool::new(true)); + + let result = <( + Arc>, + Arc>, 
+ ) as StateSyncSet< + deterministic::Context, + (Arc, ()), + sha256::Digest, + >>::sync( + context, + ((), ()), + (release, ()), + anchor(0), + (0, 0), + tip_rx, + SyncEngineConfig { + fetch_batch_size: NonZeroU64::new(1).unwrap(), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NonZeroUsize::new(1).unwrap(), + max_retained_roots: 0, + }, + ) + .await; + + let err = match result { + Ok(_) => panic!("tuple state sync should return the database sync error"), + Err(err) => err, + }; + assert!( + err.contains("state sync failed (index 1, db"), + "error should include failing database index: {err}" + ); + assert!( + err.contains("FailingStateSyncDb"), + "error should include failing database type: {err}" + ); + }); + } + + #[test] + fn tuple_state_sync_preserves_original_failure_when_peer_finish_channel_closes() { + deterministic::Runner::timed(Duration::from_secs(1)).start(|context| async move { + let (_tip_tx, tip_rx) = ring::channel(NonZeroUsize::new(1).unwrap()); + + let result = <( + Arc>, + Arc>, + ) as StateSyncSet>::sync( + context, + ((), ()), + ((), ()), + anchor(0), + (0, 0), + tip_rx, + SyncEngineConfig { + fetch_batch_size: NonZeroU64::new(1).unwrap(), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NonZeroUsize::new(1).unwrap(), + max_retained_roots: 0, + }, + ) + .await; + + let err = match result { + Ok(_) => panic!("tuple state sync should return the database sync error"), + Err(err) => err, + }; + assert!( + err.contains("state sync failed (index 1, db"), + "error should include failing database index, got: {err}", + ); + assert!( + err.contains("FailingStateSyncDb"), + "error should include failing database type, got: {err}", + ); + }); + } + + #[test] + fn coordinator_rejects_stale_reached_event_from_older_generation() { + let mut state = CoordinatorState::new(2, anchor(0), (0u64, 0u64)); + + state.record_tip_update(anchor(1), (1, 1)); + match state.next_action() { + 
CoordinatorAction::Dispatch { + generation, + targets: (left, right), + } => { + assert_eq!(generation, 1, "coordinator should dispatch generation 1"); + assert_eq!((left, right), (1, 1)); + } + CoordinatorAction::Wait => panic!("coordinator should dispatch the newer tip"), + CoordinatorAction::Converged(anchor) => { + panic!("coordinator converged too early at {anchor:?}") + } + } + + // This reached event belongs to generation 0 but arrives after the + // coordinator has already advanced the database to generation 1. + state.record_reached(1, 0); + + // Only database 0 has actually reached generation 1 so far. + state.record_reached(0, 1); + + match state.next_action() { + CoordinatorAction::Wait => {} + CoordinatorAction::Dispatch { targets, .. } => { + panic!( + "coordinator should wait for a fresh reached event, got dispatch {targets:?}" + ) + } + CoordinatorAction::Converged(anchor) => { + panic!("stale reached event must not allow convergence at {anchor:?}") + } + } + } + + #[test] + fn coordinator_dispatches_pending_tip_before_converging() { + let mut state = CoordinatorState::new(2, anchor(0), (0u64, 0u64)); + + state.record_tip_update(anchor(1), (1, 1)); + match state.next_action() { + CoordinatorAction::Dispatch { + generation, + targets: (left, right), + } => { + assert_eq!(generation, 1, "coordinator should dispatch generation 1"); + assert_eq!((left, right), (1, 1)); + } + CoordinatorAction::Wait => panic!("coordinator should dispatch the newer tip"), + CoordinatorAction::Converged(anchor) => { + panic!("coordinator converged too early at {anchor:?}") + } + } + + state.record_reached(0, 1); + state.record_reached(1, 1); + state.record_tip_update(anchor(2), (2, 2)); + + match state.next_action() { + CoordinatorAction::Dispatch { + generation, + targets: (left, right), + } => { + assert_eq!(generation, 2, "coordinator should advance to generation 2"); + assert_eq!((left, right), (2, 2)); + } + CoordinatorAction::Wait => panic!("coordinator should 
dispatch the pending tip"), + CoordinatorAction::Converged(anchor) => { + panic!("coordinator should not converge with a pending tip: {anchor:?}") + } + } + } + + #[test] + fn tuple_state_sync_stops_updates_after_reached_until_regroup() { + deterministic::Runner::default().start(|context| async move { + let (mut tip_tx, tip_rx) = ring::channel(NonZeroUsize::new(32).unwrap()); + let slow_release = Arc::new(AtomicBool::new(true)); + let fast_ready = Arc::new(AtomicBool::new(false)); + let fast_update_count = Arc::new(AtomicUsize::new(0)); + + let slow_resolver = SlowSyncController { + release: slow_release.clone(), + }; + let fast_resolver = FastSyncObserver { + ready: fast_ready.clone(), + update_count: fast_update_count.clone(), + }; + let sync = context.child("tuple_state_sync_algorithm").spawn( + move |context| async move { + <( + Arc>, + Arc>, + ) as StateSyncSet< + deterministic::Context, + (SlowSyncController, FastSyncObserver), + sha256::Digest, + >>::sync( + context, + ((), ()), + (slow_resolver, fast_resolver), + anchor(0), + (0, 0), + tip_rx, + SyncEngineConfig { + fetch_batch_size: NonZeroU64::new(1).unwrap(), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NonZeroUsize::new(1).unwrap(), + max_retained_roots: 0, + }, + ) + .await + .expect("tuple state sync should succeed") + }, + ); + + while !fast_ready.load(Ordering::SeqCst) { + context.sleep(Duration::from_millis(1)).await; + } + + for target in 1..=16u64 { + let _ = tip_tx.send((anchor(target), (target, target))).await; + } + drop(tip_tx); + + let (synced, converged_anchor) = sync.await.expect("sync task should complete"); + let slow_target = synced.0.read().await.final_target; + let fast_target = synced.1.read().await.final_target; + + assert_eq!( + slow_target, fast_target, + "all databases should finish on the same converged target set" + ); + assert_eq!( + converged_anchor.height.get(), slow_target, + "returned anchor height should match the converged generation" + ); 
+ assert_eq!( + fast_update_count.load(Ordering::SeqCst), + 1, + "a reached database must not receive tip updates before regroup; only regroup retarget should be observed" + ); + }); + } + + #[test] + fn tuple_state_sync_allows_noop_database_while_other_catches_up() { + deterministic::Runner::default().start(|context| async move { + let (tip_tx, tip_rx) = ring::channel(NonZeroUsize::new(4).unwrap()); + let slow_release = Arc::new(AtomicBool::new(false)); + let fast_ready = Arc::new(AtomicBool::new(false)); + let fast_update_count = Arc::new(AtomicUsize::new(0)); + let target = 7u64; + + let sync = context.child("tuple_state_sync_noop").spawn({ + let slow_resolver = slow_release.clone(); + let fast_resolver = FastSyncObserver { + ready: fast_ready.clone(), + update_count: fast_update_count.clone(), + }; + move |context| async move { + <( + Arc>, + Arc>, + ) as StateSyncSet< + deterministic::Context, + (Arc, FastSyncObserver), + sha256::Digest, + >>::sync( + context, + ((), ()), + (slow_resolver, fast_resolver), + anchor(target), + (target, target), + tip_rx, + SyncEngineConfig { + fetch_batch_size: NonZeroU64::new(1).unwrap(), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NonZeroUsize::new(1).unwrap(), + max_retained_roots: 0, + }, + ) + .await + .expect("tuple state sync should succeed") + } + }); + + while !fast_ready.load(Ordering::SeqCst) { + context.sleep(Duration::from_millis(1)).await; + } + + drop(tip_tx); + slow_release.store(true, Ordering::SeqCst); + + let (synced, converged_anchor) = sync.await.expect("sync task should complete"); + let slow_target = synced.0.read().await.final_target; + let fast_target = synced.1.read().await.final_target; + + assert_eq!(slow_target, target); + assert_eq!(fast_target, target); + assert_eq!(converged_anchor, anchor(target)); + assert_eq!( + fast_update_count.load(Ordering::SeqCst), + 0, + "already-at-target database should not receive tip updates" + ); + }); + } + + #[test] + fn 
tuple_state_sync_regroup_completes_when_database_target_is_unchanged() { + deterministic::Runner::timed(Duration::from_secs(5)).start(|context| async move { + let (mut tip_tx, tip_rx) = ring::channel(NonZeroUsize::new(4).unwrap()); + let slow_release = Arc::new(AtomicBool::new(false)); + let fast_ready = Arc::new(AtomicBool::new(false)); + let fast_update_count = Arc::new(AtomicUsize::new(0)); + + let sync = context + .child("tuple_state_sync_regroup_unchanged_target") + .spawn({ + let slow_resolver = slow_release.clone(); + let fast_resolver = FastSyncObserver { + ready: fast_ready.clone(), + update_count: fast_update_count.clone(), + }; + move |context| async move { + <( + Arc>, + Arc>, + ) as StateSyncSet< + deterministic::Context, + (Arc, FastSyncObserver), + sha256::Digest, + >>::sync( + context, + ((), ()), + (slow_resolver, fast_resolver), + anchor(0), + (0, 7), + tip_rx, + SyncEngineConfig { + fetch_batch_size: NonZeroU64::new(1).unwrap(), + apply_batch_size: 1, + max_outstanding_requests: 1, + update_channel_size: NonZeroUsize::new(4).unwrap(), + max_retained_roots: 0, + }, + ) + .await + .expect("tuple state sync should succeed") + } + }); + + while !fast_ready.load(Ordering::SeqCst) { + context.sleep(Duration::from_millis(1)).await; + } + + let _ = tip_tx.send((anchor(9), (9, 7))).await; + context.sleep(Duration::from_millis(1)).await; + slow_release.store(true, Ordering::SeqCst); + drop(tip_tx); + + let (synced, converged_anchor) = sync.await.expect("sync task should complete"); + let slow_target = synced.0.read().await.final_target; + let fast_target = synced.1.read().await.final_target; + + assert_eq!(slow_target, 9); + assert_eq!(fast_target, 7); + assert_eq!(converged_anchor, anchor(9)); + assert_eq!( + fast_update_count.load(Ordering::SeqCst), + 1, + "the unchanged-target database should receive the regroup retarget exactly once", + ); + }); + } + + #[derive(Default)] + struct AttachDb1; + + #[derive(Default)] + struct AttachDb2; + + 
#[derive(Clone)]
+    // Test resolver: every `attach_database` call appends this resolver's `id`
+    // to the shared `log`, so tests can assert which resolvers were attached
+    // and in what order.
+    struct RecordingResolver {
+        id: &'static str,
+        log: Arc>>,
+    }
+
+    impl RecordingResolver {
+        // Builds a resolver that reports itself as `id` in the shared `log`.
+        fn new(
+            id: &'static str,
+            log: Arc>>,
+        ) -> Self {
+            Self { id, log }
+        }
+    }
+
+    impl AttachableResolver for RecordingResolver {
+        // The database handle itself is ignored; only the call is recorded.
+        async fn attach_database(&self, _db: Arc>) {
+            self.log.lock().push(self.id);
+        }
+    }
+
+    // A single resolver paired with a single database records exactly one attach.
+    #[test]
+    fn single_db_attach_calls_single_resolver() {
+        deterministic::Runner::default().start(|_| async move {
+            let log = Arc::new(commonware_utils::sync::Mutex::new(Vec::new()));
+            let resolver = RecordingResolver::new("db1", log.clone());
+            let db = Arc::new(AsyncRwLock::new(AttachDb1));
+
+            resolver.attach_databases(db).await;
+            assert_eq!(&*log.lock(), &["db1"]);
+        });
+    }
+
+    // For a tuple, the log must list the resolvers in tuple order.
+    #[test]
+    fn tuple_attach_is_index_stable() {
+        deterministic::Runner::default().start(|_| async move {
+            let log = Arc::new(commonware_utils::sync::Mutex::new(Vec::new()));
+            let resolvers = (
+                RecordingResolver::new("resolver_0", log.clone()),
+                RecordingResolver::new("resolver_1", log.clone()),
+            );
+            let databases = (
+                Arc::new(AsyncRwLock::new(AttachDb1)),
+                Arc::new(AsyncRwLock::new(AttachDb2)),
+            );
+
+            resolvers.attach_databases(databases).await;
+            assert_eq!(&*log.lock(), &["resolver_0", "resolver_1"]);
+        });
+    }
+
+    // Primarily a compile-time check: the two tuple slots hold different
+    // database types (`AttachDb1` and `AttachDb2`).
+    #[test]
+    fn heterogeneous_tuple_attach_compiles() {
+        deterministic::Runner::default().start(|_| async move {
+            let log = Arc::new(commonware_utils::sync::Mutex::new(Vec::new()));
+            let resolvers = (
+                RecordingResolver::new("db1", log.clone()),
+                RecordingResolver::new("db2", log.clone()),
+            );
+            let databases = (
+                Arc::new(AsyncRwLock::new(AttachDb1)),
+                Arc::new(AsyncRwLock::new(AttachDb2)),
+            );
+
+            resolvers.attach_databases(databases).await;
+            assert_eq!(&*log.lock(), &["db1", "db2"]);
+        });
+    }
+}
diff --git a/glue/src/stateful/db/p2p/actor.rs b/glue/src/stateful/db/p2p/actor.rs
new file mode 100644
index 00000000000..2bc37a8bda5
--- /dev/null
+++ b/glue/src/stateful/db/p2p/actor.rs
@@ -0,0 +1,704 @@
+//! 
Resolver service actor for QMDB sync over P2P.
+
+use super::{handler, mailbox, metrics::Metrics as ResolverMetrics, Mailbox};
+use commonware_actor::mailbox as actor_mailbox;
+use commonware_codec::{Codec, Decode, Encode};
+use commonware_cryptography::PublicKey;
+use commonware_macros::select_loop;
+use commonware_p2p::{Blocker, Provider, Receiver, Sender};
+use commonware_resolver::{p2p, Resolver as _};
+use commonware_runtime::{
+    spawn_cell,
+    telemetry::metrics::{status, GaugeExt},
+    BufferPooler, Clock, ContextCell, Handle, Metrics, Spawner,
+};
+use commonware_storage::{
+    merkle::Family,
+    qmdb::sync::resolver::{FetchResult, Resolver as SyncResolver},
+};
+use commonware_utils::{
+    channel::{fallible::OneshotExt, oneshot},
+    sync::AsyncRwLock,
+};
+use futures::future;
+use rand::Rng;
+use std::{
+    collections::BTreeMap,
+    num::{NonZeroU64, NonZeroUsize},
+    sync::Arc,
+    time::Duration,
+};
+use tracing::{debug, info};
+
+// NOTE(review): the generic parameter lists on the aliases and items below look
+// stripped in this copy of the patch; restore them from the real file.
+
+/// Operation type taken from the database's [`SyncResolver`] implementation.
+type Op = > as SyncResolver>::Op;
+/// Digest type taken from the database's [`SyncResolver`] implementation.
+type DatabaseRoot = > as SyncResolver>::Digest;
+/// Mailbox type returned to callers by [`Actor::new`].
+type SyncMailbox = Mailbox, DatabaseRoot>;
+/// Reply channel of one subscriber waiting on a fetch.
+type Pending = oneshot::Sender, mailbox::ResponseDropped>>;
+/// In-flight request keys mapped to the subscribers waiting on each key.
+type PendingSubs = BTreeMap, Vec, DatabaseRoot>>>;
+
+/// Configuration for [`Actor`].
+pub struct Config
+where
+    P: PublicKey,
+    D: Provider,
+    B: Blocker,
+{
+    /// Provider for the current peer set.
+    pub peer_provider: D,
+
+    /// Blocker used when peers send invalid data.
+    pub blocker: B,
+
+    /// Local database used to serve incoming requests when available.
+    pub database: Option>>,
+
+    /// Maximum size of resolver mailbox backlogs.
+    pub mailbox_size: NonZeroUsize,
+
+    /// Local node identity if available.
+    pub me: Option,
+
+    /// Initial expected performance for new peers.
+    pub initial: Duration,
+
+    /// Request timeout.
+    pub timeout: Duration,
+
+    /// Retry cadence for pending fetches.
+    pub fetch_retry_timeout: Duration,
+
+    /// Maximum number of operations to serve in a single response.
+    pub max_serve_ops: NonZeroU64,
+
+    /// Send fetch requests with network priority.
+    pub priority_requests: bool,
+
+    /// Send responses with network priority.
+    pub priority_responses: bool,
+}
+
+/// Runtime serving state for the resolver actor.
+enum State {
+    /// Database is not attached yet.
+    NoDb,
+    /// Database is attached and can serve incoming requests.
+    HasDb(Arc>),
+}
+
+/// An action dispatched by incoming mailbox messages.
+enum MailboxAction {
+    /// Nothing needs to be forwarded to the resolver engine.
+    None,
+    /// Ask the resolver engine to fetch this request.
+    Fetch(handler::Request),
+    /// Ask the resolver engine to stop fetching this request.
+    Cancel(handler::Request),
+}
+
+/// Runs a QMDB sync resolver service over `commonware_resolver::p2p::Engine`.
+pub struct Actor
+where
+    E: BufferPooler + Clock + Spawner + Rng + Metrics,
+    P: PublicKey,
+    D: Provider,
+    B: Blocker,
+    F: Family,
+    Arc>: SyncResolver,
+    Op: Codec + Send + Clone + 'static,
+{
+    // Runtime context; also used to derive the child contexts for the handler
+    // channel and the resolver engine.
+    context: ContextCell,
+    // Static configuration supplied at construction.
+    config: Config,
+    // Receiving half of the mailbox returned by `new`.
+    mailbox_rx: actor_mailbox::Receiver, DatabaseRoot>>,
+    // Whether a local database is attached for serving peers.
+    state: State,
+    // Counters and gauges updated by the message handlers.
+    metrics: ResolverMetrics,
+    // Subscribers waiting on each in-flight fetch key.
+    pending: PendingSubs,
+}
+
+impl Actor
+where
+    E: BufferPooler + Clock + Spawner + Rng + Metrics,
+    P: PublicKey,
+    D: Provider,
+    B: Blocker,
+    F: Family,
+    Arc>: SyncResolver,
+    Op: Codec + Send + Clone + 'static,
+{
+    /// Create a new resolver actor and mailbox.
+ pub fn new(context: E, mut cfg: Config) -> (Self, SyncMailbox) { + let metrics = ResolverMetrics::new(&context); + let state = cfg.database.take().map_or(State::NoDb, |db| { + let _ = metrics.has_database.try_set(1i64); + State::HasDb(db) + }); + let (mailbox_tx, mailbox_rx) = + actor_mailbox::new(context.child("mailbox"), cfg.mailbox_size); + let mailbox = Mailbox::new(mailbox_tx); + let actor = Self { + context: ContextCell::new(context), + config: cfg, + mailbox_rx, + state, + metrics, + pending: BTreeMap::new(), + }; + (actor, mailbox) + } + + /// Start the resolver service. + pub fn start( + mut self, + net: (impl Sender, impl Receiver), + ) -> Handle<()> { + spawn_cell!(self.context, self.run(net)) + } + + /// Main event loop: multiplexes mailbox messages and engine callbacks. + async fn run( + mut self, + (sender, receiver): (impl Sender, impl Receiver), + ) { + let (handler_tx, mut handler_rx) = + actor_mailbox::new(self.context.child("handler"), self.config.mailbox_size); + let handler = handler::Handler::new(handler_tx); + let (engine, mut resolver_mailbox) = p2p::Engine::new( + self.context.as_present().child("resolver"), + p2p::Config { + peer_provider: self.config.peer_provider.clone(), + blocker: self.config.blocker.clone(), + consumer: handler.clone(), + producer: handler, + mailbox_size: self.config.mailbox_size, + me: self.config.me.clone(), + initial: self.config.initial, + timeout: self.config.timeout, + fetch_retry_timeout: self.config.fetch_retry_timeout, + priority_requests: self.config.priority_requests, + priority_responses: self.config.priority_responses, + }, + ); + let mut resolver_task = engine.start((sender, receiver)); + + select_loop! 
{ + self.context, + on_start => { + self.pending.retain(|_, subs| { + subs.retain(|s| !s.is_closed()); + !subs.is_empty() + }); + let mailbox_message = async { + match self.mailbox_rx.recv().await { + Some(message) => Some(message), + None => future::pending().await, + } + }; + }, + on_stopped => { + return; + }, + _ = &mut resolver_task => { + return; + }, + Some(message) = mailbox_message else continue => { + match self.handle_mailbox_message(message) { + MailboxAction::None => {} + MailboxAction::Fetch(request) => { + resolver_mailbox.fetch(request); + } + MailboxAction::Cancel(request) => { + resolver_mailbox.retain(move |key, _| key != &request); + } + } + }, + Some(message) = handler_rx.recv() else { + return; + } => match message { + handler::EngineMessage::Deliver { + key, + value, + response, + } => { + self.handle_deliver(key, value, response).await; + } + handler::EngineMessage::Produce { key, response } => { + self.handle_produce(key, response).await; + } + }, + } + } + + /// Process a mailbox message. Returns a request to fetch if a new key was registered. 
+    ///
+    /// More precisely, the returned [`MailboxAction`] tells the event loop what
+    /// to forward to the resolver engine:
+    /// - `AttachDatabase`: stores the database and returns `None`.
+    /// - `GetOperations`: joins an existing live subscriber list (`None`) or
+    ///   registers the key and returns `Fetch`.
+    /// - `CancelOperations`: returns `Cancel` only when no live subscriber is
+    ///   left for the key.
+    fn handle_mailbox_message(
+        &mut self,
+        message: mailbox::Message, DatabaseRoot>,
+    ) -> MailboxAction {
+        match message {
+            mailbox::Message::AttachDatabase(db) => {
+                // Attaching is allowed more than once; the log records whether
+                // an earlier database was replaced.
+                let replacing_existing = matches!(self.state, State::HasDb(_));
+                info!(replacing_existing, "attached resolver database");
+                self.state = State::HasDb(db);
+                let _ = self.metrics.has_database.try_set(1i64);
+                MailboxAction::None
+            }
+            mailbox::Message::GetOperations { request, response } => {
+                // Coalesce onto an in-flight fetch when at least one earlier
+                // subscriber is still listening.
+                if let Some(subscribers) = self.pending.get_mut(&request) {
+                    subscribers.retain(|subscriber| !subscriber.is_closed());
+                    if !subscribers.is_empty() {
+                        subscribers.push(response);
+                        return MailboxAction::None;
+                    }
+                }
+                // New key, or a key whose earlier subscribers are all gone:
+                // (re)register it and request a fetch.
+                self.pending.insert(request.clone(), vec![response]);
+                self.metrics.fetch_requests.inc();
+                let _ = self.metrics.pending_requests.try_set(self.pending.len());
+                MailboxAction::Fetch(request)
+            }
+            mailbox::Message::CancelOperations { request } => {
+                if self.should_cancel_request(&request) {
+                    self.metrics.cancel_requests.inc();
+                    let _ = self.metrics.pending_requests.try_set(self.pending.len());
+                    MailboxAction::Cancel(request)
+                } else {
+                    MailboxAction::None
+                }
+            }
+        }
+    }
+
+    /// Returns `true` if a request should be cancelled.
+    ///
+    /// Closed subscribers are pruned first. The request is cancelled (and its
+    /// entry removed from `pending`) only if it was registered and no live
+    /// subscriber remains. If the entry is absent there is nothing to cancel
+    /// here and `false` is returned.
+    fn should_cancel_request(&mut self, request: &handler::Request) -> bool {
+        let Some(subscribers) = self.pending.get_mut(request) else {
+            return false;
+        };
+        subscribers.retain(|subscriber| !subscriber.is_closed());
+        if !subscribers.is_empty() {
+            return false;
+        }
+        self.pending.remove(request);
+        true
+    }
+
+    /// Decode a peer's response, fan it out to pending subscribers, and aggregate approvals.
+    ///
+    /// `response` carries the verdict reported back to the resolver engine:
+    /// `false` when the payload fails to decode or any subscriber rejects it,
+    /// `true` otherwise (including deliveries for keys that are not pending).
+    async fn handle_deliver(
+        &mut self,
+        key: handler::Request,
+        value: bytes::Bytes,
+        response: oneshot::Sender,
+    ) {
+        // Only accept responses for keys we currently have in-flight.
+        // Unknown keys are unsolicited/stale deliveries and are ignored.
+        let Some(subscribers) = self.pending.remove(&key) else {
+            self.metrics.deliveries.inc(status::Status::Dropped);
+            response.send_lossy(true);
+            return;
+        };
+        let _ = self.metrics.pending_requests.try_set(self.pending.len());
+
+        // `max_ops` is sourced from the original local request key above.
+        let max_ops = key.max_ops.get() as usize;
+        let decoded =
+            match handler::Response::, DatabaseRoot>::decode_cfg(value, &max_ops) {
+                Ok(decoded) => decoded,
+                Err(_) => {
+                    // Undecodable payload: put the subscribers back so they stay
+                    // registered for this key, and report the delivery as invalid.
+                    self.pending.insert(key, subscribers);
+                    let _ = self.metrics.pending_requests.try_set(self.pending.len());
+                    self.metrics.deliveries.inc(status::Status::Invalid);
+                    response.send_lossy(false);
+                    return;
+                }
+            };
+
+        // Hand each live subscriber its own copy of the result, together with
+        // a one-shot channel on which it reports whether the data was valid.
+        let mut approvals = Vec::new();
+        for subscriber in subscribers {
+            let (success_tx, success_rx) = oneshot::channel();
+            if subscriber
+                .send(Ok(FetchResult {
+                    proof: decoded.proof.clone(),
+                    operations: decoded.operations.clone(),
+                    success_tx,
+                    pinned_nodes: decoded.pinned_nodes.clone(),
+                }))
+                .is_err()
+            {
+                // Receiver already dropped; nobody will judge this copy.
+                continue;
+            }
+            approvals.push(success_rx);
+        }
+
+        // No subscriber took the data, so there is no verdict to wait for.
+        if approvals.is_empty() {
+            self.metrics.deliveries.inc(status::Status::Success);
+            response.send_lossy(true);
+            return;
+        }
+
+        // The peer is valid only if no subscriber reported `false`. A subscriber
+        // that drops its verdict channel without answering does not count
+        // against the peer. The verdicts are awaited one after another, inside
+        // this handler.
+        let mut peer_valid = true;
+        for approval in approvals {
+            if let Ok(approved) = approval.await {
+                peer_valid &= approved;
+            }
+        }
+
+        if peer_valid {
+            self.metrics.deliveries.inc(status::Status::Success);
+        } else {
+            self.metrics.deliveries.inc(status::Status::Failure);
+            debug!(?key, "downstream marked response as peer-invalid");
+        }
+        response.send_lossy(peer_valid);
+    }
+
+    /// Serve a peer's request by querying the local database.
+    ///
+    /// Nothing is sent on `response` when the request cannot be served: no
+    /// database attached, `key.max_ops` above `config.max_serve_ops`, or the
+    /// database query fails. In those cases `response` is dropped and only the
+    /// `serve_requests` metric records the outcome.
+    async fn handle_produce(
+        &mut self,
+        key: handler::Request,
+        response: oneshot::Sender,
+    ) {
+        let State::HasDb(database) = &self.state else {
+            self.metrics.serve_requests.inc(status::Status::Dropped);
+            return;
+        };
+        // Refuse requests that ask for more operations than we agree to serve.
+        if key.max_ops > self.config.max_serve_ops {
+            self.metrics.serve_requests.inc(status::Status::Dropped);
+            return;
+        }
+        // The sender half is bound to a name so it lives until the end of this
+        // function; nothing is ever sent on it.
+        let (_cancel_tx, cancel_rx) = oneshot::channel();
+        let result = database
+            .get_operations(
+                key.op_count,
+                key.start_loc,
+                key.max_ops,
+                key.include_pinned_nodes,
+                cancel_rx,
+            )
+            .await;
+
+        let Ok(fetch) = result else {
+            self.metrics.serve_requests.inc(status::Status::Failure);
+            return;
+        };
+
+        response.send_lossy(
+            handler::Response {
+                proof: fetch.proof,
+                operations: fetch.operations,
+                pinned_nodes: fetch.pinned_nodes,
+            }
+            .encode(),
+        );
+        self.metrics.serve_requests.inc(status::Status::Success);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use bytes::Bytes;
+    use commonware_cryptography::{ed25519, sha256, Sha256};
+    use commonware_p2p::{Provider, TrackedPeers};
+    use commonware_parallel::Sequential;
+    use commonware_runtime::{
+        buffer::paged::CacheRef, deterministic, BufferPooler, Runner as _, Supervisor as _,
+    };
+    use commonware_storage::{
+        journal::contiguous::fixed::Config as FixedLogConfig,
+        mmr::{self, full::Config as MmrJournalConfig, Location, Proof},
+        qmdb::any::{unordered::fixed, FixedConfig},
+        translator::TwoCap,
+    };
+    use commonware_utils::{channel::oneshot, sync::AsyncRwLock, NZUsize, NZU16, NZU64};
+    use std::{num::NonZeroU64, sync::Arc, time::Duration};
+
+    // Peer provider stub: knows no peer sets and never publishes updates.
+    #[derive(Clone, Debug)]
+    struct DummyProvider;
+
+    impl Provider for DummyProvider {
+        type PublicKey = ed25519::PublicKey;
+
+        async fn peer_set(&mut self, _id: u64) -> Option> {
+            None
+        }
+
+        async fn subscribe(&mut self) -> commonware_p2p::PeerSetSubscription {
+            // The sender is dropped on return, so nothing is ever delivered on
+            // this subscription.
+            let (_tx, rx) = commonware_utils::channel::mpsc::unbounded_channel();
+            rx
+        }
+    }
+
+    // Blocker stub: accepts every block request and does nothing with it.
+    #[derive(Clone)]
+    struct DummyBlocker;
+
+    impl commonware_p2p::Blocker for DummyBlocker {
+        type PublicKey = ed25519::PublicKey;
+
+        fn block(&mut self, _peer: Self::PublicKey) -> commonware_actor::Feedback {
+            commonware_actor::Feedback::Ok
+        }
+    }
+
+    // Concrete database, operation, and actor types used by the tests below.
+    type TestDb = fixed::Db<
+        mmr::Family,
+        deterministic::Context,
+        sha256::Digest,
+        sha256::Digest,
+        Sha256,
+        TwoCap,
+        Sequential,
+    >;
+    type TestOp = > as SyncResolver>::Op;
+
+    type TestActor = Actor<
+        deterministic::Context,
+        ed25519::PublicKey,
+        DummyProvider,
+        DummyBlocker,
+        mmr::Family,
+        TestDb,
+    >;
+
+    // Actor configuration with stub networking, 10 ms timings, and a serve
+    // limit of 16 operations; `database` is passed through unchanged.
+    fn test_config(
+        database: Option>>,
+    ) -> Config {
+        Config {
+            peer_provider: DummyProvider,
+            blocker: DummyBlocker,
+            database,
+            mailbox_size: NZUsize!(16),
+            me: None,
+            initial: Duration::from_millis(10),
+            timeout: Duration::from_millis(10),
+            fetch_retry_timeout: Duration::from_millis(10),
+            max_serve_ops: NonZeroU64::new(16).unwrap(),
+            priority_requests: false,
+            priority_responses: false,
+        }
+    }
+
+    // Request for at most one operation starting at location 0, evaluated
+    // against a database of `op_count` operations.
+    fn test_request_at(op_count: Location) -> handler::Request {
+        handler::Request {
+            op_count,
+            start_loc: Location::new(0),
+            max_ops: NonZeroU64::new(1).unwrap(),
+            include_pinned_nodes: false,
+        }
+    }
+
+    type TestPending = Pending;
+    type TestPendingResult = oneshot::Receiver<
+        Result, mailbox::ResponseDropped>,
+    >;
+
+    // Fresh subscriber channel: the sender goes into the actor's pending map,
+    // the receiver stays with the test.
+    fn test_subscriber() -> (TestPending, TestPendingResult) {
+        oneshot::channel()
+    }
+
+    // Database configuration whose partitions are namespaced by `suffix`, so
+    // each test can use its own storage.
+    fn db_config(suffix: &str, pooler: &impl BufferPooler) -> FixedConfig {
+        let page_cache = CacheRef::from_pooler(pooler, NZU16!(101), NZUsize!(11));
+        FixedConfig {
+            merkle_config: MmrJournalConfig {
+                journal_partition: format!("{suffix}-mmr-journal"),
+                metadata_partition: format!("{suffix}-mmr-metadata"),
+                items_per_blob: NZU64!(11),
+                write_buffer: NZUsize!(1024),
+                strategy: Sequential,
+                page_cache: page_cache.clone(),
+            },
+            journal_config: FixedLogConfig {
+                partition: format!("{suffix}-log-journal"),
+                items_per_blob: NZU64!(7),
+                page_cache,
+                write_buffer: NZUsize!(1024),
+            },
+            translator: TwoCap,
+        }
+    }
+
+
async fn init_db(context: deterministic::Context, suffix: &str) -> Arc> { + let db = TestDb::init(context.child("db"), db_config(suffix, &context)) + .await + .expect("db init should succeed"); + Arc::new(AsyncRwLock::new(db)) + } + + fn encoded_fetch_payload() -> Bytes { + handler::Response:: { + proof: Proof { + leaves: Location::new(0), + inactive_peaks: 0, + digests: Vec::new(), + }, + operations: Vec::new(), + pinned_nodes: None, + } + .encode() + } + + #[test] + fn produce_denied_before_attach() { + deterministic::Runner::default().start(|context| async move { + let (mut actor, _mailbox) = TestActor::new(context.child("actor"), test_config(None)); + + let (response_tx, response_rx) = oneshot::channel(); + actor + .handle_produce(test_request_at(Location::new(1)), response_tx) + .await; + assert!(response_rx.await.is_err()); + }); + } + + #[test] + fn same_request_served_after_attach() { + deterministic::Runner::default().start(|context| async move { + let (mut actor, _mailbox) = TestActor::new(context.child("actor"), test_config(None)); + let db = init_db(context.child("resolver_db"), "resolver-after-attach").await; + let op_count = db.read().await.bounds().await.end; + actor.handle_mailbox_message(mailbox::Message::AttachDatabase(db)); + + let (response_tx, response_rx) = oneshot::channel(); + actor + .handle_produce(test_request_at(op_count), response_tx) + .await; + + let payload = response_rx + .await + .expect("response should be available after attach"); + assert!(!payload.is_empty()); + }); + } + + #[test] + fn produce_rejects_request_above_max_serve_ops() { + deterministic::Runner::default().start(|context| async move { + let (mut actor, _mailbox) = TestActor::new(context.child("actor"), test_config(None)); + let db = init_db(context.child("resolver_db"), "resolver-unbounded-max-ops").await; + let op_count = db.read().await.bounds().await.end; + actor.handle_mailbox_message(mailbox::Message::AttachDatabase(db)); + + let request = handler::Request { + 
op_count, + start_loc: Location::new(0), + max_ops: NonZeroU64::new(1_000).unwrap(), + include_pinned_nodes: false, + }; + let (response_tx, response_rx) = oneshot::channel(); + actor.handle_produce(request, response_tx).await; + + assert!(response_rx.await.is_err()); + }); + } + + #[test] + fn deliver_with_dropped_response_receiver_is_treated_as_valid() { + deterministic::Runner::default().start(|context| async move { + let (mut actor, _mailbox) = TestActor::new(context, test_config(None)); + let request = test_request_at(Location::new(1)); + + let (subscriber_tx, subscriber_rx) = test_subscriber(); + drop(subscriber_rx); + actor.pending.insert(request.clone(), vec![subscriber_tx]); + + let (ack_tx, ack_rx) = oneshot::channel(); + actor + .handle_deliver(request, encoded_fetch_payload(), ack_tx) + .await; + + assert!(ack_rx.await.unwrap()); + }); + } + + #[test] + fn deliver_with_rejected_subscriber_blocks_peer() { + deterministic::Runner::default().start(|context| async move { + let (mut actor, _mailbox) = TestActor::new(context, test_config(None)); + let request = test_request_at(Location::new(1)); + + let (sub1_tx, sub1_rx) = test_subscriber(); + let (sub2_tx, sub2_rx) = test_subscriber(); + actor + .pending + .insert(request.clone(), vec![sub1_tx, sub2_tx]); + + let (ack_tx, ack_rx) = oneshot::channel(); + futures::join!( + actor.handle_deliver(request, encoded_fetch_payload(), ack_tx), + async { + let fetch = sub1_rx.await.unwrap().unwrap(); + fetch.success_tx.send(true).unwrap(); + }, + async { + let fetch = sub2_rx.await.unwrap().unwrap(); + fetch.success_tx.send(false).unwrap(); + } + ); + + assert!(!ack_rx.await.unwrap()); + }); + } + + #[test] + fn deliver_ignores_dropped_subscriber_approval() { + deterministic::Runner::default().start(|context| async move { + let (mut actor, _mailbox) = TestActor::new(context, test_config(None)); + let request = test_request_at(Location::new(1)); + + let (sub1_tx, sub1_rx) = test_subscriber(); + let (sub2_tx, sub2_rx) = 
test_subscriber(); + actor + .pending + .insert(request.clone(), vec![sub1_tx, sub2_tx]); + + let (ack_tx, ack_rx) = oneshot::channel(); + futures::join!( + actor.handle_deliver(request, encoded_fetch_payload(), ack_tx), + async { + let fetch = sub1_rx.await.unwrap().unwrap(); + drop(fetch); + }, + async { + let fetch = sub2_rx.await.unwrap().unwrap(); + fetch.success_tx.send(true).unwrap(); + } + ); + + assert!(ack_rx.await.unwrap()); + }); + } + + #[test] + fn failed_then_deliver_clears_pending_and_allows_retry() { + deterministic::Runner::default().start(|context| async move { + let (mut actor, _mailbox) = TestActor::new(context, test_config(None)); + let request = test_request_at(Location::new(1)); + + let (subscriber_tx, _subscriber_rx) = test_subscriber(); + actor.pending.insert(request.clone(), vec![subscriber_tx]); + actor.pending.remove(&request); + assert!(!actor.pending.contains_key(&request)); + + let (ack_tx, ack_rx) = oneshot::channel(); + actor + .handle_deliver(request, Bytes::from_static(b"late-response"), ack_tx) + .await; + assert!(ack_rx.await.unwrap()); + }); + } + + #[test] + fn get_operations_refetches_when_pending_subscribers_are_closed() { + deterministic::Runner::default().start(|context| async move { + let (mut actor, _mailbox) = TestActor::new(context, test_config(None)); + let request = test_request_at(Location::new(1)); + + let (stale_tx, stale_rx) = test_subscriber(); + drop(stale_rx); + actor.pending.insert(request.clone(), vec![stale_tx]); + + let (fresh_tx, _fresh_rx) = test_subscriber(); + let action = actor.handle_mailbox_message(mailbox::Message::GetOperations { + request: request.clone(), + response: fresh_tx, + }); + + assert!(matches!(action, MailboxAction::Fetch(ref key) if key == &request)); + let pending = actor.pending.get(&request).unwrap(); + assert_eq!(pending.len(), 1); + assert!(!pending[0].is_closed()); + }); + } +} diff --git a/glue/src/stateful/db/p2p/handler.rs b/glue/src/stateful/db/p2p/handler.rs new file mode 
100644 index 00000000000..6c528a93cf0 --- /dev/null +++ b/glue/src/stateful/db/p2p/handler.rs @@ -0,0 +1,448 @@ +//! Internal handler types for resolver actor coordination. + +use bytes::{Buf, BufMut, Bytes}; +use commonware_actor::mailbox::{Overflow, Policy, Sender}; +use commonware_codec::{EncodeSize, Error as CodecError, Read, ReadExt, ReadRangeExt, Write}; +use commonware_cryptography::Digest; +use commonware_resolver::{self as resolver, p2p::Producer, Delivery}; +use commonware_storage::merkle::{Family, Location, Proof, MAX_PROOF_DIGESTS_PER_ELEMENT}; +use commonware_utils::{channel::oneshot, Span}; +use std::{ + cmp::Ordering, + collections::VecDeque, + fmt, + hash::{Hash, Hasher}, + num::NonZeroU64, +}; + +/// Safe upper bound on pinned nodes for any u64-backed family. +const MAX_PINNED_NODES: usize = 64; + +/// Request key sent through `resolver::p2p::Engine`. +#[derive(Clone, Debug)] +pub(super) struct Request { + /// Total operation count for proof context. + pub op_count: Location, + /// First operation location to fetch. + pub start_loc: Location, + /// Maximum number of operations to fetch. + pub max_ops: NonZeroU64, + /// Include pinned nodes for `start_loc` when `true`. 
+ pub include_pinned_nodes: bool, +} + +impl PartialEq for Request { + fn eq(&self, other: &Self) -> bool { + self.op_count == other.op_count + && self.start_loc == other.start_loc + && self.max_ops == other.max_ops + && self.include_pinned_nodes == other.include_pinned_nodes + } +} + +impl Eq for Request {} + +impl PartialOrd for Request { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Request { + fn cmp(&self, other: &Self) -> Ordering { + self.op_count + .cmp(&other.op_count) + .then_with(|| self.start_loc.cmp(&other.start_loc)) + .then_with(|| self.max_ops.cmp(&other.max_ops)) + .then_with(|| self.include_pinned_nodes.cmp(&other.include_pinned_nodes)) + } +} + +impl Hash for Request { + fn hash(&self, state: &mut H) { + self.op_count.hash(state); + self.start_loc.hash(state); + self.max_ops.hash(state); + self.include_pinned_nodes.hash(state); + } +} + +impl fmt::Display for Request { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "Request(count={}, start={}, max={}, pinned={})", + self.op_count, self.start_loc, self.max_ops, self.include_pinned_nodes, + ) + } +} + +impl Write for Request { + fn write(&self, buf: &mut impl BufMut) { + self.op_count.write(buf); + self.start_loc.write(buf); + self.max_ops.write(buf); + self.include_pinned_nodes.write(buf); + } +} + +impl EncodeSize for Request { + fn encode_size(&self) -> usize { + self.op_count.encode_size() + + self.start_loc.encode_size() + + self.max_ops.encode_size() + + self.include_pinned_nodes.encode_size() + } +} + +impl Read for Request { + type Cfg = (); + + fn read_cfg(buf: &mut impl Buf, _: &()) -> Result { + Ok(Self { + op_count: Location::::read(buf)?, + start_loc: Location::::read(buf)?, + max_ops: NonZeroU64::read(buf)?, + include_pinned_nodes: bool::read(buf)?, + }) + } +} + +impl Span for Request {} + +#[cfg(feature = "arbitrary")] +impl arbitrary::Arbitrary<'_> for Request { + fn arbitrary(u: &mut 
arbitrary::Unstructured<'_>) -> arbitrary::Result { + Ok(Self { + op_count: u.arbitrary()?, + start_loc: u.arbitrary()?, + max_ops: u.arbitrary()?, + include_pinned_nodes: u.arbitrary()?, + }) + } +} + +/// Wire-format response to a [`Request`]. +/// +/// Carries the inclusion proof, the fetched operations, and +/// optionally the pinned nodes at the requested start location. +/// Encoded by the producing peer and decoded by the consuming peer; +/// the actor converts this into a [`FetchResult`](commonware_storage::qmdb::sync::resolver::FetchResult) +/// before handing it to subscribers. +pub(super) struct Response { + pub(super) proof: Proof, + pub(super) operations: Vec, + pub(super) pinned_nodes: Option>, +} + +impl Write for Response { + fn write(&self, buf: &mut impl BufMut) { + self.proof.write(buf); + self.operations.write(buf); + self.pinned_nodes.write(buf); + } +} + +impl EncodeSize for Response { + fn encode_size(&self) -> usize { + self.proof.encode_size() + self.operations.encode_size() + self.pinned_nodes.encode_size() + } +} + +impl, D: Digest> Read for Response { + /// Maximum operations expected in this response, derived from the + /// request's `max_ops` field. + type Cfg = usize; + + fn read_cfg(buf: &mut impl Buf, max_ops: &usize) -> Result { + let max_proof_digests = max_ops.saturating_mul(MAX_PROOF_DIGESTS_PER_ELEMENT); + let proof = Proof::::read_cfg(buf, &max_proof_digests)?; + let operations = Vec::::read_range(buf, ..=*max_ops)?; + // Pinned nodes are the fold-prefix peaks at `start_loc`, independent of + // `max_ops`. Bound them by the global pinned-node limit. 
+ let pinned_nodes = Option::>::read_range(buf, ..=MAX_PINNED_NODES)?; + Ok(Self { + proof, + operations, + pinned_nodes, + }) + } +} + +#[cfg(feature = "arbitrary")] +impl arbitrary::Arbitrary<'_> for Response +where + Op: for<'a> arbitrary::Arbitrary<'a>, + D: for<'a> arbitrary::Arbitrary<'a>, +{ + fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result { + Ok(Self { + proof: u.arbitrary()?, + operations: u.arbitrary()?, + pinned_nodes: u.arbitrary()?, + }) + } +} + +/// Messages sent from [`Handler`] to the resolver [`Actor`](super::Actor). +/// +/// Each variant corresponds to one of the `resolver::Consumer` or `p2p::Producer` +/// callbacks, re-routed so the actor processes them on its own task. +pub(super) enum EngineMessage { + /// A peer delivered a response for a previously fetched key. + /// The actor decodes the value, fans it out to waiting subscribers, + /// and reports acceptance back through `response`. + Deliver { + key: Request, + value: Bytes, + response: oneshot::Sender, + }, + /// A peer requested data for `key`. + /// The actor queries the local database and sends the encoded + /// [`Response`] back through `response`. + Produce { + key: Request, + response: oneshot::Sender, + }, +} + +impl EngineMessage { + fn response_closed(&self) -> bool { + match self { + Self::Deliver { response, .. } => response.is_closed(), + Self::Produce { response, .. } => response.is_closed(), + } + } +} + +pub(super) struct EnginePending(VecDeque>); + +impl Default for EnginePending { + fn default() -> Self { + Self(VecDeque::new()) + } +} + +impl Overflow> for EnginePending { + fn is_empty(&self) -> bool { + self.0.is_empty() + } + + fn drain

(&mut self, mut push: P) + where + P: FnMut(EngineMessage) -> Option>, + { + while let Some(message) = self.0.pop_front() { + if message.response_closed() { + continue; + } + + if let Some(message) = push(message) { + self.0.push_front(message); + break; + } + } + } +} + +impl Policy for EngineMessage { + type Overflow = EnginePending; + + fn handle(overflow: &mut Self::Overflow, message: Self) -> bool { + if message.response_closed() { + return true; + } + overflow.0.push_back(message); + true + } +} + +/// Bridges `resolver::Consumer` and `p2p::Producer` into the actor's +/// message channel. +/// +/// Every callback from the resolver engine is converted into an +/// [`EngineMessage`] and sent to the actor. This keeps all mutable +/// state (pending subscribers, database handle) on the actor task, +/// while the engine runs independently. +#[derive(Clone)] +pub(super) struct Handler { + sender: Sender>, +} + +impl Handler { + pub(super) const fn new(sender: Sender>) -> Self { + Self { sender } + } +} + +impl resolver::Consumer for Handler { + type Key = Request; + type Value = Bytes; + type Subscriber = (); + + fn deliver( + &mut self, + delivery: Delivery, + value: Self::Value, + ) -> oneshot::Receiver { + let (response, receiver) = oneshot::channel(); + let _ = self.sender.enqueue(EngineMessage::Deliver { + key: delivery.key, + value, + response, + }); + receiver + } +} + +impl Producer for Handler { + type Key = Request; + + fn produce(&mut self, key: Self::Key) -> oneshot::Receiver { + let (response, receiver) = oneshot::channel(); + let _ = self + .sender + .enqueue(EngineMessage::Produce { key, response }); + receiver + } +} + +#[cfg(test)] +mod tests { + use super::*; + use commonware_codec::{Decode, DecodeExt, Encode}; + use commonware_cryptography::sha256; + use commonware_storage::merkle::mmr; + + const TEST_MAX_OPS: usize = 10_000; + + #[test] + fn response_codec_roundtrip() { + let response = Response:: { + proof: Proof { + leaves: 
mmr::Location::new(10), + inactive_peaks: 0, + digests: vec![sha256::Digest::from([7; 32])], + }, + operations: vec![1, 2, 3], + pinned_nodes: Some(vec![sha256::Digest::from([9; 32])]), + }; + + let encoded = response.encode(); + let decoded = + Response::::decode_cfg(encoded, &TEST_MAX_OPS) + .unwrap(); + assert_eq!(decoded.operations, vec![1, 2, 3]); + assert_eq!(decoded.proof.leaves, mmr::Location::new(10)); + assert_eq!(decoded.pinned_nodes.unwrap().len(), 1); + } + + #[test] + fn response_decode_rejects_invalid_pinned_flag() { + let mut encoded = Response:: { + proof: Proof { + leaves: mmr::Location::new(10), + inactive_peaks: 0, + digests: vec![sha256::Digest::from([7; 32])], + }, + operations: vec![1, 2, 3], + pinned_nodes: None, + } + .encode() + .to_vec(); + *encoded + .last_mut() + .expect("response encoding must include pinned_nodes flag") = 2; + + let err = match Response::::decode_cfg( + Bytes::from(encoded), + &TEST_MAX_OPS, + ) { + Ok(_) => panic!("decode should fail for invalid bool flag"), + Err(err) => err, + }; + assert!(matches!(err, CodecError::InvalidBool)); + } + + #[test] + fn response_decode_allows_pinned_nodes_above_max_ops() { + let max_ops = 1usize; + let response = Response:: { + proof: Proof { + leaves: mmr::Location::new(10), + inactive_peaks: 0, + digests: vec![sha256::Digest::from([7; 32])], + }, + operations: vec![1], + pinned_nodes: Some(vec![sha256::Digest::from([9; 32]); 3]), + }; + + let encoded = response.encode(); + let decoded = + Response::::decode_cfg(encoded, &max_ops).unwrap(); + assert_eq!(decoded.operations, vec![1]); + assert_eq!(decoded.pinned_nodes.unwrap().len(), 3); + } + + #[test] + fn response_decode_allows_max_single_operation_proof() { + let max_ops = 1usize; + let response = Response:: { + proof: Proof { + leaves: mmr::Location::new(10), + inactive_peaks: 0, + digests: vec![sha256::Digest::from([7; 32]); MAX_PROOF_DIGESTS_PER_ELEMENT], + }, + operations: vec![1], + pinned_nodes: None, + }; + + let encoded = 
response.encode(); + let decoded = + Response::::decode_cfg(encoded, &max_ops).unwrap(); + assert_eq!(decoded.operations, vec![1]); + assert_eq!(decoded.proof.digests.len(), MAX_PROOF_DIGESTS_PER_ELEMENT); + } + + #[test] + fn request_codec_roundtrip() { + let req = Request:: { + op_count: mmr::Location::new(128), + start_loc: mmr::Location::new(64), + max_ops: NonZeroU64::new(16).unwrap(), + include_pinned_nodes: true, + }; + let encoded = req.encode(); + let decoded = Request::::decode(encoded).unwrap(); + assert_eq!(req, decoded); + } + + #[test] + fn request_decode_rejects_invalid_pinned_flag() { + let mut encoded = Request:: { + op_count: mmr::Location::new(128), + start_loc: mmr::Location::new(64), + max_ops: NonZeroU64::new(16).unwrap(), + include_pinned_nodes: true, + } + .encode() + .to_vec(); + *encoded + .last_mut() + .expect("request encoding must include flag") = 2; + + let err = Request::::decode(Bytes::from(encoded)).unwrap_err(); + assert!(matches!(err, CodecError::InvalidBool)); + } + + #[cfg(feature = "arbitrary")] + mod conformance { + use super::*; + use commonware_codec::conformance::CodecConformance; + + commonware_conformance::conformance_tests! { + CodecConformance>, + CodecConformance>, + } + } +} diff --git a/glue/src/stateful/db/p2p/mailbox.rs b/glue/src/stateful/db/p2p/mailbox.rs new file mode 100644 index 00000000000..4ede8d31842 --- /dev/null +++ b/glue/src/stateful/db/p2p/mailbox.rs @@ -0,0 +1,244 @@ +//! Mailbox and wire types for the QMDB sync resolver service. 
+ +use super::handler; +use crate::stateful::db::AttachableResolver; +use commonware_actor::mailbox::{Overflow, Policy, Sender}; +use commonware_codec::Read; +use commonware_cryptography::Digest; +use commonware_macros::select; +use commonware_storage::{ + merkle::{Family, Location}, + qmdb::sync::resolver::{FetchResult, Resolver as SyncResolver}, +}; +use commonware_utils::{channel::oneshot, sync::AsyncRwLock}; +use futures::FutureExt as _; +use std::{collections::VecDeque, future::Future, num::NonZeroU64, sync::Arc}; + +/// The resolver actor dropped the response before completion. +#[derive(Debug, thiserror::Error)] +#[error("response dropped before completion")] +pub struct ResponseDropped; + +/// Messages sent from the [`Mailbox`] to the resolver [`Actor`](super::Actor). +pub(super) enum Message { + /// Provide a database handle so the actor can serve incoming requests. + AttachDatabase(Arc>), + /// Fetch operations from a remote peer via the P2P resolver engine. + GetOperations { + request: handler::Request, + response: oneshot::Sender, ResponseDropped>>, + }, + /// Cancel a previously requested operation fetch. + CancelOperations { request: handler::Request }, +} + +impl Message { + fn response_closed(&self) -> bool { + match self { + Self::AttachDatabase(_) | Self::CancelOperations { .. } => false, + Self::GetOperations { response, .. } => response.is_closed(), + } + } +} + +pub(super) struct Pending { + database: Option>>, + messages: VecDeque>, +} + +impl Default for Pending { + fn default() -> Self { + Self { + database: None, + messages: VecDeque::new(), + } + } +} + +impl Overflow> for Pending { + fn is_empty(&self) -> bool { + self.database.is_none() && self.messages.is_empty() + } + + fn drain

(&mut self, mut push: P) + where + P: FnMut(Message) -> Option>, + { + if let Some(database) = self.database.take() { + if let Some(Message::AttachDatabase(database)) = push(Message::AttachDatabase(database)) + { + self.database = Some(database); + return; + } + } + + while let Some(message) = self.messages.pop_front() { + if message.response_closed() { + continue; + } + + if let Some(message) = push(message) { + self.messages.push_front(message); + break; + } + } + } +} + +impl Policy for Message { + type Overflow = Pending; + + fn handle(overflow: &mut Self::Overflow, message: Self) -> bool { + if message.response_closed() { + return true; + } + + match message { + Self::AttachDatabase(database) => { + overflow.database = Some(database); + } + message => overflow.messages.push_back(message), + } + true + } +} + +/// Client-facing resolver mailbox used by the QMDB sync engine. +pub struct Mailbox { + sender: Sender>, +} + +impl Clone for Mailbox { + fn clone(&self) -> Self { + Self { + sender: self.sender.clone(), + } + } +} + +impl Mailbox { + pub(super) const fn new(sender: Sender>) -> Self { + Self { sender } + } +} + +impl Mailbox { + pub fn attach_database(&self, db: Arc>) { + let _ = self.sender.enqueue(Message::AttachDatabase(db)); + } +} + +impl SyncResolver for Mailbox +where + F: Family, + Op: Read + Send + Sync + Clone + 'static, + D: Digest, + DB: Send + Sync + 'static, +{ + type Family = F; + type Digest = D; + type Op = Op; + type Error = ResponseDropped; + + async fn get_operations( + &self, + op_count: Location, + start_loc: Location, + max_ops: NonZeroU64, + include_pinned_nodes: bool, + cancel_rx: oneshot::Receiver<()>, + ) -> Result, Self::Error> { + let request = handler::Request { + op_count, + start_loc, + max_ops, + include_pinned_nodes, + }; + + futures::pin_mut!(cancel_rx); + let (response_tx, response_rx) = oneshot::channel(); + let _ = self.sender.enqueue(Message::GetOperations { + request: request.clone(), + response: response_tx, + }); 
+ futures::pin_mut!(response_rx); + + select! { + response = response_rx.as_mut() => response.map_err(|_| ResponseDropped)?, + _ = cancel_rx.as_mut() => { + if let Some(response) = response_rx.as_mut().now_or_never() { + return response.map_err(|_| ResponseDropped)?; + } + let _ = self.sender.enqueue(Message::CancelOperations { request }); + Err(ResponseDropped) + }, + } + } +} + +impl AttachableResolver for Mailbox +where + F: Family, + Op: Read + Send + Sync + Clone + 'static, + D: Digest, + DB: Send + Sync + 'static, +{ + fn attach_database(&self, db: Arc>) -> impl Future + Send { + Self::attach_database(self, db); + std::future::ready(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use commonware_cryptography::sha256; + use commonware_runtime::{deterministic, Runner as _}; + use commonware_storage::mmr; + use commonware_utils::{NZUsize, NZU64}; + + #[test] + fn get_operations_cancellation_sends_cancel_message() { + deterministic::Runner::default().start(|context| async move { + let (sender, mut receiver) = commonware_actor::mailbox::new(context, NZUsize!(4)); + let mailbox = Mailbox::<(), mmr::Family, u64, sha256::Digest>::new(sender); + let op_count = mmr::Location::new(10); + let start_loc = mmr::Location::new(3); + let max_ops = NZU64!(2); + + let (cancel_tx, cancel_rx) = oneshot::channel(); + let get = mailbox.get_operations(op_count, start_loc, max_ops, false, cancel_rx); + let observe = async move { + let response = match receiver.recv().await.expect("request should be queued") { + Message::GetOperations { request, response } => { + assert_eq!(request.op_count, op_count); + assert_eq!(request.start_loc, start_loc); + assert_eq!(request.max_ops, max_ops); + assert!(!request.include_pinned_nodes); + response + } + Message::AttachDatabase(_) => panic!("unexpected attach message"), + Message::CancelOperations { .. 
} => panic!("cancel should come after request"), + }; + + drop(cancel_tx); + + match receiver.recv().await.expect("cancel should be queued") { + Message::CancelOperations { request } => { + assert_eq!(request.op_count, op_count); + assert_eq!(request.start_loc, start_loc); + assert_eq!(request.max_ops, max_ops); + assert!(!request.include_pinned_nodes); + } + Message::AttachDatabase(_) => panic!("unexpected attach message"), + Message::GetOperations { .. } => panic!("unexpected duplicate request"), + } + + drop(response); + }; + + let (result, _) = futures::join!(get, observe); + assert!(matches!(result, Err(ResponseDropped))); + }); + } +} diff --git a/glue/src/stateful/db/p2p/metrics.rs b/glue/src/stateful/db/p2p/metrics.rs new file mode 100644 index 00000000000..04b23135d16 --- /dev/null +++ b/glue/src/stateful/db/p2p/metrics.rs @@ -0,0 +1,66 @@ +//! Metrics for the P2P resolver [`Actor`](super::Actor). + +use commonware_runtime::{ + telemetry::metrics::{status, MetricsExt, Registered}, + Metrics as MetricsTrait, +}; +use prometheus_client::metrics::{counter::Counter, gauge::Gauge}; + +/// Metrics for the QMDB P2P resolver actor. +#[derive(Clone)] +pub(super) struct Metrics { + /// Current number of in-flight fetch request keys. + pub pending_requests: Registered, + + /// Total fetch requests dispatched to the P2P engine. + pub fetch_requests: Registered, + + /// Total cancelled requests. + pub cancel_requests: Registered, + + /// Deliveries from peers by outcome. + pub deliveries: status::Counter, + + /// Incoming serve requests by outcome. + pub serve_requests: status::Counter, + + /// Whether a database is currently attached (1) or not (0). + pub has_database: Registered, +} + +impl Metrics { + /// Create and register all resolver metrics. 
+ pub fn new(context: &impl MetricsTrait) -> Self { + let pending_requests = context.register( + "pending_requests", + "Current in-flight fetch request keys", + Gauge::default(), + ); + let fetch_requests = context.register( + "fetch_requests", + "Total fetch requests dispatched to the P2P engine", + Counter::default(), + ); + let cancel_requests = context.register( + "cancel_requests", + "Total cancelled requests", + Counter::default(), + ); + let deliveries = context.family("deliveries", "Deliveries from peers by outcome"); + let serve_requests = context.family("serve_requests", "Incoming serve requests by outcome"); + let has_database = context.register( + "has_database", + "Whether a database is currently attached", + Gauge::default(), + ); + + Self { + pending_requests, + fetch_requests, + cancel_requests, + deliveries, + serve_requests, + has_database, + } + } +} diff --git a/glue/src/stateful/db/p2p/mod.rs b/glue/src/stateful/db/p2p/mod.rs new file mode 100644 index 00000000000..6c7969f8c18 --- /dev/null +++ b/glue/src/stateful/db/p2p/mod.rs @@ -0,0 +1,22 @@ +//! P2P implementation of the QMDB sync resolver. +//! +//! Implements [`commonware_storage::qmdb::sync::resolver::Resolver`] over +//! [`commonware_resolver::p2p::Engine`], fetching operations from peers and +//! serving local operations in response to incoming requests. +//! +//! - [`Mailbox`]: client-facing handle that the QMDB sync engine calls to +//! fetch operations. Each call is multiplexed through the P2P resolver +//! engine so that duplicate requests share a single network round-trip. +//! - [`Actor`]: service loop that bridges the [`Mailbox`] with the P2P +//! engine, dispatches fetches, fans out deliveries to waiting callers, +//! and serves produce requests from the local database. 
+ +mod actor; +pub use actor::{Actor, Config}; + +mod mailbox; +pub use mailbox::{Mailbox, ResponseDropped}; + +mod handler; + +mod metrics; diff --git a/glue/src/stateful/mod.rs b/glue/src/stateful/mod.rs new file mode 100644 index 00000000000..c91a7eca9e1 --- /dev/null +++ b/glue/src/stateful/mod.rs @@ -0,0 +1,219 @@ +//! Manage QMDB database instances on behalf of a stateful application. +//! +//! A stateful application built on consensus must maintain speculative state for +//! every pending chain built on top of the finalized tip. This module provides +//! the [`Application`] trait and a [`Stateful`] actor that automates that +//! bookkeeping: +//! +//! 1. Before each `propose` or `verify`, the actor forks unmerkleized batches +//! from the parent block's pending state (or from committed database state +//! if the parent has been finalized). +//! 2. The application executes against those batches and returns merkleized +//! results, which the actor stores as a new pending tip keyed by the +//! block's digest. +//! 3. On finalization, the actor applies the winning tip's changesets to the +//! underlying databases and prunes pending entries from dead forks. +//! +//! # Database Layer +//! +//! The [`db`] module defines batch lifecycle traits ([`db::Unmerkleized`], +//! [`db::Merkleized`], [`db::ManagedDb`]) and a [`db::DatabaseSet`] trait that +//! groups one or more databases into a single unit. +//! +//! The [`db::p2p`] submodule provides a P2P resolver actor that implements +//! [`commonware_storage::qmdb::sync::resolver::Resolver`] over +//! [`commonware-resolver`](commonware_resolver), enabling databases to fetch +//! and serve sync operations from peers. +//! +//! # Syncing +//! +//! The actor supports two startup modes via [`StartupMode`]: +//! +//! - [`MarshalSync`](StartupMode::MarshalSync): Initialize fresh databases +//! and let the marshal backfill blocks from the network. Appropriate for +//! 
validators joining from genesis or after a clean state wipe. +//! +//! - [`StateSync`](StartupMode::StateSync): Run a one-time QMDB state sync +//! from a seed block, populating each database via +//! [`db::StateSyncSet::sync`]. Tip updates stream in as new blocks finalize +//! during the sync, so the final synced height is not predetermined. Once all +//! databases converge on the same anchor block, the actor transitions to +//! normal processing. A durable metadata flag ensures state sync runs at most +//! once; subsequent restarts must take the marshal sync path. +//! +//! # Lazy Recovery +//! +//! Pending state is kept entirely in memory to avoid disk writes on the +//! consensus hot path. After a restart the map is empty, but the actor +//! recovers lazily: when `propose` or `verify` encounters a parent whose +//! state is missing, the actor walks back through the block DAG (via a +//! [`BlockProvider`](commonware_consensus::marshal::ancestry::BlockProvider)) +//! to the nearest known ancestor or the finalized tip, +//! then replays forward via [`Application::apply`] to fill the gap. Each +//! replayed block is inserted into the pending map immediately so that +//! partial progress survives timeouts. +//! +//! # Compatibility +//! +//! The [`Stateful`] application may be used with [`Deferred`] and [`coding::Marshaled`], +//! but not with [`Inline`]. This is because [`Inline`] does not verify the correctness +//! of the embedded context within the [`CertifiableBlock`]. +//! +//! [`Deferred`]: commonware_consensus::marshal::standard::Deferred +//! [`Inline`]: commonware_consensus::marshal::standard::Inline +//! 
[`coding::Marshaled`]: commonware_consensus::marshal::coding::Marshaled + +use commonware_consensus::{CertifiableBlock, Epochable, Viewable}; +use commonware_cryptography::certificate::Scheme; +use commonware_runtime::{Clock, Metrics, Spawner}; +use db::DatabaseSet; +use futures::Stream; +use rand::Rng; +use std::future::Future; + +mod actor; +pub use actor::{Config, Mailbox, StartupMode, Stateful}; + +pub mod db; + +#[cfg(test)] +mod tests; + +/// The output of a successful [`Application::propose`] call. +pub struct Proposed, E: Rng + Spawner + Metrics + Clock> { + /// The block built by the application. + pub block: A::Block, + + /// The merkleized database batches produced during execution. + pub merkleized: >::Merkleized, +} + +/// A stateful application whose storage is managed by a [`DatabaseSet`]. +/// +/// Implementors receive [`DatabaseSet::Unmerkleized`] batches and +/// return [`DatabaseSet::Merkleized`] batches after execution. The surrounding +/// wrapper handles persistence: storing merkleized batches as pending tips on +/// the block tree and applying changesets to the underlying databases on +/// finalization. +pub trait Application: Clone + Send + 'static +where + E: Rng + Spawner + Metrics + Clock, +{ + /// The signing scheme used by the application. + type SigningScheme: Scheme; + + /// Metadata provided by the consensus engine for a given block. + /// + /// This often includes things like the proposer, view number, height, or + /// epoch. Must be [`Epochable`] and [`Viewable`] so the wrapper can + /// construct a [`Round`](commonware_consensus::types::Round) for + /// pending-state pruning. + type Context: Epochable + Viewable + Send; + + /// The block type produced by the application. + /// + /// Must implement [`CertifiableBlock`] so the wrapper can extract + /// the consensus context during lazy recovery (see + /// [`apply`](Self::apply)). + type Block: CertifiableBlock; + + /// The set of databases managed on behalf of this application. 
+ type Databases: DatabaseSet; + + /// A provider of input to the application. + /// + /// This may be a mempool that serves transactions, a stream of + /// certificates, or any other source of input that drives state + /// transitions. + type InputProvider: Send; + + /// Extract per-database sync targets from a finalized block. + /// + /// Called by the wrapper when a [`Update::Tip`](commonware_consensus::marshal::Update::Tip) + /// is received during state sync. The returned targets are forwarded to + /// the background sync orchestrator so the sync engines can track the + /// latest finalized state root and range. + fn sync_targets(block: &Self::Block) -> >::SyncTargets; + + /// Block used to initialize the consensus engine in the first epoch. + fn genesis(&mut self) -> impl Future + Send; + + /// Build a new block on top of the provided parent ancestry. + /// + /// Returns [`None`] if the build fails. + /// + /// This future may be cancelled by consensus if the caller drops its + /// response receiver. Implementations should be cancellation-safe: dropping + /// and retrying must not violate invariants or lose durable progress. + fn propose( + &mut self, + context: (E, Self::Context), + ancestry: impl Stream + Send, + batches: >::Unmerkleized, + input: &mut Self::InputProvider, + ) -> impl Future>> + Send; + + /// Verify a block received from a peer, relative to its ancestry. + /// + /// Called before voting. The implementation should execute the block + /// against the provided batches and merkleize them. Returns [`None`] + /// only when the block is permanently invalid; if validity may still + /// change as additional information arrives, continue waiting. + /// + /// Verification must reject any block whose execution result does not + /// match the block's committed state (for example, a state root mismatch). + /// + /// This future may be cancelled by consensus if the caller drops its + /// response receiver. 
Implementations should be cancellation-safe: dropping + /// and retrying must not violate invariants or lose durable progress. + fn verify( + &mut self, + context: (E, Self::Context), + ancestry: impl Stream + Send, + batches: >::Unmerkleized, + ) -> impl Future>::Merkleized>> + Send; + + /// Apply a previously certified block to reconstruct its merkleized state. + /// + /// Called by the wrapper during lazy recovery when pending state for + /// an ancestor block is missing (e.g. after a restart). The block is + /// known-good (it was previously certified), so the implementation + /// should unconditionally execute the block's state transitions. + /// + /// The returned merkleized state must match what + /// [`verify`](Self::verify) accepted for `block`. The wrapper commits this + /// replay result during finalization and cannot re-check block-specific + /// commitments generically. + /// + /// This future may be cancelled if the originating propose/verify request + /// is dropped. Implementations should be cancellation-safe: dropping and + /// retrying must not violate invariants or lose durable progress. + /// + /// # Panics + /// + /// Implementations should panic if execution fails, as this indicates + /// data corruption or non-determinism. + fn apply( + &mut self, + context: (E, Self::Context), + block: &Self::Block, + batches: >::Unmerkleized, + ) -> impl Future>::Merkleized> + Send; + + /// Observe a block after its database batches have been durably finalized. + /// + /// Called only after [`DatabaseSet::finalize`] succeeds. Implementations + /// may use this to run post-finalization maintenance such as pruning. + /// + /// # Panics + /// + /// Implementations should panic if post-finalization maintenance fails. 
+ fn finalized( + &mut self, + _context: (E, Self::Context), + _block: &Self::Block, + _databases: &Self::Databases, + ) -> impl Future + Send { + async {} + } +} diff --git a/glue/src/stateful/tests/common.rs b/glue/src/stateful/tests/common.rs new file mode 100644 index 00000000000..2c1e1ac2665 --- /dev/null +++ b/glue/src/stateful/tests/common.rs @@ -0,0 +1,212 @@ +use crate::simulate::processed::ProcessedHeight; +use commonware_consensus::{ + marshal::{self, core::Variant, Identifier as MarshalIdentifier}, + simplex::{mocks::scheme::Scheme as MockScheme, types::Finalization}, + types::Height, + Heightable, +}; +use commonware_cryptography::{ed25519, sha256, Digestible}; +use commonware_runtime::{buffer::paged::CacheRef, Clock, Metrics, Quota, Storage}; +use commonware_storage::{ + archive::immutable, + metadata::{Config as MetadataConfig, Metadata}, +}; +use commonware_utils::{sequence::U64, sync::Mutex, NZUsize, NZU16, NZU64}; +use std::{ + collections::{BTreeMap, HashMap}, + num::{NonZeroU16, NonZeroU32, NonZeroU64, NonZeroUsize}, + sync::Arc, + time::Duration, +}; + +pub(super) const EPOCH_LENGTH: NonZeroU64 = NZU64!(u64::MAX); +pub(super) const NAMESPACE: &[u8] = b"stateful_e2e_test"; +pub(super) const PAGE_SIZE: NonZeroU16 = NZU16!(1024); +pub(super) const PAGE_CACHE_SIZE: NonZeroUsize = NZUsize!(10); +pub(super) const IO_BUFFER_SIZE: NonZeroUsize = NZUsize!(2048); +pub(super) const TEST_QUOTA: Quota = Quota::per_second(NonZeroU32::MAX); + +pub(super) fn u64_to_digest(v: u64) -> sha256::Digest { + let mut bytes = [0u8; 32]; + bytes[..8].copy_from_slice(&v.to_be_bytes()); + sha256::Digest::from(bytes) +} + +pub(super) fn digest_to_u64(d: &sha256::Digest) -> u64 { + let bytes: &[u8] = d.as_ref(); + u64::from_be_bytes(bytes[..8].try_into().unwrap()) +} + +pub(super) fn archive_config( + prefix: &str, + name: &str, + page_cache: CacheRef, + codec_config: C, +) -> immutable::Config { + immutable::Config { + metadata_partition: 
format!("{prefix}-{name}-metadata"), + freezer_table_partition: format!("{prefix}-{name}-freezer-table"), + freezer_table_initial_size: 64, + freezer_table_resize_frequency: 10, + freezer_table_resize_chunk_size: 10, + freezer_key_partition: format!("{prefix}-{name}-freezer-key"), + freezer_key_page_cache: page_cache, + freezer_value_partition: format!("{prefix}-{name}-freezer-value"), + freezer_value_target_size: 1024, + freezer_value_compression: None, + ordinal_partition: format!("{prefix}-{name}-ordinal"), + items_per_section: NZU64!(10), + codec_config, + replay_buffer: IO_BUFFER_SIZE, + freezer_key_write_buffer: IO_BUFFER_SIZE, + freezer_value_write_buffer: IO_BUFFER_SIZE, + ordinal_write_buffer: IO_BUFFER_SIZE, + } +} + +/// Per-validator state inspectable by test properties. +/// +/// Generic over the marshal variant so both single-db and multi-db engines +/// can share the same state type and property implementations. +#[derive(Clone)] +pub(crate) struct MockValidatorState { + pub(super) marshal: marshal::core::Mailbox, V>, + pub(super) startup_sync_height: Option, +} + +impl PartialEq for MockValidatorState { + fn eq(&self, other: &Self) -> bool { + self.startup_sync_height == other.startup_sync_height + } +} + +impl MockValidatorState +where + V: Variant, + V::ApplicationBlock: Digestible, +{ + pub(crate) async fn digest_at_height(&self, height: u64) -> Option { + self.marshal + .get_block(MarshalIdentifier::Height(Height::new(height))) + .await + .map(|b| b.digest()) + } + + pub(crate) const fn startup_sync_height(&self) -> Option { + self.startup_sync_height + } +} + +pub(super) type MarshalMailboxOf = marshal::core::Mailbox, V>; + +/// Poll peers for a majority-agreed sync target block. 
+pub(super) async fn fetch_majority_sync_target( + mailboxes: &Arc>>>, + context: &impl Clock, + me: &ed25519::PublicKey, +) -> Option<( + V::Block, + Finalization, V::Commitment>, +)> +where + V: Variant, + V::ApplicationBlock: Digestible, +{ + for _ in 0..20 { + let peers_mailboxes: Vec> = { + let guard = mailboxes.lock(); + guard + .iter() + .filter(|(peer, _)| *peer != me) + .map(|(_, mailbox)| mailbox.clone()) + .collect() + }; + + // Collect latest heights from all peers. + let mut peers: Vec<(MarshalMailboxOf, Height)> = Vec::new(); + for mailbox in peers_mailboxes { + if let Some(height) = mailbox + .get_block(MarshalIdentifier::Latest) + .await + .map(|b| b.height()) + { + peers.push((mailbox, height)); + } + } + if peers.is_empty() { + context.sleep(Duration::from_millis(100)).await; + continue; + } + + // Find the highest height that a majority of peers have reached. + let required = peers.len() / 2 + 1; + let mut heights: Vec = peers.iter().map(|(_, h)| *h).collect(); + heights.sort(); + let quorum_height = heights[heights.len() - required]; + + // Count digests at quorum height and return the first block with majority agreement. 
+ let mut counts: HashMap)> = HashMap::new(); + for (mailbox, h) in &peers { + if *h < quorum_height { + continue; + } + if let Some(digest) = mailbox + .get_block(MarshalIdentifier::Height(quorum_height)) + .await + .map(|b| b.digest()) + { + counts + .entry(digest) + .and_modify(|(c, _)| *c += 1) + .or_insert((1, mailbox.clone())); + } + } + for (digest, (count, mailbox)) in counts { + if count >= required { + if let Some(block) = mailbox.get_block(MarshalIdentifier::Digest(digest)).await { + let finalization = mailbox + .get_finalization(quorum_height) + .await + .expect("sync target finalization must be available"); + return Some((block, finalization)); + } + } + } + + context.sleep(Duration::from_millis(100)).await; + } + None +} + +const STATE_SYNC_METADATA_SUFFIX: &str = "_state_sync_metadata"; +const SYNC_DONE_KEY: U64 = U64::new(0); + +/// Check whether state sync has already completed for this validator. +pub(super) async fn state_sync_done( + context: impl Storage + Clock + Metrics, + partition_prefix: &str, +) -> bool { + let metadata = Metadata::<_, U64, bool>::init( + context, + MetadataConfig { + partition: format!("{partition_prefix}{STATE_SYNC_METADATA_SUFFIX}"), + codec_config: (), + }, + ) + .await + .expect("failed to read state sync metadata"); + metadata.get(&SYNC_DONE_KEY).copied().unwrap_or(false) +} + +impl ProcessedHeight for MockValidatorState +where + V: Variant, + V::ApplicationBlock: Digestible, +{ + async fn processed_height(&self) -> u64 { + self.marshal + .get_processed_height() + .await + .map_or(0, |height| height.get()) + } +} diff --git a/glue/src/stateful/tests/mod.rs b/glue/src/stateful/tests/mod.rs new file mode 100644 index 00000000000..14059da7e34 --- /dev/null +++ b/glue/src/stateful/tests/mod.rs @@ -0,0 +1,637 @@ +//! 
E2E tests for `stateful` + +use crate::simulate::{ + action::{Action, Crash, Schedule}, + engine::EngineDefinition, + exit::{ExitCondition, ProcessedHeightAtLeast}, + plan::PlanBuilder, + processed::ProcessedHeight, + property::Property, +}; +use commonware_cryptography::{ed25519, PublicKey}; +use commonware_macros::{test_group, test_traced}; +use commonware_p2p::simulated::Link; +use commonware_runtime::deterministic; +use multi_db_app::MultiDbEngine; +use properties::{BlockAgreementAtHeight, LateJoinerStateSyncHandoff}; +use single_db_app::SingleDbEngine; +use std::time::Duration; + +mod common; +mod multi_db_app; +mod properties; +mod single_db_app; + +const NUM_VALIDATORS: u32 = 5; + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn all_validators_finalize_and_commit() { + run_finalize(SingleDbEngine::new(NUM_VALIDATORS)); + run_finalize(MultiDbEngine::new(NUM_VALIDATORS)); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn all_validators_finalize_and_commit_with_storage_faults() { + run_finalize_with_storage_faults(SingleDbEngine::new(NUM_VALIDATORS)); + run_finalize_with_storage_faults(MultiDbEngine::new(NUM_VALIDATORS)); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn deterministic_across_seeds() { + run_determinism(SingleDbEngine::new(NUM_VALIDATORS)); + run_determinism(MultiDbEngine::new(NUM_VALIDATORS)); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn crash_and_restart_one_validator() { + run_crash_restart(SingleDbEngine::new(NUM_VALIDATORS)); + run_crash_restart(MultiDbEngine::new(NUM_VALIDATORS)); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn delayed_start_one_validator() { + run_delayed_start(SingleDbEngine::new(NUM_VALIDATORS)); + run_delayed_start(MultiDbEngine::new(NUM_VALIDATORS)); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn state_sync_hands_off_to_marshal() { + run_state_sync(SingleDbEngine::new(NUM_VALIDATORS).with_state_sync()); + 
run_state_sync(MultiDbEngine::new(NUM_VALIDATORS).with_state_sync()); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn state_sync_hands_off_to_marshal_with_storage_faults() { + run_state_sync_with_storage_faults(SingleDbEngine::new(NUM_VALIDATORS).with_state_sync()); + run_state_sync_with_storage_faults(MultiDbEngine::new(NUM_VALIDATORS).with_state_sync()); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn state_sync_deterministic() { + run_state_sync_deterministic(SingleDbEngine::new(NUM_VALIDATORS).with_state_sync()); + run_state_sync_deterministic(MultiDbEngine::new(NUM_VALIDATORS).with_state_sync()); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn state_sync_random_crashes() { + run_state_sync_random_crashes(SingleDbEngine::new(NUM_VALIDATORS).with_state_sync()); + run_state_sync_random_crashes(MultiDbEngine::new(NUM_VALIDATORS).with_state_sync()); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn state_sync_lossy_network() { + let link = Link { + latency: Duration::from_millis(200), + jitter: Duration::from_millis(150), + success_rate: 0.7, + }; + run_state_sync_lossy( + SingleDbEngine::new(NUM_VALIDATORS).with_state_sync(), + link.clone(), + ); + run_state_sync_lossy(MultiDbEngine::new(NUM_VALIDATORS).with_state_sync(), link); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn lossy_network() { + let link = Link { + latency: Duration::from_millis(200), + jitter: Duration::from_millis(150), + success_rate: 0.7, + }; + run_lossy(SingleDbEngine::new(NUM_VALIDATORS), link.clone()); + run_lossy(MultiDbEngine::new(NUM_VALIDATORS), link); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn random_crashes() { + run_random_crashes(SingleDbEngine::new(NUM_VALIDATORS)); + run_random_crashes(MultiDbEngine::new(NUM_VALIDATORS)); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn many_concurrent_crashes() { + run_many_crashes(SingleDbEngine::new(NUM_VALIDATORS)); + run_many_crashes(MultiDbEngine::new(NUM_VALIDATORS)); +} + 
+#[test_group("slow")] +#[test_traced("DEBUG")] +fn full_cluster_outage_and_recovery() { + run_total_shutdown(SingleDbEngine::new(NUM_VALIDATORS)); + run_total_shutdown(MultiDbEngine::new(NUM_VALIDATORS)); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn state_sync_crash_during_sync() { + run_state_sync_crash_during_sync(SingleDbEngine::new(NUM_VALIDATORS).with_state_sync()); + run_state_sync_crash_during_sync(MultiDbEngine::new(NUM_VALIDATORS).with_state_sync()); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn rapid_crashes() { + run_rapid_crashes(SingleDbEngine::new(NUM_VALIDATORS)); + run_rapid_crashes(MultiDbEngine::new(NUM_VALIDATORS)); +} + +#[test_group("slow")] +#[test_traced("DEBUG")] +fn network_partition_and_rejoin() { + run_network_partition(SingleDbEngine::new(NUM_VALIDATORS)); + run_network_partition(MultiDbEngine::new(NUM_VALIDATORS)); +} + +fn run_finalize(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + finalize_plan(engine).run().unwrap(); +} + +fn run_finalize_with_storage_faults(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + let participants = engine.participants(); + finalize_plan(engine) + .crash(Crash::Schedule(default_storage_fault_schedule( + participants, + ))) + .timeout(Duration::from_secs(45)) + .run() + .unwrap(); +} + +fn finalize_plan(engine: D) -> PlanBuilder +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + PlanBuilder::new(engine) + .seeds(0..5) + .exit_condition(ProcessedHeightAtLeast::new(100)) + .property(BlockAgreementAtHeight::new(100)) +} + +fn storage_fault_config() -> deterministic::FaultConfig { + deterministic::FaultConfig::default().sync(0.01) +} + +fn default_storage_fault_schedule

(restart_order: impl IntoIterator) -> Schedule

+where + P: PublicKey, +{ + storage_fault_schedule( + restart_order, + Duration::from_secs(1), + Duration::from_secs(2), + Duration::from_millis(2500), + ) +} + +fn storage_fault_schedule

( + restart_order: impl IntoIterator, + fault_at: Duration, + clear_at: Duration, + restart_at: Duration, +) -> Schedule

+where + P: PublicKey, +{ + let mut schedule = Schedule::new() + .at(fault_at, Action::SetStorageFault(storage_fault_config())) + .at( + clear_at, + Action::SetStorageFault(deterministic::FaultConfig::default()), + ); + + for (index, participant) in restart_order.into_iter().enumerate() { + schedule = schedule.at( + restart_at + Duration::from_millis(250 * index as u64), + Action::Restart(participant), + ); + } + + schedule +} + +fn run_determinism(engine: D) +where + D: EngineDefinition + Clone, + D::State: ProcessedHeight + PartialEq, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + let seeds = 0..5; + let r1 = PlanBuilder::new(engine.clone()) + .seeds(seeds.clone()) + .exit_condition(ProcessedHeightAtLeast::new(20)) + .property(BlockAgreementAtHeight::new(20)) + .run() + .unwrap(); + let r2 = PlanBuilder::new(engine) + .seeds(seeds.clone()) + .exit_condition(ProcessedHeightAtLeast::new(20)) + .property(BlockAgreementAtHeight::new(20)) + .run() + .unwrap(); + for (seed, (left, right)) in seeds.zip(r1.iter().zip(r2.iter())) { + assert_eq!( + left.state, right.state, + "seed {seed} produced different state" + ); + } +} + +fn run_crash_restart(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + let validator = engine.participants()[0].clone(); + PlanBuilder::new(engine) + .seeds(0..5) + .crash(Crash::Schedule( + Schedule::new() + .at( + Duration::from_millis(2500), + Action::Crash(validator.clone()), + ) + .at(Duration::from_millis(5000), Action::Restart(validator)), + )) + .exit_condition(ProcessedHeightAtLeast::new(50)) + .property(BlockAgreementAtHeight::new(50)) + .run() + .unwrap(); +} + +fn run_delayed_start(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + PlanBuilder::new(engine) + .seeds(0..5) + .crash(Crash::Delay { count: 1, 
after: 5 }) + .exit_condition(ProcessedHeightAtLeast::new(20)) + .property(BlockAgreementAtHeight::new(20)) + .run() + .unwrap(); +} + +fn run_state_sync(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + LateJoinerStateSyncHandoff: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + state_sync_plan(engine).run().unwrap(); +} + +fn run_state_sync_with_storage_faults(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + LateJoinerStateSyncHandoff: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + let participants = engine.participants(); + state_sync_plan(engine) + .crash(Crash::Schedule(default_storage_fault_schedule( + state_sync_restart_order(&participants), + ))) + .timeout(Duration::from_secs(90)) + .run() + .unwrap(); +} + +fn state_sync_plan(engine: D) -> PlanBuilder +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + LateJoinerStateSyncHandoff: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + PlanBuilder::new(engine) + .seeds(0..5) + .crash(Crash::Delay { + count: 1, + after: 80, + }) + .exit_condition(ProcessedHeightAtLeast::new(150)) + .property(LateJoinerStateSyncHandoff) + .property(BlockAgreementAtHeight::new(150)) +} + +fn state_sync_restart_order(participants: &[P]) -> Vec

{ + let Some((late_joiner, active)) = participants.split_first() else { + return Vec::new(); + }; + + let mut restart_order = active.to_vec(); + restart_order.push(late_joiner.clone()); + restart_order +} + +fn run_lossy(engine: D, link: Link) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + PlanBuilder::new(engine) + .seeds(0..5) + .link(link) + .exit_condition(ProcessedHeightAtLeast::new(20)) + .property(BlockAgreementAtHeight::new(20)) + .run() + .unwrap(); +} + +fn run_random_crashes(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + PlanBuilder::new(engine) + .seeds(0..5) + .crash(Crash::Random { + frequency: Duration::from_secs(2), + downtime: Duration::from_secs(1), + count: 1, + }) + .exit_condition(ProcessedHeightAtLeast::new(50)) + .property(BlockAgreementAtHeight::new(50)) + .run() + .unwrap(); +} + +fn run_many_crashes(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + PlanBuilder::new(engine) + .seeds(0..5) + .crash(Crash::Random { + frequency: Duration::from_secs(2), + downtime: Duration::from_millis(500), + count: 3, + }) + .exit_condition(ProcessedHeightAtLeast::new(50)) + .property(BlockAgreementAtHeight::new(50)) + .run() + .unwrap(); +} + +fn run_total_shutdown(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + let total = engine.participants().len(); + + PlanBuilder::new(engine) + .seeds(0..5) + .crash(Crash::Random { + frequency: Duration::from_secs(2), + downtime: Duration::from_millis(500), + count: total, + }) + .exit_condition(ProcessedHeightAtLeast::new(100)) + .property(BlockAgreementAtHeight::new(100)) + .run() + .unwrap(); +} + +fn 
run_state_sync_deterministic(engine: D) +where + D: EngineDefinition + Clone, + D::State: ProcessedHeight + PartialEq, + BlockAgreementAtHeight: Property, + LateJoinerStateSyncHandoff: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + let seeds = 0..5; + let r1 = PlanBuilder::new(engine.clone()) + .seeds(seeds.clone()) + .crash(Crash::Delay { + count: 1, + after: 80, + }) + .exit_condition(ProcessedHeightAtLeast::new(100)) + .property(LateJoinerStateSyncHandoff) + .property(BlockAgreementAtHeight::new(100)) + .run() + .unwrap(); + let r2 = PlanBuilder::new(engine) + .seeds(seeds.clone()) + .crash(Crash::Delay { + count: 1, + after: 80, + }) + .exit_condition(ProcessedHeightAtLeast::new(100)) + .property(LateJoinerStateSyncHandoff) + .property(BlockAgreementAtHeight::new(100)) + .run() + .unwrap(); + for (seed, (left, right)) in seeds.zip(r1.iter().zip(r2.iter())) { + assert_eq!( + left.state, right.state, + "seed {seed} produced different state" + ); + } +} + +fn run_state_sync_random_crashes(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + LateJoinerStateSyncHandoff: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + PlanBuilder::new(engine) + .seeds(0..5) + .crash(Crash::Delay { + count: 1, + after: 80, + }) + .crash(Crash::Random { + frequency: Duration::from_secs(3), + downtime: Duration::from_secs(1), + count: 1, + }) + .exit_condition(ProcessedHeightAtLeast::new(150)) + .property(LateJoinerStateSyncHandoff) + .property(BlockAgreementAtHeight::new(150)) + .run() + .unwrap(); +} + +fn run_state_sync_lossy(engine: D, link: Link) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + LateJoinerStateSyncHandoff: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + PlanBuilder::new(engine) + .seeds(0..5) + .crash(Crash::Delay { + count: 1, + after: 30, + }) + .link(link) + .exit_condition(ProcessedHeightAtLeast::new(60)) + 
.property(LateJoinerStateSyncHandoff) + .property(BlockAgreementAtHeight::new(60)) + .run() + .unwrap(); +} + +/// Crash the late joiner mid-sync and restart it, exercising the +/// `sync_done` metadata recovery path (second boot takes marshal sync). +fn run_state_sync_crash_during_sync(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + LateJoinerStateSyncHandoff: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + let late_joiner = engine.participants()[0].clone(); + PlanBuilder::new(engine) + .seeds(0..5) + .crash(Crash::Delay { + count: 1, + after: 20, + }) + // Crash the late joiner shortly after it starts syncing, then restart. + .crash(Crash::Schedule( + Schedule::new() + .at(Duration::from_secs(5), Action::Crash(late_joiner.clone())) + .at(Duration::from_secs(7), Action::Restart(late_joiner)), + )) + .exit_condition(ProcessedHeightAtLeast::new(100)) + .property(LateJoinerStateSyncHandoff) + .property(BlockAgreementAtHeight::new(100)) + .run() + .unwrap(); +} + +/// Rapid successive crashes with very short downtime, targeting the +/// processor's lazy recovery path being interrupted by cancellation. +fn run_rapid_crashes(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + PlanBuilder::new(engine) + .seeds(0..5) + .crash(Crash::Random { + frequency: Duration::from_millis(500), + downtime: Duration::from_millis(100), + count: 1, + }) + .exit_condition(ProcessedHeightAtLeast::new(50)) + .property(BlockAgreementAtHeight::new(50)) + .run() + .unwrap(); +} + +/// Temporarily partition one validator from the network, then heal, +/// testing lazy recovery without a full restart. 
+fn run_network_partition(engine: D) +where + D: EngineDefinition, + D::State: ProcessedHeight, + BlockAgreementAtHeight: Property, + ProcessedHeightAtLeast: ExitCondition, +{ + let participants = engine.participants(); + let isolated = participants[0].clone(); + let good_link = Link { + latency: Duration::from_millis(10), + jitter: Duration::from_millis(5), + success_rate: 1.0, + }; + let dead_link = Link { + latency: Duration::from_secs(1), + jitter: Duration::ZERO, + success_rate: 0.0, + }; + + // Build a schedule that kills all links to/from the isolated node at + // 500ms, then heals all links at 2s. + let mut schedule = Schedule::new(); + for peer in &participants[1..] { + schedule = schedule + .at( + Duration::from_millis(500), + Action::UpdateLink { + from: isolated.clone(), + to: peer.clone(), + link: dead_link.clone(), + }, + ) + .at( + Duration::from_millis(500), + Action::UpdateLink { + from: peer.clone(), + to: isolated.clone(), + link: dead_link.clone(), + }, + ); + } + schedule = schedule.at(Duration::from_secs(2), Action::Heal(good_link)); + + PlanBuilder::new(engine) + .seeds(0..5) + .crash(Crash::Schedule(schedule)) + .exit_condition(ProcessedHeightAtLeast::new(50)) + .property(BlockAgreementAtHeight::new(50)) + .run() + .unwrap(); +} diff --git a/glue/src/stateful/tests/multi_db_app.rs b/glue/src/stateful/tests/multi_db_app.rs new file mode 100644 index 00000000000..104aa3172e2 --- /dev/null +++ b/glue/src/stateful/tests/multi_db_app.rs @@ -0,0 +1,671 @@ +use super::common::*; +use crate::{ + simulate::{ + engine::{EngineDefinition, InitContext}, + reporter::MonitorReporter, + }, + stateful::{ + db::{ + p2p as qmdb_resolver, DatabaseSet, Merkleized as _, SyncEngineConfig, Unmerkleized as _, + }, + Application, Config as StatefulConfig, Proposed, StartupMode, Stateful as StatefulActor, + }, +}; +use commonware_broadcast::buffered; +use commonware_codec::{Encode, EncodeSize, Error as CodecError, Read, ReadExt as _, Write}; +use 
commonware_consensus::{ + marshal::{ + self, + core::Actor as MarshalActor, + resolver::p2p as marshal_resolver, + standard::{Deferred, Standard}, + }, + simplex::{ + self, + config::ForwardingPolicy, + elector::RoundRobin, + mocks::scheme::{self as scheme_mocks, Scheme as MockScheme}, + types::Context, + }, + types::{Epoch, FixedEpocher, Height, Round, View, ViewDelta}, + Block as ConsensusBlock, CertifiableBlock, Heightable, +}; +use commonware_cryptography::{ + certificate::{mocks::Fixture, ConstantProvider}, + ed25519, + sha256::{self, Digest as Sha256Digest}, + Digest as _, Digestible, Hasher, Sha256, Signer as _, +}; +use commonware_formatting::hex; +use commonware_p2p::utils::mux::Muxer; +use commonware_parallel::Sequential; +use commonware_runtime::{ + buffer::paged::CacheRef, Buf, BufMut, Clock, Handle, Metrics, Quota, Spawner, Storage, + Supervisor as _, +}; +use commonware_storage::{ + archive::immutable, + journal::contiguous::fixed::Config as FixedLogConfig, + mmr::{self, full::Config as MmrJournalConfig, Location}, + qmdb::any::{sync::Target, unordered::fixed, FixedConfig}, + translator::TwoCap, +}; +use commonware_utils::{ + non_empty_range, + range::NonEmptyRange, + sync::{AsyncRwLock, Mutex}, + test_rng, NZUsize, NZU64, +}; +use futures::{Stream, StreamExt}; +use rand::Rng; +use std::{collections::BTreeMap, sync::Arc, time::Duration}; + +/// The QMDB database type used by the multi-db e2e tests. +type Qmdb = + fixed::Db; + +/// A single QMDB database behind a lock. +type SingleDb = Arc>>; + +/// Two QMDB databases as a tuple. +pub(crate) type MultiDatabaseSet = (SingleDb, SingleDb); + +type MarshalMailbox = MarshalMailboxOf>; + +/// A block carrying state from two QMDB databases. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct Block { + context: Context, + parent: sha256::Digest, + height: Height, + root_a: sha256::Digest, + range_a: NonEmptyRange, + root_b: sha256::Digest, + range_b: NonEmptyRange, +} + +impl Write for Block { + fn write(&self, buf: &mut impl BufMut) { + self.context.write(buf); + self.parent.write(buf); + self.height.write(buf); + self.root_a.write(buf); + self.range_a.write(buf); + self.root_b.write(buf); + self.range_b.write(buf); + } +} + +impl EncodeSize for Block { + fn encode_size(&self) -> usize { + self.context.encode_size() + + self.parent.encode_size() + + self.height.encode_size() + + self.root_a.encode_size() + + self.range_a.encode_size() + + self.root_b.encode_size() + + self.range_b.encode_size() + } +} + +impl Read for Block { + type Cfg = (); + + fn read_cfg(buf: &mut impl Buf, _: &Self::Cfg) -> Result { + Ok(Self { + context: Context::read(buf)?, + parent: sha256::Digest::read(buf)?, + height: Height::read(buf)?, + root_a: sha256::Digest::read(buf)?, + range_a: NonEmptyRange::read(buf)?, + root_b: sha256::Digest::read(buf)?, + range_b: NonEmptyRange::read(buf)?, + }) + } +} + +impl Digestible for Block { + type Digest = sha256::Digest; + + fn digest(&self) -> sha256::Digest { + Sha256::hash(&self.encode()) + } +} + +impl Heightable for Block { + fn height(&self) -> Height { + self.height + } +} + +impl ConsensusBlock for Block { + fn parent(&self) -> sha256::Digest { + self.parent + } +} + +impl CertifiableBlock for Block { + type Context = Context; + + fn context(&self) -> Self::Context { + self.context.clone() + } +} + +impl Block { + fn genesis( + root_a: sha256::Digest, + range_a: NonEmptyRange, + root_b: sha256::Digest, + range_b: NonEmptyRange, + ) -> Self { + Self { + context: Context { + round: Round::new(Epoch::zero(), View::zero()), + leader: ed25519::PrivateKey::from_seed(0).public_key(), + parent: (View::zero(), sha256::Digest::EMPTY), + }, + parent: sha256::Digest::EMPTY, + 
height: Height::zero(), + root_a, + range_a, + root_b, + range_b, + } + } +} + +/// A stateful application that writes to two QMDB databases. +/// +/// DB-A stores a counter incremented each block. +/// DB-B stores height markers (height -> height_val). +#[derive(Clone)] +struct App { + genesis: Block, +} + +impl App { + fn new(genesis: Block) -> Self { + Self { genesis } + } + + /// Execute a block against two databases. + async fn execute( + height: Height, + batches: ( + as DatabaseSet>::Unmerkleized, + as DatabaseSet>::Unmerkleized, + ), + ) -> ( + as DatabaseSet>::Merkleized, + as DatabaseSet>::Merkleized, + ) { + let (mut batch_a, mut batch_b) = batches; + + // DB-A: increment counter + let counter = Sha256::hash(b"counter"); + let current: u64 = batch_a + .get(&counter) + .await + .unwrap() + .map_or(0, |v| digest_to_u64(&v)); + batch_a = batch_a.write(counter, Some(u64_to_digest(current + 1))); + + // DB-B: write height marker + batch_b = batch_b.write( + Sha256::hash(&height.get().to_be_bytes()), + Some(u64_to_digest(height.get())), + ); + + let merkleized_a = batch_a.merkleize().await.unwrap(); + let merkleized_b = batch_b.merkleize().await.unwrap(); + (merkleized_a, merkleized_b) + } +} + +impl Application for App { + type SigningScheme = MockScheme; + type Context = Context; + type Block = Block; + type Databases = MultiDatabaseSet; + type InputProvider = (); + + async fn genesis(&mut self) -> Self::Block { + self.genesis.clone() + } + + async fn propose( + &mut self, + context: (E, Self::Context), + ancestry: impl Stream + Send, + batches: >::Unmerkleized, + _input: &mut Self::InputProvider, + ) -> Option> { + let mut ancestry = Box::pin(ancestry); + let parent = ancestry.next().await?; + let height = Height::new(parent.height().get() + 1); + let (merkleized_a, merkleized_b) = Self::execute(height, batches).await; + let bounds_a = merkleized_a.bounds(); + let bounds_b = merkleized_b.bounds(); + let block = Block { + context: context.1.clone(), + 
parent: parent.digest(), + height, + root_a: merkleized_a.root(), + range_a: non_empty_range!( + bounds_a.inactivity_floor, + Location::new(bounds_a.total_size) + ), + root_b: merkleized_b.root(), + range_b: non_empty_range!( + bounds_b.inactivity_floor, + Location::new(bounds_b.total_size) + ), + }; + Some(Proposed { + block, + merkleized: (merkleized_a, merkleized_b), + }) + } + + async fn verify( + &mut self, + _context: (E, Self::Context), + ancestry: impl Stream + Send, + batches: >::Unmerkleized, + ) -> Option<>::Merkleized> { + let mut ancestry = Box::pin(ancestry); + let tip = ancestry.next().await?; + let (merkleized_a, merkleized_b) = Self::execute(tip.height(), batches).await; + let bounds_a = merkleized_a.bounds(); + let bounds_b = merkleized_b.bounds(); + let matches_a = merkleized_a.root() == tip.root_a + && non_empty_range!( + bounds_a.inactivity_floor, + Location::new(bounds_a.total_size) + ) == tip.range_a; + let matches_b = merkleized_b.root() == tip.root_b + && non_empty_range!( + bounds_b.inactivity_floor, + Location::new(bounds_b.total_size) + ) == tip.range_b; + if !matches_a || !matches_b { + return None; + } + Some((merkleized_a, merkleized_b)) + } + + async fn apply( + &mut self, + _context: (E, Self::Context), + block: &Self::Block, + batches: >::Unmerkleized, + ) -> >::Merkleized { + Self::execute(block.height(), batches).await + } + + fn sync_targets(block: &Self::Block) -> >::SyncTargets { + ( + Target::new(block.root_a, block.range_a.clone()), + Target::new(block.root_b, block.range_b.clone()), + ) + } +} + +/// Multi-database engine definition for the simulation harness. +#[derive(Clone)] +pub(crate) struct MultiDbEngine { + participants: Vec, + schemes: Vec>, + enable_state_sync: bool, + marshal_mailboxes: Arc>>, + sync_heights: Arc>>, +} + +impl MultiDbEngine { + pub(crate) fn new(n: u32) -> Self { + let mut rng = test_rng(); + let Fixture { + participants, + schemes, + .. 
+ } = scheme_mocks::fixture(&mut rng, NAMESPACE, n); + + Self { + participants, + schemes, + enable_state_sync: false, + marshal_mailboxes: Arc::new(Mutex::new(BTreeMap::new())), + sync_heights: Arc::new(Mutex::new(BTreeMap::new())), + } + } + + pub(crate) fn with_state_sync(mut self) -> Self { + self.enable_state_sync = true; + self + } +} + +impl EngineDefinition for MultiDbEngine { + type PublicKey = ed25519::PublicKey; + type Engine = Handle<()>; + type State = MockValidatorState>; + + fn participants(&self) -> Vec { + self.participants.clone() + } + + fn channels(&self) -> Vec<(u64, Quota)> { + vec![ + (0, TEST_QUOTA), // votes + (1, TEST_QUOTA), // certificates + (2, TEST_QUOTA), // resolver + (3, TEST_QUOTA), // backfill + (4, TEST_QUOTA), // broadcast + (5, TEST_QUOTA), // qmdb sync resolvers (muxed) + ] + } + + async fn init(&self, ctx: InitContext<'_, Self::PublicKey>) -> (Self::Engine, Self::State) { + let InitContext { + context, + index, + public_key, + oracle, + channels, + participants: _, + monitor, + } = ctx; + + let scheme = self.schemes[index].clone(); + + let partition_prefix = format!("validator-{index}"); + let page_cache = CacheRef::from_pooler(&context, PAGE_SIZE, PAGE_CACHE_SIZE); + + // QMDB database configs (one per database) + let db_config_a = FixedConfig { + merkle_config: MmrJournalConfig { + journal_partition: format!("{partition_prefix}-qmdb-a-mmr-journal"), + metadata_partition: format!("{partition_prefix}-qmdb-a-mmr-metadata"), + items_per_blob: NZU64!(11), + write_buffer: IO_BUFFER_SIZE, + strategy: Sequential, + page_cache: page_cache.clone(), + }, + journal_config: FixedLogConfig { + partition: format!("{partition_prefix}-qmdb-a-log-journal"), + items_per_blob: NZU64!(7), + page_cache: page_cache.clone(), + write_buffer: IO_BUFFER_SIZE, + }, + translator: TwoCap, + }; + let db_config_b = FixedConfig { + merkle_config: MmrJournalConfig { + journal_partition: format!("{partition_prefix}-qmdb-b-mmr-journal"), + metadata_partition: 
format!("{partition_prefix}-qmdb-b-mmr-metadata"), + items_per_blob: NZU64!(11), + write_buffer: IO_BUFFER_SIZE, + strategy: Sequential, + page_cache: page_cache.clone(), + }, + journal_config: FixedLogConfig { + partition: format!("{partition_prefix}-qmdb-b-log-journal"), + items_per_blob: NZU64!(7), + page_cache: page_cache.clone(), + write_buffer: IO_BUFFER_SIZE, + }, + translator: TwoCap, + }; + let db_config = (db_config_a, db_config_b); + + // Destructure the 6 channels. + let mut channels = channels.into_iter(); + let vote_network = channels.next().unwrap(); + let certificate_network = channels.next().unwrap(); + let resolver_network = channels.next().unwrap(); + let backfill_network = channels.next().unwrap(); + let broadcast_network = channels.next().unwrap(); + let qmdb_resolver_network = channels.next().unwrap(); + + // Mux the QMDB resolver channel into two subchannels (one per database). + let (mux, mut mux_handle) = Muxer::new( + context.child("qmdb_mux"), + qmdb_resolver_network.0, + qmdb_resolver_network.1, + 100, + ); + mux.start(); + let qmdb_a_resolver_network = mux_handle.register(0).await.unwrap(); + let qmdb_b_resolver_network = mux_handle.register(1).await.unwrap(); + + // Marshal resolver + let resolver_cfg = marshal_resolver::Config { + public_key: public_key.clone(), + peer_provider: oracle.manager(), + blocker: oracle.control(public_key.clone()), + mailbox_size: NZUsize!(100), + initial: Duration::from_secs(1), + timeout: Duration::from_secs(2), + fetch_retry_timeout: Duration::from_millis(100), + priority_requests: false, + priority_responses: false, + }; + let resolver = marshal_resolver::init( + context.child("marshal_resolver"), + resolver_cfg, + backfill_network, + ); + + // Buffered broadcast engine + let broadcast_config = buffered::Config { + public_key: public_key.clone(), + mailbox_size: NZUsize!(100), + deque_size: 10, + priority: false, + codec_config: (), + peer_provider: oracle.manager(), + }; + let (broadcast_engine, 
buffer) = + buffered::Engine::new(context.child("broadcast"), broadcast_config); + broadcast_engine.start(broadcast_network); + + // Immutable archives + let finalizations_by_height = immutable::Archive::init( + context.child("finalizations_by_height"), + archive_config(&partition_prefix, "finalizations", page_cache.clone(), ()), + ) + .await + .expect("failed to initialize finalizations archive"); + let finalized_blocks = immutable::Archive::init( + context.child("finalized_blocks"), + archive_config(&partition_prefix, "blocks", page_cache.clone(), ()), + ) + .await + .expect("failed to initialize blocks archive"); + + let genesis_block = { + let empty_db_root = Sha256Digest::from(hex!( + "ea6e0567a525372add5e4ef4d0600c18ed47fa5dd041a0ab0d25b60ea8c35978" + )); + Block::genesis( + empty_db_root, + non_empty_range!(Location::new(0), Location::new(1)), + empty_db_root, + non_empty_range!(Location::new(0), Location::new(1)), + ) + }; + + // Marshal actor + let provider = ConstantProvider::new(scheme.clone()); + let marshal_config = marshal::Config { + provider, + epocher: FixedEpocher::new(EPOCH_LENGTH), + start: marshal::Start::Genesis(genesis_block.clone()), + partition_prefix: partition_prefix.clone(), + mailbox_size: NZUsize!(100), + view_retention_timeout: ViewDelta::new(10), + prunable_items_per_section: NZU64!(10), + page_cache: page_cache.clone(), + replay_buffer: IO_BUFFER_SIZE, + key_write_buffer: IO_BUFFER_SIZE, + value_write_buffer: IO_BUFFER_SIZE, + block_codec_config: (), + max_repair: NZUsize!(10), + max_pending_acks: NZUsize!(1), + strategy: Sequential, + }; + let (marshal_actor, marshal_mailbox, _last_height) = + MarshalActor::<_, Standard, _, _, _, _, _>::init( + context.child("marshal"), + finalizations_by_height, + finalized_blocks, + marshal_config, + ) + .await; + self.marshal_mailboxes + .lock() + .insert(public_key.clone(), marshal_mailbox.clone()); + + // QMDB state-sync resolvers (one per database). 
+ let (qmdb_resolver_actor_a, qmdb_sync_resolver_a) = + qmdb_resolver::Actor::<_, ed25519::PublicKey, _, _, mmr::Family, Qmdb<_>>::new( + context.child("qmdb_resolver_a"), + qmdb_resolver::Config { + peer_provider: oracle.manager(), + blocker: oracle.control(public_key.clone()), + database: None, + mailbox_size: NZUsize!(100), + me: Some(public_key.clone()), + initial: Duration::from_secs(1), + timeout: Duration::from_secs(2), + fetch_retry_timeout: Duration::from_millis(100), + max_serve_ops: NZU64!(16), + priority_requests: false, + priority_responses: false, + }, + ); + qmdb_resolver_actor_a.start(qmdb_a_resolver_network); + + let (qmdb_resolver_actor_b, qmdb_sync_resolver_b) = + qmdb_resolver::Actor::<_, ed25519::PublicKey, _, _, mmr::Family, Qmdb<_>>::new( + context.child("qmdb_resolver_b"), + qmdb_resolver::Config { + peer_provider: oracle.manager(), + blocker: oracle.control(public_key.clone()), + database: None, + mailbox_size: NZUsize!(100), + me: Some(public_key.clone()), + initial: Duration::from_secs(1), + timeout: Duration::from_secs(2), + fetch_retry_timeout: Duration::from_millis(100), + max_serve_ops: NZU64!(16), + priority_requests: false, + priority_responses: false, + }, + ); + qmdb_resolver_actor_b.start(qmdb_b_resolver_network); + + let (startup, startup_sync_height) = if self.enable_state_sync + && !state_sync_done(context.child("state_sync_metadata"), &partition_prefix).await + { + fetch_majority_sync_target(&self.marshal_mailboxes, &context, public_key) + .await + .map_or((StartupMode::MarshalSync, None), |(block, finalization)| { + let height = block.height().get(); + self.sync_heights.lock().insert(public_key.clone(), height); + ( + StartupMode::StateSync { + block, + finalization, + }, + Some(height), + ) + }) + } else { + let prior = self.sync_heights.lock().get(public_key).copied(); + (StartupMode::MarshalSync, prior) + }; + + // Stateful actor + let app = App::new(genesis_block.clone()); + let (stateful_actor, stateful_mailbox) = 
StatefulActor::init( + context.child("stateful"), + StatefulConfig { + app, + db_config, + input_provider: (), + marshal: marshal_mailbox.clone(), + mailbox_size: 100, + partition_prefix: partition_prefix.clone(), + startup, + resolvers: (qmdb_sync_resolver_a.clone(), qmdb_sync_resolver_b.clone()), + sync_config: SyncEngineConfig { + fetch_batch_size: NZU64!(16), + apply_batch_size: 64, + max_outstanding_requests: 8, + update_channel_size: NZUsize!(256), + max_retained_roots: 32, + }, + }, + ); + + // Deferred wrapper + let deferred = Deferred::new( + context.child("deferred"), + stateful_mailbox.clone(), + marshal_mailbox.clone(), + FixedEpocher::new(EPOCH_LENGTH), + ); + + // Marshal reporter: stateful mailbox, wrapped by monitor. + let marshal_reporters = MonitorReporter::new(public_key.clone(), monitor, stateful_mailbox); + + // Start marshal actor with monitored reporters. + marshal_actor.start(marshal_reporters, Some(buffer), resolver); + + // Initialize stateful from marshal's processed frontier. 
+ stateful_actor.start(); + + // Simplex engine + let simplex_config = simplex::Config { + scheme, + elector: RoundRobin::::default(), + blocker: oracle.control(public_key.clone()), + automaton: deferred.clone(), + relay: deferred, + reporter: marshal_mailbox.clone(), + strategy: Sequential, + partition: format!("{partition_prefix}-simplex"), + mailbox_size: NZUsize!(100), + epoch: Epoch::zero(), + floor: simplex::config::Floor::Genesis(genesis_block.digest()), + replay_buffer: IO_BUFFER_SIZE, + write_buffer: IO_BUFFER_SIZE, + page_cache, + leader_timeout: Duration::from_secs(1), + certification_timeout: Duration::from_secs(2), + timeout_retry: Duration::from_millis(500), + activity_timeout: ViewDelta::new(10), + skip_timeout: ViewDelta::new(5), + fetch_timeout: Duration::from_secs(2), + fetch_concurrent: NZUsize!(3), + forwarding: ForwardingPolicy::Disabled, + }; + + let engine = simplex::Engine::new(context, simplex_config); + let handle = engine.start(vote_network, certificate_network, resolver_network); + + ( + handle, + MockValidatorState { + marshal: marshal_mailbox, + startup_sync_height, + }, + ) + } + + fn start(engine: Self::Engine) -> Handle<()> { + engine + } +} diff --git a/glue/src/stateful/tests/properties.rs b/glue/src/stateful/tests/properties.rs new file mode 100644 index 00000000000..33ce407cc86 --- /dev/null +++ b/glue/src/stateful/tests/properties.rs @@ -0,0 +1,96 @@ +use super::common::MockValidatorState; +use crate::simulate::{processed::ProcessedHeight, property::Property, tracker::ProgressTracker}; +use commonware_consensus::marshal::core::Variant; +use commonware_cryptography::{ed25519, sha256, Digestible}; +use std::{future::Future, pin::Pin}; + +/// Post-run property: all validators agree on the finalized block at `height`. 
+#[derive(Clone, Copy)]
+pub(crate) struct BlockAgreementAtHeight {
+    height: u64, // finalized height at which digests are compared
+}
+
+impl BlockAgreementAtHeight {
+    pub fn new(height: u64) -> Self {
+        Self { height }
+    }
+}
+
+impl<V> Property<MockValidatorState<V>> for BlockAgreementAtHeight
+where
+    V: Variant,
+    V::ApplicationBlock: Digestible<Digest = sha256::Digest>,
+    MockValidatorState<V>: Send + Sync,
+{
+    fn name(&self) -> &str {
+        "block_agreement_at_height"
+    }
+
+    fn check<'a>(
+        &'a self,
+        _tracker: &'a ProgressTracker<ed25519::PublicKey>,
+        states: &'a [&'a MockValidatorState<V>],
+    ) -> Pin<Box<dyn Future<Output = Result<(), String>> + Send + 'a>> {
+        Box::pin(async move {
+            let mut expected = None; // digest reported by the first validator visited
+            for state in states {
+                let Some(digest) = state.digest_at_height(self.height).await else {
+                    return Err(format!(
+                        "missing finalized digest at height {} on at least one validator",
+                        self.height
+                    ));
+                };
+                if let Some(previous) = expected {
+                    if digest != previous {
+                        return Err(format!(
+                            "digest disagreement at finalized height {}",
+                            self.height
+                        ));
+                    }
+                } else {
+                    expected = Some(digest);
+                }
+            }
+
+            Ok(()) // also reached when `states` is empty
+        })
+    }
+}
+
+/// Post-run property: at least one node used startup state sync and then advanced further.
+#[derive(Clone, Copy)]
+pub(crate) struct LateJoinerStateSyncHandoff;
+
+impl<V> Property<MockValidatorState<V>> for LateJoinerStateSyncHandoff
+where
+    V: Variant,
+    V::ApplicationBlock: Digestible<Digest = sha256::Digest>,
+    MockValidatorState<V>: Send + Sync,
+{
+    fn name(&self) -> &str {
+        "late_joiner_state_sync_handoff"
+    }
+
+    fn check<'a>(
+        &'a self,
+        _tracker: &'a ProgressTracker<ed25519::PublicKey>,
+        states: &'a [&'a MockValidatorState<V>],
+    ) -> Pin<Box<dyn Future<Output = Result<(), String>> + Send + 'a>> {
+        Box::pin(async move {
+            for state in states {
+                let Some(sync_height) = state.startup_sync_height() else {
+                    continue; // no startup sync height recorded for this validator
+                };
+                let processed_height = state.processed_height().await;
+                if processed_height > sync_height {
+                    return Ok(()); // one qualifying validator is enough
+                }
+            }
+
+            Err(
+                "no validator both used startup state sync and advanced beyond the synced height"
+                    .to_string(),
+            )
+        })
+    }
+}
diff --git a/glue/src/stateful/tests/single_db_app.rs b/glue/src/stateful/tests/single_db_app.rs
new file mode 100644
index 00000000000..82b357760e9
--- /dev/null
+++ b/glue/src/stateful/tests/single_db_app.rs
@@ -0,0 +1,561 @@
+use super::common::*;
+use crate::{
+    simulate::{
+        engine::{EngineDefinition, InitContext},
+        reporter::MonitorReporter,
+    },
+    stateful::{
+        db::{
+            p2p as qmdb_resolver, DatabaseSet, Merkleized as _, SyncEngineConfig, Unmerkleized as _,
+        },
+        Application, Config as StatefulConfig, Proposed, StartupMode, Stateful as StatefulActor,
+    },
+};
+use commonware_broadcast::buffered;
+use commonware_codec::{Encode, EncodeSize, Error as CodecError, Read, ReadExt as _, Write};
+use commonware_consensus::{
+    marshal::{
+        self,
+        core::Actor as MarshalActor,
+        resolver::p2p as marshal_resolver,
+        standard::{Deferred, Standard},
+    },
+    simplex::{
+        self,
+        config::ForwardingPolicy,
+        elector::RoundRobin,
+        mocks::scheme::{self as scheme_mocks, Scheme as MockScheme},
+        types::Context,
+    },
+    types::{Epoch, FixedEpocher, Height, Round, View, ViewDelta},
+    Block as ConsensusBlock, CertifiableBlock, Heightable,
+};
+use commonware_cryptography::{
+    certificate::{mocks::Fixture,
ConstantProvider}, + ed25519, + sha256::{self, Digest as Sha256Digest}, + Digest as _, Digestible, Hasher, Sha256, Signer as _, +}; +use commonware_formatting::hex; +use commonware_parallel::Sequential; +use commonware_runtime::{ + buffer::paged::CacheRef, Buf, BufMut, Clock, Handle, Metrics, Quota, Spawner, Storage, + Supervisor as _, +}; +use commonware_storage::{ + archive::immutable, + journal::contiguous::fixed::Config as FixedLogConfig, + mmr::{self, full::Config as MmrJournalConfig, Location}, + qmdb::any::{sync::Target, unordered::fixed, FixedConfig}, + translator::TwoCap, +}; +use commonware_utils::{ + non_empty_range, + range::NonEmptyRange, + sync::{AsyncRwLock, Mutex}, + test_rng, NZUsize, NZU64, +}; +use futures::{Stream, StreamExt}; +use rand::Rng; +use std::{collections::BTreeMap, sync::Arc, time::Duration}; + +/// The QMDB database type used by the single-db e2e tests. +type Qmdb = + fixed::Db; + +pub(crate) type SingleDatabaseSet = Arc>>; +type MarshalMailbox = MarshalMailboxOf>; + +/// A block committing to a state root and operation range, with embedded consensus context. 
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct Block {
+    context: Context<sha256::Digest, ed25519::PublicKey>, // consensus context embedded in the block
+    parent: sha256::Digest, // digest of the parent block
+    height: Height,
+    state_root: sha256::Digest, // root handed to `Target::new` by `sync_targets`
+    range: NonEmptyRange<Location>, // range handed to `Target::new` by `sync_targets`
+}
+
+impl Write for Block {
+    fn write(&self, buf: &mut impl BufMut) {
+        self.context.write(buf); // field order here must match `read_cfg`
+        self.parent.write(buf);
+        self.height.write(buf);
+        self.state_root.write(buf);
+        self.range.write(buf);
+    }
+}
+
+impl EncodeSize for Block {
+    fn encode_size(&self) -> usize {
+        self.context.encode_size() // sum over exactly the fields `write` emits
+            + self.parent.encode_size()
+            + self.height.encode_size()
+            + self.state_root.encode_size()
+            + self.range.encode_size()
+    }
+}
+
+impl Read for Block {
+    type Cfg = (); // no decoding configuration is needed
+
+    fn read_cfg(buf: &mut impl Buf, _: &Self::Cfg) -> Result<Self, CodecError> {
+        Ok(Self {
+            context: Context::read(buf)?, // same field order as `write`
+            parent: sha256::Digest::read(buf)?,
+            height: Height::read(buf)?,
+            state_root: sha256::Digest::read(buf)?,
+            range: NonEmptyRange::read(buf)?,
+        })
+    }
+}
+
+impl Digestible for Block {
+    type Digest = sha256::Digest;
+
+    fn digest(&self) -> sha256::Digest {
+        Sha256::hash(&self.encode()) // SHA-256 over the full block encoding
+    }
+}
+
+impl Heightable for Block {
+    fn height(&self) -> Height {
+        self.height
+    }
+}
+
+impl ConsensusBlock for Block {
+    fn parent(&self) -> sha256::Digest {
+        self.parent
+    }
+}
+
+impl CertifiableBlock for Block {
+    type Context = Context<sha256::Digest, ed25519::PublicKey>;
+
+    fn context(&self) -> Self::Context {
+        self.context.clone()
+    }
+}
+
+impl Block {
+    fn genesis(state_root: sha256::Digest, range: NonEmptyRange<Location>) -> Self {
+        Self {
+            context: Context {
+                round: Round::new(Epoch::zero(), View::zero()),
+                leader: ed25519::PrivateKey::from_seed(0).public_key(), // deterministic: seed 0
+                parent: (View::zero(), sha256::Digest::EMPTY),
+            },
+            parent: sha256::Digest::EMPTY, // genesis has no parent block
+            height: Height::zero(),
+            state_root,
+            range,
+        }
+    }
+}
+
+/// A stateful application that increments a counter each block.
+#[derive(Clone)] +struct App { + genesis: Block, +} + +impl App { + fn new(genesis: Block) -> Self { + Self { genesis } + } + + /// Execute a block: increment "counter" and write `height -> height_val`. + async fn execute( + height: Height, + mut batches: as DatabaseSet>::Unmerkleized, + ) -> as DatabaseSet>::Merkleized { + let counter = Sha256::hash(b"counter"); + let current: u64 = batches + .get(&counter) + .await + .unwrap() + .map_or(0, |v| digest_to_u64(&v)); + batches = batches.write(counter, Some(u64_to_digest(current + 1))); + batches = batches.write( + Sha256::hash(&height.get().to_be_bytes()), + Some(u64_to_digest(height.get())), + ); + batches.merkleize().await.unwrap() + } +} + +impl Application for App { + type SigningScheme = MockScheme; + type Context = Context; + type Block = Block; + type Databases = SingleDatabaseSet; + type InputProvider = (); + + async fn genesis(&mut self) -> Self::Block { + self.genesis.clone() + } + + async fn propose( + &mut self, + context: (E, Self::Context), + ancestry: impl Stream + Send, + batches: >::Unmerkleized, + _input: &mut Self::InputProvider, + ) -> Option> { + let mut ancestry = Box::pin(ancestry); + let parent = ancestry.next().await?; + let height = Height::new(parent.height().get() + 1); + let merkleized = Self::execute(height, batches).await; + let bounds = merkleized.bounds(); + let block = Block { + context: context.1.clone(), + parent: parent.digest(), + height, + state_root: merkleized.root(), + range: non_empty_range!(bounds.inactivity_floor, Location::new(bounds.total_size)), + }; + Some(Proposed { block, merkleized }) + } + + async fn verify( + &mut self, + _context: (E, Self::Context), + ancestry: impl Stream + Send, + batches: >::Unmerkleized, + ) -> Option<>::Merkleized> { + let mut ancestry = Box::pin(ancestry); + let tip = ancestry.next().await?; + let merkleized = Self::execute(tip.height(), batches).await; + let bounds = merkleized.bounds(); + if merkleized.root() != tip.state_root + || 
non_empty_range!(bounds.inactivity_floor, Location::new(bounds.total_size)) + != tip.range + { + return None; + } + Some(merkleized) + } + + async fn apply( + &mut self, + _context: (E, Self::Context), + block: &Self::Block, + batches: >::Unmerkleized, + ) -> >::Merkleized { + Self::execute(block.height(), batches).await + } + + fn sync_targets(block: &Self::Block) -> >::SyncTargets { + Target::new(block.state_root, block.range.clone()) + } +} + +/// Engine definition implementing `EngineDefinition` for the simulation harness. +#[derive(Clone)] +pub(crate) struct SingleDbEngine { + participants: Vec, + schemes: Vec>, + enable_state_sync: bool, + marshal_mailboxes: Arc>>, + sync_heights: Arc>>, +} + +impl SingleDbEngine { + pub(crate) fn new(n: u32) -> Self { + let mut rng = test_rng(); + let Fixture { + participants, + schemes, + .. + } = scheme_mocks::fixture(&mut rng, NAMESPACE, n); + + Self { + participants, + schemes, + enable_state_sync: false, + marshal_mailboxes: Arc::new(Mutex::new(BTreeMap::new())), + sync_heights: Arc::new(Mutex::new(BTreeMap::new())), + } + } + + pub(crate) fn with_state_sync(mut self) -> Self { + self.enable_state_sync = true; + self + } +} + +impl EngineDefinition for SingleDbEngine { + type PublicKey = ed25519::PublicKey; + type Engine = Handle<()>; + type State = MockValidatorState>; + + fn participants(&self) -> Vec { + self.participants.clone() + } + + fn channels(&self) -> Vec<(u64, Quota)> { + vec![ + (0, TEST_QUOTA), // votes + (1, TEST_QUOTA), // certificates + (2, TEST_QUOTA), // resolver + (3, TEST_QUOTA), // backfill + (4, TEST_QUOTA), // broadcast + (5, TEST_QUOTA), // qmdb sync resolver + ] + } + + async fn init(&self, ctx: InitContext<'_, Self::PublicKey>) -> (Self::Engine, Self::State) { + let InitContext { + context, + index, + public_key, + oracle, + channels, + participants: _, + monitor, + } = ctx; + + let scheme = self.schemes[index].clone(); + + let partition_prefix = format!("validator-{index}"); + let page_cache 
= CacheRef::from_pooler(&context, PAGE_SIZE, PAGE_CACHE_SIZE); + + // QMDB database config (created by Stateful::start) + let db_config = FixedConfig { + merkle_config: MmrJournalConfig { + journal_partition: format!("{partition_prefix}-qmdb-mmr-journal"), + metadata_partition: format!("{partition_prefix}-qmdb-mmr-metadata"), + items_per_blob: NZU64!(11), + write_buffer: IO_BUFFER_SIZE, + strategy: Sequential, + page_cache: page_cache.clone(), + }, + journal_config: FixedLogConfig { + partition: format!("{partition_prefix}-qmdb-log-journal"), + items_per_blob: NZU64!(7), + page_cache: page_cache.clone(), + write_buffer: IO_BUFFER_SIZE, + }, + translator: TwoCap, + }; + + // Destructure the 6 channels. + let mut channels = channels.into_iter(); + let vote_network = channels.next().unwrap(); + let certificate_network = channels.next().unwrap(); + let resolver_network = channels.next().unwrap(); + let backfill_network = channels.next().unwrap(); + let broadcast_network = channels.next().unwrap(); + let qmdb_resolver_network = channels.next().unwrap(); + + // Marshal resolver + let resolver_cfg = marshal_resolver::Config { + public_key: public_key.clone(), + peer_provider: oracle.manager(), + blocker: oracle.control(public_key.clone()), + mailbox_size: NZUsize!(100), + initial: Duration::from_secs(1), + timeout: Duration::from_secs(2), + fetch_retry_timeout: Duration::from_millis(100), + priority_requests: false, + priority_responses: false, + }; + let resolver = marshal_resolver::init( + context.child("marshal_resolver"), + resolver_cfg, + backfill_network, + ); + + // Buffered broadcast engine + let broadcast_config = buffered::Config { + public_key: public_key.clone(), + mailbox_size: NZUsize!(100), + deque_size: 10, + priority: false, + codec_config: (), + peer_provider: oracle.manager(), + }; + let (broadcast_engine, buffer) = + buffered::Engine::new(context.child("broadcast"), broadcast_config); + broadcast_engine.start(broadcast_network); + + // Immutable 
archives + let finalizations_by_height = immutable::Archive::init( + context.child("finalizations_by_height"), + archive_config(&partition_prefix, "finalizations", page_cache.clone(), ()), + ) + .await + .expect("failed to initialize finalizations archive"); + + let finalized_blocks = immutable::Archive::init( + context.child("finalized_blocks"), + archive_config(&partition_prefix, "blocks", page_cache.clone(), ()), + ) + .await + .expect("failed to initialize blocks archive"); + + let genesis_block = { + let empty_db_root = Sha256Digest::from(hex!( + "ea6e0567a525372add5e4ef4d0600c18ed47fa5dd041a0ab0d25b60ea8c35978" + )); + Block::genesis( + empty_db_root, + non_empty_range!(Location::new(0), Location::new(1)), + ) + }; + + // Marshal actor + let provider = ConstantProvider::new(scheme.clone()); + let marshal_config = marshal::Config { + provider, + epocher: FixedEpocher::new(EPOCH_LENGTH), + start: marshal::Start::Genesis(genesis_block.clone()), + partition_prefix: partition_prefix.clone(), + mailbox_size: NZUsize!(100), + view_retention_timeout: ViewDelta::new(10), + prunable_items_per_section: NZU64!(10), + page_cache: page_cache.clone(), + replay_buffer: IO_BUFFER_SIZE, + key_write_buffer: IO_BUFFER_SIZE, + value_write_buffer: IO_BUFFER_SIZE, + block_codec_config: (), + max_repair: NZUsize!(10), + max_pending_acks: NZUsize!(1), + strategy: Sequential, + }; + let (marshal_actor, marshal_mailbox, _last_height) = + MarshalActor::<_, Standard, _, _, _, _, _>::init( + context.child("marshal"), + finalizations_by_height, + finalized_blocks, + marshal_config, + ) + .await; + self.marshal_mailboxes + .lock() + .insert(public_key.clone(), marshal_mailbox.clone()); + + // QMDB state-sync resolver. 
+ let (qmdb_resolver_actor, qmdb_sync_resolver) = + qmdb_resolver::Actor::<_, ed25519::PublicKey, _, _, mmr::Family, Qmdb<_>>::new( + context.child("qmdb_resolver"), + qmdb_resolver::Config { + peer_provider: oracle.manager(), + blocker: oracle.control(public_key.clone()), + database: None, + mailbox_size: NZUsize!(100), + me: Some(public_key.clone()), + initial: Duration::from_secs(1), + timeout: Duration::from_secs(2), + fetch_retry_timeout: Duration::from_millis(100), + max_serve_ops: NZU64!(16), + priority_requests: false, + priority_responses: false, + }, + ); + let _qmdb_resolver_handle = qmdb_resolver_actor.start(qmdb_resolver_network); + + let (startup, startup_sync_height) = if self.enable_state_sync + && !state_sync_done(context.child("state_sync_metadata"), &partition_prefix).await + { + fetch_majority_sync_target(&self.marshal_mailboxes, &context, public_key) + .await + .map_or((StartupMode::MarshalSync, None), |(block, finalization)| { + let height = block.height().get(); + self.sync_heights.lock().insert(public_key.clone(), height); + ( + StartupMode::StateSync { + block, + finalization, + }, + Some(height), + ) + }) + } else { + let prior = self.sync_heights.lock().get(public_key).copied(); + (StartupMode::MarshalSync, prior) + }; + + // Stateful actor + let app = App::new(genesis_block.clone()); + let (stateful_actor, stateful_mailbox) = StatefulActor::init( + context.child("stateful"), + StatefulConfig { + app, + db_config, + input_provider: (), + marshal: marshal_mailbox.clone(), + mailbox_size: 100, + partition_prefix: partition_prefix.clone(), + startup, + resolvers: qmdb_sync_resolver.clone(), + sync_config: SyncEngineConfig { + fetch_batch_size: NZU64!(16), + apply_batch_size: 64, + max_outstanding_requests: 8, + update_channel_size: NZUsize!(256), + max_retained_roots: 8, + }, + }, + ); + + // Deferred wrapper + let deferred = Deferred::new( + context.child("deferred"), + stateful_mailbox.clone(), + marshal_mailbox.clone(), + 
FixedEpocher::new(EPOCH_LENGTH), + ); + + // Marshal reporter: stateful mailbox, wrapped by monitor. + let marshal_reporters = MonitorReporter::new(public_key.clone(), monitor, stateful_mailbox); + + // Start marshal actor with monitored reporters. + marshal_actor.start(marshal_reporters, Some(buffer), resolver); + + // Initialize stateful from marshal's processed frontier. + stateful_actor.start(); + + // Simplex engine + let simplex_config = simplex::Config { + scheme, + elector: RoundRobin::::default(), + blocker: oracle.control(public_key.clone()), + automaton: deferred.clone(), + relay: deferred, + reporter: marshal_mailbox.clone(), + strategy: Sequential, + partition: format!("{partition_prefix}-simplex"), + mailbox_size: NZUsize!(3), + epoch: Epoch::zero(), + floor: simplex::config::Floor::Genesis(genesis_block.digest()), + replay_buffer: IO_BUFFER_SIZE, + write_buffer: IO_BUFFER_SIZE, + page_cache, + leader_timeout: Duration::from_secs(1), + certification_timeout: Duration::from_secs(2), + timeout_retry: Duration::from_millis(500), + activity_timeout: ViewDelta::new(10), + skip_timeout: ViewDelta::new(5), + fetch_timeout: Duration::from_secs(2), + fetch_concurrent: NZUsize!(3), + forwarding: ForwardingPolicy::Disabled, + }; + + let engine = simplex::Engine::new(context, simplex_config); + let handle = engine.start(vote_network, certificate_network, resolver_network); + + ( + handle, + MockValidatorState { + marshal: marshal_mailbox, + startup_sync_height, + }, + ) + } + + fn start(engine: Self::Engine) -> Handle<()> { + engine + } +} diff --git a/storage/src/journal/contiguous/mod.rs b/storage/src/journal/contiguous/mod.rs index 1fbe782d5b1..bdb20aefa9e 100644 --- a/storage/src/journal/contiguous/mod.rs +++ b/storage/src/journal/contiguous/mod.rs @@ -34,7 +34,7 @@ pub trait Reader: Send + Sync { /// Read the item at the given position. /// /// Guaranteed not to return [Error::ItemPruned] for positions within `bounds()`. 
- fn read(&self, position: u64) -> impl Future> + Send; + fn read(&self, position: u64) -> impl Future> + Send + Sync; /// Read multiple items at the given positions, which must be strictly increasing. /// diff --git a/storage/src/merkle/mod.rs b/storage/src/merkle/mod.rs index 20c403ac74c..8866433fed4 100644 --- a/storage/src/merkle/mod.rs +++ b/storage/src/merkle/mod.rs @@ -35,7 +35,7 @@ pub use location::{Location, LocationRangeExt}; pub use position::Position; #[cfg(test)] pub(crate) use proof::build_range_proof; -pub use proof::Proof; +pub use proof::{Proof, MAX_PROOF_DIGESTS_PER_ELEMENT}; pub use read::Readable; use thiserror::Error; diff --git a/storage/src/qmdb/sync/compact.rs b/storage/src/qmdb/sync/compact.rs index 92f654a7bbd..7bd8fb48025 100644 --- a/storage/src/qmdb/sync/compact.rs +++ b/storage/src/qmdb/sync/compact.rs @@ -81,7 +81,7 @@ use std::{future::Future, num::NonZeroU64, sync::Arc}; /// Compact sync authenticates only the final committed root and total leaf count. Unlike replay /// sync, there is no lower replay bound here because compact sync does not transfer or reconstruct /// historical operations. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Debug)] pub struct Target { /// Authenticated root of the committed compact state. pub root: D, @@ -106,6 +106,23 @@ impl Target { } } +impl Clone for Target { + fn clone(&self) -> Self { + Self { + root: self.root, + leaf_count: self.leaf_count, + } + } +} + +impl PartialEq for Target { + fn eq(&self, other: &Self) -> bool { + self.root == other.root && self.leaf_count == other.leaf_count + } +} + +impl Eq for Target {} + impl Write for Target { fn write(&self, buf: &mut impl BufMut) { self.root.write(buf); diff --git a/storage/src/qmdb/sync/engine.rs b/storage/src/qmdb/sync/engine.rs index d4ae78d75a1..3440c085207 100644 --- a/storage/src/qmdb/sync/engine.rs +++ b/storage/src/qmdb/sync/engine.rs @@ -523,7 +523,7 @@ where } /// Record a progress snapshot in metrics. 
- async fn record_progress(&self) { + async fn record_progress(&mut self) { self.progress_metrics .record(self.journal.size().await, *self.target.range().end()); } @@ -606,7 +606,7 @@ where } /// Check if sync is complete based on the current journal size and target - pub async fn is_at_target(&self) -> Result> { + pub async fn is_at_target(&mut self) -> Result> { let journal_size = self.journal.size().await; let target_journal_size = self.target.range().end(); @@ -635,7 +635,7 @@ where } /// Returns whether the journal and boundary state are both ready for completion. - async fn is_ready_to_complete(&self) -> Result> { + async fn is_ready_to_complete(&mut self) -> Result> { Ok(self.is_at_target().await? && self.has_boundary_state()) } diff --git a/utils/src/channel/ring.rs b/utils/src/channel/ring.rs index d49c8c8eb66..3b69a7ba914 100644 --- a/utils/src/channel/ring.rs +++ b/utils/src/channel/ring.rs @@ -43,6 +43,17 @@ use thiserror::Error; #[error("channel closed")] pub struct ChannelClosed; +/// Error returned by [`Receiver::try_recv`]. +#[derive(Debug, Error, PartialEq, Eq)] +pub enum TryRecvError { + /// The channel currently has no buffered items, but senders still exist. + #[error("channel empty")] + Empty, + /// The channel is empty and all senders have been dropped. + #[error("channel closed")] + Disconnected, +} + #[derive(Debug)] struct Shared { buffer: VecDeque, @@ -164,6 +175,25 @@ pub struct Receiver { shared: Arc>>, } +impl Receiver { + /// Receives the next item from the channel. + pub async fn recv(&mut self) -> Option { + futures::future::poll_fn(|cx| Pin::new(&mut *self).poll_next(cx)).await + } + + /// Attempts to receive an item without waiting. 
+ pub fn try_recv(&mut self) -> Result { + let mut shared = self.shared.lock(); + if let Some(item) = shared.buffer.pop_front() { + return Ok(item); + } + if shared.sender_count == 0 { + return Err(TryRecvError::Disconnected); + } + Err(TryRecvError::Empty) + } +} + impl Stream for Receiver { type Item = T; From a7eb0416bb79bff6d22555af5ee14a004868f3b9 Mon Sep 17 00:00:00 2001 From: clabby Date: Fri, 22 May 2026 12:46:08 -0400 Subject: [PATCH 2/4] [glue/stateful] stream sync targets from finalized blocks --- glue/src/stateful/actor/bootstrap.rs | 209 +++++++-------------- glue/src/stateful/actor/core.rs | 263 +++++++++++++++------------ glue/src/stateful/actor/mailbox.rs | 57 +----- glue/src/stateful/actor/processor.rs | 5 - glue/src/stateful/mod.rs | 20 +- 5 files changed, 229 insertions(+), 325 deletions(-) diff --git a/glue/src/stateful/actor/bootstrap.rs b/glue/src/stateful/actor/bootstrap.rs index 2d55e6034fc..558168a224a 100644 --- a/glue/src/stateful/actor/bootstrap.rs +++ b/glue/src/stateful/actor/bootstrap.rs @@ -21,8 +21,7 @@ //! - This reconciliation assumes databases were not manually rolled back or //! replaced out-of-band. //! - Any rewind failure is fatal and causes a panic. -//! - Bootstrap then transitions to processing mode via -//! [`ApplicationMailbox::sync_complete`] at marshal's processed anchor. +//! - Bootstrap then hands the processed anchor and databases to the actor. //! //! The marshal only advances its processed height after it has durably stored //! the floor block, so reconciliation can read the processed block directly. @@ -41,18 +40,16 @@ //! //! 1. Extract the initial anchor and sync targets from the //! seed block. -//! 2. Run [`StateSyncSet::sync`], -//! which initializes and populates all databases via the provided -//! resolvers. Tip updates stream in via the `target_updates` channel as -//! new blocks finalize during the sync, so the final synced height is -//! determined by the sync routine itself, not pre-determined. -//! 3. 
Persist `sync_done = true` so subsequent boots skip state sync. -//! 4. Raise the marshal floor to the synced height via -//! [`MarshalMailbox::set_floor`], then assert that the marshal's processed -//! height is at that floor. -//! 5. Call [`ApplicationMailbox::sync_complete`] with the constructed databases -//! and the synced digest, transitioning the actor into block-processing -//! mode. +//! 2. Raise marshal's floor to the initial target so finalized blocks stream +//! contiguously while state sync runs. +//! 3. Run [`StateSyncSet::sync`], which initializes and populates all databases +//! via the provided resolvers. Finalized blocks acknowledged by the actor +//! stream in via the `target_updates` channel during the sync, so the final +//! synced height is determined by the sync routine itself, not pre-determined. +//! 4. Persist `sync_done = true` so subsequent boots skip state sync. +//! 5. Send the constructed databases and synced digest to the actor. The actor +//! keeps acknowledging finalized blocks until marshal has delivered through +//! the synced height, then transitions into block-processing mode. //! //! ## Crash during state sync //! 
@@ -64,24 +61,27 @@ use crate::stateful::{ db::{Anchor, DatabaseSet, StateSyncSet, SyncEngineConfig}, - Application, Mailbox as ApplicationMailbox, + Application, }; use commonware_consensus::{ marshal::{ - core::{CommitmentFallback, Mailbox as MarshalMailbox, Variant as MarshalVariant}, + core::{Mailbox as MarshalMailbox, Variant as MarshalVariant}, Identifier, }, simplex::types::Finalization, types::{Height, Round}, CertifiableBlock, Epochable, Heightable, Viewable, }; -use commonware_cryptography::{certificate::Scheme, Digest, Digestible}; +use commonware_cryptography::{certificate::Scheme, Digestible}; use commonware_runtime::{ telemetry::metrics::{MetricsExt, Registered}, Clock, Metrics, Spawner, Storage, }; use commonware_storage::metadata::{Config as MetadataConfig, Metadata}; -use commonware_utils::{channel::ring, sequence::U64}; +use commonware_utils::{ + channel::{fallible::OneshotExt, oneshot, ring}, + sequence::U64, +}; use prometheus_client::metrics::gauge::Gauge; use rand::Rng; @@ -92,22 +92,17 @@ type SyncTargets = <>::Databases as DatabaseSet>::S type BlockDigest = <>::Block as Digestible>::Digest; type AnchoredUpdate = (Anchor>, SyncTargets); -/// Bootstrap outcome before durable metadata is finalized. -enum BootstrapState +/// Startup bootstrap completion delivered directly to the actor. +pub(super) struct Completion where - G: Digest, + E: Rng + Spawner + Metrics + Clock, + A: Application, { - /// Databases are ready with no marshal floor update. - Ready { - databases: D, - last_processed: Anchor, - }, - /// Databases were state-synced and require marshal floor update. - Synced { - databases: D, - last_processed: Anchor, - floor: F, - }, + /// Databases initialized by bootstrap. + pub(super) databases: A::Databases, + + /// Last block whose state is already represented by the databases. + pub(super) last_processed: Anchor>, } /// Startup inputs for bootstrap. @@ -144,6 +139,9 @@ where /// Database configuration for the managed set. 
pub(super) db_config: >::Config, + /// Application used to load genesis during bootstrap. + pub(super) app: A, + /// Metadata partition that stores the durable "state sync done" bit. pub(super) metadata_partition: String, @@ -155,6 +153,9 @@ where /// Startup mode and required inputs for that mode. pub(super) mode: Mode, + + /// Actor handoff for initialized databases. + pub(super) completion: oneshot::Sender>, } /// Initialize databases and transition the actor into processing mode. @@ -187,14 +188,13 @@ where /// databases to marshal's processed block targets. Rewind errors indicate /// unrecoverable local history loss/corruption (for example pruned rewind /// boundaries or invalid commit targets), so startup must stop. -/// - Marshal unreachable after `set_floor`. After state sync the marshal -/// floor must be raised so that the node does not attempt to re-process -/// blocks below the synced height. If the marshal does not respond, or -/// reports a processed height that does not equal the floor, the node -/// cannot safely determine where to resume. +/// - Marshal unreachable after `set_floor`. Before state sync the marshal +/// floor must be raised to the initial target so the finalized block stream +/// starts at the first block that may advance synced state. If the marshal +/// does not respond, or reports a processed height below the initial floor, +/// the node cannot safely determine where to resume. 
pub(super) async fn bootstrap( marshal: MarshalMailbox, - application: ApplicationMailbox, config: BootstrapConfig>, ) where E: Rng + Spawner + Metrics + Clock + Storage, @@ -206,10 +206,12 @@ pub(super) async fn bootstrap( let BootstrapConfig { context, db_config, + mut app, metadata_partition, sync_config, resolvers, mode, + completion, } = config; let state_sync_done: Registered = @@ -233,15 +235,9 @@ pub(super) async fn bootstrap( "state sync bootstrap received a sync startup target after state sync was already marked complete", ); - let genesis = application.genesis().await; + let genesis = app.genesis().await; let databases = A::Databases::init(context.child("db_set"), db_config).await; - // After a crash following state sync, the block at the floor height - // may not yet be in the marshal's archive: `set_floor` advanced - // `processed_height`, but the local marshal had not finalized that - // block through its own consensus flow before the crash. If the - // block is missing, hint the marshal to fetch it from the network, - // then poll until it arrives. 
let (processed_anchor, processed_targets) = processed_anchor_targets::(&marshal, &genesis) .await @@ -257,21 +253,27 @@ pub(super) async fn bootstrap( ); } - application.sync_complete(databases, processed_anchor); + assert!( + completion.send_lossy(Completion { + databases, + last_processed: processed_anchor, + }), + "stateful actor dropped during bootstrap completion", + ); return; } - let state = match mode { + let completion_message = match mode { Mode::MarshalSync => { let databases = A::Databases::init(context.child("db_set"), db_config).await; - let genesis = application.genesis().await; + let genesis = app.genesis().await; let genesis_context = genesis.context(); let last_processed = Anchor { height: Height::zero(), round: Round::new(genesis_context.epoch(), genesis_context.view()), digest: genesis.digest(), }; - BootstrapState::Ready { + Completion { databases, last_processed, } @@ -288,6 +290,11 @@ pub(super) async fn bootstrap( digest: block.digest(), }; let initial_targets = A::sync_targets(&block); + // Move marshal to the initial target before state sync starts so + // the actor sees a contiguous finalized stream after target + // selection. Every later finalized block can then become both a + // sync target update and an acknowledgement toward handoff. 
+ marshal.set_floor(finalization.clone()); let (databases, last_processed) = A::Databases::sync( context.child("state_sync"), db_config, @@ -299,56 +306,23 @@ pub(super) async fn bootstrap( ) .await .unwrap_or_else(|err| panic!("state sync failed: {err:?}")); - let floor = if last_processed.height == initial_anchor.height { - finalization - } else { - marshal - .get_finalization(last_processed.height) - .await - .expect("marshal must respond with finalization after state sync") - }; - BootstrapState::Synced { + Completion { databases, last_processed, - floor, } } }; - let (databases, last_processed) = match state { - BootstrapState::Ready { - databases, - last_processed, - } => { - metadata - .put_sync(SYNC_DONE_KEY, true) - .await - .expect("must persist state sync completion metadata"); - state_sync_done.set(1); - (databases, last_processed) - } - BootstrapState::Synced { - databases, - last_processed, - floor: finalization, - } => { - let floor = last_processed.height; - metadata - .put_sync(SYNC_DONE_KEY, true) - .await - .expect("must persist state sync completion metadata"); - state_sync_done.set(1); - // Marshal fetches and stores the floor block before advancing its - // processed height, which also clears pending acknowledgements - // below that floor. - let floor_commitment = finalization.proposal.payload; - marshal.set_floor(finalization); - wait_for_floor(&marshal, floor, floor_commitment).await; - (databases, last_processed) - } - }; + metadata + .put_sync(SYNC_DONE_KEY, true) + .await + .expect("must persist state sync completion metadata"); + state_sync_done.set(1); - application.sync_complete(databases, last_processed); + assert!( + completion.send_lossy(completion_message), + "stateful actor dropped during bootstrap completion", + ); } /// Load marshal's current processed anchor and derived sync targets. 
@@ -401,54 +375,3 @@ where A::sync_targets(&block), )) } - -async fn wait_for_floor( - marshal: &MarshalMailbox, - floor: Height, - commitment: V::Commitment, -) where - S: Scheme, - V: MarshalVariant, -{ - let block = marshal - .subscribe_by_commitment(commitment, CommitmentFallback::Wait) - .await - .expect("marshal floor block subscription cancelled"); - assert_eq!( - block.height(), - floor, - "marshal returned unexpected state sync floor block height", - ); - - let processed_height = marshal - .get_processed_height() - .await - .expect("marshal must respond with processed height after set_floor"); - assert_eq!( - processed_height, floor, - "marshal processed height must match updated floor after state sync", - ); -} - -#[cfg(test)] -mod tests { - #[test] - fn synced_bootstrap_persists_sync_done_before_advancing_floor() { - let source = include_str!("bootstrap.rs"); - let synced_arm = source - .split("BootstrapState::Synced") - .nth(2) - .expect("synced arm should exist"); - let set_floor = synced_arm - .find("marshal.set_floor") - .expect("synced bootstrap should advance marshal floor"); - let put_sync = synced_arm - .find("put_sync(SYNC_DONE_KEY, true)") - .expect("synced bootstrap should persist sync_done"); - - assert!( - put_sync < set_floor, - "sync_done must be durable before the marshal floor advances", - ); - } -} diff --git a/glue/src/stateful/actor/core.rs b/glue/src/stateful/actor/core.rs index 2f6af0f7e41..830ed1d5067 100644 --- a/glue/src/stateful/actor/core.rs +++ b/glue/src/stateful/actor/core.rs @@ -3,7 +3,7 @@ use crate::stateful::{ actor::{ - bootstrap::{bootstrap, BootstrapConfig, Mode as BootstrapMode}, + bootstrap::{bootstrap, BootstrapConfig, Completion, Mode as BootstrapMode}, mailbox::{ErasedAncestorStream, Message}, metrics::Metrics as ProcessorMetrics, processor::{FinalizeStatus, Processor}, @@ -16,7 +16,7 @@ use commonware_actor::mailbox as actor_mailbox; use commonware_consensus::{ marshal::{ ancestry::BlockProvider, - 
core::{DigestFallback, Mailbox as MarshalMailbox, Variant as MarshalVariant}, + core::{Mailbox as MarshalMailbox, Variant as MarshalVariant}, }, simplex::types::Finalization, types::{Height, Round}, @@ -30,7 +30,10 @@ use commonware_utils::{ channel::{fallible::OneshotExt, oneshot, ring}, Acknowledgement, }; -use futures::SinkExt; +use futures::{ + future::{pending, Either}, + SinkExt, +}; use rand::Rng; use std::num::NonZeroUsize; use tracing::{debug, info}; @@ -52,12 +55,6 @@ where response: oneshot::Sender, } -/// Buffered finalization while startup sync is in progress. -struct HeldFinalization { - block: B, - acknowledgement: Exact, -} - /// Tracks the attached database set and pending subscribers. struct DatabaseAttachment { databases: Option, @@ -103,7 +100,7 @@ pub enum StartupMode { /// /// It is up to the user to determine whether or not this block is a valid member /// of the canonical chain. The finalization is used to advance marshal's - /// floor after state sync completes. + /// floor before state sync starts. StateSync { block: B, finalization: F }, } @@ -151,7 +148,7 @@ where app: A, /// Anchored target updates forwarded to the bootstrap sync task. - tip_sender: ring::Sender>, + target_sender: ring::Sender>, /// Resolver set attached once sync completes. sync_resolvers: R, @@ -162,11 +159,11 @@ where /// list is bounded by protocol behavior. held_verify_requests: Vec>, - /// Finalizations held while syncing. - /// - /// Marshal bounds in-flight application updates by `max_pending_acks`, - /// so this list is also bounded by protocol behavior. - held_finalizations: Vec>, + /// Bootstrap completion, once database state sync has converged. + completion: Option>, + + /// Last finalized block acknowledged while syncing. 
+ last_acknowledged: Height, } impl SyncingState @@ -176,15 +173,17 @@ where { const fn new( app: A, - tip_sender: ring::Sender>, + target_sender: ring::Sender>, sync_resolvers: R, + last_acknowledged: Height, ) -> Self { Self { app, - tip_sender, + target_sender, sync_resolvers, held_verify_requests: Vec::new(), - held_finalizations: Vec::new(), + completion: None, + last_acknowledged, } } } @@ -231,9 +230,6 @@ where E: Rng + Spawner + Metrics + Clock, A: Application, { - /// Sender half of the actor mailbox channel. - sender: actor_mailbox::Sender>, - /// Runtime context providing RNG, task spawning, metrics, and clock. context: ContextCell, @@ -280,7 +276,6 @@ where let (sender, mailbox) = actor_mailbox::new(context.child("mailbox"), mailbox_size); ( Self { - sender: sender.clone(), context: ContextCell::new(context), mailbox, inner: config.app, @@ -313,42 +308,58 @@ where MarshalMailbox: BlockProvider, R: AttachableResolverSet, { - let (tip_sender, target_updates) = ring::channel(self.sync_config.update_channel_size); - let bootstrap_mode = match self.startup { - StartupMode::MarshalSync => BootstrapMode::MarshalSync, + let (target_sender, target_updates) = ring::channel(self.sync_config.update_channel_size); + let (completion, bootstrap_completion) = oneshot::channel(); + let (bootstrap_mode, last_acknowledged) = match self.startup { + StartupMode::MarshalSync => (BootstrapMode::MarshalSync, Height::zero()), StartupMode::StateSync { block, finalization, - } => BootstrapMode::StateSync { - block, - finalization: finalization.into(), - target_updates, - }, + } => { + let last_acknowledged = block.height(); + ( + BootstrapMode::StateSync { + block, + finalization: finalization.into(), + target_updates, + }, + last_acknowledged, + ) + } }; + let bootstrap_app = self.inner.clone(); let bootstrap_resolvers = self.resolvers.clone(); let bootstrap_context = self.context.as_present().child("state_sync"); let bootstrap_task_context = 
self.context.as_present().child("state_sync_bootstrap"); let marshal: MarshalMailbox = self.marshal.clone().into(); let mut service = Service { mailbox: self.mailbox, + marshal_sync_startup: matches!(bootstrap_mode, BootstrapMode::MarshalSync), shared: Shared { context: self.context, input_provider: self.input_provider, marshal: marshal.clone(), database_attachment: DatabaseAttachment::new(), }, - mode: Mode::Syncing(SyncingState::new(self.inner, tip_sender, self.resolvers)), + bootstrap_completion: Some(bootstrap_completion), + mode: Mode::Syncing(SyncingState::new( + self.inner, + target_sender, + self.resolvers, + last_acknowledged, + )), }; let bootstrap_config = BootstrapConfig { context: bootstrap_context, db_config: self.db_config, + app: bootstrap_app, metadata_partition: format!("{}{STATE_SYNC_METADATA_SUFFIX}", self.partition_prefix), sync_config: self.sync_config, resolvers: bootstrap_resolvers, mode: bootstrap_mode, + completion, }; - let mailbox = Mailbox::new(self.sender); - bootstrap_task_context.spawn(move |_| bootstrap(marshal, mailbox, bootstrap_config)); + bootstrap_task_context.spawn(move |_| bootstrap(marshal, bootstrap_config)); spawn_cell!(service.shared.context, service.run()) } } @@ -363,6 +374,8 @@ where MarshalMailbox: BlockProvider, { mailbox: actor_mailbox::Receiver>, + marshal_sync_startup: bool, + bootstrap_completion: Option>>, shared: Shared, mode: Mode, } @@ -383,23 +396,47 @@ where self.shared.context, on_start => { self.shared.database_attachment.prune_closed_subscribers(); + let read_mailbox = !self.marshal_sync_startup + || self.bootstrap_completion.is_none(); + let bootstrap_completion = self.bootstrap_completion.as_mut().map_or_else( + || Either::Right(pending()), + Either::Left, + ); + let mailbox_message = if read_mailbox { + Either::Left(self.mailbox.recv()) + } else { + Either::Right(pending()) + }; }, on_stopped => { debug!("context shutdown, stopping stateful application"); }, - Some(message) = self.mailbox.recv() else 
{ + result = bootstrap_completion => { + self.bootstrap_completion = None; + let completion = result.expect("bootstrap completion channel closed"); + if let Mode::Syncing(syncing) = &mut self.mode { + if self.marshal_sync_startup { + syncing.last_acknowledged = completion.last_processed.height; + } + syncing.completion = Some(completion); + if let Some((databases, processor)) = try_enter_processing( + self.shared.context.as_present(), + self.shared.marshal.clone(), + syncing, + ) + .await + { + self.shared.database_attachment.attach(databases); + self.mode = Mode::Processing(processor); + } + } + }, + Some(message) = mailbox_message else { debug!("mailbox closed, shutting down"); break; } => { match (&mut self.mode, message) { // Shared - (_, Message::Genesis { response }) => { - let genesis = match &mut self.mode { - Mode::Syncing(syncing) => syncing.app.genesis().await, - Mode::Processing(processor) => processor.genesis().await, - }; - response.send_lossy(genesis); - } (_, Message::SubscribeDatabases { response }) => { self.shared.database_attachment.subscribe(response); } @@ -437,36 +474,18 @@ where acknowledgement, }, ) => { - debug!( - height = block.height().get(), - "finalization held during sync" - ); - syncing.held_finalizations.push(HeldFinalization { - block, - acknowledgement, - }); - } - (Mode::Syncing(syncing), Message::Tip { height, digest }) => { - handle_tip(&mut self.shared, syncing, height, digest).await; - } - ( - Mode::Syncing(syncing), - Message::SyncComplete { - databases, - last_processed, - }, - ) => { - let attached_databases = databases.clone(); - let processor = handle_sync_complete( + if let Some((databases, processor)) = handle_syncing_finalized( self.shared.context.as_present(), self.shared.marshal.clone(), syncing, - databases, - last_processed, + block, + acknowledgement, ) - .await; - self.shared.database_attachment.attach(attached_databases); - self.mode = Mode::Processing(processor); + .await + { + 
self.shared.database_attachment.attach(databases); + self.mode = Mode::Processing(processor); + } } // Processing mode @@ -522,62 +541,93 @@ where } acknowledgement.acknowledge(); } - (Mode::Processing(_), Message::Tip { .. }) => {} - (Mode::Processing(_), Message::SyncComplete { .. }) => {} } }, } } } -/// Handles a [`Message::Tip`]. -/// -/// In [`Mode::Syncing`], fetches the block from marshal, extracts -/// per-database sync targets via [`Application::sync_targets`], and -/// forwards them to the background sync engines. -async fn handle_tip( - shared: &mut Shared, +async fn handle_syncing_finalized( + context: &E, + marshal: MarshalMailbox, syncing: &mut SyncingState, - height: Height, - digest: ::Digest, -) where + block: A::Block, + acknowledgement: Exact, +) -> Option<(A::Databases, Processor)> +where E: Rng + Spawner + Metrics + Clock, A: Application, S: Scheme, V: MarshalVariant, + MarshalMailbox: BlockProvider, + R: AttachableResolverSet, { - let Some(block) = shared - .marshal - .subscribe_by_digest(digest, DigestFallback::Wait) - .await - .ok() - .map(V::into_inner) - else { - debug!( - height = height.get(), - "tip block not available from provider, skipping target update" - ); - return; - }; + let height = block.height(); + if height <= syncing.last_acknowledged { + acknowledgement.acknowledge(); + return try_enter_processing(context, marshal, syncing).await; + } + + assert_eq!( + height, + syncing.last_acknowledged.next(), + "marshal must deliver contiguous finalized blocks while syncing", + ); let block_context = block.context(); - let anchored_update = ( + let update = ( Anchor { height, round: Round::new(block_context.epoch(), block_context.view()), - digest, + digest: block.digest(), }, A::sync_targets(&block), ); - if syncing.tip_sender.send(anchored_update).await.is_err() { + + if syncing.target_sender.send(update).await.is_err() { debug!( height = height.get(), - "tip update channel unavailable, skipping target update" + "sync target update 
ignored: bootstrap receiver closed" ); } + + syncing.last_acknowledged = height; + acknowledgement.acknowledge(); + try_enter_processing(context, marshal, syncing).await +} + +async fn try_enter_processing( + context: &E, + marshal: MarshalMailbox, + syncing: &mut SyncingState, +) -> Option<(A::Databases, Processor)> +where + E: Rng + Spawner + Metrics + Clock, + A: Application, + S: Scheme, + V: MarshalVariant, + MarshalMailbox: BlockProvider, + R: AttachableResolverSet, +{ + let completion = syncing.completion.as_ref()?; + if syncing.last_acknowledged < completion.last_processed.height { + return None; + } + + let Completion { + databases, + last_processed, + } = syncing + .completion + .take() + .expect("completion must be present"); + let attached_databases = databases.clone(); + let processor = + handle_sync_complete(context, marshal, syncing, databases, last_processed).await; + Some((attached_databases, processor)) } -/// Handles a [`Message::SyncComplete`]. +/// Handles bootstrap completion. /// /// Attaches resolvers to the databases and returns a [`Processor`] ready for /// consensus execution. @@ -623,23 +673,6 @@ where .await; } - // In case any finalizations were delivered after the floor was updated, - // process them now to ensure we progress marshal. - for HeldFinalization { - block, - acknowledgement, - } in syncing.held_finalizations.drain(..) - { - if block.height() <= last_processed.height { - // Block is already persisted at or below the reconciled floor. - // The acknowledgement can be dropped, since marshal cancels - // pending acks when the floor is updated. 
- continue; - } - processor.finalize(context, block).await; - acknowledgement.acknowledge(); - } - info!("sync complete, database attached to processor"); processor } diff --git a/glue/src/stateful/actor/mailbox.rs b/glue/src/stateful/actor/mailbox.rs index 86841348f5f..6a2dea1cebd 100644 --- a/glue/src/stateful/actor/mailbox.rs +++ b/glue/src/stateful/actor/mailbox.rs @@ -1,14 +1,11 @@ //! Mailbox for the [`super::Stateful`] actor. -use crate::stateful::{db::Anchor, Application}; +use crate::stateful::Application; use commonware_actor::{ mailbox::{Overflow, Policy, Sender}, Feedback, }; -use commonware_consensus::{ - marshal::Update, types::Height, Application as ConsensusApplication, Reporter, -}; -use commonware_cryptography::Digestible; +use commonware_consensus::{marshal::Update, Application as ConsensusApplication, Reporter}; use commonware_runtime::{Clock, Metrics, Spawner}; use commonware_utils::{acknowledgement::Exact, channel::oneshot}; use futures::Stream; @@ -24,9 +21,6 @@ where E: Rng + Spawner + Metrics + Clock, A: Application, { - /// A request for the genesis block. - Genesis { response: oneshot::Sender }, - /// A request to propose a block. Propose { context: (E, A::Context), @@ -47,22 +41,6 @@ where acknowledgement: Exact, }, - /// A new finalized tip observed by marshal. - /// - /// During state sync, the actor uses this to fetch the block and - /// extract updated sync targets. In processing mode, this is a no-op. - Tip { - height: Height, - digest: ::Digest, - }, - - /// Signals that state sync is complete and the actor should transition - /// to `Mode::Processing`. - SyncComplete { - databases: A::Databases, - last_processed: Anchor<::Digest>, - }, - /// Requests the attached database set. /// /// The actor replies once the database set is attached, or immediately if @@ -79,11 +57,10 @@ where { fn response_closed(&self) -> bool { match self { - Self::Genesis { response } => response.is_closed(), Self::Propose { response, .. 
} => response.is_closed(), Self::Verify { response, .. } => response.is_closed(), Self::SubscribeDatabases { response } => response.is_closed(), - Self::Finalized { .. } | Self::Tip { .. } | Self::SyncComplete { .. } => false, + Self::Finalized { .. } => false, } } } @@ -185,32 +162,6 @@ where E: Rng + Spawner + Metrics + Clock, A: Application, { - /// Fetch the application's genesis block from the actor. - pub(crate) async fn genesis(&self) -> A::Block { - let (response, receiver) = oneshot::channel(); - let _ = self.sender.enqueue(Message::Genesis { response }); - receiver - .await - .expect("stateful actor dropped during genesis") - } - - /// Signal that state sync is complete, providing the constructed databases - /// and the finalized digest to transition the actor to processing mode. - pub fn sync_complete( - &self, - databases: A::Databases, - last_processed: Anchor<::Digest>, - ) { - let feedback = self.sender.enqueue(Message::SyncComplete { - databases, - last_processed, - }); - assert!( - feedback.accepted(), - "stateful actor dropped during sync_complete" - ); - } - /// Wait for the attached database set. /// /// This resolves when startup bootstrap finishes and the actor has @@ -278,7 +229,7 @@ where fn report(&mut self, activity: Self::Activity) -> Feedback { let message = match activity { - Update::Tip(_, height, digest) => Message::Tip { height, digest }, + Update::Tip(_, _, _) => return Feedback::Ok, Update::Block(block, acknowledgement) => Message::Finalized { block, acknowledgement, diff --git a/glue/src/stateful/actor/processor.rs b/glue/src/stateful/actor/processor.rs index ac3cc953c97..a244d635ae0 100644 --- a/glue/src/stateful/actor/processor.rs +++ b/glue/src/stateful/actor/processor.rs @@ -117,11 +117,6 @@ where } } - /// Delegate to the application to produce the genesis block. 
- pub(super) async fn genesis(&mut self) -> A::Block { - self.app.genesis().await - } - /// Prepare parent-relative batches and delegate to the application to /// build a new block proposal. The resulting block and its merkleized /// state are cached in `pending`. Sends `None` on `response` if the diff --git a/glue/src/stateful/mod.rs b/glue/src/stateful/mod.rs index c91a7eca9e1..46fa6498506 100644 --- a/glue/src/stateful/mod.rs +++ b/glue/src/stateful/mod.rs @@ -35,11 +35,12 @@ //! //! - [`StateSync`](StartupMode::StateSync): Run a one-time QMDB state sync //! from a seed block, populating each database via -//! [`db::StateSyncSet::sync`]. Tip updates stream in as new blocks finalize -//! during the sync, so the final synced height is not predetermined. Once all -//! databases converge on the same anchor block, the actor transitions to -//! normal processing. A durable metadata flag ensures state sync runs at most -//! once; subsequent restarts must take the marshal sync path. +//! [`db::StateSyncSet::sync`]. Finalized blocks acknowledged by the actor +//! stream target updates into the sync task, so the final synced height is +//! not predetermined. Once all databases converge on the same anchor block, +//! the actor acknowledges through that height and then transitions to normal +//! processing. A durable metadata flag ensures state sync runs at most once; +//! subsequent restarts must take the marshal sync path. //! //! # Lazy Recovery //! @@ -129,10 +130,11 @@ where /// Extract per-database sync targets from a finalized block. /// - /// Called by the wrapper when a [`Update::Tip`](commonware_consensus::marshal::Update::Tip) - /// is received during state sync. The returned targets are forwarded to - /// the background sync orchestrator so the sync engines can track the - /// latest finalized state root and range. + /// Called by the wrapper for finalized blocks received during state sync. 
+ /// + /// The returned targets are forwarded to the background sync orchestrator + /// so the sync engines can track the latest finalized state root and + /// range. fn sync_targets(block: &Self::Block) -> >::SyncTargets; /// Block used to initialize the consensus engine in the first epoch. From afb01ef5598f8ae935e6ae2e5c5b17308ef2c82f Mon Sep 17 00:00:00 2001 From: clabby Date: Fri, 22 May 2026 15:21:57 -0400 Subject: [PATCH 3/4] [glue/stateful] initialize from finalized sync floor --- glue/src/stateful/actor/bootstrap.rs | 102 ++++--- glue/src/stateful/actor/core.rs | 329 ++++++++++++++--------- glue/src/stateful/actor/mod.rs | 2 +- glue/src/stateful/mod.rs | 21 +- glue/src/stateful/tests/common.rs | 53 ++-- glue/src/stateful/tests/multi_db_app.rs | 47 ++-- glue/src/stateful/tests/single_db_app.rs | 47 ++-- 7 files changed, 343 insertions(+), 258 deletions(-) diff --git a/glue/src/stateful/actor/bootstrap.rs b/glue/src/stateful/actor/bootstrap.rs index 558168a224a..6ef0440f706 100644 --- a/glue/src/stateful/actor/bootstrap.rs +++ b/glue/src/stateful/actor/bootstrap.rs @@ -4,10 +4,10 @@ //! process blocks. It initializes the databases, optionally runs state sync //! (at most once), and transitions the actor into processing mode. //! -//! A durable `sync_done` flag in a -//! [`Metadata`] store tracks whether -//! state sync has already completed. The combination of this flag and the -//! [`Mode`] in [`BootstrapConfig`] determines which path is taken: +//! A durable flag in a [`Metadata`] store tracks whether state sync +//! has already completed. Callers can load this flag before constructing +//! marshal and stateful, then use a single startup decision to configure both. +//! Bootstrap validates that decision before taking one of the following paths: //! //! ## Already synced (`sync_done = true`, [`Mode::MarshalSync`]) //! @@ -28,26 +28,25 @@ //! //! ## Fresh start (`sync_done = false`, [`Mode::MarshalSync`]) //! -//! No sync target was provided. 
Databases are initialized, the genesis block +//! No finalized floor was provided. Databases are initialized, the genesis block //! digest is used as the last processed digest, `sync_done` is persisted, and //! the actor transitions. //! //! ## State sync (`sync_done = false`, [`Mode::StateSync`]) //! -//! A sync target block and a channel of anchored target updates are provided. -//! State sync runs at most once; subsequent boots take the "already synced" -//! path above. The procedure is: +//! A finalized floor and a channel of anchored target updates are provided. State +//! sync runs at most once; subsequent boots take the "already synced" path above. +//! The procedure is: //! -//! 1. Extract the initial anchor and sync targets from the -//! seed block. -//! 2. Raise marshal's floor to the initial target so finalized blocks stream -//! contiguously while state sync runs. -//! 3. Run [`StateSyncSet::sync`], which initializes and populates all databases +//! 1. Wait for marshal to durably apply its configured floor and return the +//! floor block. Marshal records the floor at construction, then applies it +//! after startup once it has resolver access. +//! 2. Run [`StateSyncSet::sync`], which initializes and populates all databases //! via the provided resolvers. Finalized blocks acknowledged by the actor //! stream in via the `target_updates` channel during the sync, so the final //! synced height is determined by the sync routine itself, not pre-determined. -//! 4. Persist `sync_done = true` so subsequent boots skip state sync. -//! 5. Send the constructed databases and synced digest to the actor. The actor +//! 3. Persist `sync_done = true` so subsequent boots skip state sync. +//! 4. Send the constructed databases and synced digest to the actor. The actor //! keeps acknowledging finalized blocks until marshal has delivered through //! the synced height, then transitions into block-processing mode. //! 
@@ -65,7 +64,7 @@ use crate::stateful::{ }; use commonware_consensus::{ marshal::{ - core::{Mailbox as MarshalMailbox, Variant as MarshalVariant}, + core::{CommitmentFallback, Mailbox as MarshalMailbox, Variant as MarshalVariant}, Identifier, }, simplex::types::Finalization, @@ -87,11 +86,32 @@ use rand::Rng; /// Durable metadata key for "state sync completed". const SYNC_DONE_KEY: U64 = U64::new(0); +const STATE_SYNC_METADATA_SUFFIX: &str = "_state_sync_metadata"; type SyncTargets = <>::Databases as DatabaseSet>::SyncTargets; type BlockDigest = <>::Block as Digestible>::Digest; type AnchoredUpdate = (Anchor>, SyncTargets); +pub(super) fn metadata_partition(partition_prefix: &str) -> String { + format!("{partition_prefix}{STATE_SYNC_METADATA_SUFFIX}") +} + +pub(super) async fn state_sync_done(context: E, partition_prefix: &str) -> bool +where + E: Clock + Metrics + Storage, +{ + let metadata = Metadata::::init( + context, + MetadataConfig { + partition: metadata_partition(partition_prefix), + codec_config: (), + }, + ) + .await + .expect("failed to read state sync metadata"); + metadata.get(&SYNC_DONE_KEY).copied().unwrap_or(false) +} + /// Startup bootstrap completion delivered directly to the actor. pub(super) struct Completion where @@ -115,12 +135,12 @@ where /// to marshal sync MarshalSync, - /// Run startup state sync from initial targets and follow target updates. + /// Run startup state sync from a finalized floor and follow target updates. StateSync { - /// The block whose embedded targets seed the initial sync pass. - block: A::Block, - /// The finalization for `block`. + /// The finalized floor marshal should resolve before sync starts. finalization: F, + /// Handoff for the resolved state-sync floor height. + state_sync_floor_height: oneshot::Sender, /// Channel of anchored target updates as new blocks finalize during sync. target_updates: ring::Receiver>, }, @@ -151,7 +171,7 @@ where /// Per-database resolvers used to fetch state from peers. 
pub(super) resolvers: R, - /// Startup mode and required inputs for that mode. + /// Bootstrap mode and required inputs for that mode. pub(super) mode: Mode, /// Actor handoff for initialized databases. @@ -188,11 +208,11 @@ where /// databases to marshal's processed block targets. Rewind errors indicate /// unrecoverable local history loss/corruption (for example pruned rewind /// boundaries or invalid commit targets), so startup must stop. -/// - Marshal unreachable after `set_floor`. Before state sync the marshal -/// floor must be raised to the initial target so the finalized block stream -/// starts at the first block that may advance synced state. If the marshal -/// does not respond, or reports a processed height below the initial floor, -/// the node cannot safely determine where to resume. +/// - Marshal did not apply its configured floor. Before state sync the marshal +/// floor must be applied so the finalized block stream starts +/// at the first block that may advance synced state. If the marshal does not +/// respond, or reports a processed height below the configured floor, the node +/// cannot safely determine where to resume. 
pub(super) async fn bootstrap( marshal: MarshalMailbox, config: BootstrapConfig>, @@ -279,22 +299,40 @@ pub(super) async fn bootstrap( } } Mode::StateSync { - block, finalization, + state_sync_floor_height, target_updates, } => { + let block = V::into_inner( + marshal + .subscribe_by_commitment( + finalization.proposal.payload, + CommitmentFallback::Wait, + ) + .await + .expect("marshal floor block subscription cancelled"), + ); + let processed_height = marshal + .get_processed_height() + .await + .expect("state sync bootstrap must fetch marshal processed height"); let block_context = block.context(); let initial_anchor = Anchor { height: block.height(), round: Round::new(block_context.epoch(), block_context.view()), digest: block.digest(), }; + assert!( + processed_height >= initial_anchor.height, + "marshal processed height {processed_height} is below state-sync floor {}", + initial_anchor.height, + ); let initial_targets = A::sync_targets(&block); - // Move marshal to the initial target before state sync starts so - // the actor sees a contiguous finalized stream after target - // selection. Every later finalized block can then become both a - // sync target update and an acknowledgement toward handoff. 
- marshal.set_floor(finalization.clone()); + assert!( + state_sync_floor_height.send_lossy(initial_anchor.height), + "stateful actor dropped before state-sync floor height handoff", + ); + let (databases, last_processed) = A::Databases::sync( context.child("state_sync"), db_config, diff --git a/glue/src/stateful/actor/core.rs b/glue/src/stateful/actor/core.rs index 830ed1d5067..4ea0bbbf375 100644 --- a/glue/src/stateful/actor/core.rs +++ b/glue/src/stateful/actor/core.rs @@ -3,7 +3,10 @@ use crate::stateful::{ actor::{ - bootstrap::{bootstrap, BootstrapConfig, Completion, Mode as BootstrapMode}, + bootstrap::{ + bootstrap, metadata_partition, state_sync_done, BootstrapConfig, Completion, + Mode as BootstrapMode, + }, mailbox::{ErasedAncestorStream, Message}, metrics::Metrics as ProcessorMetrics, processor::{FinalizeStatus, Processor}, @@ -17,17 +20,19 @@ use commonware_consensus::{ marshal::{ ancestry::BlockProvider, core::{Mailbox as MarshalMailbox, Variant as MarshalVariant}, + Start as MarshalStart, }, simplex::types::Finalization, types::{Height, Round}, CertifiableBlock, Epochable, Heightable, Viewable, }; -use commonware_cryptography::{certificate::Scheme, Digestible}; +use commonware_cryptography::{certificate::Scheme, Digest, Digestible}; use commonware_macros::select_loop; use commonware_runtime::{spawn_cell, Clock, ContextCell, Handle, Metrics, Spawner, Storage}; use commonware_utils::{ acknowledgement::Exact, channel::{fallible::OneshotExt, oneshot, ring}, + futures::OptionFuture, Acknowledgement, }; use futures::{ @@ -90,18 +95,83 @@ impl DatabaseAttachment { } } -const STATE_SYNC_METADATA_SUFFIX: &str = "_state_sync_metadata"; +/// Startup plan that determines whether state sync is required and, when so, +/// which finalized floor to sync from. +/// +/// Construction is two-phase so the caller can avoid fetching a finalized +/// floor from peers when state sync has already completed: +/// +/// 1. [`SyncPlan::load`] reads the durable state-sync flag. 
+/// 2. If [`SyncPlan::needs_state_sync`] returns `true`, the caller fetches a +/// finalized floor and attaches it via [`SyncPlan::with_floor`]. Otherwise +/// the caller skips floor selection entirely. +/// +/// The same plan is then passed to marshal (via [`SyncPlan::marshal_start`]) +/// and to stateful (via [`Config::plan`]), guaranteeing both actors agree on +/// the startup decision. +pub struct SyncPlan { + partition_prefix: String, + state_sync_complete: bool, + floor: Option, +} + +impl SyncPlan { + /// Load the durable state-sync completion flag for this partition prefix. + /// + /// # Panics + /// + /// Panics if the metadata store cannot be opened. A node that cannot + /// determine whether state sync already completed cannot safely choose a + /// startup path. + pub async fn load(context: E, partition_prefix: impl Into) -> Self + where + E: Clock + Metrics + Storage, + { + let partition_prefix = partition_prefix.into(); + let state_sync_complete = state_sync_done(context, &partition_prefix).await; + Self { + partition_prefix, + state_sync_complete, + floor: None, + } + } + + /// Returns whether state sync should still run on this node. + /// + /// When `false`, the caller should skip floor selection: any floor passed + /// to [`SyncPlan::with_floor`] would be ignored. + pub const fn needs_state_sync(&self) -> bool { + !self.state_sync_complete + } + + /// Attach a finalized floor to state sync from. + /// + /// Has no effect if state sync has already completed. + #[must_use] + pub fn with_floor(mut self, floor: F) -> Self { + if self.needs_state_sync() { + self.floor = Some(floor); + } + self + } +} -/// Startup mode for the [`Stateful`] application. -pub enum StartupMode { - /// Initialize databases and let marshal backfill. - MarshalSync, - /// State sync the databases, starting at the given block's embedded targets. +impl SyncPlan> +where + S: Scheme, + C: Digest, +{ + /// Marshal startup anchor matching this plan. 
/// - /// It is up to the user to determine whether or not this block is a valid member - /// of the canonical chain. The finalization is used to advance marshal's - /// floor before state sync starts. - StateSync { block: B, finalization: F }, + /// Callers should pass this into marshal's configuration before + /// constructing stateful, so both actors are initialized from the same + /// startup decision. + pub fn marshal_start(&self, genesis: B) -> MarshalStart { + self.floor.as_ref().map_or_else( + || MarshalStart::Genesis(genesis), + |finalization| MarshalStart::Floor(finalization.clone()), + ) + } } /// Configuration for constructing a [`Stateful`] application. @@ -125,11 +195,10 @@ where /// Capacity of the stateful actor mailbox channel. pub mailbox_size: usize, - /// Partition prefix used to derive the durable state-sync metadata partition. - pub partition_prefix: String, - - /// Explicit startup mode. - pub startup: StartupMode, + /// Startup plan loaded via [`SyncPlan::load`], optionally augmented with + /// a finalized floor via [`SyncPlan::with_floor`]. Carries the durable + /// metadata partition prefix and the startup decision shared with marshal. + pub plan: SyncPlan, /// Resolver(s) for startup sync fetches and post-bootstrap serving. pub resolvers: R, @@ -175,7 +244,6 @@ where app: A, target_sender: ring::Sender>, sync_resolvers: R, - last_acknowledged: Height, ) -> Self { Self { app, @@ -183,7 +251,7 @@ where sync_resolvers, held_verify_requests: Vec::new(), completion: None, - last_acknowledged, + last_acknowledged: Height::zero(), } } } @@ -248,11 +316,8 @@ where /// Configuration used to initialize the database set at startup. db_config: >::Config, - /// Partition prefix used to derive the durable state-sync metadata partition. - partition_prefix: String, - - /// Explicit startup mode. - startup: StartupMode, + /// Startup plan carrying the metadata partition prefix and floor decision. 
+ plan: SyncPlan, /// Resolver(s) for startup sync fetches and post-bootstrap serving. resolvers: R, @@ -282,8 +347,7 @@ where input_provider: config.input_provider, marshal: config.marshal, db_config: config.db_config, - partition_prefix: config.partition_prefix, - startup: config.startup, + plan: config.plan, resolvers: config.resolvers, sync_config: config.sync_config, }, @@ -294,8 +358,8 @@ where /// Start the actor and run startup bootstrap in the background. /// /// This is the single startup entrypoint for both modes: - /// - [`StartupMode::MarshalSync`]: initialize databases and backfill from marshal. - /// - [`StartupMode::StateSync`]: run one-time startup state sync. + /// - No floor attached to the plan: initialize databases and backfill from marshal. + /// - Floor attached to the plan: run one-time startup state sync. pub fn start(self) -> Handle<()> where E: Rng + Spawner + Metrics + Clock + Storage, @@ -310,23 +374,23 @@ where { let (target_sender, target_updates) = ring::channel(self.sync_config.update_channel_size); let (completion, bootstrap_completion) = oneshot::channel(); - let (bootstrap_mode, last_acknowledged) = match self.startup { - StartupMode::MarshalSync => (BootstrapMode::MarshalSync, Height::zero()), - StartupMode::StateSync { - block, - finalization, - } => { - let last_acknowledged = block.height(); - ( - BootstrapMode::StateSync { - block, - finalization: finalization.into(), - target_updates, - }, - last_acknowledged, - ) - } - }; + let SyncPlan { + partition_prefix, + floor, + state_sync_complete: _, + } = self.plan; + let (bootstrap_mode, pending_state_sync_floor_height) = floor.map_or_else( + || (BootstrapMode::MarshalSync, None), + |finalization| { + let (floor_height_sender, floor_height_receiver) = oneshot::channel(); + let mode = BootstrapMode::StateSync { + finalization: finalization.into(), + target_updates, + state_sync_floor_height: floor_height_sender, + }; + (mode, Some(floor_height_receiver)) + }, + ); let 
bootstrap_app = self.inner.clone(); let bootstrap_resolvers = self.resolvers.clone(); let bootstrap_context = self.context.as_present().child("state_sync"); @@ -334,7 +398,8 @@ where let marshal: MarshalMailbox = self.marshal.clone().into(); let mut service = Service { mailbox: self.mailbox, - marshal_sync_startup: matches!(bootstrap_mode, BootstrapMode::MarshalSync), + marshal_sync: matches!(bootstrap_mode, BootstrapMode::MarshalSync), + pending_state_sync_floor_height, shared: Shared { context: self.context, input_provider: self.input_provider, @@ -342,18 +407,13 @@ where database_attachment: DatabaseAttachment::new(), }, bootstrap_completion: Some(bootstrap_completion), - mode: Mode::Syncing(SyncingState::new( - self.inner, - target_sender, - self.resolvers, - last_acknowledged, - )), + mode: Mode::Syncing(SyncingState::new(self.inner, target_sender, self.resolvers)), }; let bootstrap_config = BootstrapConfig { context: bootstrap_context, db_config: self.db_config, app: bootstrap_app, - metadata_partition: format!("{}{STATE_SYNC_METADATA_SUFFIX}", self.partition_prefix), + metadata_partition: metadata_partition(&partition_prefix), sync_config: self.sync_config, resolvers: bootstrap_resolvers, mode: bootstrap_mode, @@ -374,7 +434,8 @@ where MarshalMailbox: BlockProvider, { mailbox: actor_mailbox::Receiver>, - marshal_sync_startup: bool, + marshal_sync: bool, + pending_state_sync_floor_height: Option>, bootstrap_completion: Option>>, shared: Shared, mode: Mode, @@ -388,6 +449,66 @@ where V: MarshalVariant, MarshalMailbox: BlockProvider, { + const fn should_read_mailbox(&self) -> bool { + // State sync starts from marshal's configured floor. Wait until + // bootstrap tells us that floor height so finalized blocks are + // interpreted relative to the state-sync floor, not Height::zero. + if self.pending_state_sync_floor_height.is_some() { + return false; + } + + // Keep reading finalized blocks while bootstrap syncs databases if + // state syncing. 
Those blocks drive target updates, and proposals/ + // verifies are still rejected or held by Syncing mode. + if !self.marshal_sync { + return true; + } + + // Marshal sync does not need mailbox traffic to complete bootstrap. + // Wait for database initialization/reconciliation before handling + // proposals, verifies, or finalized blocks. + self.bootstrap_completion.is_none() + } + + async fn try_enter_processing(&mut self) + where + R: AttachableResolverSet, + { + let transition = { + let Mode::Syncing(syncing) = &mut self.mode else { + return; + }; + let Some(completion) = syncing.completion.as_ref() else { + return; + }; + if syncing.last_acknowledged < completion.last_processed.height { + return; + } + + let Completion { + databases, + last_processed, + } = syncing + .completion + .take() + .expect("completion must be present"); + let attached_databases = databases.clone(); + let processor = handle_sync_complete( + self.shared.context.as_present(), + self.shared.marshal.clone(), + syncing, + databases, + last_processed, + ) + .await; + (attached_databases, processor) + }; + + let (databases, processor) = transition; + self.shared.database_attachment.attach(databases); + self.mode = Mode::Processing(processor); + } + async fn run(mut self) where R: AttachableResolverSet, @@ -396,12 +517,10 @@ where self.shared.context, on_start => { self.shared.database_attachment.prune_closed_subscribers(); - let read_mailbox = !self.marshal_sync_startup - || self.bootstrap_completion.is_none(); - let bootstrap_completion = self.bootstrap_completion.as_mut().map_or_else( - || Either::Right(pending()), - Either::Left, - ); + let read_mailbox = self.should_read_mailbox(); + let state_sync_floor_height = + OptionFuture::from(self.pending_state_sync_floor_height.as_mut()); + let bootstrap_completion = OptionFuture::from(self.bootstrap_completion.as_mut()); let mailbox_message = if read_mailbox { Either::Left(self.mailbox.recv()) } else { @@ -411,25 +530,26 @@ where on_stopped => { 
debug!("context shutdown, stopping stateful application"); }, + height = state_sync_floor_height => { + self.pending_state_sync_floor_height = None; + let height = height.expect("state-sync floor height channel closed"); + let Mode::Syncing(syncing) = &mut self.mode else { + panic!("received state-sync floor height while not syncing"); + }; + syncing.last_acknowledged = height; + self.try_enter_processing().await; + }, result = bootstrap_completion => { self.bootstrap_completion = None; let completion = result.expect("bootstrap completion channel closed"); - if let Mode::Syncing(syncing) = &mut self.mode { - if self.marshal_sync_startup { - syncing.last_acknowledged = completion.last_processed.height; - } - syncing.completion = Some(completion); - if let Some((databases, processor)) = try_enter_processing( - self.shared.context.as_present(), - self.shared.marshal.clone(), - syncing, - ) - .await - { - self.shared.database_attachment.attach(databases); - self.mode = Mode::Processing(processor); - } + let Mode::Syncing(syncing) = &mut self.mode else { + panic!("received bootstrap completion while not syncing"); + }; + if self.marshal_sync { + syncing.last_acknowledged = completion.last_processed.height; } + syncing.completion = Some(completion); + self.try_enter_processing().await; }, Some(message) = mailbox_message else { debug!("mailbox closed, shutting down"); @@ -474,18 +594,8 @@ where acknowledgement, }, ) => { - if let Some((databases, processor)) = handle_syncing_finalized( - self.shared.context.as_present(), - self.shared.marshal.clone(), - syncing, - block, - acknowledgement, - ) - .await - { - self.shared.database_attachment.attach(databases); - self.mode = Mode::Processing(processor); - } + handle_syncing_finalized(syncing, block, acknowledgement).await; + self.try_enter_processing().await; } // Processing mode @@ -547,25 +657,18 @@ where } } -async fn handle_syncing_finalized( - context: &E, - marshal: MarshalMailbox, +async fn handle_syncing_finalized( 
syncing: &mut SyncingState, block: A::Block, acknowledgement: Exact, -) -> Option<(A::Databases, Processor)> -where +) where E: Rng + Spawner + Metrics + Clock, A: Application, - S: Scheme, - V: MarshalVariant, - MarshalMailbox: BlockProvider, - R: AttachableResolverSet, { let height = block.height(); if height <= syncing.last_acknowledged { acknowledgement.acknowledge(); - return try_enter_processing(context, marshal, syncing).await; + return; } assert_eq!( @@ -593,38 +696,6 @@ where syncing.last_acknowledged = height; acknowledgement.acknowledge(); - try_enter_processing(context, marshal, syncing).await -} - -async fn try_enter_processing( - context: &E, - marshal: MarshalMailbox, - syncing: &mut SyncingState, -) -> Option<(A::Databases, Processor)> -where - E: Rng + Spawner + Metrics + Clock, - A: Application, - S: Scheme, - V: MarshalVariant, - MarshalMailbox: BlockProvider, - R: AttachableResolverSet, -{ - let completion = syncing.completion.as_ref()?; - if syncing.last_acknowledged < completion.last_processed.height { - return None; - } - - let Completion { - databases, - last_processed, - } = syncing - .completion - .take() - .expect("completion must be present"); - let attached_databases = databases.clone(); - let processor = - handle_sync_complete(context, marshal, syncing, databases, last_processed).await; - Some((attached_databases, processor)) } /// Handles bootstrap completion. diff --git a/glue/src/stateful/actor/mod.rs b/glue/src/stateful/actor/mod.rs index cd86a825d75..78a788210b0 100644 --- a/glue/src/stateful/actor/mod.rs +++ b/glue/src/stateful/actor/mod.rs @@ -1,5 +1,5 @@ mod core; -pub use core::{Config, StartupMode, Stateful}; +pub use core::{Config, Stateful, SyncPlan}; mod mailbox; pub use mailbox::Mailbox; diff --git a/glue/src/stateful/mod.rs b/glue/src/stateful/mod.rs index 46fa6498506..dfd1bdc8771 100644 --- a/glue/src/stateful/mod.rs +++ b/glue/src/stateful/mod.rs @@ -27,14 +27,21 @@ //! //! # Syncing //! -//! 
The actor supports two startup modes via [`StartupMode`]: +//! Applications load a [`SyncPlan`] before constructing marshal and stateful. +//! The plan reads the durable state-sync flag; callers gate floor selection +//! on [`SyncPlan::needs_state_sync`] and, if still required, attach a finalized +//! floor via [`SyncPlan::with_floor`]. The same plan then drives marshal (via +//! [`SyncPlan::marshal_start`]) and stateful (via [`Config::plan`]), so both +//! actors are guaranteed to agree on the startup decision. //! -//! - [`MarshalSync`](StartupMode::MarshalSync): Initialize fresh databases -//! and let the marshal backfill blocks from the network. Appropriate for -//! validators joining from genesis or after a clean state wipe. +//! The actor supports two sync paths: //! -//! - [`StateSync`](StartupMode::StateSync): Run a one-time QMDB state sync -//! from a seed block, populating each database via +//! - **Marshal sync** (no floor attached): Initialize fresh databases and let +//! the marshal backfill blocks from the network. Appropriate for validators +//! joining from genesis or after a clean state wipe. +//! +//! - **State sync** (floor attached): Run a one-time QMDB state sync from +//! marshal's configured floor block, populating each database via //! [`db::StateSyncSet::sync`]. Finalized blocks acknowledged by the actor //! stream target updates into the sync task, so the final synced height is //! not predetermined. 
Once all databases converge on the same anchor block, @@ -73,7 +80,7 @@ use rand::Rng; use std::future::Future; mod actor; -pub use actor::{Config, Mailbox, StartupMode, Stateful}; +pub use actor::{Config, Mailbox, Stateful, SyncPlan}; pub mod db; diff --git a/glue/src/stateful/tests/common.rs b/glue/src/stateful/tests/common.rs index 2c1e1ac2665..e55b1279c5e 100644 --- a/glue/src/stateful/tests/common.rs +++ b/glue/src/stateful/tests/common.rs @@ -6,12 +6,9 @@ use commonware_consensus::{ Heightable, }; use commonware_cryptography::{ed25519, sha256, Digestible}; -use commonware_runtime::{buffer::paged::CacheRef, Clock, Metrics, Quota, Storage}; -use commonware_storage::{ - archive::immutable, - metadata::{Config as MetadataConfig, Metadata}, -}; -use commonware_utils::{sequence::U64, sync::Mutex, NZUsize, NZU16, NZU64}; +use commonware_runtime::{buffer::paged::CacheRef, Clock, Quota}; +use commonware_storage::archive::immutable; +use commonware_utils::{sync::Mutex, NZUsize, NZU16, NZU64}; use std::{ collections::{BTreeMap, HashMap}, num::{NonZeroU16, NonZeroU32, NonZeroU64, NonZeroUsize}, @@ -99,14 +96,14 @@ where pub(super) type MarshalMailboxOf = marshal::core::Mailbox, V>; -/// Poll peers for a majority-agreed sync target block. -pub(super) async fn fetch_majority_sync_target( +/// Poll peers for a majority-agreed sync floor. +pub(super) async fn fetch_majority_sync_floor( mailboxes: &Arc>>>, context: &impl Clock, me: &ed25519::PublicKey, ) -> Option<( - V::Block, Finalization, V::Commitment>, + Height, )> where V: Variant, @@ -144,7 +141,7 @@ where heights.sort(); let quorum_height = heights[heights.len() - required]; - // Count digests at quorum height and return the first block with majority agreement. + // Count digests at quorum height and return the first finalization with majority agreement. 
let mut counts: HashMap)> = HashMap::new(); for (mailbox, h) in &peers { if *h < quorum_height { @@ -163,13 +160,15 @@ where } for (digest, (count, mailbox)) in counts { if count >= required { - if let Some(block) = mailbox.get_block(MarshalIdentifier::Digest(digest)).await { - let finalization = mailbox - .get_finalization(quorum_height) - .await - .expect("sync target finalization must be available"); - return Some((block, finalization)); - } + let finalization = mailbox + .get_finalization(quorum_height) + .await + .expect("sync floor finalization must be available"); + assert_eq!( + V::commitment_to_inner(finalization.proposal.payload), + digest + ); + return Some((finalization, quorum_height)); } } @@ -178,26 +177,6 @@ where None } -const STATE_SYNC_METADATA_SUFFIX: &str = "_state_sync_metadata"; -const SYNC_DONE_KEY: U64 = U64::new(0); - -/// Check whether state sync has already completed for this validator. -pub(super) async fn state_sync_done( - context: impl Storage + Clock + Metrics, - partition_prefix: &str, -) -> bool { - let metadata = Metadata::<_, U64, bool>::init( - context, - MetadataConfig { - partition: format!("{partition_prefix}{STATE_SYNC_METADATA_SUFFIX}"), - codec_config: (), - }, - ) - .await - .expect("failed to read state sync metadata"); - metadata.get(&SYNC_DONE_KEY).copied().unwrap_or(false) -} - impl ProcessedHeight for MockValidatorState where V: Variant, diff --git a/glue/src/stateful/tests/multi_db_app.rs b/glue/src/stateful/tests/multi_db_app.rs index 104aa3172e2..7e4b23c7028 100644 --- a/glue/src/stateful/tests/multi_db_app.rs +++ b/glue/src/stateful/tests/multi_db_app.rs @@ -8,7 +8,7 @@ use crate::{ db::{ p2p as qmdb_resolver, DatabaseSet, Merkleized as _, SyncEngineConfig, Unmerkleized as _, }, - Application, Config as StatefulConfig, Proposed, StartupMode, Stateful as StatefulActor, + Application, Config as StatefulConfig, Proposed, Stateful as StatefulActor, SyncPlan, }, }; use commonware_broadcast::buffered; @@ -496,12 
+496,29 @@ impl EngineDefinition for MultiDbEngine { ) }; + let mut plan = + SyncPlan::load(context.child("stateful_startup"), partition_prefix.clone()).await; + let startup_sync_height = if self.enable_state_sync && plan.needs_state_sync() { + match fetch_majority_sync_floor(&self.marshal_mailboxes, &context, public_key).await { + Some((finalization, height)) => { + self.sync_heights + .lock() + .insert(public_key.clone(), height.get()); + plan = plan.with_floor(finalization); + Some(height.get()) + } + None => None, + } + } else { + self.sync_heights.lock().get(public_key).copied() + }; + // Marshal actor let provider = ConstantProvider::new(scheme.clone()); let marshal_config = marshal::Config { provider, epocher: FixedEpocher::new(EPOCH_LENGTH), - start: marshal::Start::Genesis(genesis_block.clone()), + start: plan.marshal_start(genesis_block.clone()), partition_prefix: partition_prefix.clone(), mailbox_size: NZUsize!(100), view_retention_timeout: ViewDelta::new(10), @@ -566,27 +583,6 @@ impl EngineDefinition for MultiDbEngine { ); qmdb_resolver_actor_b.start(qmdb_b_resolver_network); - let (startup, startup_sync_height) = if self.enable_state_sync - && !state_sync_done(context.child("state_sync_metadata"), &partition_prefix).await - { - fetch_majority_sync_target(&self.marshal_mailboxes, &context, public_key) - .await - .map_or((StartupMode::MarshalSync, None), |(block, finalization)| { - let height = block.height().get(); - self.sync_heights.lock().insert(public_key.clone(), height); - ( - StartupMode::StateSync { - block, - finalization, - }, - Some(height), - ) - }) - } else { - let prior = self.sync_heights.lock().get(public_key).copied(); - (StartupMode::MarshalSync, prior) - }; - // Stateful actor let app = App::new(genesis_block.clone()); let (stateful_actor, stateful_mailbox) = StatefulActor::init( @@ -597,9 +593,8 @@ impl EngineDefinition for MultiDbEngine { input_provider: (), marshal: marshal_mailbox.clone(), mailbox_size: 100, - partition_prefix: 
partition_prefix.clone(), - startup, - resolvers: (qmdb_sync_resolver_a.clone(), qmdb_sync_resolver_b.clone()), + plan, + resolvers: (qmdb_sync_resolver_a, qmdb_sync_resolver_b), sync_config: SyncEngineConfig { fetch_batch_size: NZU64!(16), apply_batch_size: 64, diff --git a/glue/src/stateful/tests/single_db_app.rs b/glue/src/stateful/tests/single_db_app.rs index 82b357760e9..fa6ab4900a3 100644 --- a/glue/src/stateful/tests/single_db_app.rs +++ b/glue/src/stateful/tests/single_db_app.rs @@ -8,7 +8,7 @@ use crate::{ db::{ p2p as qmdb_resolver, DatabaseSet, Merkleized as _, SyncEngineConfig, Unmerkleized as _, }, - Application, Config as StatefulConfig, Proposed, StartupMode, Stateful as StatefulActor, + Application, Config as StatefulConfig, Proposed, Stateful as StatefulActor, SyncPlan, }, }; use commonware_broadcast::buffered; @@ -405,12 +405,29 @@ impl EngineDefinition for SingleDbEngine { ) }; + let mut plan = + SyncPlan::load(context.child("stateful_startup"), partition_prefix.clone()).await; + let startup_sync_height = if self.enable_state_sync && plan.needs_state_sync() { + match fetch_majority_sync_floor(&self.marshal_mailboxes, &context, public_key).await { + Some((finalization, height)) => { + self.sync_heights + .lock() + .insert(public_key.clone(), height.get()); + plan = plan.with_floor(finalization); + Some(height.get()) + } + None => None, + } + } else { + self.sync_heights.lock().get(public_key).copied() + }; + // Marshal actor let provider = ConstantProvider::new(scheme.clone()); let marshal_config = marshal::Config { provider, epocher: FixedEpocher::new(EPOCH_LENGTH), - start: marshal::Start::Genesis(genesis_block.clone()), + start: plan.marshal_start(genesis_block.clone()), partition_prefix: partition_prefix.clone(), mailbox_size: NZUsize!(100), view_retention_timeout: ViewDelta::new(10), @@ -456,27 +473,6 @@ impl EngineDefinition for SingleDbEngine { ); let _qmdb_resolver_handle = qmdb_resolver_actor.start(qmdb_resolver_network); - let (startup, 
startup_sync_height) = if self.enable_state_sync - && !state_sync_done(context.child("state_sync_metadata"), &partition_prefix).await - { - fetch_majority_sync_target(&self.marshal_mailboxes, &context, public_key) - .await - .map_or((StartupMode::MarshalSync, None), |(block, finalization)| { - let height = block.height().get(); - self.sync_heights.lock().insert(public_key.clone(), height); - ( - StartupMode::StateSync { - block, - finalization, - }, - Some(height), - ) - }) - } else { - let prior = self.sync_heights.lock().get(public_key).copied(); - (StartupMode::MarshalSync, prior) - }; - // Stateful actor let app = App::new(genesis_block.clone()); let (stateful_actor, stateful_mailbox) = StatefulActor::init( @@ -487,9 +483,8 @@ impl EngineDefinition for SingleDbEngine { input_provider: (), marshal: marshal_mailbox.clone(), mailbox_size: 100, - partition_prefix: partition_prefix.clone(), - startup, - resolvers: qmdb_sync_resolver.clone(), + plan, + resolvers: qmdb_sync_resolver, sync_config: SyncEngineConfig { fetch_batch_size: NZU64!(16), apply_batch_size: 64, From b8b8cdb27ec6c341e21f568ee1704709d7d0c70c Mon Sep 17 00:00:00 2001 From: clabby Date: Fri, 22 May 2026 17:29:17 -0400 Subject: [PATCH 4/4] [glue] align Policy::handle signatures with trait --- glue/src/stateful/actor/mailbox.rs | 5 ++--- glue/src/stateful/db/compact_p2p/handler.rs | 5 ++--- glue/src/stateful/db/compact_p2p/mailbox.rs | 5 ++--- glue/src/stateful/db/p2p/handler.rs | 5 ++--- glue/src/stateful/db/p2p/mailbox.rs | 5 ++--- 5 files changed, 10 insertions(+), 15 deletions(-) diff --git a/glue/src/stateful/actor/mailbox.rs b/glue/src/stateful/actor/mailbox.rs index 6a2dea1cebd..d9f3f7eeac6 100644 --- a/glue/src/stateful/actor/mailbox.rs +++ b/glue/src/stateful/actor/mailbox.rs @@ -113,12 +113,11 @@ where { type Overflow = Pending; - fn handle(overflow: &mut Self::Overflow, message: Self) -> bool { + fn handle(overflow: &mut Self::Overflow, message: Self) { if message.response_closed() { - 
return true; + return; } overflow.0.push_back(message); - true } } diff --git a/glue/src/stateful/db/compact_p2p/handler.rs b/glue/src/stateful/db/compact_p2p/handler.rs index 1bbfb4ed0ec..06b9d66ee82 100644 --- a/glue/src/stateful/db/compact_p2p/handler.rs +++ b/glue/src/stateful/db/compact_p2p/handler.rs @@ -160,12 +160,11 @@ impl Overflow> for EnginePending impl Policy for EngineMessage { type Overflow = EnginePending; - fn handle(overflow: &mut Self::Overflow, message: Self) -> bool { + fn handle(overflow: &mut Self::Overflow, message: Self) { if message.response_closed() { - return true; + return; } overflow.0.push_back(message); - true } } diff --git a/glue/src/stateful/db/compact_p2p/mailbox.rs b/glue/src/stateful/db/compact_p2p/mailbox.rs index 9bf2092c432..7211b7cc678 100644 --- a/glue/src/stateful/db/compact_p2p/mailbox.rs +++ b/glue/src/stateful/db/compact_p2p/mailbox.rs @@ -77,9 +77,9 @@ impl Overflow> for Pending Policy for Message { type Overflow = Pending; - fn handle(overflow: &mut Self::Overflow, message: Self) -> bool { + fn handle(overflow: &mut Self::Overflow, message: Self) { if message.response_closed() { - return true; + return; } match message { @@ -88,7 +88,6 @@ impl Policy for Message { } message => overflow.messages.push_back(message), } - true } } diff --git a/glue/src/stateful/db/p2p/handler.rs b/glue/src/stateful/db/p2p/handler.rs index 6c528a93cf0..a0f0d7888d8 100644 --- a/glue/src/stateful/db/p2p/handler.rs +++ b/glue/src/stateful/db/p2p/handler.rs @@ -248,12 +248,11 @@ impl Overflow> for EnginePending { impl Policy for EngineMessage { type Overflow = EnginePending; - fn handle(overflow: &mut Self::Overflow, message: Self) -> bool { + fn handle(overflow: &mut Self::Overflow, message: Self) { if message.response_closed() { - return true; + return; } overflow.0.push_back(message); - true } } diff --git a/glue/src/stateful/db/p2p/mailbox.rs b/glue/src/stateful/db/p2p/mailbox.rs index 4ede8d31842..d05b4092b59 100644 --- 
a/glue/src/stateful/db/p2p/mailbox.rs +++ b/glue/src/stateful/db/p2p/mailbox.rs @@ -88,9 +88,9 @@ impl Overflow> for Pending Policy for Message { type Overflow = Pending; - fn handle(overflow: &mut Self::Overflow, message: Self) -> bool { + fn handle(overflow: &mut Self::Overflow, message: Self) { if message.response_closed() { - return true; + return; } match message { @@ -99,7 +99,6 @@ impl Policy for Message { } message => overflow.messages.push_back(message), } - true } }