Skip to content

Commit 6796b9d

Browse files
authored
fix(tee): bootstrap an admitted ReadOnlyTee replica so it self-confirms and replicates (#2473)
1 parent 8e79db4 commit 6796b9d

5 files changed

Lines changed: 1082 additions & 12 deletions

File tree

crates/context/src/group_store/local_state.rs

Lines changed: 137 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use calimero_context_client::local_governance::SignedGroupOp;
12
use calimero_context_config::types::ContextGroupId;
23
use calimero_primitives::context::{ContextId, GroupMemberRole};
34
use calimero_primitives::identity::PublicKey;
@@ -109,18 +110,58 @@ pub(crate) fn append_op_log_entry(
109110
Ok(())
110111
}
111112

112-
pub(crate) fn persist_group_governance_progress(
113+
/// Append an op to the group op-log and advance the op head, WITHOUT
114+
/// touching the per-signer nonce.
115+
///
116+
/// Used by the namespace-governance apply path
117+
/// (`namespace_governance::apply_group_op_inner`), which manages the nonce
118+
/// itself via `set_local_gov_nonce`. The authoring path uses
119+
/// [`persist_group_governance_progress`] instead, which also writes the
120+
/// nonce in the same batch. Keeping these separate avoids the
121+
/// namespace-governance path double-writing the nonce.
122+
///
123+
/// CRASH-SAFETY INVARIANT (no atomic multi-key write available):
124+
/// `calimero-store` has no transactional batch — `StoreBatch` commits each
125+
/// `put` immediately (see `crates/store/src/batch.rs`) and `Store::handle()`
126+
/// writes straight through to the backend. So the two `put`s below are NOT
127+
/// atomic; a crash can land between them. The write ORDER is therefore
128+
/// chosen to be crash-safe: the op-log ENTRY is written first, the
129+
/// `GroupOpHead` second.
130+
///
131+
/// - Crash after entry, before head: an ORPHAN log entry exists at
132+
/// `sequence` while the head still points at `sequence - 1`. This is
133+
/// benign — every reader scans the log directly (`read_op_log_after`,
134+
/// `read_tee_admission_policy`, `is_quote_hash_used`), so the entry is
135+
/// already visible; and the replica apply path derives `next_seq` from
136+
/// [`max_op_log_sequence`] (the actual max persisted sequence), NOT from
137+
/// this possibly-stale head, so the next op lands strictly above the orphan
138+
/// and never overwrites it. (The authoring side still derives `next_seq`
139+
/// from the head, but a crash there leaves an entry this node authored with
140+
/// its nonce un-advanced, so the next authored op re-derives the identical
141+
/// op and an overwrite is an idempotent self-replay.)
142+
/// - The reverse order (head first) would be UNSAFE: a crash would leave a
143+
/// head whose `sequence` references a log entry that was never written,
144+
/// so `read_op_log_after` would silently skip the gap and the op-head's
145+
/// `dag_heads` would advertise a frontier op nobody can read back.
146+
///
147+
/// This mirrors the entry-then-head ordering the authoring side uses
148+
/// (`persist_group_governance_progress` below) and the head-advance /
149+
/// store-operation ordering note in
150+
/// `namespace_governance::apply_signed_op`. Replacing this with a real
151+
/// single-batch atomic write is deferred to the codebase-wide store-batch
152+
/// work tracked alongside the cascade-delete atomicity discussion.
153+
pub(crate) fn persist_group_op_log_entry(
113154
store: &Store,
114155
group_id: &ContextGroupId,
115156
sequence: u64,
116-
signer: &PublicKey,
117-
nonce: u64,
118157
dag_heads: Vec<[u8; 32]>,
119158
op_bytes: &[u8],
120159
) -> EyreResult<()> {
121160
let gid = group_id.to_bytes();
122161
let mut handle = store.handle();
123162

163+
// Entry first (see CRASH-SAFETY INVARIANT above): an orphan entry is
164+
// benign; a head referencing a missing entry is not.
124165
let op_log_key = GroupOpLog::new(gid, sequence);
125166
handle.put(&op_log_key, &op_bytes.to_vec())?;
126167

@@ -133,7 +174,33 @@ pub(crate) fn persist_group_governance_progress(
133174
},
134175
)?;
135176

136-
let nonce_key = GroupLocalGovNonce::new(gid, *signer);
177+
Ok(())
178+
}
179+
180+
/// Authoring-side variant of [`persist_group_op_log_entry`] that ALSO advances
181+
/// the per-(group, signer) nonce in the same call.
182+
///
183+
/// The two paths share the op-log entry + head write (delegated to
184+
/// [`persist_group_op_log_entry`]) but differ in nonce handling: the authoring
185+
/// path owns the nonce here, whereas the namespace-governance apply path
186+
/// manages it separately via `set_local_gov_nonce` (it advances the nonce only
187+
/// AFTER the full op apply succeeds — see the invariant comment in
188+
/// `apply_group_op_inner`). The nonce `put` runs LAST so the same crash-safety
189+
/// ordering holds: entry → head → nonce. An un-advanced nonce after a crash
190+
/// just replays the (idempotent) op; it never skips one.
191+
pub(crate) fn persist_group_governance_progress(
192+
store: &Store,
193+
group_id: &ContextGroupId,
194+
sequence: u64,
195+
signer: &PublicKey,
196+
nonce: u64,
197+
dag_heads: Vec<[u8; 32]>,
198+
op_bytes: &[u8],
199+
) -> EyreResult<()> {
200+
persist_group_op_log_entry(store, group_id, sequence, dag_heads, op_bytes)?;
201+
202+
let mut handle = store.handle();
203+
let nonce_key = GroupLocalGovNonce::new(group_id.to_bytes(), *signer);
137204
handle.put(&nonce_key, &nonce)?;
138205

139206
Ok(())
@@ -166,6 +233,72 @@ pub fn read_op_log_after(
166233
Ok(results)
167234
}
168235

236+
/// The highest sequence number present in the group's op-log, or `None` if the
237+
/// log is empty.
238+
///
239+
/// Derived by scanning the persisted op-log rather than reading
240+
/// `GroupOpHeadValue.sequence`, so it is correct even when the head is stale
241+
/// relative to the log — e.g. after a crash that landed between the entry `put`
242+
/// and the head `put` in [`persist_group_op_log_entry`] (see the CRASH-SAFETY
243+
/// INVARIANT there). The replica apply path uses this to derive `next_seq` so a
244+
/// new op never reuses a sequence already occupied by an orphan entry, which
245+
/// would silently overwrite it.
246+
///
247+
/// Keys are big-endian on the sequence component, so the op-log iterates in
248+
/// ascending order and the last entry carries the max; cost is the same O(n)
249+
/// governance-only scan the other log readers already pay.
250+
pub(crate) fn max_op_log_sequence(
251+
store: &Store,
252+
group_id: &ContextGroupId,
253+
) -> EyreResult<Option<u64>> {
254+
let gid = group_id.to_bytes();
255+
let keys =
256+
collect_keys_with_prefix(store, GroupOpLog::new(gid, 1), GROUP_OP_LOG_PREFIX, |k| {
257+
k.group_id() == gid
258+
})?;
259+
Ok(keys.last().map(GroupOpLog::sequence))
260+
}
261+
262+
/// Whether the group op-log already holds an entry whose op has the given
263+
/// `content_hash`.
264+
///
265+
/// This is the durable dedup signal for the replica apply path
266+
/// (`namespace_governance::apply_group_op_inner`). It scans the persisted
267+
/// op-log — the same column the readers (`read_tee_admission_policy`,
268+
/// `is_quote_hash_used`, `is_tee_admitted_identity`) scan — rather than
269+
/// consulting the op-head's `dag_heads`. `dag_heads` only tracks the CURRENT
270+
/// frontier: once a later op supersedes an earlier one, the earlier op's
271+
/// content hash is pruned from the head set, so a head-based check would
272+
/// wrongly report a superseded-then-re-received op as "not yet logged" and
273+
/// append a second copy — skewing every log scan. Keying the check on the
274+
/// persisted log makes it monotonic: an op that was ever logged stays logged.
275+
///
276+
/// Cost is an O(n) scan over the group's governance op-log (governance ops
277+
/// only — not state-DAG traffic), matching what the readers already pay; the
278+
/// per-(group, signer) nonce guard in `apply_group_op_inner` short-circuits
279+
/// the common re-receive before this is reached, so this is the backstop for
280+
/// the retry/backfill path that re-applies without having advanced the nonce.
281+
pub(crate) fn op_log_contains_content_hash(
282+
store: &Store,
283+
group_id: &ContextGroupId,
284+
content_hash: &[u8; 32],
285+
) -> EyreResult<bool> {
286+
let entries = read_op_log_after(store, group_id, 0, usize::MAX)?;
287+
for (_seq, bytes) in &entries {
288+
let Ok(op) = borsh::from_slice::<SignedGroupOp>(bytes) else {
289+
continue;
290+
};
291+
if op
292+
.content_hash()
293+
.map(|h| h == *content_hash)
294+
.unwrap_or(false)
295+
{
296+
return Ok(true);
297+
}
298+
}
299+
Ok(false)
300+
}
301+
169302
fn delete_op_log_and_head(store: &Store, group_id: &ContextGroupId) -> EyreResult<()> {
170303
const BATCH_SIZE: usize = 1000;
171304
let mut after_sequence = 0u64;

0 commit comments

Comments
 (0)