fix(tee): re-announce TeeAttestationAnnounce until admitted (fleet-join) (#2460)

xilosada · web-flow · commit 8e79db40ddb0 · 2026-05-25T11:40:49.000Z
diff --git a/crates/node/primitives/src/client.rs b/crates/node/primitives/src/client.rs
@@ -422,6 +422,33 @@ impl NodeClient {
         Ok(())
     }
 
+    /// Publish a raw payload on the namespace topic `ns/<hex(namespace_id)>`
+    /// immediately, without the "wait for mesh, then publish anyway" loop of
+    /// [`publish_on_namespace`](Self::publish_on_namespace), which is left
+    /// untouched for callers relying on its up-front wait-then-publish semantics.
+    ///
+    /// Returns the mesh peer count sampled just *before* the publish is issued
+    /// (a separate query, so a best-effort snapshot rather than an atomic read).
+    /// A caller running its own re-announce loop (e.g. the fleet-join admission
+    /// wait) can use it to tell a publish into a live mesh from one into an empty
+    /// mesh — the latter is lost forever because gossipsub does not replay.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from the underlying publish; the count is not returned.
+    pub async fn publish_on_namespace_now(
+        &self,
+        namespace_id: [u8; 32],
+        payload: Vec<u8>,
+    ) -> eyre::Result<usize> {
+        let topic_str = format!("ns/{}", hex::encode(namespace_id));
+        let topic = TopicHash::from_raw(topic_str);
+
+        let mesh_peers = self.network_client.mesh_peer_count(topic.clone()).await;
+        let _ignored = self.network_client.publish(topic, payload).await?;
+        Ok(mesh_peers)
+    }
+
     pub async fn get_peers_count(&self, context: Option<&ContextId>) -> usize {
         let Some(context) = context else {
             return self.network_client.peer_count().await;
@@ -731,3 +758,217 @@ impl NodeClient {
             .await
     }
 }
+
+#[cfg(test)]
+mod publish_on_namespace_now_tests {
+    //! Unit tests for the re-announce building block
+    //! [`NodeClient::publish_on_namespace_now`] and the re-announce-until-
+    //! admitted loop it powers in the fleet-join handler.
+    //!
+    //! The fleet-join admission wait lives in `calimero-server` (it needs the
+    //! `ctx_client` admission read, which this crate does not see), so the loop
+    //! itself is reproduced here against the same publish primitive. This is the
+    //! smallest real unit: a live `NetworkClient` backed by a stub network actor
+    //! that counts `Publish` messages and reports a settable mesh peer count —
+    //! no libp2p transport, no server crate. The full owner-side admission path
+    //! is covered by `calimero-node`'s `local_governance_node_e2e.rs`.
+    //!
+    //! Runs under `#[actix::test]` (single-threaded actix System) so the stub
+    //! actor's mailbox is pumped by the same runtime that drives the client's
+    //! `.await`s — `Actor::create` + `LazyRecipient::init`, the documented
+    //! pattern from `calimero-utils-actix`'s own `lazy_tests.rs`.
+
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    use std::sync::Arc;
+    use std::time::Duration;
+
+    use actix::Actor;
+    use calimero_blobstore::config::BlobStoreConfig;
+    use calimero_blobstore::{BlobManager as BlobStore, FileSystem};
+    use calimero_network_primitives::client::NetworkClient;
+    use calimero_network_primitives::messages::{MessageId, NetworkMessage};
+    use calimero_store::db::InMemoryDB;
+    use calimero_store::Store;
+    use calimero_utils_actix::LazyRecipient;
+    use tokio::sync::{broadcast, mpsc};
+
+    use super::{BlobManager, NodeClient, SyncClient};
+
+    /// Stub network actor for these tests: counts every `Publish` request and
+    /// answers `MeshPeerCount` with the value currently held in the shared
+    /// atomic. Both handlers reply on `outcome` so the awaiting client future
+    /// completes; all other message variants are ignored without a reply.
+    struct CountingNetworkActor {
+        publish_count: Arc<AtomicUsize>,
+        mesh_peers: Arc<AtomicUsize>,
+    }
+
+    impl Actor for CountingNetworkActor {
+        type Context = actix::Context<Self>;
+    }
+
+    impl actix::Handler<NetworkMessage> for CountingNetworkActor {
+        type Result = ();
+
+        fn handle(&mut self, msg: NetworkMessage, _ctx: &mut Self::Context) -> Self::Result {
+            match msg {
+                NetworkMessage::MeshPeerCount { outcome, .. } => {
+                    let _ = outcome.send(self.mesh_peers.load(Ordering::SeqCst));
+                }
+                NetworkMessage::Publish { outcome, .. } => {
+                    let _prev = self.publish_count.fetch_add(1, Ordering::SeqCst);
+                    let _ = outcome.send(Ok(MessageId(b"stub".to_vec())));
+                }
+                _ => {}
+            }
+        }
+    }
+
+    /// Build a `NodeClient` whose `network_client` talks to a freshly created
+    /// [`CountingNetworkActor`]. `publish_on_namespace_now` only touches the
+    /// network path, so every other dependency is a minimal in-memory or
+    /// temp-dir-backed instance. Returns the client, the shared publish-count
+    /// and mesh-peer atomics (both starting at 0), and the `TempDir`, which the
+    /// caller must hold so the blobstore directory outlives the test body.
+    async fn make_client() -> (
+        NodeClient,
+        Arc<AtomicUsize>,
+        Arc<AtomicUsize>,
+        tempfile::TempDir,
+    ) {
+        let tmp = tempfile::tempdir().expect("tempdir");
+        let store = Store::new(Arc::new(InMemoryDB::owned()));
+
+        let blob_cfg =
+            BlobStoreConfig::new(tmp.path().to_path_buf().try_into().expect("utf8 blob path"));
+        let fs = FileSystem::new(&blob_cfg).await.expect("blob fs");
+        let blob_manager = BlobManager::new(BlobStore::new(store.clone(), fs));
+
+        let network_recipient = LazyRecipient::<NetworkMessage>::new();
+        let network_client = NetworkClient::new(network_recipient.clone());
+
+        let publish_count = Arc::new(AtomicUsize::new(0));
+        let mesh_peers = Arc::new(AtomicUsize::new(0));
+
+        let actor = CountingNetworkActor {
+            publish_count: Arc::clone(&publish_count),
+            mesh_peers: Arc::clone(&mesh_peers),
+        };
+        let _addr = CountingNetworkActor::create(move |ctx| {
+            assert!(network_recipient.init(ctx), "network recipient init");
+            actor
+        });
+
+        let (event_sender, _) = broadcast::channel(16);
+        let (ctx_sync_tx, _ctx_sync_rx) = mpsc::channel(8);
+        let (ns_sync_tx, _ns_sync_rx) = mpsc::channel(8);
+        let (ns_join_tx, _ns_join_rx) = mpsc::channel(8);
+        let (open_subgroup_join_tx, _open_rx) = mpsc::channel(8);
+        let sync_client =
+            SyncClient::new(ctx_sync_tx, ns_sync_tx, ns_join_tx, open_subgroup_join_tx);
+
+        let node_client = NodeClient::new(
+            store,
+            blob_manager,
+            network_client,
+            LazyRecipient::new(),
+            event_sender,
+            sync_client,
+            String::new(),
+            None,
+        );
+
+        (node_client, publish_count, mesh_peers, tmp)
+    }
+
+    /// A single `publish_on_namespace_now` call issues exactly one `Publish`
+    /// and returns the stub's mesh peer count — 0 here, i.e. the empty-mesh
+    /// case in which a one-shot announce was silently lost before this fix.
+    #[actix::test]
+    async fn publishes_once_and_reports_empty_mesh() {
+        let (client, publish_count, _mesh, _tmp) = make_client().await;
+
+        let observed = client
+            .publish_on_namespace_now([0x11; 32], b"announce".to_vec())
+            .await
+            .expect("publish_on_namespace_now");
+
+        assert_eq!(observed, 0, "no mesh peers were set");
+        assert_eq!(
+            publish_count.load(Ordering::SeqCst),
+            1,
+            "exactly one publish per call"
+        );
+    }
+
+    /// With the stub reporting 2 mesh peers, `publish_on_namespace_now` returns
+    /// 2 — the signal a caller uses to tell that the announce went out while a
+    /// mesh was present rather than into an empty one.
+    #[actix::test]
+    async fn reports_live_mesh_peer_count() {
+        let (client, _publish_count, mesh, _tmp) = make_client().await;
+        mesh.store(2, Ordering::SeqCst);
+
+        let observed = client
+            .publish_on_namespace_now([0x33; 32], b"announce".to_vec())
+            .await
+            .expect("publish_on_namespace_now");
+
+        assert_eq!(observed, 2, "must report the live mesh peer count");
+    }
+
+    /// Models the re-announce-until-admitted loop against the real publish
+    /// primitive: it must publish MORE THAN ONCE over the wait window (the
+    /// one-shot bug published exactly once) and STOP the moment admission is
+    /// observed. The loop is a local copy of the one in
+    /// `crates/server/src/admin/handlers/tee/fleet_join.rs`, not the handler.
+    #[actix::test]
+    async fn reannounce_loop_publishes_more_than_once_then_stops_on_admission() {
+        let (client, publish_count, _mesh, _tmp) = make_client().await;
+
+        // Mirror the fleet-join handler loop: publish up front, then on each
+        // not-yet-admitted cycle re-check admission, sleep, then re-publish
+        // (re-publish AFTER the sleep, as the handler does, so the first
+        // re-announce doesn't fire back-to-back with the up-front publish).
+        // Admission flips true after a few cycles; once true the loop must break
+        // BEFORE publishing again. A fast poll keeps the test sub-second.
+        const ADMIT_AFTER_CYCLES: usize = 3;
+        const POLL: Duration = Duration::from_millis(10);
+        const MAX_CYCLES: usize = 50; // hard safety bound
+
+        // First (up-front) announce, as the handler does before its loop.
+        let _ = client
+            .publish_on_namespace_now([0x22; 32], b"announce".to_vec())
+            .await
+            .expect("first announce");
+
+        let mut cycles = 0;
+        let mut admitted = false;
+        while cycles < MAX_CYCLES {
+            // Admission check FIRST so we never re-announce after admitted.
+            if cycles >= ADMIT_AFTER_CYCLES {
+                admitted = true;
+                break;
+            }
+            tokio::time::sleep(POLL).await;
+            let _ = client
+                .publish_on_namespace_now([0x22; 32], b"announce".to_vec())
+                .await
+                .expect("re-announce");
+            cycles += 1;
+        }
+
+        assert!(admitted, "loop must observe admission");
+        let total = publish_count.load(Ordering::SeqCst);
+        assert!(
+            total > 1,
+            "re-announce must publish more than once over the wait window, got {total}"
+        );
+        // up-front (1) + one per not-yet-admitted cycle (ADMIT_AFTER_CYCLES).
+        assert_eq!(
+            total,
+            1 + ADMIT_AFTER_CYCLES,
+            "must stop announcing the instant admission is observed"
+        );
+    }
+}
diff --git a/crates/server/src/admin/handlers/tee/fleet_join.rs b/crates/server/src/admin/handlers/tee/fleet_join.rs
@@ -119,9 +119,17 @@ pub async fn handler(
         .into_response();
     }
 
+    // Fire the first announce up front. A single publish at fleet-join time is
+    // lost forever if it lands in an empty gossipsub mesh (no replay), which is
+    // the common case for a NAT'd/relay owner whose mesh forms only
+    // intermittently. The admission loop below therefore RE-announces every
+    // poll cycle until admitted or the deadline, so a *later* mesh window still
+    // receives a fresh copy. If even this first publish errors at the transport
+    // level we bail (subscription with no announce is useless); a publish into
+    // an empty mesh is *not* an error and is expected to be retried below.
     if let Err(err) = state
         .node_client
-        .publish_on_namespace(group_id_bytes, payload)
+        .publish_on_namespace_now(group_id_bytes, payload.clone())
         .await
     {
         warn!(error=?err, "Failed to broadcast, unsubscribing from namespace");
@@ -139,29 +147,65 @@ pub async fn handler(
     info!(
         group_id = %req.group_id,
         %our_public_key,
-        "TeeAttestationAnnounce broadcast; waiting for admission then joining contexts"
+        "TeeAttestationAnnounce broadcast; re-announcing until admission then joining contexts"
     );
 
-    // Poll for group admission, then auto-join all contexts in the namespace
+    // Poll for group admission, then auto-join all contexts in the namespace.
+    //
+    // Re-announce strategy: this loop both (a) checks for admission and (b)
+    // re-publishes the announce each cycle the node is not yet admitted. The
+    // re-announce is request-scoped (bounded by `MAX_ADMISSION_WAIT`) rather
+    // than a long-lived background task: the mdma sidecar already re-polls
+    // should-join and re-invokes fleet-join, so each call covering one mesh
+    // window is sufficient, and a request-scoped loop needs no extra actor /
+    // lifecycle management. See the handler-level rationale comment.
     let mut contexts_joined = Vec::new();
     let mut admitted = false;
     let mut auto_follow_enabled = false;
 
+    // Overall bound for one fleet-join call. The sidecar re-invokes across a
+    // larger window, so this only needs to cover a single mesh-formation
+    // attempt comfortably.
     const MAX_ADMISSION_WAIT: std::time::Duration = std::time::Duration::from_secs(30);
+    // Interval between admission checks AND between re-announces — short enough
+    // that a transient mesh window (mesh peers appear, then vanish) is hit by a
+    // fresh publish, but not so tight it spams the topic.
     const ADMISSION_POLL: std::time::Duration = std::time::Duration::from_secs(2);
 
     let deadline = tokio::time::Instant::now() + MAX_ADMISSION_WAIT;
 
-    while tokio::time::Instant::now() < deadline {
-        match state
-            .ctx_client
-            .list_group_contexts(ListGroupContextsRequest {
-                group_id,
-                offset: 0,
-                limit: 100,
-            })
-            .await
-        {
+    // `loop {}` (not `while now < deadline`) so the deadline is only checked
+    // *after* an admission check, never right after a sleep — otherwise an
+    // admission that completes during the final sleep would be lost to a false
+    // "timed out" / `admitted:false`. The deadline break lives in the `Err`
+    // arm below, immediately after the (failed) admission check.
+    loop {
+        // Bound each admission check so a stuck context-manager actor can't
+        // hold the handler open indefinitely: a check that exceeds
+        // ADMISSION_POLL is mapped to a (retriable) error and handled by the
+        // `Err` arm below, exactly like a not-yet-admitted result. A
+        // slow-but-not-stuck actor whose check nears ADMISSION_POLL makes the
+        // effective cycle up to ~2x ADMISSION_POLL; that's acceptable, and we
+        // keep the budget at ADMISSION_POLL (rather than shrinking it) so a
+        // normally-fast actor isn't spuriously timed out. The final check may
+        // start at the deadline, so it can overrun it by up to ADMISSION_POLL.
+        let admission = tokio::time::timeout(
+            ADMISSION_POLL,
+            state
+                .ctx_client
+                .list_group_contexts(ListGroupContextsRequest {
+                    group_id,
+                    offset: 0,
+                    limit: 100,
+                }),
+        )
+        .await
+        .unwrap_or_else(|_| {
+            Err(eyre::eyre!(
+                "list_group_contexts exceeded the admission poll budget"
+            ))
+        });
+        match admission {
             Ok(entries) => {
                 info!(
                     group_id = %req.group_id,
@@ -239,7 +283,47 @@ pub async fn handler(
             }
             Err(err) => {
                 tracing::debug!(error=?err, "Admission check not yet successful, retrying...");
-                tokio::time::sleep(ADMISSION_POLL).await;
+
+                // Stop once past the deadline — but only here, AFTER the
+                // admission check above, so an admission that landed during the
+                // previous sleep is observed on this iteration instead of being
+                // lost to a false "timed out".
+                if tokio::time::Instant::now() >= deadline {
+                    break;
+                }
+
+                // Cap the poll sleep to the remaining budget so the loop wakes
+                // for its final admission check right at the deadline rather
+                // than up to ADMISSION_POLL past it.
+                let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
+                tokio::time::sleep(remaining.min(ADMISSION_POLL)).await;
+
+                // Re-announce AFTER the poll sleep, and only if we're still
+                // before the deadline. Doing it here (rather than before the
+                // sleep) avoids both a duplicate publish fired back-to-back with
+                // the up-front one at t=0 and a wasted publish right as we give
+                // up. A single up-front publish is lost if the mesh was empty at
+                // fleet-join (gossipsub does not replay), so re-publishing each
+                // cycle delivers a fresh copy to a mesh window that opens later.
+                // Best effort — a transport error here is logged, not fatal.
+                if tokio::time::Instant::now() < deadline {
+                    match state
+                        .node_client
+                        .publish_on_namespace_now(group_id_bytes, payload.clone())
+                        .await
+                    {
+                        Ok(mesh_peers) => tracing::debug!(
+                            group_id = %req.group_id,
+                            mesh_peers,
+                            "re-announced TeeAttestationAnnounce while awaiting admission"
+                        ),
+                        Err(reannounce_err) => warn!(
+                            group_id = %req.group_id,
+                            error = ?reannounce_err,
+                            "re-announce publish failed; will retry next cycle"
+                        ),
+                    }
+                }
             }
         }
     }