nearai
diff --git a/crates/inference_providers/src/spki_verifier.rs b/crates/inference_providers/src/spki_verifier.rs
Lines changed: 79 additions & 0 deletions
diff --git a/crates/services/src/attestation/verification.rs b/crates/services/src/attestation/verification.rs
Lines changed: 0 additions & 35 deletions
@@ -57,6 +57,24 @@ impl FingerprintState {
         // Don't block if already Pinned — keep existing verified fingerprints
     }
 
+    /// Replace the pinned set wholesale, discarding whatever state was held.
+    ///
+    /// Called once per discovery cycle when the cycle achieved complete
+    /// coverage (every healthy backend produced exactly one verified
+    /// fingerprint). Lets the pin set track the *current* healthy set rather
+    /// than accumulating every backend the proxy ever routed to — when a
+    /// backend goes unhealthy or its cert rotates, its old fingerprint is
+    /// dropped within one refresh interval.
+    ///
+    /// Always ends in `Pinned(fps)`: Bootstrap → Pinned and Blocked → Pinned
+    /// (matching `add_fingerprint`), and an existing Pinned set is overwritten,
+    /// not merged. An empty `fps` is permitted; callers treat that as "no
+    /// healthy backends right now" and the provider-level fail-closed path
+    /// keeps connections rejected until a future cycle re-pins something.
+    pub fn replace_with(&mut self, fps: HashSet<String>) {
+        *self = FingerprintState::Pinned(fps);
+    }
+
     /// Number of pinned fingerprints (0 for Bootstrap/Blocked).
     pub fn pinned_count(&self) -> usize {
         match self {
@@ -266,4 +284,65 @@ mod tests {
         assert!(matches!(state, FingerprintState::Pinned(_)));
         assert_eq!(state.pinned_count(), 1);
     }
+
+    #[test]
+    fn test_replace_with_from_bootstrap() {
+        // Bootstrap → Pinned: replacing into a fresh state pins the whole set.
+        let discovered: HashSet<String> = ["a", "b"].iter().copied().map(String::from).collect();
+
+        let mut state = FingerprintState::Bootstrap;
+        state.replace_with(discovered);
+        assert!(matches!(state, FingerprintState::Pinned(_)));
+        assert_eq!(state.pinned_count(), 2);
+    }
+
+    #[test]
+    fn test_replace_with_shrinks_pinned() {
+        // Seed five pins one at a time through the incremental path.
+        let mut state = FingerprintState::Bootstrap;
+        for fp in "abcde".chars() {
+            state.add_fingerprint(fp.to_string());
+        }
+        assert_eq!(state.pinned_count(), 5);
+
+        // Backend "e" went away — the pin set must track the new healthy set.
+        let healthy: HashSet<String> = "abcd".chars().map(|fp| fp.to_string()).collect();
+        state.replace_with(healthy);
+        assert_eq!(state.pinned_count(), 4);
+
+        // "a" survives the replacement while "e" is evicted.
+        match &state {
+            FingerprintState::Pinned(pins) => {
+                assert!(pins.contains("a"));
+                assert!(!pins.contains("e"), "evicted fingerprint must be gone");
+            }
+            _ => panic!("expected Pinned"),
+        }
+    }
+
+    #[test]
+    fn test_replace_with_from_blocked() {
+        // Start from a state that has been explicitly blocked.
+        let mut state = FingerprintState::Bootstrap;
+        state.block();
+        assert!(matches!(state, FingerprintState::Blocked));
+
+        // Blocked → Pinned mirrors add_fingerprint's recovery path.
+        let recovered: HashSet<String> = std::iter::once("recovered".to_string()).collect();
+        state.replace_with(recovered);
+        assert!(matches!(state, FingerprintState::Pinned(_)));
+        assert_eq!(state.pinned_count(), 1);
+    }
+
+    #[test]
+    fn test_replace_with_empty_set_is_permitted() {
+        // An empty set means "no healthy backends"; rejecting connections is the
+        // provider-level fail-closed path's job — this type just stores the set.
+        let mut state = FingerprintState::Bootstrap;
+        state.add_fingerprint(String::from("a"));
+        let no_backends: HashSet<String> = HashSet::new();
+        state.replace_with(no_backends);
+        assert!(matches!(state, FingerprintState::Pinned(_)));
+        assert_eq!(state.pinned_count(), 0);
+    }
 }
@@ -8,41 +8,6 @@ use std::collections::HashSet;
 
 const NVIDIA_NRAS_URL: &str = "https://nras.attestation.nvidia.com/v3/attest/gpu";
 
-/// Number of parallel attestation calls per model to discover TLS fingerprints
-/// from multiple backends behind L4 load balancing.
-///
-/// Each cloud-api instance runs its own discovery, so the effective load on a
-/// model is `PARALLELISM * cloud-api instance count` per refresh cycle. Keep
-/// this modest to avoid piling attestation work on inference backends.
-pub const ATTESTATION_DISCOVERY_PARALLELISM: usize = 5;
-
-/// Number of cumulative attestation calls per reused provider on each refresh.
-///
-/// Each cycle adds a small number of fresh-TCP discovery calls to a reused
-/// provider, which accumulates new backend fingerprints into the shared
-/// `FingerprintState`. Over several cycles this covers every backend behind
-/// the L4 LB, even when the initial discovery only hit one. Kept small so
-/// steady-state refresh load stays low.
-pub const CUMULATIVE_DISCOVERY_CALLS: usize = 2;
-
-/// Inter-model stagger for cumulative discovery on each refresh cycle (milliseconds).
-///
-/// When the provider pool refreshes, it runs cumulative attestation discovery
-/// for every reused model. Without staggering, all models fire their first
-/// discovery call at t=0, creating a burst that saturates the GPU evidence
-/// worker on dense hosts (e.g. gpu04 runs 8+ model instances).
-///
-/// With this stagger, model i starts its discovery after `i * MODEL_DISCOVERY_STAGGER_MS`
-/// delay. At 2 s/model the burst is spread across tens of seconds rather than
-/// a single wall-clock instant, while still completing well within the 5-minute
-/// refresh interval even for large pools.
-///
-/// Note: the cumulative discovery loop runs inside `buffer_unordered(10)`, so
-/// tasks at index >= 10 begin their sleep only after a concurrency slot opens.
-/// Their effective wall-clock delay is therefore ≥ i × STAGGER_MS, making the
-/// spread more conservative (not less) for pools larger than 10 models.
-pub const MODEL_DISCOVERY_STAGGER_MS: u64 = 2_000;
-
 /// Result of verifying an attestation report from an inference backend.
 #[derive(Debug, Clone)]
 pub struct VerifiedAttestation {