nearai
diff --git a/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions b/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎crates/services/Cargo.toml‎
Lines changed: 1 addition & 0 deletions b/‎crates/services/Cargo.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎crates/services/src/attestation/verification.rs‎
Lines changed: 9 additions & 0 deletions b/‎crates/services/src/attestation/verification.rs‎
Lines changed: 9 additions & 0 deletions
@@ -10,6 +10,7 @@ description.workspace = true
 async-trait = "0.1"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
+serde_urlencoded = "0.7"
 uuid = { version = "1.22", features = ["v4", "v5", "serde"] }
 opentelemetry = { version = "0.31", features = ["metrics"] }
 opentelemetry_sdk = { version = "0.31", features = ["rt-tokio", "metrics"] }
 
@@ -16,6 +16,15 @@ const NVIDIA_NRAS_URL: &str = "https://nras.attestation.nvidia.com/v3/attest/gpu
 /// this modest to avoid piling attestation work on inference backends.
 pub const ATTESTATION_DISCOVERY_PARALLELISM: usize = 5;
 
+/// Number of additional attestation discovery calls made to each reused
+/// provider on every refresh; their results are cumulative across refreshes.
+///
+/// Each cycle adds a small number of fresh-TCP discovery calls to a reused
+/// provider, and those calls accumulate new backend fingerprints into the
+/// shared `FingerprintState`. Over several cycles this covers every backend
+/// behind the layer-4 load balancer (L4 LB), even when the initial discovery
+/// only hit one. Kept small so steady-state refresh load stays low.
+pub const CUMULATIVE_DISCOVERY_CALLS: usize = 2;
+
 /// Result of verifying an attestation report from an inference backend.
 #[derive(Debug, Clone)]
 pub struct VerifiedAttestation {