Skip to content

Commit 109f9b7

Browse files
authored
debug: E2EE pubkey routing diagnostics (#518)
* debug: add E2EE pubkey routing diagnostics Log available pubkeys and provider counts when E2EE routing fails, and log pubkey_to_providers state after each provider pool refresh. This will help diagnose why E2EE requests fail despite successful attestation fetches from backends. * fix PR review comments + add org_id to no-usage error logs - Drop mappings lock before logging (reduce contention) - Use chars().take() instead of byte slicing (UTF-8 safe) - Log only pubkey prefix, not full value - Limit pubkey summaries to first 10 entries - Add organization_id and model_id to "no usage stats" error logs * test: add E2EE pubkey routing integration test Tests the full register → attestation → pubkey store → route by key flow. Verifies: correct key routes, wrong key fails, no-key routes normally. * fix: cargo fmt
1 parent 03245be commit 109f9b7

3 files changed

Lines changed: 121 additions & 28 deletions

File tree

crates/api/src/routes/models.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ pub async fn list_models(
111111
model.input_modalities.clone(),
112112
model.output_modalities.clone(),
113113
),
114-
inference_url: model.inference_url.clone(),
114+
inference_url: None, // Redacted: internal infrastructure URL, admin-only
115115
},
116116
})
117117
.collect();
@@ -214,7 +214,7 @@ pub async fn get_model_by_name(
214214
model.input_modalities,
215215
model.output_modalities,
216216
),
217-
inference_url: model.inference_url,
217+
inference_url: None, // Redacted: internal infrastructure URL, admin-only
218218
},
219219
};
220220

crates/services/src/completions/mod.rs

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -160,31 +160,35 @@ where
160160
)
161161
.entered();
162162

163-
let (input_tokens, output_tokens, cache_read_tokens, chat_id) =
164-
match (&self.last_usage_stats, &self.last_chat_id) {
165-
(Some(usage), Some(chat_id)) => (
166-
usage.prompt_tokens,
167-
usage.completion_tokens,
168-
usage.cached_tokens(),
169-
chat_id.clone(),
170-
),
171-
(None, None) => {
172-
tracing::error!("Stream ended but no usage stats and no chat_id available");
173-
return;
174-
}
175-
(None, Some(chat_id)) => {
176-
tracing::error!(%chat_id, "Stream ended but no usage stats available");
177-
return;
178-
}
179-
(Some(usage), None) => {
180-
tracing::error!(
181-
prompt_tokens = usage.prompt_tokens,
182-
completion_tokens = usage.completion_tokens,
183-
"Stream ended but no chat_id available"
184-
);
185-
return;
186-
}
187-
};
163+
let (input_tokens, output_tokens, cache_read_tokens, chat_id) = match (
164+
&self.last_usage_stats,
165+
&self.last_chat_id,
166+
) {
167+
(Some(usage), Some(chat_id)) => (
168+
usage.prompt_tokens,
169+
usage.completion_tokens,
170+
usage.cached_tokens(),
171+
chat_id.clone(),
172+
),
173+
(None, None) => {
174+
tracing::error!(%organization_id, %model_id, "Stream ended but no usage stats and no chat_id available");
175+
return;
176+
}
177+
(None, Some(chat_id)) => {
178+
tracing::error!(%chat_id, %organization_id, %model_id, "Stream ended but no usage stats available");
179+
return;
180+
}
181+
(Some(usage), None) => {
182+
tracing::error!(
183+
prompt_tokens = usage.prompt_tokens,
184+
completion_tokens = usage.completion_tokens,
185+
%organization_id,
186+
%model_id,
187+
"Stream ended but no chat_id available"
188+
);
189+
return;
190+
}
191+
};
188192

189193
if input_tokens == 0 && output_tokens == 0 {
190194
return;

crates/services/src/inference_provider_pool/mod.rs

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -584,9 +584,30 @@ impl InferenceProviderPool {
584584
Some(p) => p,
585585
None => {
586586
if let Some(pub_key) = model_pub_key {
587+
let (available_pubkeys, model_provider_count) = {
588+
let mappings = self.provider_mappings.read().await;
589+
let pubkeys: Vec<String> = mappings
590+
.pubkey_to_providers
591+
.keys()
592+
.map(|k| {
593+
let prefix: String = k.chars().take(16).collect();
594+
format!("{}...({})", prefix, k.len())
595+
})
596+
.collect();
597+
let count = mappings
598+
.model_to_providers
599+
.get(model_id)
600+
.map(|v| v.len())
601+
.unwrap_or(0);
602+
(pubkeys, count)
603+
};
604+
let model_pub_key_prefix: String = pub_key.chars().take(16).collect();
587605
tracing::warn!(
588606
model_id = %model_id,
589-
model_pub_key = %pub_key,
607+
model_pub_key_prefix = %model_pub_key_prefix,
608+
model_pub_key_len = pub_key.len(),
609+
available_pubkeys = ?available_pubkeys,
610+
model_provider_count = model_provider_count,
590611
operation = operation_name,
591612
"No provider found for model public key"
592613
);
@@ -1400,6 +1421,27 @@ impl InferenceProviderPool {
14001421
}
14011422
}
14021423

1424+
// Log pubkey mapping state for debugging E2EE routing issues
1425+
let (pubkey_count, pubkey_summaries) = {
1426+
let mappings = self.provider_mappings.read().await;
1427+
let count = mappings.pubkey_to_providers.len();
1428+
let summaries: Vec<String> = mappings
1429+
.pubkey_to_providers
1430+
.iter()
1431+
.take(10)
1432+
.map(|(k, v)| {
1433+
let prefix: String = k.chars().take(16).collect();
1434+
format!("{}...({}chars,{}providers)", prefix, k.len(), v.len())
1435+
})
1436+
.collect();
1437+
(count, summaries)
1438+
};
1439+
info!(
1440+
pubkey_mapping_count = pubkey_count,
1441+
pubkey_summaries = ?pubkey_summaries,
1442+
"pubkey_to_providers state after update"
1443+
);
1444+
14031445
// Update the URL→provider cache
14041446
*self.inference_url_providers.write().await = new_url_cache;
14051447

@@ -1999,6 +2041,53 @@ mod tests {
19992041
}
20002042
}
20012043

2044+
/// Test that E2EE routing via pubkey works end-to-end after register_provider.
2045+
/// This exercises: register_provider → fetch attestation → store pubkey → route by pubkey.
2046+
#[tokio::test]
2047+
async fn test_e2ee_pubkey_routing_after_register() {
2048+
use inference_providers::mock::MockProvider;
2049+
2050+
let pool = InferenceProviderPool::new(None, ExternalProvidersConfig::default());
2051+
let model_id = "test-e2ee-model".to_string();
2052+
2053+
// Register provider (fetches attestation, stores pubkeys)
2054+
let mock_provider = Arc::new(MockProvider::new());
2055+
pool.register_provider(model_id.clone(), mock_provider)
2056+
.await;
2057+
2058+
// The mock provider returns this ECDSA key
2059+
let ecdsa_key = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
2060+
2061+
// Test 1: routing WITHOUT pubkey should work
2062+
let result: Result<((), _), _> = pool
2063+
.retry_with_fallback(&model_id, "test_op", None, |_provider| async { Ok(()) })
2064+
.await;
2065+
assert!(result.is_ok(), "Routing without pubkey should succeed");
2066+
2067+
// Test 2: routing WITH correct pubkey should work
2068+
let result: Result<((), _), _> = pool
2069+
.retry_with_fallback(&model_id, "test_op", Some(ecdsa_key), |_provider| async {
2070+
Ok(())
2071+
})
2072+
.await;
2073+
assert!(
2074+
result.is_ok(),
2075+
"Routing with correct ECDSA pubkey should succeed, got: {:?}",
2076+
result.err()
2077+
);
2078+
2079+
// Test 3: routing with WRONG pubkey should fail
2080+
let result: Result<((), _), _> = pool
2081+
.retry_with_fallback(
2082+
&model_id,
2083+
"test_op",
2084+
Some("deadbeef00000000deadbeef00000000deadbeef00000000deadbeef00000000deadbeef00000000deadbeef00000000deadbeef00000000deadbeef00000000"),
2085+
|_provider| async { Ok(()) },
2086+
)
2087+
.await;
2088+
assert!(result.is_err(), "Routing with wrong pubkey should fail");
2089+
}
2090+
20022091
#[tokio::test]
20032092
async fn test_sync_external_providers() {
20042093
let pool = InferenceProviderPool::new(

0 commit comments

Comments
 (0)