diff --git a/README.md b/README.md index 7a091140..799f2204 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ potions than Howl would tolerate. | Command-line and optional MCP tools for agent-facing secret-name discovery, with no value readback | Working | [Agent-facing tools](https://calciforge.org/#agent-facing-tools-mcp-and-cli) | | Agent runtime contract for command-line guidance, optional MCP, artifacts, and future Calciforge APIs | Working draft | [Agent runtime contract](docs/agent-runtime-contract.md) | | Telegram, Matrix, WhatsApp, Signal, and text/iMessage routing | Working | [Multi-channel chat](https://calciforge.org/#multi-channel-chat) | -| OpenAI-compatible model gateway, provider routing, model aliases, alloys, cascades, dispatchers, and local model switching | Working | [Model gateway](docs/model-gateway.md) | +| OpenAI-compatible model gateway, provider routing, model aliases, Wardwright adapter support, legacy alloys/cascades/dispatchers, and local model switching | Working | [Model gateway](docs/model-gateway.md) | | Helicone-backed gateway observability with dashboard-visible doctor checks | Working | [Model gateway](docs/model-gateway.md#external-gateway-engines) | | Codex CLI and OpenClaw Codex subscription/OAuth integration paths | Working | [Codex integration](docs/codex-openclaw-integration.md) | | `calciforge doctor` config/state/endpoint diagnostics | Working | [Quick Start](#quick-start) | @@ -124,8 +124,10 @@ tunnel with `CALCIFORGE_PASTE_PUBLIC_BASE_URL`. Keep Calciforge's own service traffic separate from agent traffic. Point agents at Calciforge's OpenAI-compatible model gateway for model calls; -that path provides model aliases, alloys, cascades, dispatchers, provider -routing, and observability. Route agent tool/web traffic through +that path provides model aliases, provider routing, observability, and legacy +in-process synthetic selectors. For new alloys, cascades, and dispatchers, use +[Wardwright](https://wardwright.dev/) as an OpenAI-compatible provider adapter +and let it own the route graph and receipts. Route agent tool/web traffic through `security-proxy` or a Calciforge fetch/tool integration when returned content needs scanning or `{{secret:NAME}}` substitution. diff --git a/crates/calciforge/src/config.rs b/crates/calciforge/src/config.rs index 2467bd9c..510b8d9a 100644 --- a/crates/calciforge/src/config.rs +++ b/crates/calciforge/src/config.rs @@ -63,18 +63,18 @@ pub struct CalciforgeConfig { #[serde(default)] pub model_roles: Vec, - /// `[[alloys]]` — model blending/mixing groups. + /// `[[alloys]]` — legacy in-process model blending/mixing groups. /// Use `!model ` to activate an alloy for an identity. #[serde(default)] pub alloys: Vec, - /// `[[cascades]]` — explicit ordered model fallback chains. + /// `[[cascades]]` — legacy explicit ordered model fallback chains. /// The proxy tries the first model whose declared context window can hold /// the request, then falls through to later eligible models on failure. #[serde(default)] pub cascades: Vec, - /// `[[dispatchers]]` — request-size aware model selectors. + /// `[[dispatchers]]` — legacy request-size aware model selectors. /// The proxy picks the smallest configured model that can hold the request, /// then uses larger eligible models as fallbacks. #[serde(default)] @@ -149,7 +149,7 @@ impl CalciforgeConfig { } } -/// Alloy definition (`[[alloys]]`). +/// Legacy alloy definition (`[[alloys]]`). #[derive(Debug, Clone, Deserialize, Serialize)] pub struct AlloyConfig { /// Alloy identifier used by commands (e.g. "free-alloy-1"). @@ -192,7 +192,7 @@ fn default_alloy_weight() -> u32 { 1 } -/// Cascade definition (`[[cascades]]`). +/// Legacy cascade definition (`[[cascades]]`). #[derive(Debug, Clone, Deserialize, Serialize)] pub struct CascadeConfig { /// Synthetic model id requested by agents. @@ -205,7 +205,7 @@ pub struct CascadeConfig { pub models: Vec, } -/// Dispatcher definition (`[[dispatchers]]`). +/// Legacy dispatcher definition (`[[dispatchers]]`). #[derive(Debug, Clone, Deserialize, Serialize)] pub struct DispatcherConfig { /// Synthetic model id requested by agents. @@ -1077,8 +1077,8 @@ pub struct ProxyProviderConfig { /// Provider adapter kind. Supported OpenAI-compatible engine overlays /// include "http", "helicone", "litellm", "portkey", "tensorzero", - /// "future-agi", and "openrouter". They share the same request core; - /// the kind chooses engine metadata, dashboard capability, and any + /// "future-agi", "openrouter", and "wardwright". They share the same + /// request core; the kind chooses engine metadata, dashboard capability, and any /// provider-specific headers. CLI-backed subscriptions are configured as /// `[[agents]]`, not gateway providers. #[serde(default = "default_proxy_provider_backend")] diff --git a/crates/calciforge/src/config/validator.rs b/crates/calciforge/src/config/validator.rs index 3ef32f80..eb6a9ae9 100644 --- a/crates/calciforge/src/config/validator.rs +++ b/crates/calciforge/src/config/validator.rs @@ -71,6 +71,8 @@ pub fn validate_config(config: &CalciforgeConfig) -> ValidationResult { // Validate enabled channels before long-lived tasks start. validate_channels(config, &mut result); + warn_on_legacy_synthetic_selectors(config, &mut result); + // Validate alloys have valid constituents validate_alloys(config, &mut result); @@ -90,6 +92,17 @@ pub fn validate_config(config: &CalciforgeConfig) -> ValidationResult { result } +fn warn_on_legacy_synthetic_selectors(config: &CalciforgeConfig, result: &mut ValidationResult) { + let has_legacy_synthetic_selectors = + !config.alloys.is_empty() || !config.cascades.is_empty() || !config.dispatchers.is_empty(); + if has_legacy_synthetic_selectors { + result.add_warning( + "Calciforge in-process synthetic selectors ([[alloys]], [[cascades]], [[dispatchers]]) are legacy compatibility features. Prefer Wardwright or another OpenAI-compatible provider adapter for new synthetic-model composition." + .to_string(), + ); + } +} + /// Validate agent adapter kinds and required fields. fn validate_agents(config: &CalciforgeConfig, result: &mut ValidationResult) { for agent in &config.agents { @@ -1044,3 +1057,7 @@ mod validator_tests_2; #[cfg(test)] #[path = "validator_tests_3.rs"] mod validator_tests_3; + +#[cfg(test)] +#[path = "validator_wardwright_tests.rs"] +mod validator_wardwright_tests; diff --git a/crates/calciforge/src/config/validator_wardwright_tests.rs b/crates/calciforge/src/config/validator_wardwright_tests.rs new file mode 100644 index 00000000..54913a7f --- /dev/null +++ b/crates/calciforge/src/config/validator_wardwright_tests.rs @@ -0,0 +1,38 @@ +use super::validator_test_support::{MIN_VALID, parse}; +use super::*; + +#[test] +fn wardwright_backend_type_validates_from_shared_allowlist() { + let fixture = format!( + "{MIN_VALID}\n[proxy]\nenabled = true\nbind = \"127.0.0.1:18083\"\nbackend_type = \"wardwright\"\nbackend_url = \"https://gateway.example.invalid/v1\"\n" + ); + let config = parse(&fixture); + let result = validate_config(&config); + assert!( + result.is_valid(), + "wardwright should be accepted as an OpenAI-compatible provider adapter; errors: {:?}", + result.errors + ); +} + +#[test] +fn legacy_synthetic_selectors_warn_to_prefer_wardwright() { + let fixture = format!( + "{MIN_VALID}\n[[dispatchers]]\nid = \"balanced\"\nname = \"Balanced\"\n\n[[dispatchers.models]]\nmodel = \"local-small\"\ncontext_window = 32000\n" + ); + let config = parse(&fixture); + let result = validate_config(&config); + + assert!( + result.is_valid(), + "legacy synthetic selectors should remain valid while migration is optional: {:?}", + result.errors + ); + assert!( + result.warnings.iter().any(|warning| { + warning.contains("legacy compatibility") && warning.contains("Wardwright") + }), + "synthetic selector configs should point operators toward Wardwright; warnings: {:?}", + result.warnings + ); +} diff --git a/crates/calciforge/src/proxy/backend.rs b/crates/calciforge/src/proxy/backend.rs index 19d7481a..0612d9bc 100644 --- a/crates/calciforge/src/proxy/backend.rs +++ b/crates/calciforge/src/proxy/backend.rs @@ -279,6 +279,7 @@ impl SecretsBackend for MockBackend { total_tokens: 0, }, system_fingerprint: None, + extra_body: serde_json::Map::new(), }) } diff --git a/crates/calciforge/src/proxy/gateway.rs b/crates/calciforge/src/proxy/gateway.rs index cfb444e9..f549fd3e 100644 --- a/crates/calciforge/src/proxy/gateway.rs +++ b/crates/calciforge/src/proxy/gateway.rs @@ -3,7 +3,7 @@ //! Calciforge should not assume there is one installed "model gateway". It owns //! a policy/audit/auth boundary, then routes to one or more configured provider //! adapters such as builtin OpenAI-compatible HTTP, Helicone, LiteLLM, -//! OpenRouter, Ollama, or mock test adapters. +//! OpenRouter, Wardwright, Ollama, or mock test adapters. //! //! The older config field names still say `backend_type` for compatibility, but //! runtime code should treat these as adapter kinds. @@ -136,6 +136,8 @@ pub enum GatewayType { FutureAgi, /// OpenRouter OpenAI-compatible provider boundary. OpenRouter, + /// Wardwright synthetic model gateway. + Wardwright, /// Mock adapter for tests only. Mock, } @@ -152,6 +154,7 @@ impl std::str::FromStr for GatewayType { "tensorzero" | "tensor-zero" | "tensor_zero" => Ok(GatewayType::TensorZero), "future-agi" | "future_agi" | "futureagi" => Ok(GatewayType::FutureAgi), "openrouter" | "open-router" | "open_router" => Ok(GatewayType::OpenRouter), + "wardwright" | "ward-wright" | "ward_wright" => Ok(GatewayType::Wardwright), "mock" => Ok(GatewayType::Mock), _ => Err(format!("Unknown gateway type: {}", s)), } @@ -168,6 +171,7 @@ impl std::fmt::Display for GatewayType { GatewayType::TensorZero => write!(f, "tensorzero"), GatewayType::FutureAgi => write!(f, "future-agi"), GatewayType::OpenRouter => write!(f, "openrouter"), + GatewayType::Wardwright => write!(f, "wardwright"), GatewayType::Mock => write!(f, "mock"), } } @@ -182,6 +186,7 @@ impl GatewayType { "tensorzero", "future-agi", "openrouter", + "wardwright", "mock", ]; @@ -193,6 +198,7 @@ impl GatewayType { "tensorzero", "future-agi", "openrouter", + "wardwright", ]; pub fn display_name(self) -> &'static str { @@ -204,6 +210,7 @@ impl GatewayType { GatewayType::TensorZero => "TensorZero gateway", GatewayType::FutureAgi => "Future AGI gateway", GatewayType::OpenRouter => "OpenRouter", + GatewayType::Wardwright => "Wardwright synthetic model gateway", GatewayType::Mock => "Mock provider adapter", } } @@ -266,6 +273,14 @@ impl GatewayType { observability: false, operator_ui: true, }, + GatewayType::Wardwright => GatewayCapabilities { + openai_chat_completions: true, + model_listing: true, + tool_call_transcripts: false, + config_validation: false, + observability: true, + operator_ui: true, + }, GatewayType::Mock => GatewayCapabilities { openai_chat_completions: true, model_listing: true, @@ -337,6 +352,11 @@ impl GatewayType { true, ), ], + GatewayType::Wardwright => vec![ProviderObservabilityCapability::new( + ProviderObservabilityKind::NativeDashboard, + "Wardwright receipt and route dashboard", + false, + )], GatewayType::BuiltinHttp | GatewayType::OpenRouter | GatewayType::Mock => Vec::new(), } } @@ -351,6 +371,7 @@ impl GatewayType { | GatewayType::TensorZero | GatewayType::FutureAgi | GatewayType::OpenRouter + | GatewayType::Wardwright ) } @@ -464,7 +485,8 @@ pub fn create_gateway( | GatewayType::Portkey | GatewayType::TensorZero | GatewayType::FutureAgi - | GatewayType::OpenRouter => { + | GatewayType::OpenRouter + | GatewayType::Wardwright => { // Builtin HTTP upstream calls // This requires a backend to be passed in let backend = backend.ok_or_else(|| { @@ -785,6 +807,7 @@ impl ProviderAdapter for MockGateway { total_tokens: 0, }, system_fingerprint: None, + extra_body: serde_json::Map::new(), }) } diff --git a/crates/calciforge/src/proxy/gateway_engine_tests.rs b/crates/calciforge/src/proxy/gateway_engine_tests.rs new file mode 100644 index 00000000..48364a08 --- /dev/null +++ b/crates/calciforge/src/proxy/gateway_engine_tests.rs @@ -0,0 +1,47 @@ +use super::gateway::{GatewayType, openai_compatible_headers}; +use crate::config::GatewayRetryConfig; + +#[test] +fn helicone_policy_headers_are_overlay_not_separate_gateway_core() { + let retry = GatewayRetryConfig { + enabled: true, + max_retries: 4, + min_timeout_ms: 250, + max_timeout_ms: 3_000, + factor: 3, + retry_on: vec![], + }; + + let headers = openai_compatible_headers(GatewayType::Helicone, Some("test-key"), &retry, None) + .expect("helicone overlay should add headers"); + + assert_eq!( + headers.get("helicone-auth"), + Some(&"Bearer test-key".to_string()) + ); + assert_eq!( + headers.get("helicone-retry-enabled"), + Some(&"true".to_string()) + ); + assert_eq!(headers.get("helicone-retry-num"), Some(&"4".to_string())); + assert_eq!( + headers.get("helicone-retry-min-timeout"), + Some(&"250".to_string()) + ); + assert_eq!( + headers.get("helicone-retry-max-timeout"), + Some(&"3000".to_string()) + ); + assert_eq!(headers.get("helicone-retry-factor"), Some(&"3".to_string())); + + assert!( + openai_compatible_headers( + GatewayType::LiteLlm, + Some("test-key"), + &GatewayRetryConfig::default(), + None + ) + .is_none(), + "LiteLLM should not inherit Helicone-specific headers" + ); +} diff --git a/crates/calciforge/src/proxy/gateway_tests.rs b/crates/calciforge/src/proxy/gateway_tests.rs index d1931484..fedf3d78 100644 --- a/crates/calciforge/src/proxy/gateway_tests.rs +++ b/crates/calciforge/src/proxy/gateway_tests.rs @@ -84,51 +84,6 @@ fn named_gateway_engines_share_openai_compatible_http_core() { assert!(!GatewayType::Mock.uses_openai_compatible_http_core()); } -#[test] -fn helicone_policy_headers_are_overlay_not_separate_gateway_core() { - let retry = GatewayRetryConfig { - enabled: true, - max_retries: 4, - min_timeout_ms: 250, - max_timeout_ms: 3_000, - factor: 3, - retry_on: vec![], - }; - - let headers = openai_compatible_headers(GatewayType::Helicone, Some("test-key"), &retry, None) - .expect("helicone overlay should add headers"); - - assert_eq!( - headers.get("helicone-auth"), - Some(&"Bearer test-key".to_string()) - ); - assert_eq!( - headers.get("helicone-retry-enabled"), - Some(&"true".to_string()) - ); - assert_eq!(headers.get("helicone-retry-num"), Some(&"4".to_string())); - assert_eq!( - headers.get("helicone-retry-min-timeout"), - Some(&"250".to_string()) - ); - assert_eq!( - headers.get("helicone-retry-max-timeout"), - Some(&"3000".to_string()) - ); - assert_eq!(headers.get("helicone-retry-factor"), Some(&"3".to_string())); - - assert!( - openai_compatible_headers( - GatewayType::LiteLlm, - Some("test-key"), - &GatewayRetryConfig::default(), - None - ) - .is_none(), - "LiteLLM should not inherit Helicone-specific headers" - ); -} - #[test] fn test_mock_gateway() { let config = GatewayConfig { @@ -325,6 +280,7 @@ async fn builtin_http_gateway_forwards_complete_chat_request_options() { total_tokens: 2, }, system_fingerprint: None, + extra_body: serde_json::Map::new(), }; let mock = server .mock("POST", "/v1/chat/completions") @@ -413,6 +369,7 @@ async fn configured_authorization_header_cannot_override_backend_api_key() { total_tokens: 2, }, system_fingerprint: None, + extra_body: serde_json::Map::new(), }; let mock = server .mock("POST", "/v1/chat/completions") @@ -590,6 +547,7 @@ async fn helicone_engine_uses_shared_http_core_with_engine_headers() { total_tokens: 2, }, system_fingerprint: None, + extra_body: serde_json::Map::new(), }; let mock = server .mock("POST", "/v1/chat/completions") diff --git a/crates/calciforge/src/proxy/handlers.rs b/crates/calciforge/src/proxy/handlers.rs index baf07247..db9aafb7 100644 --- a/crates/calciforge/src/proxy/handlers.rs +++ b/crates/calciforge/src/proxy/handlers.rs @@ -345,7 +345,7 @@ async fn try_provider( let duration = start.elapsed(); let event = match &result { - Ok(response) => telemetry_attempt.success(duration, response.choices.len()), + Ok(response) => telemetry_attempt.success_response(duration, response), Err(error) => telemetry_attempt.failure(duration, error.failure_kind()), }; state.telemetry.emit_gateway_attempt(event).await; @@ -1197,6 +1197,7 @@ mod tests { total_tokens: 2, }, system_fingerprint: None, + extra_body: serde_json::Map::new(), }) } diff --git a/crates/calciforge/src/proxy/mod.rs b/crates/calciforge/src/proxy/mod.rs index e9ea0f29..644b834a 100644 --- a/crates/calciforge/src/proxy/mod.rs +++ b/crates/calciforge/src/proxy/mod.rs @@ -25,6 +25,8 @@ mod backend; mod control_auth; pub(crate) mod gateway; #[cfg(test)] +mod gateway_engine_tests; +#[cfg(test)] mod gateway_tests; mod handlers; pub(crate) mod model_resolver; @@ -37,6 +39,8 @@ pub(crate) mod telemetry; mod telemetry_tests; mod token_estimator; mod voice_handlers; +#[cfg(test)] +mod wardwright_tests; pub use openai::ChatCompletionRequest; pub use routing::ProviderEntry; @@ -191,7 +195,8 @@ pub async fn start_proxy_server( | gateway::GatewayType::Portkey | gateway::GatewayType::TensorZero | gateway::GatewayType::FutureAgi - | gateway::GatewayType::OpenRouter => { + | gateway::GatewayType::OpenRouter + | gateway::GatewayType::Wardwright => { let headers = gateway::openai_compatible_headers( gateway_type, default_api_key.as_deref(), @@ -325,6 +330,10 @@ mod tests { gateway_type_for_backend_type("litellm"), Some(gateway::GatewayType::LiteLlm) ); + assert_eq!( + gateway_type_for_backend_type("wardwright"), + Some(gateway::GatewayType::Wardwright) + ); assert_eq!( gateway_type_for_backend_type("mock"), Some(gateway::GatewayType::Mock) @@ -338,6 +347,7 @@ mod tests { assert!(backend_accepts_unlisted_models("http")); assert!(backend_accepts_unlisted_models("litellm")); assert!(backend_accepts_unlisted_models("openrouter")); + assert!(backend_accepts_unlisted_models("wardwright")); assert!(!backend_accepts_unlisted_models("mock")); } @@ -353,6 +363,7 @@ mod tests { "tensorzero", "future-agi", "openrouter", + "wardwright", "mock" ] ); diff --git a/crates/calciforge/src/proxy/openai.rs b/crates/calciforge/src/proxy/openai.rs index 8eed39a3..73e18e84 100644 --- a/crates/calciforge/src/proxy/openai.rs +++ b/crates/calciforge/src/proxy/openai.rs @@ -258,6 +258,28 @@ pub struct ChatCompletionResponse { /// System fingerprint (optional) #[serde(skip_serializing_if = "Option::is_none")] pub system_fingerprint: Option, + + /// Provider/gateway-specific OpenAI-compatible response extensions. + /// + /// Wardwright, for example, returns `wardwright.receipt_id` so operators + /// can inspect why a synthetic route selected a concrete provider. Preserve + /// unknown response fields instead of silently dropping trace handles. + #[serde(default, flatten)] + pub extra_body: serde_json::Map, +} + +impl ChatCompletionResponse { + pub fn wardwright_receipt_id(&self) -> Option<&str> { + self.extra_body + .get("wardwright") + .and_then(|value| value.get("receipt_id")) + .and_then(serde_json::Value::as_str) + .or_else(|| { + self.extra_body + .get("receipt_id") + .and_then(serde_json::Value::as_str) + }) + } } /// A completion choice diff --git a/crates/calciforge/src/proxy/openai_streaming.rs b/crates/calciforge/src/proxy/openai_streaming.rs index 2f34fa47..82e0ecb4 100644 --- a/crates/calciforge/src/proxy/openai_streaming.rs +++ b/crates/calciforge/src/proxy/openai_streaming.rs @@ -287,6 +287,7 @@ pub(super) fn parse_streaming_chat_completion( choices, usage, system_fingerprint: None, + extra_body: serde_json::Map::new(), }) } diff --git a/crates/calciforge/src/proxy/routing.rs b/crates/calciforge/src/proxy/routing.rs index 830aee33..5caf545f 100644 --- a/crates/calciforge/src/proxy/routing.rs +++ b/crates/calciforge/src/proxy/routing.rs @@ -580,6 +580,25 @@ mod tests { assert_eq!(entries[0].gateway.engine_info().id, "litellm"); } + #[test] + fn wardwright_provider_uses_shared_http_core_with_wardwright_engine_metadata() { + let config = ProxyConfig { + providers: vec![provider( + "wardwright-local", + "wardwright", + "http://127.0.0.1:8791/v1", + )], + ..Default::default() + }; + + let entries = build_provider_entries(&config, 30).unwrap(); + + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].patterns, vec!["test-model"]); + assert_eq!(entries[0].gateway.gateway_type(), GatewayType::Wardwright); + assert_eq!(entries[0].gateway.engine_info().id, "wardwright"); + } + proptest! { #[test] fn prefix_slash_wildcard_only_matches_names_inside_namespace( diff --git a/crates/calciforge/src/proxy/telemetry.rs b/crates/calciforge/src/proxy/telemetry.rs index b5e2bf79..c70b1269 100644 --- a/crates/calciforge/src/proxy/telemetry.rs +++ b/crates/calciforge/src/proxy/telemetry.rs @@ -13,6 +13,7 @@ use serde_json::{Value, json}; use tracing::{debug, warn}; use crate::config::{GatewayFailureKind, ProxyObservabilityConfig}; +use crate::proxy::openai::ChatCompletionResponse; use crate::sync::Arc; pub(crate) const SUPPORTED_OBSERVABILITY_KINDS: &[&str] = @@ -43,6 +44,7 @@ pub(crate) struct GatewayTelemetryEvent { pub outcome: GatewayTelemetryOutcome, pub failure_kind: Option, pub choices: Option, + pub receipt_id: Option, } pub(crate) struct GatewayTelemetryAttempt { @@ -59,7 +61,12 @@ pub(crate) struct GatewayTelemetryAttempt { } impl GatewayTelemetryAttempt { - pub(crate) fn success(self, duration: Duration, choices: usize) -> GatewayTelemetryEvent { + pub(crate) fn success_with_receipt_id( + self, + duration: Duration, + choices: usize, + receipt_id: Option<&str>, + ) -> GatewayTelemetryEvent { GatewayTelemetryEvent { event_type: "model_gateway.attempt", timestamp_ms: timestamp_ms(), @@ -77,9 +84,22 @@ impl GatewayTelemetryAttempt { outcome: GatewayTelemetryOutcome::Success, failure_kind: None, choices: Some(choices), + receipt_id: receipt_id.map(str::to_string), } } + pub(crate) fn success_response( + self, + duration: Duration, + response: &ChatCompletionResponse, + ) -> GatewayTelemetryEvent { + self.success_with_receipt_id( + duration, + response.choices.len(), + response.wardwright_receipt_id(), + ) + } + pub(crate) fn failure( self, duration: Duration, @@ -102,6 +122,7 @@ impl GatewayTelemetryAttempt { outcome: GatewayTelemetryOutcome::Failure, failure_kind: Some(failure_kind), choices: None, + receipt_id: None, } } } @@ -182,6 +203,7 @@ impl TelemetrySink for LogTelemetrySink { gateway_engine = %event.gateway_engine, duration_ms = event.duration_ms, choices = event.choices.unwrap_or_default(), + receipt_id = ?event.receipt_id, "Model gateway attempt succeeded" ), GatewayTelemetryOutcome::Failure => warn!( @@ -368,6 +390,9 @@ fn otlp_attributes(event: &GatewayTelemetryEvent) -> Vec { if let Some(choices) = event.choices { attrs.push(otlp_i64_attr("calciforge.choices", choices as i64)); } + if let Some(receipt_id) = event.receipt_id.as_deref() { + attrs.push(otlp_attr("calciforge.receipt_id", receipt_id)); + } attrs } @@ -450,7 +475,7 @@ mod tests { tools: true, message_count: 2, } - .success(Duration::from_millis(42), 1) + .success_with_receipt_id(Duration::from_millis(42), 1, None) } #[test] diff --git a/crates/calciforge/src/proxy/telemetry_tests.rs b/crates/calciforge/src/proxy/telemetry_tests.rs index feed3e7b..eb5a2ad9 100644 --- a/crates/calciforge/src/proxy/telemetry_tests.rs +++ b/crates/calciforge/src/proxy/telemetry_tests.rs @@ -2,6 +2,7 @@ use std::sync::Mutex; use async_trait::async_trait; use axum::Json; +use axum::body::to_bytes; use axum::extract::State; use axum::http::{HeaderMap, HeaderValue, StatusCode}; use axum::response::IntoResponse; @@ -23,6 +24,7 @@ use crate::sync::Arc; struct RecordingGateway { config: GatewayConfig, requests: Mutex>, + response_extra_body: serde_json::Map, } impl RecordingGateway { @@ -39,8 +41,18 @@ impl RecordingGateway { ui_url: None, }, requests: Mutex::new(Vec::new()), + response_extra_body: serde_json::Map::new(), } } + + fn with_wardwright_receipt(receipt_id: &str) -> Self { + let mut gateway = Self::new(); + gateway.response_extra_body.insert( + "wardwright".to_string(), + serde_json::json!({ "receipt_id": receipt_id }), + ); + gateway + } } #[async_trait] @@ -80,6 +92,7 @@ impl ProviderAdapter for RecordingGateway { total_tokens: 2, }, system_fingerprint: None, + extra_body: self.response_extra_body.clone(), }) } @@ -106,14 +119,17 @@ async fn provider_route_emits_gateway_attempt_telemetry_without_payloads() { "upstream_model": "kimi-k2.6", "provider_id": "opencode-go", "outcome": "success", - "message_count": 1 + "message_count": 1, + "receipt_id": "rcpt_wardwright_test" }))) .with_status(204) .create_async() .await; let default_gateway = Arc::new(RecordingGateway::new()); - let provider_gateway = Arc::new(RecordingGateway::new()); + let provider_gateway = Arc::new(RecordingGateway::with_wardwright_receipt( + "rcpt_wardwright_test", + )); let state = ProxyState { alloy_manager: Arc::new(AlloyManager::empty()), provider_registry: Arc::new(ProviderRegistry::new()), @@ -156,6 +172,14 @@ async fn provider_route_emits_gateway_attempt_telemetry_without_payloads() { .into_response(); assert_eq!(response.status(), StatusCode::OK); + let body = to_bytes(response.into_body(), usize::MAX).await.unwrap(); + let body: serde_json::Value = serde_json::from_slice(&body).unwrap(); + assert_eq!( + body.pointer("/wardwright/receipt_id") + .and_then(serde_json::Value::as_str), + Some("rcpt_wardwright_test"), + "provider-specific response extensions should still reach the client" + ); wait_for_mock(&telemetry_mock).await; } diff --git a/crates/calciforge/src/proxy/wardwright_tests.rs b/crates/calciforge/src/proxy/wardwright_tests.rs new file mode 100644 index 00000000..ae8c92f5 --- /dev/null +++ b/crates/calciforge/src/proxy/wardwright_tests.rs @@ -0,0 +1,40 @@ +use super::gateway::{GatewayConfig, GatewayType}; + +#[test] +fn wardwright_gateway_type_parses_and_displays() { + assert_eq!( + "ward-wright".parse::().unwrap(), + GatewayType::Wardwright + ); + assert_eq!(GatewayType::Wardwright.to_string(), "wardwright"); +} + +#[test] +fn wardwright_provider_uses_shared_http_core_with_receipt_dashboard_metadata() { + assert!(GatewayType::Wardwright.uses_openai_compatible_http_core()); + + let config = GatewayConfig { + backend_type: GatewayType::Wardwright, + ui_url: Some("http://127.0.0.1:8791/admin/runtime".to_string()), + ..Default::default() + }; + let info = config.engine_info(GatewayType::Wardwright); + + assert_eq!(info.id, "wardwright"); + assert_eq!(info.display_name, "Wardwright synthetic model gateway"); + assert_eq!( + info.ui_url.as_deref(), + Some("http://127.0.0.1:8791/admin/runtime") + ); + assert!(info.capabilities.openai_chat_completions); + assert!(info.capabilities.model_listing); + assert!( + !info.capabilities.config_validation, + "Calciforge does not call a Wardwright validation API yet" + ); + assert!( + info.observability + .iter() + .any(|capability| capability.display_name.contains("receipt")) + ); +} diff --git a/docs/adr/0001-model-gateway-and-agent-boundaries.md b/docs/adr/0001-model-gateway-and-agent-boundaries.md index ba7dd267..4187ecdd 100644 --- a/docs/adr/0001-model-gateway-and-agent-boundaries.md +++ b/docs/adr/0001-model-gateway-and-agent-boundaries.md @@ -47,10 +47,10 @@ flowchart TD ``` The root model gateway has a small supported backend set. `http`, `helicone`, -`litellm`, `portkey`, `tensorzero`, `future-agi`, and `openrouter` use the same -OpenAI-compatible HTTP core. The engine name selects metadata, dashboard hints, -and small policy overlays such as Helicone auth/retry headers. `mock` is -deterministic local/test behavior. +`litellm`, `portkey`, `tensorzero`, `future-agi`, `openrouter`, and +`wardwright` use the same OpenAI-compatible HTTP core. The engine name selects +metadata, dashboard hints, and small policy overlays such as Helicone auth/retry +headers. `mock` is deterministic local/test behavior. Experimental or stale root backends such as `embedded`, `library`, and `traceloop` are not supported in production config. They can return later only @@ -120,3 +120,8 @@ privileged runtime path. `http`, `helicone`, `litellm`, `portkey`, `tensorzero`, `future-agi`, and `openrouter` now share the same OpenAI-compatible HTTP core, with named engines supplying policy/metadata overlays. + +2026-05-15: Wardwright became the forward path for synthetic-model composition. +Calciforge keeps in-process alloys, cascades, and dispatchers for compatibility, +but new synthetic route graphs should run through Wardwright as an +OpenAI-compatible provider adapter. diff --git a/docs/adr/0002-provider-adapter-boundary.md b/docs/adr/0002-provider-adapter-boundary.md index df87e723..9f1ba65f 100644 --- a/docs/adr/0002-provider-adapter-boundary.md +++ b/docs/adr/0002-provider-adapter-boundary.md @@ -28,11 +28,12 @@ Calciforge will use provider adapters as the primary model-call abstraction. - `ProviderAdapter` is the runtime trait for a configured model boundary. - `[[proxy.providers]]` is the preferred operational config surface. - A deployment may configure multiple adapters: Ollama, OpenRouter, LiteLLM, - Helicone, direct OpenAI-compatible HTTP, or future native/library adapters. + Helicone, Wardwright, direct OpenAI-compatible HTTP, or future native/library + adapters. - OpenAI-compatible engine adapters share one HTTP request/response core. Engine names such as `litellm`, `helicone`, `portkey`, `tensorzero`, - `future-agi`, and `openrouter` supply metadata, dashboard hints, and small - policy overlays; they are not separate copied gateways. + `future-agi`, `openrouter`, and `wardwright` supply metadata, dashboard + hints, and small policy overlays; they are not separate copied gateways. - Provider adapters expose observability capabilities as metadata separate from request routing. A provider may advertise a native dashboard, OTel export, OpenInference traces, Langfuse callbacks, or no trace sink at all. @@ -83,6 +84,13 @@ or synthetic selectors. Internal features such as adversary-detector classifier checks should ask for a role like `security.screening`; deployment config maps that role to the provider/model/synthetic selector that should serve it. +Calciforge's in-process synthetic selectors (`[[alloys]]`, `[[cascades]]`, and +`[[dispatchers]]`) are compatibility features. They stay available while +existing installs migrate, but new synthetic-model composition should live in a +provider adapter such as Wardwright. Calciforge should pass Wardwright model +names through as opaque OpenAI-compatible model IDs and let Wardwright own route +graphs, stream policy, and receipts. + Generic Calciforge wrappers should not grow provider-specific logic except through explicit adapter configuration. diff --git a/docs/index.md b/docs/index.md index ebfc6c51..71f07023 100644 --- a/docs/index.md +++ b/docs/index.md @@ -493,6 +493,13 @@ while the operator maps that role to a local model, a hosted gateway route, or a synthetic selector. Roles share the shortcut resolver instead of creating a parallel routing system. +The in-process synthetic selector vocabulary below is legacy compatibility now. +It still works, but new synthetic-model composition belongs in +[Wardwright](https://wardwright.dev/), which exposes an OpenAI-compatible API and +keeps receipts for route decisions. Calciforge can treat Wardwright as one more +provider adapter while keeping channel identity, secrets, and traffic policy at +the outer boundary. + The synthetic routing vocabulary is: - **Alloy** — blend among interchangeable models by weighted or diff --git a/docs/model-gateway.md b/docs/model-gateway.md index 50011375..7f0cefc8 100644 --- a/docs/model-gateway.md +++ b/docs/model-gateway.md @@ -17,9 +17,15 @@ The product boundary is not one magic gateway implementation. Calciforge owns the model access boundary: authentication, identity, per-agent model policy, auditing, aliases, synthetic selectors, and route selection. Concrete model traffic then exits through one or more configured `ProviderAdapter`s, such as -Ollama, OpenRouter, LiteLLM, Helicone, or a direct OpenAI-compatible endpoint. -Put another way: Calciforge decides which door opens; the provider adapter -walks through it. +Ollama, OpenRouter, LiteLLM, Helicone, Wardwright, or a direct +OpenAI-compatible endpoint. Put another way: Calciforge decides which door +opens; the provider adapter walks through it. + +Calciforge's built-in synthetic selectors (`[[alloys]]`, `[[cascades]]`, and +`[[dispatchers]]`) are now legacy compatibility features. They still work for +existing installs, but new synthetic-model composition is moving to +[Wardwright](https://wardwright.dev/), which exposes the same OpenAI-compatible +surface while owning route graphs, stream policy, and decision receipts. Operational installs should choose at least one explicit provider adapter. `mock` is test-only, and there is intentionally no built-in public-provider @@ -110,14 +116,14 @@ warns because that path bypasses provider-specific prefixes, API keys, and | Explicit model routes | Working | `[[proxy.model_routes]]` overrides provider pattern matching. | | Model shortcuts | Working | `[[model_shortcuts]]` gives users short aliases such as `sonnet`. | | Local model switching | Working | `[local_models]` manages local `mlx_lm.server` targets. | -| Alloys | Working | `[[alloys]]` samples among interchangeable constituents by `weighted` or `round_robin` strategy, with context-window safety checks. | -| Fallback behavior | Working, implicit | Alloy execution produces an ordered attempt plan; later constituents are tried when earlier ones fail. | -| Named cascades | Working | `[[cascades]]` defines explicit ordered fallback chains and skips targets whose declared context window cannot fit the request. | -| Dispatchers | Working | `[[dispatchers]]` picks the smallest configured context window that fits, then uses larger eligible models as fallbacks. | +| Alloys | Legacy compatibility | `[[alloys]]` samples among interchangeable constituents by `weighted` or `round_robin` strategy, with context-window safety checks. Prefer Wardwright for new configs. | +| Fallback behavior | Legacy compatibility | Alloy/cascade/dispatcher execution produces ordered attempt plans; later constituents are tried when earlier ones fail. Prefer Wardwright for new configs. | +| Named cascades | Legacy compatibility | `[[cascades]]` defines explicit ordered fallback chains and skips targets whose declared context window cannot fit the request. Prefer Wardwright for new configs. | +| Dispatchers | Legacy compatibility | `[[dispatchers]]` picks the smallest configured context window that fits, then uses larger eligible models as fallbacks. Prefer Wardwright for new configs. | | Token estimators | Working | `char_ratio`, `byte_ratio`, and optional `tiktoken-rs` support for OpenAI-compatible BPE counts. BPE means byte-pair encoding, a common way model APIs count tokens. | | CLI-backed subscription agents | Working | Codex, Claude Code, Kimi Code, Dirac, and generic executable adapters are agent routes, not gateway model selectors. | | External gateway metadata | Working | `/gateway`, `/gateway/ui`, and `!gateway` expose the selected provider adapter and operator dashboard link after sender identity resolution. | -| OpenAI-compatible provider adapter core | Working | `backend_type = "http"`, `"helicone"`, `"litellm"`, `"portkey"`, `"tensorzero"`, `"future-agi"`, and `"openrouter"` share the same `/v1/chat/completions` request path. Engine names select metadata, dashboard hints, and small policy overlays, not separate gateway implementations. | +| OpenAI-compatible provider adapter core | Working | `backend_type = "http"`, `"helicone"`, `"litellm"`, `"portkey"`, `"tensorzero"`, `"future-agi"`, `"openrouter"`, and `"wardwright"` share the same `/v1/chat/completions` request path. Engine names select metadata, dashboard hints, and small policy overlays, not separate gateway implementations. | | Provider observability capability metadata | Started | Provider adapters now advertise known observability surfaces separately from request routing, including native dashboards and future OTel/OpenInference/Langfuse sink shapes. Event export configuration and emission remain roadmap work. | | Builtin HTTP upstream adapter | Compatibility path | `backend_type = "http"` is the plain OpenAI-compatible HTTP shape. It is useful for direct providers, tests, and local development. Prefer a named engine such as `litellm`, `helicone`, or `openrouter` when that boundary owns provider registry, keys, retries, or dashboard state. | @@ -128,7 +134,7 @@ Calciforge's gateway layer is pluggable at the provider-adapter boundary. The OpenAI-compatible HTTP core, then applies a small engine policy for metadata, dashboard hints, and headers. `helicone` is no longer a privileged code path; it is one adapter kind beside `litellm`, `portkey`, `tensorzero`, `future-agi`, -`openrouter`, and plain `http`. +`openrouter`, `wardwright`, and plain `http`. That split matters. Request plumbing should be boring and shared. Provider engines can add operator dashboards, provider registries, virtual keys, retries, @@ -137,9 +143,10 @@ channels and agents talk to Calciforge. Calciforge intentionally treats external provider-boundary model IDs as opaque when that boundary owns provider configuration. If LiteLLM, Helicone, Portkey, -TensorZero, Future AGI, OpenRouter, or another gateway owns provider/key/model -state, Calciforge should not duplicate that registry. In Calciforge config, set -`model_credential_owner = "provider"` on the provider route. The provider's +TensorZero, Future AGI, OpenRouter, Wardwright, or another gateway owns +provider/key/model state, Calciforge should not duplicate that registry. In +Calciforge config, set `model_credential_owner = "provider"` on the provider +route. The provider's `api_key`/`api_key_file`, if present, then authenticates Calciforge to that provider boundary; it is not the upstream OpenAI, Anthropic, Ollama, or other final provider key. @@ -291,8 +298,8 @@ Calciforge's installer can provision a local Helicone deployment when `CALCIFORGE_HELICONE_ENABLED=true`. That path is heavier because it includes a dashboard, Postgres, ClickHouse, Jawn, and S3-compatible object storage pieces. The adapter boundary is intentionally where LiteLLM, Helicone, Portkey, -TensorZero, Future AGI, OpenRouter, and future PRs plug in without changing -agent/channel behavior. +TensorZero, Future AGI, OpenRouter, Wardwright, and future PRs plug in without +changing agent/channel behavior. Configure Calciforge manually by setting `backend_type` to the adapter kind and pointing `backend_url` at that engine's OpenAI-compatible base URL. @@ -326,6 +333,39 @@ backend_api_key_file = "/etc/calciforge/secrets/litellm-client-key" gateway_ui_url = "http://127.0.0.1:4000/ui" ``` +The same shape also works for Wardwright. Use this when you want Wardwright to +own synthetic model names such as `coding-balanced` or +`wardwright/coding-balanced` while Calciforge remains the outer channel, +identity, secret, and traffic-policy boundary: + +```toml +[proxy] +enabled = true +bind = "127.0.0.1:8080" +api_key_file = "/etc/calciforge/secrets/model-gateway-client-key" +backend_type = "wardwright" +backend_url = "http://127.0.0.1:8791/v1" +gateway_ui_url = "http://127.0.0.1:8791/admin/runtime" +``` + +For named providers, prefer a Wardwright namespace so the ownership boundary is +obvious: + +```toml +[[proxy.providers]] +id = "wardwright" +backend_type = "wardwright" +url = "http://127.0.0.1:8791/v1" +model_credential_owner = "provider" +models = ["wardwright/*"] +``` + +Wardwright responses may include `X-Wardwright-Receipt-Id`, +`X-Wardwright-Selected-Model`, and a `wardwright.receipt_id` body field. Those +are Wardwright-owned trace handles. Calciforge forwards the OpenAI-compatible +response body and copies `wardwright.receipt_id` into gateway telemetry when it +is present. + ### Retry and Fallback Policy There are two distinct failure-handling layers: @@ -687,6 +727,12 @@ intended classes: alloys, cascades, and dispatchers. They may reference other synthetic routing selectors as long as the resulting graph is a DAG; cycles fail config initialization. +This section documents the legacy in-process implementation. It is kept so old +configs still make sense and existing installs keep working. New work should +prefer Wardwright as an OpenAI-compatible provider adapter, because Wardwright +was split out to own this route-graph problem directly and records a receipt for +why each request took the path it did. + ### Alloy An alloy blends equivalent models. It is useful when any constituent diff --git a/scripts/check-architecture-ratchets.rb b/scripts/check-architecture-ratchets.rb index 438cb1b4..ae57eb3c 100644 --- a/scripts/check-architecture-ratchets.rb +++ b/scripts/check-architecture-ratchets.rb @@ -37,7 +37,6 @@ "crates/calciforge/src/install/wizard.rs" => 701, "crates/calciforge/src/providers/alloy.rs" => 1126, "crates/calciforge/src/proxy/gateway.rs" => 1001, - "crates/calciforge/src/proxy/gateway_tests.rs" => 718, "crates/calciforge/src/proxy/handlers.rs" => 2386, "crates/host-agent/src/main.rs" => 1288, "crates/paste-server/src/lib.rs" => 2623,