feat: direct api key and cheap model (#116)

ilblackdragon · desamtralized · claude · web-flow · commit 72623c9e5b20 · 2026-02-17T01:24:27.000Z
* feat: Support direct API key auth and cheap model routing Allow using IronClaw with any OpenAI-compatible API provider (e.g. Anthropic Claude) via API key, without requiring NEAR AI session auth. Changes: - Skip session authentication in chat_completions mode (API key auth) - Skip first-run onboard check when NEARAI_API_KEY is configured - Add `cheap_model` config field (NEARAI_CHEAP_MODEL env var) for a secondary lightweight model used for heartbeat, routing, evaluation - Add `create_cheap_llm_provider()` factory in llm module - Add `cheap_llm` to AgentDeps with fallback to main model - Route heartbeat through cheap model to reduce costs - Fix wizard compilation for new config field Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: address PR #20 review feedback - Check API key presence (not api_mode) for auth skip (ilblackdragon) - Add Settings::load() call in check_onboard_needed (ilblackdragon) - Warn and ignore cheap_model for non-NearAi backends (ilblackdragon) - Add unit tests for create_cheap_llm_provider (ilblackdragon) - Minor formatting cleanup in cheap provider match arm Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Samuel Barbosa <sambarbosaa@gmail.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/src/agent/agent_loop.rs b/src/agent/agent_loop.rs
@@ -67,6 +67,9 @@ enum AgenticLoopResult {
 pub struct AgentDeps {
     pub store: Option<Arc<dyn Database>>,
     pub llm: Arc<dyn LlmProvider>,
+    /// Cheap/fast LLM for lightweight tasks (heartbeat, routing, evaluation).
+    /// Falls back to the main `llm` if None.
+    pub cheap_llm: Option<Arc<dyn LlmProvider>>,
     pub safety: Arc<SafetyLayer>,
     pub tools: Arc<ToolRegistry>,
     pub workspace: Option<Arc<Workspace>>,
@@ -138,6 +141,11 @@ impl Agent {
         &self.deps.llm
     }
 
+    /// Returns the cheap/fast LLM provider when one is configured, otherwise the main `llm`.
+    fn cheap_llm(&self) -> &Arc<dyn LlmProvider> {
+        self.deps.cheap_llm.as_ref().unwrap_or(&self.deps.llm)
+    }
+
     fn safety(&self) -> &Arc<SafetyLayer> {
         &self.deps.safety
     }
@@ -301,7 +309,7 @@ impl Agent {
                     Some(spawn_heartbeat(
                         config,
                         workspace.clone(),
-                        self.llm().clone(),
+                        self.cheap_llm().clone(),
                         Some(notify_tx),
                     ))
                 } else {
diff --git a/src/config.rs b/src/config.rs
@@ -388,6 +388,9 @@ impl std::str::FromStr for NearAiApiMode {
 pub struct NearAiConfig {
     /// Model to use (e.g., "claude-3-5-sonnet-20241022", "gpt-4o")
     pub model: String,
+    /// Cheap/fast model for lightweight tasks (heartbeat, routing, evaluation).
+    /// Falls back to the main model if not set.
+    pub cheap_model: Option<String>,
     /// Base URL for the NEAR AI API (default: https://api.near.ai)
     pub base_url: String,
     /// Base URL for auth/refresh endpoints (default: https://private.near.ai)
@@ -454,6 +457,7 @@ impl LlmConfig {
                     "fireworks::accounts/fireworks/models/llama4-maverick-instruct-basic"
                         .to_string()
                 }),
+            cheap_model: optional_env("NEARAI_CHEAP_MODEL")?,
             base_url: optional_env("NEARAI_BASE_URL")?
                 .unwrap_or_else(|| "https://cloud-api.near.ai".to_string()),
             auth_base_url: optional_env("NEARAI_AUTH_URL")?
diff --git a/src/llm/mod.rs b/src/llm/mod.rs
@@ -183,3 +183,106 @@ fn create_openai_compatible_provider(config: &LlmConfig) -> Result<Arc<dyn LlmPr
     );
     Ok(Arc::new(RigAdapter::new(model, &compat.model)))
 }
+
+/// Create a cheap/fast LLM provider for lightweight tasks (heartbeat, routing, evaluation).
+///
+/// Returns `Ok(None)` when `cheap_model` (`NEARAI_CHEAP_MODEL`) is unset, or (with a warning)
+/// on non-NearAi backends; otherwise builds a Responses/ChatCompletions provider for that model.
+pub fn create_cheap_llm_provider(
+    config: &LlmConfig,
+    session: Arc<SessionManager>,
+) -> Result<Option<Arc<dyn LlmProvider>>, LlmError> {
+    let Some(ref cheap_model) = config.nearai.cheap_model else {
+        return Ok(None); // no cheap model configured
+    };
+
+    if config.backend != LlmBackend::NearAi {
+        tracing::warn!(
+            "NEARAI_CHEAP_MODEL is set but LLM_BACKEND is {:?}, not NearAi. \
+             Cheap model setting will be ignored.",
+            config.backend
+        );
+        return Ok(None);
+    }
+
+    let mut cheap_config = config.nearai.clone();
+    cheap_config.model = cheap_model.clone(); // only the model differs from the main config
+
+    tracing::info!("Cheap LLM provider: {}", cheap_model);
+
+    match cheap_config.api_mode {
+        NearAiApiMode::Responses => Ok(Some(Arc::new(NearAiProvider::new(cheap_config, session)))),
+        NearAiApiMode::ChatCompletions => {
+            Ok(Some(Arc::new(NearAiChatProvider::new(cheap_config)?)))
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::config::{LlmBackend, NearAiApiMode, NearAiConfig};
+    use std::path::PathBuf;
+
+    fn test_nearai_config() -> NearAiConfig {
+        NearAiConfig {
+            model: "test-model".to_string(),
+            cheap_model: None,
+            base_url: "https://api.near.ai".to_string(),
+            auth_base_url: "https://private.near.ai".to_string(),
+            session_path: PathBuf::from("/tmp/test-session.json"),
+            api_mode: NearAiApiMode::Responses, // tests below take the NearAiProvider branch
+            api_key: None,
+            fallback_model: None,
+            max_retries: 3,
+        }
+    }
+
+    fn test_llm_config() -> LlmConfig {
+        LlmConfig {
+            backend: LlmBackend::NearAi,
+            nearai: test_nearai_config(),
+            openai: None,
+            anthropic: None,
+            ollama: None,
+            openai_compatible: None,
+        }
+    }
+
+    #[test]
+    fn test_create_cheap_llm_provider_returns_none_when_not_configured() {
+        let config = test_llm_config();
+        let session = Arc::new(SessionManager::new(SessionConfig::default()));
+
+        let result = create_cheap_llm_provider(&config, session);
+        assert!(result.is_ok());
+        assert!(result.unwrap().is_none());
+    }
+
+    #[test]
+    fn test_create_cheap_llm_provider_creates_provider_when_configured() {
+        let mut config = test_llm_config();
+        config.nearai.cheap_model = Some("cheap-test-model".to_string());
+
+        let session = Arc::new(SessionManager::new(SessionConfig::default()));
+        let result = create_cheap_llm_provider(&config, session);
+
+        assert!(result.is_ok());
+        let provider = result.unwrap();
+        assert!(provider.is_some());
+        assert_eq!(provider.unwrap().model_name(), "cheap-test-model");
+    }
+
+    #[test]
+    fn test_create_cheap_llm_provider_ignored_for_non_nearai_backend() {
+        let mut config = test_llm_config();
+        config.backend = LlmBackend::OpenAi; // any non-NearAi backend ignores cheap_model
+        config.nearai.cheap_model = Some("cheap-test-model".to_string());
+
+        let session = Arc::new(SessionManager::new(SessionConfig::default()));
+        let result = create_cheap_llm_provider(&config, session);
+
+        assert!(result.is_ok());
+        assert!(result.unwrap().is_none());
+    }
+}
diff --git a/src/main.rs b/src/main.rs
@@ -23,8 +23,8 @@ use ironclaw::{
     context::ContextManager,
     extensions::ExtensionManager,
     llm::{
-        FailoverProvider, LlmProvider, SessionConfig, create_llm_provider,
-        create_llm_provider_with_config, create_session_manager,
+        FailoverProvider, LlmProvider, SessionConfig, create_cheap_llm_provider,
+        create_llm_provider, create_llm_provider_with_config, create_session_manager,
     },
     orchestrator::{
         ContainerJobConfig, ContainerJobManager, OrchestratorApi, TokenStore,
@@ -307,8 +307,11 @@ async fn main() -> anyhow::Result<()> {
     };
     let session = create_session_manager(session_config).await;
 
-    // Ensure we're authenticated before proceeding (only needed for NEAR AI backend)
-    if config.llm.backend == ironclaw::config::LlmBackend::NearAi {
+    // Session-based auth is only needed for the NEAR AI backend without an API key.
+    // When an API key is configured, session auth is skipped regardless of API mode.
+    if config.llm.backend == ironclaw::config::LlmBackend::NearAi
+        && config.llm.nearai.api_key.is_none()
+    {
         session.ensure_authenticated().await?;
     }
 
@@ -534,6 +537,12 @@ async fn main() -> anyhow::Result<()> {
             llm
         };
 
+    // Initialize cheap LLM provider for lightweight tasks (heartbeat, evaluation)
+    let cheap_llm = create_cheap_llm_provider(&config.llm, session.clone())?;
+    if let Some(ref cheap) = cheap_llm {
+        tracing::info!("Cheap LLM provider initialized: {}", cheap.model_name());
+    }
+
     // Initialize safety layer
     let safety = Arc::new(SafetyLayer::new(&config.safety));
     tracing::info!("Safety layer initialized");
@@ -1185,6 +1194,7 @@ async fn main() -> anyhow::Result<()> {
     let deps = AgentDeps {
         store: db,
         llm,
+        cheap_llm,
         safety,
         tools,
         workspace,
@@ -1229,6 +1239,18 @@ fn check_onboard_needed() -> Option<&'static str> {
         return Some("Database not configured");
     }
 
+    // First run (no completed onboarding, no session file); skipped when NEARAI_API_KEY is set.
+    // Reads NEARAI_API_KEY env var directly because this function runs
+    // before Config is loaded -- Config::from_env() may fail without a
+    // database URL, which is what triggers onboarding in the first place.
+    if std::env::var("NEARAI_API_KEY").is_err() {
+        let settings = ironclaw::settings::Settings::load();
+        let session_path = ironclaw::llm::session::default_session_path();
+        if !settings.onboard_completed && !session_path.exists() {
+            return Some("First run");
+        }
+    }
+
     None
 }
 
diff --git a/src/setup/wizard.rs b/src/setup/wizard.rs
@@ -1014,6 +1014,7 @@ impl SetupWizard {
             backend: crate::config::LlmBackend::NearAi,
             nearai: crate::config::NearAiConfig {
                 model: "dummy".to_string(),
+                cheap_model: None,
                 base_url,
                 auth_base_url,
                 session_path: crate::llm::session::default_session_path(),