From 1688b484c4308a906f02ced565c1e0a36172aa7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Pedro=20Bol=C3=ADvar=20Puente?= Date: Thu, 11 Jun 2026 02:58:12 +0200 Subject: [PATCH 1/8] Revert "refactor: remove max_iterations from evolution config and UI (#325)" This reverts commit 9a017e9c4e6c5dbf8e013631398eb32227bcdc11. --- .../configurable-derive/src/attrs.rs | 2 +- apps/native/src-tauri/src/cli.rs | 19 +++ .../src-tauri/src/commands/settings_io.rs | 2 + .../native/src-tauri/src/commands/ui_prefs.rs | 7 + apps/native/src-tauri/src/evolve/config.rs | 20 ++- apps/native/src-tauri/src/evolve/mod.rs | 146 +++++++++++++++++- apps/native/src-tauri/src/evolve/tools.rs | 25 ++- apps/native/src-tauri/src/main.rs | 2 + .../src-tauri/src/shared_types/prefs.rs | 4 + .../src/storage/configurable_scope.rs | 2 +- apps/native/src-tauri/src/storage/store.rs | 32 ++++ .../native/src/components/ui/tabs.stories.tsx | 2 +- .../auto-config-field.stories.tsx.snap | 2 +- .../auto-tuning-section.stories.tsx.snap | 2 +- .../settings/auto-config-field.stories.tsx | 12 +- .../settings/auto-tuning-section.stories.tsx | 14 +- apps/native/src/ipc/api.test.ts | 1 + apps/native/src/ipc/types.ts | 8 + apps/native/src/lib/constants.ts | 1 + 19 files changed, 277 insertions(+), 26 deletions(-) diff --git a/apps/native/src-tauri/configurable-derive/src/attrs.rs b/apps/native/src-tauri/configurable-derive/src/attrs.rs index e513ed604..1836a9017 100644 --- a/apps/native/src-tauri/configurable-derive/src/attrs.rs +++ b/apps/native/src-tauri/configurable-derive/src/attrs.rs @@ -188,7 +188,7 @@ mod tests { let input: DeriveInput = parse_quote! { #[config(scope = "repo", display_name = "Evolution")] struct EvolutionLimits { - max_token_budget: usize, + max_iterations: usize, } }; diff --git a/apps/native/src-tauri/src/cli.rs b/apps/native/src-tauri/src/cli.rs index 798c6401f..68505188e 100644 --- a/apps/native/src-tauri/src/cli.rs +++ b/apps/native/src-tauri/src/cli.rs @@ -23,6 +23,7 @@ use tauri::AppHandle; pub struct EvolveConfig { pub prompt: String, pub config: Option, + pub max_iterations: Option, pub max_output_tokens: Option, pub max_token_budget: Option, pub evolve_provider: Option, @@ -55,6 +56,10 @@ pub enum Commands { #[arg(short, long)] config: Option, + /// Legacy fallback for providers that do not report token usage + #[arg(short, long, hide = true)] + max_iterations: Option, + /// Maximum output tokens requested per evolution model call #[arg(long)] max_output_tokens: Option, @@ -106,6 +111,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result let EvolveConfig { prompt, config, + max_iterations, max_output_tokens, max_token_budget, evolve_provider, @@ -192,6 +198,12 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result None => crate::storage::store::get_summary_model(app).ok().flatten(), }; + // Effective legacy iteration fallback: prefer CLI value, otherwise read from store (has default) + let effective_max_iterations: usize = match max_iterations { + Some(v) => v, + None => crate::storage::store::get_max_iterations(app) + .unwrap_or(crate::storage::store::DEFAULT_MAX_ITERATIONS), + }; let effective_max_output_tokens: usize = match max_output_tokens { Some(v) => v, None => crate::storage::store::get_max_output_tokens(app) @@ -205,6 +217,12 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result .unwrap_or(crate::storage::store::DEFAULT_MAX_TOKEN_BUDGET), }; + // Legacy max iterations + if let Some(iterations) = max_iterations { + crate::storage::store::set_max_iterations(app, iterations) + .map_err(|e| format!("Failed to set max iterations: {}", e))?; + } + if let Some(output_tokens) = max_output_tokens { crate::storage::store::set_max_output_tokens(app, output_tokens) .map_err(|e| format!("Failed to set max output tokens: {}", e))?; @@ -272,6 +290,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result "ok": ok, "state": state_str, "prompt": prompt, + "maxIterations": effective_max_iterations, "maxOutputTokens": effective_max_output_tokens, "maxTokenBudget": effective_max_token_budget, "evolveProvider": effective_evolve_provider, diff --git a/apps/native/src-tauri/src/commands/settings_io.rs b/apps/native/src-tauri/src/commands/settings_io.rs index b1eaccaaf..814406207 100644 --- a/apps/native/src-tauri/src/commands/settings_io.rs +++ b/apps/native/src-tauri/src/commands/settings_io.rs @@ -240,6 +240,7 @@ mod tests { &mut output, &mut skipped, serde_json::to_value(EvolutionLimits { + max_iterations: 12, max_token_budget: 80_000, max_build_attempts: 4, max_output_tokens: 16_384, @@ -250,6 +251,7 @@ mod tests { assert_eq!(output.get("hostAttr"), Some(&json!("macbook"))); assert_eq!(output.get("developerMode"), Some(&json!(true))); + assert_eq!(output.get("maxIterations"), Some(&json!(12))); assert_eq!(output.get("maxBuildAttempts"), Some(&json!(4))); assert!(!output.contains_key("openaiApiKey")); assert!(!output.contains_key("promptHistory")); diff --git a/apps/native/src-tauri/src/commands/ui_prefs.rs b/apps/native/src-tauri/src/commands/ui_prefs.rs index 50e09862f..e92bfbeb3 100644 --- a/apps/native/src-tauri/src/commands/ui_prefs.rs +++ b/apps/native/src-tauri/src/commands/ui_prefs.rs @@ -27,6 +27,8 @@ pub async fn ui_get_prefs(app: AppHandle) -> Result Result Self { Self { + max_iterations: 25, max_token_budget: 50_000, max_build_attempts: 5, max_output_tokens: 32_768, @@ -98,6 +108,7 @@ mod tests { fn default_matches_configured_field_defaults() { let limits = EvolutionLimits::default(); + assert_eq!(limits.max_iterations, 25); assert_eq!(limits.max_token_budget, 50_000); assert_eq!(limits.max_build_attempts, 5); assert_eq!(limits.max_output_tokens, 32_768); @@ -106,6 +117,7 @@ mod tests { #[test] fn unknown_fields_do_not_change_limits() { let limits: EvolutionLimits = serde_json::from_value(serde_json::json!({ + "maxIterations": 11, "maxTokenBudget": 80_000, "maxBuildAttempts": 3, "maxOutputTokens": 16_384, @@ -116,6 +128,7 @@ mod tests { assert_eq!( limits, EvolutionLimits { + max_iterations: 11, max_token_budget: 80_000, max_build_attempts: 3, max_output_tokens: 16_384, @@ -125,9 +138,12 @@ mod tests { #[test] fn missing_fields_use_defaults() { - let limits: EvolutionLimits = - serde_json::from_value(serde_json::json!({})).expect("limits deserialize"); + let limits: EvolutionLimits = serde_json::from_value(serde_json::json!({ + "maxIterations": 11, + })) + .expect("limits deserialize"); + assert_eq!(limits.max_iterations, 11); assert_eq!(limits.max_token_budget, 50_000); assert_eq!(limits.max_build_attempts, 5); assert_eq!(limits.max_output_tokens, 32_768); diff --git a/apps/native/src-tauri/src/evolve/mod.rs b/apps/native/src-tauri/src/evolve/mod.rs index 6c81336dc..5250e7f06 100644 --- a/apps/native/src-tauri/src/evolve/mod.rs +++ b/apps/native/src-tauri/src/evolve/mod.rs @@ -41,7 +41,7 @@ use std::sync::Arc; use std::time::Duration; use tauri::{AppHandle, Runtime}; use tokio::time::sleep; -use tools::{ToolResult, create_tools, execute_tool}; +use tools::{ToolResult, create_tools, execute_tool, is_editing_tool}; pub use types::{EvolutionProgress, EvolutionRunError}; use crate::{ @@ -315,6 +315,10 @@ fn log_api_error( const DEFAULT_MODEL: &str = "anthropic/claude-sonnet-4"; const DEFAULT_OLLAMA_API_BASE: &str = "http://localhost:11434"; +// Percentage of max_iterations after which we require at least one edit/build_check. +// Example: with max_iterations=50 and this set to 75, threshold is 37 iterations. +const MAX_ITERATIONS_BEFORE_EDIT_PERCENT: usize = 75; + // Applied separately to stdout and stderr. So when thinking about tokens, // the effective output limit could be up to double this if both are long. const BUILD_OUTPUT_MAX_CHARS: usize = 6_000; @@ -551,6 +555,8 @@ const LIMIT_DECISION_STOP: &str = "Stop"; #[derive(Debug, Clone, Copy)] enum EvolutionLimitKind { + NoProgress, + MaxIterations, BuildAttempts, } @@ -565,6 +571,7 @@ impl EvolutionLimitKind { fn attempts_label(self, attempts: usize) -> String { match self { Self::BuildAttempts => format!("{} build attempts", attempts), + Self::NoProgress | Self::MaxIterations => format!("{} attempts", attempts), } } @@ -577,6 +584,14 @@ impl EvolutionLimitKind { fn stop_summary(self, attempts: usize) -> String { match self { + Self::NoProgress => format!( + "Evolution stopped after {} because the AI had not started making concrete changes.", + self.attempts_label(attempts) + ), + Self::MaxIterations => format!( + "Evolution stopped after reaching {}. The current conversation context was preserved.", + self.attempts_label(attempts) + ), Self::BuildAttempts => format!( "Evolution stopped after reaching {}. You can review the current changes or continue with a follow-up prompt.", self.attempts_label(attempts) @@ -832,13 +847,26 @@ pub async fn generate_evolution( } = config::EvolutionLimits::load(app) .inspect_err(|e| warn!("EvolutionLimits::load failed ({e}); using defaults")) .unwrap_or_default(); + let legacy_max_iterations = + store::get_max_iterations(app).unwrap_or(store::DEFAULT_MAX_ITERATIONS); let max_token_budget = store::get_max_token_budget(app).unwrap_or(store::DEFAULT_MAX_TOKEN_BUDGET); + let mut max_iterations = legacy_max_iterations; + let mut max_iterations_before_edit = std::cmp::max( + 1, + (max_iterations * MAX_ITERATIONS_BEFORE_EDIT_PERCENT) / 100, + ); + let max_iterations_before_edit_increment = max_iterations_before_edit.max(1); + let max_iterations_increment = max_iterations.max(1); let max_build_attempts_increment = max_build_attempts.max(1); let interactive_limit_prompt = !banned_tools.contains(&"ask_user"); info!( - "Limits: max_token_budget={}, max_build_attempts={}", - max_token_budget, max_build_attempts, + "Limits: max_token_budget={}, max_iterations_before_edit={} ({}%), max_build_attempts={}, max_iterations={}", + max_token_budget, + max_iterations_before_edit, + MAX_ITERATIONS_BEFORE_EDIT_PERCENT, + max_build_attempts, + max_iterations, ); let tools = create_tools(banned_tools); @@ -908,7 +936,8 @@ pub async fn generate_evolution( let gitignore_matcher = gitignore::load_gitignore_matcher(repo_root.as_path())?; - // Track whether we've run a build check + // Track whether we've made any actual edits and/or build checks + let mut made_edit = false; let mut made_build_check = false; // Agentic loop - let the model use tools until done AND build passes @@ -1150,7 +1179,10 @@ pub async fn generate_evolution( success, ); - // Track if we've made a build check + // Track if we've made an edit or build check + if is_editing_tool(tool_name) { + made_edit = true; + } if tool_name == "build_check" { made_build_check = true; tool_key = Some(format!("build_check_{}", iteration)); @@ -1433,6 +1465,110 @@ Do not invent tool names and do not place tool invocations in assistant content. break; } + // Safety limits -- Max Iterations Before Edit Check + if iteration >= max_iterations_before_edit && !(made_edit || made_build_check) { + warn!( + "⚠️ No edit or build_check by iteration {} - asking whether to continue", + max_iterations_before_edit + ); + match ask_to_continue_after_limit( + app, + start_time, + iteration, + EvolutionLimitKind::NoProgress, + iteration, + interactive_limit_prompt, + ) + .await + { + LimitDecision::Continue => { + max_iterations_before_edit += max_iterations_before_edit_increment; + max_iterations = max_iterations.max(max_iterations_before_edit); + info!( + "Extending no-progress limit to iteration {} and max iterations to {}", + max_iterations_before_edit, max_iterations + ); + } + LimitDecision::Stop => { + finish_after_limit_stop( + app, + &mut evolution, + start_time, + iteration, + EvolutionLimitKind::NoProgress, + iteration, + ); + break; + } + LimitDecision::Cancelled => { + evolution.state = EvolutionState::Failed; + return Err(EvolutionRunError::from_state( + session_control::EVOLUTION_CANCELLED_MSG, + &evolution, + iteration, + build_attempts, + total_tokens, + ) + .into()); + } + } + } + + // Safety limits -- Max Iterations + if iteration >= max_iterations { + warn!( + "⚠️ Evolution reached maximum iterations ({}) - asking whether to continue", + max_iterations + ); + match ask_to_continue_after_limit( + app, + start_time, + iteration, + EvolutionLimitKind::MaxIterations, + iteration, + interactive_limit_prompt, + ) + .await + { + LimitDecision::Continue => { + max_iterations += max_iterations_increment; + info!("Extending max iterations to {}", max_iterations); + + // Avoid immediately prompting again this same iteration if build attempts + // are already at/over the current ceiling. + if build_attempts >= max_build_attempts { + max_build_attempts += max_build_attempts_increment; + info!( + "Also extending max build attempts to {}", + max_build_attempts + ); + } + } + LimitDecision::Stop => { + finish_after_limit_stop( + app, + &mut evolution, + start_time, + iteration, + EvolutionLimitKind::MaxIterations, + iteration, + ); + break; + } + LimitDecision::Cancelled => { + evolution.state = EvolutionState::Failed; + return Err(EvolutionRunError::from_state( + session_control::EVOLUTION_CANCELLED_MSG, + &evolution, + iteration, + build_attempts, + total_tokens, + ) + .into()); + } + } + } + // Safety limits -- Max Build Attempts if build_attempts >= max_build_attempts { warn!( diff --git a/apps/native/src-tauri/src/evolve/tools.rs b/apps/native/src-tauri/src/evolve/tools.rs index b464fe00b..33f5bfde2 100644 --- a/apps/native/src-tauri/src/evolve/tools.rs +++ b/apps/native/src-tauri/src/evolve/tools.rs @@ -139,6 +139,13 @@ pub fn execute_tool( } } +/// Helper to determine if a tool is an editing tool, i.e. it +/// makes changes to the nix config that count as "edits" in the +/// evolution process and should be tracked as such. +pub fn is_editing_tool(name: &str) -> bool { + matches!(name, "edit_file" | "edit_nix_file" | "ensure_secret") +} + // ============================================================================= // Shared helpers (used across tool modules) // ============================================================================= @@ -203,7 +210,7 @@ pub(crate) fn ensure_nixmac_edit_allowed(tool: &str, path: &str) -> Result<()> { #[cfg(test)] mod tests { - use super::{ToolResult, execute_tool, truncate_for_log}; + use super::{ToolResult, execute_tool, is_editing_tool, truncate_for_log}; use crate::evolve::gitignore::load_gitignore_matcher; use serde_json::json; use std::fs; @@ -232,6 +239,22 @@ mod tests { assert_eq!(truncate_for_log(&s, 10), format!("{}...", "→".repeat(10))); } + #[test] + fn returns_true_for_editing_tools() { + assert!(is_editing_tool("edit_file")); + assert!(is_editing_tool("edit_nix_file")); + assert!(is_editing_tool("ensure_secret")); + } + + #[test] + fn returns_false_for_non_editing_tools() { + assert!(!is_editing_tool("read_file")); + assert!(!is_editing_tool("list_files")); + assert!(!is_editing_tool("build_check")); + assert!(!is_editing_tool("done")); + assert!(!is_editing_tool("")); + } + #[test] fn read_file_rejects_base_gitignored_files() { let tmp = tempdir().expect("tempdir"); diff --git a/apps/native/src-tauri/src/main.rs b/apps/native/src-tauri/src/main.rs index 119483f60..08cd04794 100644 --- a/apps/native/src-tauri/src/main.rs +++ b/apps/native/src-tauri/src/main.rs @@ -308,6 +308,7 @@ fn run_cli_mode(context: tauri::Context) -> i32 { Some(cli::Commands::Evolve { prompt, config, + max_iterations, max_output_tokens, max_token_budget, evolve_provider, @@ -365,6 +366,7 @@ fn run_cli_mode(context: tauri::Context) -> i32 { let cfg = cli::EvolveConfig { prompt, config, + max_iterations, max_output_tokens, max_token_budget, evolve_provider, diff --git a/apps/native/src-tauri/src/shared_types/prefs.rs b/apps/native/src-tauri/src/shared_types/prefs.rs index 2826d37ab..ac5fb4678 100644 --- a/apps/native/src-tauri/src/shared_types/prefs.rs +++ b/apps/native/src-tauri/src/shared_types/prefs.rs @@ -32,6 +32,8 @@ pub struct UiPrefs { pub evolve_provider: Option, /// Model used for AI evolution. pub evolve_model: Option, + /// Legacy maximum agent iterations per evolution. + pub max_iterations: Option, /// Maximum provider-reported tokens per evolution. pub max_token_budget: Option, /// Maximum build attempts per evolution. @@ -80,6 +82,8 @@ pub struct UiPrefsUpdate { pub summary_provider: Option, /// Summary model update. pub summary_model: Option, + /// Legacy maximum iteration count update. + pub max_iterations: Option, /// Maximum token budget update. pub max_token_budget: Option, /// Maximum build-attempt count update. diff --git a/apps/native/src-tauri/src/storage/configurable_scope.rs b/apps/native/src-tauri/src/storage/configurable_scope.rs index 459ad51db..af494dd5b 100644 --- a/apps/native/src-tauri/src/storage/configurable_scope.rs +++ b/apps/native/src-tauri/src/storage/configurable_scope.rs @@ -26,7 +26,7 @@ const REPO_README_CONTENT: &str = "\ This directory is managed by [nixmac](https://github.com/darkmatter/nixmac). `settings.json` holds user preferences that should follow you across machines -— things like the default model and confirmation +— things like agent iteration limits, default model, and confirmation behavior. The file is plain JSON; nixmac reads it on the next agent run. Per-device settings (developer mode, pinned version, update channel, model diff --git a/apps/native/src-tauri/src/storage/store.rs b/apps/native/src-tauri/src/storage/store.rs index 690ebcdd4..1201334ed 100644 --- a/apps/native/src-tauri/src/storage/store.rs +++ b/apps/native/src-tauri/src/storage/store.rs @@ -54,6 +54,7 @@ pub const SYNC_SECRET_KEYCHAIN_KEY: &str = "nixmacSyncSecret"; /// Default sync server when the user has not configured a custom endpoint. pub const DEFAULT_SYNC_BASE_URL: &str = "https://sync.nixmac.app"; +pub const DEFAULT_MAX_ITERATIONS: usize = 25; pub const DEFAULT_MAX_OUTPUT_TOKENS: usize = 32_768; pub const DEFAULT_MAX_TOKEN_BUDGET: u32 = 50_000; const KEYCHAIN_SERVICE: &str = "com.darkmatter.nixmac"; @@ -624,6 +625,37 @@ fn get_repo_store( Ok(store) } +/// Gets the maximum iterations for evolution (default: 25). Repo-scoped. +pub fn get_max_iterations(app: &AppHandle) -> Result { + if let Some(limits) = + app.try_state::>() + { + return Ok(limits.read_sync().max_iterations); + } + + let value = get_repo_store(app) + .ok() + .and_then(|s| s.get("maxIterations")) + .and_then(|v| serde_json::from_value::(v).ok()) + .unwrap_or(DEFAULT_MAX_ITERATIONS); + Ok(value) +} + +pub fn set_max_iterations(app: &AppHandle, max: usize) -> Result<()> { + if let Some(limits) = + app.try_state::>() + { + let mut limits = limits.write_sync(app); + limits.max_iterations = max; + return Ok(()); + } + + let store = get_repo_store(app)?; + store.set("maxIterations", serde_json::json!(max)); + store.save()?; + Ok(()) +} + /// Gets the maximum token budget for evolution (default: 50,000). pub fn get_max_token_budget(app: &AppHandle) -> Result { Ok(get_json_pref(app, "maxTokenBudget")?.unwrap_or(DEFAULT_MAX_TOKEN_BUDGET)) diff --git a/apps/native/src/components/ui/tabs.stories.tsx b/apps/native/src/components/ui/tabs.stories.tsx index ade978720..14264bc82 100644 --- a/apps/native/src/components/ui/tabs.stories.tsx +++ b/apps/native/src/components/ui/tabs.stories.tsx @@ -23,7 +23,7 @@ export const SettingsSections = meta.story({ App preferences and telemetry controls. - Provider, model, and token budgets. + Provider, model, and iteration limits. Advanced diagnostics and pinned release controls. diff --git a/apps/native/src/components/widget/settings/__snapshots__/auto-config-field.stories.tsx.snap b/apps/native/src/components/widget/settings/__snapshots__/auto-config-field.stories.tsx.snap index 95839b364..79a8503ab 100644 --- a/apps/native/src/components/widget/settings/__snapshots__/auto-config-field.stories.tsx.snap +++ b/apps/native/src/components/widget/settings/__snapshots__/auto-config-field.stories.tsx.snap @@ -1,3 +1,3 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[`Controls 1`] = `"
"`; +exports[`Controls 1`] = `"
"`; diff --git a/apps/native/src/components/widget/settings/__snapshots__/auto-tuning-section.stories.tsx.snap b/apps/native/src/components/widget/settings/__snapshots__/auto-tuning-section.stories.tsx.snap index a50b3da2f..f3b0c47f9 100644 --- a/apps/native/src/components/widget/settings/__snapshots__/auto-tuning-section.stories.tsx.snap +++ b/apps/native/src/components/widget/settings/__snapshots__/auto-tuning-section.stories.tsx.snap @@ -1,5 +1,5 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[`Evolution Settings 1`] = `"
Tuning

Knobs that control how the evolution loop behaves. Changes take effect on the next run. Saved to .nixmac/settings.json in your config repo so they sync across machines.

"`; +exports[`Evolution Settings 1`] = `"
Tuning

Knobs that control how the evolution loop behaves. Changes take effect on the next run. Saved to .nixmac/settings.json in your config repo so they sync across machines.

"`; exports[`Load Error 1`] = `"
Tuning

Knobs that control how the evolution loop behaves. Changes take effect on the next run. Saved to .nixmac/settings.json in your config repo so they sync across machines.

Failed to load settings schema: Config registry unavailable

"`; diff --git a/apps/native/src/components/widget/settings/auto-config-field.stories.tsx b/apps/native/src/components/widget/settings/auto-config-field.stories.tsx index a2f4153a5..2ab009cd2 100644 --- a/apps/native/src/components/widget/settings/auto-config-field.stories.tsx +++ b/apps/native/src/components/widget/settings/auto-config-field.stories.tsx @@ -7,12 +7,12 @@ import type { ConfigField } from "@/ipc/types"; const fields: ConfigField[] = [ { - key: "maxTokenBudget", - label: "Max token budget", - help: "Provider-reported tokens before the agent stops.", - ty: { kind: "number", min: 1000, max: 1000000, step: 1000 }, - default: 50000, - current: 50000, + key: "maxIterations", + label: "Max iterations", + help: "API calls before the agent stops.", + ty: { kind: "number", min: 1, max: 200, step: 1 }, + default: 25, + current: 25, }, { key: "autoSummarize", diff --git a/apps/native/src/components/widget/settings/auto-tuning-section.stories.tsx b/apps/native/src/components/widget/settings/auto-tuning-section.stories.tsx index f1f4c04f3..3a6a6eb7b 100644 --- a/apps/native/src/components/widget/settings/auto-tuning-section.stories.tsx +++ b/apps/native/src/components/widget/settings/auto-tuning-section.stories.tsx @@ -13,12 +13,12 @@ const schemas: ConfigurableSchema[] = [ description: "How long the agent will try before giving up.", fields: [ { - key: "maxTokenBudget", - label: "Max token budget", - help: "Provider-reported tokens before the agent stops. Lower is faster but may not finish complex changes.", - ty: { kind: "number", min: 1000, max: 1000000, step: 1000 }, - default: 50000, - current: 50000, + key: "maxIterations", + label: "Max iterations", + help: "API calls before the agent stops. Lower is faster but may not finish complex changes.", + ty: { kind: "number", min: 1, max: 200, step: 1 }, + default: 25, + current: 25, }, { key: "maxBuildAttempts", @@ -69,7 +69,7 @@ export const EvolutionSettings = meta.story({ ], play: async ({ canvasElement }) => { const canvas = within(canvasElement); - await waitFor(() => canvas.getByLabelText("Max token budget")); + await waitFor(() => canvas.getByLabelText("Max iterations")); }, }); diff --git a/apps/native/src/ipc/api.test.ts b/apps/native/src/ipc/api.test.ts index 3175a23b8..96504e996 100644 --- a/apps/native/src/ipc/api.test.ts +++ b/apps/native/src/ipc/api.test.ts @@ -34,6 +34,7 @@ const prefs = (overrides: Partial = {}): UiPrefs => summaryModel: "openai/gpt-4o-mini", evolveProvider: "openrouter", evolveModel: "anthropic/claude-sonnet-4", + maxIterations: 25, maxBuildAttempts: 5, sendDiagnostics: false, confirmBuild: true, diff --git a/apps/native/src/ipc/types.ts b/apps/native/src/ipc/types.ts index e81d0ebbb..4c189169e 100644 --- a/apps/native/src/ipc/types.ts +++ b/apps/native/src/ipc/types.ts @@ -1640,6 +1640,10 @@ evolveProvider: string | null; * Model used for AI evolution. */ evolveModel: string | null; +/** + * Legacy maximum agent iterations per evolution. + */ +maxIterations: number | null; /** * Maximum provider-reported tokens per evolution. */ @@ -1727,6 +1731,10 @@ summaryProvider: string | null; * Summary model update. */ summaryModel: string | null; +/** + * Legacy maximum iteration count update. + */ +maxIterations: number | null; /** * Maximum token budget update. */ diff --git a/apps/native/src/lib/constants.ts b/apps/native/src/lib/constants.ts index 6b81ad695..7b87128cb 100644 --- a/apps/native/src/lib/constants.ts +++ b/apps/native/src/lib/constants.ts @@ -1,5 +1,6 @@ // Must match session_control::EVOLUTION_CANCELLED_MSG in src-tauri. export const EVOLUTION_CANCELLED_MSG = "Evolution cancelled by user"; +export const DEFAULT_MAX_ITERATIONS = 25; export const DEFAULT_MAX_OUTPUT_TOKENS = 32_768; export const DEFAULT_MAX_TOKEN_BUDGET = 50_000; export const EVOLVE_EVENT_CHANNEL = "darwin:evolve:event"; From 3eda1881c5d8762928b950a7e11cd272d2b63a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Pedro=20Bol=C3=ADvar=20Puente?= Date: Thu, 11 Jun 2026 03:01:34 +0200 Subject: [PATCH 2/8] evolve: add TokenBudget limit kind + consolidate read path Adds the TokenBudget variant to EvolutionLimitKind (with matching attempts_label/prompt/stop_summary arms and a small format_token_count helper) so the next commit can wire up the loop guard. Also consolidates the max_token_budget read: the evolve loop now destructures it from EvolutionLimits::load alongside max_build_attempts and (post-#325-revert) max_iterations, instead of calling store::get_max_token_budget separately. To keep UI writes via store::set_max_token_budget routed through the same source of truth, the store getter/setter are made Slice-aware (mirroring the existing get_max_iterations / get_max_build_attempts pattern), with a fallback to the legacy tauri-plugin-store path when the Slice isn't registered. No behavior change in the loop yet; enforcement lands in the next commit. --- apps/native/src-tauri/src/evolve/mod.rs | 41 +++++++++++++++++----- apps/native/src-tauri/src/storage/store.rs | 16 ++++++++- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/apps/native/src-tauri/src/evolve/mod.rs b/apps/native/src-tauri/src/evolve/mod.rs index 5250e7f06..2307d2e1a 100644 --- a/apps/native/src-tauri/src/evolve/mod.rs +++ b/apps/native/src-tauri/src/evolve/mod.rs @@ -558,6 +558,21 @@ enum EvolutionLimitKind { NoProgress, MaxIterations, BuildAttempts, + // Enforced in the next commit; the variant lands here so the + // method arms and the loop guard can be split across two commits + // that each compile cleanly. + #[allow(dead_code)] + TokenBudget, +} + +fn format_token_count(tokens: usize) -> String { + if tokens >= 1_000_000 { + format!("{:.1}M", tokens as f64 / 1_000_000.0) + } else if tokens >= 1_000 { + format!("{:.1}K", tokens as f64 / 1_000.0) + } else { + tokens.to_string() + } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -572,14 +587,21 @@ impl EvolutionLimitKind { match self { Self::BuildAttempts => format!("{} build attempts", attempts), Self::NoProgress | Self::MaxIterations => format!("{} attempts", attempts), + Self::TokenBudget => format!("{} tokens", format_token_count(attempts)), } } fn prompt(self, attempts: usize) -> String { - format!( - "The AI has made {}. Keep going?", - self.attempts_label(attempts) - ) + match self { + Self::TokenBudget => format!( + "The AI has used {}. Keep going?", + self.attempts_label(attempts) + ), + _ => format!( + "The AI has made {}. Keep going?", + self.attempts_label(attempts) + ), + } } fn stop_summary(self, attempts: usize) -> String { @@ -596,6 +618,10 @@ impl EvolutionLimitKind { "Evolution stopped after reaching {}. You can review the current changes or continue with a follow-up prompt.", self.attempts_label(attempts) ), + Self::TokenBudget => format!( + "Evolution stopped after consuming {}. You can review the current changes or continue with a follow-up prompt.", + self.attempts_label(attempts) + ), } } } @@ -843,15 +869,12 @@ pub async fn generate_evolution( // Read configurable limits from store (hot-reloaded on every run). let config::EvolutionLimits { mut max_build_attempts, + max_token_budget, + mut max_iterations, .. } = config::EvolutionLimits::load(app) .inspect_err(|e| warn!("EvolutionLimits::load failed ({e}); using defaults")) .unwrap_or_default(); - let legacy_max_iterations = - store::get_max_iterations(app).unwrap_or(store::DEFAULT_MAX_ITERATIONS); - let max_token_budget = - store::get_max_token_budget(app).unwrap_or(store::DEFAULT_MAX_TOKEN_BUDGET); - let mut max_iterations = legacy_max_iterations; let mut max_iterations_before_edit = std::cmp::max( 1, (max_iterations * MAX_ITERATIONS_BEFORE_EDIT_PERCENT) / 100, diff --git a/apps/native/src-tauri/src/storage/store.rs b/apps/native/src-tauri/src/storage/store.rs index 1201334ed..52d01dd74 100644 --- a/apps/native/src-tauri/src/storage/store.rs +++ b/apps/native/src-tauri/src/storage/store.rs @@ -656,12 +656,26 @@ pub fn set_max_iterations(app: &AppHandle, max: usize) -> Result< Ok(()) } -/// Gets the maximum token budget for evolution (default: 50,000). +/// Gets the maximum token budget for evolution (default: 50,000). Repo-scoped. pub fn get_max_token_budget(app: &AppHandle) -> Result { + if let Some(limits) = + app.try_state::>() + { + return Ok(limits.read_sync().max_token_budget); + } + Ok(get_json_pref(app, "maxTokenBudget")?.unwrap_or(DEFAULT_MAX_TOKEN_BUDGET)) } pub fn set_max_token_budget(app: &AppHandle, max: u32) -> Result<()> { + if let Some(limits) = + app.try_state::>() + { + let mut limits = limits.write_sync(app); + limits.max_token_budget = max; + return Ok(()); + } + let store = get_store(app)?; store.set("maxTokenBudget", serde_json::json!(max)); store.save()?; From ad9e9bc9b025f6309280051d5bf0339e00b60a6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Pedro=20Bol=C3=ADvar=20Puente?= Date: Thu, 11 Jun 2026 03:02:48 +0200 Subject: [PATCH 3/8] evolve: enforce max_token_budget in the agent loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the missing comparison: after each API response, if total_tokens has reached max_token_budget, ask the user whether to continue (interactive) or stop (non-interactive). On continue, extend the budget by the original amount, mirroring the BuildAttempts UX. On stop, hand off to finish_after_limit_stop. PR #325 removed max_iterations on the premise that max_token_budget already enforced a session bound. It didn't — the value was loaded, logged, and emitted to the UI progress bar, but never compared against total_tokens to terminate the loop. This closes that gap. Providers that don't return usage (Ollama, some CLI providers) sidestep this guard entirely; the restored MaxIterations check covers those. --- apps/native/src-tauri/src/evolve/mod.rs | 55 ++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/apps/native/src-tauri/src/evolve/mod.rs b/apps/native/src-tauri/src/evolve/mod.rs index 2307d2e1a..2dec2ef64 100644 --- a/apps/native/src-tauri/src/evolve/mod.rs +++ b/apps/native/src-tauri/src/evolve/mod.rs @@ -558,10 +558,6 @@ enum EvolutionLimitKind { NoProgress, MaxIterations, BuildAttempts, - // Enforced in the next commit; the variant lands here so the - // method arms and the loop guard can be split across two commits - // that each compile cleanly. - #[allow(dead_code)] TokenBudget, } @@ -869,7 +865,7 @@ pub async fn generate_evolution( // Read configurable limits from store (hot-reloaded on every run). let config::EvolutionLimits { mut max_build_attempts, - max_token_budget, + mut max_token_budget, mut max_iterations, .. } = config::EvolutionLimits::load(app) @@ -882,6 +878,7 @@ pub async fn generate_evolution( let max_iterations_before_edit_increment = max_iterations_before_edit.max(1); let max_iterations_increment = max_iterations.max(1); let max_build_attempts_increment = max_build_attempts.max(1); + let max_token_budget_increment = max_token_budget.max(1); let interactive_limit_prompt = !banned_tools.contains(&"ask_user"); info!( "Limits: max_token_budget={}, max_iterations_before_edit={} ({}%), max_build_attempts={}, max_iterations={}", @@ -1124,6 +1121,54 @@ pub async fn generate_evolution( ); } + // Safety limits -- Token budget. Caps cumulative session tokens + // (in addition to the per-call max_output_tokens). Skipped if + // the provider didn't report usage; the iteration guard below + // is the fallback for those providers. + if total_tokens >= max_token_budget { + warn!( + "⚠️ Evolution reached token budget ({}/{}) - asking whether to continue", + total_tokens, max_token_budget, + ); + match ask_to_continue_after_limit( + app, + start_time, + iteration, + EvolutionLimitKind::TokenBudget, + total_tokens as usize, + interactive_limit_prompt, + ) + .await + { + LimitDecision::Continue => { + max_token_budget = max_token_budget.saturating_add(max_token_budget_increment); + info!("Extending token budget to {}", max_token_budget); + } + LimitDecision::Stop => { + finish_after_limit_stop( + app, + &mut evolution, + start_time, + iteration, + EvolutionLimitKind::TokenBudget, + total_tokens as usize, + ); + break; + } + LimitDecision::Cancelled => { + evolution.state = EvolutionState::Failed; + return Err(EvolutionRunError::from_state( + session_control::EVOLUTION_CANCELLED_MSG, + &evolution, + iteration, + build_attempts, + total_tokens, + ) + .into()); + } + } + } + let assistant_msg = response.message; // Log assistant text response if any. If tool calls are present, treat tool_calls as From 0db4b0cbcd56154e358d3ac3bc909ed83b825ef8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Pedro=20Bol=C3=ADvar=20Puente?= Date: Thu, 11 Jun 2026 03:13:01 +0200 Subject: [PATCH 4/8] evolve: distinguish limit-hit completions with EvolutionState::LimitReached Adds a new EvolutionState variant for runs that finish_after_limit_stop terminates. Before this, hitting any safety guard (NoProgress / MaxIterations / BuildAttempts / TokenBudget) ended the run as Conversational or Generated depending on whether edits had been made, making it impossible for downstream consumers (notably the eval harness) to tell "the agent decided it was done" from "we cut it off". finish_after_limit_stop now sets state to LimitReached unconditionally. TypeScript binding regenerated via specta. Eval harness scoring will need a companion update on the nixmac-web side to grade LimitReached separately. --- apps/native/src-tauri/src/evolve/mod.rs | 6 +----- apps/native/src-tauri/src/shared_types/evolve.rs | 5 +++++ apps/native/src/ipc/types.ts | 11 +++++++++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/apps/native/src-tauri/src/evolve/mod.rs b/apps/native/src-tauri/src/evolve/mod.rs index 2dec2ef64..ab38b3ebb 100644 --- a/apps/native/src-tauri/src/evolve/mod.rs +++ b/apps/native/src-tauri/src/evolve/mod.rs @@ -729,11 +729,7 @@ fn finish_after_limit_stop( emit_evolve_event(app, EvolveEvent::complete(start_time, iteration, &summary)); evolution.summary = Some(summary); - evolution.state = if evolution.edits.is_empty() { - EvolutionState::Conversational - } else { - EvolutionState::Generated - }; + evolution.state = EvolutionState::LimitReached; } /// Generate an evolution from a user prompt using OpenAI function calling. diff --git a/apps/native/src-tauri/src/shared_types/evolve.rs b/apps/native/src-tauri/src/shared_types/evolve.rs index 700105953..425497cfe 100644 --- a/apps/native/src-tauri/src/shared_types/evolve.rs +++ b/apps/native/src-tauri/src/shared_types/evolve.rs @@ -207,6 +207,11 @@ pub enum EvolutionState { Failed, /// Agent responded conversationally without making any environment changes. Conversational, + /// Evolution was stopped because a safety limit was reached + /// (iterations, build attempts, token budget, or stale progress). + /// Distinguishes "we cut it off" from "the agent finished" so + /// the eval harness can score runaways correctly. + LimitReached, } /// Telemetry counters from a completed evolution run. diff --git a/apps/native/src/ipc/types.ts b/apps/native/src/ipc/types.ts index 4c189169e..cef1de9e6 100644 --- a/apps/native/src/ipc/types.ts +++ b/apps/native/src/ipc/types.ts @@ -282,7 +282,7 @@ error: string | null; /** * Whether the failed operation completed before changing system state. */ -system_untouched: boolean | null; +system_untouched: boolean | null; /** * Path to the captured rebuild log, when available. */ @@ -422,7 +422,14 @@ export type EvolutionState = /** * Agent responded conversationally without making any environment changes. */ -"conversational" +"conversational" | +/** + * Evolution was stopped because a safety limit was reached + * (iterations, build attempts, token budget, or stale progress). + * Distinguishes "we cut it off" from "the agent finished" so + * the eval harness can score runaways correctly. + */ +"limitReached" /** * Telemetry counters from a completed evolution run. From 522b7f9e7d0ab78f007f7321b10bef1fe38566ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Pedro=20Bol=C3=ADvar=20Puente?= Date: Thu, 11 Jun 2026 03:22:08 +0200 Subject: [PATCH 5/8] evolve: distinguish LimitReached in CLI and desktop completion UX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two cosmetic gaps left behind by the structural LimitReached work: - cli.rs printed "Evolution completed successfully" on any non- conversational success, including runs the loop cut off. Now matches against the state and prints a stopped-for-safety message in the LimitReached arm. - use-evolve.ts toasted ✓ "Evolution complete" with the success variant for every non-error result. Now branches on the new state: a ⏸ "Evolution stopped (safety limit reached)" toast.info fires for LimitReached so the user can tell their run was cut off without reading the telemetry. The partial change map is still mirrored — limit-reached runs can contain useful edits the user may want to review or follow up on. Adds a use-evolve.test.ts case for the new path. --- apps/native/src-tauri/src/cli.rs | 19 +++++++++------ apps/native/src/hooks/use-evolve.test.ts | 30 ++++++++++++++++++++++++ apps/native/src/hooks/use-evolve.ts | 16 +++++++++---- 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/apps/native/src-tauri/src/cli.rs b/apps/native/src-tauri/src/cli.rs index 68505188e..eda651cc4 100644 --- a/apps/native/src-tauri/src/cli.rs +++ b/apps/native/src-tauri/src/cli.rs @@ -253,13 +253,18 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result let (ok, output_value, failure_message) = match outcome { Ok(output) => { - let is_conversational = - output.telemetry.state == crate::shared_types::EvolutionState::Conversational; - - if is_conversational { - println!("(conversational response — no changes made)"); - } else { - println!("Evolution completed successfully"); + match output.telemetry.state { + crate::shared_types::EvolutionState::Conversational => { + println!("(conversational response — no changes made)"); + } + crate::shared_types::EvolutionState::LimitReached => { + println!( + "Evolution stopped after reaching a safety limit (iterations, build attempts, token budget, or stale progress). Review any partial changes and re-run with adjusted limits to continue." + ); + } + _ => { + println!("Evolution completed successfully"); + } } let output_value = match serde_json::to_value(&output) { diff --git a/apps/native/src/hooks/use-evolve.test.ts b/apps/native/src/hooks/use-evolve.test.ts index 69d85395c..1ff86ef83 100644 --- a/apps/native/src/hooks/use-evolve.test.ts +++ b/apps/native/src/hooks/use-evolve.test.ts @@ -110,4 +110,34 @@ describe("useEvolve", () => { expect(useViewModel.getState().changeMap).toBe(existingMap); expect(useWidgetStore.getState().conversationalResponse).toBe("No file changes needed."); }); + + it("logs a stopped message when a safety limit is reached", async () => { + const limitReachedResult: EvolutionResult = { + changeMap: { groups: [], singles: [], unsummarizedHashes: [] }, + gitStatus, + evolveState, + conversationalResponse: null, + telemetry: { + state: "limitReached", + iterations: 25, + buildAttempts: 0, + totalTokens: 50_000, + editsCount: 0, + thinkingCount: 0, + toolCallsCount: 0, + durationMs: 12_345, + }, + }; + + mocks.evolve.mockResolvedValue(limitReachedResult); + + const store = useWidgetStore.getState(); + store.setEvolvePrompt("install htop"); + + await useEvolve().handleEvolve(); + + const logs = useWidgetStore.getState().consoleLogs; + expect(logs).toContain("Evolution stopped (safety limit reached)"); + expect(logs).not.toContain("✓ Evolution complete"); + }); }); diff --git a/apps/native/src/hooks/use-evolve.ts b/apps/native/src/hooks/use-evolve.ts index 68f652e26..d4c352c12 100644 --- a/apps/native/src/hooks/use-evolve.ts +++ b/apps/native/src/hooks/use-evolve.ts @@ -88,13 +88,21 @@ const handleEvolve = async () => { // Backend handles: AI + summary + branch + commit + DB const result = await tauriAPI.darwin.evolve(store.evolvePrompt); const isConversational = result?.telemetry?.state === "conversational"; + const isLimitReached = result?.telemetry?.state === "limitReached"; const telemetry = result?.telemetry; - const completionMsg = telemetry - ? `✓ Evolution complete in ${formatDurationMs(telemetry.durationMs)} and ${telemetry.iterations} iteration${telemetry.iterations === 1 ? "" : "s"}\n` - : "✓ Evolution complete\n"; + const iterationSuffix = telemetry + ? ` in ${formatDurationMs(telemetry.durationMs)} and ${telemetry.iterations} iteration${telemetry.iterations === 1 ? "" : "s"}` + : ""; + const completionMsg = isLimitReached + ? `⏸ Evolution stopped (safety limit reached)${iterationSuffix}\n` + : `✓ Evolution complete${iterationSuffix}\n`; useWidgetStore.getState().appendLog(completionMsg); - toast.success(completionMsg); + if (isLimitReached) { + toast.info(completionMsg); + } else { + toast.success(completionMsg); + } if (telemetry) { useWidgetStore.getState().setEvolutionTelemetry(telemetry); } From be64976f9cf962ab51c86c4c7b877000cc3dfc25 Mon Sep 17 00:00:00 2001 From: Cooper Maruyama Date: Sun, 14 Jun 2026 14:51:36 -0700 Subject: [PATCH 6/8] fix conflicts --- apps/native/src-tauri/src/storage/store.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/native/src-tauri/src/storage/store.rs b/apps/native/src-tauri/src/storage/store.rs index 53f1d53f8..c85f2918e 100644 --- a/apps/native/src-tauri/src/storage/store.rs +++ b/apps/native/src-tauri/src/storage/store.rs @@ -628,7 +628,7 @@ fn get_repo_store( /// Gets the maximum iterations for evolution (default: 25). Repo-scoped. pub fn get_max_iterations(app: &AppHandle) -> Result { if let Some(limits) = - app.try_state::>() + app.try_state::>() { return Ok(limits.read_sync().max_iterations); } @@ -643,9 +643,9 @@ pub fn get_max_iterations(app: &AppHandle) -> Result { pub fn set_max_iterations(app: &AppHandle, max: usize) -> Result<()> { if let Some(limits) = - app.try_state::>() + app.try_state::>() { - let mut limits = limits.write_sync(app); + let mut limits = limits.write_sync(); limits.max_iterations = max; return Ok(()); } @@ -659,7 +659,7 @@ pub fn set_max_iterations(app: &AppHandle, max: usize) -> Result< /// Gets the maximum token budget for evolution (default: 50,000). Repo-scoped. pub fn get_max_token_budget(app: &AppHandle) -> Result { if let Some(limits) = - app.try_state::>() + app.try_state::>() { return Ok(limits.read_sync().max_token_budget); } @@ -669,14 +669,14 @@ pub fn get_max_token_budget(app: &AppHandle) -> Result { pub fn set_max_token_budget(app: &AppHandle, max: u32) -> Result<()> { if let Some(limits) = - app.try_state::>() + app.try_state::>() { - let mut limits = limits.write_sync(app); + let mut limits = limits.write_sync(); limits.max_token_budget = max; return Ok(()); } - let store = get_store(app)?; + let store = get_repo_store(app)?; store.set("maxTokenBudget", serde_json::json!(max)); store.save()?; Ok(()) From 43073b35838d11d24883c4eb46de275db4df657d Mon Sep 17 00:00:00 2001 From: Cooper Maruyama Date: Sun, 14 Jun 2026 15:11:43 -0700 Subject: [PATCH 7/8] fix ts errors --- apps/native/src/ipc/api.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/native/src/ipc/api.test.ts b/apps/native/src/ipc/api.test.ts index 96504e996..91ec9cdaf 100644 --- a/apps/native/src/ipc/api.test.ts +++ b/apps/native/src/ipc/api.test.ts @@ -35,6 +35,8 @@ const prefs = (overrides: Partial = {}): UiPrefs => evolveProvider: "openrouter", evolveModel: "anthropic/claude-sonnet-4", maxIterations: 25, + maxTokenBudget: 50000, + maxOutputTokens: 32768, maxBuildAttempts: 5, sendDiagnostics: false, confirmBuild: true, From 46a541fe462e40b074c91e0af477f001705befbb Mon Sep 17 00:00:00 2001 From: Cooper Maruyama Date: Sun, 14 Jun 2026 16:20:58 -0700 Subject: [PATCH 8/8] fix type errors --- .../__snapshots__/setup-step.stories.tsx.snap | 3 + .../src/lib/ai-provider-migration.test.ts | 125 ++++++++++++++++++ apps/native/src/lib/ai-provider-migration.ts | 73 ++++++++++ 3 files changed, 201 insertions(+) create mode 100644 apps/native/src/components/widget/steps/__snapshots__/setup-step.stories.tsx.snap create mode 100644 apps/native/src/lib/ai-provider-migration.test.ts create mode 100644 apps/native/src/lib/ai-provider-migration.ts diff --git a/apps/native/src/components/widget/steps/__snapshots__/setup-step.stories.tsx.snap b/apps/native/src/components/widget/steps/__snapshots__/setup-step.stories.tsx.snap new file mode 100644 index 000000000..e7fdda23e --- /dev/null +++ b/apps/native/src/components/widget/steps/__snapshots__/setup-step.stories.tsx.snap @@ -0,0 +1,3 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`Default Config Required 1`] = `"

Welcome to nixmac

Let's set up your nix-darwin configuration

Creates an empty folder in your home directory, then nixmac can generate a default flake.

Select your own, or proceed below for defaults


No nix-darwin configuration found in this directory

This will be your darwinConfiguration name

This will create a basic nix-darwin flake in the directory

"`; diff --git a/apps/native/src/lib/ai-provider-migration.test.ts b/apps/native/src/lib/ai-provider-migration.test.ts new file mode 100644 index 000000000..29388de2e --- /dev/null +++ b/apps/native/src/lib/ai-provider-migration.test.ts @@ -0,0 +1,125 @@ +import { describe, expect, it } from "vitest"; +import type { UiPrefs } from "@/ipc/types"; +import { migrateLegacyOpenaiProviderPrefs } from "@/lib/ai-provider-migration"; + +const PREFS: UiPrefs = { + openrouterApiKey: "", + openaiApiKey: "", + ollamaApiBaseUrl: "", + vllmApiBaseUrl: "", + vllmApiKey: "", + summaryProvider: "openrouter", + summaryModel: "openai/gpt-4o-mini", + evolveProvider: "openrouter", + evolveModel: "anthropic/claude-sonnet-4", + maxIterations: null, + maxTokenBudget: null, + maxBuildAttempts: null, + maxOutputTokens: null, + sendDiagnostics: false, + confirmBuild: false, + confirmClear: false, + confirmRollback: false, + autoSummarizeOnFocus: false, + scanHomebrewOnStartup: false, + defaultToDiffTab: false, + developerMode: false, + experimentalSpinningMascot: false, + pinnedVersion: null, + updateChannel: "stable", +}; + +describe("migrateLegacyOpenaiProviderPrefs", () => { + it("preserves OpenRouter model slugs when migrating legacy openai provider prefs", () => { + const result = migrateLegacyOpenaiProviderPrefs({ + ...PREFS, + openrouterApiKey: "sk-or-key", + openaiApiKey: "", + evolveProvider: "openai", + evolveModel: "google/gemini-2.5-pro", + summaryProvider: "openai", + summaryModel: "anthropic/claude-3.5-haiku", + }); + + expect(result.values).toEqual({ + evolveProvider: "openrouter", + evolveModel: "google/gemini-2.5-pro", + summaryProvider: "openrouter", + summaryModel: "anthropic/claude-3.5-haiku", + }); + expect(result.update).toEqual({ + evolveProvider: "openrouter", + summaryProvider: "openrouter", + }); + }); + + it("uses OpenRouter defaults when migrated legacy openai models are bare or missing", () => { + const result = migrateLegacyOpenaiProviderPrefs({ + ...PREFS, + openrouterApiKey: "sk-or-key", + openaiApiKey: "", + evolveProvider: "openai", + evolveModel: "gpt-4o", + summaryProvider: "openai", + summaryModel: " ", + }); + + expect(result.values).toEqual({ + evolveProvider: "openrouter", + evolveModel: "anthropic/claude-sonnet-4", + summaryProvider: "openrouter", + summaryModel: "openai/gpt-4o-mini", + }); + expect(result.update).toEqual(result.values); + }); + + it("preserves legacy OpenRouter slugs when both OpenRouter and OpenAI keys exist", () => { + const result = migrateLegacyOpenaiProviderPrefs({ + ...PREFS, + openrouterApiKey: "sk-or-key", + openaiApiKey: "sk-openai-key", + evolveProvider: "openai", + evolveModel: "anthropic/claude-sonnet-4", + summaryProvider: "openai", + summaryModel: "gpt-4o-mini", + }); + + expect(result.values).toEqual({ + evolveProvider: "openrouter", + evolveModel: "anthropic/claude-sonnet-4", + summaryProvider: "openai", + summaryModel: "gpt-4o-mini", + }); + expect(result.update).toEqual({ + evolveProvider: "openrouter", + }); + }); + + it("keeps direct openai prefs when both keys exist with a bare OpenAI model", () => { + const result = migrateLegacyOpenaiProviderPrefs({ + ...PREFS, + openrouterApiKey: "sk-or-key", + openaiApiKey: "sk-openai-key", + evolveProvider: "openai", + evolveModel: "gpt-4o", + }); + + expect(result.values.evolveProvider).toBe("openai"); + expect(result.values.evolveModel).toBe("gpt-4o"); + expect(result.update).toBeNull(); + }); + + it("keeps direct openai prefs when both keys exist without a model", () => { + const result = migrateLegacyOpenaiProviderPrefs({ + ...PREFS, + openrouterApiKey: "sk-or-key", + openaiApiKey: "sk-openai-key", + summaryProvider: "openai", + summaryModel: "", + }); + + expect(result.values.summaryProvider).toBe("openai"); + expect(result.values.summaryModel).toBe(""); + expect(result.update).toBeNull(); + }); +}); diff --git a/apps/native/src/lib/ai-provider-migration.ts b/apps/native/src/lib/ai-provider-migration.ts new file mode 100644 index 000000000..ad18df0e1 --- /dev/null +++ b/apps/native/src/lib/ai-provider-migration.ts @@ -0,0 +1,73 @@ +import type { UiPrefs, UiPrefsUpdate } from "@/ipc/types"; + +const OPENROUTER_PROVIDER = "openrouter"; +const OPENAI_PROVIDER = "openai"; +const DEFAULT_OPENROUTER_EVOLVE_MODEL = "anthropic/claude-sonnet-4"; +const DEFAULT_OPENROUTER_SUMMARY_MODEL = "openai/gpt-4o-mini"; + +interface ProviderMigrationValues { + evolveProvider: string; + evolveModel: string; + summaryProvider: string; + summaryModel: string; +} + +function hasValue(value?: string | null): boolean { + return Boolean(value?.trim()); +} + +function isOpenrouterModelSlug(value?: string | null): boolean { + return Boolean(value?.trim().includes("/")); +} + +function shouldMigrateLegacyOpenaiProvider( + prefs: Pick, + model?: string | null, +): boolean { + if (!hasValue(prefs.openrouterApiKey)) { + return false; + } + return !hasValue(prefs.openaiApiKey) || isOpenrouterModelSlug(model); +} + +export function migrateLegacyOpenaiProviderPrefs(prefs: UiPrefs): { + values: ProviderMigrationValues; + update: Partial | null; +} { + const update: Partial = {}; + const values = { + evolveProvider: prefs.evolveProvider ?? OPENROUTER_PROVIDER, + evolveModel: prefs.evolveModel ?? DEFAULT_OPENROUTER_EVOLVE_MODEL, + summaryProvider: prefs.summaryProvider ?? OPENROUTER_PROVIDER, + summaryModel: prefs.summaryModel ?? DEFAULT_OPENROUTER_SUMMARY_MODEL, + }; + + if ( + prefs.evolveProvider === OPENAI_PROVIDER && + shouldMigrateLegacyOpenaiProvider(prefs, prefs.evolveModel) + ) { + values.evolveProvider = OPENROUTER_PROVIDER; + update.evolveProvider = values.evolveProvider; + if (!isOpenrouterModelSlug(prefs.evolveModel)) { + values.evolveModel = DEFAULT_OPENROUTER_EVOLVE_MODEL; + update.evolveModel = values.evolveModel; + } + } + + if ( + prefs.summaryProvider === OPENAI_PROVIDER && + shouldMigrateLegacyOpenaiProvider(prefs, prefs.summaryModel) + ) { + values.summaryProvider = OPENROUTER_PROVIDER; + update.summaryProvider = values.summaryProvider; + if (!isOpenrouterModelSlug(prefs.summaryModel)) { + values.summaryModel = DEFAULT_OPENROUTER_SUMMARY_MODEL; + update.summaryModel = values.summaryModel; + } + } + + return { + values, + update: Object.keys(update).length > 0 ? update : null, + }; +}