diff --git a/apps/native/src-tauri/configurable-derive/src/attrs.rs b/apps/native/src-tauri/configurable-derive/src/attrs.rs index e513ed604..1836a9017 100644 --- a/apps/native/src-tauri/configurable-derive/src/attrs.rs +++ b/apps/native/src-tauri/configurable-derive/src/attrs.rs @@ -188,7 +188,7 @@ mod tests { let input: DeriveInput = parse_quote! { #[config(scope = "repo", display_name = "Evolution")] struct EvolutionLimits { - max_token_budget: usize, + max_iterations: usize, } }; diff --git a/apps/native/src-tauri/src/cli.rs b/apps/native/src-tauri/src/cli.rs index 798c6401f..eda651cc4 100644 --- a/apps/native/src-tauri/src/cli.rs +++ b/apps/native/src-tauri/src/cli.rs @@ -23,6 +23,7 @@ use tauri::AppHandle; pub struct EvolveConfig { pub prompt: String, pub config: Option, + pub max_iterations: Option, pub max_output_tokens: Option, pub max_token_budget: Option, pub evolve_provider: Option, @@ -55,6 +56,10 @@ pub enum Commands { #[arg(short, long)] config: Option, + /// Legacy fallback for providers that do not report token usage + #[arg(short, long, hide = true)] + max_iterations: Option, + /// Maximum output tokens requested per evolution model call #[arg(long)] max_output_tokens: Option, @@ -106,6 +111,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result let EvolveConfig { prompt, config, + max_iterations, max_output_tokens, max_token_budget, evolve_provider, @@ -192,6 +198,12 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result None => crate::storage::store::get_summary_model(app).ok().flatten(), }; + // Effective legacy iteration fallback: prefer CLI value, otherwise read from store (has default) + let effective_max_iterations: usize = match max_iterations { + Some(v) => v, + None => crate::storage::store::get_max_iterations(app) + .unwrap_or(crate::storage::store::DEFAULT_MAX_ITERATIONS), + }; let effective_max_output_tokens: usize = match max_output_tokens { Some(v) => v, None => crate::storage::store::get_max_output_tokens(app) @@ -205,6 +217,12 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result .unwrap_or(crate::storage::store::DEFAULT_MAX_TOKEN_BUDGET), }; + // Legacy max iterations + if let Some(iterations) = max_iterations { + crate::storage::store::set_max_iterations(app, iterations) + .map_err(|e| format!("Failed to set max iterations: {}", e))?; + } + if let Some(output_tokens) = max_output_tokens { crate::storage::store::set_max_output_tokens(app, output_tokens) .map_err(|e| format!("Failed to set max output tokens: {}", e))?; @@ -235,13 +253,18 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result let (ok, output_value, failure_message) = match outcome { Ok(output) => { - let is_conversational = - output.telemetry.state == crate::shared_types::EvolutionState::Conversational; - - if is_conversational { - println!("(conversational response — no changes made)"); - } else { - println!("Evolution completed successfully"); + match output.telemetry.state { + crate::shared_types::EvolutionState::Conversational => { + println!("(conversational response — no changes made)"); + } + crate::shared_types::EvolutionState::LimitReached => { + println!( + "Evolution stopped after reaching a safety limit (iterations, build attempts, token budget, or stale progress). Review any partial changes and re-run with adjusted limits to continue." + ); + } + _ => { + println!("Evolution completed successfully"); + } } let output_value = match serde_json::to_value(&output) { @@ -272,6 +295,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result "ok": ok, "state": state_str, "prompt": prompt, + "maxIterations": effective_max_iterations, "maxOutputTokens": effective_max_output_tokens, "maxTokenBudget": effective_max_token_budget, "evolveProvider": effective_evolve_provider, diff --git a/apps/native/src-tauri/src/commands/settings_io.rs b/apps/native/src-tauri/src/commands/settings_io.rs index e59ca271e..5dae67947 100644 --- a/apps/native/src-tauri/src/commands/settings_io.rs +++ b/apps/native/src-tauri/src/commands/settings_io.rs @@ -240,6 +240,7 @@ mod tests { &mut output, &mut skipped, serde_json::to_value(EvolutionLimits { + max_iterations: 12, max_token_budget: 80_000, max_build_attempts: 4, max_output_tokens: 16_384, @@ -250,6 +251,7 @@ mod tests { assert_eq!(output.get("hostAttr"), Some(&json!("macbook"))); assert_eq!(output.get("developerMode"), Some(&json!(true))); + assert_eq!(output.get("maxIterations"), Some(&json!(12))); assert_eq!(output.get("maxBuildAttempts"), Some(&json!(4))); assert!(!output.contains_key("openaiApiKey")); assert!(!output.contains_key("promptHistory")); diff --git a/apps/native/src-tauri/src/commands/ui_prefs.rs b/apps/native/src-tauri/src/commands/ui_prefs.rs index 469b2d8dc..c6cd9b909 100644 --- a/apps/native/src-tauri/src/commands/ui_prefs.rs +++ b/apps/native/src-tauri/src/commands/ui_prefs.rs @@ -27,6 +27,8 @@ pub async fn ui_get_prefs(app: AppHandle) -> Result Result Self { Self { + max_iterations: 25, max_token_budget: 50_000, max_build_attempts: 5, max_output_tokens: 32_768, @@ -87,6 +97,7 @@ mod tests { fn default_matches_configured_field_defaults() { let limits = EvolutionLimits::default(); + assert_eq!(limits.max_iterations, 25); assert_eq!(limits.max_token_budget, 50_000); assert_eq!(limits.max_build_attempts, 5); assert_eq!(limits.max_output_tokens, 32_768); @@ -95,6 +106,7 @@ mod tests { #[test] fn unknown_fields_do_not_change_limits() { let limits: EvolutionLimits = serde_json::from_value(serde_json::json!({ + "maxIterations": 11, "maxTokenBudget": 80_000, "maxBuildAttempts": 3, "maxOutputTokens": 16_384, @@ -105,6 +117,7 @@ mod tests { assert_eq!( limits, EvolutionLimits { + max_iterations: 11, max_token_budget: 80_000, max_build_attempts: 3, max_output_tokens: 16_384, @@ -114,9 +127,12 @@ mod tests { #[test] fn missing_fields_use_defaults() { - let limits: EvolutionLimits = - serde_json::from_value(serde_json::json!({})).expect("limits deserialize"); + let limits: EvolutionLimits = serde_json::from_value(serde_json::json!({ + "maxIterations": 11, + })) + .expect("limits deserialize"); + assert_eq!(limits.max_iterations, 11); assert_eq!(limits.max_token_budget, 50_000); assert_eq!(limits.max_build_attempts, 5); assert_eq!(limits.max_output_tokens, 32_768); diff --git a/apps/native/src-tauri/src/evolve/mod.rs b/apps/native/src-tauri/src/evolve/mod.rs index 9ef9e8413..ac2fe935b 100644 --- a/apps/native/src-tauri/src/evolve/mod.rs +++ b/apps/native/src-tauri/src/evolve/mod.rs @@ -41,7 +41,7 @@ use std::sync::Arc; use std::time::Duration; use tauri::{AppHandle, Manager, Runtime}; use tokio::time::sleep; -use tools::{ToolResult, create_tools, execute_tool}; +use tools::{ToolResult, create_tools, execute_tool, is_editing_tool}; pub use types::{EvolutionProgress, EvolutionRunError}; use crate::{ @@ -324,6 +324,10 @@ const DEFAULT_MODEL: &str = "anthropic/claude-sonnet-4"; const DEFAULT_OPENAI_MODEL: &str = "gpt-4o"; const DEFAULT_OLLAMA_API_BASE: &str = "http://localhost:11434"; +// Percentage of max_iterations after which we require at least one edit/build_check. +// Example: with max_iterations=50 and this set to 75, threshold is 37 iterations. +const MAX_ITERATIONS_BEFORE_EDIT_PERCENT: usize = 75; + // Applied separately to stdout and stderr. So when thinking about tokens, // the effective output limit could be up to double this if both are long. const BUILD_OUTPUT_MAX_CHARS: usize = 6_000; @@ -560,7 +564,20 @@ const LIMIT_DECISION_STOP: &str = "Stop"; #[derive(Debug, Clone, Copy)] enum EvolutionLimitKind { + NoProgress, + MaxIterations, BuildAttempts, + TokenBudget, +} + +fn format_token_count(tokens: usize) -> String { + if tokens >= 1_000_000 { + format!("{:.1}M", tokens as f64 / 1_000_000.0) + } else if tokens >= 1_000 { + format!("{:.1}K", tokens as f64 / 1_000.0) + } else { + tokens.to_string() + } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -574,22 +591,42 @@ impl EvolutionLimitKind { fn attempts_label(self, attempts: usize) -> String { match self { Self::BuildAttempts => format!("{} build attempts", attempts), + Self::NoProgress | Self::MaxIterations => format!("{} attempts", attempts), + Self::TokenBudget => format!("{} tokens", format_token_count(attempts)), } } fn prompt(self, attempts: usize) -> String { - format!( - "The AI has made {}. Keep going?", - self.attempts_label(attempts) - ) + match self { + Self::TokenBudget => format!( + "The AI has used {}. Keep going?", + self.attempts_label(attempts) + ), + _ => format!( + "The AI has made {}. Keep going?", + self.attempts_label(attempts) + ), + } } fn stop_summary(self, attempts: usize) -> String { match self { + Self::NoProgress => format!( + "Evolution stopped after {} because the AI had not started making concrete changes.", + self.attempts_label(attempts) + ), + Self::MaxIterations => format!( + "Evolution stopped after reaching {}. The current conversation context was preserved.", + self.attempts_label(attempts) + ), Self::BuildAttempts => format!( "Evolution stopped after reaching {}. You can review the current changes or continue with a follow-up prompt.", self.attempts_label(attempts) ), + Self::TokenBudget => format!( + "Evolution stopped after consuming {}. You can review the current changes or continue with a follow-up prompt.", + self.attempts_label(attempts) + ), } } } @@ -701,11 +738,7 @@ fn finish_after_limit_stop( emit_evolve_event(app, EvolveEvent::complete(start_time, iteration, &summary)); evolution.summary = Some(summary); - evolution.state = if evolution.edits.is_empty() { - EvolutionState::Conversational - } else { - EvolutionState::Generated - }; + evolution.state = EvolutionState::LimitReached; } /// Generate an evolution from a user prompt using OpenAI function calling. @@ -896,18 +929,30 @@ pub async fn generate_evolution( // instead of silently swapping in field defaults. let config::EvolutionLimits { mut max_build_attempts, + mut max_token_budget, + mut max_iterations, .. } = app .state::>() .read_sync() .clone(); - let max_token_budget = - store::get_max_token_budget(app).unwrap_or(store::DEFAULT_MAX_TOKEN_BUDGET); + + let mut max_iterations_before_edit = std::cmp::max( + 1, + (max_iterations * MAX_ITERATIONS_BEFORE_EDIT_PERCENT) / 100, + ); + let max_iterations_before_edit_increment = max_iterations_before_edit.max(1); + let max_iterations_increment = max_iterations.max(1); let max_build_attempts_increment = max_build_attempts.max(1); + let max_token_budget_increment = max_token_budget.max(1); let interactive_limit_prompt = !banned_tools.contains(&"ask_user"); info!( - "Limits: max_token_budget={}, max_build_attempts={}", - max_token_budget, max_build_attempts, + "Limits: max_token_budget={}, max_iterations_before_edit={} ({}%), max_build_attempts={}, max_iterations={}", + max_token_budget, + max_iterations_before_edit, + MAX_ITERATIONS_BEFORE_EDIT_PERCENT, + max_build_attempts, + max_iterations, ); let tools = create_tools(banned_tools); @@ -977,7 +1022,8 @@ pub async fn generate_evolution( let gitignore_matcher = gitignore::load_gitignore_matcher(repo_root.as_path())?; - // Track whether we've run a build check + // Track whether we've made any actual edits and/or build checks + let mut made_edit = false; let mut made_build_check = false; // Agentic loop - let the model use tools until done AND build passes @@ -1141,6 +1187,54 @@ pub async fn generate_evolution( ); } + // Safety limits -- Token budget. Caps cumulative session tokens + // (in addition to the per-call max_output_tokens). Skipped if + // the provider didn't report usage; the iteration guard below + // is the fallback for those providers. + if total_tokens >= max_token_budget { + warn!( + "⚠️ Evolution reached token budget ({}/{}) - asking whether to continue", + total_tokens, max_token_budget, + ); + match ask_to_continue_after_limit( + app, + start_time, + iteration, + EvolutionLimitKind::TokenBudget, + total_tokens as usize, + interactive_limit_prompt, + ) + .await + { + LimitDecision::Continue => { + max_token_budget = max_token_budget.saturating_add(max_token_budget_increment); + info!("Extending token budget to {}", max_token_budget); + } + LimitDecision::Stop => { + finish_after_limit_stop( + app, + &mut evolution, + start_time, + iteration, + EvolutionLimitKind::TokenBudget, + total_tokens as usize, + ); + break; + } + LimitDecision::Cancelled => { + evolution.state = EvolutionState::Failed; + return Err(EvolutionRunError::from_state( + session_control::EVOLUTION_CANCELLED_MSG, + &evolution, + iteration, + build_attempts, + total_tokens, + ) + .into()); + } + } + } + let assistant_msg = response.message; // Log assistant text response if any. If tool calls are present, treat tool_calls as @@ -1219,7 +1313,10 @@ pub async fn generate_evolution( success, ); - // Track if we've made a build check + // Track if we've made an edit or build check + if is_editing_tool(tool_name) { + made_edit = true; + } if tool_name == "build_check" { made_build_check = true; tool_key = Some(format!("build_check_{}", iteration)); @@ -1502,6 +1599,110 @@ Do not invent tool names and do not place tool invocations in assistant content. break; } + // Safety limits -- Max Iterations Before Edit Check + if iteration >= max_iterations_before_edit && !(made_edit || made_build_check) { + warn!( + "⚠️ No edit or build_check by iteration {} - asking whether to continue", + max_iterations_before_edit + ); + match ask_to_continue_after_limit( + app, + start_time, + iteration, + EvolutionLimitKind::NoProgress, + iteration, + interactive_limit_prompt, + ) + .await + { + LimitDecision::Continue => { + max_iterations_before_edit += max_iterations_before_edit_increment; + max_iterations = max_iterations.max(max_iterations_before_edit); + info!( + "Extending no-progress limit to iteration {} and max iterations to {}", + max_iterations_before_edit, max_iterations + ); + } + LimitDecision::Stop => { + finish_after_limit_stop( + app, + &mut evolution, + start_time, + iteration, + EvolutionLimitKind::NoProgress, + iteration, + ); + break; + } + LimitDecision::Cancelled => { + evolution.state = EvolutionState::Failed; + return Err(EvolutionRunError::from_state( + session_control::EVOLUTION_CANCELLED_MSG, + &evolution, + iteration, + build_attempts, + total_tokens, + ) + .into()); + } + } + } + + // Safety limits -- Max Iterations + if iteration >= max_iterations { + warn!( + "⚠️ Evolution reached maximum iterations ({}) - asking whether to continue", + max_iterations + ); + match ask_to_continue_after_limit( + app, + start_time, + iteration, + EvolutionLimitKind::MaxIterations, + iteration, + interactive_limit_prompt, + ) + .await + { + LimitDecision::Continue => { + max_iterations += max_iterations_increment; + info!("Extending max iterations to {}", max_iterations); + + // Avoid immediately prompting again this same iteration if build attempts + // are already at/over the current ceiling. + if build_attempts >= max_build_attempts { + max_build_attempts += max_build_attempts_increment; + info!( + "Also extending max build attempts to {}", + max_build_attempts + ); + } + } + LimitDecision::Stop => { + finish_after_limit_stop( + app, + &mut evolution, + start_time, + iteration, + EvolutionLimitKind::MaxIterations, + iteration, + ); + break; + } + LimitDecision::Cancelled => { + evolution.state = EvolutionState::Failed; + return Err(EvolutionRunError::from_state( + session_control::EVOLUTION_CANCELLED_MSG, + &evolution, + iteration, + build_attempts, + total_tokens, + ) + .into()); + } + } + } + // Safety limits -- Max Build Attempts if build_attempts >= max_build_attempts { warn!( diff --git a/apps/native/src-tauri/src/evolve/tools.rs b/apps/native/src-tauri/src/evolve/tools.rs index b464fe00b..33f5bfde2 100644 --- a/apps/native/src-tauri/src/evolve/tools.rs +++ b/apps/native/src-tauri/src/evolve/tools.rs @@ -139,6 +139,13 @@ pub fn execute_tool( } } +/// Helper to determine if a tool is an editing tool, i.e. it +/// makes changes to the nix config that count as "edits" in the +/// evolution process and should be tracked as such. +pub fn is_editing_tool(name: &str) -> bool { + matches!(name, "edit_file" | "edit_nix_file" | "ensure_secret") +} + // ============================================================================= // Shared helpers (used across tool modules) // ============================================================================= @@ -203,7 +210,7 @@ pub(crate) fn ensure_nixmac_edit_allowed(tool: &str, path: &str) -> Result<()> { #[cfg(test)] mod tests { - use super::{ToolResult, execute_tool, truncate_for_log}; + use super::{ToolResult, execute_tool, is_editing_tool, truncate_for_log}; use crate::evolve::gitignore::load_gitignore_matcher; use serde_json::json; use std::fs; @@ -232,6 +239,22 @@ mod tests { assert_eq!(truncate_for_log(&s, 10), format!("{}...", "→".repeat(10))); } + #[test] + fn returns_true_for_editing_tools() { + assert!(is_editing_tool("edit_file")); + assert!(is_editing_tool("edit_nix_file")); + assert!(is_editing_tool("ensure_secret")); + } + + #[test] + fn returns_false_for_non_editing_tools() { + assert!(!is_editing_tool("read_file")); + assert!(!is_editing_tool("list_files")); + assert!(!is_editing_tool("build_check")); + assert!(!is_editing_tool("done")); + assert!(!is_editing_tool("")); + } + #[test] fn read_file_rejects_base_gitignored_files() { let tmp = tempdir().expect("tempdir"); diff --git a/apps/native/src-tauri/src/main.rs b/apps/native/src-tauri/src/main.rs index 37f7bce1d..2b888ed56 100644 --- a/apps/native/src-tauri/src/main.rs +++ b/apps/native/src-tauri/src/main.rs @@ -309,6 +309,7 @@ fn run_cli_mode(context: tauri::Context) -> i32 { Some(cli::Commands::Evolve { prompt, config, + max_iterations, max_output_tokens, max_token_budget, evolve_provider, @@ -363,6 +364,7 @@ fn run_cli_mode(context: tauri::Context) -> i32 { let cfg = cli::EvolveConfig { prompt, config, + max_iterations, max_output_tokens, max_token_budget, evolve_provider, diff --git a/apps/native/src-tauri/src/shared_types/evolve.rs b/apps/native/src-tauri/src/shared_types/evolve.rs index 700105953..425497cfe 100644 --- a/apps/native/src-tauri/src/shared_types/evolve.rs +++ b/apps/native/src-tauri/src/shared_types/evolve.rs @@ -207,6 +207,11 @@ pub enum EvolutionState { Failed, /// Agent responded conversationally without making any environment changes. Conversational, + /// Evolution was stopped because a safety limit was reached + /// (iterations, build attempts, token budget, or stale progress). + /// Distinguishes "we cut it off" from "the agent finished" so + /// the eval harness can score runaways correctly. + LimitReached, } /// Telemetry counters from a completed evolution run. diff --git a/apps/native/src-tauri/src/shared_types/prefs.rs b/apps/native/src-tauri/src/shared_types/prefs.rs index 2826d37ab..ac5fb4678 100644 --- a/apps/native/src-tauri/src/shared_types/prefs.rs +++ b/apps/native/src-tauri/src/shared_types/prefs.rs @@ -32,6 +32,8 @@ pub struct UiPrefs { pub evolve_provider: Option, /// Model used for AI evolution. pub evolve_model: Option, + /// Legacy maximum agent iterations per evolution. + pub max_iterations: Option, /// Maximum provider-reported tokens per evolution. pub max_token_budget: Option, /// Maximum build attempts per evolution. @@ -80,6 +82,8 @@ pub struct UiPrefsUpdate { pub summary_provider: Option, /// Summary model update. pub summary_model: Option, + /// Legacy maximum iteration count update. + pub max_iterations: Option, /// Maximum token budget update. pub max_token_budget: Option, /// Maximum build-attempt count update. diff --git a/apps/native/src-tauri/src/storage/configurable_scope.rs b/apps/native/src-tauri/src/storage/configurable_scope.rs index 459ad51db..af494dd5b 100644 --- a/apps/native/src-tauri/src/storage/configurable_scope.rs +++ b/apps/native/src-tauri/src/storage/configurable_scope.rs @@ -26,7 +26,7 @@ const REPO_README_CONTENT: &str = "\ This directory is managed by [nixmac](https://github.com/darkmatter/nixmac). `settings.json` holds user preferences that should follow you across machines -— things like the default model and confirmation +— things like agent iteration limits, default model, and confirmation behavior. The file is plain JSON; nixmac reads it on the next agent run. Per-device settings (developer mode, pinned version, update channel, model diff --git a/apps/native/src-tauri/src/storage/store.rs b/apps/native/src-tauri/src/storage/store.rs index c9646b4ee..9c9cd7128 100644 --- a/apps/native/src-tauri/src/storage/store.rs +++ b/apps/native/src-tauri/src/storage/store.rs @@ -54,6 +54,7 @@ pub const SYNC_SECRET_KEYCHAIN_KEY: &str = "nixmacSyncSecret"; /// Default sync server when the user has not configured a custom endpoint. pub const DEFAULT_SYNC_BASE_URL: &str = "https://sync.nixmac.app"; +pub const DEFAULT_MAX_ITERATIONS: usize = 25; pub const DEFAULT_MAX_OUTPUT_TOKENS: usize = 32_768; pub const DEFAULT_MAX_TOKEN_BUDGET: u32 = 50_000; const KEYCHAIN_SERVICE: &str = "com.darkmatter.nixmac"; @@ -629,13 +630,58 @@ fn get_repo_store( Ok(store) } -/// Gets the maximum token budget for evolution (default: 50,000). +/// Gets the maximum iterations for evolution (default: 25). Repo-scoped. +pub fn get_max_iterations(app: &AppHandle) -> Result { + if let Some(limits) = + app.try_state::>() + { + return Ok(limits.read_sync().max_iterations); + } + + let value = get_repo_store(app) + .ok() + .and_then(|s| s.get("maxIterations")) + .and_then(|v| serde_json::from_value::(v).ok()) + .unwrap_or(DEFAULT_MAX_ITERATIONS); + Ok(value) +} + +pub fn set_max_iterations(app: &AppHandle, max: usize) -> Result<()> { + if let Some(limits) = + app.try_state::>() + { + let mut limits = limits.write_sync(); + limits.max_iterations = max; + return Ok(()); + } + + let store = get_repo_store(app)?; + store.set("maxIterations", serde_json::json!(max)); + store.save()?; + Ok(()) +} + +/// Gets the maximum token budget for evolution (default: 50,000). Repo-scoped. pub fn get_max_token_budget(app: &AppHandle) -> Result { + if let Some(limits) = + app.try_state::>() + { + return Ok(limits.read_sync().max_token_budget); + } + Ok(get_json_pref(app, "maxTokenBudget")?.unwrap_or(DEFAULT_MAX_TOKEN_BUDGET)) } pub fn set_max_token_budget(app: &AppHandle, max: u32) -> Result<()> { - let store = get_store(app)?; + if let Some(limits) = + app.try_state::>() + { + let mut limits = limits.write_sync(); + limits.max_token_budget = max; + return Ok(()); + } + + let store = get_repo_store(app)?; store.set("maxTokenBudget", serde_json::json!(max)); store.save()?; Ok(()) diff --git a/apps/native/src/components/ui/tabs.stories.tsx b/apps/native/src/components/ui/tabs.stories.tsx index ade978720..14264bc82 100644 --- a/apps/native/src/components/ui/tabs.stories.tsx +++ b/apps/native/src/components/ui/tabs.stories.tsx @@ -23,7 +23,7 @@ export const SettingsSections = meta.story({ App preferences and telemetry controls. - Provider, model, and token budgets. + Provider, model, and iteration limits. Advanced diagnostics and pinned release controls. diff --git a/apps/native/src/components/widget/settings/__snapshots__/auto-config-field.stories.tsx.snap b/apps/native/src/components/widget/settings/__snapshots__/auto-config-field.stories.tsx.snap index 95839b364..79a8503ab 100644 --- a/apps/native/src/components/widget/settings/__snapshots__/auto-config-field.stories.tsx.snap +++ b/apps/native/src/components/widget/settings/__snapshots__/auto-config-field.stories.tsx.snap @@ -1,3 +1,3 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[`Controls 1`] = `"
"`; +exports[`Controls 1`] = `"
"`; diff --git a/apps/native/src/components/widget/settings/__snapshots__/auto-tuning-section.stories.tsx.snap b/apps/native/src/components/widget/settings/__snapshots__/auto-tuning-section.stories.tsx.snap index a50b3da2f..f3b0c47f9 100644 --- a/apps/native/src/components/widget/settings/__snapshots__/auto-tuning-section.stories.tsx.snap +++ b/apps/native/src/components/widget/settings/__snapshots__/auto-tuning-section.stories.tsx.snap @@ -1,5 +1,5 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[`Evolution Settings 1`] = `"
Tuning

Knobs that control how the evolution loop behaves. Changes take effect on the next run. Saved to .nixmac/settings.json in your config repo so they sync across machines.

"`; +exports[`Evolution Settings 1`] = `"
Tuning

Knobs that control how the evolution loop behaves. Changes take effect on the next run. Saved to .nixmac/settings.json in your config repo so they sync across machines.

"`; exports[`Load Error 1`] = `"
Tuning

Knobs that control how the evolution loop behaves. Changes take effect on the next run. Saved to .nixmac/settings.json in your config repo so they sync across machines.

Failed to load settings schema: Config registry unavailable

"`; diff --git a/apps/native/src/components/widget/settings/auto-config-field.stories.tsx b/apps/native/src/components/widget/settings/auto-config-field.stories.tsx index deb49444f..e09ae4fee 100644 --- a/apps/native/src/components/widget/settings/auto-config-field.stories.tsx +++ b/apps/native/src/components/widget/settings/auto-config-field.stories.tsx @@ -6,6 +6,16 @@ import { tauriAPI } from "@/ipc/api"; import type { ConfigFieldSchema, JsonValue } from "@/ipc/types"; const fields: Array<{ schema: ConfigFieldSchema; current: JsonValue }> = [ + { + schema: { + key: "maxIterations", + label: "Max iterations", + help: "API calls before the agent stops.", + ty: { kind: "number", min: 1, max: 200, step: 1 }, + default: 25, + }, + current: 25, + }, { schema: { key: "maxTokenBudget", diff --git a/apps/native/src/components/widget/settings/auto-tuning-section.stories.tsx b/apps/native/src/components/widget/settings/auto-tuning-section.stories.tsx index d94b34f5b..542c0d3a7 100644 --- a/apps/native/src/components/widget/settings/auto-tuning-section.stories.tsx +++ b/apps/native/src/components/widget/settings/auto-tuning-section.stories.tsx @@ -12,6 +12,13 @@ const schemas: ConfigurableSchema[] = [ displayName: "Evolution", description: "How long the agent will try before giving up.", fields: [ + { + key: "maxIterations", + label: "Max iterations", + help: "API calls before the agent stops. Lower is faster but may not finish complex changes.", + ty: { kind: "number", min: 1, max: 200, step: 1 }, + default: 25, + }, { key: "maxTokenBudget", label: "Max token budget", @@ -32,6 +39,7 @@ const schemas: ConfigurableSchema[] = [ const values: Record = { EvolutionLimits: { + maxIterations: 25, maxTokenBudget: 50000, maxBuildAttempts: 5, }, @@ -77,7 +85,7 @@ export const EvolutionSettings = meta.story({ ], play: async ({ canvasElement }) => { const canvas = within(canvasElement); - await waitFor(() => canvas.getByLabelText("Max token budget")); + await waitFor(() => canvas.getByLabelText("Max iterations")); }, }); diff --git a/apps/native/src/hooks/use-evolve.test.ts b/apps/native/src/hooks/use-evolve.test.ts index 69d85395c..1ff86ef83 100644 --- a/apps/native/src/hooks/use-evolve.test.ts +++ b/apps/native/src/hooks/use-evolve.test.ts @@ -110,4 +110,34 @@ describe("useEvolve", () => { expect(useViewModel.getState().changeMap).toBe(existingMap); expect(useWidgetStore.getState().conversationalResponse).toBe("No file changes needed."); }); + + it("logs a stopped message when a safety limit is reached", async () => { + const limitReachedResult: EvolutionResult = { + changeMap: { groups: [], singles: [], unsummarizedHashes: [] }, + gitStatus, + evolveState, + conversationalResponse: null, + telemetry: { + state: "limitReached", + iterations: 25, + buildAttempts: 0, + totalTokens: 50_000, + editsCount: 0, + thinkingCount: 0, + toolCallsCount: 0, + durationMs: 12_345, + }, + }; + + mocks.evolve.mockResolvedValue(limitReachedResult); + + const store = useWidgetStore.getState(); + store.setEvolvePrompt("install htop"); + + await useEvolve().handleEvolve(); + + const logs = useWidgetStore.getState().consoleLogs; + expect(logs).toContain("Evolution stopped (safety limit reached)"); + expect(logs).not.toContain("✓ Evolution complete"); + }); }); diff --git a/apps/native/src/hooks/use-evolve.ts b/apps/native/src/hooks/use-evolve.ts index 68f652e26..d4c352c12 100644 --- a/apps/native/src/hooks/use-evolve.ts +++ b/apps/native/src/hooks/use-evolve.ts @@ -88,13 +88,21 @@ const handleEvolve = async () => { // Backend handles: AI + summary + branch + commit + DB const result = await tauriAPI.darwin.evolve(store.evolvePrompt); const isConversational = result?.telemetry?.state === "conversational"; + const isLimitReached = result?.telemetry?.state === "limitReached"; const telemetry = result?.telemetry; - const completionMsg = telemetry - ? `✓ Evolution complete in ${formatDurationMs(telemetry.durationMs)} and ${telemetry.iterations} iteration${telemetry.iterations === 1 ? "" : "s"}\n` - : "✓ Evolution complete\n"; + const iterationSuffix = telemetry + ? ` in ${formatDurationMs(telemetry.durationMs)} and ${telemetry.iterations} iteration${telemetry.iterations === 1 ? "" : "s"}` + : ""; + const completionMsg = isLimitReached + ? `⏸ Evolution stopped (safety limit reached)${iterationSuffix}\n` + : `✓ Evolution complete${iterationSuffix}\n`; useWidgetStore.getState().appendLog(completionMsg); - toast.success(completionMsg); + if (isLimitReached) { + toast.info(completionMsg); + } else { + toast.success(completionMsg); + } if (telemetry) { useWidgetStore.getState().setEvolutionTelemetry(telemetry); } diff --git a/apps/native/src/ipc/api.test.ts b/apps/native/src/ipc/api.test.ts index 5711c8609..1bfa31c24 100644 --- a/apps/native/src/ipc/api.test.ts +++ b/apps/native/src/ipc/api.test.ts @@ -34,6 +34,9 @@ const prefs = (overrides: Partial = {}): UiPrefs => summaryModel: "openai/gpt-4o-mini", evolveProvider: "openrouter", evolveModel: "anthropic/claude-sonnet-4", + maxIterations: 25, + maxTokenBudget: 50000, + maxOutputTokens: 32768, maxBuildAttempts: 5, sendDiagnostics: false, confirmBuild: true, diff --git a/apps/native/src/ipc/types.ts b/apps/native/src/ipc/types.ts index 975ce77a3..616cd1861 100644 --- a/apps/native/src/ipc/types.ts +++ b/apps/native/src/ipc/types.ts @@ -422,7 +422,14 @@ export type EvolutionState = /** * Agent responded conversationally without making any environment changes. */ -"conversational" +"conversational" | +/** + * Evolution was stopped because a safety limit was reached + * (iterations, build attempts, token budget, or stale progress). + * Distinguishes "we cut it off" from "the agent finished" so + * the eval harness can score runaways correctly. + */ +"limitReached" /** * Telemetry counters from a completed evolution run. @@ -1644,6 +1651,10 @@ evolveProvider: string | null; * Model used for AI evolution. */ evolveModel: string | null; +/** + * Legacy maximum agent iterations per evolution. + */ +maxIterations: number | null; /** * Maximum provider-reported tokens per evolution. */ @@ -1731,6 +1742,10 @@ summaryProvider: string | null; * Summary model update. */ summaryModel: string | null; +/** + * Legacy maximum iteration count update. + */ +maxIterations: number | null; /** * Maximum token budget update. */ diff --git a/apps/native/src/lib/ai-provider-migration.test.ts b/apps/native/src/lib/ai-provider-migration.test.ts index 4c07beb12..29388de2e 100644 --- a/apps/native/src/lib/ai-provider-migration.test.ts +++ b/apps/native/src/lib/ai-provider-migration.test.ts @@ -12,6 +12,7 @@ const PREFS: UiPrefs = { summaryModel: "openai/gpt-4o-mini", evolveProvider: "openrouter", evolveModel: "anthropic/claude-sonnet-4", + maxIterations: null, maxTokenBudget: null, maxBuildAttempts: null, maxOutputTokens: null, diff --git a/apps/native/src/lib/constants.ts b/apps/native/src/lib/constants.ts index 6b81ad695..7b87128cb 100644 --- a/apps/native/src/lib/constants.ts +++ b/apps/native/src/lib/constants.ts @@ -1,5 +1,6 @@ // Must match session_control::EVOLUTION_CANCELLED_MSG in src-tauri. export const EVOLUTION_CANCELLED_MSG = "Evolution cancelled by user"; +export const DEFAULT_MAX_ITERATIONS = 25; export const DEFAULT_MAX_OUTPUT_TOKENS = 32_768; export const DEFAULT_MAX_TOKEN_BUDGET = 50_000; export const EVOLVE_EVENT_CHANNEL = "darwin:evolve:event";