feat(coder): fix max-turn coder bugs (#349)

lwaekfjlk · web-flow · commit 4b3ddd66168f · 2026-03-01T03:46:13.000+08:00
* update routing

* update routing

* update routing

* update coder

* update coder
diff --git a/tests/test_scientist.py b/tests/test_scientist.py
@@ -49,3 +49,81 @@ def test_output_dir(tmp_path: Path) -> Path:
 
 def test_mock() -> None:
     assert True
+
+
+def test_coder_recovers_from_max_turns_with_existing_workspace(
+    test_output_dir: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    agents = pytest.importorskip("agents")
+    from tiny_scientist.coder import Coder
+
+    MaxTurnsExceeded = agents.exceptions.MaxTurnsExceeded
+    coder = Coder(
+        model="gpt-4-test",
+        output_dir=str(test_output_dir),
+        use_docker=False,
+    )
+
+    idea = {
+        "Title": "Test Idea",
+        "Problem": "Test problem",
+        "Approach": "Test approach",
+        "Experiment": {"Model": {}, "Dataset": {}, "Metric": {}},
+        "ExperimentTable": "| Row | Details |\n| --- | --- |\n| Baselines | Test |\n",
+    }
+
+    monkeypatch.setattr(coder, "setup_agent", lambda: None)
+    monkeypatch.setattr(
+        coder,
+        "_run_experiment_loop",
+        lambda _idea, _baseline=None: (_ for _ in ()).throw(MaxTurnsExceeded("max turns")),
+    )
+    monkeypatch.setattr(coder, "_format_experiment_for_prompt", lambda _exp: ("", "", "", "", "", ""))
+    monkeypatch.setattr(
+        coder,
+        "_run_single_experiment",
+        lambda run_num, idea, experiment_table, table_rows, timeout=7200: (0, "ok"),
+    )
+    monkeypatch.setattr(coder, "_update_notes", lambda: None)
+    monkeypatch.setattr(coder, "_write_search_links_manifest", lambda _idea: None)
+
+    status, exp_dir, note = coder.run(idea=idea)
+
+    assert status is True
+    assert exp_dir == str(test_output_dir)
+    assert note is not None
+    assert "max-turn limit" in note
+
+
+def test_scientist_code_marks_partial_success(
+    test_output_dir: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    pytest.importorskip("agents")
+    from tiny_scientist.scientist import TinyScientist
+
+    scientist = TinyScientist(
+        model="gpt-4-test",
+        output_dir=str(test_output_dir),
+        enable_safety_check=False,
+        use_docker=False,
+        agent_sdk="openai",
+    )
+    monkeypatch.setattr(
+        scientist.coder,
+        "run",
+        lambda idea, baseline_results=None: (
+            True,
+            str(test_output_dir),
+            "Recovered by running the current workspace.",
+        ),
+    )
+
+    status, exp_dir = scientist.code(
+        idea={
+            "Title": "Test Idea",
+            "Experiment": {"Model": {}, "Dataset": {}, "Metric": {}},
+        }
+    )
+
+    assert status is True
+    assert exp_dir == str(test_output_dir)
diff --git a/tiny_scientist/coder.py b/tiny_scientist/coder.py
@@ -10,6 +10,7 @@
 from typing import Any, Dict, List, Optional, Tuple
 
 from agents import Agent, Runner
+from agents.exceptions import MaxTurnsExceeded
 from rich import print
 
 from .budget_checker import BudgetChecker
@@ -48,6 +49,7 @@ class Coder:
     STEP_TEXT_CONTEXT_LIMIT = 4000
     STEP_TABLE_CONTEXT_LIMIT = 5000
     STEP_TODO_CONTEXT_LIMIT = 3000
+    OPENAI_CODER_MAX_TURNS = 24
 
     def __init__(
         self,
@@ -295,7 +297,17 @@ def run(
         self.setup_agent()
 
         # Run experiments
-        success = self._run_experiment_loop(idea, baseline_results)
+        recovery_note: Optional[str] = None
+        try:
+            success = self._run_experiment_loop(idea, baseline_results)
+        except MaxTurnsExceeded:
+            success, recovery_note = self._recover_from_codegen_interruption(
+                idea,
+                interruption_reason=(
+                    "Code generation hit the agent max-turn limit; "
+                    "using the current workspace state."
+                ),
+            )
 
         if not success:
             # Even if failed, save an empty result file to avoid breaking writer
@@ -326,7 +338,42 @@ def run(
 
         self.cost_tracker.report("Coder Total Cost")
 
-        return True, self.output_dir, None
+        return True, self.output_dir, recovery_note
+
+    def _recover_from_codegen_interruption(
+        self,
+        idea: Dict[str, Any],
+        interruption_reason: str,
+    ) -> Tuple[bool, Optional[str]]:
+        """Try to continue from the current workspace when agent coding is interrupted."""
+        main_path = self._entrypoint_path()
+        if not osp.exists(main_path):
+            print("[System] Code generation stopped before main.py was created.")
+            return False, None
+
+        experiment_table = str(idea.get("ExperimentTable", "")).strip()
+        if not experiment_table:
+            print("[System] Cannot recover coder run without ExperimentTable.")
+            return False, None
+
+        table_rows = self._extract_table_rows(experiment_table)
+        print(
+            "[System] Code generation stopped early. "
+            "Attempting to run the current workspace and continue with partial results..."
+        )
+        return_code, message = self._run_single_experiment(
+            run_num=1,
+            idea=idea,
+            experiment_table=experiment_table,
+            table_rows=table_rows,
+        )
+        if return_code != 0:
+            print("[System] Recovery run failed after code generation interruption.")
+            self._print_run_summary(success=False, error_message=message)
+            return False, None
+
+        self._print_run_summary(success=True)
+        return True, f"{interruption_reason} Recovered by running the current workspace."
 
     def _format_experiment_for_prompt(
         self, exp: Dict[str, Any]
@@ -716,7 +763,11 @@ def _generate_experiment(self, prompt: str) -> str:
 
         if self.agent is None:
             raise RuntimeError("Agent not initialized. Call setup_agent() first.")
-        result = Runner.run_sync(self.agent, task_prompt)
+        result = Runner.run_sync(
+            self.agent,
+            task_prompt,
+            max_turns=self.OPENAI_CODER_MAX_TURNS,
+        )
         track_sdk_cost(result, self.cost_tracker, self.model, "generate_experiment")
         return result.final_output or "CONTINUE"
 
diff --git a/tiny_scientist/scientist.py b/tiny_scientist/scientist.py
@@ -242,8 +242,9 @@ def code(
             print_mapping_table(
                 "Coding Result",
                 {
-                    "Status": "success",
+                    "Status": "partial" if error_details else "success",
                     "Experiment Dir": exp_path,
+                    "Note": error_details or "-",
                 },
             )
         else:

Original file line number	Diff line number	Diff line change
`@@ -242,8 +242,9 @@ def code(`
`242`	`242`	`print_mapping_table(`
`243`	`243`	`"Coding Result",`
`244`	`244`	`{`
`245`		`- "Status": "success",`
	`245`	`+ "Status": "partial" if error_details else "success",`
`246`	`246`	`"Experiment Dir": exp_path,`
	`247`	`+ "Note": error_details or "-",`
`247`	`248`	`},`
`248`	`249`	`)`
`249`	`250`	`else:`