Skip to content

Commit 4b3ddd6

Browse files
authored
feat(coder): fix max-turn coder bugs (#349)
* update routing * update routing * update routing * update coder * update coder
1 parent 2ec90d3 commit 4b3ddd6

File tree

3 files changed

+134
-4
lines changed

3 files changed

+134
-4
lines changed

tests/test_scientist.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,81 @@ def test_output_dir(tmp_path: Path) -> Path:
4949

5050
def test_mock() -> None:
    """Placeholder test that always passes."""
    assert True
52+
53+
54+
def test_coder_recovers_from_max_turns_with_existing_workspace(
    test_output_dir: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Check that ``Coder.run`` recovers when code generation hits the max-turn limit.

    The experiment loop is stubbed to raise ``MaxTurnsExceeded``; the single
    experiment run is stubbed to succeed. ``run`` is then expected to report
    success, return the output directory, and attach a recovery note that
    mentions the max-turn limit.

    The test is skipped when the ``agents`` package is not installed.
    """
    agents = pytest.importorskip("agents")
    from tiny_scientist.coder import Coder

    MaxTurnsExceeded = agents.exceptions.MaxTurnsExceeded
    coder = Coder(
        model="gpt-4-test",
        output_dir=str(test_output_dir),
        use_docker=False,
    )

    idea = {
        "Title": "Test Idea",
        "Problem": "Test problem",
        "Approach": "Test approach",
        "Experiment": {"Model": {}, "Dataset": {}, "Metric": {}},
        "ExperimentTable": "| Row | Details |\n| --- | --- |\n| Baselines | Test |\n",
    }

    # The recovery path refuses to continue unless the entrypoint script is
    # present on disk, so materialize the "existing workspace" explicitly
    # instead of relying on some other component having created it.
    entrypoint = Path(coder._entrypoint_path())
    entrypoint.parent.mkdir(parents=True, exist_ok=True)
    if not entrypoint.exists():
        entrypoint.write_text("print('ok')\n", encoding="utf-8")

    def raise_max_turns(_idea: object, _baseline: object = None) -> bool:
        # Simulates the agent running out of turns during code generation.
        raise MaxTurnsExceeded("max turns")

    def run_single_ok(
        run_num: int,
        idea: object,
        experiment_table: str,
        table_rows: object,
        timeout: int = 7200,
    ) -> tuple:
        # Pretends the current workspace executes successfully.
        return 0, "ok"

    monkeypatch.setattr(coder, "setup_agent", lambda: None)
    monkeypatch.setattr(coder, "_run_experiment_loop", raise_max_turns)
    monkeypatch.setattr(
        coder, "_format_experiment_for_prompt", lambda _exp: ("", "", "", "", "", "")
    )
    monkeypatch.setattr(coder, "_run_single_experiment", run_single_ok)
    monkeypatch.setattr(coder, "_update_notes", lambda: None)
    monkeypatch.setattr(coder, "_write_search_links_manifest", lambda _idea: None)

    status, exp_dir, note = coder.run(idea=idea)

    assert status is True
    assert exp_dir == str(test_output_dir)
    assert note is not None
    assert "max-turn limit" in note
96+
97+
98+
def test_scientist_code_marks_partial_success(
    test_output_dir: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Check that ``TinyScientist.code`` still succeeds when the coder returns a note.

    ``coder.run`` is replaced by a stub that reports success together with a
    recovery note; ``code`` must return a true status and the output directory.
    The test is skipped when the ``agents`` package is not installed.
    """
    pytest.importorskip("agents")
    from tiny_scientist.scientist import TinyScientist

    workspace = str(test_output_dir)
    scientist = TinyScientist(
        model="gpt-4-test",
        output_dir=workspace,
        enable_safety_check=False,
        use_docker=False,
        agent_sdk="openai",
    )

    def recovered_run(idea, baseline_results=None):
        # Mimics a coder run that succeeded only through workspace recovery.
        return (
            True,
            str(test_output_dir),
            "Recovered by running the current workspace.",
        )

    monkeypatch.setattr(scientist.coder, "run", recovered_run)

    minimal_idea = {
        "Title": "Test Idea",
        "Experiment": {"Model": {}, "Dataset": {}, "Metric": {}},
    }
    status, exp_dir = scientist.code(idea=minimal_idea)

    assert status is True
    assert exp_dir == workspace

tiny_scientist/coder.py

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing import Any, Dict, List, Optional, Tuple
1111

1212
from agents import Agent, Runner
13+
from agents.exceptions import MaxTurnsExceeded
1314
from rich import print
1415

1516
from .budget_checker import BudgetChecker
@@ -48,6 +49,7 @@ class Coder:
4849
STEP_TEXT_CONTEXT_LIMIT = 4000
4950
STEP_TABLE_CONTEXT_LIMIT = 5000
5051
STEP_TODO_CONTEXT_LIMIT = 3000
52+
OPENAI_CODER_MAX_TURNS = 24
5153

5254
def __init__(
5355
self,
@@ -295,7 +297,17 @@ def run(
295297
self.setup_agent()
296298

297299
# Run experiments
298-
success = self._run_experiment_loop(idea, baseline_results)
300+
recovery_note: Optional[str] = None
301+
try:
302+
success = self._run_experiment_loop(idea, baseline_results)
303+
except MaxTurnsExceeded:
304+
success, recovery_note = self._recover_from_codegen_interruption(
305+
idea,
306+
interruption_reason=(
307+
"Code generation hit the agent max-turn limit; "
308+
"using the current workspace state."
309+
),
310+
)
299311

300312
if not success:
301313
# Even if failed, save an empty result file to avoid breaking writer
@@ -326,7 +338,42 @@ def run(
326338

327339
self.cost_tracker.report("Coder Total Cost")
328340

329-
return True, self.output_dir, None
341+
return True, self.output_dir, recovery_note
342+
343+
def _recover_from_codegen_interruption(
    self,
    idea: Dict[str, Any],
    interruption_reason: str,
) -> Tuple[bool, Optional[str]]:
    """Try to continue from the current workspace when agent coding is interrupted.

    Args:
        idea: Idea dictionary; its ``"ExperimentTable"`` entry is required
            for recovery.
        interruption_reason: Human-readable reason code generation stopped;
            it is prepended to the recovery note on success.

    Returns:
        ``(True, note)`` when the entrypoint exists, an experiment table is
        available, and the single recovery run exits with code 0; otherwise
        ``(False, None)``.
    """
    main_path = self._entrypoint_path()
    if not osp.exists(main_path):
        print("[System] Code generation stopped before main.py was created.")
        return False, None

    # ``or ""`` matters: a key that is present but set to None would otherwise
    # be stringified to the truthy text "None" and slip past the guard below.
    experiment_table = str(idea.get("ExperimentTable") or "").strip()
    if not experiment_table:
        print("[System] Cannot recover coder run without ExperimentTable.")
        return False, None

    table_rows = self._extract_table_rows(experiment_table)
    print(
        "[System] Code generation stopped early. "
        "Attempting to run the current workspace and continue with partial results..."
    )
    return_code, message = self._run_single_experiment(
        run_num=1,
        idea=idea,
        experiment_table=experiment_table,
        table_rows=table_rows,
    )
    if return_code != 0:
        print("[System] Recovery run failed after code generation interruption.")
        self._print_run_summary(success=False, error_message=message)
        return False, None

    self._print_run_summary(success=True)
    return True, f"{interruption_reason} Recovered by running the current workspace."
330377

331378
def _format_experiment_for_prompt(
332379
self, exp: Dict[str, Any]
@@ -716,7 +763,11 @@ def _generate_experiment(self, prompt: str) -> str:
716763

717764
if self.agent is None:
718765
raise RuntimeError("Agent not initialized. Call setup_agent() first.")
719-
result = Runner.run_sync(self.agent, task_prompt)
766+
result = Runner.run_sync(
767+
self.agent,
768+
task_prompt,
769+
max_turns=self.OPENAI_CODER_MAX_TURNS,
770+
)
720771
track_sdk_cost(result, self.cost_tracker, self.model, "generate_experiment")
721772
return result.final_output or "CONTINUE"
722773

tiny_scientist/scientist.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,9 @@ def code(
242242
print_mapping_table(
243243
"Coding Result",
244244
{
245-
"Status": "success",
245+
"Status": "partial" if error_details else "success",
246246
"Experiment Dir": exp_path,
247+
"Note": error_details or "-",
247248
},
248249
)
249250
else:

0 commit comments

Comments
 (0)