Skip to content

Commit 10545ac

Browse files
fix(agent): harden backend execution and playwright mcp error handling
1 parent 661f760 commit 10545ac

5 files changed

Lines changed: 174 additions & 60 deletions

File tree

agent/run_agent.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,17 @@ def main() -> None:
182182
"You are testing a text-based adventure game. Focus on exploration, items, and puzzle logic.",
183183
)
184184
report = orchestrator.run(game_profile)
185-
game_base_url = game_config.get("base_url") or (
186-
f"http://localhost:{game_config['port']}/api/agent"
185+
port = game_config.get("port")
186+
if port is None:
187+
raise ValueError(f"Game config for '{args.game}' is missing required 'port'")
188+
game_base_url = str(game_config.get("base_url") or "").strip() or (
189+
f"http://localhost:{port}/api/agent"
187190
)
191+
frontend_url = str(
192+
game_config.get("frontend_url")
193+
or backend_spec.settings.get("frontend_url")
194+
or f"http://localhost:{port}"
195+
).strip()
188196
report.metadata["llm"] = {
189197
"model": model,
190198
"platform": resolved_platform,
@@ -199,6 +207,7 @@ def main() -> None:
199207
"name": args.game,
200208
"port": game_config.get("port"),
201209
"base_url": game_base_url,
210+
"frontend_url": frontend_url,
202211
"backend_type": backend_spec.backend_type,
203212
"have_ground_truth": bool(game_config.get("ground_truth", False)),
204213
"profile": game_profile,

agent/src/computeruse/mcp_client.py

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
from collections import deque
56
import json
67
import subprocess
78
import threading
@@ -32,8 +33,11 @@ def __init__(
3233
self._startup_timeout = startup_timeout
3334
self._process: subprocess.Popen[bytes] | None = None
3435
self._reader: threading.Thread | None = None
36+
self._stderr_reader: threading.Thread | None = None
3537
self._messages: Queue[Dict[str, Any]] = Queue()
3638
self._next_id = 1
39+
self._stderr_lines: deque[str] = deque(maxlen=200)
40+
self._stderr_lock = threading.Lock()
3741

3842
def start(self) -> None:
3943
if self._process is not None:
@@ -47,6 +51,11 @@ def start(self) -> None:
4751
)
4852
self._reader = threading.Thread(target=self._read_loop, daemon=True)
4953
self._reader.start()
54+
self._stderr_reader = threading.Thread(
55+
target=self._read_stderr_loop,
56+
daemon=True,
57+
)
58+
self._stderr_reader.start()
5059
self.initialize()
5160
self.notify("notifications/initialized", {})
5261

@@ -152,25 +161,31 @@ def _raise_if_process_exited(self, method: str) -> None:
152161
return_code = process.poll()
153162
if return_code is None:
154163
return
155-
stderr_text = self._read_stderr(process)
164+
stderr_text = self._recent_stderr()
156165
detail = (
157166
f"MCP process exited with code {return_code} while waiting for {method}."
158167
)
159168
if stderr_text:
160169
detail = f"{detail} stderr: {stderr_text}"
161170
raise McpProtocolError(detail)
162171

163-
@staticmethod
164-
def _read_stderr(process: subprocess.Popen[bytes]) -> str:
165-
if process.stderr is None:
166-
return ""
167-
try:
168-
raw = process.stderr.read()
169-
except Exception: # noqa: BLE001
170-
return ""
171-
if not raw:
172-
return ""
173-
return raw.decode("utf-8", errors="replace").strip()
172+
def _read_stderr_loop(self) -> None:
    """Drain the MCP child's stderr into the bounded ``_stderr_lines`` buffer.

    Intended as the target of the background thread created in ``start()``.
    Each line is decoded as UTF-8 (undecodable bytes are replaced), stripped
    of trailing whitespace, and appended under ``_stderr_lock``; blank lines
    are dropped. The loop ends at end-of-file, or as soon as the pipe can no
    longer be read.
    """
    # Snapshot the process once so a concurrent reassignment of
    # ``self._process`` cannot change it between the check and the use.
    process = self._process
    if not process or not process.stderr:
        return
    stderr = process.stderr
    while True:
        try:
            raw_line = stderr.readline()
        except (OSError, ValueError):
            # The pipe was closed underneath the reader (typically while
            # shutting the client down). Capturing stderr is best-effort
            # diagnostics, so stop instead of crashing the thread.
            return
        if not raw_line:
            # Empty read means end-of-file: the child closed its stderr.
            return
        text = raw_line.decode("utf-8", errors="replace").rstrip()
        if not text:
            continue
        with self._stderr_lock:
            self._stderr_lines.append(text)
185+
186+
def _recent_stderr(self) -> str:
    """Return the buffered stderr lines as one newline-joined, stripped string.

    The buffer is copied while holding ``_stderr_lock``; an empty buffer
    yields an empty string.
    """
    with self._stderr_lock:
        buffered = list(self._stderr_lines)
    combined = "\n".join(buffered)
    return combined.strip()
174189

175190

176191
def default_mcp_cwd(root_path: str) -> str:

agent/src/computeruse/playwright_backend.py

Lines changed: 86 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ def __init__(
5050
*,
5151
client_factory: Optional[Callable[[], StdioMcpClient]] = None,
5252
) -> None:
53+
if not settings.command:
54+
raise ValueError("Playwright MCP command must not be empty")
5355
self._settings = settings
5456
self._client_factory = client_factory or (
5557
lambda: StdioMcpClient(
@@ -95,21 +97,25 @@ def from_config(
9597
def start_session(self, run_context: Dict[str, Any]) -> SessionHandle:
9698
del run_context
9799
client = self._client_factory()
98-
client.start()
99-
tools = client.list_tools()
100-
self._call_tool(
101-
client,
102-
self._settings.navigate_tool,
103-
{"url": self._settings.frontend_url},
104-
)
105-
initial_observation = self._snapshot_observation(client)
106-
return SessionHandle(
107-
session_id=str(uuid4()),
108-
backend_type=self.backend_type,
109-
raw={"client": client, "tools": tools},
110-
metadata={"frontend_url": self._settings.frontend_url},
111-
initial_observation=initial_observation,
112-
)
100+
try:
101+
client.start()
102+
tools = client.list_tools()
103+
self._call_tool(
104+
client,
105+
self._settings.navigate_tool,
106+
{"url": self._settings.frontend_url},
107+
)
108+
initial_observation = self._snapshot_observation(client)
109+
return SessionHandle(
110+
session_id=str(uuid4()),
111+
backend_type=self.backend_type,
112+
raw={"client": client, "tools": tools},
113+
metadata={"frontend_url": self._settings.frontend_url},
114+
initial_observation=initial_observation,
115+
)
116+
except Exception:
117+
client.close()
118+
raise
113119

114120
def describe_capabilities(
115121
self,
@@ -200,40 +206,33 @@ def execute(
200206
attempt.final_status = "completed"
201207
observation.execution = {
202208
"attempts": [self._attempt_to_dict(attempt)],
203-
"diagnostics": {"backend_type": self.backend_type},
209+
"diagnostics": {
210+
"backend_type": self.backend_type,
211+
"per_call_results": per_call_results,
212+
},
204213
}
205214
return BackendExecutionResult(
206215
observation=observation,
207216
attempts=[attempt],
208-
diagnostics={"backend_type": self.backend_type},
217+
diagnostics={
218+
"backend_type": self.backend_type,
219+
"per_call_results": per_call_results,
220+
},
209221
)
210222
except McpProtocolError as exc:
211-
error_text = str(exc)
212-
error_kind = self._error_kind(error_text)
213-
attempt.per_call_results = per_call_results
214-
attempt.error = error_text
215-
attempt.suspected_origin = "execution"
216-
observation = Observation(
217-
success=False,
218-
message=error_text,
219-
state={},
220-
summary=f"Execution failure in Playwright MCP: {error_text}",
221-
env_state={},
222-
artifacts={},
223-
execution={
224-
"attempts": [self._attempt_to_dict(attempt)],
225-
"diagnostics": {
226-
"backend_type": self.backend_type,
227-
"error": error_text,
228-
"error_kind": error_kind,
229-
},
230-
"suspected_origin": "execution",
231-
},
223+
return self._execution_failure_result(
224+
attempt=attempt,
225+
per_call_results=per_call_results,
226+
error_text=str(exc),
227+
error_kind=self._error_kind(str(exc)),
232228
)
233-
return BackendExecutionResult(
234-
observation=observation,
235-
attempts=[attempt],
236-
diagnostics={"backend_type": self.backend_type, "error": error_text},
229+
except Exception as exc: # noqa: BLE001
230+
return self._execution_failure_result(
231+
attempt=attempt,
232+
per_call_results=per_call_results,
233+
error_text=str(exc),
234+
error_kind="backend_exception",
235+
exception_type=type(exc).__name__,
237236
)
238237

239238
def close_session(self, session: SessionHandle) -> None:
@@ -249,6 +248,9 @@ def _snapshot_observation(
249248
screenshots: Optional[List[Dict[str, Any]]] = None,
250249
) -> Observation:
251250
snapshot = self._call_tool(client, self._settings.snapshot_tool, {})
251+
snapshot_error = self._tool_result_error(snapshot)
252+
if snapshot_error:
253+
raise McpProtocolError(snapshot_error)
252254
snapshot_text = self._extract_text(snapshot)
253255
env_state = self._extract_env_state(snapshot_text)
254256
artifacts = {"snapshot": snapshot}
@@ -292,7 +294,9 @@ def _map_call(self, call: ExecutionCall) -> tuple[str, Dict[str, Any]]:
292294
if call.kind == "press":
293295
return self._settings.press_tool, {"key": call.text or call.target}
294296
if call.kind == "wait":
295-
return self._settings.wait_tool, {"time": call.duration_ms or 1000}
297+
return self._settings.wait_tool, {
298+
"time": max(call.duration_ms or 1000, 0) / 1000.0
299+
}
296300
if call.kind == "screenshot":
297301
filename = self._screenshot_filename(call)
298302
arguments: Dict[str, Any] = {"filename": filename}
@@ -501,6 +505,45 @@ def _result_excerpt(payload: Dict[str, Any]) -> str:
501505
return text[:500]
502506
return json.dumps(payload, ensure_ascii=False)[:500]
503507

508+
def _execution_failure_result(
    self,
    *,
    attempt: ExecutionAttempt,
    per_call_results: List[Dict[str, Any]],
    error_text: str,
    error_kind: str,
    exception_type: str = "",
) -> BackendExecutionResult:
    """Build the failed ``BackendExecutionResult`` for an aborted execution.

    Records the failure on ``attempt`` (per-call results, error text, and
    ``suspected_origin="execution"``), then wraps it in an unsuccessful
    ``Observation`` and returns the result.

    Args:
        attempt: The attempt being executed; mutated in place.
        per_call_results: Results gathered for calls that ran before the
            failure.
        error_text: Human-readable failure message.
        error_kind: Classification stored under ``diagnostics["error_kind"]``.
        exception_type: Optional exception class name; added to the
            diagnostics only when non-empty.

    Returns:
        A result whose observation has ``success=False`` and whose
        diagnostics describe the failure.
    """
    attempt.per_call_results = per_call_results
    attempt.error = error_text
    attempt.suspected_origin = "execution"
    diagnostics: Dict[str, Any] = {
        "backend_type": self.backend_type,
        "error": error_text,
        "error_kind": error_kind,
        "per_call_results": per_call_results,
    }
    if exception_type:
        diagnostics["exception_type"] = exception_type
    observation = Observation(
        success=False,
        message=error_text,
        state={},
        summary=f"Execution failure in Playwright MCP: {error_text}",
        env_state={},
        artifacts={},
        execution={
            "attempts": [self._attempt_to_dict(attempt)],
            # Give the observation its own dict so that later edits to the
            # result-level diagnostics cannot silently alter what the
            # observation recorded (and vice versa).
            "diagnostics": dict(diagnostics),
            "suspected_origin": "execution",
        },
    )
    return BackendExecutionResult(
        observation=observation,
        attempts=[attempt],
        diagnostics=diagnostics,
    )
546+
504547
@staticmethod
505548
def _attempt_to_dict(attempt: ExecutionAttempt) -> Dict[str, Any]:
506549
payload = {

agent/src/execution_backends.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def execute(
118118
summary="No executable calls were produced for this step.",
119119
env_state={},
120120
execution={
121-
"attempts": [attempt.__dict__],
121+
"attempts": [self._attempt_to_dict(attempt)],
122122
"diagnostics": {"error": "empty_execution_request"},
123123
"suspected_origin": "execution",
124124
},
@@ -144,7 +144,7 @@ def execute(
144144
summary=f"Execution failure while sending command: {exc}",
145145
env_state={},
146146
execution={
147-
"attempts": [attempt.__dict__],
147+
"attempts": [self._attempt_to_dict(attempt)],
148148
"diagnostics": {"error": str(exc), "backend_type": self.backend_type},
149149
"suspected_origin": "execution",
150150
},

agent/src/operator.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,24 @@ def execute(
8080
self._translation_failure_result(str(exc)),
8181
attempts,
8282
)
83-
result = backend.execute(session, request)
83+
try:
84+
result = backend.execute(session, request)
85+
except Exception as exc: # noqa: BLE001
86+
attempts.append(
87+
ExecutionAttempt(
88+
attempt=operator_attempt,
89+
translated_calls=request.calls,
90+
retry_reason=retry_reason,
91+
success=False,
92+
final_status="backend_exception",
93+
suspected_origin="execution",
94+
error=str(exc),
95+
)
96+
)
97+
return self._merge_attempts(
98+
self._backend_exception_result(exc),
99+
attempts,
100+
)
84101
attempt = self._coerce_attempt(
85102
request=request,
86103
result=result,
@@ -305,6 +322,36 @@ def _translation_failure_result(error_text: str) -> BackendExecutionResult:
305322
},
306323
)
307324

325+
@staticmethod
def _backend_exception_result(exc: Exception) -> BackendExecutionResult:
    """Wrap an exception raised by the backend in a failed execution result.

    The exception's message and class name are recorded in the diagnostics
    of both the observation and the returned result; no attempts are
    attached here.
    """
    error_text = str(exc)
    failure_details = {
        "error": error_text,
        "error_kind": "backend_exception",
        "exception_type": type(exc).__name__,
    }
    execution_record = {
        "attempts": [],
        # The observation keeps its own copy of the diagnostics.
        "diagnostics": dict(failure_details),
        "suspected_origin": "execution",
    }
    failed_observation = Observation(
        success=False,
        message=error_text,
        state={},
        summary=f"Execution backend raised an unexpected exception: {error_text}",
        env_state={},
        artifacts={},
        execution=execution_record,
    )
    return BackendExecutionResult(
        observation=failed_observation,
        attempts=[],
        diagnostics=failure_details,
    )
354+
308355
@staticmethod
309356
def _describe_capabilities(
310357
capability: CapabilityDescriptor,

0 commit comments

Comments
 (0)