Skip to content

Commit d0d8d6f

Browse files
xumaple and claude committed
Translate terminal sandbox errors to non-retryable ApplicationError
Sandbox integration activities in the OpenAI Agents contrib passed every exception from the agents.sandbox library through as-is, so Temporal treated them all as retryable. A terminal failure (e.g. the sandbox was stopped externally) would retry forever and wedge the workflow.

openai-agents 0.17.5 exposes SandboxError.retryable. Wrap each sandbox activity so a SandboxError the library has classified as terminal (retryable is False) is re-raised as a non-retryable ApplicationError; transient and unclassified errors (retryable True or None) still propagate and retry by default.

Fixes #1548

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 24badcf commit d0d8d6f

3 files changed

Lines changed: 212 additions & 86 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ classifiers = [
2828
grpc = ["grpcio>=1.48.2,<2"]
2929
opentelemetry = ["opentelemetry-api>=1.11.1,<2", "opentelemetry-sdk>=1.11.1,<2"]
3030
pydantic = ["pydantic>=2.0.0,<3"]
31-
openai-agents = ["openai-agents>=0.17.1", "mcp>=1.9.4, <2"]
31+
openai-agents = ["openai-agents>=0.17.5", "mcp>=1.9.4, <2"]
3232
google-adk = ["google-adk>=1.27.0,<2"]
3333
langgraph = ["langgraph>=1.1.0"]
3434
langsmith = ["langsmith>=0.7.34,<0.9"]

temporalio/contrib/openai_agents/sandbox/_sandbox_client_provider.py

Lines changed: 117 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
from __future__ import annotations
44

55
import io
6-
from collections.abc import Callable, Sequence
6+
from collections.abc import Callable, Iterator, Sequence
7+
from contextlib import contextmanager
78
from pathlib import Path
89
from typing import Any
910

11+
from agents.sandbox.errors import SandboxError
1012
from agents.sandbox.session.sandbox_client import BaseSandboxClient
1113
from agents.sandbox.session.sandbox_session import SandboxSession
1214

@@ -34,6 +36,22 @@
3436
from temporalio.contrib.openai_agents.sandbox._temporal_activity_models import (
3537
ExecResult as ExecResultModel,
3638
)
39+
from temporalio.exceptions import ApplicationError
40+
41+
42+
@contextmanager
def _translate_sandbox_errors() -> Iterator[None]:
    """Re-raise terminal sandbox failures as non-retryable Temporal errors.

    Intended to wrap the body of a sandbox activity. A ``SandboxError`` whose
    ``retryable`` attribute is exactly ``False`` is converted into an
    ``ApplicationError`` with ``non_retryable=True``, using ``str(error)`` as
    the message and ``str(error.error_code)`` as the ``type``; the original
    error is chained as the cause. A ``SandboxError`` with any other
    ``retryable`` value (``True`` or ``None``) is re-raised unchanged, and
    exceptions of other types are not intercepted at all.
    """
    try:
        yield
    except SandboxError as err:
        # Temporal retries every activity exception by default, so anything
        # that is not explicitly classified as terminal keeps propagating
        # untouched. The identity check is deliberate: ``None`` means
        # "unclassified" and must not be treated like ``False``.
        if err.retryable is not False:
            raise
        raise ApplicationError(
            str(err), type=str(err.error_code), non_retryable=True
        ) from err
3755

3856

3957
class SandboxClientProvider:
@@ -99,133 +117,147 @@ def _get_activities(self) -> Sequence[Callable[..., Any]]:
99117

100118
@activity.defn(name=f"{prefix}-sandbox_client_create")
101119
async def create_session(args: CreateSessionArgs) -> SessionResult:
102-
session = await self._client.create(
103-
snapshot=args.snapshot_spec,
104-
manifest=args.manifest,
105-
options=args.client_options,
106-
)
107-
self._sessions[str(session.state.session_id)] = session
108-
return SessionResult(
109-
state=session.state, supports_pty=session.supports_pty()
110-
)
120+
with _translate_sandbox_errors():
121+
session = await self._client.create(
122+
snapshot=args.snapshot_spec,
123+
manifest=args.manifest,
124+
options=args.client_options,
125+
)
126+
self._sessions[str(session.state.session_id)] = session
127+
return SessionResult(
128+
state=session.state, supports_pty=session.supports_pty()
129+
)
111130

112131
@activity.defn(name=f"{prefix}-sandbox_client_resume")
113132
async def resume_session(args: ResumeSessionArgs) -> SessionResult:
114-
session = await self._client.resume(args.state)
115-
self._sessions[str(session.state.session_id)] = session
116-
return SessionResult(
117-
state=session.state, supports_pty=session.supports_pty()
118-
)
133+
with _translate_sandbox_errors():
134+
session = await self._client.resume(args.state)
135+
self._sessions[str(session.state.session_id)] = session
136+
return SessionResult(
137+
state=session.state, supports_pty=session.supports_pty()
138+
)
119139

120140
@activity.defn(name=f"{prefix}-sandbox_client_delete")
121141
async def delete_session(args: StopArgs) -> None:
122-
session = await self._session(args)
123-
await self._client.delete(session)
124-
return None
142+
with _translate_sandbox_errors():
143+
session = await self._session(args)
144+
await self._client.delete(session)
145+
return None
125146

126147
# -- Session-level operations (I/O and lifecycle) --
127148

128149
@activity.defn(name=f"{prefix}-sandbox_session_exec")
129150
async def exec_(args: ExecArgs) -> ExecResultModel:
130-
session = await self._session(args)
131-
result = await session.exec(
132-
*args.command,
133-
timeout=args.timeout,
134-
shell=args.shell,
135-
user=args.user,
136-
)
137-
return ExecResultModel(
138-
stdout=result.stdout,
139-
stderr=result.stderr,
140-
exit_code=result.exit_code,
141-
)
151+
with _translate_sandbox_errors():
152+
session = await self._session(args)
153+
result = await session.exec(
154+
*args.command,
155+
timeout=args.timeout,
156+
shell=args.shell,
157+
user=args.user,
158+
)
159+
return ExecResultModel(
160+
stdout=result.stdout,
161+
stderr=result.stderr,
162+
exit_code=result.exit_code,
163+
)
142164

143165
@activity.defn(name=f"{prefix}-sandbox_session_read")
144166
async def read(args: ReadArgs) -> ReadResult:
145-
session = await self._session(args)
146-
handle = await session.read(Path(args.path))
147-
return ReadResult(data=handle.read())
167+
with _translate_sandbox_errors():
168+
session = await self._session(args)
169+
handle = await session.read(Path(args.path))
170+
return ReadResult(data=handle.read())
148171

149172
@activity.defn(name=f"{prefix}-sandbox_session_write")
150173
async def write(args: WriteArgs) -> None:
151-
session = await self._session(args)
152-
await session.write(Path(args.path), io.BytesIO(args.data))
153-
return None
174+
with _translate_sandbox_errors():
175+
session = await self._session(args)
176+
await session.write(Path(args.path), io.BytesIO(args.data))
177+
return None
154178

155179
@activity.defn(name=f"{prefix}-sandbox_session_running")
156180
async def running(args: RunningArgs) -> RunningResult:
157-
session = await self._session(args)
158-
return RunningResult(is_running=await session.running())
181+
with _translate_sandbox_errors():
182+
session = await self._session(args)
183+
return RunningResult(is_running=await session.running())
159184

160185
@activity.defn(name=f"{prefix}-sandbox_session_persist_workspace")
161186
async def persist_workspace(
162187
args: PersistWorkspaceArgs,
163188
) -> PersistWorkspaceResult:
164-
session = await self._session(args)
165-
stream = await session.persist_workspace()
166-
return PersistWorkspaceResult(data=stream.read())
189+
with _translate_sandbox_errors():
190+
session = await self._session(args)
191+
stream = await session.persist_workspace()
192+
return PersistWorkspaceResult(data=stream.read())
167193

168194
@activity.defn(name=f"{prefix}-sandbox_session_hydrate_workspace")
169195
async def hydrate_workspace(args: HydrateWorkspaceArgs) -> None:
170-
session = await self._session(args)
171-
await session.hydrate_workspace(io.BytesIO(args.data))
172-
return None
196+
with _translate_sandbox_errors():
197+
session = await self._session(args)
198+
await session.hydrate_workspace(io.BytesIO(args.data))
199+
return None
173200

174201
@activity.defn(name=f"{prefix}-sandbox_session_pty_exec_start")
175202
async def pty_exec_start(args: PtyExecStartArgs) -> PtyExecUpdateResult:
176-
session = await self._session(args)
177-
update = await session.pty_exec_start(
178-
*args.command,
179-
timeout=args.timeout,
180-
shell=args.shell,
181-
user=args.user,
182-
tty=args.tty,
183-
yield_time_s=args.yield_time_s,
184-
max_output_tokens=args.max_output_tokens,
185-
)
186-
return PtyExecUpdateResult(
187-
process_id=update.process_id,
188-
output=update.output,
189-
exit_code=update.exit_code,
190-
original_token_count=update.original_token_count,
191-
)
203+
with _translate_sandbox_errors():
204+
session = await self._session(args)
205+
update = await session.pty_exec_start(
206+
*args.command,
207+
timeout=args.timeout,
208+
shell=args.shell,
209+
user=args.user,
210+
tty=args.tty,
211+
yield_time_s=args.yield_time_s,
212+
max_output_tokens=args.max_output_tokens,
213+
)
214+
return PtyExecUpdateResult(
215+
process_id=update.process_id,
216+
output=update.output,
217+
exit_code=update.exit_code,
218+
original_token_count=update.original_token_count,
219+
)
192220

193221
@activity.defn(name=f"{prefix}-sandbox_session_pty_write_stdin")
194222
async def pty_write_stdin(args: PtyWriteStdinArgs) -> PtyExecUpdateResult:
195-
session = await self._session(args)
196-
update = await session.pty_write_stdin(
197-
session_id=args.session_id,
198-
chars=args.chars,
199-
yield_time_s=args.yield_time_s,
200-
max_output_tokens=args.max_output_tokens,
201-
)
202-
return PtyExecUpdateResult(
203-
process_id=update.process_id,
204-
output=update.output,
205-
exit_code=update.exit_code,
206-
original_token_count=update.original_token_count,
207-
)
223+
with _translate_sandbox_errors():
224+
session = await self._session(args)
225+
update = await session.pty_write_stdin(
226+
session_id=args.session_id,
227+
chars=args.chars,
228+
yield_time_s=args.yield_time_s,
229+
max_output_tokens=args.max_output_tokens,
230+
)
231+
return PtyExecUpdateResult(
232+
process_id=update.process_id,
233+
output=update.output,
234+
exit_code=update.exit_code,
235+
original_token_count=update.original_token_count,
236+
)
208237

209238
@activity.defn(name=f"{prefix}-sandbox_session_start")
210239
async def start(args: StartArgs) -> None:
211-
session = await self._session(args)
212-
await session.start()
213-
return None
240+
with _translate_sandbox_errors():
241+
session = await self._session(args)
242+
await session.start()
243+
return None
214244

215245
@activity.defn(name=f"{prefix}-sandbox_session_stop")
216246
async def session_stop(args: StopArgs) -> None:
217-
session = await self._session(args)
218-
await session.stop()
219-
return None
247+
with _translate_sandbox_errors():
248+
session = await self._session(args)
249+
await session.stop()
250+
return None
220251

221252
@activity.defn(name=f"{prefix}-sandbox_session_shutdown")
222253
async def session_shutdown(args: StopArgs) -> None:
223-
key = str(args.state.session_id)
224-
session = self._sessions.get(key)
225-
if session is not None:
226-
await session.shutdown()
227-
del self._sessions[key]
228-
return None
254+
with _translate_sandbox_errors():
255+
key = str(args.state.session_id)
256+
session = self._sessions.get(key)
257+
if session is not None:
258+
await session.shutdown()
259+
del self._sessions[key]
260+
return None
229261

230262
return [
231263
create_session,

tests/contrib/openai_agents/test_openai_sandbox.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
import pytest
1010
from agents import Agent, FunctionTool, RunConfig, Runner, Tool
1111
from agents.sandbox import Capability, Manifest, SandboxAgent, SandboxRunConfig
12+
from agents.sandbox.errors import (
13+
ExecTransportError,
14+
SandboxError,
15+
WorkspaceArchiveReadError,
16+
)
1217
from agents.sandbox.session.base_sandbox_session import BaseSandboxSession
1318
from agents.sandbox.session.sandbox_client import (
1419
BaseSandboxClient,
@@ -55,6 +60,7 @@
5560
TestModelProvider,
5661
)
5762
from temporalio.contrib.openai_agents.workflow import temporal_sandbox_client
63+
from temporalio.exceptions import ApplicationError
5864
from temporalio.workflow import ActivityConfig
5965
from tests.helpers import new_worker
6066

@@ -569,6 +575,94 @@ async def test_multiple_providers_register_distinct_activities():
569575
)
570576

571577

578+
# ── SandboxError retryable mapping tests ──
579+
580+
581+
class _ExecRaisingSession(_MockSandboxSession):
    """Mock session whose command execution always fails.

    ``_exec_internal`` raises the ``SandboxError`` supplied at construction
    instead of producing an ``ExecResult``.
    """

    def __init__(self, error: SandboxError) -> None:
        """Store ``error`` so that ``_exec_internal`` can raise it later."""
        super().__init__()
        self._error = error

    async def _exec_internal(
        self,
        *command: str | Path,  # type: ignore[reportUnusedParameter]
        timeout: float | None = None,  # type: ignore[reportUnusedParameter]
    ) -> ExecResult:
        """Ignore the command and timeout and raise the stored error."""
        raise self._error
594+
595+
596+
async def _exec_with_error(error: SandboxError) -> None:
    """Run the mock exec activity against a session that raises ``error``.

    Builds a ``SandboxClientProvider`` named ``"mock"`` around an
    ``_ExecRaisingSession``, creates a session through the
    ``mock-sandbox_client_create`` activity, then invokes
    ``mock-sandbox_session_exec`` with the resulting state. Whatever the exec
    activity raises propagates to the caller.
    """
    failing_session = _ExecRaisingSession(error)
    provider = SandboxClientProvider("mock", _MockSandboxClient(failing_session))
    activities = _activity_map(provider)

    create_activity = activities["mock-sandbox_client_create"]
    created = await create_activity(
        CreateSessionArgs(
            snapshot_spec=None, manifest=Manifest(), client_options=None
        )
    )
    session_state = created.state

    exec_activity = activities["mock-sandbox_session_exec"]
    await exec_activity(
        ExecArgs(state=session_state, command=["boom"], shell=True)
    )
611+
612+
613+
async def test_exec_terminal_error_becomes_non_retryable_application_error():
    """An exec error with ``retryable=False`` surfaces as a non-retryable
    ``ApplicationError`` whose type is ``"exec_transport_error"``."""
    with pytest.raises(ApplicationError) as caught:
        terminal = ExecTransportError(command=["boom"], retryable=False)
        await _exec_with_error(terminal)

    translated = caught.value
    assert translated.non_retryable is True
    assert translated.type == "exec_transport_error"
619+
620+
621+
async def test_exec_transient_error_propagates_unchanged():
    """An exec error with ``retryable=True`` is raised as the original
    ``ExecTransportError`` rather than being converted."""
    with pytest.raises(ExecTransportError):
        transient = ExecTransportError(command=["boom"], retryable=True)
        await _exec_with_error(transient)
625+
626+
627+
async def test_exec_unclassified_error_propagates_unchanged():
    """An exec error with ``retryable=None`` is raised as the original
    ``ExecTransportError`` (it is not converted)."""
    with pytest.raises(ExecTransportError):
        unclassified = ExecTransportError(command=["boom"], retryable=None)
        await _exec_with_error(unclassified)
631+
632+
633+
class _RunningRaisingSession(_MockSandboxSession):
    """Mock session whose ``running()`` always raises the ``SandboxError``
    supplied at construction."""

    def __init__(self, error: SandboxError) -> None:
        """Store ``error`` so that ``running`` can raise it later."""
        super().__init__()
        self._error = error

    async def running(self) -> bool:
        """Raise the stored error instead of reporting a running state."""
        raise self._error
642+
643+
644+
async def test_running_terminal_error_becomes_non_retryable_application_error():
    """A terminal ``SandboxError`` raised by the ``running`` activity (a
    non-exec activity) is also converted to a non-retryable
    ``ApplicationError`` whose type is the error's ``error_code``."""
    error = WorkspaceArchiveReadError(path=Path("/workspace"), retryable=False)
    failing_session = _RunningRaisingSession(error)
    provider = SandboxClientProvider("mock", _MockSandboxClient(failing_session))
    activities = _activity_map(provider)

    create_activity = activities["mock-sandbox_client_create"]
    created = await create_activity(
        CreateSessionArgs(
            snapshot_spec=None, manifest=Manifest(), client_options=None
        )
    )
    session_state = created.state

    with pytest.raises(ApplicationError) as caught:
        await activities["mock-sandbox_session_running"](
            RunningArgs(state=session_state)
        )

    translated = caught.value
    assert translated.non_retryable is True
    assert translated.type == "workspace_archive_read_error"
664+
665+
572666
# ── End-to-end test: Runner + SandboxAgent through Temporal activities ──
573667

574668

0 commit comments

Comments
 (0)