Skip to content

Commit d69cbec

Browse files
authored
refactor(sandbox): make the bound file backend the run's single session handle (#1292)
* feat(sandbox): add run_commands to SandboxFileBackend
* refactor(git): thread bound sandbox backend through git/publish path
* refactor(sandbox): run bash through the bound backend; thread it to subagents
* test(sandbox): guard that the backend never advertises execution
* feat(sandbox): classify bash failures as transient or permanent

The bash tool degraded every transport/HTTP error to the same generic "sandbox call failed" string, so the agent could not tell a momentary blip (worth one retry) from a non-recoverable rejection (stop using the tool).

Introduce a BashFailure enum that maps httpx errors to TRANSIENT (no response, or a retryable status: 408/425/429/5xx) vs PERMANENT (auth, session-gone, bad-request, not-implemented), and return distinct agent-facing guidance for each. The transient message is byte-stable so the system prompt's "two identical error strings => stop" backstop still fires when a retry fails the same way.

Non-httpx failures (malformed 200 body, unbound-backend RuntimeError) are left to propagate as loud wire/programming bugs.
1 parent cfe9ee7 commit d69cbec

16 files changed

Lines changed: 401 additions & 177 deletions

File tree

daiv/automation/agent/git_manager.py

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,11 @@
1010
from git import GitCommandError
1111

1212
from automation.agent.constants import REPO_PATH
13-
from core.sandbox.schemas import RunCommandsRequest
1413

1514
if TYPE_CHECKING:
1615
from git import Repo
1716

18-
from core.sandbox.client import DAIVSandboxClient
17+
from automation.agent.middlewares.file_system import SandboxFileBackend
1918

2019
logger = logging.getLogger("daiv.tools")
2120

@@ -56,9 +55,9 @@ class RepoStatus:
5655
class GitManager:
5756
"""Run git operations against a repository in one of two mutually-exclusive modes:
5857
59-
- **Sandbox mode** (``client`` + ``session_id``): git runs *in the sandbox* via
60-
``run_commands`` in ``repo_path`` (``/workspace/repo``). Used for sandbox-enabled
61-
runs, where the agent's changes are sandbox-authoritative (no local copy).
58+
- **Sandbox mode** (``sandbox_backend``): git runs *in the sandbox* via the bound
59+
backend's ``run_commands`` in ``repo_path`` (``/workspace/repo``). Used for
60+
sandbox-enabled runs, where the agent's changes are sandbox-authoritative (no local copy).
6261
- **Local mode** (``repo``): git runs as a subprocess against a GitPython clone's
6362
working tree. Used for sandbox-disabled / repoless runs, where changes live on disk.
6463
@@ -67,8 +66,7 @@ class GitManager:
6766
6867
Args:
6968
repo: GitPython repo for local mode.
70-
client: Sandbox client for sandbox mode.
71-
session_id: Sandbox session id (required with ``client``).
69+
sandbox_backend: The run's bound :class:`SandboxFileBackend` for sandbox mode.
7270
repo_path: Repo path inside the sandbox (defaults to ``REPO_PATH``).
7371
"""
7472

@@ -78,19 +76,15 @@ def __init__(
7876
self,
7977
repo: Repo | None = None,
8078
*,
81-
client: DAIVSandboxClient | None = None,
82-
session_id: str | None = None,
79+
sandbox_backend: SandboxFileBackend | None = None,
8380
repo_path: str | None = None,
8481
) -> None:
85-
if (repo is None) == (client is None):
86-
raise ValueError("GitManager requires exactly one of `repo` (local) or `client` (sandbox).")
87-
if client is not None and not session_id:
88-
raise ValueError("GitManager sandbox mode requires a non-empty session_id.")
82+
if (repo is None) == (sandbox_backend is None):
83+
raise ValueError("GitManager requires exactly one of `repo` (local) or `sandbox_backend` (sandbox).")
8984
if repo_path is None:
9085
repo_path = REPO_PATH
9186
self.repo = repo
92-
self._client = client
93-
self._session_id = session_id
87+
self._sandbox_backend = sandbox_backend
9488
self._repo_path = repo_path
9589

9690
@classmethod
@@ -103,30 +97,31 @@ def for_local(cls, repo: Repo) -> GitManager:
10397
return cls(repo=repo)
10498

10599
@classmethod
106-
def for_sandbox(cls, client: DAIVSandboxClient, session_id: str, *, repo_path: str | None = None) -> GitManager:
100+
def for_sandbox(cls, sandbox_backend: SandboxFileBackend, *, repo_path: str | None = None) -> GitManager:
107101
"""Sandbox-mode manager that runs git in the session's ``repo_path`` (``/workspace/repo``).
108102
109-
Preferred over ``GitManager(client=..., session_id=...)``: the required ``session_id`` is
110-
positional, so a sandbox manager can't be built without one.
103+
Takes the run's already-bound :class:`SandboxFileBackend` — the single session handle.
104+
The backend's ``_require_bound`` guard surfaces an unbound-session programming error on
105+
the first command, so no session id is threaded here.
111106
"""
112-
return cls(client=client, session_id=session_id, repo_path=repo_path)
107+
return cls(sandbox_backend=sandbox_backend, repo_path=repo_path)
113108

114109
# -- git invocation ------------------------------------------------------
115110
async def _git(self, *args: str, check: bool = True) -> _GitResult:
116111
"""Run one git command in the repo. Raises ``GitCommandError`` on a non-zero
117112
exit when ``check`` is True; otherwise returns the result for the caller to inspect.
118113
"""
119-
result = await (self._git_sandbox(args) if self._client is not None else self._git_local(args))
114+
result = await (self._git_sandbox(args) if self._sandbox_backend is not None else self._git_local(args))
120115
if check and result.exit_code != 0:
121116
raise GitCommandError(["git", *args], result.exit_code, result.output)
122117
return result
123118

124119
async def _git_sandbox(self, args: tuple[str, ...]) -> _GitResult:
125-
client, session_id = self._client, self._session_id
126-
if client is None or session_id is None: # pragma: no cover - guaranteed by __init__
120+
backend = self._sandbox_backend
121+
if backend is None: # pragma: no cover - guaranteed by __init__
127122
raise RuntimeError("GitManager is not in sandbox mode")
128123
command = " ".join(_shell_quote(token) for token in ("git", "-C", self._repo_path, *args))
129-
response = await client.run_commands(session_id, RunCommandsRequest(commands=[command], fail_fast=True))
124+
response = await backend.run_commands([command], fail_fast=True)
130125
if not response.results:
131126
# The sandbox always returns one result per command; an empty list is a wire-level
132127
# anomaly. Fail with context rather than a bare IndexError on ``results[0]``.
@@ -159,14 +154,11 @@ async def _git_batch(self, commands: list[tuple[str, ...]]) -> list[_GitResult]:
159154
"""
160155
if not commands:
161156
return []
162-
if self._client is not None:
163-
client, session_id = self._client, self._session_id
164-
if session_id is None: # pragma: no cover - guaranteed by __init__
165-
raise RuntimeError("GitManager is not in sandbox mode")
157+
if self._sandbox_backend is not None:
166158
cmd_strs = [
167159
" ".join(_shell_quote(tok) for tok in ("git", "-C", self._repo_path, *args)) for args in commands
168160
]
169-
response = await client.run_commands(session_id, RunCommandsRequest(commands=cmd_strs, fail_fast=False))
161+
response = await self._sandbox_backend.run_commands(cmd_strs, fail_fast=False)
170162
if len(response.results) != len(commands):
171163
raise RuntimeError(f"Sandbox returned {len(response.results)} results for {len(commands)} git commands")
172164
return [_GitResult(exit_code=r.exit_code, output=r.output) for r in response.results]

daiv/automation/agent/git_utils.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,21 @@
1010

1111
from git import Repo
1212

13-
from core.sandbox.client import DAIVSandboxClient
13+
from automation.agent.middlewares.file_system import SandboxFileBackend
1414

1515

1616
@asynccontextmanager
1717
async def open_git_manager(
18-
*, client: DAIVSandboxClient | None = None, session_id: str | None, gitrepo: Repo | None
18+
*, sandbox_backend: SandboxFileBackend | None, gitrepo: Repo | None
1919
) -> AsyncIterator[GitManager]:
2020
"""Yield a :class:`GitManager` matched to the run's mode.
2121
22-
Sandbox-enabled runs (a ``session_id`` is present) get a **sandbox-mode** manager bound to the
23-
injected run-scoped ``client`` — git runs in ``/workspace/repo`` where the agent's changes are
24-
authoritative. The transport is borrowed, not owned: no open/close here, and no per-call
25-
fallback — a session id without a client is a wiring error. Sandbox-disabled / repoless runs
26-
(no session) get a **local-mode** manager over the GitPython clone.
22+
Sandbox-enabled runs pass the run's bound :class:`SandboxFileBackend` — git runs in
23+
``/workspace/repo`` where the agent's changes are authoritative. Sandbox-disabled /
24+
repoless runs pass ``sandbox_backend=None`` and get a local-mode manager over the
25+
GitPython clone.
2726
"""
28-
if session_id:
29-
if client is None:
30-
raise RuntimeError("open_git_manager: sandbox session given but no sandbox client injected.")
31-
yield GitManager.for_sandbox(client, session_id)
27+
if sandbox_backend is not None:
28+
yield GitManager.for_sandbox(sandbox_backend)
3229
else:
3330
yield GitManager.for_local(cast("Repo", gitrepo))

daiv/automation/agent/graph.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ async def create_daiv_agent(
264264
web_fetch_enabled=_web_fetch_enabled,
265265
fallback_models=fallback_models,
266266
client=run_client,
267+
sandbox_backend=sandbox_backend,
267268
),
268269
create_explore_subagent(backend),
269270
]
@@ -278,6 +279,7 @@ async def create_daiv_agent(
278279
web_fetch_enabled=_web_fetch_enabled,
279280
fallback_models=fallback_models,
280281
client=run_client,
282+
sandbox_backend=sandbox_backend,
281283
)
282284
subagents.extend(custom_subagents)
283285

@@ -314,7 +316,7 @@ async def create_daiv_agent(
314316
AnthropicPromptCachingMiddleware(),
315317
ToolCallLoggingMiddleware(),
316318
ensure_non_empty_response,
317-
GitMiddleware(auto_commit_changes=auto_commit_changes, sandbox_client=run_client),
319+
GitMiddleware(auto_commit_changes=auto_commit_changes, sandbox_backend=sandbox_backend),
318320
GitPlatformMiddleware(git_platform=ctx.git_platform, backend=backend),
319321
dynamic_daiv_system_prompt,
320322
*(middleware or []),

daiv/automation/agent/middlewares/file_system.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
FsLsRequest,
4141
FsReadRequest,
4242
FsWriteRequest,
43+
RunCommandsRequest,
44+
RunCommandsResponse,
4345
)
4446

4547
logger = logging.getLogger("daiv.tools")
@@ -204,7 +206,8 @@ def resolve_backend_for(self, virtual_path: str) -> BackendProtocol:
204206

205207

206208
class SandboxFileBackend(BackendProtocol):
207-
"""Deepagents backend whose files live in a sandbox workspace.
209+
"""Deepagents backend whose files live in a sandbox workspace, and the run's
210+
command-execution handle (``run_commands``).
208211
209212
The agent addresses files by their sandbox-absolute path (``/workspace/repo``,
210213
``/workspace/skills``, ``/workspace/tmp``); the backend is a thin pass-through to
@@ -251,6 +254,21 @@ def _require_bound(self) -> tuple[DAIVSandboxClient, str]:
251254
raise RuntimeError("SandboxFileBackend is not bound to a sandbox session")
252255
return self._client, self._session_id
253256

257+
async def run_commands(self, commands: list[str], *, fail_fast: bool) -> RunCommandsResponse:
258+
"""Run shell commands in the bound session's workspace.
259+
260+
The run's command-execution handle (used by the ``bash`` tool and sandbox-mode
261+
``GitManager``). A thin pass-through to ``DAIVSandboxClient.run_commands`` — it takes a
262+
*list* + ``fail_fast`` (not a single command) so multi-command batches run in one
263+
round-trip. Like the other methods here it **raises** on transport/HTTP errors;
264+
callers that need graceful degradation (the ``bash`` tool) wrap it.
265+
266+
Intentionally NOT deepagents' ``SandboxBackendProtocol.aexecute``: implementing that
267+
protocol would activate deepagents' always-registered, ungated ``execute`` tool.
268+
"""
269+
client, session_id = self._require_bound()
270+
return await client.run_commands(session_id, RunCommandsRequest(commands=commands, fail_fast=fail_fast))
271+
254272
# -- path mapping (identity) --------------------------------------------
255273
# The sandbox is authoritative and the agent addresses files by their
256274
# sandbox-absolute path (/workspace/repo, /workspace/skills, /workspace/tmp).

daiv/automation/agent/middlewares/git.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
from langgraph.runtime import Runtime
3131

32-
from core.sandbox.client import DAIVSandboxClient
32+
from automation.agent.middlewares.file_system import SandboxFileBackend
3333

3434

3535
logger = logging.getLogger("daiv.tools")
@@ -104,9 +104,9 @@ class GitMiddleware(AgentMiddleware[GitState, RuntimeCtx]):
104104
skip_ci: Whether to prefix the commit with ``[skip ci]``.
105105
auto_commit_changes: Whether the run publishes its changes at all. When ``False`` the
106106
middleware is inert.
107-
sandbox_client: Run-scoped sandbox client injected by ``create_daiv_agent``; forwarded to
108-
:class:`GitChangePublisher` so the turn-end publish runs git inside the sandbox. ``None``
109-
for sandbox-disabled / local runs.
107+
sandbox_backend: Run's bound :class:`SandboxFileBackend` injected by ``create_daiv_agent``;
108+
forwarded to :class:`GitChangePublisher` so the turn-end publish runs git inside the
109+
sandbox. ``None`` for sandbox-disabled / local runs.
110110
111111
Example:
112112
```python
@@ -131,14 +131,14 @@ def __init__(
131131
*,
132132
skip_ci: bool = False,
133133
auto_commit_changes: bool = True,
134-
sandbox_client: DAIVSandboxClient | None = None,
134+
sandbox_backend: SandboxFileBackend | None = None,
135135
) -> None:
136136
"""
137137
Initialize the middleware.
138138
"""
139139
self.skip_ci = skip_ci
140140
self.auto_commit_changes = auto_commit_changes
141-
self._sandbox_client = sandbox_client
141+
self._sandbox_backend = sandbox_backend
142142

143143
async def abefore_agent(self, state: GitState, runtime: Runtime[RuntimeCtx]) -> dict[str, Any] | None:
144144
"""
@@ -260,10 +260,8 @@ async def aafter_agent(self, state: GitState, runtime: Runtime[RuntimeCtx]) -> d
260260
if not self.auto_commit_changes:
261261
return None
262262

263-
publisher = GitChangePublisher(runtime.context, sandbox_client=self._sandbox_client)
264-
outcome = await publisher.publish(
265-
session_id=state.get("session_id"), merge_request=state.get("merge_request"), skip_ci=self.skip_ci
266-
)
263+
publisher = GitChangePublisher(runtime.context, sandbox_backend=self._sandbox_backend)
264+
outcome = await publisher.publish(merge_request=state.get("merge_request"), skip_ci=self.skip_ci)
267265

268266
if outcome.merge_request is None:
269267
return None

0 commit comments

Comments
 (0)