-
Notifications
You must be signed in to change notification settings - Fork 102
feat: Sandbox parent PR (base/shell/remote/docker/tools/plugins) #1011
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| import { spawn } from 'child_process' | ||
| import { ShellSandbox } from '../sandbox/shell.js' | ||
| import { shellQuote } from '../utils/shell-quote.js' | ||
| import { streamProcess } from '../sandbox/stream-process.js' | ||
| import type { ExecuteOptions } from '../sandbox/base.js' | ||
| import type { ExecutionResult, StreamChunk } from '../sandbox/types.js' | ||
|
|
||
| /** | ||
| * Test sandbox that executes commands within a specific working directory. | ||
| * | ||
| * Extends ShellSandbox (same base as DockerSandbox and RemoteSandbox) so it | ||
| * exercises the same code path real sandboxes use: base64 file encoding, | ||
| * shell quoting, ls parsing, etc. The only difference is commands run on | ||
| * the host rather than in a container or over SSH. | ||
| */ | ||
| export class TestSandbox extends ShellSandbox { | ||
| readonly workingDir: string | ||
|
|
||
| constructor(workingDir: string) { | ||
| super() | ||
| this.workingDir = workingDir | ||
| } | ||
|
|
||
| async *executeStreaming( | ||
| command: string, | ||
| options?: ExecuteOptions | ||
| ): AsyncGenerator<StreamChunk | ExecutionResult, void, undefined> { | ||
| const cwd = options?.cwd ?? this.workingDir | ||
| const fullCommand = `cd ${shellQuote(cwd)} && ${command}` | ||
| const proc = spawn('sh', ['-c', fullCommand]) | ||
| yield* streamProcess(proc, { timeout: options?.timeout, signal: options?.signal }) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ import { | |
| type LocalAgent, | ||
| type localAgentSymbol, | ||
| } from '../types/agent.js' | ||
| import type { Sandbox } from '../sandbox/base.js' | ||
| import { BedrockModel } from '../models/bedrock.js' | ||
| import { | ||
| contentBlockFromData, | ||
|
|
@@ -220,6 +221,14 @@ export type AgentConfig = { | |
| * Defaults to `'concurrent'`. See {@link ToolExecutorStrategy} for details. | ||
| */ | ||
| toolExecutor?: ToolExecutorStrategy | ||
| /** | ||
| * Sandbox for tool code execution and filesystem access. | ||
| * When provided, sandbox default tools (fileEditor, exec, codeInterpreter) are | ||
| * auto-registered and execute within the sandbox. | ||
| * When omitted, no sandbox tools are auto-registered. | ||
| * Pass `false` to explicitly disable sandbox and sandbox tool auto-registration. | ||
| */ | ||
| sandbox?: Sandbox | false | ||
| } | ||
|
|
||
| /** Default name assigned to agents when none is provided. */ | ||
|
|
@@ -262,6 +271,19 @@ export class Agent implements LocalAgent, InvokableAgent { | |
| */ | ||
| public model: Model | ||
|
|
||
| /** | ||
| * Sandbox for tool code execution and filesystem access. | ||
| * Set immediately if passed via config, otherwise defaults to NotASandboxLocalEnvironment during initialize(). | ||
| */ | ||
|
gautamsirdeshmukh marked this conversation as resolved.
|
||
| private _sandbox: Sandbox | false | undefined | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we allow |
||
|
|
||
| get sandbox(): Sandbox { | ||
| if (!this._sandbox) { | ||
| throw new Error('Sandbox is not available. Pass a Sandbox instance to the agent config to enable it.') | ||
| } | ||
| return this._sandbox | ||
| } | ||
|
|
||
| /** | ||
| * The system prompt to pass to the model provider. | ||
| */ | ||
|
|
@@ -407,6 +429,8 @@ export class Agent implements LocalAgent, InvokableAgent { | |
| this._appendMessageAndFireHooks(message, invocationState) | ||
| ) | ||
|
|
||
| this._sandbox = config?.sandbox | ||
|
|
||
| this._initialized = false | ||
| } | ||
|
|
||
|
|
@@ -443,6 +467,17 @@ export class Agent implements LocalAgent, InvokableAgent { | |
| return | ||
| } | ||
|
|
||
| const userProvidedSandbox = this._sandbox !== undefined && this._sandbox !== false | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue: When a user passes
The AgentConfig docs say "Pass Suggestion: Guard the default assignment to respect if (this._sandbox === undefined && typeof process !== 'undefined' && process.versions?.node) {
const { NotASandboxLocalEnvironment } = await import('../sandbox/not-a-sandbox-local-environment.js')
this._sandbox = new NotASandboxLocalEnvironment()
}This way:
|
||
| if (!userProvidedSandbox && typeof process !== 'undefined' && process.versions?.node) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue: The const userProvidedSandbox = this._sandbox !== undefined && this._sandbox !== false // → false when sandbox: false
if (!userProvidedSandbox && typeof process !== 'undefined' && process.versions?.node) {
this._sandbox = new NotASandboxLocalEnvironment() // overwrites false!
}When a user passes
The AgentConfig TSDoc says: "Pass Suggestion: Change the default-assignment guard from if (this._sandbox === undefined && typeof process !== 'undefined' && process.versions?.node) {
const { NotASandboxLocalEnvironment } = await import('../sandbox/not-a-sandbox-local-environment.js')
this._sandbox = new NotASandboxLocalEnvironment()
}This way:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue: This has been flagged in 3+ prior reviews and remains unfixed. When a user passes
The AgentConfig TSDoc says "Pass Fix (1-line change): if (this._sandbox === undefined && typeof process !== 'undefined' && process.versions?.node) {
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue: This bug has been flagged repeatedly but remains unfixed. When a user passes
The AgentConfig TSDoc says "Pass Fix (1-line change): if (this._sandbox === undefined && typeof process !== 'undefined' && process.versions?.node) {This ensures:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue: This has been flagged across multiple review passes and remains the only blocking issue. When a user passes const userProvidedSandbox = this._sandbox !== undefined && this._sandbox !== false
// ↑ false !== undefined (true) && false !== false (false) → false
if (!userProvidedSandbox && ...) {
this._sandbox = new NotASandboxLocalEnvironment() // overwrites false!
}The AgentConfig TSDoc says "Pass Fix (1-line change): if (this._sandbox === undefined && typeof process !== 'undefined' && process.versions?.node) {This ensures:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is intentional, not a bug. Setting it as the default ensures the vended tools work uniformly regardless of whether a real sandbox is configured. Without this default, every vended tool would need separate code paths for "sandbox available" vs "no sandbox".
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the clarification — the design rationale makes sense. Having The remaining issue is that the TSDoc is inaccurate for the * Pass `false` to explicitly disable sandbox and sandbox tool auto-registration.Since
This is non-blocking — just a documentation accuracy concern.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, what happens if I don't provide a sandbox, and I am in browser? what's the devx? |
||
| const { NotASandboxLocalEnvironment } = await import('../sandbox/not-a-sandbox-local-environment.js') | ||
| this._sandbox = new NotASandboxLocalEnvironment() | ||
| } | ||
|
|
||
| if (this._sandbox) { | ||
| await this._sandbox.start() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is the agent "starting" the sandbox? should the agent own sandboxes' lifecycle? I wouldn't want that as sandbox can live outside the agent too |
||
| } | ||
|
|
||
| // Initialize MCP clients and register their tools | ||
| await Promise.all( | ||
| this._mcpClients.map(async (client) => { | ||
|
|
@@ -457,6 +492,15 @@ export class Agent implements LocalAgent, InvokableAgent { | |
|
|
||
| await this._pluginRegistry.initialize(this) | ||
|
|
||
| if (userProvidedSandbox) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we do want this behavior, but it also makes me think if DevX is good enough 🤔 |
||
| const { SANDBOX_DEFAULT_TOOLS } = await import('../vended-tools/sandbox-default-tools.js') | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue: Dynamic The eslint rule prevents static imports of vended-tools from core SDK files:
This dynamic import creates a hidden runtime coupling between Suggestion: Consider adding an explicit eslint-disable comment here with justification to make the architectural exception visible, or add the dynamic import pattern to the rule. This makes the intentional violation discoverable for future maintainers: // eslint-disable-next-line -- Intentional: sandbox tools are auto-registered when user opts into sandbox
const { SANDBOX_DEFAULT_TOOLS } = await import('../vended-tools/sandbox-default-tools.js')
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggestion: Add an eslint-disable comment documenting the intentional architectural exception The // eslint-disable-next-line -- Intentional: auto-register sandbox tools when user opts into sandbox
const { SANDBOX_DEFAULT_TOOLS } = await import('../vended-tools/sandbox-default-tools.js')(Note: the test file
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggestion: Add an eslint-disable comment documenting the intentional architectural exception The // eslint-disable-next-line -- Intentional: auto-register sandbox tools when user opts into sandbox
const { SANDBOX_DEFAULT_TOOLS } = await import('../vended-tools/sandbox-default-tools.js')
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggestion: Add an eslint-disable comment documenting the intentional architectural bypass The // eslint-disable-next-line -- Intentional: auto-register sandbox tools when user opts into sandbox
const { SANDBOX_DEFAULT_TOOLS } = await import('../vended-tools/sandbox-default-tools.js')(The test file |
||
| for (const tool of SANDBOX_DEFAULT_TOOLS) { | ||
| if (!this._toolRegistry.get(tool.name)) { | ||
| this._toolRegistry.add(tool) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| await this._hooksRegistry.invokeCallbacks(new InitializedEvent({ agent: this })) | ||
|
|
||
| this._initialized = true | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,158 @@ | ||
| import { describe, it, expect, vi, beforeEach } from 'vitest' | ||
| import { RemoteSandbox } from '../remote.js' | ||
| import * as childProcess from 'child_process' | ||
|
|
||
| vi.mock('child_process', async () => { | ||
| const actual = await vi.importActual<typeof childProcess>('child_process') | ||
| return { ...actual, spawn: vi.fn() } | ||
| }) | ||
|
|
||
| function createMockProcess() { | ||
| const proc = { | ||
| stdout: { on: vi.fn() }, | ||
| stderr: { on: vi.fn() }, | ||
| on: vi.fn(), | ||
| kill: vi.fn(), | ||
| } | ||
|
|
||
| // Simulate immediate close with exit code 0 | ||
| proc.on.mockImplementation((event: string, cb: (code: number | null) => void) => { | ||
| if (event === 'close') { | ||
| // Schedule the close callback | ||
| Promise.resolve().then(() => cb(0)) | ||
| } | ||
| }) | ||
|
|
||
| return proc | ||
| } | ||
|
|
||
| describe('RemoteSandbox (unit)', () => { | ||
| beforeEach(() => { | ||
| vi.clearAllMocks() | ||
| }) | ||
|
|
||
| describe('constructor', () => { | ||
| it('stores host and workingDir', () => { | ||
| const sandbox = new RemoteSandbox({ host: 'myhost', workingDir: '/workspace' }) | ||
| expect(sandbox.host).toBe('myhost') | ||
| expect(sandbox.workingDir).toBe('/workspace') | ||
| }) | ||
|
|
||
| it('defaults port to 22', () => { | ||
| const sandbox = new RemoteSandbox({ host: 'myhost', workingDir: '/ws' }) | ||
| // Port is private but we can verify via the SSH args in stream() | ||
| expect(sandbox).toBeDefined() | ||
| }) | ||
| }) | ||
|
|
||
| describe('stream() SSH argument construction', () => { | ||
| it('builds correct SSH args with defaults', async () => { | ||
| const mockProc = createMockProcess() | ||
| vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) | ||
|
|
||
| const sandbox = new RemoteSandbox({ host: 'user@server.com', workingDir: '/remote/path' }) | ||
|
|
||
| // Start consuming the generator to trigger spawn | ||
| const gen = sandbox.executeStreaming('echo hi') | ||
| const iter = gen[Symbol.asyncIterator]() | ||
| await iter.next() | ||
|
|
||
| expect(childProcess.spawn).toHaveBeenCalledWith('ssh', [ | ||
| '-o', | ||
| 'StrictHostKeyChecking=accept-new', | ||
| '-o', | ||
| 'BatchMode=yes', | ||
| '-p', | ||
| '22', | ||
| 'user@server.com', | ||
| "cd '/remote/path' && echo hi", | ||
| ]) | ||
| }) | ||
|
|
||
| it('includes identity file when provided', async () => { | ||
| const mockProc = createMockProcess() | ||
| vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) | ||
|
|
||
| const sandbox = new RemoteSandbox({ | ||
| host: 'server', | ||
| workingDir: '/ws', | ||
| identityFile: '/home/user/.ssh/key', | ||
| }) | ||
|
|
||
| const gen = sandbox.executeStreaming('ls') | ||
| const iter = gen[Symbol.asyncIterator]() | ||
| await iter.next() | ||
|
|
||
| const args = vi.mocked(childProcess.spawn).mock.calls[0]![1] as string[] | ||
| expect(args).toContain('-i') | ||
| expect(args).toContain('/home/user/.ssh/key') | ||
| }) | ||
|
|
||
| it('uses custom port', async () => { | ||
| const mockProc = createMockProcess() | ||
| vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) | ||
|
|
||
| const sandbox = new RemoteSandbox({ | ||
| host: 'server', | ||
| workingDir: '/ws', | ||
| port: 2222, | ||
| }) | ||
|
|
||
| const gen = sandbox.executeStreaming('ls') | ||
| const iter = gen[Symbol.asyncIterator]() | ||
| await iter.next() | ||
|
|
||
| const args = vi.mocked(childProcess.spawn).mock.calls[0]![1] as string[] | ||
| expect(args).toContain('-p') | ||
| expect(args).toContain('2222') | ||
| }) | ||
|
|
||
| it('quotes cwd with single quotes', async () => { | ||
| const mockProc = createMockProcess() | ||
| vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) | ||
|
|
||
| const sandbox = new RemoteSandbox({ | ||
| host: 'server', | ||
| workingDir: "/path/with spaces/and'quotes", | ||
| }) | ||
|
|
||
| const gen = sandbox.executeStreaming('ls') | ||
| const iter = gen[Symbol.asyncIterator]() | ||
| await iter.next() | ||
|
|
||
| const args = vi.mocked(childProcess.spawn).mock.calls[0]![1] as string[] | ||
| const remoteCommand = args[args.length - 1] | ||
| expect(remoteCommand).toContain("cd '/path/with spaces/and'\\''quotes'") | ||
| }) | ||
|
|
||
| it('uses cwd option when provided', async () => { | ||
| const mockProc = createMockProcess() | ||
| vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) | ||
|
|
||
| const sandbox = new RemoteSandbox({ host: 'server', workingDir: '/default' }) | ||
|
|
||
| const gen = sandbox.executeStreaming('ls', { cwd: '/override' }) | ||
| const iter = gen[Symbol.asyncIterator]() | ||
| await iter.next() | ||
|
|
||
| const args = vi.mocked(childProcess.spawn).mock.calls[0]![1] as string[] | ||
| const remoteCommand = args[args.length - 1] | ||
| expect(remoteCommand).toContain("cd '/override'") | ||
| }) | ||
| }) | ||
|
|
||
| describe('start()', () => { | ||
| it('creates working directory with cwd: /', async () => { | ||
| const mockProc = createMockProcess() | ||
| vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) | ||
|
|
||
| const sandbox = new RemoteSandbox({ host: 'server', workingDir: '/my/workspace' }) | ||
| await sandbox.start() | ||
|
|
||
| const args = vi.mocked(childProcess.spawn).mock.calls[0]![1] as string[] | ||
| const remoteCommand = args[args.length - 1] | ||
| expect(remoteCommand).toContain("cd '/'") | ||
| expect(remoteCommand).toContain("mkdir -p '/my/workspace'") | ||
| }) | ||
| }) | ||
| }) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Issue: Test assertions weakened unnecessarily —
toolSpecs: []→toolSpecs: expect.any(Array)These test agents are created with
new Agent({ model })(nosandboxin config), souserProvidedSandbox = falseandSANDBOX_DEFAULT_TOOLSare not registered. ThetoolSpecsshould still be[]for these cases. The original strict assertions should be restored:Weakening to
expect.any(Array)hides regressions where tools are unexpectedly added to the registry.