From d76fe8f01ab305c914615e3d86b6a9942d8a3726 Mon Sep 17 00:00:00 2001 From: Gautam Sirdeshmukh Date: Thu, 14 May 2026 11:29:42 -0400 Subject: [PATCH] feat: add Sandbox interface (base/shell/remote/docker/tools) --- strands-ts/eslint.config.js | 6 + strands-ts/package.json | 12 + strands-ts/src/__fixtures__/agent-helpers.ts | 6 + .../src/__fixtures__/test-sandbox.node.ts | 33 ++ .../agent/__tests__/agent.model-retry.test.ts | 4 +- strands-ts/src/agent/__tests__/agent.test.ts | 2 +- strands-ts/src/agent/agent.ts | 44 ++ strands-ts/src/index.ts | 11 + .../src/sandbox/__tests__/remote.test.ts | 158 ++++++++ .../src/sandbox/__tests__/shell.test.node.ts | 303 ++++++++++++++ strands-ts/src/sandbox/base.ts | 184 +++++++++ strands-ts/src/sandbox/constants.ts | 1 + strands-ts/src/sandbox/docker.ts | 259 ++++++++++++ strands-ts/src/sandbox/index.ts | 14 + .../not-a-sandbox-local-environment.ts | 111 +++++ strands-ts/src/sandbox/remote.ts | 134 ++++++ strands-ts/src/sandbox/shell.ts | 110 +++++ strands-ts/src/sandbox/stream-process.ts | 167 ++++++++ strands-ts/src/sandbox/types.ts | 73 ++++ strands-ts/src/types/agent.ts | 6 + strands-ts/src/utils/shell-quote.ts | 10 + .../__tests__/sandbox-storage.test.node.ts | 75 ++++ .../vended-plugins/context-offloader/index.ts | 2 +- .../context-offloader/plugin.ts | 8 +- .../context-offloader/storage.ts | 78 +++- .../__tests__/agent-skills.test.node.ts | 39 +- .../__tests__/sandbox-skills.test.node.ts | 122 ++++++ .../src/vended-plugins/skills/agent-skills.ts | 60 +-- strands-ts/src/vended-plugins/skills/skill.ts | 35 ++ .../__tests__/code-interpreter.test.node.ts | 103 +++++ .../code-interpreter/code-interpreter.ts | 61 +++ .../vended-tools/code-interpreter/index.ts | 5 + .../exec/__tests__/exec.test.node.ts | 91 +++++ strands-ts/src/vended-tools/exec/exec.ts | 58 +++ strands-ts/src/vended-tools/exec/index.ts | 5 + .../__tests__/file-editor.test.node.ts | 205 ++++++++-- .../vended-tools/file-editor/file-editor.ts | 380 
+++++++++--------- .../src/vended-tools/sandbox-default-tools.ts | 6 + .../test/integ/sandbox/docker.test.node.ts | 321 +++++++++++++++ strands-ts/vitest.config.ts | 4 + 40 files changed, 3006 insertions(+), 300 deletions(-) create mode 100644 strands-ts/src/__fixtures__/test-sandbox.node.ts create mode 100644 strands-ts/src/sandbox/__tests__/remote.test.ts create mode 100644 strands-ts/src/sandbox/__tests__/shell.test.node.ts create mode 100644 strands-ts/src/sandbox/base.ts create mode 100644 strands-ts/src/sandbox/constants.ts create mode 100644 strands-ts/src/sandbox/docker.ts create mode 100644 strands-ts/src/sandbox/index.ts create mode 100644 strands-ts/src/sandbox/not-a-sandbox-local-environment.ts create mode 100644 strands-ts/src/sandbox/remote.ts create mode 100644 strands-ts/src/sandbox/shell.ts create mode 100644 strands-ts/src/sandbox/stream-process.ts create mode 100644 strands-ts/src/sandbox/types.ts create mode 100644 strands-ts/src/utils/shell-quote.ts create mode 100644 strands-ts/src/vended-plugins/context-offloader/__tests__/sandbox-storage.test.node.ts create mode 100644 strands-ts/src/vended-plugins/skills/__tests__/sandbox-skills.test.node.ts create mode 100644 strands-ts/src/vended-tools/code-interpreter/__tests__/code-interpreter.test.node.ts create mode 100644 strands-ts/src/vended-tools/code-interpreter/code-interpreter.ts create mode 100644 strands-ts/src/vended-tools/code-interpreter/index.ts create mode 100644 strands-ts/src/vended-tools/exec/__tests__/exec.test.node.ts create mode 100644 strands-ts/src/vended-tools/exec/exec.ts create mode 100644 strands-ts/src/vended-tools/exec/index.ts create mode 100644 strands-ts/src/vended-tools/sandbox-default-tools.ts create mode 100644 strands-ts/test/integ/sandbox/docker.test.node.ts diff --git a/strands-ts/eslint.config.js b/strands-ts/eslint.config.js index 9beb1ad47..29987af52 100644 --- a/strands-ts/eslint.config.js +++ b/strands-ts/eslint.config.js @@ -55,6 +55,11 @@ function 
sdkRules(options) { process: 'readonly', setTimeout: 'readonly', clearTimeout: 'readonly', + setInterval: 'readonly', + clearInterval: 'readonly', + atob: 'readonly', + btoa: 'readonly', + crypto: 'readonly', }, }, plugins: { @@ -90,6 +95,7 @@ function unitTestRules(options) { navigator: 'readonly', setTimeout: 'readonly', clearTimeout: 'readonly', + crypto: 'readonly', }, }, plugins: { diff --git a/strands-ts/package.json b/strands-ts/package.json index ff6fdc755..07c71e273 100644 --- a/strands-ts/package.json +++ b/strands-ts/package.json @@ -56,6 +56,18 @@ "types": "./dist/src/vended-tools/bash/index.d.ts", "default": "./dist/src/vended-tools/bash/index.js" }, + "./vended-tools/exec": { + "types": "./dist/src/vended-tools/exec/index.d.ts", + "default": "./dist/src/vended-tools/exec/index.js" + }, + "./vended-tools/code-interpreter": { + "types": "./dist/src/vended-tools/code-interpreter/index.d.ts", + "default": "./dist/src/vended-tools/code-interpreter/index.js" + }, + "./sandbox": { + "types": "./dist/src/sandbox/index.d.ts", + "default": "./dist/src/sandbox/index.js" + }, "./a2a": { "types": "./dist/src/a2a/index.d.ts", "default": "./dist/src/a2a/index.js" diff --git a/strands-ts/src/__fixtures__/agent-helpers.ts b/strands-ts/src/__fixtures__/agent-helpers.ts index 53ed7eba0..06509a557 100644 --- a/strands-ts/src/__fixtures__/agent-helpers.ts +++ b/strands-ts/src/__fixtures__/agent-helpers.ts @@ -12,6 +12,7 @@ import type { Role } from '../types/messages.js' import { StateStore } from '../state-store.js' import type { JSONValue } from '../types/json.js' import { ToolRegistry } from '../registry/tool-registry.js' +import type { Sandbox } from '../sandbox/base.js' import type { HookableEvent, StreamEvent } from '../hooks/events.js' import type { HookableEventConstructor, HookCallback } from '../hooks/types.js' import { expectLoopMetrics, type LoopMetricsMatcher } from './metrics-helpers.js' @@ -40,6 +41,10 @@ export interface MockAgentData { * Optional tool 
registry for the agent. */ toolRegistry?: ToolRegistry + /** + * Sandbox instance for the agent. + */ + sandbox?: Sandbox /** * Additional properties to spread onto the mock agent. */ @@ -66,6 +71,7 @@ export function createMockAgent(data?: MockAgentData): MockAgent { appState: new StateStore(data?.appState ?? {}), modelState: new StateStore(), toolRegistry: data?.toolRegistry ?? new ToolRegistry(), + sandbox: data?.sandbox, cancelSignal: new AbortController().signal, addHook: (eventType: HookableEventConstructor, callback: HookCallback) => { trackedHooks.push({ diff --git a/strands-ts/src/__fixtures__/test-sandbox.node.ts b/strands-ts/src/__fixtures__/test-sandbox.node.ts new file mode 100644 index 000000000..47547c2a3 --- /dev/null +++ b/strands-ts/src/__fixtures__/test-sandbox.node.ts @@ -0,0 +1,33 @@ +import { spawn } from 'child_process' +import { ShellSandbox } from '../sandbox/shell.js' +import { shellQuote } from '../utils/shell-quote.js' +import { streamProcess } from '../sandbox/stream-process.js' +import type { ExecuteOptions } from '../sandbox/base.js' +import type { ExecutionResult, StreamChunk } from '../sandbox/types.js' + +/** + * Test sandbox that executes commands within a specific working directory. + * + * Extends ShellSandbox (same base as DockerSandbox and RemoteSandbox) so it + * exercises the same code path real sandboxes use: base64 file encoding, + * shell quoting, ls parsing, etc. The only difference is commands run on + * the host rather than in a container or over SSH. + */ +export class TestSandbox extends ShellSandbox { + readonly workingDir: string + + constructor(workingDir: string) { + super() + this.workingDir = workingDir + } + + async *executeStreaming( + command: string, + options?: ExecuteOptions + ): AsyncGenerator { + const cwd = options?.cwd ?? 
this.workingDir + const fullCommand = `cd ${shellQuote(cwd)} && ${command}` + const proc = spawn('sh', ['-c', fullCommand]) + yield* streamProcess(proc, { timeout: options?.timeout, signal: options?.signal }) + } +} diff --git a/strands-ts/src/agent/__tests__/agent.model-retry.test.ts b/strands-ts/src/agent/__tests__/agent.model-retry.test.ts index 5326d05bb..101b59a2d 100644 --- a/strands-ts/src/agent/__tests__/agent.model-retry.test.ts +++ b/strands-ts/src/agent/__tests__/agent.model-retry.test.ts @@ -11,6 +11,9 @@ import { ConstantBackoff } from '../../retry/backoff-strategy.js' import { ModelThrottledError } from '../../errors.js' import { AfterModelCallEvent } from '../../hooks/events.js' import { logger } from '../../logging/logger.js' +import '../../sandbox/not-a-sandbox-local-environment.js' +// eslint-disable-next-line no-restricted-imports +import '../../vended-tools/sandbox-default-tools.js' describe('Agent retryStrategy wiring', () => { beforeEach(() => { @@ -34,7 +37,6 @@ describe('Agent retryStrategy wiring', () => { }) const invokePromise = agent.invoke('hi') - // Flush any pending timers the retry scheduled. 
await vi.runAllTimersAsync() const result = await invokePromise diff --git a/strands-ts/src/agent/__tests__/agent.test.ts b/strands-ts/src/agent/__tests__/agent.test.ts index abed0afad..f8f286061 100644 --- a/strands-ts/src/agent/__tests__/agent.test.ts +++ b/strands-ts/src/agent/__tests__/agent.test.ts @@ -907,7 +907,7 @@ describe('Agent', () => { await agent.invoke('First prompt') expect(agent.systemPrompt).toEqual([new TextBlock('You are a helpful assistant')]) - // Should have been called with the given promp + // Should have been called with the given prompt and no tools (no sandbox configured) expect(streamSpy).toHaveBeenCalledWith( expect.any(Array), expect.objectContaining({ diff --git a/strands-ts/src/agent/agent.ts b/strands-ts/src/agent/agent.ts index 543ffcca8..789b5d85d 100644 --- a/strands-ts/src/agent/agent.ts +++ b/strands-ts/src/agent/agent.ts @@ -8,6 +8,7 @@ import { type LocalAgent, type localAgentSymbol, } from '../types/agent.js' +import type { Sandbox } from '../sandbox/base.js' import { BedrockModel } from '../models/bedrock.js' import { contentBlockFromData, @@ -220,6 +221,14 @@ export type AgentConfig = { * Defaults to `'concurrent'`. See {@link ToolExecutorStrategy} for details. */ toolExecutor?: ToolExecutorStrategy + /** + * Sandbox for tool code execution and filesystem access. + * When provided, sandbox default tools (fileEditor, exec, codeInterpreter) are + * auto-registered and execute within the sandbox. + * When omitted, no sandbox tools are auto-registered. + * Pass `false` to explicitly disable sandbox and sandbox tool auto-registration. + */ + sandbox?: Sandbox | false } /** Default name assigned to agents when none is provided. */ @@ -262,6 +271,19 @@ export class Agent implements LocalAgent, InvokableAgent { */ public model: Model + /** + * Sandbox for tool code execution and filesystem access. + * Set immediately if passed via config, otherwise defaults to NotASandboxLocalEnvironment during initialize(). 
+ */ + private _sandbox: Sandbox | false | undefined + + get sandbox(): Sandbox { + if (!this._sandbox) { + throw new Error('Sandbox is not available. Pass a Sandbox instance to the agent config to enable it.') + } + return this._sandbox + } + /** * The system prompt to pass to the model provider. */ @@ -407,6 +429,8 @@ export class Agent implements LocalAgent, InvokableAgent { this._appendMessageAndFireHooks(message, invocationState) ) + this._sandbox = config?.sandbox + this._initialized = false } @@ -443,6 +467,17 @@ export class Agent implements LocalAgent, InvokableAgent { return } + const userProvidedSandbox = this._sandbox !== undefined && this._sandbox !== false + + if (!userProvidedSandbox && typeof process !== 'undefined' && process.versions?.node) { + const { NotASandboxLocalEnvironment } = await import('../sandbox/not-a-sandbox-local-environment.js') + this._sandbox = new NotASandboxLocalEnvironment() + } + + if (this._sandbox) { + await this._sandbox.start() + } + // Initialize MCP clients and register their tools await Promise.all( this._mcpClients.map(async (client) => { @@ -457,6 +492,15 @@ export class Agent implements LocalAgent, InvokableAgent { await this._pluginRegistry.initialize(this) + if (userProvidedSandbox) { + const { SANDBOX_DEFAULT_TOOLS } = await import('../vended-tools/sandbox-default-tools.js') + for (const tool of SANDBOX_DEFAULT_TOOLS) { + if (!this._toolRegistry.get(tool.name)) { + this._toolRegistry.add(tool) + } + } + } + await this._hooksRegistry.invokeCallbacks(new InitializedEvent({ agent: this })) this._initialized = true diff --git a/strands-ts/src/index.ts b/strands-ts/src/index.ts index ce2514a99..af9d9a7b4 100644 --- a/strands-ts/src/index.ts +++ b/strands-ts/src/index.ts @@ -300,6 +300,17 @@ export { AgentTrace } from './telemetry/tracer.js' // Local Metrics export { AgentMetrics } from './telemetry/meter.js' +// Sandbox (base class and types only — Node-specific implementations available via './sandbox' sub-export) 
+export { Sandbox, type ExecuteOptions } from './sandbox/base.js' +export type { + StreamType, + StreamChunk, + FileInfo, + OutputFile, + ExecutionResult, + SandboxSnapshot, +} from './sandbox/types.js' + // Multi-agent orchestration export { Graph } from './multiagent/index.js' export { Swarm } from './multiagent/index.js' diff --git a/strands-ts/src/sandbox/__tests__/remote.test.ts b/strands-ts/src/sandbox/__tests__/remote.test.ts new file mode 100644 index 000000000..2ffe282ad --- /dev/null +++ b/strands-ts/src/sandbox/__tests__/remote.test.ts @@ -0,0 +1,158 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' +import { RemoteSandbox } from '../remote.js' +import * as childProcess from 'child_process' + +vi.mock('child_process', async () => { + const actual = await vi.importActual('child_process') + return { ...actual, spawn: vi.fn() } +}) + +function createMockProcess() { + const proc = { + stdout: { on: vi.fn() }, + stderr: { on: vi.fn() }, + on: vi.fn(), + kill: vi.fn(), + } + + // Simulate immediate close with exit code 0 + proc.on.mockImplementation((event: string, cb: (code: number | null) => void) => { + if (event === 'close') { + // Schedule the close callback + Promise.resolve().then(() => cb(0)) + } + }) + + return proc +} + +describe('RemoteSandbox (unit)', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + describe('constructor', () => { + it('stores host and workingDir', () => { + const sandbox = new RemoteSandbox({ host: 'myhost', workingDir: '/workspace' }) + expect(sandbox.host).toBe('myhost') + expect(sandbox.workingDir).toBe('/workspace') + }) + + it('defaults port to 22', () => { + const sandbox = new RemoteSandbox({ host: 'myhost', workingDir: '/ws' }) + // Port is private but we can verify via the SSH args in stream() + expect(sandbox).toBeDefined() + }) + }) + + describe('stream() SSH argument construction', () => { + it('builds correct SSH args with defaults', async () => { + const mockProc = createMockProcess() + 
vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) + + const sandbox = new RemoteSandbox({ host: 'user@server.com', workingDir: '/remote/path' }) + + // Start consuming the generator to trigger spawn + const gen = sandbox.executeStreaming('echo hi') + const iter = gen[Symbol.asyncIterator]() + await iter.next() + + expect(childProcess.spawn).toHaveBeenCalledWith('ssh', [ + '-o', + 'StrictHostKeyChecking=accept-new', + '-o', + 'BatchMode=yes', + '-p', + '22', + 'user@server.com', + "cd '/remote/path' && echo hi", + ]) + }) + + it('includes identity file when provided', async () => { + const mockProc = createMockProcess() + vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) + + const sandbox = new RemoteSandbox({ + host: 'server', + workingDir: '/ws', + identityFile: '/home/user/.ssh/key', + }) + + const gen = sandbox.executeStreaming('ls') + const iter = gen[Symbol.asyncIterator]() + await iter.next() + + const args = vi.mocked(childProcess.spawn).mock.calls[0]![1] as string[] + expect(args).toContain('-i') + expect(args).toContain('/home/user/.ssh/key') + }) + + it('uses custom port', async () => { + const mockProc = createMockProcess() + vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) + + const sandbox = new RemoteSandbox({ + host: 'server', + workingDir: '/ws', + port: 2222, + }) + + const gen = sandbox.executeStreaming('ls') + const iter = gen[Symbol.asyncIterator]() + await iter.next() + + const args = vi.mocked(childProcess.spawn).mock.calls[0]![1] as string[] + expect(args).toContain('-p') + expect(args).toContain('2222') + }) + + it('quotes cwd with single quotes', async () => { + const mockProc = createMockProcess() + vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) + + const sandbox = new RemoteSandbox({ + host: 'server', + workingDir: "/path/with spaces/and'quotes", + }) + + const gen = sandbox.executeStreaming('ls') + const iter = gen[Symbol.asyncIterator]() + await iter.next() + + const args = 
vi.mocked(childProcess.spawn).mock.calls[0]![1] as string[] + const remoteCommand = args[args.length - 1] + expect(remoteCommand).toContain("cd '/path/with spaces/and'\\''quotes'") + }) + + it('uses cwd option when provided', async () => { + const mockProc = createMockProcess() + vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) + + const sandbox = new RemoteSandbox({ host: 'server', workingDir: '/default' }) + + const gen = sandbox.executeStreaming('ls', { cwd: '/override' }) + const iter = gen[Symbol.asyncIterator]() + await iter.next() + + const args = vi.mocked(childProcess.spawn).mock.calls[0]![1] as string[] + const remoteCommand = args[args.length - 1] + expect(remoteCommand).toContain("cd '/override'") + }) + }) + + describe('start()', () => { + it('creates working directory with cwd: /', async () => { + const mockProc = createMockProcess() + vi.mocked(childProcess.spawn).mockReturnValue(mockProc as never) + + const sandbox = new RemoteSandbox({ host: 'server', workingDir: '/my/workspace' }) + await sandbox.start() + + const args = vi.mocked(childProcess.spawn).mock.calls[0]![1] as string[] + const remoteCommand = args[args.length - 1] + expect(remoteCommand).toContain("cd '/'") + expect(remoteCommand).toContain("mkdir -p '/my/workspace'") + }) + }) +}) diff --git a/strands-ts/src/sandbox/__tests__/shell.test.node.ts b/strands-ts/src/sandbox/__tests__/shell.test.node.ts new file mode 100644 index 000000000..894a4811e --- /dev/null +++ b/strands-ts/src/sandbox/__tests__/shell.test.node.ts @@ -0,0 +1,303 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest' +import { spawn } from 'child_process' +import { execSync } from 'child_process' +import { ShellSandbox } from '../shell.js' +import { shellQuote } from '../../utils/shell-quote.js' +import { streamProcess } from '../stream-process.js' +import type { ExecuteOptions } from '../base.js' +import type { ExecutionResult, StreamChunk } from '../types.js' + +const TEST_DIR = 
'/tmp/strands-test-shell-sandbox' + +/** + * Concrete test subclass that runs commands via local `sh`. + * This exercises the ShellSandbox code paths (base64, shellQuote, ls parsing) + * without requiring SSH or Docker. + */ +class TestShellSandbox extends ShellSandbox { + readonly workingDir: string + + constructor(workingDir: string) { + super() + this.workingDir = workingDir + } + + async *executeStreaming( + command: string, + options?: ExecuteOptions + ): AsyncGenerator { + const cwd = options?.cwd ?? this.workingDir + const fullCommand = `cd ${shellQuote(cwd)} && ${command}` + const proc = spawn('sh', ['-c', fullCommand]) + yield* streamProcess(proc, { timeout: options?.timeout, signal: options?.signal }) + } +} + +describe.skipIf(process.platform === 'win32')('ShellSandbox', () => { + let sandbox: TestShellSandbox + + beforeEach(() => { + execSync(`rm -rf ${TEST_DIR} && mkdir -p ${TEST_DIR}`) + sandbox = new TestShellSandbox(TEST_DIR) + }) + + afterEach(() => { + execSync(`rm -rf ${TEST_DIR}`) + }) + + describe('execute (via shell commands)', () => { + it('runs a command', async () => { + const result = await sandbox.execute('echo hello') + expect(result.exitCode).toBe(0) + expect(result.stdout).toBe('hello\n') + }) + + it('runs in workingDir', async () => { + const result = await sandbox.execute('pwd') + expect(result.stdout.trim()).toContain('strands-test-shell-sandbox') + }) + + it('respects cwd option', async () => { + const result = await sandbox.execute('pwd', { cwd: '/tmp' }) + expect(result.stdout.trim()).toMatch(/\/tmp$/) + }) + }) + + describe('executeCode (via shell quoting)', () => { + it('runs python code through shell', async () => { + const result = await sandbox.executeCode('print(2 + 2)', 'python3') + expect(result.exitCode).toBe(0) + expect(result.stdout).toBe('4\n') + }) + + it('handles code with special characters', async () => { + const result = await sandbox.executeCode('print(\'hello "world"\')', 'python3') + 
expect(result.stdout).toBe('hello "world"\n') + }) + + it('handles code with single quotes', async () => { + const result = await sandbox.executeCode('print("it\'s working")', 'python3') + expect(result.stdout).toBe("it's working\n") + }) + }) + + describe('language validation', () => { + it('rejects path traversal', async () => { + await expect(sandbox.executeCode('x', '../../../bin/sh')).rejects.toThrow('unsafe characters') + }) + + it('rejects shell metacharacters', async () => { + await expect(sandbox.executeCode('x', 'python;rm -rf /')).rejects.toThrow('unsafe characters') + }) + + it('rejects spaces', async () => { + await expect(sandbox.executeCode('x', 'python -c')).rejects.toThrow('unsafe characters') + }) + + it('allows valid interpreters', async () => { + const result = await sandbox.executeCode('print("safe")', 'python3') + expect(result.exitCode).toBe(0) + }) + + it('allows dots and hyphens', async () => { + const result = await sandbox.executeCode('x', 'fake-lang.99') + expect(result.exitCode).toBe(127) + }) + }) + + describe('read/write (via base64 encoding over shell)', () => { + it('text file roundtrip', async () => { + await sandbox.writeText('test.txt', 'hello shell') + const text = await sandbox.readText('test.txt') + expect(text).toBe('hello shell') + }) + + it('binary file roundtrip', async () => { + const bytes = new Uint8Array([0, 1, 2, 127, 128, 254, 255]) + await sandbox.writeFile('binary.bin', bytes) + const read = await sandbox.readFile('binary.bin') + expect(Array.from(read)).toStrictEqual(Array.from(bytes)) + }) + + it('all 256 byte values roundtrip', async () => { + const bytes = new Uint8Array(256) + for (let i = 0; i < 256; i++) bytes[i] = i + await sandbox.writeFile('all-bytes.bin', bytes) + const read = await sandbox.readFile('all-bytes.bin') + expect(Array.from(read)).toStrictEqual(Array.from(bytes)) + }) + + it('creates parent directories', async () => { + await sandbox.writeText('deep/nested/file.txt', 'deep') + const text = 
await sandbox.readText('deep/nested/file.txt') + expect(text).toBe('deep') + }) + + it('handles unicode content', async () => { + const content = 'ζ—₯本θͺž πŸš€ Γ©mojis' + await sandbox.writeText('unicode.txt', content) + const text = await sandbox.readText('unicode.txt') + expect(text).toBe(content) + }) + + it('handles shell metacharacters in content', async () => { + const content = '$(rm -rf /) `whoami` && || $HOME' + await sandbox.writeText('meta.txt', content) + const text = await sandbox.readText('meta.txt') + expect(text).toBe(content) + }) + + it('throws on nonexistent file', async () => { + await expect(sandbox.readFile('nope.txt')).rejects.toThrow() + }) + }) + + describe('remove', () => { + it('removes a file', async () => { + await sandbox.writeText('delete-me.txt', 'bye') + await sandbox.removeFile('delete-me.txt') + await expect(sandbox.readFile('delete-me.txt')).rejects.toThrow() + }) + + it('throws on nonexistent file', async () => { + await expect(sandbox.removeFile('nope.txt')).rejects.toThrow() + }) + }) + + describe('list (via ls -1aF parsing)', () => { + it('lists directory contents', async () => { + await sandbox.writeText('a.txt', 'a') + await sandbox.writeText('b.txt', 'b') + const files = await sandbox.listFiles('.') + const names = files.map((f) => f.name) + expect(names).toContain('a.txt') + expect(names).toContain('b.txt') + }) + + it('identifies directories', async () => { + await sandbox.execute('mkdir -p subdir') + const files = await sandbox.listFiles('.') + const subdir = files.find((f) => f.name === 'subdir') + expect(subdir?.isDir).toBe(true) + }) + + it('excludes . and .. 
entries', async () => { + await sandbox.writeText('file.txt', '') + const files = await sandbox.listFiles('.') + const names = files.map((f) => f.name) + expect(names).not.toContain('.') + expect(names).not.toContain('..') + }) + + it('throws on nonexistent directory', async () => { + await expect(sandbox.listFiles('/tmp/nonexistent-dir-xyz')).rejects.toThrow() + }) + }) + + describe('statFile', () => { + it('returns size for regular file', async () => { + await sandbox.writeText('sized.txt', 'hello') + const info = await sandbox.statFile('sized.txt') + expect(info.name).toBe('sized.txt') + expect(info.isDir).toBe(false) + expect(info.size).toBe(5) + }) + + it('identifies directories', async () => { + await sandbox.execute('mkdir -p mydir') + const info = await sandbox.statFile('mydir') + expect(info.name).toBe('mydir') + expect(info.isDir).toBe(true) + }) + + it('throws on nonexistent path', async () => { + await expect(sandbox.statFile('nonexistent')).rejects.toThrow() + }) + }) + + describe('shellQuote', () => { + it('handles paths with spaces', async () => { + await sandbox.execute('mkdir -p "with spaces"') + await sandbox.writeText('with spaces/file.txt', 'spaced') + const text = await sandbox.readText('with spaces/file.txt') + expect(text).toBe('spaced') + }) + + it('handles paths with single quotes', async () => { + await sandbox.execute('mkdir -p "it\'s"') + await sandbox.writeText("it's/file.txt", 'quoted') + const text = await sandbox.readText("it's/file.txt") + expect(text).toBe('quoted') + }) + }) + + describe('timeout', () => { + it('kills process on timeout', async () => { + const start = Date.now() + await expect(sandbox.execute('sleep 60', { timeout: 0.2 })).rejects.toThrow('timed out') + const elapsed = Date.now() - start + expect(elapsed).toBeLessThan(2000) + }) + + it('does not timeout fast commands', async () => { + const result = await sandbox.execute('echo fast', { timeout: 5 }) + expect(result.exitCode).toBe(0) + 
expect(result.stdout).toBe('fast\n') + }) + }) + + describe('abort signal', () => { + it('kills process when signal is aborted', async () => { + const controller = new AbortController() + const promise = sandbox.execute('sleep 60', { signal: controller.signal }) + setTimeout(() => controller.abort(), 100) + await expect(promise).rejects.toThrow('aborted') + }) + + it('rejects immediately if signal is already aborted', async () => { + const controller = new AbortController() + controller.abort() + await expect(sandbox.execute('sleep 60', { signal: controller.signal })).rejects.toThrow('aborted') + }) + }) + + describe('concurrent execution', () => { + it('handles multiple concurrent commands', async () => { + const results = await Promise.all([ + sandbox.execute('echo one'), + sandbox.execute('echo two'), + sandbox.execute('echo three'), + ]) + expect(results.map((r) => r.stdout.trim()).sort()).toStrictEqual(['one', 'three', 'two']) + }) + + it('handles concurrent file writes to different files', async () => { + await Promise.all([ + sandbox.writeText('a.txt', 'aaa'), + sandbox.writeText('b.txt', 'bbb'), + sandbox.writeText('c.txt', 'ccc'), + ]) + const [a, b, c] = await Promise.all([ + sandbox.readText('a.txt'), + sandbox.readText('b.txt'), + sandbox.readText('c.txt'), + ]) + expect(a).toBe('aaa') + expect(b).toBe('bbb') + expect(c).toBe('ccc') + }) + }) + + describe('streaming', () => { + it('yields StreamChunks then ExecutionResult', async () => { + const chunks: Array<{ type: string }> = [] + for await (const chunk of sandbox.executeStreaming('echo hello')) { + chunks.push(chunk) + } + const streamChunks = chunks.filter((c) => c.type === 'streamChunk') + const results = chunks.filter((c) => c.type === 'executionResult') + expect(streamChunks.length).toBeGreaterThan(0) + expect(results).toHaveLength(1) + }) + }) +}) diff --git a/strands-ts/src/sandbox/base.ts b/strands-ts/src/sandbox/base.ts new file mode 100644 index 000000000..0362496dd --- /dev/null +++ 
/**
 * Options for command and code execution.
 */
export interface ExecuteOptions {
  /** Maximum execution time in seconds. `undefined` means no timeout. */
  timeout?: number | undefined
  /** Working directory for execution. `undefined` means use the sandbox default. */
  cwd?: string | undefined
  /** Abort signal to cancel execution. The process is killed when the signal fires. */
  signal?: AbortSignal | undefined
}

/**
 * Abstract execution environment for agent tools.
 *
 * A Sandbox provides the runtime context where tools execute code,
 * run commands, and interact with a filesystem. Multiple tools share
 * the same Sandbox instance, giving them a common working directory,
 * environment variables, and filesystem.
 *
 * Streaming methods (`executeStreaming`, `executeCodeStreaming`) are the abstract primitives.
 * Non-streaming convenience methods (`execute`, `executeCode`) consume
 * the stream and return the final result.
 */
export abstract class Sandbox {
  // ---- Lifecycle methods ----

  /** Prepare the sandbox for use. The base implementation does nothing. */
  async start(): Promise<void> {}

  /** Release sandbox resources. The base implementation does nothing. */
  async stop(): Promise<void> {}

  // ---- Abstract streaming methods ----

  /**
   * Execute a shell command, streaming output.
   *
   * Yields {@link StreamChunk} objects for stdout and stderr as output
   * arrives. The final yield is an {@link ExecutionResult} with the
   * exit code and complete output.
   *
   * @param command - The shell command to execute.
   * @param options - Execution options (timeout, cwd, signal).
   */
  abstract executeStreaming(command: string, options?: ExecuteOptions): AsyncIterable<StreamChunk | ExecutionResult>

  /**
   * Execute source code via a language interpreter, streaming output.
   *
   * @param code - The source code to execute.
   * @param language - The interpreter to use (e.g., `"python3"`, `"node"`).
   * @param options - Execution options (timeout, cwd, signal).
   */
  abstract executeCodeStreaming(
    code: string,
    language: string,
    options?: ExecuteOptions
  ): AsyncIterable<StreamChunk | ExecutionResult>

  /**
   * Read a file from the sandbox filesystem as raw bytes.
   *
   * Returns `Uint8Array` to support both text and binary files.
   * Use {@link readText} for a convenience wrapper that decodes to a string.
   *
   * @param path - Path to the file to read.
   * @throws Error if the file does not exist.
   */
  abstract readFile(path: string): Promise<Uint8Array>

  /**
   * Write raw bytes to a file in the sandbox filesystem.
   *
   * Implementations should create parent directories if they do not exist.
   * Use {@link writeText} for a convenience wrapper that encodes a string.
   *
   * @param path - Path to the file to write.
   * @param content - The content to write.
   */
  abstract writeFile(path: string, content: Uint8Array): Promise<void>

  /**
   * Remove a file from the sandbox filesystem.
   *
   * @param path - Path to the file to remove.
   * @throws Error if the file does not exist.
   */
  abstract removeFile(path: string): Promise<void>

  /**
   * List files in a sandbox directory.
   *
   * Returns {@link FileInfo} entries with name, isDir, and size metadata.
   * Fields `isDir` and `size` may be `undefined` if the backend cannot
   * determine them.
   *
   * @param path - Path to the directory to list.
   * @throws Error if the directory does not exist.
   */
  abstract listFiles(path: string): Promise<FileInfo[]>

  /**
   * Get metadata about a file or directory.
   *
   * @param path - Path to stat.
   * @returns FileInfo with name, isDir, and size.
   * @throws Error if the path does not exist.
   */
  abstract statFile(path: string): Promise<FileInfo>

  // ---- Non-streaming convenience methods ----

  /**
   * Execute a shell command and return the result.
   *
   * Consumes {@link executeStreaming} and returns the first {@link ExecutionResult}
   * it yields. Use `executeStreaming` when you need to process output as it arrives.
   *
   * @param command - The shell command to execute.
   * @param options - Execution options (timeout, cwd, signal).
   * @throws Error if the stream ends without yielding an ExecutionResult.
   */
  async execute(command: string, options?: ExecuteOptions): Promise<ExecutionResult> {
    for await (const chunk of this.executeStreaming(command, options)) {
      if (chunk.type === 'executionResult') {
        return chunk
      }
    }
    throw new Error('executeStreaming() did not yield an ExecutionResult')
  }

  /**
   * Execute source code and return the result.
   *
   * Consumes {@link executeCodeStreaming} and returns the first {@link ExecutionResult}
   * it yields. Use `executeCodeStreaming` when you need to process output as it arrives.
   *
   * @param code - The source code to execute.
   * @param language - The interpreter to use.
   * @param options - Execution options (timeout, cwd, signal).
   * @throws Error if the stream ends without yielding an ExecutionResult.
   */
  async executeCode(code: string, language: string, options?: ExecuteOptions): Promise<ExecutionResult> {
    for await (const chunk of this.executeCodeStreaming(code, language, options)) {
      if (chunk.type === 'executionResult') {
        return chunk
      }
    }
    throw new Error('executeCodeStreaming() did not yield an ExecutionResult')
  }

  /**
   * Read a text file from the sandbox filesystem.
   *
   * Convenience wrapper over {@link readFile} that decodes bytes as UTF-8.
   * For other encodings, call `readFile` and decode manually.
   *
   * @param path - Path to the file to read.
   */
  async readText(path: string): Promise<string> {
    const data = await this.readFile(path)
    return new TextDecoder().decode(data)
  }

  /**
   * Write a text file to the sandbox filesystem.
   *
   * Convenience wrapper over {@link writeFile} that encodes a string as UTF-8.
   * For other encodings, encode manually and call `writeFile`.
   *
   * @param path - Path to the file to write.
   * @param content - The text content to write.
   */
  async writeText(path: string, content: string): Promise<void> {
    await this.writeFile(path, new TextEncoder().encode(content))
  }
}

/** Allow-list pattern for interpreter names (strands-ts/src/sandbox/constants.ts). */
export const LANGUAGE_PATTERN = /^[a-zA-Z0-9._-]+$/
const execFile = promisify(execFileCb)

// Host paths whose bind-mount triggers a warning in the DockerSandbox constructor.
const DANGEROUS_MOUNT_PATHS = ['/', '/boot', '/dev', '/etc', '/lib', '/lib64', '/proc', '/sys', '/usr']
const DANGEROUS_MOUNT_TARGETS = ['/var/run/docker.sock']

/**
 * Run the `docker` CLI with the given arguments and capture its output.
 *
 * Never rejects: a failed invocation is reported through a non-zero `status`
 * (the numeric exit code when available, otherwise 1) with whatever stdout/stderr
 * the failure carried.
 */
async function dockerCmd(args: string[]): Promise<{ stdout: string; stderr: string; status: number }> {
  try {
    const { stdout, stderr } = await execFile('docker', args, { encoding: 'utf-8' })
    return { stdout, stderr, status: 0 }
  } catch (err: unknown) {
    const e = err as { stdout?: string; stderr?: string; message?: string; code?: number | string }
    // `code` is a string (e.g. 'ENOENT') when the binary could not be spawned at all.
    const status = typeof e.code === 'number' ? e.code : 1
    return { stdout: e.stdout ?? '', stderr: e.stderr ?? e.message ?? '', status }
  }
}

/**
 * Options for constructing a {@link DockerSandbox}.
 */
export interface DockerSandboxOptions {
  /** Docker image to use (e.g., "python:3.12", "node:20-alpine"). */
  image: string
  /** Working directory inside the container. Defaults to "/workspace". */
  workingDir?: string
  /** Container name. Auto-generated if not provided. */
  name?: string
  /**
   * Volume mounts in "host:container" format.
   *
   * **Security warning**: Host path mounts bypass container isolation. Avoid mounting
   * sensitive paths (/, /etc, /proc, /sys, /var/run/docker.sock) as they expose the
   * host to LLM-generated commands.
   */
  volumes?: string[]
  /** Environment variables to set in the container. */
  env?: Record<string, string>
  /** Memory limit (e.g., "512m", "2g"). */
  memory?: string
  /** CPU limit (e.g., 1.5 for one and a half cores). */
  cpus?: number
  /** Maximum number of PIDs in the container. Prevents fork bombs. */
  pidsLimit?: number
  /** Docker network mode. Use "none" to disable network access. */
  network?: string
  /** User to run as inside the container. Defaults to "1000:1000" (non-root). Pass "root" to run as root. */
  user?: string
  /**
   * Allow privilege escalation inside the container.
   *
   * When `false` (default), applies `--cap-drop ALL` and `--security-opt no-new-privileges`
   * to prevent setuid escalation and drop all Linux capabilities.
   */
  allowPrivilegeEscalation?: boolean
  /** Snapshot to resume from (image ID from a previous pause). */
  snapshot?: SandboxSnapshot
}

/**
 * Execute commands inside a Docker container.
 *
 * The container is created on {@link start} and destroyed on {@link stop}.
 * All sandbox operations (file I/O, code execution, directory listing)
 * route through `docker exec`. File ops use base64 encoding inherited
 * from ShellSandbox.
 *
 * @example
 * ```typescript
 * const sandbox = new DockerSandbox({ image: 'python:3.12' })
 * await sandbox.start()
 * const result = await sandbox.execute('python3 -c "print(1+1)"')
 * await sandbox.stop()
 * ```
 */
export class DockerSandbox extends ShellSandbox {
  readonly image: string
  readonly workingDir: string
  private readonly _name: string
  private readonly _volumes: string[]
  private readonly _env: Record<string, string>
  private readonly _memory: string | undefined
  private readonly _cpus: number | undefined
  private readonly _pidsLimit: number | undefined
  private readonly _network: string | undefined
  private readonly _user: string
  private readonly _allowPrivilegeEscalation: boolean
  private readonly _snapshot: SandboxSnapshot | undefined
  private _running = false
  private _cleanupRegistered = false

  /**
   * @param options - Container configuration. Explicit options take precedence over
   *   values stored in `options.snapshot`.
   * @throws Error if `options.snapshot` is given but does not carry a string `imageId`.
   */
  constructor(options: DockerSandboxOptions) {
    super()
    const snap = options.snapshot?.data
    if (options.snapshot !== undefined) {
      // Fail fast: start() passes imageId straight into the `docker run` argv.
      const imageId = snap?.['imageId']
      if (typeof imageId !== 'string' || imageId.length === 0) {
        throw new Error('Invalid snapshot: data.imageId must be a non-empty string')
      }
    }
    this.image = options.image
    this.workingDir = options.workingDir ?? (snap?.workingDir as string) ?? '/workspace'
    this._name = options.name ?? `strands-sandbox-${randomUUID()}`
    this._volumes = options.volumes ?? (snap?.volumes as string[]) ?? []
    for (const vol of this._volumes) {
      // Host side of "host:container", with trailing slashes removed ('' means root).
      const hostPath = vol.split(':')[0]?.replace(/\/+$/, '') || '/'
      if (DANGEROUS_MOUNT_PATHS.includes(hostPath) || DANGEROUS_MOUNT_TARGETS.includes(hostPath)) {
        logger.warn(
          `volume=<${vol}> | mounting this host path exposes the host filesystem to LLM-generated commands, bypassing container isolation`
        )
      }
    }
    this._env = options.env ?? (snap?.env as Record<string, string>) ?? {}
    this._memory = options.memory
    this._cpus = options.cpus
    this._pidsLimit = options.pidsLimit
    this._network = options.network
    this._user = options.user ?? '1000:1000'
    this._allowPrivilegeEscalation = options.allowPrivilegeEscalation ?? false
    this._snapshot = options.snapshot
  }

  /**
   * Create and start the container. No-op if already running.
   *
   * @throws Error if the Docker daemon is unreachable or `docker run` fails.
   */
  async start(): Promise<void> {
    if (this._running) return

    const info = await dockerCmd(['info'])
    if (info.status !== 0) {
      throw new Error('Docker is not available. Ensure Docker is installed and running.')
    }

    // Remove any stale container from a previous crash where exit handlers didn't fire
    await dockerCmd(['rm', '-f', this._name])

    const image = this._snapshot ? (this._snapshot.data['imageId'] as string) : this.image

    const args: string[] = ['run', '-d', '--name', this._name, '-w', this.workingDir]

    for (const vol of this._volumes) {
      args.push('-v', vol)
    }

    for (const [key, value] of Object.entries(this._env)) {
      args.push('-e', `${key}=${value}`)
    }

    if (this._memory) args.push('--memory', this._memory)
    if (this._cpus) args.push('--cpus', String(this._cpus))
    if (this._pidsLimit) args.push('--pids-limit', String(this._pidsLimit))
    if (this._network) args.push('--network', this._network)
    args.push('--user', this._user)
    if (!this._allowPrivilegeEscalation) {
      args.push('--cap-drop', 'ALL')
      args.push('--security-opt', 'no-new-privileges')
    }

    // Keep the container alive indefinitely so later `docker exec` calls have a target.
    args.push(image, 'tail', '-f', '/dev/null')

    const result = await dockerCmd(args)
    if (result.status !== 0) {
      throw new Error(`Failed to start Docker container: ${result.stderr}`)
    }

    this._running = true
    // Best-effort: the chown result is not checked.
    await dockerCmd(['exec', this._name, 'chown', this._user, this.workingDir])
    this._registerCleanup()
  }

  /** Force-remove the container and unregister process exit hooks. No-op if not running. */
  async stop(): Promise<void> {
    if (!this._running) return
    await dockerCmd(['rm', '-f', this._name])
    this._running = false
    this._removeCleanup()
  }

  /**
   * Snapshot the running container with `docker commit`.
   *
   * @returns A snapshot carrying the committed image ID plus workingDir, volumes, and env.
   * @throws Error if the container is not running or the commit fails.
   */
  async pause(): Promise<SandboxSnapshot> {
    if (!this._running) {
      throw new Error('Cannot pause: container is not running')
    }

    const result = await dockerCmd(['commit', this._name])
    if (result.status !== 0) {
      throw new Error(`Failed to snapshot container: ${result.stderr}`)
    }

    return {
      backendId: 'docker',
      data: { imageId: result.stdout.trim(), workingDir: this.workingDir, volumes: this._volumes, env: this._env },
    }
  }

  /**
   * Run a shell command in the container via `docker exec ... sh -c`, streaming output.
   *
   * @param command - The shell command to execute.
   * @param options - Execution options (timeout, cwd, signal).
   * @throws Error if the container has not been started.
   */
  async *executeStreaming(
    command: string,
    options?: ExecuteOptions
  ): AsyncGenerator<StreamChunk | ExecutionResult> {
    if (!this._running) {
      throw new Error('Container is not running. Call start() before executing commands.')
    }

    const cwd = options?.cwd ?? this.workingDir
    const execCommand = `cd ${shellQuote(cwd)} && ${command}`

    const proc = spawn('docker', ['exec', this._name, 'sh', '-c', execCommand])
    yield* streamProcess(proc, { timeout: options?.timeout, signal: options?.signal })
  }

  private _onExit: (() => void) | undefined
  private _onSigint: (() => void) | undefined
  private _onSigterm: (() => void) | undefined

  /** Install process-level hooks that remove the container on exit, SIGINT, and SIGTERM. */
  private _registerCleanup(): void {
    if (this._cleanupRegistered) return
    this._cleanupRegistered = true

    this._onExit = (): void => {
      if (this._running) {
        // Must be sync: async I/O is not processed during the 'exit' event
        spawnSync('docker', ['rm', '-f', this._name], { stdio: 'pipe', timeout: 5000 })
        this._running = false
      }
    }

    // Signal handlers clean up, detach themselves, then re-raise the same signal.
    this._onSigint = (): void => {
      this._onExit!()
      this._removeCleanup()
      process.kill(process.pid, 'SIGINT')
    }

    this._onSigterm = (): void => {
      this._onExit!()
      this._removeCleanup()
      process.kill(process.pid, 'SIGTERM')
    }

    process.on('exit', this._onExit)
    process.on('SIGINT', this._onSigint)
    process.on('SIGTERM', this._onSigterm)
  }

  /** Detach the hooks installed by {@link _registerCleanup}. */
  private _removeCleanup(): void {
    if (!this._onExit) return
    process.off('exit', this._onExit)
    process.off('SIGINT', this._onSigint!)
    process.off('SIGTERM', this._onSigterm!)
    this._onExit = undefined
    this._onSigint = undefined
    this._onSigterm = undefined
    this._cleanupRegistered = false
  }
}
+ * + * This module provides the {@link Sandbox} interface that decouples tool logic + * from where code runs. Tools that need to execute code or access a filesystem + * receive a Sandbox instead of managing their own execution, enabling + * portability across local and cloud environments. + */ + +export { Sandbox, type ExecuteOptions } from './base.js' +export { ShellSandbox } from './shell.js' +export { RemoteSandbox, type RemoteSandboxOptions } from './remote.js' +export { DockerSandbox, type DockerSandboxOptions } from './docker.js' +export type { StreamType, StreamChunk, FileInfo, OutputFile, ExecutionResult, SandboxSnapshot } from './types.js' diff --git a/strands-ts/src/sandbox/not-a-sandbox-local-environment.ts b/strands-ts/src/sandbox/not-a-sandbox-local-environment.ts new file mode 100644 index 000000000..afea2db48 --- /dev/null +++ b/strands-ts/src/sandbox/not-a-sandbox-local-environment.ts @@ -0,0 +1,111 @@ +/** + * Local sandbox implementation using native Node.js APIs. + * + * Executes commands and code on the local machine using Node.js child processes + * and native filesystem operations. This is the default sandbox used when + * no explicit sandbox is configured. + */ + +import { spawn } from 'child_process' +import { readFile, writeFile, unlink, mkdir, readdir, stat } from 'fs/promises' +import { join, dirname, basename, isAbsolute } from 'path' +import type { ExecuteOptions } from './base.js' +import { Sandbox } from './base.js' +import { LANGUAGE_PATTERN } from './constants.js' +import { streamProcess } from './stream-process.js' +import type { ExecutionResult, FileInfo, StreamChunk } from './types.js' + +/** + * Default execution environment β€” runs on the local host without isolation. + * + * Uses `child_process.spawn` for command and code execution, and `fs/promises` + * for all file operations. This is NOT a sandbox β€” it runs with the full + * permissions of the host process. 
/**
 * Default execution environment — runs on the local host without isolation.
 *
 * Uses `child_process.spawn` for command and code execution, and `fs/promises`
 * for all file operations. This is NOT a sandbox — it runs with the full
 * permissions of the host process. Used internally as the default when no
 * sandbox is configured.
 */
export class NotASandboxLocalEnvironment extends Sandbox {
  readonly workingDir: string

  constructor() {
    super()
    this.workingDir = process.cwd()
  }

  /** Resolve a relative path against {@link workingDir}; absolute paths pass through. */
  private _resolvePath(path: string): string {
    if (isAbsolute(path)) return path
    return join(this.workingDir, path)
  }

  /**
   * Run a command through the host shell, streaming output.
   *
   * @param command - The shell command to execute.
   * @param options - Execution options (timeout, cwd, signal).
   */
  async *executeStreaming(
    command: string,
    options?: ExecuteOptions
  ): AsyncGenerator<StreamChunk | ExecutionResult> {
    const cwd = options?.cwd ?? this.workingDir
    const proc = spawn(command, [], { cwd, shell: true })
    yield* streamProcess(proc, { timeout: options?.timeout, signal: options?.signal })
  }

  /**
   * Run source code by piping it to an interpreter's stdin, streaming output.
   *
   * @param code - The source code to execute.
   * @param language - Interpreter executable name; must match LANGUAGE_PATTERN.
   * @param options - Execution options (timeout, cwd, signal).
   * @throws Error if `language` contains characters outside the allowed pattern.
   */
  async *executeCodeStreaming(
    code: string,
    language: string,
    options?: ExecuteOptions
  ): AsyncGenerator<StreamChunk | ExecutionResult> {
    if (!LANGUAGE_PATTERN.test(language)) {
      throw new Error(`language parameter contains unsafe characters: ${language}`)
    }

    const cwd = options?.cwd ?? this.workingDir
    const proc = spawn(language, [], { cwd, shell: false, stdio: ['pipe', 'pipe', 'pipe'] })
    // If the interpreter exits before draining stdin the write fails (EPIPE). Without a
    // listener that 'error' event would be unhandled and crash the host process; the
    // failure itself is already reported through the exit code and stderr.
    proc.stdin!.on('error', () => {})
    proc.stdin!.end(code)
    yield* streamProcess(proc, {
      timeout: options?.timeout,
      signal: options?.signal,
      enoentMessage: `Language interpreter not found: ${language}`,
    })
  }

  /** Read a file as raw bytes. Rejects if the file cannot be read. */
  async readFile(path: string): Promise<Uint8Array> {
    return readFile(this._resolvePath(path))
  }

  /** Write raw bytes to a file, creating parent directories as needed. */
  async writeFile(path: string, content: Uint8Array): Promise<void> {
    const fullPath = this._resolvePath(path)
    await mkdir(dirname(fullPath), { recursive: true })
    await writeFile(fullPath, content)
  }

  /** Delete a file. Rejects if the file cannot be unlinked. */
  async removeFile(path: string): Promise<void> {
    await unlink(this._resolvePath(path))
  }

  /**
   * List a directory, sorted by name.
   *
   * Entries whose `stat` fails are still returned, with only `name` set.
   */
  async listFiles(path: string): Promise<FileInfo[]> {
    const fullPath = this._resolvePath(path)
    const entries = await readdir(fullPath, { withFileTypes: true })
    entries.sort((a, b) => a.name.localeCompare(b.name))

    // The stat calls are independent, so run them concurrently; Promise.all keeps order.
    return Promise.all(
      entries.map(async (entry): Promise<FileInfo> => {
        try {
          const entryStat = await stat(join(fullPath, entry.name))
          return { name: entry.name, isDir: entryStat.isDirectory(), size: entryStat.size }
        } catch {
          return { name: entry.name }
        }
      })
    )
  }

  /** Return name, directory flag, and size for a path. Rejects if `stat` fails. */
  async statFile(path: string): Promise<FileInfo> {
    const fullPath = this._resolvePath(path)
    const s = await stat(fullPath)
    return {
      name: basename(fullPath),
      isDir: s.isDirectory(),
      size: s.size,
    }
  }
}
// Lower-cased ssh_config keywords that run a local command or open a forwarding channel.
const BLOCKED_SSH_OPTIONS = [
  'proxycommand',
  'localcommand',
  'permitlocalcommand',
  'knownhostscommand',
  'proxyusefdpass',
  'localforward',
  'remoteforward',
  'dynamicforward',
]

/**
 * Options for constructing a {@link RemoteSandbox}.
 */
export interface RemoteSandboxOptions {
  /** SSH host (e.g., "localhost", "user\@remote-host"). Must not start with "-". */
  host: string
  /** Working directory on the remote host. */
  workingDir: string
  /** Path to SSH private key file. */
  identityFile?: string
  /** SSH port. Defaults to 22. */
  port?: number
  /**
   * Allow connections to hosts with unknown or changed SSH keys.
   *
   * **Security warning**: Setting this to `true` disables host key verification,
   * enabling man-in-the-middle attacks. Only use for local development or trusted networks.
   *
   * When `false` or unset, uses `accept-new` (trust on first connect, reject if key changes).
   */
  allowUnknownHosts?: boolean
  /**
   * Additional SSH options passed as -o flags (e.g., ["ConnectTimeout=10", "ServerAliveInterval=60"]).
   *
   * Options that execute commands on the host (ProxyCommand, LocalCommand, PermitLocalCommand,
   * KnownHostsCommand, ProxyUseFdpass, LocalForward, RemoteForward, DynamicForward) are blocked
   * and will throw.
   */
  sshOptions?: string[]
}

/**
 * Execute commands on a remote host via SSH.
 *
 * All sandbox operations (file I/O, code execution, directory listing)
 * route through SSH. Subclasses ShellSandbox, so only executeStreaming()
 * is implemented — file ops use base64 encoding over the SSH channel.
 *
 * @example
 * ```typescript
 * const sandbox = new RemoteSandbox({
 *   host: 'localhost',
 *   workingDir: '/tmp/remote-sandbox',
 *   identityFile: '~/.ssh/localhost_key',
 * })
 * await sandbox.start()
 * const result = await sandbox.execute('echo hello')
 * ```
 */
export class RemoteSandbox extends ShellSandbox {
  readonly host: string
  readonly workingDir: string
  private readonly _identityFile: string | undefined
  private readonly _port: number
  private readonly _allowUnknownHosts: boolean
  private readonly _sshOptions: string[]

  /**
   * @param options - Connection settings.
   * @throws Error if `host` starts with "-" or `sshOptions` contains a blocked keyword.
   */
  constructor(options: RemoteSandboxOptions) {
    super()
    // A destination beginning with "-" would be parsed by ssh as a command-line option.
    if (options.host.startsWith('-')) {
      throw new Error(`SSH host "${options.host}" is invalid: it must not start with "-"`)
    }
    this.host = options.host
    this.workingDir = options.workingDir
    this._identityFile = options.identityFile
    this._port = options.port ?? 22
    this._allowUnknownHosts = options.allowUnknownHosts ?? false
    this._sshOptions = options.sshOptions ?? []
    for (const opt of this._sshOptions) {
      // ssh ignores leading whitespace before the keyword, so trim before extracting it;
      // otherwise " ProxyCommand=..." would yield an empty name and slip past the list.
      const keyword = opt.trim().split(/[=\s]/)[0] ?? ''
      if (BLOCKED_SSH_OPTIONS.includes(keyword.toLowerCase())) {
        throw new Error(`SSH option "${keyword}" is blocked because it can execute commands on the host`)
      }
    }
  }

  /**
   * Ensure the remote working directory exists.
   *
   * @throws Error if the remote `mkdir -p` exits non-zero.
   */
  async start(): Promise<void> {
    // Run from "/" because the working directory may not exist yet.
    const result = await this.execute(`mkdir -p ${shellQuote(this.workingDir)}`, { cwd: '/' })
    if (result.exitCode !== 0) {
      throw new Error(`Failed to create remote working directory: ${result.stderr}`)
    }
  }

  /**
   * Run a shell command on the remote host over `ssh`, streaming output.
   *
   * @param command - The shell command to execute.
   * @param options - Execution options (timeout, cwd, signal).
   */
  async *executeStreaming(
    command: string,
    options?: ExecuteOptions
  ): AsyncGenerator<StreamChunk | ExecutionResult> {
    const cwd = options?.cwd ?? this.workingDir
    const remoteCommand = `cd ${shellQuote(cwd)} && ${command}`

    const sshArgs: string[] = [
      '-o',
      `StrictHostKeyChecking=${this._allowUnknownHosts ? 'no' : 'accept-new'}`,
      '-o',
      'BatchMode=yes',
      '-p',
      String(this._port),
    ]

    if (this._identityFile) {
      sshArgs.push('-i', this._identityFile)
    }

    sshArgs.push(...this._sshOptions.flatMap((opt) => ['-o', opt]))

    sshArgs.push(this.host, remoteCommand)

    const proc = spawn('ssh', sshArgs)
    yield* streamProcess(proc, { timeout: options?.timeout, signal: options?.signal })
  }
}
/**
 * Abstract sandbox that provides shell-based defaults for file and code operations.
 *
 * Subclasses only need to implement {@link executeStreaming}. The remaining
 * operations — `executeCodeStreaming`, `readFile`, `writeFile`, `removeFile`,
 * `statFile`, and `listFiles` — are implemented via shell commands piped through
 * `executeStreaming`.
 *
 * Paths are always shell-quoted, and utilities that parse options receive `--`
 * before the path so a name beginning with `-` is treated as an operand.
 *
 * Subclasses may override any method with a native implementation for
 * better performance or to handle edge cases (e.g., binary-safe file
 * transfer via Docker stdin pipes, or native API calls for cloud backends).
 */
export abstract class ShellSandbox extends Sandbox {
  /**
   * Run source code by base64-decoding it from a heredoc into the interpreter's stdin.
   *
   * @param code - The source code to execute.
   * @param language - Interpreter executable name; must match LANGUAGE_PATTERN.
   * @param options - Execution options (timeout, cwd, signal).
   * @throws Error if `language` contains characters outside the allowed pattern.
   */
  async *executeCodeStreaming(
    code: string,
    language: string,
    options?: ExecuteOptions
  ): AsyncGenerator<StreamChunk | ExecutionResult> {
    if (!LANGUAGE_PATTERN.test(language)) {
      throw new Error(`language parameter contains unsafe characters: ${language}`)
    }
    // btoa only accepts Latin-1, so encode to UTF-8 bytes first and map each byte to a char.
    const encoded = btoa(Array.from(new TextEncoder().encode(code), (b) => String.fromCharCode(b)).join(''))
    // Randomized delimiter for the heredoc carrying the base64 payload.
    const eof = `STRANDS_EOF_${crypto.randomUUID().slice(0, 16)}`
    yield* this.executeStreaming(`base64 -d << '${eof}' | ${language}\n${encoded}\n${eof}`, options)
  }

  /**
   * Read a file by base64-encoding it on the remote side and decoding locally.
   *
   * @throws Error if the command exits non-zero.
   */
  async readFile(path: string): Promise<Uint8Array> {
    const result = await this.execute(`base64 < ${shellQuote(path)}`)
    if (result.exitCode !== 0) {
      throw new Error(result.stderr || `Failed to read file: ${path}`)
    }
    // base64 output may be line-wrapped; strip all whitespace before decoding.
    const cleaned = result.stdout.replace(/\s/g, '')
    const binary = atob(cleaned)
    const bytes = new Uint8Array(binary.length)
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i)
    }
    return bytes
  }

  /**
   * Write a file by sending base64 content in a heredoc; creates parent directories.
   *
   * The whole payload travels inside the command string.
   *
   * @throws Error if the command exits non-zero.
   */
  async writeFile(path: string, content: Uint8Array): Promise<void> {
    const binary = Array.from(content, (byte) => String.fromCharCode(byte)).join('')
    const encoded = btoa(binary)
    const quoted = shellQuote(path)
    const eof = `STRANDS_EOF_${crypto.randomUUID().slice(0, 16)}`
    const cmd = `mkdir -p -- "$(dirname -- ${quoted})" && base64 -d << '${eof}' > ${quoted}\n${encoded}\n${eof}`
    const result = await this.execute(cmd)
    if (result.exitCode !== 0) {
      throw new Error(result.stderr || `Failed to write file: ${path}`)
    }
  }

  /**
   * Remove a file with `rm`.
   *
   * @throws Error if the command exits non-zero.
   */
  async removeFile(path: string): Promise<void> {
    const result = await this.execute(`rm -- ${shellQuote(path)}`)
    if (result.exitCode !== 0) {
      throw new Error(result.stderr || `Failed to remove file: ${path}`)
    }
  }

  /**
   * Get name, directory flag, and size for a path.
   *
   * The command prints `d` or `f` on the first line and the size on the second,
   * trying `stat -c` first, then `stat -f`, then `wc -c`, and finally `echo 0`.
   *
   * @throws Error if the path does not exist.
   */
  async statFile(path: string): Promise<FileInfo> {
    const quoted = shellQuote(path)
    const result = await this.execute(
      `test -e ${quoted} || exit 1; test -d ${quoted} && printf 'd\\n' || printf 'f\\n'; stat -c '%s' -- ${quoted} 2>/dev/null || stat -f '%z' -- ${quoted} 2>/dev/null || wc -c < ${quoted} 2>/dev/null || echo 0`
    )
    if (result.exitCode !== 0) {
      throw new Error(`Path does not exist: ${path}`)
    }
    const lines = result.stdout.trim().split('\n')
    const isDir = lines[0] === 'd'
    const rawSize = parseInt(lines[1] ?? '', 10)
    // Last non-empty path segment; falls back to the path itself (e.g. for "/").
    const name = path.split('/').filter(Boolean).pop() ?? path
    return Number.isNaN(rawSize) ? { name, isDir } : { name, isDir, size: rawSize }
  }

  /**
   * List a directory by parsing `ls -1aF` output.
   *
   * Entries carry `name` and `isDir` only. A single trailing `/`, `@`, `*`, `=`,
   * or `|` is stripped from every line as an `ls -F` type indicator.
   *
   * @throws Error if the command exits non-zero.
   */
  async listFiles(path: string): Promise<FileInfo[]> {
    const result = await this.execute(`ls -1aF -- ${shellQuote(path)}`)
    if (result.exitCode !== 0) {
      throw new Error(result.stderr || `Failed to list directory: ${path}`)
    }

    const entries: FileInfo[] = []
    for (const raw of result.stdout.split('\n')) {
      const line = raw.replace(/\r$/, '')
      if (!line || line === '.' || line === '..' || line === './' || line === '../') {
        continue
      }
      const isDir = line.endsWith('/')
      const name = line.replace(/[/@*=|]$/, '')
      if (name) {
        entries.push({ name, isDir })
      }
    }
    return entries
  }
}
// Signal numbers used to derive an exit code (128 + n) when a process dies from a signal.
const SIGNAL_CODES: Record<string, number> = {
  SIGHUP: 1,
  SIGINT: 2,
  SIGQUIT: 3,
  SIGABRT: 6,
  SIGKILL: 9,
  SIGSEGV: 11,
  SIGPIPE: 13,
  SIGTERM: 15,
}

export interface StreamProcessOptions {
  /** Maximum run time in seconds before the process is terminated. */
  timeout?: number | undefined
  /** Abort signal; aborting terminates the process. */
  signal?: AbortSignal | undefined
  /**
   * When set, a spawn failure with code ENOENT is reported as an exit-code-127
   * result carrying this text as stderr instead of being thrown.
   */
  enoentMessage?: string | undefined
}

/**
 * Stream stdout/stderr from a child process, then yield the final result.
 *
 * Bridges Node.js event emitters to an async generator. Chunks are
 * yielded incrementally as the process produces output. The final
 * yield is an ExecutionResult with the exit code and complete output.
 *
 * All listeners are attached synchronously before any await to prevent
 * missed events from fast-completing processes.
 *
 * @param proc - The already-spawned child process.
 * @param options - Timeout, abort signal, and ENOENT handling.
 * @throws Error on timeout, on abort, or when the process emits `error`
 *   (except an ENOENT covered by `enoentMessage`).
 */
export async function* streamProcess(
  proc: ChildProcess,
  options?: StreamProcessOptions
): AsyncGenerator<StreamChunk | ExecutionResult> {
  const chunks: StreamChunk[] = []
  let stdout = ''
  let stderr = ''
  let done = false
  let terminating = false
  let exitCode = 0
  let error: Error | undefined
  let enoent = false
  let resolveWait: (() => void) | undefined
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined
  let killHandle: ReturnType<typeof setTimeout> | undefined

  // Resume the consumer loop if it is currently parked waiting for an event.
  const wake = (): void => {
    if (resolveWait) {
      resolveWait()
      resolveWait = undefined
    }
  }

  // Ask the process to stop (SIGTERM), escalating to SIGKILL after one second.
  const terminate = (reason: Error): void => {
    if (terminating || done) return
    terminating = true
    error = reason
    // Armed before kill() so the 'close' handler can always cancel it.
    killHandle = setTimeout(() => {
      if (!done) proc.kill('SIGKILL')
    }, 1000)
    proc.kill('SIGTERM')
    wake()
  }

  // Decode at the stream level so a multi-byte UTF-8 character split across two
  // chunks is reassembled instead of becoming replacement characters.
  proc.stdout?.setEncoding('utf8')
  proc.stderr?.setEncoding('utf8')

  proc.stdout?.on('data', (data) => {
    const text = String(data)
    stdout += text
    chunks.push({ type: 'streamChunk', data: text, streamType: 'stdout' })
    wake()
  })

  proc.stderr?.on('data', (data) => {
    const text = String(data)
    stderr += text
    chunks.push({ type: 'streamChunk', data: text, streamType: 'stderr' })
    wake()
  })

  proc.on('close', (code, signal) => {
    // The process is gone: a pending SIGKILL escalation is no longer needed and
    // would otherwise keep the event loop alive for up to a second.
    if (killHandle !== undefined) {
      clearTimeout(killHandle)
      killHandle = undefined
    }
    if (!done) {
      if (code !== null) {
        exitCode = code
      } else if (signal) {
        exitCode = 128 + (SIGNAL_CODES[signal] ?? 1)
      } else {
        exitCode = 1
      }
      done = true
      wake()
    }
  })

  proc.on('error', (err) => {
    if (!done) {
      if (options?.enoentMessage && 'code' in err && err.code === 'ENOENT') {
        enoent = true
      } else {
        error = err
      }
      done = true
      wake()
    }
  })

  const onAbort = (): void => terminate(new Error('Execution aborted'))

  if (options?.signal) {
    if (options.signal.aborted) {
      onAbort()
    } else {
      options.signal.addEventListener('abort', onAbort, { once: true })
    }
  }

  if (options?.timeout !== undefined) {
    timeoutHandle = setTimeout(() => {
      terminate(new Error(`Execution timed out after ${options.timeout} seconds`))
    }, options.timeout * 1000)
  }

  try {
    while (true) {
      // Drain everything buffered so far before deciding whether to stop.
      if (chunks.length > 0) {
        const batch = chunks.splice(0, chunks.length)
        for (const chunk of batch) {
          yield chunk
        }
      }

      if (done || terminating) break

      await new Promise<void>((resolve) => {
        resolveWait = resolve
      })
    }

    if (enoent) {
      yield {
        type: 'executionResult',
        exitCode: 127,
        stdout: '',
        stderr: options!.enoentMessage!,
        outputFiles: [],
      } satisfies ExecutionResult
      return
    }

    if (error) throw error

    yield {
      type: 'executionResult',
      exitCode,
      stdout,
      stderr,
      outputFiles: [],
    } satisfies ExecutionResult
  } finally {
    if (timeoutHandle !== undefined) clearTimeout(timeoutHandle)
    if (options?.signal) options.signal.removeEventListener('abort', onAbort)
    // Covers the consumer abandoning the generator while the process is still running.
    if (!done) proc.kill()
  }
}
/**
 * Type of a streaming output chunk — distinguishes stdout from stderr.
 */
export type StreamType = 'stdout' | 'stderr'

/**
 * A typed chunk of streaming output from command or code execution.
 *
 * Allows consumers to distinguish stdout from stderr during streaming,
 * enabling richer UIs and more precise output handling.
 */
export interface StreamChunk {
  /** Discriminant separating chunks from the final {@link ExecutionResult}. */
  readonly type: 'streamChunk'
  /** The text carried by this chunk. */
  readonly data: string
  /** Which output stream the text came from. */
  readonly streamType: StreamType
}

/**
 * Metadata about a file or directory in a sandbox.
 *
 * Provides minimal structured information that lets tools distinguish
 * files from directories and report sizes. `isDir` and `size` are
 * `undefined` when the backend cannot determine them accurately.
 */
export interface FileInfo {
  readonly name: string
  readonly isDir?: boolean
  readonly size?: number
}

/**
 * A file produced as output by code execution.
 *
 * Used to carry binary artifacts (images, charts, PDFs, compiled files)
 * from sandbox execution back to the agent. Shell-based sandboxes
 * typically return an empty array. Jupyter-backed or API-backed
 * sandboxes can populate this with generated artifacts.
 */
export interface OutputFile {
  readonly name: string
  readonly content: Uint8Array
  readonly mimeType: string
}

/**
 * Result of command or code execution in a sandbox.
 */
export interface ExecutionResult {
  /** Discriminant separating the final result from {@link StreamChunk}s. */
  readonly type: 'executionResult'
  readonly exitCode: number
  /** Complete standard output accumulated over the run. */
  readonly stdout: string
  /** Complete standard error accumulated over the run. */
  readonly stderr: string
  readonly outputFiles: OutputFile[]
}

/**
 * Serializable snapshot of sandbox state, as returned by a backend's `pause()`
 * method (for example `DockerSandbox.pause`).
 *
 * The contents are opaque and backend-specific — consumers should not
 * inspect or modify the `data` field. Pass the snapshot back to the
 * same backend's constructor to resume.
 */
export interface SandboxSnapshot {
  /** Identifier of the backend that produced the snapshot (e.g. `'docker'`). */
  readonly backendId: string
  /** Backend-specific payload; values must be narrowed by the backend before use. */
  readonly data: Record<string, unknown>
}
+ */ +export function shellQuote(value: string): string { + return "'" + value.replace(/'/g, "'\\''") + "'" +} diff --git a/strands-ts/src/vended-plugins/context-offloader/__tests__/sandbox-storage.test.node.ts b/strands-ts/src/vended-plugins/context-offloader/__tests__/sandbox-storage.test.node.ts new file mode 100644 index 000000000..21c061f59 --- /dev/null +++ b/strands-ts/src/vended-plugins/context-offloader/__tests__/sandbox-storage.test.node.ts @@ -0,0 +1,75 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest' +import { SandboxStorage } from '../storage.js' +import { TestSandbox } from '../../../__fixtures__/test-sandbox.node.js' +import { execSync } from 'child_process' + +const TEST_DIR = '/tmp/strands-test-sandbox-storage' + +describe.skipIf(process.platform === 'win32')('SandboxStorage', () => { + let sandbox: TestSandbox + let storage: SandboxStorage + + beforeEach(() => { + execSync(`rm -rf ${TEST_DIR} && mkdir -p ${TEST_DIR}`) + sandbox = new TestSandbox(TEST_DIR) + storage = new SandboxStorage(sandbox) + }) + + afterEach(() => { + execSync(`rm -rf ${TEST_DIR}`) + }) + + it('stores and retrieves text content', async () => { + const content = new TextEncoder().encode('hello offloaded') + const reference = await storage.store('test-key', content, 'text/plain') + const retrieved = await storage.retrieve(reference) + expect(new TextDecoder().decode(retrieved.content)).toBe('hello offloaded') + expect(retrieved.contentType).toBe('text/plain') + }) + + it('stores and retrieves JSON content', async () => { + const json = JSON.stringify({ key: 'value' }) + const content = new TextEncoder().encode(json) + const reference = await storage.store('json-key', content, 'application/json') + const retrieved = await storage.retrieve(reference) + expect(new TextDecoder().decode(retrieved.content)).toBe(json) + expect(retrieved.contentType).toBe('application/json') + }) + + it('stores and retrieves binary content', async () => { + const bytes = new 
Uint8Array([0, 1, 2, 127, 128, 254, 255]) + const reference = await storage.store('binary-key', bytes, 'application/octet-stream') + const retrieved = await storage.retrieve(reference) + expect(Array.from(retrieved.content)).toStrictEqual(Array.from(bytes)) + }) + + it('creates files in the basePath directory', async () => { + const content = new TextEncoder().encode('test') + await storage.store('my-key', content, 'text/plain') + const files = execSync(`ls ${TEST_DIR}/artifacts/`, { encoding: 'utf-8' }) + expect(files).toContain('my-key') + expect(files).toContain('.txt') + }) + + it('uses custom basePath', async () => { + const customStorage = new SandboxStorage(sandbox, 'artifacts') + const content = new TextEncoder().encode('custom path') + await customStorage.store('key', content, 'text/plain') + const files = execSync(`ls ${TEST_DIR}/artifacts/`, { encoding: 'utf-8' }) + expect(files).toContain('.txt') + }) + + it('throws on retrieve with invalid reference', async () => { + await expect(storage.retrieve('nonexistent/path.txt')).rejects.toThrow('Reference not found') + }) + + it('handles multiple stores with unique references', async () => { + const c1 = new TextEncoder().encode('first') + const c2 = new TextEncoder().encode('second') + const ref1 = await storage.store('key', c1, 'text/plain') + const ref2 = await storage.store('key', c2, 'text/plain') + expect(ref1).not.toBe(ref2) + expect(new TextDecoder().decode((await storage.retrieve(ref1)).content)).toBe('first') + expect(new TextDecoder().decode((await storage.retrieve(ref2)).content)).toBe('second') + }) +}) diff --git a/strands-ts/src/vended-plugins/context-offloader/index.ts b/strands-ts/src/vended-plugins/context-offloader/index.ts index 21c4fc902..e5171dee2 100644 --- a/strands-ts/src/vended-plugins/context-offloader/index.ts +++ b/strands-ts/src/vended-plugins/context-offloader/index.ts @@ -20,4 +20,4 @@ export { ContextOffloader } from './plugin.js' export type { ContextOffloaderConfig } from 
'./plugin.js' export type { Storage } from './storage.js' -export { InMemoryStorage, FileStorage, S3Storage } from './storage.js' +export { InMemoryStorage, FileStorage, S3Storage, SandboxStorage } from './storage.js' diff --git a/strands-ts/src/vended-plugins/context-offloader/plugin.ts b/strands-ts/src/vended-plugins/context-offloader/plugin.ts index 1c19af01b..acc81bc10 100644 --- a/strands-ts/src/vended-plugins/context-offloader/plugin.ts +++ b/strands-ts/src/vended-plugins/context-offloader/plugin.ts @@ -12,6 +12,8 @@ import { logger } from '../../logging/logger.js' import type { JSONValue } from '../../types/json.js' import type { Storage } from './storage.js' import { isSearchableContent, searchContent } from './search.js' +import { SandboxStorage } from './storage.js' +import { Sandbox } from '../../sandbox/base.js' const CHARS_PER_TOKEN = 4 const DEFAULT_MAX_RESULT_TOKENS = 2_500 @@ -103,8 +105,8 @@ function decodeStoredContent(content: Uint8Array, contentType: string, reference /** Configuration for the {@link ContextOffloader} plugin. */ export interface ContextOffloaderConfig { - /** Storage backend for persisting offloaded content. */ - storage: Storage + /** Storage backend for persisting offloaded content. Pass a Sandbox instance to store in the sandbox filesystem. */ + storage: Storage | Sandbox /** Token threshold above which tool results are offloaded. Defaults to 2,500. */ maxResultTokens?: number /** Number of tokens to keep as an inline preview. Defaults to 1,000. */ @@ -147,7 +149,7 @@ export class ContextOffloader implements Plugin { if (previewTokens < 0) throw new Error('previewTokens must be non-negative') if (previewTokens >= maxResultTokens) throw new Error('previewTokens must be less than maxResultTokens') - this._storage = config.storage + this._storage = config.storage instanceof Sandbox ? 
new SandboxStorage(config.storage) : config.storage this._maxResultTokens = maxResultTokens this._previewTokens = previewTokens this._includeRetrievalTool = config.includeRetrievalTool ?? true diff --git a/strands-ts/src/vended-plugins/context-offloader/storage.ts b/strands-ts/src/vended-plugins/context-offloader/storage.ts index c6ec1dd7c..f63e521e9 100644 --- a/strands-ts/src/vended-plugins/context-offloader/storage.ts +++ b/strands-ts/src/vended-plugins/context-offloader/storage.ts @@ -1,11 +1,22 @@ /** * Storage backends for offloaded tool result content. * - * This module defines the {@link Storage} interface and provides three built-in - * implementations: {@link InMemoryStorage}, {@link FileStorage}, and {@link S3Storage}. + * This module defines the {@link Storage} interface and provides built-in + * implementations: {@link InMemoryStorage}, {@link FileStorage}, {@link S3Storage}, + * and {@link SandboxStorage}. * Each content block from a tool result is stored individually with its content type preserved. */ +import type { Sandbox } from '../../sandbox/base.js' +import { toMediaFormat } from '../../mime.js' + +function extensionForMime(contentType: string): string { + const format = toMediaFormat(contentType) + if (format) return `.${format}` + const subtype = contentType.split('/')[1] ?? 'bin' + return `.${subtype.split('+').pop()!.split('.').pop()!}` +} + /** * Backend for storing and retrieving offloaded content blocks. 
* @@ -95,11 +106,6 @@ export class FileStorage implements Storage { this._artifactDir = artifactDir } - private static _extensionFor(contentType: string): string { - if (contentType === 'text/plain') return '.txt' - return `.${contentType.split('/').pop()}` - } - private async _ensureDir(): Promise { const fs = await import('node:fs/promises') await fs.mkdir(this._artifactDir, { recursive: true }) @@ -135,7 +141,7 @@ export class FileStorage implements Storage { const sanitizedKey = sanitizeId(key) const timestampMs = Date.now() this._counter++ - const ext = FileStorage._extensionFor(contentType) + const ext = extensionForMime(contentType) const filename = `${timestampMs}_${this._counter}_${sanitizedKey}${ext}` this._contentTypes[filename] = contentType @@ -260,3 +266,59 @@ export class S3Storage implements Storage { } } } + +/** + * Sandbox-based storage backend. + * + * Stores offloaded content in the agent's sandbox filesystem. This co-locates + * offloaded data with the agent's workspace, making it accessible via sandbox + * tools (editor, run) in addition to the retrieval tool. + * + * Content-type tracking is ephemeral: it is held in-memory on this instance. + * If the sandbox is paused/resumed with a different SandboxStorage instance, + * retrieval falls back to "application/octet-stream". 
+ * + * @param sandbox - Sandbox instance to store content in + * @param basePath - Directory within the sandbox for offloaded content (default: "./artifacts") + */ +export class SandboxStorage implements Storage { + private readonly _sandbox: Sandbox + private readonly _basePath: string + private readonly _contentTypes = new Map<string, string>() + private _counter = 0 + + constructor(sandbox: Sandbox, basePath: string = './artifacts') { + this._sandbox = sandbox + this._basePath = basePath + } + + /** {@inheritdoc} */ + async store(key: string, content: Uint8Array, contentType: string = 'text/plain'): Promise<string> { + this._counter++ + const sanitized = sanitizeId(key) + const ext = extensionForMime(contentType) + const filename = `${Date.now()}_${this._counter}_${sanitized}${ext}` + const path = `${this._basePath}/${filename}` + + await this._sandbox.writeFile(path, content) + this._contentTypes.set(path, contentType) + + return path + } + + /** {@inheritdoc} */ + async retrieve(reference: string): Promise<{ content: Uint8Array; contentType: string }> { + const base = this._basePath.endsWith('/') ? this._basePath : `${this._basePath}/` + if (!reference.startsWith(base) || reference.includes('..')) { + throw new Error(`Reference not found: ${reference}`) + } + + try { + const content = await this._sandbox.readFile(reference) + const contentType = this._contentTypes.get(reference) ?? 
'application/octet-stream' + return { content, contentType } + } catch { + throw new Error(`Reference not found: ${reference}`) + } + } +} diff --git a/strands-ts/src/vended-plugins/skills/__tests__/agent-skills.test.node.ts b/strands-ts/src/vended-plugins/skills/__tests__/agent-skills.test.node.ts index e8bd6f3e5..b99afc49d 100644 --- a/strands-ts/src/vended-plugins/skills/__tests__/agent-skills.test.node.ts +++ b/strands-ts/src/vended-plugins/skills/__tests__/agent-skills.test.node.ts @@ -4,6 +4,7 @@ import { Skill } from '../skill.js' import { BeforeInvocationEvent } from '../../../hooks/events.js' import { TextBlock, CachePointBlock } from '../../../types/messages.js' import { createMockAgent, invokeTrackedHook, type MockAgent } from '../../../__fixtures__/agent-helpers.js' +import { NotASandboxLocalEnvironment } from '../../../sandbox/not-a-sandbox-local-environment.js' import { promises as fs } from 'fs' import * as path from 'path' import { tmpdir } from 'os' @@ -32,6 +33,8 @@ describe('AgentSkills', () => { const makeSkill = (name: string, description = `Description of ${name}`, instructions = `Instructions for ${name}`) => new Skill({ name, description, instructions }) + const createMockAgentWithSandbox = () => createMockAgent({ sandbox: new NotASandboxLocalEnvironment() }) + beforeEach(async () => { testDir = path.join(tmpdir(), `agent-skills-test-${Date.now()}-${Math.random().toString(36).slice(2)}`) await fs.mkdir(testDir, { recursive: true }) @@ -54,6 +57,7 @@ describe('AgentSkills', () => { it('resolves a skill directory path', async () => { await createSkillDir('my-skill', '---\nname: my-skill\ndescription: A skill\n---\nBody.') const plugin = new AgentSkills({ skills: [path.join(testDir, 'my-skill')] }) + await plugin.initAgent(createMockAgentWithSandbox()) expect(await plugin.getAvailableSkills()).toHaveLength(1) }) @@ -61,6 +65,7 @@ describe('AgentSkills', () => { await createSkillDir('skill-a', '---\nname: skill-a\ndescription: Skill A\n---\nA.') 
await createSkillDir('skill-b', '---\nname: skill-b\ndescription: Skill B\n---\nB.') const plugin = new AgentSkills({ skills: [testDir] }) + await plugin.initAgent(createMockAgentWithSandbox()) expect(await plugin.getAvailableSkills()).toHaveLength(2) }) @@ -70,6 +75,7 @@ describe('AgentSkills', () => { const plugin = new AgentSkills({ skills: [directSkill, path.join(testDir, 'file-skill')], }) + await plugin.initAgent(createMockAgentWithSandbox()) expect(await plugin.getAvailableSkills()).toHaveLength(2) }) @@ -83,6 +89,7 @@ describe('AgentSkills', () => { it('warns and skips non-existent paths', async () => { const plugin = new AgentSkills({ skills: ['/does/not/exist'] }) + await plugin.initAgent(createMockAgentWithSandbox()) expect(await plugin.getAvailableSkills()).toHaveLength(0) }) @@ -92,6 +99,7 @@ describe('AgentSkills', () => { await fs.writeFile(path.join(dirPath, 'SKILL.md'), 'totally broken, no frontmatter at all', 'utf-8') const plugin = new AgentSkills({ skills: [dirPath] }) + await plugin.initAgent(createMockAgentWithSandbox()) expect(await plugin.getAvailableSkills()).toHaveLength(0) }) @@ -106,6 +114,7 @@ describe('AgentSkills', () => { await fs.writeFile(path.join(testDir, 'bad-skill', 'SKILL.md'), 'no frontmatter', 'utf-8') const plugin = new AgentSkills({ skills: [testDir] }) + await plugin.initAgent(createMockAgentWithSandbox()) const skills = await plugin.getAvailableSkills() expect(skills).toHaveLength(1) expect(skills[0]!.name).toBe('good-skill') @@ -129,7 +138,7 @@ describe('AgentSkills', () => { it('registers a BeforeInvocationEvent hook in initAgent', async () => { const plugin = new AgentSkills({ skills: [makeSkill('s')] }) - const agent = createMockAgent() + const agent = createMockAgentWithSandbox() await plugin.initAgent(agent) expect(agent.trackedHooks).toHaveLength(1) expect(agent.trackedHooks[0]!.eventType).toBe(BeforeInvocationEvent) @@ -146,7 +155,7 @@ describe('AgentSkills', () => { plugin = new AgentSkills({ skills: 
[makeSkill('pdf-skill', 'Process PDFs')], }) - agent = createMockAgent() + agent = createMockAgentWithSandbox() await plugin.initAgent(agent) }) @@ -227,7 +236,7 @@ describe('AgentSkills', () => { const plugin2 = new AgentSkills({ skills: [makeSkill('test-skill', 'Use when: user says & "goodbye"')], }) - const agent2 = createMockAgent() + const agent2 = createMockAgentWithSandbox() await plugin2.initAgent(agent2) const hook = agent2.trackedHooks[0]! @@ -245,7 +254,7 @@ describe('AgentSkills', () => { '---\nname: located-skill\ndescription: Has a path\n---\nBody.' ) const filePlugin = new AgentSkills({ skills: [dirPath] }) - const fileAgent = createMockAgent() + const fileAgent = createMockAgentWithSandbox() await filePlugin.initAgent(fileAgent) await invokeTrackedHook(fileAgent, new BeforeInvocationEvent({ agent: fileAgent as any, invocationState: {} })) @@ -256,7 +265,7 @@ describe('AgentSkills', () => { it('shows "no skills available" when empty', async () => { const emptyPlugin = new AgentSkills({ skills: [] }) - const emptyAgent = createMockAgent() + const emptyAgent = createMockAgentWithSandbox() await emptyPlugin.initAgent(emptyAgent) await invokeTrackedHook(emptyAgent, new BeforeInvocationEvent({ agent: emptyAgent as any, invocationState: {} })) @@ -289,7 +298,7 @@ describe('AgentSkills', () => { const multiPlugin = new AgentSkills({ skills: [makeSkill('skill-a', 'First'), makeSkill('skill-b', 'Second'), makeSkill('skill-c', 'Third')], }) - const multiAgent = createMockAgent() + const multiAgent = createMockAgentWithSandbox() await multiPlugin.initAgent(multiAgent) await invokeTrackedHook(multiAgent, new BeforeInvocationEvent({ agent: multiAgent as any, invocationState: {} })) @@ -321,7 +330,7 @@ describe('AgentSkills', () => { }), ], }) - agent = createMockAgent() + agent = createMockAgentWithSandbox() await plugin.initAgent(agent) }) @@ -409,7 +418,7 @@ describe('AgentSkills', () => { } ) const plugin2 = new AgentSkills({ skills: [dirPath] }) - const 
agent2 = createMockAgent() + const agent2 = createMockAgentWithSandbox() await plugin2.initAgent(agent2) const tools = plugin2.getTools() @@ -436,7 +445,7 @@ describe('AgentSkills', () => { '---\nname: no-resources\ndescription: No extras\n---\nBody.' ) const plugin2 = new AgentSkills({ skills: [dirPath] }) - const agent2 = createMockAgent() + const agent2 = createMockAgentWithSandbox() await plugin2.initAgent(agent2) const tools = plugin2.getTools() @@ -467,7 +476,7 @@ describe('AgentSkills', () => { files ) const plugin2 = new AgentSkills({ skills: [dirPath], maxResourceFiles: 3 }) - const agent2 = createMockAgent() + const agent2 = createMockAgentWithSandbox() await plugin2.initAgent(agent2) const tools = plugin2.getTools() @@ -522,7 +531,7 @@ describe('AgentSkills', () => { mockFetchSuccess(SAMPLE_CONTENT) const plugin = new AgentSkills({ skills: ['https://example.com/SKILL.md'] }) - await plugin.initAgent(createMockAgent()) + await plugin.initAgent(createMockAgentWithSandbox()) expect(await plugin.getAvailableSkills()).toHaveLength(1) expect((await plugin.getAvailableSkills())[0]!.name).toBe('url-skill') @@ -536,7 +545,7 @@ describe('AgentSkills', () => { const plugin = new AgentSkills({ skills: ['https://example.com/SKILL.md', path.join(testDir, 'local-skill')], }) - await plugin.initAgent(createMockAgent()) + await plugin.initAgent(createMockAgentWithSandbox()) expect(await plugin.getAvailableSkills()).toHaveLength(2) const names = new Set((await plugin.getAvailableSkills()).map((s) => s.name)) @@ -552,7 +561,7 @@ describe('AgentSkills', () => { } as Response) const plugin = new AgentSkills({ skills: ['https://example.com/broken/SKILL.md'] }) - await plugin.initAgent(createMockAgent()) + await plugin.initAgent(createMockAgentWithSandbox()) expect(await plugin.getAvailableSkills()).toHaveLength(0) }) @@ -563,7 +572,7 @@ describe('AgentSkills', () => { const plugin = new AgentSkills({ skills: ['https://example.com/a/SKILL.md', 
'https://example.com/b/SKILL.md'], }) - await plugin.initAgent(createMockAgent()) + await plugin.initAgent(createMockAgentWithSandbox()) expect(await plugin.getAvailableSkills()).toHaveLength(1) }) @@ -572,7 +581,7 @@ describe('AgentSkills', () => { mockFetchSuccess(SAMPLE_CONTENT) const plugin = new AgentSkills({ skills: ['https://example.com/SKILL.md'] }) - const agent = createMockAgent() + const agent = createMockAgentWithSandbox() await plugin.initAgent(agent) expect(await plugin.getAvailableSkills()).toHaveLength(1) diff --git a/strands-ts/src/vended-plugins/skills/__tests__/sandbox-skills.test.node.ts b/strands-ts/src/vended-plugins/skills/__tests__/sandbox-skills.test.node.ts new file mode 100644 index 000000000..aadefdcf5 --- /dev/null +++ b/strands-ts/src/vended-plugins/skills/__tests__/sandbox-skills.test.node.ts @@ -0,0 +1,122 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest' +import { Skill } from '../skill.js' +import { AgentSkills } from '../agent-skills.js' +import { TestSandbox } from '../../../__fixtures__/test-sandbox.node.js' +import { createMockAgent } from '../../../__fixtures__/agent-helpers.js' +import { execSync } from 'child_process' + +const TEST_DIR = '/tmp/strands-test-sandbox-skills' + +const VALID_SKILL_MD = `--- +name: test-skill +description: A test skill loaded from sandbox +--- +# Instructions +Do the thing. 
+` + +describe.skipIf(process.platform === 'win32')('Skill sandbox loading', () => { + let sandbox: TestSandbox + + beforeEach(() => { + execSync(`rm -rf ${TEST_DIR} && mkdir -p ${TEST_DIR}`) + sandbox = new TestSandbox(TEST_DIR) + }) + + afterEach(() => { + execSync(`rm -rf ${TEST_DIR}`) + }) + + describe('Skill.fromSandbox', () => { + it('loads a skill from a directory with SKILL.md', async () => { + execSync(`mkdir -p ${TEST_DIR}/my-skill && cat > ${TEST_DIR}/my-skill/SKILL.md << 'EOF'\n${VALID_SKILL_MD}\nEOF`) + const skills = await Skill.fromSandbox(sandbox, 'my-skill') + expect(skills).toHaveLength(1) + expect(skills[0]!.name).toBe('test-skill') + expect(skills[0]!.description).toBe('A test skill loaded from sandbox') + expect(skills[0]!.instructions).toContain('Do the thing') + }) + + it('loads a skill from a direct SKILL.md path', async () => { + execSync(`mkdir -p ${TEST_DIR}/direct && cat > ${TEST_DIR}/direct/SKILL.md << 'EOF'\n${VALID_SKILL_MD}\nEOF`) + const skills = await Skill.fromSandbox(sandbox, 'direct/SKILL.md') + expect(skills).toHaveLength(1) + expect(skills[0]!.name).toBe('test-skill') + }) + + it('loads all skills from a parent directory', async () => { + const skill2 = '---\nname: another-skill\ndescription: Another one\n---\nInstructions here.' 
+ execSync( + `mkdir -p ${TEST_DIR}/skills/skill-a && cat > ${TEST_DIR}/skills/skill-a/SKILL.md << 'EOF'\n${VALID_SKILL_MD}\nEOF` + ) + execSync( + `mkdir -p ${TEST_DIR}/skills/skill-b && cat > ${TEST_DIR}/skills/skill-b/SKILL.md << 'EOF'\n${skill2}\nEOF` + ) + + const skills = await Skill.fromSandbox(sandbox, 'skills') + expect(skills).toHaveLength(2) + const names = skills.map((s) => s.name).sort() + expect(names).toStrictEqual(['another-skill', 'test-skill']) + }) + + it('skips subdirectories without SKILL.md', async () => { + execSync( + `mkdir -p ${TEST_DIR}/skills/valid && cat > ${TEST_DIR}/skills/valid/SKILL.md << 'EOF'\n${VALID_SKILL_MD}\nEOF` + ) + execSync(`mkdir -p ${TEST_DIR}/skills/empty`) + + const skills = await Skill.fromSandbox(sandbox, 'skills') + expect(skills).toHaveLength(1) + expect(skills[0]!.name).toBe('test-skill') + }) + + it('returns empty array for empty directory', async () => { + execSync(`mkdir -p ${TEST_DIR}/empty-dir`) + const skills = await Skill.fromSandbox(sandbox, 'empty-dir') + expect(skills).toStrictEqual([]) + }) + + it('throws for nonexistent path', async () => { + await expect(Skill.fromSandbox(sandbox, 'nonexistent')).rejects.toThrow() + }) + + it('throws for invalid SKILL.md content', async () => { + execSync(`mkdir -p ${TEST_DIR}/bad && echo "no frontmatter" > ${TEST_DIR}/bad/SKILL.md`) + await expect(Skill.fromSandbox(sandbox, 'bad')).rejects.toThrow() + }) + }) + + describe('AgentSkills with sandbox paths', () => { + it('resolves path-based skills via sandbox in initAgent', async () => { + execSync( + `mkdir -p ${TEST_DIR}/skills/my-skill && cat > ${TEST_DIR}/skills/my-skill/SKILL.md << 'EOF'\n${VALID_SKILL_MD}\nEOF` + ) + + const plugin = new AgentSkills({ skills: ['skills'] }) + const agent = createMockAgent({ extra: { sandbox } as never }) + await plugin.initAgent(agent) + + const tools = plugin.getTools() + expect(tools.length).toBeGreaterThan(0) + }) + + it('resolves a single skill path via sandbox', async () => { 
+ execSync( + `mkdir -p ${TEST_DIR}/single-skill && cat > ${TEST_DIR}/single-skill/SKILL.md << 'EOF'\n${VALID_SKILL_MD}\nEOF` + ) + + const plugin = new AgentSkills({ skills: ['single-skill'] }) + const agent = createMockAgent({ extra: { sandbox } as never }) + await plugin.initAgent(agent) + + const tools = plugin.getTools() + expect(tools.length).toBeGreaterThan(0) + }) + + it('warns on failed path load', async () => { + const plugin = new AgentSkills({ skills: ['nonexistent'] }) + const agent = createMockAgent({ extra: { sandbox } as never }) + await plugin.initAgent(agent) + }) + }) +}) diff --git a/strands-ts/src/vended-plugins/skills/agent-skills.ts b/strands-ts/src/vended-plugins/skills/agent-skills.ts index 5154c61d9..cca54f925 100644 --- a/strands-ts/src/vended-plugins/skills/agent-skills.ts +++ b/strands-ts/src/vended-plugins/skills/agent-skills.ts @@ -7,7 +7,7 @@ */ import { readdirSync, statSync, existsSync } from 'fs' -import { join, resolve, relative, sep } from 'path' +import { join, relative, sep } from 'path' import { z } from 'zod' import { tool } from '../../tools/tool-factory.js' import { BeforeInvocationEvent } from '../../hooks/events.js' @@ -98,6 +98,8 @@ export class AgentSkills implements Plugin { private readonly _stateKey: string /** Resolves when all async skill sources (e.g. URLs) have been loaded. */ private _ready: Promise + /** Path-based sources deferred until initAgent when agent.sandbox is available. */ + private readonly _pathSources: string[] = [] constructor(config: AgentSkillsConfig) { this._strict = config.strict ?? 
false @@ -118,6 +120,8 @@ export class AgentSkills implements Plugin { async initAgent(agent: LocalAgent): Promise { await this._ready + await this._resolvePathSources(agent) + if (this._skills.size === 0) { logger.warn('no skills were loaded, the agent will have no skills available') } @@ -207,47 +211,7 @@ export class AgentSkills implements Plugin { ) ) } else { - const p = source as string - const resolvedPath = resolve(p) - - // Probe the filesystem to decide which loader to use instead of - // relying on exceptions for control flow. - const isDir = existsSync(resolvedPath) && statSync(resolvedPath).isDirectory() - const isSkillFile = - existsSync(resolvedPath) && statSync(resolvedPath).isFile() && resolvedPath.toLowerCase().endsWith('skill.md') - const hasSkillMd = - isDir && - ['SKILL.md', 'skill.md'].some((name) => { - const candidate = join(resolvedPath, name) - return existsSync(candidate) && statSync(candidate).isFile() - }) - - if (isSkillFile || hasSkillMd) { - // Single skill directory (or direct SKILL.md path) - try { - const skill = Skill.fromFile(p, { strict: this._strict }) - if (resolved.has(skill.name)) { - logger.warn(`name=<${skill.name}> | duplicate skill name, overwriting previous skill`) - } - resolved.set(skill.name, skill) - } catch (error) { - logger.warn(`path=<${p}> | failed to load skill: ${error}`) - } - } else if (isDir) { - // Parent directory containing skill subdirectories - try { - for (const skill of Skill.fromDirectory(p, { strict: this._strict })) { - if (resolved.has(skill.name)) { - logger.warn(`name=<${skill.name}> | duplicate skill name, overwriting previous skill`) - } - resolved.set(skill.name, skill) - } - } catch (error) { - logger.warn(`path=<${p}> | failed to load skills from directory: ${error}`) - } - } else { - logger.warn(`path=<${p}> | skill source does not exist or is not a valid path`) - } + this._pathSources.push(source) } } @@ -266,6 +230,18 @@ export class AgentSkills implements Plugin { return { skills: 
resolved, ready } } + private async _resolvePathSources(agent: LocalAgent): Promise<void> { + for (const path of this._pathSources) { + try { + for (const skill of await Skill.fromSandbox(agent.sandbox, path, { strict: this._strict })) { + this._skills.set(skill.name, skill) + } + } catch (error) { + logger.warn(`path=<${path}> | failed to load skill: ${error}`) + } + } + } + /** * Create the skills activation tool using the tool() factory with Zod schema. */ diff --git a/strands-ts/src/vended-plugins/skills/skill.ts b/strands-ts/src/vended-plugins/skills/skill.ts index 51e5c43e4..ef8ac18fb 100644 --- a/strands-ts/src/vended-plugins/skills/skill.ts +++ b/strands-ts/src/vended-plugins/skills/skill.ts @@ -435,4 +435,39 @@ export class Skill { logger.debug(`path=<${resolvedDir}>, count=<${skills.length}> | loaded skills from directory`) return skills } + + /** + * Load one or more skills from a sandbox filesystem. + * + * Reads SKILL.md content from the sandbox and parses it. The path can point to a skill directory, + * directly to a SKILL.md file, or to a parent directory whose subdirectories are searched recursively. + * + * @param sandbox - Sandbox instance to read from. + * @param skillPath - Path to a skill directory, SKILL.md file, or parent directory in the sandbox. + * @param options - Optional settings. + * @returns A Promise resolving to the loaded skills (an empty array when a directory contains none). 
+ */ + static async fromSandbox( + sandbox: import('../../sandbox/base.js').Sandbox, + skillPath: string, + options?: { strict?: boolean } + ): Promise<Skill[]> { + const content = await sandbox.readText(`${skillPath}/SKILL.md`).catch(() => undefined) + if (content !== undefined) return [Skill.fromContent(content, { ...options, path: skillPath })] + + const direct = await sandbox.readText(skillPath).catch(() => undefined) + if (direct && direct.trim()) return [Skill.fromContent(direct, { ...options, path: skillPath })] + + const entries = await sandbox.listFiles(skillPath) + const skills: Skill[] = [] + for (const entry of entries) { + if (!entry.isDir) continue + try { + skills.push(...(await Skill.fromSandbox(sandbox, `${skillPath}/${entry.name}`, options))) + } catch { + // Subdirectory without valid skill — skip + } + } + return skills + } } diff --git a/strands-ts/src/vended-tools/code-interpreter/__tests__/code-interpreter.test.node.ts b/strands-ts/src/vended-tools/code-interpreter/__tests__/code-interpreter.test.node.ts new file mode 100644 index 000000000..406bb0684 --- /dev/null +++ b/strands-ts/src/vended-tools/code-interpreter/__tests__/code-interpreter.test.node.ts @@ -0,0 +1,103 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest' +import { codeInterpreter } from '../code-interpreter.js' +import { NotASandboxLocalEnvironment } from '../../../sandbox/not-a-sandbox-local-environment.js' +import { TestSandbox } from '../../../__fixtures__/test-sandbox.node.js' +import type { Sandbox } from '../../../sandbox/base.js' +import type { ToolContext } from '../../../tools/tool.js' +import { createMockAgent } from '../../../__fixtures__/agent-helpers.js' +import { execSync } from 'child_process' +import { realpathSync } from 'fs' + +const TEST_DIR = '/tmp/strands-test-code-interpreter-tool' + +function createContext(sandbox: Sandbox): ToolContext { + const agent = createMockAgent({ sandbox }) + return { + toolUse: { name: 'codeInterpreter', 
toolUseId: 'test-id', input: {} }, + agent, + invocationState: {}, + interrupt: () => { + throw new Error('not implemented') + }, + } +} + +describe.skipIf(process.platform === 'win32')('codeInterpreter tool', () => { + beforeEach(() => { + execSync(`rm -rf ${TEST_DIR} && mkdir -p ${TEST_DIR}`) + }) + + afterEach(() => { + execSync(`rm -rf ${TEST_DIR}`) + }) + + describe('normal mode (NotASandboxLocalEnvironment)', () => { + it('executes python code', async () => { + const context = createContext(new NotASandboxLocalEnvironment()) + const result = await codeInterpreter.invoke({ code: 'print(2 + 2)', language: 'python3' }, context) + expect(result.stdout).toBe('4\n') + expect(result.exitCode).toBe(0) + }) + + it('returns exit code on syntax error', async () => { + const context = createContext(new NotASandboxLocalEnvironment()) + const result = await codeInterpreter.invoke({ code: 'def broken(', language: 'python3' }, context) + expect(result.exitCode).not.toBe(0) + expect(result.stderr).toContain('SyntaxError') + }) + + it('returns exit code 127 for unknown language', async () => { + const context = createContext(new NotASandboxLocalEnvironment()) + const result = await codeInterpreter.invoke({ code: 'x', language: 'nonexistent_lang_xyz' }, context) + expect(result.exitCode).toBe(127) + }) + + it('throws without context', async () => { + await expect(codeInterpreter.invoke({ code: 'print(1)', language: 'python3' })).rejects.toThrow( + 'Tool context is required' + ) + }) + }) + + describe('sandbox mode (TestSandbox)', () => { + it('executes code within the sandbox working directory', async () => { + const sandbox = new TestSandbox(TEST_DIR) + const context = createContext(sandbox) + const result = await codeInterpreter.invoke( + { code: 'import os; print(os.getcwd())', language: 'python3' }, + context + ) + expect(result.stdout.trim()).toBe(realpathSync(TEST_DIR)) + }) + + it('respects workdir relative to sandbox', async () => { + execSync(`mkdir -p 
${TEST_DIR}/subdir`) + const sandbox = new TestSandbox(TEST_DIR) + const context = createContext(sandbox) + const result = await codeInterpreter.invoke( + { code: 'import os; print(os.getcwd())', language: 'python3', workdir: `${TEST_DIR}/subdir` }, + context + ) + expect(result.stdout.trim()).toBe(realpathSync(`${TEST_DIR}/subdir`)) + }) + + it('files created by code stay within sandbox directory', async () => { + const sandbox = new TestSandbox(TEST_DIR) + const context = createContext(sandbox) + await codeInterpreter.invoke( + { code: 'open("output.txt", "w").write("from sandbox")', language: 'python3' }, + context + ) + const check = execSync(`cat ${TEST_DIR}/output.txt`, { encoding: 'utf-8' }) + expect(check).toBe('from sandbox') + }) + + it('passes timeout to sandbox', async () => { + const sandbox = new TestSandbox(TEST_DIR) + const context = createContext(sandbox) + await expect( + codeInterpreter.invoke({ code: 'import time; time.sleep(10)', language: 'python3', timeout: 0.1 }, context) + ).rejects.toThrow('timed out') + }) + }) +}) diff --git a/strands-ts/src/vended-tools/code-interpreter/code-interpreter.ts b/strands-ts/src/vended-tools/code-interpreter/code-interpreter.ts new file mode 100644 index 000000000..45596b108 --- /dev/null +++ b/strands-ts/src/vended-tools/code-interpreter/code-interpreter.ts @@ -0,0 +1,61 @@ +/** + * Sandbox-aware code execution tool. + * + * Executes source code in a specified language through the agent's sandbox. + * The sandbox determines where and how the code runs — the model picks + * the language, writes the code, and gets the result. 
+ */ + +import { tool } from '../../tools/tool-factory.js' +import { z } from 'zod' + +const codeInterpreterInputSchema = z.object({ + code: z.string().min(1).describe('The source code to execute.'), + language: z.string().min(1).describe('The language interpreter to use (e.g., python3, node, ruby, bash).'), + workdir: z + .string() + .optional() + .describe('Working directory for code execution. If not specified, uses the sandbox default.'), + timeout: z.number().positive().optional().describe('Timeout in seconds.'), +}) + +/** + * Sandbox-aware code execution tool. + * + * @example + * ```typescript + * import { Agent } from '@strands-agents/sdk' + * import { codeInterpreter } from '@strands-agents/sdk/vended-tools/code-interpreter' + * + * const agent = new Agent({ tools: [codeInterpreter] }) + * await agent.invoke('Write a Python script that calculates the first 10 fibonacci numbers') + * ``` + */ +export const codeInterpreter = tool({ + name: 'codeInterpreter', + description: + 'Execute source code in a specified language. ' + + 'The code runs in the sandbox via the language interpreter (e.g., python3 -c ). 
' + + 'Use for computations, data processing, file generation, or any task better expressed as code.', + inputSchema: codeInterpreterInputSchema, + callback: async (input, context) => { + if (!context) { + throw new Error('Tool context is required') + } + + const sandbox = context.agent.sandbox + const result = await sandbox.executeCode(input.code, input.language, { + timeout: input.timeout, + cwd: input.workdir, + }) + + return { + exitCode: result.exitCode, + stdout: result.stdout, + stderr: result.stderr, + ...(result.outputFiles.length > 0 && { + outputFiles: result.outputFiles.map((f) => ({ name: f.name, mimeType: f.mimeType })), + }), + } + }, +}) diff --git a/strands-ts/src/vended-tools/code-interpreter/index.ts b/strands-ts/src/vended-tools/code-interpreter/index.ts new file mode 100644 index 000000000..671e5f0c8 --- /dev/null +++ b/strands-ts/src/vended-tools/code-interpreter/index.ts @@ -0,0 +1,5 @@ +/** + * Sandbox-aware code execution tool. + */ + +export { codeInterpreter } from './code-interpreter.js' diff --git a/strands-ts/src/vended-tools/exec/__tests__/exec.test.node.ts b/strands-ts/src/vended-tools/exec/__tests__/exec.test.node.ts new file mode 100644 index 000000000..f88fab76d --- /dev/null +++ b/strands-ts/src/vended-tools/exec/__tests__/exec.test.node.ts @@ -0,0 +1,91 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest' +import { exec } from '../exec.js' +import { NotASandboxLocalEnvironment } from '../../../sandbox/not-a-sandbox-local-environment.js' +import { TestSandbox } from '../../../__fixtures__/test-sandbox.node.js' +import type { Sandbox } from '../../../sandbox/base.js' +import type { ToolContext } from '../../../tools/tool.js' +import { createMockAgent } from '../../../__fixtures__/agent-helpers.js' +import { execSync } from 'child_process' +import { existsSync, realpathSync } from 'fs' + +const TEST_DIR = '/tmp/strands-test-exec-tool' + +function createContext(sandbox: Sandbox): ToolContext { + const agent = 
createMockAgent({ sandbox }) + return { + toolUse: { name: 'exec', toolUseId: 'test-id', input: {} }, + agent, + invocationState: {}, + interrupt: () => { + throw new Error('not implemented') + }, + } +} + +describe.skipIf(process.platform === 'win32')('exec tool', () => { + beforeEach(() => { + execSync(`rm -rf ${TEST_DIR} && mkdir -p ${TEST_DIR}`) + }) + + afterEach(() => { + execSync(`rm -rf ${TEST_DIR}`) + }) + + describe('normal mode (NotASandboxLocalEnvironment)', () => { + it('executes a command', async () => { + const context = createContext(new NotASandboxLocalEnvironment()) + const result = await exec.invoke({ command: 'echo hello' }, context) + expect(result.stdout).toBe('hello\n') + expect(result.exitCode).toBe(0) + }) + + it('captures exit code on failure', async () => { + const context = createContext(new NotASandboxLocalEnvironment()) + const result = await exec.invoke({ command: 'exit 99' }, context) + expect(result.exitCode).toBe(99) + }) + + it('throws without context', async () => { + await expect(exec.invoke({ command: 'echo hi' })).rejects.toThrow('Tool context is required') + }) + }) + + describe('sandbox mode (TestSandbox)', () => { + it('executes command within sandbox working directory', async () => { + const context = createContext(new TestSandbox(TEST_DIR)) + const result = await exec.invoke({ command: 'pwd -P' }, context) + expect(result.stdout.trim()).toBe(realpathSync(TEST_DIR)) + }) + + it('files created by command stay in sandbox directory', async () => { + const context = createContext(new TestSandbox(TEST_DIR)) + await exec.invoke({ command: 'touch created-by-exec.txt' }, context) + expect(existsSync(`${TEST_DIR}/created-by-exec.txt`)).toBe(true) + }) + + it('respects workdir within sandbox', async () => { + execSync(`mkdir -p ${TEST_DIR}/subdir`) + const context = createContext(new TestSandbox(TEST_DIR)) + const result = await exec.invoke({ command: 'pwd -P', workdir: `${TEST_DIR}/subdir` }, context) + 
expect(result.stdout.trim()).toBe(realpathSync(`${TEST_DIR}/subdir`))
+    })
+
+    it('prevents command injection via workdir', async () => {
+      const context = createContext(new TestSandbox(TEST_DIR))
+      const result = await exec.invoke({ command: 'echo safe', workdir: '/tmp; echo injected' }, context)
+      expect(result.stdout).not.toContain('injected')
+      expect(result.exitCode).not.toBe(0)
+    })
+
+    it('passes timeout to sandbox', async () => {
+      const context = createContext(new TestSandbox(TEST_DIR))
+      await expect(exec.invoke({ command: 'sleep 10', timeout: 0.1 }, context)).rejects.toThrow('timed out')
+    })
+
+    it('captures stderr', async () => {
+      const context = createContext(new TestSandbox(TEST_DIR))
+      const result = await exec.invoke({ command: 'echo err >&2' }, context)
+      expect(result.stderr).toBe('err\n')
+    })
+  })
+})
diff --git a/strands-ts/src/vended-tools/exec/exec.ts b/strands-ts/src/vended-tools/exec/exec.ts
new file mode 100644
index 000000000..c5abe2607
--- /dev/null
+++ b/strands-ts/src/vended-tools/exec/exec.ts
@@ -0,0 +1,61 @@
+/**
+ * Sandbox-aware command execution tool.
+ *
+ * Runs shell commands in the agent's sandbox. The sandbox determines
+ * where and how the command executes — locally, in a Docker container,
+ * in a cloud runtime, etc. The tool doesn't care; it delegates to
+ * `context.agent.sandbox.execute()`.
+ */
+
+import { tool } from '../../tools/tool-factory.js'
+import { z } from 'zod'
+
+const execInputSchema = z.object({
+  command: z.string().min(1).describe('The shell command to execute.'),
+  workdir: z
+    .string()
+    .optional()
+    .describe('Working directory to run the command in. If not specified, uses the sandbox default.'),
+  timeout: z.number().positive().optional().describe('Timeout in seconds.'),
+})
+
+/**
+ * Sandbox-aware command execution tool.
+ *
+ * Runs shell commands in the agent's configured sandbox. Use `workdir`
+ * to execute in a specific directory — equivalent to `cd <workdir> && <command>`.
+ *
+ * Returns the command's `exitCode`, `stdout` and `stderr`. Throws if the
+ * tool is invoked without a tool context.
+ *
+ * @example
+ * ```typescript
+ * import { Agent } from '@strands-agents/sdk'
+ * import { exec } from '@strands-agents/sdk/vended-tools/exec'
+ *
+ * const agent = new Agent({ tools: [exec] })
+ * await agent.invoke('List all files in /tmp')
+ * ```
+ */
+export const exec = tool({
+  name: 'exec',
+  description:
+    'Execute a shell command in the sandbox. ' +
+    'Use workdir to run in a specific directory. ' +
+    'Commands run in a fresh process each time — use workdir instead of cd.',
+  inputSchema: execInputSchema,
+  callback: async (input, context) => {
+    if (!context) {
+      throw new Error('Tool context is required')
+    }
+
+    const sandbox = context.agent.sandbox
+    const result = await sandbox.execute(input.command, { timeout: input.timeout, cwd: input.workdir })
+
+    return {
+      exitCode: result.exitCode,
+      stdout: result.stdout,
+      stderr: result.stderr,
+    }
+  },
+})
diff --git a/strands-ts/src/vended-tools/exec/index.ts b/strands-ts/src/vended-tools/exec/index.ts
new file mode 100644
index 000000000..c6ea8072d
--- /dev/null
+++ b/strands-ts/src/vended-tools/exec/index.ts
@@ -0,0 +1,5 @@
+/**
+ * Sandbox-aware command execution tool.
+ */ + +export { exec } from './exec.js' diff --git a/strands-ts/src/vended-tools/file-editor/__tests__/file-editor.test.node.ts b/strands-ts/src/vended-tools/file-editor/__tests__/file-editor.test.node.ts index 4cc85700e..d24552873 100644 --- a/strands-ts/src/vended-tools/file-editor/__tests__/file-editor.test.node.ts +++ b/strands-ts/src/vended-tools/file-editor/__tests__/file-editor.test.node.ts @@ -3,6 +3,8 @@ import { fileEditor } from '../file-editor.js' import type { ToolContext } from '../../../index.js' import { StateStore } from '../../../state-store.js' import { createMockAgent } from '../../../__fixtures__/agent-helpers.js' +import { NotASandboxLocalEnvironment } from '../../../sandbox/not-a-sandbox-local-environment.js' +import { TestSandbox } from '../../../__fixtures__/test-sandbox.node.js' import { promises as fs } from 'fs' import * as path from 'path' import { tmpdir } from 'os' @@ -13,7 +15,8 @@ describe('fileEditor tool', () => { // Helper to create fresh state and context for each test const createFreshContext = (): { state: StateStore; context: ToolContext } => { - const agent = createMockAgent() + const sandbox = new NotASandboxLocalEnvironment() + const agent = createMockAgent({ sandbox }) const toolContext: ToolContext = { toolUse: { name: 'fileEditor', @@ -156,13 +159,7 @@ describe('fileEditor tool', () => { it('throws when file not found', async () => { const nonExistentPath = path.join(testDir, 'nonexistent.txt') await expect(fileEditor.invoke({ command: 'view', path: nonExistentPath }, context)).rejects.toThrow( - 'does not exist' - ) - }) - - it('throws when path is not absolute', async () => { - await expect(fileEditor.invoke({ command: 'view', path: 'relative/path.txt' }, context)).rejects.toThrow( - 'not an absolute path' + 'no such file' ) }) @@ -237,19 +234,6 @@ describe('fileEditor tool', () => { ).rejects.toThrow('already exists') }) - it('throws when path is not absolute', async () => { - await expect( - fileEditor.invoke({ 
command: 'create', path: 'relative/path.txt', file_text: 'content' }, context) - ).rejects.toThrow('not an absolute path') - }) - - it('throws when path contains traversal', async () => { - const filePath = '..outside.txt' - await expect( - fileEditor.invoke({ command: 'create', path: filePath, file_text: 'content' }, context) - ).rejects.toThrow() - }) - it('throws when trying to create in directory as path', async () => { const dirPath = await createTestDirectory('testdir', {}) await expect( @@ -342,7 +326,7 @@ describe('fileEditor tool', () => { const nonExistentPath = path.join(testDir, 'nonexistent.txt') await expect( fileEditor.invoke({ command: 'str_replace', path: nonExistentPath, old_str: 'OLD', new_str: 'NEW' }, context) - ).rejects.toThrow('does not exist') + ).rejects.toThrow('no such file') }) it('throws when path is directory', async () => { @@ -448,7 +432,7 @@ describe('fileEditor tool', () => { const nonExistentPath = path.join(testDir, 'nonexistent.txt') await expect( fileEditor.invoke({ command: 'insert', path: nonExistentPath, insert_line: 0, new_str: 'NEW' }, context) - ).rejects.toThrow('does not exist') + ).rejects.toThrow('no such file') }) it('throws when path is directory', async () => { @@ -460,14 +444,6 @@ describe('fileEditor tool', () => { }) }) - describe('path validation and security', () => { - it('rejects relative paths', async () => { - await expect(fileEditor.invoke({ command: 'view', path: 'relative/path.txt' }, context)).rejects.toThrow( - 'not an absolute path' - ) - }) - }) - describe('file size limits', () => { it('throws when file exceeds default size limit', async () => { // Create a file larger than 1MB @@ -478,6 +454,49 @@ describe('fileEditor tool', () => { }) }) + describe('undo command', () => { + it('reverts str_replace', async () => { + const filePath = await createTestFile('test.txt', 'original content') + await fileEditor.invoke( + { command: 'str_replace', path: filePath, old_str: 'original', new_str: 'modified' 
}, + context + ) + + const result = await fileEditor.invoke({ command: 'undo', path: filePath }, context) + expect(result).toContain('Reverted') + + const fileContent = await fs.readFile(filePath, 'utf-8') + expect(fileContent).toBe('original content') + }) + + it('reverts insert', async () => { + const filePath = await createTestFile('test.txt', 'Line 1\nLine 2') + await fileEditor.invoke({ command: 'insert', path: filePath, insert_line: 1, new_str: 'INSERTED' }, context) + + const result = await fileEditor.invoke({ command: 'undo', path: filePath }, context) + expect(result).toContain('Reverted') + + const fileContent = await fs.readFile(filePath, 'utf-8') + expect(fileContent).toBe('Line 1\nLine 2') + }) + + it('throws when nothing to undo', async () => { + const filePath = await createTestFile('test.txt', 'content') + await expect(fileEditor.invoke({ command: 'undo', path: filePath }, context)).rejects.toThrow('Nothing to undo') + }) + + it('only undoes the last edit', async () => { + const filePath = await createTestFile('test.txt', 'first') + await fileEditor.invoke({ command: 'str_replace', path: filePath, old_str: 'first', new_str: 'second' }, context) + await fileEditor.invoke({ command: 'str_replace', path: filePath, old_str: 'second', new_str: 'third' }, context) + + await fileEditor.invoke({ command: 'undo', path: filePath }, context) + + const fileContent = await fs.readFile(filePath, 'utf-8') + expect(fileContent).toBe('second') + }) + }) + describe('edge cases', () => { it('handles files with special characters in content', async () => { const content = 'Special chars: @#$%^&*()_+-={}[]|:;"<>,.?/~`' @@ -502,4 +521,128 @@ describe('fileEditor tool', () => { expect(result).not.toContain('\t') }) }) + + describe('sandbox mode (TestSandbox)', () => { + let sandboxDir: string + let sandboxContext: ToolContext + + beforeEach(async () => { + sandboxDir = path.join(tmpdir(), `file-editor-sandbox-${Date.now()}-${Math.random().toString(36).slice(2)}`) + await 
fs.mkdir(sandboxDir, { recursive: true }) + const sandbox = new TestSandbox(sandboxDir) + const agent = createMockAgent({ sandbox }) + sandboxContext = { + toolUse: { name: 'fileEditor', toolUseId: 'test-id', input: {} }, + agent, + invocationState: {}, + interrupt: () => { + throw new Error('interrupt not available in mock context') + }, + } + }) + + afterEach(async () => { + await fs.rm(sandboxDir, { recursive: true, force: true }).catch(() => {}) + }) + + it('creates a file via shell-based sandbox', async () => { + const filePath = path.join(sandboxDir, 'sandbox-created.txt') + await fileEditor.invoke({ command: 'create', path: filePath, file_text: 'sandbox content' }, sandboxContext) + const content = await fs.readFile(filePath, 'utf-8') + expect(content).toBe('sandbox content') + }) + + it('views a file from sandbox directory', async () => { + const filePath = path.join(sandboxDir, 'sandbox-view.txt') + await fs.writeFile(filePath, 'line 1\nline 2\n', 'utf-8') + const result = await fileEditor.invoke({ command: 'view', path: filePath }, sandboxContext) + expect(result).toContain('line 1') + expect(result).toContain('line 2') + }) + + it('str_replace modifies file in sandbox directory', async () => { + const filePath = path.join(sandboxDir, 'sandbox-edit.txt') + await fs.writeFile(filePath, 'hello world', 'utf-8') + await fileEditor.invoke( + { command: 'str_replace', path: filePath, old_str: 'world', new_str: 'sandbox' }, + sandboxContext + ) + const content = await fs.readFile(filePath, 'utf-8') + expect(content).toBe('hello sandbox') + }) + + it('insert_line adds content in sandbox directory', async () => { + const filePath = path.join(sandboxDir, 'sandbox-insert.txt') + await fs.writeFile(filePath, 'first\nthird\n', 'utf-8') + await fileEditor.invoke({ command: 'insert', path: filePath, insert_line: 1, new_str: 'second' }, sandboxContext) + const content = await fs.readFile(filePath, 'utf-8') + expect(content).toBe('first\nsecond\nthird\n') + }) + + 
it('grep finds pattern in files', async () => { + await fs.writeFile(path.join(sandboxDir, 'a.txt'), 'hello world\nfoo bar\n', 'utf-8') + await fs.writeFile(path.join(sandboxDir, 'b.txt'), 'hello again\n', 'utf-8') + const result = await fileEditor.invoke({ command: 'grep', path: sandboxDir, pattern: 'hello' }, sandboxContext) + expect(result).toContain('a.txt') + expect(result).toContain('b.txt') + expect(result).toContain('hello') + }) + + it('grep returns no matches message', async () => { + await fs.writeFile(path.join(sandboxDir, 'a.txt'), 'foo bar\n', 'utf-8') + const result = await fileEditor.invoke( + { command: 'grep', path: sandboxDir, pattern: 'nonexistent' }, + sandboxContext + ) + expect(result).toContain('No matches found') + }) + + it('grep respects include filter', async () => { + await fs.writeFile(path.join(sandboxDir, 'code.ts'), 'const x = 1\n', 'utf-8') + await fs.writeFile(path.join(sandboxDir, 'notes.txt'), 'const y = 2\n', 'utf-8') + const result = await fileEditor.invoke( + { command: 'grep', path: sandboxDir, pattern: 'const', include: '*.ts' }, + sandboxContext + ) + expect(result).toContain('code.ts') + expect(result).not.toContain('notes.txt') + }) + + it('grep respects max_results', async () => { + let content = '' + for (let i = 0; i < 10; i++) content += `match line ${i}\n` + await fs.writeFile(path.join(sandboxDir, 'many.txt'), content, 'utf-8') + const result = await fileEditor.invoke( + { command: 'grep', path: sandboxDir, pattern: 'match', max_results: 3 }, + sandboxContext + ) + expect(result).toContain('truncated') + }) + + it('glob finds files by pattern', async () => { + await fs.writeFile(path.join(sandboxDir, 'app.ts'), '', 'utf-8') + await fs.writeFile(path.join(sandboxDir, 'util.ts'), '', 'utf-8') + await fs.writeFile(path.join(sandboxDir, 'readme.md'), '', 'utf-8') + const result = await fileEditor.invoke({ command: 'glob', path: sandboxDir, pattern: '*.ts' }, sandboxContext) + expect(result).toContain('app.ts') + 
expect(result).toContain('util.ts') + expect(result).not.toContain('readme.md') + }) + + it('glob returns no files message', async () => { + const result = await fileEditor.invoke({ command: 'glob', path: sandboxDir, pattern: '*.xyz' }, sandboxContext) + expect(result).toContain('No files found') + }) + + it('glob respects max_results', async () => { + for (let i = 0; i < 10; i++) { + await fs.writeFile(path.join(sandboxDir, `file${i}.ts`), '', 'utf-8') + } + const result = await fileEditor.invoke( + { command: 'glob', path: sandboxDir, pattern: '*.ts', max_results: 3 }, + sandboxContext + ) + expect(result).toContain('truncated') + }) + }) }) diff --git a/strands-ts/src/vended-tools/file-editor/file-editor.ts b/strands-ts/src/vended-tools/file-editor/file-editor.ts index e8fea542c..fd8718a14 100644 --- a/strands-ts/src/vended-tools/file-editor/file-editor.ts +++ b/strands-ts/src/vended-tools/file-editor/file-editor.ts @@ -1,44 +1,53 @@ import { tool } from '../../tools/tool-factory.js' import { z } from 'zod' -import type { IFileReader } from './types.js' -import { promises as fs } from 'fs' -import * as path from 'path' +import type { Sandbox } from '../../sandbox/base.js' +import { shellQuote } from '../../utils/shell-quote.js' const SNIPPET_LINES = 4 const DEFAULT_MAX_FILE_SIZE = 1048576 // 1MB const MAX_DIRECTORY_DEPTH = 2 +const undoStore = new WeakMap>() + +function getUndoMap(agent: object): Map { + let map = undoStore.get(agent) + if (!map) { + map = new Map() + undoStore.set(agent, map) + } + return map +} + /** * Zod schema for file editor input validation. 
*/ const fileEditorInputSchema = z.object({ command: z - .enum(['view', 'create', 'str_replace', 'insert']) - .describe('The operation to perform: `view`, `create`, `str_replace`, `insert`.'), - path: z.string().describe('Absolute path to the file or directory.'), + .enum(['view', 'create', 'str_replace', 'insert', 'undo', 'grep', 'glob']) + .describe('The operation to perform: `view`, `create`, `str_replace`, `insert`, `undo`, `grep`, `glob`.'), + path: z.string().describe('Path to the file or directory. Can be absolute or relative to the working directory.'), file_text: z.string().optional().describe('Content for new file (required for create command).'), view_range: z .tuple([z.number(), z.number()]) .optional() .describe('Line range to view [start, end]. 1-indexed. End can be -1 for end of file.'), old_str: z.string().optional().describe('Exact string to find and replace (required for str_replace command).'), - new_str: z.string().optional().describe('Replacement string (for str_replace and insert commands).'), + new_str: z.string().optional().describe('Replacement string for str_replace, or text to insert for insert command.'), insert_line: z .number() .optional() .describe('Line number where text should be inserted (0-indexed, required for insert command).'), + pattern: z + .string() + .optional() + .describe('Search pattern. Regex for grep, glob pattern (e.g., **/*.ts) for glob command.'), + include: z + .string() + .optional() + .describe('File glob filter for grep (e.g., *.ts). Only searches files matching this pattern.'), + max_results: z.number().optional().describe('Maximum number of results to return for grep or glob commands.'), }) -/** - * Text file reader implementation. - * Reads files as UTF-8 encoded text. - */ -class TextFileReader implements IFileReader { - async read(filePath: string): Promise { - return await fs.readFile(filePath, 'utf-8') - } -} - /** * File editor tool for viewing, creating, and editing files programmatically. 
* @@ -63,32 +72,45 @@ class TextFileReader implements IFileReader { export const fileEditor = tool({ name: 'fileEditor', description: - 'Filesystem editor tool for viewing, creating, and editing files. Supports view (with line ranges), create, str_replace, and insert operations. Files must use absolute paths.', + 'Filesystem tool for viewing, creating, editing, and searching files. Supports view (with line ranges), create, str_replace, insert, undo, grep (search file contents), and glob (find files by name). Paths can be absolute or relative to the working directory.', inputSchema: fileEditorInputSchema, callback: async (input, context) => { if (!context) { throw new Error('Tool context is required for file editor operations') } - const fileReader = new TextFileReader() + const sandbox = context.agent.sandbox + const undoMap = getUndoMap(context.agent) let result: string switch (input.command) { case 'view': - result = await handleView(input.path, input.view_range, fileReader) + result = await handleView(sandbox, input.path, input.view_range) break case 'create': - result = await handleCreate(input.path, input.file_text!) + result = await handleCreate(sandbox, input.path, input.file_text!) break case 'str_replace': - result = await handleStrReplace(input.path, input.old_str!, input.new_str, fileReader) + result = await handleStrReplace(sandbox, undoMap, input.path, input.old_str!, input.new_str!) break case 'insert': - result = await handleInsert(input.path, input.insert_line!, input.new_str!, fileReader) + result = await handleInsert(sandbox, undoMap, input.path, input.insert_line!, input.new_str!) 
+ break + + case 'undo': + result = await handleUndo(sandbox, undoMap, input.path) + break + + case 'grep': + result = await handleGrep(sandbox, input.path, input.pattern!, input.include, input.max_results) + break + + case 'glob': + result = await handleGlob(sandbox, input.path, input.pattern!, input.max_results) break default: @@ -99,62 +121,6 @@ export const fileEditor = tool({ }, }) -/** - * Validates that a path is absolute and doesn't contain directory traversal. - */ -function validatePath(command: string, filePath: string): void { - // Check if it's an absolute path - if (!path.isAbsolute(filePath)) { - const suggestedPath = path.resolve(filePath) - throw new Error( - `The path ${filePath} is not an absolute path, it should start with \`/\`. Maybe you meant ${suggestedPath}?` - ) - } - - // Check for directory traversal - reject paths containing '..' segments - const normalized = path.normalize(filePath) - if (normalized.includes('..')) { - throw new Error(`Invalid path: path traversal is not allowed`) - } -} - -/** - * Checks if a file exists. - */ -async function fileExists(filePath: string): Promise { - try { - await fs.access(filePath) - return true - } catch { - return false - } -} - -/** - * Checks if a path is a directory. - */ -async function isDirectory(filePath: string): Promise { - try { - const stats = await fs.stat(filePath) - return stats.isDirectory() - } catch { - return false - } -} - -/** - * Checks file size against limit. - */ -async function checkFileSize(filePath: string, maxSize: number = DEFAULT_MAX_FILE_SIZE): Promise { - const stats = await fs.stat(filePath).catch((err) => { - throw new Error(`Failed to check file size: ${err}`) - }) - - if (stats.size > maxSize) { - throw new Error(`File size (${stats.size} bytes) exceeds maximum allowed size (${maxSize} bytes)`) - } -} - /** * Formats file content with line numbers (cat -n style). 
*/ @@ -174,24 +140,21 @@ function makeOutput(fileContent: string, fileDescriptor: string, initLine: numbe /** * Lists directory contents up to 2 levels deep, excluding hidden files. */ -async function listDirectory(dirPath: string): Promise { +async function listDirectory(sandbox: Sandbox, dirPath: string): Promise { const items: string[] = [] - async function walk(currentPath: string, depth: number): Promise { + async function walk(currentPath: string, prefix: string, depth: number): Promise { try { - const entries = await fs.readdir(currentPath, { withFileTypes: true }) + const entries = await sandbox.listFiles(currentPath) for (const entry of entries) { - // Skip hidden files/directories if (entry.name.startsWith('.')) continue - const fullPath = path.join(currentPath, entry.name) - const relativePath = path.relative(dirPath, fullPath) - items.push(relativePath || entry.name) + const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name + items.push(relativePath) - // Continue walking if we haven't reached max depth yet - if (entry.isDirectory() && depth < MAX_DIRECTORY_DEPTH) { - await walk(fullPath, depth + 1) + if (entry.isDir && depth < MAX_DIRECTORY_DEPTH) { + await walk(`${currentPath}/${entry.name}`, relativePath, depth + 1) } } } catch { @@ -199,7 +162,7 @@ async function listDirectory(dirPath: string): Promise { } } - await walk(dirPath, 0) + await walk(dirPath, '', 0) const result = items.sort().join('\n') return `Here's the files and directories up to 2 levels deep in ${dirPath}, excluding hidden items:\n${result}\n` @@ -209,31 +172,31 @@ async function listDirectory(dirPath: string): Promise { * Handles the view command. 
*/ async function handleView( + sandbox: Sandbox, filePath: string, - viewRange: [number, number] | undefined, - fileReader: IFileReader + viewRange: [number, number] | undefined ): Promise { - validatePath('view', filePath) - - const exists = await fileExists(filePath) - if (!exists) { - throw new Error(`The path ${filePath} does not exist. Please provide a valid path.`) - } + const info = await sandbox.statFile(filePath) - const isDir = await isDirectory(filePath) - - if (isDir) { + if (info.isDir) { if (viewRange) { throw new Error('The `view_range` parameter is not allowed when `path` points to a directory.') } - return await listDirectory(filePath) + return await listDirectory(sandbox, filePath) + } + + if (info.size !== undefined && info.size > DEFAULT_MAX_FILE_SIZE) { + throw new Error(`File size (${info.size} bytes) exceeds maximum allowed size (${DEFAULT_MAX_FILE_SIZE} bytes)`) } - // Check file size before reading - await checkFileSize(filePath) + const raw = await sandbox.readFile(filePath) - // Read file content - only if not a directory - const fileContent = await fileReader.read(filePath) + // Null byte heuristic for binary detection (same as git) + if (raw.includes(0)) { + return `Binary file: ${filePath} (${raw.length} bytes)` + } + + const fileContent = new TextDecoder().decode(raw) let initLine = 1 let contentToShow = fileContent @@ -276,24 +239,21 @@ async function handleView( /** * Handles the create command. */ -async function handleCreate(filePath: string, fileText: string): Promise { +async function handleCreate(sandbox: Sandbox, filePath: string, fileText: string): Promise { if (fileText === undefined) { throw new Error('Parameter `file_text` is required for command: create') } - validatePath('create', filePath) + const exists = await sandbox.statFile(filePath).then( + () => true, + () => false + ) - const exists = await fileExists(filePath) if (exists) { throw new Error(`File already exists at: ${filePath}. 
Cannot overwrite files using command \`create\`.`)
   }
 
-  // Create parent directories if needed
-  const dir = path.dirname(filePath)
-  await fs.mkdir(dir, { recursive: true })
-
-  // Write file
-  await fs.writeFile(filePath, fileText, 'utf-8')
+  await sandbox.writeText(filePath, fileText)
 
   return `File created successfully at: ${filePath}`
 }
@@ -302,153 +262,194 @@ async function handleCreate(filePath: string, fileText: string): Promise<string>
  * Handles the str_replace command.
+ *
+ * Replaces the single verbatim occurrence of `oldStr` in `filePath` with
+ * `newStr` (an omitted `newStr` deletes the match), stores the previous
+ * content in `undoMap` under `filePath`, and returns a message containing a
+ * numbered snippet around the edit.
+ *
+ * Throws when `oldStr` is missing, the path is a directory, the file exceeds
+ * `DEFAULT_MAX_FILE_SIZE`, or `oldStr` matches zero or multiple times.
  */
 async function handleStrReplace(
+  sandbox: Sandbox,
+  undoMap: Map<string, string>,
   filePath: string,
   oldStr: string,
-  newStr: string | undefined,
-  fileReader: IFileReader
+  newStr: string | undefined
 ): Promise<string> {
   if (oldStr === undefined) {
     throw new Error('Parameter `old_str` is required for command: str_replace')
   }
 
-  validatePath('str_replace', filePath)
-
-  const exists = await fileExists(filePath)
-  if (!exists) {
-    throw new Error(`The path ${filePath} does not exist. Please provide a valid path.`)
-  }
+  // `new_str` is optional in the input schema: omitting it deletes `old_str`.
+  // Normalise once so `undefined` is never concatenated into the file content.
+  const replacement = newStr ?? ''
+
+  const info = await sandbox.statFile(filePath)
 
-  const isDir = await isDirectory(filePath)
-  if (isDir) {
+  if (info.isDir) {
     throw new Error(`The path ${filePath} is a directory and only the \`view\` command can be used on directories`)
   }
 
-  await checkFileSize(filePath)
-
-  // Read file content
-  let fileContent = await fileReader.read(filePath)
-
-  // Expand tabs in content and search string
-  fileContent = fileContent.replace(/\t/g, ' ')
-  const expandedOldStr = oldStr.replace(/\t/g, ' ')
-  const expandedNewStr = newStr ? newStr.replace(/\t/g, ' ') : ''
+  if (info.size !== undefined && info.size > DEFAULT_MAX_FILE_SIZE) {
+    throw new Error(`File size (${info.size} bytes) exceeds maximum allowed size (${DEFAULT_MAX_FILE_SIZE} bytes)`)
+  }
 
-  // Check if old_str is unique
-  const occurrences = (fileContent.match(new RegExp(escapeRegExp(expandedOldStr), 'g')) || []).length
+  const fileContent = await sandbox.readText(filePath)
 
-  if (occurrences === 0) {
+  const first = fileContent.indexOf(oldStr)
+  if (first === -1) {
     throw new Error(`No replacement was performed, old_str \`${oldStr}\` did not appear verbatim in ${filePath}.`)
   }
 
-  if (occurrences > 1) {
+  if (fileContent.indexOf(oldStr, first + 1) !== -1) {
     const lines = fileContent.split('\n')
-    const lineNumbers = lines
-      .map((line, index) => (line.includes(expandedOldStr) ? index + 1 : -1))
-      .filter((num) => num !== -1)
+    const lineNumbers = lines.map((line, index) => (line.includes(oldStr) ? index + 1 : -1)).filter((num) => num !== -1)
     throw new Error(
       `No replacement was performed. Multiple occurrences of old_str \`${oldStr}\` in lines ${JSON.stringify(lineNumbers)}. Please ensure it is unique`
     )
   }
 
-  // Perform replacement
-  const newFileContent = fileContent.replace(expandedOldStr, () => expandedNewStr)
+  undoMap.set(filePath, fileContent)
+
+  const newFileContent = fileContent.slice(0, first) + replacement + fileContent.slice(first + oldStr.length)
 
-  // Write back to file
-  await fs.writeFile(filePath, newFileContent, 'utf-8')
+  await sandbox.writeText(filePath, newFileContent)
 
-  // Create snippet
-  const replacementLine = fileContent.substring(0, fileContent.indexOf(expandedOldStr)).split('\n').length - 1
-  const insertedLines = expandedNewStr.split('\n').length
-  const originalLines = expandedOldStr.split('\n').length
-  const lineDifference = insertedLines - originalLines
+  const replacementLine = fileContent.slice(0, first).split('\n').length - 1
+  const lineDifference = replacement.split('\n').length - oldStr.split('\n').length
 
   const lines = newFileContent.split('\n')
   const startLine = Math.max(0, replacementLine - SNIPPET_LINES)
   const endLine = Math.min(lines.length, replacementLine + SNIPPET_LINES + lineDifference + 1)
-  const snippetLines = lines.slice(startLine, endLine)
-  const snippet = snippetLines.join('\n')
-
-  const successMsg = `The file ${filePath} has been edited. ${makeOutput(snippet, `a snippet of ${filePath}`, startLine + 1)}Review the changes and make sure they are as expected. Edit the file again if necessary.`
+  const snippet = lines.slice(startLine, endLine).join('\n')
 
-  return successMsg
+  return `The file ${filePath} has been edited. ${makeOutput(snippet, `a snippet of ${filePath}`, startLine + 1)}Review the changes and make sure they are as expected. Edit the file again if necessary.`
 }
 
 /**
  * Handles the insert command.
*/ async function handleInsert( + sandbox: Sandbox, + undoMap: Map, filePath: string, insertLine: number, - newStr: string, - fileReader: IFileReader + newStr: string ): Promise { if (insertLine === undefined || newStr === undefined) { throw new Error('Parameters `insert_line` and `new_str` are required for command: insert') } - validatePath('insert', filePath) + const info = await sandbox.statFile(filePath) - const exists = await fileExists(filePath) - if (!exists) { - throw new Error(`The path ${filePath} does not exist. Please provide a valid path.`) - } - - const isDir = await isDirectory(filePath) - if (isDir) { + if (info.isDir) { throw new Error(`The path ${filePath} is a directory and only the \`view\` command can be used on directories`) } - await checkFileSize(filePath) - - // Read file content - let fileText = await fileReader.read(filePath) + if (info.size !== undefined && info.size > DEFAULT_MAX_FILE_SIZE) { + throw new Error(`File size (${info.size} bytes) exceeds maximum allowed size (${DEFAULT_MAX_FILE_SIZE} bytes)`) + } - // Expand tabs - fileText = fileText.replace(/\t/g, ' ') - const expandedNewStr = newStr.replace(/\t/g, ' ') + const fileText = await sandbox.readText(filePath) const fileTextLines = fileText.split('\n') const nLines = fileTextLines.length - // Validate insert_line if (insertLine < 0 || insertLine > nLines) { throw new Error( `Invalid \`insert_line\` parameter: ${insertLine}. It should be within the range of lines of the file: [0, ${nLines}]` ) } - // Perform insertion - const newStrLines = expandedNewStr.split('\n') + const newStrLines = newStr.split('\n') + const newFileTextLines = + fileText === '' + ? 
newStrLines + : [...fileTextLines.slice(0, insertLine), ...newStrLines, ...fileTextLines.slice(insertLine)] - // Handle empty file case - let newFileTextLines: string[] - if (fileText === '') { - newFileTextLines = newStrLines - } else { - newFileTextLines = [...fileTextLines.slice(0, insertLine), ...newStrLines, ...fileTextLines.slice(insertLine)] - } + undoMap.set(filePath, fileText) const newFileText = newFileTextLines.join('\n') - // Write back to file - await fs.writeFile(filePath, newFileText, 'utf-8') + await sandbox.writeText(filePath, newFileText) - // Create snippet - show lines around the insertion point - // Show 4 lines before the insertion line and 4 lines after const snippetStartLine = Math.max(0, insertLine - SNIPPET_LINES) const snippetEndLine = Math.min(newFileTextLines.length, insertLine + newStrLines.length + SNIPPET_LINES) - const snippetLines = newFileTextLines.slice(snippetStartLine, snippetEndLine) - const snippet = snippetLines.join('\n') - const startLine = snippetStartLine + 1 + const snippet = newFileTextLines.slice(snippetStartLine, snippetEndLine).join('\n') - const successMsg = `The file ${filePath} has been edited. ${makeOutput(snippet, 'a snippet of the edited file', startLine)}Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the file again if necessary.` + return `The file ${filePath} has been edited. ${makeOutput(snippet, 'a snippet of the edited file', snippetStartLine + 1)}Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the file again if necessary.` +} + +async function handleGrep( + sandbox: Sandbox, + dirPath: string, + pattern: string, + include: string | undefined, + maxResults: number | undefined +): Promise { + const includeFlag = include ? 
` --include=${shellQuote(include)}` : '' + const result = await sandbox.execute(`grep -rn${includeFlag} ${shellQuote(pattern)} ${shellQuote(dirPath)}`) - return successMsg + if (result.exitCode === 1) { + return `No matches found for pattern \`${pattern}\` in ${dirPath}` + } + if (result.exitCode !== 0) { + throw new Error(result.stderr || `grep failed with exit code ${result.exitCode}`) + } + + const lines = result.stdout.trim().split('\n') + const limited = maxResults ? lines.slice(0, maxResults) : lines + + let output = limited.join('\n') + if (maxResults && lines.length > maxResults) { + output += `\n\n(${lines.length - maxResults} more results truncated)` + } + + return output +} + +async function handleGlob( + sandbox: Sandbox, + dirPath: string, + pattern: string, + maxResults: number | undefined +): Promise { + const flag = pattern.includes('/') ? '-path' : '-name' + const result = await sandbox.execute(`find ${shellQuote(dirPath)} ${flag} ${shellQuote(pattern)}`) + + if (result.exitCode !== 0) { + throw new Error(result.stderr || `find failed with exit code ${result.exitCode}`) + } + + const paths = result.stdout.trim().split('\n').filter(Boolean) + + if (paths.length === 0) { + return `No files found matching pattern \`${pattern}\` in ${dirPath}` + } + + const limited = maxResults ? paths.slice(0, maxResults) : paths + + let output = limited.join('\n') + if (maxResults && paths.length > maxResults) { + output += `\n\n(${paths.length - maxResults} more results truncated)` + } + + return output } /** - * Escapes special regex characters in a string. + * Handles the undo command. 
*/ -function escapeRegExp(string: string): string { - return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') +async function handleUndo(sandbox: Sandbox, undoMap: Map, filePath: string): Promise { + const previous = undoMap.get(filePath) + + if (previous === undefined) { + throw new Error(`Nothing to undo for ${filePath}`) + } + + await sandbox.writeText(filePath, previous) + undoMap.delete(filePath) + + return `Reverted: ${filePath}` } diff --git a/strands-ts/src/vended-tools/sandbox-default-tools.ts b/strands-ts/src/vended-tools/sandbox-default-tools.ts new file mode 100644 index 000000000..ccc4ca236 --- /dev/null +++ b/strands-ts/src/vended-tools/sandbox-default-tools.ts @@ -0,0 +1,6 @@ +import type { Tool } from '../tools/tool.js' +import { fileEditor } from './file-editor/file-editor.js' +import { exec } from './exec/exec.js' +import { codeInterpreter } from './code-interpreter/code-interpreter.js' + +export const SANDBOX_DEFAULT_TOOLS: Tool[] = [fileEditor, exec, codeInterpreter] diff --git a/strands-ts/test/integ/sandbox/docker.test.node.ts b/strands-ts/test/integ/sandbox/docker.test.node.ts new file mode 100644 index 000000000..39365cde9 --- /dev/null +++ b/strands-ts/test/integ/sandbox/docker.test.node.ts @@ -0,0 +1,321 @@ +import { describe, it, expect, afterEach } from 'vitest' +import { DockerSandbox } from '../../../src/sandbox/docker.js' +import { createMockAgent } from '../../../src/__fixtures__/agent-helpers.js' +import { exec } from '../../../src/vended-tools/exec/exec.js' +import { fileEditor } from '../../../src/vended-tools/file-editor/file-editor.js' +import { codeInterpreter } from '../../../src/vended-tools/code-interpreter/code-interpreter.js' +import { spawnSync } from 'child_process' + +function dockerAvailable(): boolean { + if (process.platform === 'win32') return false + const result = spawnSync('docker', ['info'], { encoding: 'utf-8', stdio: 'pipe' }) + return result.status === 0 +} + +describe.skipIf(!dockerAvailable())('DockerSandbox', 
() => { + let sandbox: DockerSandbox + + afterEach(async () => { + if (sandbox) { + await sandbox.stop() + } + }) + + describe('lifecycle', () => { + it('start creates a container and stop removes it', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest', name: 'strands-test-docker' }) + await sandbox.start() + + const ps = spawnSync('docker', ['ps', '--filter', 'name=strands-test-docker', '--format', '{{.Names}}'], { + encoding: 'utf-8', + stdio: 'pipe', + }) + expect(ps.stdout.trim()).toBe('strands-test-docker') + + await sandbox.stop() + + const psAfter = spawnSync( + 'docker', + ['ps', '-a', '--filter', 'name=strands-test-docker', '--format', '{{.Names}}'], + { + encoding: 'utf-8', + stdio: 'pipe', + } + ) + expect(psAfter.stdout.trim()).toBe('') + }) + + it('throws if Docker is not running', async () => { + // This test would only fail if Docker stops between the skip check and here + // Just verify the error path exists + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + // start() should succeed since we checked Docker is available + await sandbox.start() + }) + }) + + describe('execute', () => { + it('runs a command inside the container', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + const result = await sandbox.execute('echo hello from docker') + expect(result.exitCode).toBe(0) + expect(result.stdout).toBe('hello from docker\n') + }) + + it('runs in workingDir', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest', workingDir: '/opt' }) + await sandbox.start() + + const result = await sandbox.execute('pwd') + expect(result.stdout.trim()).toBe('/opt') + }) + + it('captures exit code', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + const result = await sandbox.execute('exit 42') + expect(result.exitCode).toBe(42) + }) + + it('captures stderr', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) 
+ await sandbox.start() + + const result = await sandbox.execute('echo err >&2') + expect(result.stderr).toBe('err\n') + }) + + it('respects cwd option', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + const result = await sandbox.execute('pwd', { cwd: '/tmp' }) + expect(result.stdout.trim()).toBe('/tmp') + }) + }) + + describe('executeCode', () => { + it('runs python code', async () => { + sandbox = new DockerSandbox({ image: 'python:3.12-slim' }) + await sandbox.start() + + const result = await sandbox.executeCode('print(6 * 7)', 'python3') + expect(result.exitCode).toBe(0) + expect(result.stdout).toBe('42\n') + }) + }) + + describe('file operations (via base64 over docker exec)', () => { + it('write and read text roundtrip', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + await sandbox.writeText('test.txt', 'hello docker') + const text = await sandbox.readText('test.txt') + expect(text).toBe('hello docker') + }) + + it('binary roundtrip', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + const bytes = new Uint8Array([0, 1, 2, 127, 128, 254, 255]) + await sandbox.writeFile('binary.bin', bytes) + const read = await sandbox.readFile('binary.bin') + expect(Array.from(read)).toStrictEqual(Array.from(bytes)) + }) + + it('remove deletes a file', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + await sandbox.writeText('delete-me.txt', 'bye') + await sandbox.removeFile('delete-me.txt') + await expect(sandbox.readFile('delete-me.txt')).rejects.toThrow() + }) + + it('list shows files', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + await sandbox.writeText('a.txt', 'a') + await sandbox.writeText('b.txt', 'b') + const files = await sandbox.listFiles('.') + const names = files.map((f) => f.name) + 
expect(names).toContain('a.txt') + expect(names).toContain('b.txt') + }) + }) + + describe('isolation', () => { + it('files in container do not exist on host', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + await sandbox.writeText('isolated.txt', 'only in container') + + const hostCheck = spawnSync('test', ['-f', '/workspace/isolated.txt']) + expect(hostCheck.status).not.toBe(0) + }) + + it('two containers are isolated from each other', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + const sandbox2 = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox2.start() + + await sandbox.writeText('only-in-1.txt', 'sandbox 1') + await expect(sandbox2.readFile('only-in-1.txt')).rejects.toThrow() + + await sandbox2.stop() + }) + }) + + describe('pause/resume', () => { + it('pauses and resumes container state', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + await sandbox.writeText('persisted.txt', 'survives pause') + const snapshot = await sandbox.pause() + await sandbox.stop() + + expect(snapshot.backendId).toBe('docker') + expect(snapshot.data['imageId']).toBeDefined() + + // Resume from snapshot + sandbox = new DockerSandbox({ image: 'alpine:latest', snapshot }) + await sandbox.start() + + const text = await sandbox.readText('persisted.txt') + expect(text).toBe('survives pause') + + // Cleanup the committed image + spawnSync('docker', ['rmi', snapshot.data['imageId'] as string], { stdio: 'pipe' }) + }) + }) + + describe('volumes', () => { + it('mounts host directory into container', async () => { + spawnSync('mkdir', ['-p', '/tmp/strands-docker-vol-test']) + spawnSync('bash', ['-c', 'echo "from host" > /tmp/strands-docker-vol-test/host-file.txt']) + + sandbox = new DockerSandbox({ + image: 'alpine:latest', + volumes: ['/tmp/strands-docker-vol-test:/mnt/shared'], + }) + await sandbox.start() + + const result = 
await sandbox.execute('cat /mnt/shared/host-file.txt') + expect(result.stdout.trim()).toBe('from host') + + spawnSync('rm', ['-rf', '/tmp/strands-docker-vol-test']) + }) + }) + + describe('env', () => { + it('passes environment variables to container', async () => { + sandbox = new DockerSandbox({ + image: 'alpine:latest', + env: { MY_VAR: 'hello_from_env' }, + }) + await sandbox.start() + + const result = await sandbox.execute('echo $MY_VAR') + expect(result.stdout.trim()).toBe('hello_from_env') + }) + }) + + describe('error handling', () => { + it('throws on stream if container not started', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await expect(sandbox.execute('echo hi')).rejects.toThrow('not running') + }) + + it('throws on pause if container not started', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await expect(sandbox.pause()).rejects.toThrow('not running') + }) + }) + + describe('vended tools execute inside container', () => { + it('exec tool runs command in container, not on host', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + const agent = createMockAgent({ sandbox }) + const context = { + toolUse: { name: 'exec', toolUseId: 'test', input: {} }, + agent, + invocationState: {}, + interrupt: () => { + throw new Error('not implemented') + }, + } + + const result = await exec.invoke({ command: 'cat /etc/os-release' }, context) + expect(result.stdout).toContain('Alpine') + + const hostRelease = spawnSync('cat', ['/etc/os-release'], { encoding: 'utf-8', stdio: 'pipe' }) + if (hostRelease.status === 0) { + expect(hostRelease.stdout).not.toContain('Alpine') + } + }) + + it('fileEditor creates file inside container filesystem', async () => { + sandbox = new DockerSandbox({ image: 'alpine:latest' }) + await sandbox.start() + + const agent = createMockAgent({ sandbox }) + const context = { + toolUse: { name: 'fileEditor', toolUseId: 'test', input: {} }, 
+ agent, + invocationState: {}, + interrupt: () => { + throw new Error('not implemented') + }, + } + + const uuid = `${Date.now()}-${Math.random().toString(36).slice(2)}` + await fileEditor.invoke({ command: 'create', path: '/workspace/docker-test.txt', file_text: uuid }, context) + + const containerContent = await sandbox.readText('/workspace/docker-test.txt') + expect(containerContent).toBe(uuid) + + const hostCheck = spawnSync('test', ['-f', '/workspace/docker-test.txt']) + expect(hostCheck.status).not.toBe(0) + }) + + it('codeInterpreter runs code inside container', async () => { + sandbox = new DockerSandbox({ image: 'python:3.12-slim' }) + await sandbox.start() + + const agent = createMockAgent({ sandbox }) + const context = { + toolUse: { name: 'codeInterpreter', toolUseId: 'test', input: {} }, + agent, + invocationState: {}, + interrupt: () => { + throw new Error('not implemented') + }, + } + + const uuid = `${Date.now()}-${Math.random().toString(36).slice(2)}` + const result = await codeInterpreter.invoke( + { code: `open("/workspace/from-python.txt", "w").write("${uuid}")`, language: 'python3' }, + context + ) + expect(result.exitCode).toBe(0) + + const containerContent = await sandbox.readText('/workspace/from-python.txt') + expect(containerContent).toBe(uuid) + + const hostCheck = spawnSync('test', ['-f', '/workspace/from-python.txt']) + expect(hostCheck.status).not.toBe(0) + }) + }) +}) diff --git a/strands-ts/vitest.config.ts b/strands-ts/vitest.config.ts index 657ebc62d..0ede1adb0 100644 --- a/strands-ts/vitest.config.ts +++ b/strands-ts/vitest.config.ts @@ -49,6 +49,10 @@ export default defineConfig({ 'src/vended-plugins/**/__tests__/**/*.test.ts', 'src/vended-plugins/**/__tests__/**/*.test.browser.ts', ], + exclude: [ + 'src/sandbox/__tests__/remote.test.ts', + 'src/agent/__tests__/agent.model-retry.test.ts', + ], name: { label: 'unit-browser', color: 'cyan' }, browser: { enabled: true,