Skip to content

Commit fa24e02

Browse files
buger and claude committed
feat: Add readImage tool and fix automatic image processing
This commit fixes an issue where images mentioned in AI responses were automatically loaded, and introduces a new `readImage` tool for explicit image loading control.

Changes:
- Remove automatic image processing from AI responses (lines 1703-1706)
- Keep automatic image processing for tool results (existing behavior)
- Add new `readImage` tool for explicit image loading by AI
- Add comprehensive test suite (12 tests, all passing)

The readImage tool allows AI to explicitly request image loading via:
<readImage><path>image.png</path></readImage>

Images are now loaded:
- Automatically: user messages and tool results
- Explicitly: via readImage tool call
- Never: AI response mentions (unless using the tool)

Security features:
- Path validation (allowed directories only)
- File size limits (20MB max)
- Format validation (png, jpg, jpeg, webp, bmp, svg)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 1585eb0 commit fa24e02

3 files changed

Lines changed: 314 additions & 4 deletions

File tree

npm/src/agent/ProbeAgent.js

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import {
2727
bashToolDefinition,
2828
listFilesToolDefinition,
2929
searchFilesToolDefinition,
30+
readImageToolDefinition,
3031
attemptCompletionToolDefinition,
3132
implementToolDefinition,
3233
editToolDefinition,
@@ -399,6 +400,23 @@ export class ProbeAgent {
399400
delegate: wrappedTools.delegateToolInstance,
400401
listFiles: listFilesToolInstance,
401402
searchFiles: searchFilesToolInstance,
403+
readImage: {
404+
execute: async (params) => {
405+
const imagePath = params.path;
406+
if (!imagePath) {
407+
throw new Error('Image path is required');
408+
}
409+
410+
// Load the image using the existing loadImageIfValid method
411+
const loaded = await this.loadImageIfValid(imagePath);
412+
413+
if (!loaded) {
414+
throw new Error(`Failed to load image: ${imagePath}. The file may not exist, be too large, have an unsupported format, or be outside allowed directories.`);
415+
}
416+
417+
return `Image loaded successfully: ${imagePath}. The image is now available for analysis in the conversation.`;
418+
}
419+
}
402420
};
403421

404422
// Add bash tool if enabled
@@ -1172,6 +1190,9 @@ export class ProbeAgent {
11721190
if (isToolAllowed('searchFiles')) {
11731191
toolDefinitions += `${searchFilesToolDefinition}\n`;
11741192
}
1193+
if (isToolAllowed('readImage')) {
1194+
toolDefinitions += `${readImageToolDefinition}\n`;
1195+
}
11751196

11761197
// Edit tools (require both allowEdit flag AND allowedTools permission)
11771198
if (this.allowEdit && isToolAllowed('implement')) {
@@ -1262,6 +1283,7 @@ Available Tools:
12621283
- extract: Extract specific code blocks or lines from files.
12631284
- listFiles: List files and directories in a specified location.
12641285
- searchFiles: Find files matching a glob pattern with recursive search capability.
1286+
- readImage: Read and load an image file for AI analysis.
12651287
${this.allowEdit ? '- implement: Implement a feature or fix a bug using aider.\n- edit: Edit files using exact string replacement.\n- create: Create new files with specified content.\n' : ''}${this.enableDelegate ? '- delegate: Delegate big distinct tasks to specialized probe subagents.\n' : ''}${this.enableBash ? '- bash: Execute bash commands for system operations.\n' : ''}
12661288
- attempt_completion: Finalize the task and provide the result to the user.
12671289
- attempt_complete: Quick completion using previous response (shorthand).
@@ -1700,10 +1722,8 @@ When troubleshooting:
17001722
console.log(`[DEBUG] Assistant response (${assistantResponseContent.length} chars): ${assistantPreview}`);
17011723
}
17021724

1703-
// Process image references in assistant response for next iteration
1704-
if (assistantResponseContent) {
1705-
await this.processImageReferences(assistantResponseContent);
1706-
}
1725+
// Images in assistant responses are not automatically processed
1726+
// AI can use the readImage tool to explicitly request reading an image
17071727

17081728
// Parse tool call from response with valid tools list
17091729
// Build validTools based on allowedTools configuration (same pattern as getSystemMessage)
@@ -1713,6 +1733,7 @@ When troubleshooting:
17131733
if (this.allowedTools.isEnabled('extract')) validTools.push('extract');
17141734
if (this.allowedTools.isEnabled('listFiles')) validTools.push('listFiles');
17151735
if (this.allowedTools.isEnabled('searchFiles')) validTools.push('searchFiles');
1736+
if (this.allowedTools.isEnabled('readImage')) validTools.push('readImage');
17161737
if (this.allowedTools.isEnabled('attempt_completion')) validTools.push('attempt_completion');
17171738

17181739
// Edit tools (require both allowEdit flag AND allowedTools permission)

npm/src/agent/tools.js

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,31 @@ User: Find all markdown files in the docs directory, but only at the top level.
154154
</examples>
155155
`;
156156

157+
// Define the readImage tool XML definition
158+
export const readImageToolDefinition = `
159+
## readImage
160+
Description: Read and load an image file so it can be viewed by the AI. Use this when you need to analyze, describe, or work with image content. Images from user messages are automatically loaded, but use this tool to explicitly read images mentioned in tool outputs or when you need to examine specific image files.
161+
162+
Parameters:
163+
- path: (required) The path to the image file to read. Supports png, jpg, jpeg, webp, bmp, and svg formats.
164+
165+
Usage Example:
166+
167+
<examples>
168+
169+
User: Can you describe what's in screenshot.png?
170+
<readImage>
171+
<path>screenshot.png</path>
172+
</readImage>
173+
174+
User: Analyze the diagram in docs/architecture.svg
175+
<readImage>
176+
<path>docs/architecture.svg</path>
177+
</readImage>
178+
179+
</examples>
180+
`;
181+
157182
/**
158183
* Enhanced XML parser that handles thinking tags and attempt_complete shorthand
159184
* This function removes any <thinking></thinking> tags from the input string
Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
import { jest, describe, test, expect, beforeEach, afterEach } from '@jest/globals';
2+
3+
// Mock all the heavy dependencies that ProbeAgent uses
4+
jest.mock('@ai-sdk/anthropic', () => ({}));
5+
jest.mock('@ai-sdk/openai', () => ({}));
6+
jest.mock('@ai-sdk/google', () => ({}));
7+
jest.mock('@ai-sdk/amazon-bedrock', () => ({}));
8+
jest.mock('ai', () => ({
9+
generateText: jest.fn(),
10+
streamText: jest.fn(),
11+
tool: jest.fn((config) => ({
12+
name: config.name,
13+
description: config.description,
14+
inputSchema: config.inputSchema,
15+
execute: config.execute
16+
}))
17+
}));
18+
19+
import { ProbeAgent } from '../../src/agent/ProbeAgent.js';
20+
import { writeFileSync, unlinkSync, existsSync, mkdirSync, rmSync } from 'fs';
21+
import { join } from 'path';
22+
23+
describe('ReadImage Tool', () => {
24+
let testDir;
25+
let agent;
26+
let testImagePath;
27+
28+
beforeEach(() => {
29+
// Create a test directory structure
30+
testDir = join(process.cwd(), 'test-readimage-temp');
31+
if (!existsSync(testDir)) {
32+
mkdirSync(testDir, { recursive: true });
33+
}
34+
35+
// Create a simple 1x1 PNG image
36+
const simplePng = Buffer.from([
37+
0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A,
38+
0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52,
39+
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
40+
0x08, 0x06, 0x00, 0x00, 0x00, 0x1F, 0x15, 0xC4,
41+
0x89, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x44, 0x41,
42+
0x54, 0x78, 0x9C, 0x62, 0x00, 0x02, 0x00, 0x00,
43+
0x05, 0x00, 0x01, 0x0D, 0x0A, 0x2D, 0xB4, 0x00,
44+
0x00, 0x00, 0x00, 0x49, 0x45, 0x4E, 0x44, 0xAE,
45+
0x42, 0x60, 0x82
46+
]);
47+
48+
testImagePath = join(testDir, 'test-screenshot.png');
49+
writeFileSync(testImagePath, simplePng);
50+
51+
// Initialize agent with the test directory
52+
agent = new ProbeAgent({
53+
debug: false,
54+
path: testDir
55+
});
56+
});
57+
58+
afterEach(() => {
59+
// Cleanup
60+
if (existsSync(testDir)) {
61+
rmSync(testDir, { recursive: true, force: true });
62+
}
63+
});
64+
65+
describe('Tool availability', () => {
66+
test('readImage tool should be available in toolImplementations', () => {
67+
expect(agent.toolImplementations).toHaveProperty('readImage');
68+
expect(agent.toolImplementations.readImage).toHaveProperty('execute');
69+
expect(typeof agent.toolImplementations.readImage.execute).toBe('function');
70+
});
71+
72+
test('readImage tool should be in allowed tools by default', () => {
73+
expect(agent.allowedTools.isEnabled('readImage')).toBe(true);
74+
});
75+
});
76+
77+
describe('Tool execution', () => {
78+
test('should successfully load image when given valid path', async () => {
79+
const result = await agent.toolImplementations.readImage.execute({
80+
path: testImagePath
81+
});
82+
83+
expect(result).toContain('Image loaded successfully');
84+
expect(result).toContain(testImagePath);
85+
86+
// Verify image was actually loaded into pendingImages
87+
expect(agent.pendingImages.has(testImagePath)).toBe(true);
88+
89+
// Verify it can be retrieved
90+
const loadedImages = agent.getCurrentImages();
91+
expect(loadedImages.length).toBeGreaterThan(0);
92+
expect(loadedImages[0]).toMatch(/^data:image\/png;base64,/);
93+
});
94+
95+
test('should throw error when path parameter is missing', async () => {
96+
await expect(
97+
agent.toolImplementations.readImage.execute({})
98+
).rejects.toThrow('Image path is required');
99+
});
100+
101+
test('should throw error when image file does not exist', async () => {
102+
const nonExistentPath = join(testDir, 'nonexistent.png');
103+
104+
await expect(
105+
agent.toolImplementations.readImage.execute({
106+
path: nonExistentPath
107+
})
108+
).rejects.toThrow();
109+
});
110+
111+
test('should handle relative paths correctly', async () => {
112+
// Create image in a subdirectory
113+
const subDir = join(testDir, 'images');
114+
mkdirSync(subDir, { recursive: true });
115+
116+
const simplePng = Buffer.from([
117+
0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A,
118+
0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52,
119+
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
120+
0x08, 0x06, 0x00, 0x00, 0x00, 0x1F, 0x15, 0xC4,
121+
0x89, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x44, 0x41,
122+
0x54, 0x78, 0x9C, 0x62, 0x00, 0x02, 0x00, 0x00,
123+
0x05, 0x00, 0x01, 0x0D, 0x0A, 0x2D, 0xB4, 0x00,
124+
0x00, 0x00, 0x00, 0x49, 0x45, 0x4E, 0x44, 0xAE,
125+
0x42, 0x60, 0x82
126+
]);
127+
128+
const imagePath = join(subDir, 'relative.png');
129+
writeFileSync(imagePath, simplePng);
130+
131+
const result = await agent.toolImplementations.readImage.execute({
132+
path: imagePath
133+
});
134+
135+
expect(result).toContain('Image loaded successfully');
136+
expect(agent.pendingImages.has(imagePath)).toBe(true);
137+
});
138+
139+
test('should support multiple image formats', async () => {
140+
const formats = ['test.png', 'test.jpg', 'test.jpeg', 'test.webp', 'test.bmp'];
141+
142+
// Create a simple PNG for all tests (format validation happens elsewhere)
143+
const simplePng = Buffer.from([
144+
0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A,
145+
0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52,
146+
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
147+
0x08, 0x06, 0x00, 0x00, 0x00, 0x1F, 0x15, 0xC4,
148+
0x89, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x44, 0x41,
149+
0x54, 0x78, 0x9C, 0x62, 0x00, 0x02, 0x00, 0x00,
150+
0x05, 0x00, 0x01, 0x0D, 0x0A, 0x2D, 0xB4, 0x00,
151+
0x00, 0x00, 0x00, 0x49, 0x45, 0x4E, 0x44, 0xAE,
152+
0x42, 0x60, 0x82
153+
]);
154+
155+
for (const filename of formats) {
156+
const imagePath = join(testDir, filename);
157+
writeFileSync(imagePath, simplePng);
158+
159+
const result = await agent.toolImplementations.readImage.execute({
160+
path: imagePath
161+
});
162+
163+
expect(result).toContain('Image loaded successfully');
164+
expect(agent.pendingImages.has(imagePath)).toBe(true);
165+
}
166+
});
167+
168+
test('should not load the same image twice', async () => {
169+
// Load image first time
170+
await agent.toolImplementations.readImage.execute({
171+
path: testImagePath
172+
});
173+
174+
const imagesAfterFirst = agent.getCurrentImages().length;
175+
176+
// Load same image again
177+
await agent.toolImplementations.readImage.execute({
178+
path: testImagePath
179+
});
180+
181+
const imagesAfterSecond = agent.getCurrentImages().length;
182+
183+
// Should still have same number of images (no duplicate)
184+
expect(imagesAfterSecond).toBe(imagesAfterFirst);
185+
});
186+
});
187+
188+
describe('Security', () => {
189+
test('should respect allowed folders security', async () => {
190+
// Create agent with restricted allowed folders
191+
const restrictedAgent = new ProbeAgent({
192+
debug: false,
193+
path: testDir,
194+
allowedFolders: [testDir] // Only allow test directory
195+
});
196+
197+
// Try to load image outside allowed folder
198+
const outsidePath = '/tmp/malicious.png';
199+
200+
await expect(
201+
restrictedAgent.toolImplementations.readImage.execute({
202+
path: outsidePath
203+
})
204+
).rejects.toThrow();
205+
});
206+
207+
test('should validate file size limits', async () => {
208+
// The loadImageIfValid method should enforce MAX_IMAGE_FILE_SIZE (20MB)
209+
// This test only confirms that a small image well under that limit still loads; it does not exercise rejection of an oversized file
210+
const result = await agent.toolImplementations.readImage.execute({
211+
path: testImagePath
212+
});
213+
214+
expect(result).toContain('Image loaded successfully');
215+
});
216+
});
217+
218+
describe('Integration with message flow', () => {
219+
test('loaded images should be available in getCurrentImages', async () => {
220+
agent.clearLoadedImages();
221+
222+
await agent.toolImplementations.readImage.execute({
223+
path: testImagePath
224+
});
225+
226+
const images = agent.getCurrentImages();
227+
expect(images.length).toBe(1);
228+
expect(images[0]).toMatch(/^data:image\/png;base64,/);
229+
});
230+
231+
test('should work alongside automatic image processing from tool results', async () => {
232+
// Clear any existing images
233+
agent.clearLoadedImages();
234+
235+
// Simulate tool result that mentions an image
236+
const toolResultWithImage = `Found the file at ${testImagePath}`;
237+
await agent.processImageReferences(toolResultWithImage);
238+
239+
const imagesFromAutomatic = agent.getCurrentImages().length;
240+
241+
// Now explicitly read another image
242+
const anotherImage = join(testDir, 'another.png');
243+
const simplePng = Buffer.from([
244+
0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A,
245+
0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52,
246+
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
247+
0x08, 0x06, 0x00, 0x00, 0x00, 0x1F, 0x15, 0xC4,
248+
0x89, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x44, 0x41,
249+
0x54, 0x78, 0x9C, 0x62, 0x00, 0x02, 0x00, 0x00,
250+
0x05, 0x00, 0x01, 0x0D, 0x0A, 0x2D, 0xB4, 0x00,
251+
0x00, 0x00, 0x00, 0x49, 0x45, 0x4E, 0x44, 0xAE,
252+
0x42, 0x60, 0x82
253+
]);
254+
writeFileSync(anotherImage, simplePng);
255+
256+
await agent.toolImplementations.readImage.execute({
257+
path: anotherImage
258+
});
259+
260+
const totalImages = agent.getCurrentImages().length;
261+
expect(totalImages).toBeGreaterThan(imagesFromAutomatic);
262+
});
263+
});
264+
});

0 commit comments

Comments
 (0)