Skip to content

Commit 4b8e9f2

Browse files
authored
feat: allow skipping snapshot generation for input tools (#821)
Input tools no longer regenerate a snapshot by default; callers can opt in to receiving one by setting `includeSnapshot` to true. Refs: #726
1 parent cdb00fc commit 4b8e9f2

File tree

4 files changed

+127
-8
lines changed

4 files changed

+127
-8
lines changed

docs/tool-reference.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545

4646
- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot
4747
- **dblClick** (boolean) _(optional)_: Set to true for double clicks. Default is false.
48+
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.
4849

4950
---
5051

@@ -56,6 +57,7 @@
5657

5758
- **from_uid** (string) **(required)**: The uid of the element to [`drag`](#drag)
5859
- **to_uid** (string) **(required)**: The uid of the element to drop into
60+
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.
5961

6062
---
6163

@@ -67,6 +69,7 @@
6769

6870
- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot
6971
- **value** (string) **(required)**: The value to [`fill`](#fill) in
72+
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.
7073

7174
---
7275

@@ -77,6 +80,7 @@
7780
**Parameters:**
7881

7982
- **elements** (array) **(required)**: Elements from snapshot to [`fill`](#fill) out.
83+
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.
8084

8185
---
8286

@@ -98,6 +102,7 @@
98102
**Parameters:**
99103

100104
- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot
105+
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.
101106

102107
---
103108

@@ -108,6 +113,7 @@
108113
**Parameters:**
109114

110115
- **key** (string) **(required)**: A key or a combination (e.g., "Enter", "Control+A", "Control++", "Control+Shift+R"). Modifiers: Control, Shift, Alt, Meta
116+
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.
111117

112118
---
113119

@@ -119,6 +125,7 @@
119125

120126
- **filePath** (string) **(required)**: The local path of the file to upload
121127
- **uid** (string) **(required)**: The uid of the file input element or an element that will open file chooser on the page from the page content snapshot
128+
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.
122129

123130
---
124131

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
import assert from 'node:assert';
8+
9+
import type {TestScenario} from '../eval_gemini.ts';
10+
11+
export const scenario: TestScenario = {
12+
prompt:
13+
'Go to <TEST_URL>, fill the input with "hello world" and click the button five times in parallel.',
14+
maxTurns: 10,
15+
htmlRoute: {
16+
path: '/input_test.html',
17+
htmlContent: `
18+
<input type="text" id="test-input" />
19+
<button id="test-button">Submit</button>
20+
`,
21+
},
22+
expectations: calls => {
23+
assert.strictEqual(calls.length, 8);
24+
assert.ok(
25+
calls[0].name === 'navigate_page' || calls[0].name === 'new_page',
26+
);
27+
assert.ok(calls[1].name === 'take_snapshot');
28+
assert.ok(calls[2].name === 'fill');
29+
for (let i = 3; i < 8; i++) {
30+
assert.ok(calls[i].name === 'click');
31+
assert.strictEqual(Boolean(calls[i].args.includeSnapshot), false);
32+
}
33+
},
34+
};

src/tools/input.ts

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ const dblClickSchema = zod
1717
.optional()
1818
.describe('Set to true for double clicks. Default is false.');
1919

20+
const includeSnapshotSchema = zod
21+
.boolean()
22+
.optional()
23+
.describe('Whether to include a snapshot in the response. Default is false.');
24+
2025
export const click = defineTool({
2126
name: 'click',
2227
description: `Clicks on the provided element`,
@@ -31,6 +36,7 @@ export const click = defineTool({
3136
'The uid of an element on the page from the page content snapshot',
3237
),
3338
dblClick: dblClickSchema,
39+
includeSnapshot: includeSnapshotSchema,
3440
},
3541
handler: async (request, response, context) => {
3642
const uid = request.params.uid;
@@ -46,7 +52,9 @@ export const click = defineTool({
4652
? `Successfully double clicked on the element`
4753
: `Successfully clicked on the element`,
4854
);
49-
response.includeSnapshot();
55+
if (request.params.includeSnapshot) {
56+
response.includeSnapshot();
57+
}
5058
} finally {
5159
void handle.dispose();
5260
}
@@ -65,6 +73,7 @@ export const clickAt = defineTool({
6573
x: zod.number().describe('The x coordinate'),
6674
y: zod.number().describe('The y coordinate'),
6775
dblClick: dblClickSchema,
76+
includeSnapshot: includeSnapshotSchema,
6877
},
6978
handler: async (request, response, context) => {
7079
const page = context.getSelectedPage();
@@ -78,7 +87,9 @@ export const clickAt = defineTool({
7887
? `Successfully double clicked at the coordinates`
7988
: `Successfully clicked at the coordinates`,
8089
);
81-
response.includeSnapshot();
90+
if (request.params.includeSnapshot) {
91+
response.includeSnapshot();
92+
}
8293
},
8394
});
8495

@@ -95,6 +106,7 @@ export const hover = defineTool({
95106
.describe(
96107
'The uid of an element on the page from the page content snapshot',
97108
),
109+
includeSnapshot: includeSnapshotSchema,
98110
},
99111
handler: async (request, response, context) => {
100112
const uid = request.params.uid;
@@ -104,7 +116,9 @@ export const hover = defineTool({
104116
await handle.asLocator().hover();
105117
});
106118
response.appendResponseLine(`Successfully hovered over the element`);
107-
response.includeSnapshot();
119+
if (request.params.includeSnapshot) {
120+
response.includeSnapshot();
121+
}
108122
} finally {
109123
void handle.dispose();
110124
}
@@ -185,6 +199,7 @@ export const fill = defineTool({
185199
'The uid of an element on the page from the page content snapshot',
186200
),
187201
value: zod.string().describe('The value to fill in'),
202+
includeSnapshot: includeSnapshotSchema,
188203
},
189204
handler: async (request, response, context) => {
190205
await context.waitForEventsAfterAction(async () => {
@@ -196,7 +211,9 @@ export const fill = defineTool({
196211
);
197212
});
198213
response.appendResponseLine(`Successfully filled out the element`);
199-
response.includeSnapshot();
214+
if (request.params.includeSnapshot) {
215+
response.includeSnapshot();
216+
}
200217
},
201218
});
202219

@@ -210,6 +227,7 @@ export const drag = defineTool({
210227
schema: {
211228
from_uid: zod.string().describe('The uid of the element to drag'),
212229
to_uid: zod.string().describe('The uid of the element to drop into'),
230+
includeSnapshot: includeSnapshotSchema,
213231
},
214232
handler: async (request, response, context) => {
215233
const fromHandle = await context.getElementByUid(request.params.from_uid);
@@ -221,7 +239,9 @@ export const drag = defineTool({
221239
await toHandle.drop(fromHandle);
222240
});
223241
response.appendResponseLine(`Successfully dragged an element`);
224-
response.includeSnapshot();
242+
if (request.params.includeSnapshot) {
243+
response.includeSnapshot();
244+
}
225245
} finally {
226246
void fromHandle.dispose();
227247
void toHandle.dispose();
@@ -245,6 +265,7 @@ export const fillForm = defineTool({
245265
}),
246266
)
247267
.describe('Elements from snapshot to fill out.'),
268+
includeSnapshot: includeSnapshotSchema,
248269
},
249270
handler: async (request, response, context) => {
250271
for (const element of request.params.elements) {
@@ -257,7 +278,9 @@ export const fillForm = defineTool({
257278
});
258279
}
259280
response.appendResponseLine(`Successfully filled out the form`);
260-
response.includeSnapshot();
281+
if (request.params.includeSnapshot) {
282+
response.includeSnapshot();
283+
}
261284
},
262285
});
263286

@@ -275,6 +298,7 @@ export const uploadFile = defineTool({
275298
'The uid of the file input element or an element that will open file chooser on the page from the page content snapshot',
276299
),
277300
filePath: zod.string().describe('The local path of the file to upload'),
301+
includeSnapshot: includeSnapshotSchema,
278302
},
279303
handler: async (request, response, context) => {
280304
const {uid, filePath} = request.params;
@@ -301,7 +325,9 @@ export const uploadFile = defineTool({
301325
);
302326
}
303327
}
304-
response.includeSnapshot();
328+
if (request.params.includeSnapshot) {
329+
response.includeSnapshot();
330+
}
305331
response.appendResponseLine(`File uploaded from ${filePath}.`);
306332
} finally {
307333
void handle.dispose();
@@ -322,6 +348,7 @@ export const pressKey = defineTool({
322348
.describe(
323349
'A key or a combination (e.g., "Enter", "Control+A", "Control++", "Control+Shift+R"). Modifiers: Control, Shift, Alt, Meta',
324350
),
351+
includeSnapshot: includeSnapshotSchema,
325352
},
326353
handler: async (request, response, context) => {
327354
const page = context.getSelectedPage();
@@ -341,6 +368,8 @@ export const pressKey = defineTool({
341368
response.appendResponseLine(
342369
`Successfully pressed key: ${request.params.key}`,
343370
);
344-
response.includeSnapshot();
371+
if (request.params.includeSnapshot) {
372+
response.includeSnapshot();
373+
}
345374
},
346375
});

tests/tools/input.test.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,55 @@ describe('input', () => {
155155
assert(handlerResolveTime > buttonChangeTime, 'Waited for navigation');
156156
});
157157
});
158+
159+
it('does not include snapshot by default', async () => {
160+
await withMcpContext(async (response, context) => {
161+
const page = context.getSelectedPage();
162+
await page.setContent(
163+
html`<button onclick="this.innerText = 'clicked';">test</button>`,
164+
);
165+
await context.createTextSnapshot();
166+
await click.handler(
167+
{
168+
params: {
169+
uid: '1_1',
170+
},
171+
},
172+
response,
173+
context,
174+
);
175+
assert.strictEqual(
176+
response.responseLines[0],
177+
'Successfully clicked on the element',
178+
);
179+
assert.strictEqual(response.snapshotParams, undefined);
180+
});
181+
});
182+
183+
it('includes snapshot if includeSnapshot is true', async () => {
184+
await withMcpContext(async (response, context) => {
185+
const page = context.getSelectedPage();
186+
await page.setContent(
187+
html`<button onclick="this.innerText = 'clicked';">test</button>`,
188+
);
189+
await context.createTextSnapshot();
190+
await click.handler(
191+
{
192+
params: {
193+
uid: '1_1',
194+
includeSnapshot: true,
195+
},
196+
},
197+
response,
198+
context,
199+
);
200+
assert.strictEqual(
201+
response.responseLines[0],
202+
'Successfully clicked on the element',
203+
);
204+
assert.notStrictEqual(response.snapshotParams, undefined);
205+
});
206+
});
158207
});
159208

160209
describe('hover', () => {

0 commit comments

Comments
 (0)