test: add browser agent integration tests (google-gemini#21151)

kunal-10-cloud · web-flow · commit 09259a8ec497 · 2026-03-05T13:29:35.000Z
diff --git a/.github/workflows/chained_e2e.yml b/.github/workflows/chained_e2e.yml
@@ -264,6 +264,27 @@ jobs:
         run: 'npm run build'
         shell: 'pwsh'
 
+      - name: 'Ensure Chrome is available'
+        shell: 'pwsh'
+        run: |
+          $chromePaths = @( # standard per-machine install locations (64-bit and 32-bit roots)
+            "${env:ProgramFiles}\Google\Chrome\Application\chrome.exe",
+            "${env:ProgramFiles(x86)}\Google\Chrome\Application\chrome.exe"
+          )
+          $chromeExists = $chromePaths | Where-Object { Test-Path $_ } | Select-Object -First 1
+          if (-not $chromeExists) {
+            Write-Host 'Chrome not found, installing via Chocolatey...'
+            choco install googlechrome -y --no-progress --ignore-checksums # NOTE(review): checksum verification is skipped here; confirm this is intended
+          }
+          $installed = $chromePaths | Where-Object { Test-Path $_ } | Select-Object -First 1 # re-probe after the optional install
+          if ($installed) {
+            # Read the version from file metadata: on Windows `chrome.exe --version` opens a browser window instead of printing.
+            Write-Host "Chrome found at: $installed (version $((Get-Item $installed).VersionInfo.ProductVersion))"
+          } else {
+            Write-Error 'Chrome installation failed'
+            exit 1
+          }
+
       - name: 'Run E2E tests'
         env:
           GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
diff --git a/integration-tests/browser-agent.cleanup.responses b/integration-tests/browser-agent.cleanup.responses
@@ -0,0 +1,2 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll open https://example.com and check the page title for you."},{"functionCall":{"name":"browser_agent","args":{"task":"Open https://example.com and get the page title"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":35,"totalTokenCount":135}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The page title of https://example.com is \"Example Domain\". The browser session has been completed and cleaned up successfully."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":30,"totalTokenCount":230}}]}
diff --git a/integration-tests/browser-agent.interaction.responses b/integration-tests/browser-agent.interaction.responses
@@ -0,0 +1,2 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll navigate to https://example.com and analyze the links on the page."},{"functionCall":{"name":"browser_agent","args":{"task":"Go to https://example.com and find all links on the page, then describe them"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":40,"totalTokenCount":140}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"After analyzing https://example.com, I found the following links:\n\n1. **\"More information...\"** - This is the main link on the page that points to the IANA (Internet Assigned Numbers Authority) website for more details about reserved domains.\n\nThe page is quite minimal with just this single informational link, which is typical for example domains used in documentation."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":70,"totalTokenCount":270}}]}
diff --git a/integration-tests/browser-agent.navigate-snapshot.responses b/integration-tests/browser-agent.navigate-snapshot.responses
@@ -0,0 +1,2 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll help you open https://example.com and analyze the page. Let me use the browser agent to navigate and capture the page information."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to https://example.com and capture the accessibility tree to get the page title and main content"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":50,"totalTokenCount":150}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Based on the browser analysis of https://example.com:\n\n**Page Title**: Example Domain\n\n**Main Content**: The page contains a simple heading \"Example Domain\" and explanatory text stating \"This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.\"\n\nThe page has a clean, minimal layout typical of placeholder domains used for documentation and examples."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":80,"totalTokenCount":280}}]}
diff --git a/integration-tests/browser-agent.screenshot.responses b/integration-tests/browser-agent.screenshot.responses
@@ -0,0 +1,2 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll navigate to https://example.com and take a screenshot for you."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to https://example.com and take a screenshot of the page"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":40,"totalTokenCount":140}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I've successfully navigated to https://example.com and captured a screenshot. The page shows the familiar \"Example Domain\" header with explanatory text below it. The screenshot captures the clean, minimal layout of this demonstration website."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":50,"totalTokenCount":250}}]}
diff --git a/integration-tests/browser-agent.sequential.responses b/integration-tests/browser-agent.sequential.responses
@@ -0,0 +1,2 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll help you navigate to https://example.com and perform both operations - capturing the accessibility tree and taking a screenshot."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to https://example.com, take a snapshot of the accessibility tree, then take a screenshot"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":45,"totalTokenCount":145}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I've successfully completed both operations on https://example.com:\n\n**Accessibility Tree Snapshot**: The page has a clear structure with the main heading \"Example Domain\" and descriptive text about the domain's purpose for documentation examples.\n\n**Screenshot**: Captured a visual representation of the page showing the clean, minimal layout with the heading and explanatory text.\n\nBoth the accessibility data and visual screenshot confirm this is the standard example domain page used for documentation purposes."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":80,"totalTokenCount":280}}]}
diff --git a/integration-tests/browser-agent.test.ts b/integration-tests/browser-agent.test.ts
@@ -0,0 +1,206 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Integration tests for the browser agent.
+ *
+ * These tests verify the complete end-to-end flow from CLI prompt through
+ * browser_agent delegation to MCP/Chrome DevTools and back. Unlike the unit
+ * tests in packages/core/src/agents/browser/ which mock all MCP components,
+ * these tests launch real Chrome instances in headless mode.
+ *
+ * Tests are skipped on systems without Chrome/Chromium installed.
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { TestRig, assertModelHasOutput } from './test-helper.js';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { execSync } from 'node:child_process';
+import { existsSync } from 'node:fs';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+const chromeAvailable = (() => {
+  // True when Chrome/Chromium looks installed; a failed lookup means "no".
+  const onPath = (lookup: string): boolean => {
+    try {
+      execSync(lookup, { stdio: 'ignore' });
+      return true;
+    } catch {
+      return false;
+    }
+  };
+  const anyExists = (paths: string[]) => paths.some((p) => existsSync(p));
+  switch (process.platform) {
+    case 'darwin': // fs probe; no need to spawn a shell for `test -d`
+      return anyExists([
+        '/Applications/Google Chrome.app',
+        '/Applications/Chromium.app',
+      ]);
+    case 'linux':
+      return onPath(
+        'which google-chrome || which chromium-browser || which chromium',
+      );
+    case 'win32': {
+      const local = process.env['LOCALAPPDATA']; // per-user root; may be unset
+      const chromePaths = [
+        'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
+        'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
+        ...(local ? [`${local}\\Google\\Chrome\\Application\\chrome.exe`] : []),
+      ];
+      return anyExists(chromePaths) || onPath('where chrome || where chromium');
+    }
+    default:
+      return false;
+  }
+})();
+
+describe.skipIf(!chromeAvailable)('browser-agent', () => {
+  let rig: TestRig;
+
+  // Builds the settings every scenario passes to rig.setup(): browser_agent
+  // with headless: true and sessionMode: 'isolated'. A new object is returned
+  // on each call so tests never share one settings instance.
+  const browserAgentSettings = () =>
+    ({
+      agents: {
+        browser_agent: {
+          headless: true,
+          sessionMode: 'isolated',
+        },
+      },
+    }) as const;
+
+  // Fails the current test unless the tool log has a browser_agent request.
+  const expectBrowserAgentCalled = () => {
+    const browserAgentCall = rig
+      .readToolLogs()
+      .find((t) => t.toolRequest.name === 'browser_agent');
+    expect(
+      browserAgentCall,
+      'Expected browser_agent to be called',
+    ).toBeDefined();
+  };
+
+  beforeEach(() => {
+    rig = new TestRig();
+  });
+
+  afterEach(async () => await rig.cleanup());
+
+  it('should navigate to a page and capture accessibility tree', async () => {
+    rig.setup('browser-navigate-and-snapshot', {
+      fakeResponsesPath: join(
+        __dirname,
+        'browser-agent.navigate-snapshot.responses',
+      ),
+      settings: browserAgentSettings(),
+    });
+
+    const result = await rig.run({
+      args: 'Open https://example.com in the browser and tell me the page title and main content.',
+    });
+
+    assertModelHasOutput(result);
+    expectBrowserAgentCalled();
+  });
+
+  it('should take screenshots of web pages', async () => {
+    rig.setup('browser-screenshot', {
+      fakeResponsesPath: join(__dirname, 'browser-agent.screenshot.responses'),
+      settings: browserAgentSettings(),
+    });
+
+    const result = await rig.run({
+      args: 'Navigate to https://example.com and take a screenshot.',
+    });
+
+    expectBrowserAgentCalled();
+    assertModelHasOutput(result);
+  });
+
+  it('should interact with page elements', async () => {
+    rig.setup('browser-interaction', {
+      fakeResponsesPath: join(__dirname, 'browser-agent.interaction.responses'),
+      settings: browserAgentSettings(),
+    });
+
+    const result = await rig.run({
+      args: 'Go to https://example.com, find any links on the page, and describe them.',
+    });
+
+    expectBrowserAgentCalled();
+    assertModelHasOutput(result);
+  });
+
+  it('should clean up browser processes after completion', async () => {
+    rig.setup('browser-cleanup', {
+      fakeResponsesPath: join(__dirname, 'browser-agent.cleanup.responses'),
+      settings: browserAgentSettings(),
+    });
+
+    const result = await rig.run({
+      args: 'Open https://example.com in the browser and check the page title.',
+    });
+
+    // Reaching this point means the run returned; a hanging browser process
+    // is still detected through Vitest's timeout mechanism. The assertions
+    // below make sure the run actually went through browser_agent, so the
+    // test cannot pass without a browser session having been requested.
+    expectBrowserAgentCalled();
+    assertModelHasOutput(result);
+  });
+
+  it('should handle multiple browser operations in sequence', async () => {
+    rig.setup('browser-sequential', {
+      fakeResponsesPath: join(__dirname, 'browser-agent.sequential.responses'),
+      settings: browserAgentSettings(),
+    });
+
+    const result = await rig.run({
+      args: 'Navigate to https://example.com, take a snapshot of the accessibility tree, then take a screenshot.',
+    });
+
+    expectBrowserAgentCalled();
+
+    // Should successfully complete all operations
+    assertModelHasOutput(result);
+  });
+});
diff --git a/packages/core/src/agents/browser/browserManager.test.ts b/packages/core/src/agents/browser/browserManager.test.ts
@@ -147,7 +147,7 @@ describe('BrowserManager', () => {
       // Verify StdioClientTransport was created with correct args
       expect(StdioClientTransport).toHaveBeenCalledWith(
         expect.objectContaining({
-          command: 'npx',
+          command: process.platform === 'win32' ? 'npx.cmd' : 'npx',
           args: expect.arrayContaining([
             '-y',
             expect.stringMatching(/chrome-devtools-mcp@/),
@@ -185,7 +185,7 @@ describe('BrowserManager', () => {
 
       expect(StdioClientTransport).toHaveBeenCalledWith(
         expect.objectContaining({
-          command: 'npx',
+          command: process.platform === 'win32' ? 'npx.cmd' : 'npx',
           args: expect.arrayContaining(['--headless']),
         }),
       );
@@ -210,7 +210,7 @@ describe('BrowserManager', () => {
 
       expect(StdioClientTransport).toHaveBeenCalledWith(
         expect.objectContaining({
-          command: 'npx',
+          command: process.platform === 'win32' ? 'npx.cmd' : 'npx',
           args: expect.arrayContaining(['--userDataDir', '/path/to/profile']),
         }),
       );
diff --git a/packages/core/src/agents/browser/browserManager.ts b/packages/core/src/agents/browser/browserManager.ts
@@ -283,7 +283,7 @@ export class BrowserManager {
     // stderr is piped (not inherited) to prevent MCP server banners and
     // warnings from corrupting the UI in alternate buffer mode.
     this.mcpTransport = new StdioClientTransport({
-      command: 'npx',
+      command: process.platform === 'win32' ? 'npx.cmd' : 'npx',
       args: mcpArgs,
       stderr: 'pipe',
     });

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll open https://example.com and check the page title for you."},{"functionCall":{"name":"browser_agent","args":{"task":"Open https://example.com and get the page title"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":35,"totalTokenCount":135}}]}`
	`2`	`+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The page title of https://example.com is \"Example Domain\". The browser session has been completed and cleaned up successfully."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":30,"totalTokenCount":230}}]}`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll navigate to https://example.com and analyze the links on the page."},{"functionCall":{"name":"browser_agent","args":{"task":"Go to https://example.com and find all links on the page, then describe them"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":40,"totalTokenCount":140}}]}`
	`2`	+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"After analyzing https://example.com, I found the following links:\n\n1. \"More information...\" - This is the main link on the page that points to the IANA (Internet Assigned Numbers Authority) website for more details about reserved domains.\n\nThe page is quite minimal with just this single informational link, which is typical for example domains used in documentation."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":70,"totalTokenCount":270}}]}