import { pageMetadata } from "@/lib/page-metadata"
export const metadata = pageMetadata("commands")
agent-browser open <url> # Navigate (aliases: goto, navigate)
agent-browser click <sel> # Click element (--new-tab to open in new tab)
agent-browser dblclick <sel> # Double-click
agent-browser fill <sel> <text> # Clear and fill
agent-browser type <sel> <text> # Type into element
agent-browser press <key> # Press key (Enter, Tab, Control+a) (alias: key)
agent-browser keyboard type <text> # Type at current focus (no selector needed)
agent-browser keyboard inserttext <text> # Insert text without key events
agent-browser keydown <key> # Hold key down
agent-browser keyup <key> # Release key
agent-browser hover <sel> # Hover element
agent-browser focus <sel> # Focus element
agent-browser select <sel> <val> # Select dropdown option
agent-browser check <sel> # Check checkbox
agent-browser uncheck <sel> # Uncheck checkbox
agent-browser scroll <dir> [px] # Scroll (up/down/left/right, --selector <sel>)
agent-browser scrollintoview <sel> # Scroll element into view
agent-browser drag <src> <dst> # Drag and drop
agent-browser upload <sel> <files> # Upload files
agent-browser screenshot [path] # Screenshot (--full for full page)
agent-browser screenshot --annotate # Annotated screenshot with numbered element labels
agent-browser pdf <path> # Save page as PDF
agent-browser snapshot # Accessibility tree with refs
agent-browser eval <js> # Run JavaScript
agent-browser connect <port|url> # Connect to browser via CDP
agent-browser close # Close browser (aliases: quit, exit)agent-browser get text <sel> # Get text content
agent-browser get html <sel> # Get innerHTML
agent-browser get value <sel> # Get input value
agent-browser get attr <sel> <attr> # Get attribute
agent-browser get title # Get page title
agent-browser get url # Get current URL
agent-browser get count <sel> # Count matching elements
agent-browser get box <sel> # Get bounding box
agent-browser get styles <sel> # Get computed stylesagent-browser is visible <sel> # Check if visible
agent-browser is enabled <sel> # Check if enabled
agent-browser is checked <sel> # Check if checkedSemantic locators with actions (click, fill, type, hover, focus, check, uncheck, text):
agent-browser find role <role> <action> [value]
agent-browser find text <text> <action>
agent-browser find label <label> <action> [value]
agent-browser find placeholder <ph> <action> [value]
agent-browser find alt <text> <action>
agent-browser find title <text> <action>
agent-browser find testid <id> <action> [value]
agent-browser find first <sel> <action> [value]
agent-browser find last <sel> <action> [value]
agent-browser find nth <n> <sel> <action> [value]Options:
--name <name>-- filter role by accessible name--exact-- require exact text match
Examples:
agent-browser find role button click --name "Submit"
agent-browser find label "Email" fill "test@test.com"
agent-browser find alt "Logo" click
agent-browser find first ".item" click
agent-browser find last ".item" text
agent-browser find nth 2 ".card" hoveragent-browser wait <selector> # Wait for element
agent-browser wait <ms> # Wait for time
agent-browser wait --text "Welcome" # Wait for text
agent-browser wait --url "**/dash" # Wait for URL pattern
agent-browser wait --load networkidle # Wait for load state
agent-browser wait --fn "condition" # Wait for JS condition
agent-browser wait --download [path] # Wait for downloadagent-browser download <sel> <path> # Click element to trigger download
agent-browser wait --download [path] # Wait for any download to completeUse --download-path <dir> (or AGENT_BROWSER_DOWNLOAD_PATH env) to set a default download directory. Without it, downloads go to a temporary directory that is deleted when the browser closes.
agent-browser mouse move <x> <y> # Move mouse
agent-browser mouse down [button] # Press button
agent-browser mouse up [button] # Release button
agent-browser mouse wheel <dy> [dx] # Scroll wheelagent-browser set viewport <w> <h> # Set viewport size
agent-browser set device <name> # Emulate device ("iPhone 14")
agent-browser set geo <lat> <lng> # Set geolocation
agent-browser set offline [on|off] # Toggle offline mode
agent-browser set headers <json> # Extra HTTP headers
agent-browser set credentials <u> <p> # HTTP basic auth
agent-browser set media [dark|light] # Emulate color scheme (persists for session)
agent-browser set webauthn enable # Enable WebAuthn
agent-browser set webauthn add-virtual-authenticator # Add virtual authenticatorUse --color-scheme for persistent dark/light mode across all commands:
agent-browser --color-scheme dark open https://example.comagent-browser cookies # Get all cookies
agent-browser cookies set <name> <val> # Set cookie
agent-browser cookies clear # Clear cookies
agent-browser storage local # Get all localStorage
agent-browser storage local <key> # Get specific key
agent-browser storage local set <k> <v> # Set value
agent-browser storage local clear # Clear all
agent-browser storage session # Same for sessionStorageagent-browser network route <url> # Intercept requests
agent-browser network route <url> --abort # Block requests
agent-browser network route <url> --body <json> # Mock response
agent-browser network unroute [url] # Remove routes
agent-browser network requests # View tracked requests
agent-browser network requests --clear # Clear request log
agent-browser network requests --filter <pat> # Filter by URL patternagent-browser tab # List tabs
agent-browser tab new [url] # New tab
agent-browser tab <n> # Switch to tab
agent-browser tab close [n] # Close tab
agent-browser window new # Open new browser window
agent-browser frame <sel> # Switch to iframe
agent-browser frame main # Back to main frameagent-browser dialog accept [text] # Accept dialog (with optional prompt text)
agent-browser dialog dismiss # Dismiss dialogagent-browser trace start [path] # Start trace
agent-browser trace stop [path] # Stop and save trace
agent-browser profiler start # Start Chrome DevTools profiling
agent-browser profiler stop [path] # Stop and save profile (.json)
agent-browser record start <path> # Start video recording (WebM)
agent-browser record stop # Stop and save video
agent-browser record restart <path> # Stop current and start new recording
agent-browser console # View console messages
agent-browser console --clear # Clear console log
agent-browser errors # View page errors
agent-browser errors --clear # Clear error log
agent-browser highlight <sel> # Highlight elementagent-browser auth save <name> [opts] # Save auth profile
agent-browser auth login <name> # Login using saved credentials
agent-browser auth list # List saved profiles (names and URLs only)
agent-browser auth show <name> # Show profile metadata (no passwords)
agent-browser auth delete <name> # Delete a saved profileSave options:
--url <url>-- login page URL (required)--username <user>-- username (required)--password <pass>-- password (required unless--password-stdin)--password-stdin-- read password from stdin (recommended to avoid shell history exposure)--username-selector <sel>-- custom CSS selector for username field--password-selector <sel>-- custom CSS selector for password field--submit-selector <sel>-- custom CSS selector for submit button
echo "pass" | agent-browser auth save github --url https://github.com/login --username user --password-stdin
agent-browser auth login github
agent-browser auth listWhen --confirm-actions is set, certain action categories return a confirmation_required response instead of executing immediately. Use confirm or deny to approve or reject the action.
agent-browser confirm <confirmation-id> # Approve a pending action
agent-browser deny <confirmation-id> # Deny a pending actionPending confirmations auto-deny after 60 seconds.
agent-browser --confirm-actions eval,download eval "document.title"
# Returns confirmation_required with ID
agent-browser confirm c_8f3a1234agent-browser state save <path> # Save auth state to file
agent-browser state load <path> # Load auth state from file
agent-browser state list # List saved state files
agent-browser state show <file> # Show state summary
agent-browser state rename <old> <new> # Rename state file
agent-browser state clear [name] # Clear states for session name
agent-browser state clear --all # Clear all saved states
agent-browser state clean --older-than <days> # Delete old statesagent-browser session # Show current session name
agent-browser session list # List active sessionsagent-browser back # Go back
agent-browser forward # Go forward
agent-browser reload # Reload page--session <name> # Isolated browser session
--session-name <name> # Auto-save/restore session state (cookies, localStorage)
--profile <path> # Persistent browser profile directory
--state <path> # Load storage state from JSON file
--headers <json> # HTTP headers scoped to URL's origin
--executable-path <path> # Custom browser executable
--extension <path> # Load browser extension (repeatable)
--args <args> # Browser launch args (comma separated)
--user-agent <ua> # Custom User-Agent string
--proxy <url> # Proxy server URL
--proxy-bypass <hosts> # Hosts to bypass proxy
--ignore-https-errors # Ignore HTTPS certificate errors
--allow-file-access # Allow file:// URLs to access local files (Chromium only)
-p, --provider <name> # Browser provider (ios, browserbase, kernel, browseruse)
--device <name> # iOS device name (e.g., "iPhone 15 Pro")
--json # JSON output (for scripts)
--full, -f # Full page screenshot
--annotate # Annotated screenshot with numbered element labels
--headed # Show browser window (not headless)
--cdp <port|url> # Connect via Chrome DevTools Protocol (port or WebSocket URL)
--auto-connect # Auto-discover and connect to running Chrome
--color-scheme <scheme> # Color scheme: dark, light, no-preference
--download-path <path> # Default download directory
--content-boundaries # Wrap page output in boundary markers for LLM safety
--max-output <chars> # Truncate page output to N characters
--allowed-domains <list> # Comma-separated allowed domain patterns
--action-policy <path> # Path to action policy JSON file
--confirm-actions <list> # Action categories requiring confirmation
--confirm-interactive # Interactive confirmation prompts (auto-denies if stdin is not a TTY)
--config <path> # Use a custom config file
--debug # Debug outputChain commands with && in a single shell invocation. The browser persists via a background daemon, so chaining works naturally and is more efficient than separate calls:
agent-browser open example.com && agent-browser wait --load networkidle && agent-browser snapshot -i
agent-browser fill @e1 "user@example.com" && agent-browser fill @e2 "pass" && agent-browser click @e3
agent-browser open example.com && agent-browser wait --load networkidle && agent-browser screenshot page.pngUse && when you don't need to read intermediate output. Run commands separately when you need to parse output first (e.g., snapshot to discover refs, then interact with those refs).
Open local files (PDFs, HTML) using file:// URLs:
agent-browser --allow-file-access open file:///path/to/document.pdf
agent-browser --allow-file-access open file:///path/to/page.html
agent-browser screenshot output.pngThe --allow-file-access flag enables JavaScript to access other local files. Chromium only.