Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,28 +55,28 @@ agent-browser find role button click --name "Submit"
### Core Commands

```bash
agent-browser open <url> # Navigate to URL
agent-browser open <url> # Navigate to URL (aliases: goto, navigate)
agent-browser click <sel> # Click element
agent-browser dblclick <sel> # Double-click element
agent-browser focus <sel> # Focus element
agent-browser type <sel> <text> # Type into element
agent-browser fill <sel> <text> # Clear and fill
agent-browser press <key> # Press key (Enter, Tab, Control+a)
agent-browser press <key> # Press key (Enter, Tab, Control+a) (alias: key)
agent-browser keydown <key> # Hold key down
agent-browser keyup <key> # Release key
agent-browser hover <sel> # Hover element
agent-browser select <sel> <val> # Select dropdown option
agent-browser check <sel> # Check checkbox
agent-browser uncheck <sel> # Uncheck checkbox
agent-browser scroll <dir> [px] # Scroll (up/down/left/right)
agent-browser scrollintoview <sel> # Scroll element into view
agent-browser scrollintoview <sel> # Scroll element into view (alias: scrollinto)
agent-browser drag <src> <tgt> # Drag and drop
agent-browser upload <sel> <files> # Upload files
agent-browser screenshot [path] # Take screenshot (--full for full page)
agent-browser pdf <path> # Save as PDF
agent-browser snapshot # Accessibility tree with refs (best for AI)
agent-browser eval <js> # Run JavaScript
agent-browser close # Close browser
agent-browser close # Close browser (aliases: quit, exit)
```

### Get Info
Expand Down Expand Up @@ -129,9 +129,9 @@ agent-browser find nth 2 "a" text
### Wait

```bash
agent-browser wait <selector> # Wait for element
agent-browser wait <ms> # Wait for time
agent-browser wait --text "Welcome" # Wait for text
agent-browser wait <selector> # Wait for element to be visible
agent-browser wait <ms> # Wait for time (milliseconds)
agent-browser wait --text "Welcome" # Wait for text to appear
agent-browser wait --url "**/dash" # Wait for URL pattern
agent-browser wait --load networkidle # Wait for load state
agent-browser wait --fn "window.ready === true" # Wait for JS condition
Expand Down Expand Up @@ -253,6 +253,10 @@ AGENT_BROWSER_SESSION=agent1 agent-browser click "#btn"

# List active sessions
agent-browser session list
# Output:
# Active sessions:
# -> default
# agent1

# Show current session
agent-browser session
Expand Down Expand Up @@ -393,15 +397,17 @@ agent-browser uses a client-daemon architecture:

The daemon starts automatically on first command and persists between commands for fast subsequent operations.

**Browser Engine:** Uses Chromium by default. The daemon also supports Firefox and WebKit via the Playwright protocol.

## Platforms

| Platform | Binary | Fallback |
|----------|--------|----------|
| macOS ARM64 | Native Rust | Node.js |
| macOS x64 | Native Rust | Node.js |
| Linux ARM64 | Native Rust | Node.js |
| Linux x64 | Native Rust | Node.js |
| Windows | - | Node.js |
| macOS ARM64 | Native Rust | Node.js |
| macOS x64 | Native Rust | Node.js |
| Linux ARM64 | Native Rust | Node.js |
| Linux x64 | Native Rust | Node.js |
| Windows x64 | Native Rust | Node.js |

## Usage with AI Agents

Expand Down
91 changes: 90 additions & 1 deletion cli/src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,44 @@ pub fn parse_command(args: &[String], flags: &Flags) -> Result<Value, ParseError

// === Wait ===
"wait" => {
// Check for --url flag: wait --url "**/dashboard"
if let Some(idx) = rest.iter().position(|&s| s == "--url" || s == "-u") {
let url = rest.get(idx + 1).ok_or_else(|| ParseError::MissingArguments {
context: "wait --url".to_string(),
usage: "wait --url <pattern>",
})?;
return Ok(json!({ "id": id, "action": "waitforurl", "url": url }));
}

// Check for --load flag: wait --load networkidle
if let Some(idx) = rest.iter().position(|&s| s == "--load" || s == "-l") {
let state = rest.get(idx + 1).ok_or_else(|| ParseError::MissingArguments {
context: "wait --load".to_string(),
usage: "wait --load <state>",
})?;
return Ok(json!({ "id": id, "action": "waitforloadstate", "state": state }));
}

// Check for --fn flag: wait --fn "window.ready === true"
if let Some(idx) = rest.iter().position(|&s| s == "--fn" || s == "-f") {
let expr = rest.get(idx + 1).ok_or_else(|| ParseError::MissingArguments {
context: "wait --fn".to_string(),
usage: "wait --fn <expression>",
})?;
return Ok(json!({ "id": id, "action": "waitforfunction", "expression": expr }));
}

// Check for --text flag: wait --text "Welcome"
if let Some(idx) = rest.iter().position(|&s| s == "--text" || s == "-t") {
let text = rest.get(idx + 1).ok_or_else(|| ParseError::MissingArguments {
context: "wait --text".to_string(),
usage: "wait --text <text>",
})?;
// Reuse the generic "wait" action with a text=<text> selector to wait for the text to appear
return Ok(json!({ "id": id, "action": "wait", "selector": format!("text={}", text) }));
}

// Default: selector or timeout
if let Some(arg) = rest.get(0) {
if arg.parse::<u64>().is_ok() {
Ok(json!({ "id": id, "action": "wait", "timeout": arg.parse::<u64>().unwrap() }))
Expand All @@ -221,7 +259,7 @@ pub fn parse_command(args: &[String], flags: &Flags) -> Result<Value, ParseError
} else {
Err(ParseError::MissingArguments {
context: "wait".to_string(),
usage: "wait <selector|ms>",
usage: "wait <selector|ms|--url|--load|--fn|--text>",
})
}
}
Expand Down Expand Up @@ -1090,6 +1128,57 @@ mod tests {
assert_eq!(cmd["maxDepth"], 3);
}

// === Wait ===

/// `wait #element` should yield a `wait` action carrying the selector.
#[test]
fn test_wait_selector() {
    let argv = args("wait #element");
    let parsed = parse_command(&argv, &default_flags()).unwrap();

    assert_eq!(parsed["action"], "wait");
    assert_eq!(parsed["selector"], "#element");
}

/// `wait 5000` should yield a `wait` action carrying a numeric timeout.
#[test]
fn test_wait_timeout() {
    let argv = args("wait 5000");
    let parsed = parse_command(&argv, &default_flags()).unwrap();

    assert_eq!(parsed["action"], "wait");
    assert_eq!(parsed["timeout"], 5000);
}

/// `wait --url <pattern>` should yield a `waitforurl` action with the pattern.
#[test]
fn test_wait_url() {
    let argv = args("wait --url **/dashboard");
    let parsed = parse_command(&argv, &default_flags()).unwrap();

    assert_eq!(parsed["action"], "waitforurl");
    assert_eq!(parsed["url"], "**/dashboard");
}

/// `wait --load <state>` should yield a `waitforloadstate` action with the state.
#[test]
fn test_wait_load() {
    let argv = args("wait --load networkidle");
    let parsed = parse_command(&argv, &default_flags()).unwrap();

    assert_eq!(parsed["action"], "waitforloadstate");
    assert_eq!(parsed["state"], "networkidle");
}

/// `wait --load` with no state value should fail with `MissingArguments`.
#[test]
fn test_wait_load_missing_state() {
    let argv = args("wait --load");
    let outcome = parse_command(&argv, &default_flags());

    assert!(outcome.is_err());

    let is_missing_args = matches!(
        outcome.unwrap_err(),
        ParseError::MissingArguments { .. }
    );
    assert!(is_missing_args);
}

/// `wait --fn <expr>` should yield a `waitforfunction` action with the expression.
#[test]
fn test_wait_fn() {
    let argv = args("wait --fn window.ready");
    let parsed = parse_command(&argv, &default_flags()).unwrap();

    assert_eq!(parsed["action"], "waitforfunction");
    assert_eq!(parsed["expression"], "window.ready");
}

/// `wait --text <text>` should yield a `wait` action with a `text=<text>` selector.
#[test]
fn test_wait_text() {
    let argv = args("wait --text Welcome");
    let parsed = parse_command(&argv, &default_flags()).unwrap();

    assert_eq!(parsed["action"], "wait");
    assert_eq!(parsed["selector"], "text=Welcome");
}

// === Unknown command ===

#[test]
Expand Down