# Source: slop-fixer/browser.py (CelestoAI/slop-fixer, main branch)
"""Wrapper around the `agent-browser` CLI for headless browser automation."""

from __future__ import annotations

import asyncio
import logging
import shutil
from pathlib import Path

from models import Screenshot, Viewport

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Executable name that `_ensure_agent_browser` looks up on PATH.
_AGENT_BROWSER = "agent-browser"


def _ensure_agent_browser() -> str:
    """Locate the agent-browser executable on PATH.

    Returns:
        The resolved path of the executable.

    Raises:
        RuntimeError: If the executable cannot be found.
    """
    resolved = shutil.which(_AGENT_BROWSER)
    if resolved is not None:
        return resolved

    # Not on PATH: tell the user how to install it.
    message = (
        "agent-browser is not installed. "
        "Install it with: npm install -g agent-browser"
    )
    raise RuntimeError(message)


async def _run(args: list[str], *, timeout: int = 30) -> str:
    """Run an agent-browser sub-command and return its stripped stdout.

    Args:
        args: Sub-command followed by its arguments, e.g. ``["open", url]``.
        timeout: Seconds to wait for the process to finish.

    Returns:
        The process's stdout, decoded and stripped of surrounding whitespace.

    Raises:
        RuntimeError: If agent-browser is not installed or exits non-zero.
        asyncio.TimeoutError: If the process does not finish within
            *timeout*; the child is killed before the error propagates.
    """
    cmd = [_ensure_agent_browser(), *args]
    logger.debug("Running: %s", " ".join(cmd))

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
    except (asyncio.TimeoutError, asyncio.CancelledError):
        # wait_for only cancels communicate(); without an explicit kill the
        # child process would keep running after we have given up on it.
        try:
            proc.kill()
        except ProcessLookupError:
            pass  # the process exited on its own in the meantime
        await proc.wait()
        raise

    if proc.returncode != 0:
        # errors="replace": reporting a failure must not itself fail on
        # output that is not valid UTF-8.
        err = stderr.decode(errors="replace").strip()
        subcommand = args[0] if args else "<no sub-command>"
        raise RuntimeError(f"agent-browser {subcommand} failed (rc={proc.returncode}): {err}")

    return stdout.decode(errors="replace").strip()


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


class Browser:
    """Manages an agent-browser session for taking screenshots.

    Every method shells out to the CLI through `_run`, passing
    ``--session <name>`` so that all commands address the same session.
    """

    def __init__(self, session: str = "slop-fixer") -> None:
        """Remember the session name; no command is run here."""
        self.session = session
        # Set once `open` has succeeded; `close` is a no-op until then.
        self._opened = False

    async def open(self, url: str) -> None:
        """Navigate to a URL (starts the daemon automatically)."""
        await _run(["open", url, "--session", self.session])
        self._opened = True

    async def set_viewport(self, width: int, height: int) -> None:
        """Resize the browser viewport (best effort).

        Uses the CLI's ``set viewport <width> <height>`` sub-command. If the
        command fails or times out (for example because the installed CLI
        does not provide it), the failure is logged and ignored so the
        caller carries on with whatever viewport is currently active.
        """
        try:
            await _run(
                ["set", "viewport", str(width), str(height), "--session", self.session]
            )
        except (RuntimeError, asyncio.TimeoutError) as exc:
            logger.warning(
                "Could not resize viewport to %dx%d: %s", width, height, exc
            )

    async def screenshot(self, output_path: Path) -> Path:
        """Capture a screenshot and save it to *output_path*.

        The parent directory is created if needed.

        Raises:
            RuntimeError: If the command fails, or if it succeeds but no
                file exists at *output_path* afterwards.
        """
        output_path.parent.mkdir(parents=True, exist_ok=True)
        await _run(["screenshot", str(output_path), "--session", self.session])
        if not output_path.exists():
            raise RuntimeError(f"Screenshot was not saved to {output_path}")
        logger.info("Screenshot saved: %s", output_path)
        return output_path

    async def snapshot(self) -> str:
        """Return the accessibility tree snapshot (compact text)."""
        return await _run(["snapshot", "-i", "--session", self.session])

    async def close(self) -> None:
        """Close the browser session if `open` succeeded earlier."""
        if self._opened:
            await _run(["close", "--session", self.session])
            self._opened = False


def _route_slug(route: str) -> str:
    """Turn a route into a filesystem-safe filename stem (``"root"`` if empty)."""
    # Anything other than letters, digits, '.', '_' and '-' becomes '_', so
    # query strings and similar characters cannot end up in a filename.
    cleaned = "".join(ch if ch.isalnum() or ch in "._-" else "_" for ch in route)
    return cleaned.strip("_") or "root"


async def capture_screenshots(
    base_url: str,
    routes: list[str],
    viewports: list[Viewport],
    output_dir: Path,
    *,
    settle_seconds: float = 2.0,
    reflow_seconds: float = 1.0,
) -> list[Screenshot]:
    """Capture screenshots for every route × viewport combination.

    Args:
        base_url: Origin of the site; a trailing ``/`` is ignored.
        routes: Paths to visit. A route without a leading ``/`` gets one, so
            ``"about"`` and ``"/about"`` address the same page.
        viewports: Viewports to apply to each route, in order.
        output_dir: Directory that receives the PNG files.
        settle_seconds: Pause after opening a route, before capturing.
        reflow_seconds: Pause after each viewport change, before capturing.

    Returns:
        A list of `Screenshot` metadata objects, one per route and viewport.
        A failed accessibility snapshot is logged and recorded as ``None``.
    """
    browser = Browser()
    screenshots: list[Screenshot] = []
    origin = base_url.rstrip("/")
    output_dir = Path(output_dir)

    try:
        for route in routes:
            path = route if route.startswith("/") else f"/{route}"
            await browser.open(f"{origin}{path}")

            # Allow the page to settle
            await asyncio.sleep(settle_seconds)

            # The slug depends only on the route, so compute it once.
            slug = _route_slug(route)

            for vp in viewports:
                await browser.set_viewport(vp.width, vp.height)
                await asyncio.sleep(reflow_seconds)  # let layout reflow

                filename = f"{slug}_{vp.label.lower()}_{vp.width}x{vp.height}.png"
                filepath = output_dir / filename

                await browser.screenshot(filepath)

                # The a11y snapshot is optional: a failure here must not
                # discard the screenshot that was just taken.
                a11y = None
                try:
                    a11y = await browser.snapshot()
                except Exception:
                    logger.warning(
                        "Could not get a11y snapshot for %s @ %s",
                        route,
                        vp.label,
                        exc_info=True,
                    )

                screenshots.append(
                    Screenshot(
                        route=route,
                        viewport=vp,
                        file_path=filepath,
                        accessibility_snapshot=a11y,
                    )
                )
    finally:
        # Always release the session, even when a capture failed midway.
        await browser.close()

    logger.info("Captured %d screenshots", len(screenshots))
    return screenshots