diff --git a/.env.example b/.env.example
index 43032b5..e94ceaf 100644
--- a/.env.example
+++ b/.env.example
@@ -6,4 +6,6 @@ OPENAI_ORG = "org-123"
 BROWSERBASE_API_KEY="00000000-0000-0000-0000-000000000000"
 BROWSERBASE_PROJECT_ID="bb_live_00000000-00000"
 
-SCRAPYBARA_API_KEY="scrapy-123"
\ No newline at end of file
+SCRAPYBARA_API_KEY="scrapy-123"
+
+STEEL_API_KEY="ste-123"
diff --git a/README.md b/README.md
index 8795b43..eb23c55 100644
--- a/README.md
+++ b/README.md
@@ -26,6 +26,7 @@ Other included sample [computer environments](#computer-environments):
 - [Docker](https://docker.com/) (containerized desktop)
 - [Browserbase](https://www.browserbase.com/) (remote browser, requires account)
 - [Scrapybara](https://scrapybara.com) (remote browser or computer, requires account)
+- [Steel](https://steel.dev) (remote browser, requires account)
 - ...or implement your own `Computer`!
 
 ## Overview
@@ -93,6 +94,7 @@ This sample app provides a set of implemented `Computer` examples, but feel free
 | `Browserbase` | browserbase | `browser` | Remote browser environment | [Browserbase](https://www.browserbase.com/) API key in `.env` |
 | `ScrapybaraBrowser` | scrapybara-browser | `browser` | Remote browser environment | [Scrapybara](https://scrapybara.com/dashboard) API key in `.env` |
 | `ScrapybaraUbuntu` | scrapybara-ubuntu | `linux` | Remote Ubuntu desktop environment | [Scrapybara](https://scrapybara.com/dashboard) API key in `.env` |
+| `Steel` | steel | `browser` | Remote browser API for AI agents | [Steel](https://steel.dev) API key in `.env` |
 
 Using the CLI, you can run the sample app with different computer environments using the options listed above:
 
diff --git a/cli.py b/cli.py
index 15bc37b..12dc652 100644
--- a/cli.py
+++ b/cli.py
@@ -6,6 +6,7 @@
     ScrapybaraUbuntu,
     LocalPlaywrightComputer,
     DockerComputer,
+    SteelBrowser,
 )
 
 
@@ -28,6 +29,7 @@ def main():
             "browserbase",
             "scrapybara-browser",
             "scrapybara-ubuntu",
+            "steel",
         ],
         help="Choose the computer environment to use.",
         default="local-playwright",
@@ -62,6 +64,7 @@ def main():
         "browserbase": BrowserbaseBrowser,
         "scrapybara-browser": ScrapybaraBrowser,
         "scrapybara-ubuntu": ScrapybaraUbuntu,
+        "steel": SteelBrowser,
     }
 
     ComputerClass = computer_mapping[args.computer]
diff --git a/computers/__init__.py b/computers/__init__.py
index 606332e..5422fe3 100644
--- a/computers/__init__.py
+++ b/computers/__init__.py
@@ -3,3 +3,4 @@
 from .local_playwright import LocalPlaywrightComputer
 from .docker import DockerComputer
 from .scrapybara import ScrapybaraBrowser, ScrapybaraUbuntu
+from .steel import SteelBrowser
diff --git a/computers/steel.py b/computers/steel.py
new file mode 100644
index 0000000..6cbea01
--- /dev/null
+++ b/computers/steel.py
@@ -0,0 +1,209 @@
+import os
+from typing import Tuple, Optional
+from playwright.sync_api import Browser, Page, Error as PlaywrightError
+from .base_playwright import BasePlaywrightComputer
+from dotenv import load_dotenv
+import base64
+from steel import Steel
+
+load_dotenv()
+
+
+class SteelBrowser(BasePlaywrightComputer):
+    """
+    Steel is an open-source browser API purpose-built for AI agents.
+    Head to https://app.steel.dev to get started.
+
+    If you're running Steel locally or self-hosted, add the following to your .env file:
+    STEEL_API_KEY=your_api_key
+    STEEL_BASE_URL=http://localhost:3000 (or your self-hosted URL)
+
+    IMPORTANT: The `goto` tool, as defined in playwright_with_custom_functions.py, is strongly recommended when using the Steel computer.
+    Make sure to include this tool in your configuration when using the Steel computer.
+    """
+
+    def __init__(
+        self,
+        width: int = 1024,
+        height: int = 768,
+        proxy: bool = False,
+        solve_captcha: bool = False,
+        virtual_mouse: bool = True,
+        session_timeout: int = 900000,  # 15 minutes default
+        ad_blocker: bool = True,
+        start_url: str = "https://bing.com",
+    ):
+        """
+        Initialize the Steel browser instance.
+
+        Args:
+            width (int): Browser viewport width. Default is 1024.
+            height (int): Browser viewport height. Default is 768.
+            proxy (bool): Whether to use Steel's proxy network (residential IPs). Default is False.
+            solve_captcha (bool): Whether to enable automatic CAPTCHA solving. Default is False.
+            virtual_mouse (bool): Whether to show a virtual mouse cursor. Default is True.
+            session_timeout (int): Session timeout in milliseconds. Default is 900000 (15 minutes).
+            ad_blocker (bool): Whether to enable ad blocking. Default is True.
+            start_url (str): The initial URL to navigate to. Default is "https://bing.com".
+        """
+        super().__init__()
+
+        # Initialize Steel client; an unset or empty STEEL_BASE_URL falls back to the hosted API
+        self.client = Steel(
+            steel_api_key=os.getenv("STEEL_API_KEY"),
+            base_url=os.getenv("STEEL_BASE_URL") or "https://api.steel.dev",
+        )
+        self.dimensions = (width, height)
+        self.proxy = proxy
+        self.solve_captcha = solve_captcha
+        self.virtual_mouse = virtual_mouse
+        self.session_timeout = session_timeout
+        self.ad_blocker = ad_blocker
+        self.start_url = start_url
+        self.session = None
+
+    def _get_browser_and_page(self) -> Tuple[Browser, Page]:
+        """
+        Create a Steel browser session and connect to it.
+
+        Returns:
+            Tuple[Browser, Page]: A tuple containing the connected browser and page objects.
+        """
+        # Create Steel session
+        width, height = self.dimensions
+        self.session = self.client.sessions.create(
+            use_proxy=self.proxy,
+            solve_captcha=self.solve_captcha,
+            api_timeout=self.session_timeout,
+            block_ads=self.ad_blocker,
+            dimensions={"width": width, "height": height},
+        )
+
+        print("Steel Session created successfully!")
+        print(f"View live session at: {self.session.session_viewer_url}")
+
+        # Connect to the remote browser using Steel's connection URL.
+        # NOTE: this endpoint is hard-coded to the hosted service and does not
+        # follow STEEL_BASE_URL.
+        browser = self._playwright.chromium.connect_over_cdp(
+            f"wss://connect.steel.dev?apiKey={os.getenv('STEEL_API_KEY')}&sessionId={self.session.id}"
+        )
+        context = browser.contexts[0]
+
+        # Set up page event handlers
+        context.on("page", self._handle_new_page)
+
+        # Add virtual mouse cursor if enabled
+        if self.virtual_mouse:
+            context.add_init_script("""
+            // Only run in the top frame
+            if (window.self === window.top) {
+                function initCursor() {
+                    const CURSOR_ID = '__cursor__';
+                    if (document.getElementById(CURSOR_ID)) return;
+
+                    const cursor = document.createElement('div');
+                    cursor.id = CURSOR_ID;
+                    Object.assign(cursor.style, {
+                        position: 'fixed',
+                        top: '0px',
+                        left: '0px',
+                        width: '20px',
+                        height: '20px',
+                        backgroundImage: 'url("data:image/svg+xml;utf8,")',
+                        backgroundSize: 'cover',
+                        pointerEvents: 'none',
+                        zIndex: '99999',
+                        transform: 'translate(-2px, -2px)',
+                    });
+
+                    document.body.appendChild(cursor);
+                    document.addEventListener("mousemove", (e) => {
+                        cursor.style.top = e.clientY + "px";
+                        cursor.style.left = e.clientX + "px";
+                    });
+                }
+
+                requestAnimationFrame(function checkBody() {
+                    if (document.body) {
+                        initCursor();
+                    } else {
+                        requestAnimationFrame(checkBody);
+                    }
+                });
+            }
+            """)
+
+        page = context.pages[0]
+        page.on("close", self._handle_page_close)
+
+        # Navigate to start URL
+        page.goto(self.start_url)
+
+        return browser, page
+
+    def _handle_new_page(self, page: Page):
+        """Track a newly opened page as the active page."""
+        print("New page created")
+        self._page = page
+        page.on("close", self._handle_page_close)
+
+    def _handle_page_close(self, page: Page):
+        """Fall back to the most recent remaining page when the active page closes."""
+        print("Page closed")
+        if self._page != page:
+            return
+
+        # The browser may not be attached yet, or may have no contexts left
+        contexts = self._browser.contexts if self._browser else []
+        remaining = contexts[0].pages if contexts else []
+        if remaining:
+            self._page = remaining[-1]
+        else:
+            print("Warning: All pages have been closed.")
+            self._page = None
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """
+        Clean up resources when exiting.
+
+        The Steel session is released even if tearing down Playwright raises,
+        so a failed local cleanup does not skip releasing the remote session.
+        """
+        try:
+            if self._page:
+                self._page.close()
+            if self._browser:
+                self._browser.close()
+            if self._playwright:
+                self._playwright.stop()
+        finally:
+            # Release the Steel session
+            if self.session:
+                print("Releasing Steel session...")
+                self.client.sessions.release(self.session.id)
+                print(
+                    f"Session completed. View replay at {self.session.session_viewer_url}")
+
+    def screenshot(self) -> str:
+        """
+        Capture a screenshot of the current viewport using CDP.
+
+        Returns:
+            str: Base64 encoded screenshot data
+        """
+        try:
+            cdp_session = self._page.context.new_cdp_session(self._page)
+            try:
+                result = cdp_session.send("Page.captureScreenshot", {
+                    "format": "png",
+                    "fromSurface": True,
+                })
+            finally:
+                # Detach so a new CDP session is not left open per screenshot
+                cdp_session.detach()
+            return result["data"]
+        except PlaywrightError as error:
+            print(
+                f"CDP screenshot failed, falling back to standard screenshot: {error}")
+            return super().screenshot()
diff --git a/requirements.txt b/requirements.txt
index 13769fb..bd3016c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,3 +21,4 @@ scrapybara>=2.3.6
 sniffio==1.3.1
 typing_extensions==4.12.2
 urllib3==2.3.0
+steel-sdk>=0.1.0b10