Skip to content

Commit 588a131

Browse files
authored
Merge branch 'main' into computer
2 parents 703a109 + 781d24a commit 588a131

File tree

6 files changed

+124
-31
lines changed

6 files changed

+124
-31
lines changed

pyproject.toml

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
[project]
2+
name = "scrapybara"
3+
14
[tool.poetry]
25
name = "scrapybara"
3-
version = "2.3.0"
6+
version = "2.3.1"
47
description = ""
58
readme = "README.md"
69
authors = []

src/scrapybara/anthropic/__init__.py

+91
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from pydantic import Field
44

55
from ..types.act import Model
6+
from datetime import datetime
67

78

89
class Anthropic(Model):
@@ -29,3 +30,93 @@ def __init__(
2930
api_key: Optional[str] = None,
3031
) -> None:
3132
super().__init__(provider="anthropic", name=name, api_key=api_key)
33+
34+
35+
UBUNTU_SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
36+
* You have access to an Ubuntu VM with internet connectivity
37+
* You can install Ubuntu applications using the bash tool (use curl over wget)
38+
* To run GUI applications with the bash tool, use a subshell, e.g. "(DISPLAY=:1 xterm &)", make sure to include the parentheses
39+
* GUI apps will appear but may take time to load - confirm with an extra screenshot
40+
* Chromium is the default browser
41+
* Start Chromium via the bash tool "(DISPLAY=:1 chromium &)", but interact with it visually via the computer tool
42+
* If you need to read a HTML file:
43+
- Open with the address bar in Chromium
44+
* For commands with large text output:
45+
- Redirect to a temp file
46+
- Use str_replace_editor or grep with context (-B and -A flags) to view output
47+
* When viewing pages:
48+
- Zoom out to see full content, or
49+
- Scroll to ensure you see everything
50+
* When interacting with a field, always clear the field first using "ctrl+A" and "delete"
51+
- Take an extra screenshot after clicking "enter" to confirm the field is properly submitted and move the mouse to the next field
52+
* Computer function calls take time, string together calls when possible
53+
* You are allowed to take actions on behalf of the user on sites that are authenticated
54+
* If the user asks you to access a site, assume that the user has already authenticated
55+
* To login additional sites, ask the user to use Auth Contexts or the Interactive Desktop
56+
* If first screenshot shows black screen:
57+
- Click mouse in screen center
58+
- Take another screenshot
59+
* Today's date is {datetime.today().strftime('%A, %B %-d, %Y')}
60+
</SYSTEM_CAPABILITY>
61+
62+
<IMPORTANT>
63+
* If given a complex task, break down into smaller steps and ask the user for details only if necessary
64+
* Read through web pages thoroughly by scrolling down till you have gathered enough info
65+
* Be concise!
66+
</IMPORTANT>"""
67+
"""Recommended Anthropic system prompt for Ubuntu instances"""
68+
69+
70+
BROWSER_SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
71+
* You have access to a Chromium VM with internet connectivity
72+
* Chromium should already be open and running
73+
* You can interact with web pages using the computer tool
74+
* When viewing pages:
75+
- Zoom out to see full content, or
76+
- Scroll to ensure you see everything
77+
* When interacting with a field, always clear the field first using "ctrl+A" and "delete"
78+
- Take an extra screenshot after clicking "enter" to confirm the field is properly submitted and move the mouse to the next field
79+
* Computer function calls take time, string together calls when possible
80+
* You are allowed to take actions on behalf of the user on sites that are authenticated
81+
* If the user asks you to access a site, assume that the user has already authenticated
82+
* To login additional sites, ask the user to use Auth Contexts
83+
* If first screenshot shows black screen:
84+
- Click mouse in screen center
85+
- Take another screenshot
86+
* Today's date is {datetime.today().strftime('%A, %B %-d, %Y')}
87+
</SYSTEM_CAPABILITY>
88+
89+
<IMPORTANT>
90+
* If given a complex task, break down into smaller steps and ask the user for details only if necessary
91+
* Read through web pages thoroughly by scrolling down till you have gathered enough info
92+
* Be concise!
93+
</IMPORTANT>"""
94+
"""Recommended Anthropic system prompt for Browser instances"""
95+
96+
97+
WINDOWS_SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
98+
* You have access to a Windows VM with internet connectivity
99+
* You can interact with the Windows desktop using the computer tool
100+
* GUI apps will appear but may take time to load - confirm with an extra screenshot
101+
* Edge is the default browser
102+
* When viewing pages:
103+
- Zoom out to see full content, or
104+
- Scroll to ensure you see everything
105+
* When interacting with a field, always clear the field first using "ctrl+A" and "delete"
106+
- Take an extra screenshot after clicking "enter" to confirm the field is properly submitted and move the mouse to the next field
107+
* Computer function calls take time, string together calls when possible
108+
* You are allowed to take actions on behalf of the user on sites that are authenticated
109+
* If the user asks you to access a site, assume that the user has already authenticated
110+
* To login additional sites, ask the user to use Auth Contexts or the Interactive Desktop
111+
* If first screenshot shows black screen:
112+
- Click mouse in screen center
113+
- Take another screenshot
114+
* Today's date is {datetime.today().strftime('%A, %B %-d, %Y')}
115+
</SYSTEM_CAPABILITY>
116+
117+
<IMPORTANT>
118+
* If given a complex task, break down into smaller steps and ask the user for details only if necessary
119+
* Read through web pages thoroughly by scrolling down till you have gathered enough info
120+
* Be concise!
121+
</IMPORTANT>"""
122+
"""Recommended Anthropic system prompt for Windows instances"""

src/scrapybara/client.py

+16-16
Original file line numberDiff line numberDiff line change
@@ -745,9 +745,9 @@ def computer(
745745
*,
746746
action: Literal["click_mouse"],
747747
button: Button,
748-
click_type: Optional[ClickMouseActionClickType] = None,
748+
click_type: Optional[ClickMouseActionClickType] = "click",
749749
coordinates: Optional[List[int]] = None,
750-
num_clicks: Optional[int] = None,
750+
num_clicks: Optional[int] = 1,
751751
hold_keys: Optional[List[str]] = None,
752752
request_options: Optional[RequestOptions] = None,
753753
) -> ComputerResponse: ...
@@ -768,8 +768,8 @@ def computer(
768768
*,
769769
action: Literal["scroll"],
770770
coordinates: Optional[List[int]] = None,
771-
delta_x: Optional[float] = None,
772-
delta_y: Optional[float] = None,
771+
delta_x: Optional[float] = 0,
772+
delta_y: Optional[float] = 0,
773773
hold_keys: Optional[List[str]] = None,
774774
request_options: Optional[RequestOptions] = None,
775775
) -> ComputerResponse: ...
@@ -835,11 +835,11 @@ def computer(
835835
GetCursorPositionAction,
836836
],
837837
button: Optional[Button] = None,
838-
click_type: Optional[ClickMouseActionClickType] = None,
838+
click_type: Optional[ClickMouseActionClickType] = "click",
839839
coordinates: Optional[List[int]] = None,
840-
delta_x: Optional[float] = None,
841-
delta_y: Optional[float] = None,
842-
num_clicks: Optional[int] = None,
840+
delta_x: Optional[float] = 0,
841+
delta_y: Optional[float] = 0,
842+
num_clicks: Optional[int] = 1,
843843
hold_keys: Optional[List[str]] = None,
844844
path: Optional[List[List[int]]] = None,
845845
keys: Optional[List[str]] = None,
@@ -1181,9 +1181,9 @@ async def computer(
11811181
*,
11821182
action: Literal["click_mouse"],
11831183
button: Button,
1184-
click_type: Optional[ClickMouseActionClickType] = None,
1184+
click_type: Optional[ClickMouseActionClickType] = "click",
11851185
coordinates: Optional[List[int]] = None,
1186-
num_clicks: Optional[int] = None,
1186+
num_clicks: Optional[int] = 1,
11871187
hold_keys: Optional[List[str]] = None,
11881188
request_options: Optional[RequestOptions] = None,
11891189
) -> ComputerResponse: ...
@@ -1204,8 +1204,8 @@ async def computer(
12041204
*,
12051205
action: Literal["scroll"],
12061206
coordinates: Optional[List[int]] = None,
1207-
delta_x: Optional[float] = None,
1208-
delta_y: Optional[float] = None,
1207+
delta_x: Optional[float] = 0,
1208+
delta_y: Optional[float] = 0,
12091209
hold_keys: Optional[List[str]] = None,
12101210
request_options: Optional[RequestOptions] = None,
12111211
) -> ComputerResponse: ...
@@ -1271,11 +1271,11 @@ async def computer(
12711271
GetCursorPositionAction,
12721272
],
12731273
button: Optional[Button] = None,
1274-
click_type: Optional[ClickMouseActionClickType] = None,
1274+
click_type: Optional[ClickMouseActionClickType] = "click",
12751275
coordinates: Optional[List[int]] = None,
1276-
delta_x: Optional[float] = None,
1277-
delta_y: Optional[float] = None,
1278-
num_clicks: Optional[int] = None,
1276+
delta_x: Optional[float] = 0,
1277+
delta_y: Optional[float] = 0,
1278+
num_clicks: Optional[int] = 1,
12791279
hold_keys: Optional[List[str]] = None,
12801280
path: Optional[List[List[int]]] = None,
12811281
keys: Optional[List[str]] = None,

src/scrapybara/core/client_wrapper.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def get_headers(self) -> typing.Dict[str, str]:
1616
headers: typing.Dict[str, str] = {
1717
"X-Fern-Language": "Python",
1818
"X-Fern-SDK-Name": "scrapybara",
19-
"X-Fern-SDK-Version": "2.3.0",
19+
"X-Fern-SDK-Version": "2.3.1",
2020
}
2121
headers["x-api-key"] = self.api_key
2222
return headers

src/scrapybara/prompts/__init__.py

+9-10
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,12 @@
11
from datetime import datetime
22

33
UBUNTU_SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
4-
* You have access to an Ubuntu virtual machine with internet connectivity
4+
* You have access to an Ubuntu VM with internet connectivity
55
* You can install Ubuntu applications using the bash tool (use curl over wget)
66
* To run GUI applications with the bash tool, use a subshell, e.g. "(DISPLAY=:1 xterm &)", make sure to include the parentheses
77
* GUI apps will appear but may take time to load - confirm with an extra screenshot
88
* Chromium is the default browser
99
* Start Chromium via the bash tool "(DISPLAY=:1 chromium &)", but interact with it visually via the computer tool
10-
* If you need to read a full PDF after initial screenshot
11-
- Download with curl
12-
- Convert to text using pdftotext
13-
- Read the text file with StrReplaceEditTool
1410
* If you need to read a HTML file:
1511
- Open with the address bar in Chromium
1612
* For commands with large text output:
@@ -33,13 +29,14 @@
3329
3430
<IMPORTANT>
3531
* If given a complex task, break down into smaller steps and ask the user for details only if necessary
36-
* Read through web pages thoroughly by scrolling down till the end
32+
* Read through web pages thoroughly by scrolling down till you have gathered enough info
3733
* Be concise!
3834
</IMPORTANT>"""
35+
"""DEPRECATED — Please import prompts from their respective models instead: `from scrapybara.anthropic import UBUNTU_SYSTEM_PROMPT`"""
3936

4037

4138
BROWSER_SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
42-
* You have access to a Chromium browser instance with internet connectivity
39+
* You have access to a Chromium VM with internet connectivity
4340
* Chromium should already be open and running
4441
* You can interact with web pages using the computer tool
4542
* When viewing pages:
@@ -59,13 +56,14 @@
5956
6057
<IMPORTANT>
6158
* If given a complex task, break down into smaller steps and ask the user for details only if necessary
62-
* Read through web pages thoroughly by scrolling down till the end
59+
* Read through web pages thoroughly by scrolling down till you have gathered enough info
6360
* Be concise!
6461
</IMPORTANT>"""
62+
"""DEPRECATED — Please import prompts from their respective models instead: `from scrapybara.anthropic import BROWSER_SYSTEM_PROMPT`"""
6563

6664

6765
WINDOWS_SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
68-
* You have access to a Windows virtual machine with internet connectivity
66+
* You have access to a Windows VM with internet connectivity
6967
* You can interact with the Windows desktop using the computer tool
7068
* GUI apps will appear but may take time to load - confirm with an extra screenshot
7169
* Edge is the default browser
@@ -86,6 +84,7 @@
8684
8785
<IMPORTANT>
8886
* If given a complex task, break down into smaller steps and ask the user for details only if necessary
89-
* Read through web pages thoroughly by scrolling down till the end
87+
* Read through web pages thoroughly by scrolling down till you have gathered enough info
9088
* Be concise!
9189
</IMPORTANT>"""
90+
"""DEPRECATED — Please import prompts from their respective models instead: `from scrapybara.anthropic import WINDOWS_SYSTEM_PROMPT`"""

tests/custom/test_client.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
import os
44
import pytest
55

6-
from scrapybara.anthropic import Anthropic
7-
from scrapybara.prompts import (
8-
BROWSER_SYSTEM_PROMPT,
6+
from scrapybara.anthropic import (
7+
Anthropic,
98
UBUNTU_SYSTEM_PROMPT,
9+
BROWSER_SYSTEM_PROMPT,
1010
WINDOWS_SYSTEM_PROMPT,
1111
)
1212
from scrapybara.tools import BashTool, ComputerTool, EditTool

0 commit comments

Comments
 (0)