-
Notifications
You must be signed in to change notification settings - Fork 373
Expand file tree
/
Copy pathdesktop.py
More file actions
199 lines (176 loc) · 6.17 KB
/
desktop.py
File metadata and controls
199 lines (176 loc) · 6.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import io
import sys
import time
import webbrowser
from typing import Literal
import pyautogui
from ..computer import Computer, EnvState
class DesktopComputer(Computer):
    """Controls the local desktop using OS-level input automation.

    Every action is carried out through ``pyautogui`` against the real
    screen, mouse and keyboard. Each public action returns an ``EnvState``
    holding a fresh PNG screenshot and the URL this object last opened or
    navigated to. The URL is tracked locally; it is never read back from
    the browser, so it can be stale after clicks or history navigation.
    """

    # Key names that mean "the platform's primary shortcut modifier".
    _PLATFORM_MODIFIER_KEYS = frozenset({"controlormeta", "meta", "command"})

    # Accepted aliases mapped to the key names handed to pyautogui. Names not
    # listed here are passed through unchanged (after strip + lowercase).
    _KEY_ALIASES = {
        "control": "ctrl",
        "option": "alt",
        "return": "enter",
        "escape": "esc",
        "spacebar": "space",
        "page_up": "pageup",
        "page_down": "pagedown",
        "arrowleft": "left",
        "arrowright": "right",
        "arrowup": "up",
        "arrowdown": "down",
    }

    def __init__(
        self,
        screen_size: tuple[int, int],
        initial_url: str = "https://www.google.com",
        search_engine_url: str = "https://www.google.com",
    ):
        """Initializes the controller.

        Args:
            screen_size: Accepted for interface compatibility but not used;
                the size reported by ``pyautogui.size()`` is stored instead.
            initial_url: URL opened by ``open_web_browser``.
            search_engine_url: URL that ``search`` navigates to.
        """
        self._initial_url = initial_url
        self._search_engine_url = search_engine_url
        size = pyautogui.size()
        self._screen_size = (size.width, size.height)
        self._current_url = ""
        # Set after Command+Space on macOS so that the next type_text_at call
        # does not click first.
        self._spotlight_pending = False

    def __enter__(self):
        """Returns this instance; nothing is acquired on entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Does no cleanup and does not suppress exceptions."""
        return None

    def screen_size(self) -> tuple[int, int]:
        """Returns the (width, height) captured from pyautogui at construction."""
        return self._screen_size

    def open_web_browser(self) -> EnvState:
        """Opens the initial URL with ``webbrowser.open`` and returns the state.

        Records the initial URL as the current URL and sleeps one second
        before taking the screenshot.
        """
        webbrowser.open(self._initial_url)
        self._current_url = self._initial_url
        time.sleep(1)
        return self.current_state()

    def click_at(self, x: int, y: int) -> EnvState:
        """Clicks at screen coordinates (x, y) and returns the state."""
        pyautogui.click(x, y)
        return self.current_state()

    def hover_at(self, x: int, y: int) -> EnvState:
        """Moves the pointer to (x, y) without clicking and returns the state."""
        pyautogui.moveTo(x, y)
        return self.current_state()

    def type_text_at(
        self,
        x: int,
        y: int,
        text: str,
        press_enter: bool = False,
        clear_before_typing: bool = True,
    ) -> EnvState:
        """Types ``text`` at (x, y) and returns the state.

        Args:
            x: Horizontal coordinate clicked to focus the target.
            y: Vertical coordinate clicked to focus the target.
            text: Text passed to ``pyautogui.write``.
            press_enter: If true, presses Enter after typing.
            clear_before_typing: If true, selects all (Command+A on macOS,
                Ctrl+A elsewhere) and presses Backspace before typing.
        """
        if self._spotlight_pending:
            # Consume the flag and skip the click for this one call.
            self._spotlight_pending = False
        else:
            pyautogui.click(x, y)

        # NOTE(review): the clear step is applied whether or not the click
        # was skipped -- confirm this matches the intended nesting.
        if clear_before_typing:
            select_all_modifier = "command" if sys.platform == "darwin" else "ctrl"
            pyautogui.hotkey(select_all_modifier, "a")
            pyautogui.press("backspace")

        # NOTE(review): pyautogui.write presumably only emits characters it
        # can map to key presses -- confirm behavior for non-ASCII text.
        pyautogui.write(text)
        if press_enter:
            pyautogui.press("enter")
        return self.current_state()

    def scroll_document(
        self, direction: Literal["up", "down", "left", "right"]
    ) -> EnvState:
        """Scrolls by half the stored screen height and returns the state.

        Raises:
            ValueError: If ``direction`` is not one of the four supported values.
        """
        self._scroll(direction, self._screen_size[1] // 2)
        return self.current_state()

    def scroll_at(
        self,
        x: int,
        y: int,
        direction: Literal["up", "down", "left", "right"],
        magnitude: int = 800,
    ) -> EnvState:
        """Moves the pointer to (x, y), scrolls there, and returns the state.

        Args:
            x: Horizontal coordinate to move the pointer to before scrolling.
            y: Vertical coordinate to move the pointer to before scrolling.
            direction: One of "up", "down", "left", "right".
            magnitude: Scroll amount handed to pyautogui.

        Raises:
            ValueError: If ``direction`` is not one of the four supported
                values. The pointer has already been moved at that point.
        """
        pyautogui.moveTo(x, y)
        self._scroll(direction, magnitude)
        return self.current_state()

    @staticmethod
    def _scroll(direction: str, amount: int) -> None:
        """Issues one pyautogui scroll of ``amount`` in ``direction``.

        "up" and "right" pass ``amount`` as given; "down" and "left" pass
        its negation.

        Raises:
            ValueError: If ``direction`` is not a supported value.
        """
        if direction == "up":
            pyautogui.scroll(amount)
        elif direction == "down":
            pyautogui.scroll(-amount)
        elif direction == "left":
            pyautogui.hscroll(-amount)
        elif direction == "right":
            pyautogui.hscroll(amount)
        else:
            # A single formatted message; passing two positional arguments
            # to ValueError would render the message as a tuple.
            raise ValueError(f"Unsupported direction: {direction!r}")

    def wait_5_seconds(self) -> EnvState:
        """Sleeps for five seconds and returns the state."""
        time.sleep(5)
        return self.current_state()

    def go_back(self) -> EnvState:
        """Sends Command+[ on macOS or Alt+Left elsewhere and returns the state.

        The tracked URL is not updated.
        """
        if sys.platform == "darwin":
            pyautogui.hotkey("command", "[")
        else:
            pyautogui.hotkey("alt", "left")
        return self.current_state()

    def go_forward(self) -> EnvState:
        """Sends Command+] on macOS or Alt+Right elsewhere and returns the state.

        The tracked URL is not updated.
        """
        if sys.platform == "darwin":
            pyautogui.hotkey("command", "]")
        else:
            pyautogui.hotkey("alt", "right")
        return self.current_state()

    def search(self) -> EnvState:
        """Navigates to the configured search engine URL."""
        return self.navigate(self._search_engine_url)

    def navigate(self, url: str) -> EnvState:
        """Types ``url`` after a Command+L / Ctrl+L shortcut and presses Enter.

        ``https://`` is prepended when ``url`` does not already start with
        ``http://`` or ``https://`` (compared case-insensitively). The
        resulting URL is recorded as the current URL, and the method sleeps
        one second before taking the screenshot.
        """
        normalized_url = url
        # Lowercase only for the comparison, so that "HTTP://host" is not
        # turned into "https://HTTP://host".
        if not normalized_url.lower().startswith(("http://", "https://")):
            normalized_url = "https://" + normalized_url

        focus_modifier = "command" if sys.platform == "darwin" else "ctrl"
        pyautogui.hotkey(focus_modifier, "l")
        pyautogui.write(normalized_url)
        pyautogui.press("enter")
        self._current_url = normalized_url
        time.sleep(1)
        return self.current_state()

    def key_combination(self, keys: list[str]) -> EnvState:
        """Presses one key, or several keys together, and returns the state.

        Each name is first passed through ``_normalize_key``. A single key
        is sent with ``pyautogui.press``; anything else with
        ``pyautogui.hotkey``. On macOS, Command+Space additionally marks
        Spotlight as pending so the next ``type_text_at`` skips its click.
        """
        normalized_keys = [self._normalize_key(key) for key in keys]
        if len(normalized_keys) == 1:
            pyautogui.press(normalized_keys[0])
        else:
            pyautogui.hotkey(*normalized_keys)

        if sys.platform == "darwin" and normalized_keys == ["command", "space"]:
            self._spotlight_pending = True
        # NOTE(review): the short pause is applied after every combination;
        # confirm it was not meant for the Spotlight case only.
        time.sleep(0.2)
        return self.current_state()

    def drag_and_drop(
        self, x: int, y: int, destination_x: int, destination_y: int
    ) -> EnvState:
        """Left-button drags from (x, y) to the destination and returns the state."""
        pyautogui.moveTo(x, y)
        pyautogui.dragTo(destination_x, destination_y, button="left")
        return self.current_state()

    def current_state(self) -> EnvState:
        """Takes a screenshot and returns it, PNG-encoded, with the tracked URL."""
        screenshot = pyautogui.screenshot()
        with io.BytesIO() as buffer:
            screenshot.save(buffer, format="PNG")
            png_bytes = buffer.getvalue()
        return EnvState(screenshot=png_bytes, url=self._current_url)

    def _normalize_key(self, key: str) -> str:
        """Maps a key name to the spelling passed to pyautogui.

        The name is stripped and lowercased. "controlormeta", "meta" and
        "command" become "command" on macOS and "ctrl" elsewhere. Other
        known aliases are translated through ``_KEY_ALIASES``; anything else
        is returned as the stripped, lowercased name.
        """
        k = key.strip().lower()
        if k in self._PLATFORM_MODIFIER_KEYS:
            return "command" if sys.platform == "darwin" else "ctrl"
        return self._KEY_ALIASES.get(k, k)