-
Notifications
You must be signed in to change notification settings - Fork 805
Expand file tree
/
Copy pathactions.ts
More file actions
533 lines (438 loc) · 16.1 KB
/
actions.ts
File metadata and controls
533 lines (438 loc) · 16.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
/**
* Copyright (C) 2025 Alibaba Group Holding Limited
* All rights reserved.
*/
import type { InteractiveElementDomNode } from './dom/dom_tree/type'
// ======= general utils =======
/**
 * Pause for the given number of seconds.
 *
 * @param seconds - Delay length in seconds; multiplied by 1000 for `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function waitFor(seconds: number): Promise<void> {
	const delayMs = seconds * 1000
	await new Promise<void>((done) => {
		setTimeout(done, delayMs)
	})
}
/**
 * Locate an element accepted by `canScroll` without walking the whole document.
 *
 * Lookup order:
 * 1. Elements matching a short list of selectors, tried selector by selector
 *    in list order (document order within each selector).
 * 2. The stack of elements under the viewport centre; each entry is tested,
 *    followed by up to three of its ancestors.
 *
 * @param canScroll - Predicate deciding whether a candidate qualifies.
 * @returns The first qualifying element, or `undefined` when nothing matches.
 */
function findScrollableContainer(
	canScroll: (el: HTMLElement | null) => boolean
): HTMLElement | undefined {
	// Fast path: selectors that commonly mark scroll containers.
	const candidateSelectors = [
		'[data-scrollable="true"]',
		'.scrollable',
		'.overflow-auto',
		'.overflow-scroll',
		'[class*="scroll"]',
		'main',
		'article',
		'section',
		'aside',
	]
	for (const selector of candidateSelectors) {
		const matches = Array.from(document.querySelectorAll<HTMLElement>(selector))
		const hit = matches.find((node) => canScroll(node))
		if (hit) return hit
	}

	// Fallback: probe whatever is stacked under the middle of the viewport.
	const centerX = window.innerWidth / 2
	const centerY = window.innerHeight / 2
	const stacked = document.elementsFromPoint(centerX, centerY) as HTMLElement[]
	const maxAncestorDepth = 3
	for (const candidate of stacked) {
		if (canScroll(candidate)) return candidate
		// Walk at most `maxAncestorDepth` parents of this candidate.
		let ancestor = candidate.parentElement
		let depth = 0
		while (depth < maxAncestorDepth && ancestor) {
			if (canScroll(ancestor)) return ancestor
			ancestor = ancestor.parentElement
			depth += 1
		}
	}
	return undefined
}
// ======= dom utils =======
/**
 * Announce that the pointer should move to the centre of `element`.
 *
 * Dispatches a `PageAgent::MovePointerTo` CustomEvent on `window` whose
 * `detail` holds the viewport coordinates `{ x, y }` of the element's
 * bounding-box centre, then waits 0.3s.
 * (Presumably a pointer overlay elsewhere listens for this event — confirm.)
 *
 * @param element - Element whose centre is used as the target position.
 */
export async function movePointerToElement(element: HTMLElement) {
	const { left, top, width, height } = element.getBoundingClientRect()
	const detail = {
		x: left + width / 2,
		y: top + height / 2,
	}
	window.dispatchEvent(new CustomEvent('PageAgent::MovePointerTo', { detail }))
	await waitFor(0.3)
}
/**
 * Resolve an index from `selectorMap` to the HTMLElement it refers to.
 *
 * @param selectorMap - Map from index to interactive DOM node.
 * @param index - Key to look up in `selectorMap`.
 * @returns The node's `ref` when it is an `HTMLElement`.
 * @throws Error when the index has no entry, the entry has no `ref`,
 *   or the `ref` is not an `HTMLElement`.
 */
export function getElementByIndex(
	selectorMap: Map<number, InteractiveElementDomNode>,
	index: number
): HTMLElement {
	const node = selectorMap.get(index)
	if (!node) {
		throw new Error(`No interactive element found at index ${index}`)
	}

	const { ref } = node
	if (!ref) {
		throw new Error(`Element at index ${index} does not have a reference`)
	}

	if (ref instanceof HTMLElement) {
		return ref
	}
	throw new Error(`Element at index ${index} is not an HTMLElement`)
}
/** Element recorded by the most recent `clickElement` call; `null` when nothing is tracked. */
let lastClickedElement: HTMLElement | null = null

/**
 * Release the previously clicked element, if any: blur it, dispatch a
 * bubbling, cancelable `mouseout` on it, and stop tracking it.
 */
function blurLastClickedElement() {
	const previous = lastClickedElement
	if (!previous) return

	previous.blur()
	const leaveEvent = new MouseEvent('mouseout', { bubbles: true, cancelable: true })
	previous.dispatchEvent(leaveEvent)
	lastClickedElement = null
}
/**
 * Simulate a user click on `element` with synthetic events.
 *
 * Steps, in order: release the previously clicked element, remember this one,
 * scroll it into view, move the pointer to it, broadcast
 * `PageAgent::ClickPointer` on `window`, wait 0.1s, then dispatch
 * mouseenter -> mouseover -> mousedown -> (focus) -> mouseup -> click and
 * wait another 0.2s.
 *
 * @param element - Target of the simulated click.
 */
export async function clickElement(element: HTMLElement) {
	blurLastClickedElement()
	lastClickedElement = element

	await scrollIntoViewIfNeeded(element)
	await movePointerToElement(element)
	window.dispatchEvent(new CustomEvent('PageAgent::ClickPointer'))
	await waitFor(0.1)

	// Every synthetic mouse event uses the same bubbling, cancelable init.
	const fire = (type: string): void => {
		element.dispatchEvent(new MouseEvent(type, { bubbles: true, cancelable: true }))
	}

	// Hover phase.
	fire('mouseenter')
	fire('mouseover')

	// Press phase; focus sits between press and release so the element
	// holds focus when the click event arrives.
	fire('mousedown')
	element.focus()
	fire('mouseup')
	fire('click')
	// A native `element.click()` call is kept disabled here:
	// element.click()

	// Give click handlers time to finish.
	await waitFor(0.2)
}
/**
 * Read the setter of the `value` accessor defined directly on `prototype`.
 * The non-null assertions mirror the expectation that both the descriptor and
 * its setter exist; a missing descriptor throws at module load.
 */
function lookupNativeValueSetter(prototype: object) {
	const descriptor = Object.getOwnPropertyDescriptor(prototype, 'value')!
	// eslint-disable-next-line @typescript-eslint/unbound-method
	return descriptor.set!
}

// Prototype-level `value` setters, applied with `.call(element, text)` when typing into fields.
// NOTE(review): presumably used so framework-patched instance setters are bypassed — confirm.
const nativeInputValueSetter = lookupNativeValueSetter(window.HTMLInputElement.prototype)
const nativeTextAreaValueSetter = lookupNativeValueSetter(window.HTMLTextAreaElement.prototype)
/**
 * Replace the content of an input, textarea, or contenteditable element with `text`.
 *
 * The element is clicked first. Then:
 * - contenteditable: a clear phase and an insert phase are run, each as
 *   `beforeinput` -> `innerText` mutation -> `input` (the mutation and `input`
 *   are skipped when `beforeinput` is cancelled), followed by `change` and `blur()`.
 * - textarea / input: the value is written through the native prototype
 *   setter and a bubbling `input` event is dispatched.
 * Finally the function waits 0.1s and releases the last clicked element.
 *
 * @param element - Field to type into.
 * @param text - New content.
 * @throws Error when the element is not an input, textarea, or contenteditable.
 */
export async function inputTextElement(element: HTMLElement, text: string) {
	const editable = element.isContentEditable
	const isInput = element instanceof HTMLInputElement
	const isTextArea = element instanceof HTMLTextAreaElement
	if (!isInput && !isTextArea && !editable) {
		throw new Error('Element is not an input, textarea, or contenteditable')
	}

	await clickElement(element)

	if (editable) {
		// Contenteditable support (partial)
		// Not supported:
		// - Monaco/CodeMirror: require direct JS instance access; no universal way to obtain it.
		// - Draft.js: does not respond to synthetic/execCommand/Range/DataTransfer. Unmaintained.
		//
		// Plan A (used here): dispatch synthetic events.
		// Works: LinkedIn, React contenteditable, Quill.
		// Fails: Slate.js
		// Sequence: beforeinput -> mutation -> input -> change -> blur
		const phases: { content: string; init: InputEventInit }[] = [
			// Clear whatever is there.
			{ content: '', init: { inputType: 'deleteContent' } },
			// Insert the new text (the insertion events matter for React apps).
			{ content: text, init: { inputType: 'insertText', data: text } },
		]
		for (const { content, init } of phases) {
			const allowed = element.dispatchEvent(
				new InputEvent('beforeinput', { bubbles: true, cancelable: true, ...init })
			)
			if (!allowed) continue
			element.innerText = content
			element.dispatchEvent(new InputEvent('input', { bubbles: true, ...init }))
		}

		// Change event, for good measure.
		element.dispatchEvent(new Event('change', { bubbles: true }))
		// Blur to trigger validation.
		element.blur()

		// Plan B (not used): execCommand — deprecated but works better for some editors.
		// Works: LinkedIn, Quill, Slate.js, react contenteditable components
		//
		// document.execCommand('selectAll')
		// document.execCommand('delete')
		// document.execCommand('insertText', false, text)
	} else {
		const applyNativeValue = isTextArea ? nativeTextAreaValueSetter : nativeInputValueSetter
		applyNativeValue.call(element, text)
		// The contenteditable branch emits its own input events; plain fields get this one.
		element.dispatchEvent(new Event('input', { bubbles: true }))
	}

	await waitFor(0.1)
	blurLastClickedElement()
}
/**
 * Select the option of a `<select>` whose trimmed text equals the trimmed `optionText`.
 *
 * The option is selected by its index rather than by assigning
 * `select.value`, so the exact matched option is chosen even when several
 * options share the same `value` (or have an empty one). Bubbling `input`
 * and `change` events are then dispatched, in that order, followed by a
 * 0.1s wait.
 *
 * @todo browser-use version is very complex and supports menu tags, need to follow up
 *
 * @param selectElement - The `<select>` element to operate on.
 * @param optionText - Visible text of the option to select (compared after trimming).
 * @throws Error when `selectElement` is not an `HTMLSelectElement`, or when
 *   no option has the requested text.
 */
export async function selectOptionElement(selectElement: HTMLSelectElement, optionText: string) {
	if (!(selectElement instanceof HTMLSelectElement)) {
		throw new Error('Element is not a select element')
	}

	const wantedText = optionText.trim()
	const option = Array.from(selectElement.options).find(
		(opt) => opt.textContent?.trim() === wantedText
	)
	if (!option) {
		throw new Error(`Option with text "${optionText}" not found in select element`)
	}

	// Assigning `value` would pick the FIRST option carrying that value, which
	// is the wrong one when values are duplicated; the index is unambiguous.
	selectElement.selectedIndex = option.index

	// A user-driven selection fires `input` before `change`; mirror that order.
	selectElement.dispatchEvent(new Event('input', { bubbles: true }))
	selectElement.dispatchEvent(new Event('change', { bubbles: true }))

	await waitFor(0.1) // Wait to ensure change event processing completes
}
/** HTMLElement that may additionally expose an optional `scrollIntoViewIfNeeded` method. */
interface ScrollableElement extends HTMLElement {
	scrollIntoViewIfNeeded?: (centerIfNeeded?: boolean) => void
}

/**
 * Bring `element` into view.
 *
 * Uses the element's own `scrollIntoViewIfNeeded()` when it exists (called
 * with no arguments); otherwise falls back to `scrollIntoView` with
 * `{ behavior: 'auto', block: 'center', inline: 'nearest' }`.
 * No delay is awaited afterwards.
 *
 * @param element - Element to reveal.
 */
export async function scrollIntoViewIfNeeded(element: HTMLElement) {
	const candidate = element as ScrollableElement
	if (typeof candidate.scrollIntoViewIfNeeded !== 'function') {
		// @todo visibility check
		element.scrollIntoView({ behavior: 'auto', block: 'center', inline: 'nearest' })
		return
	}
	candidate.scrollIntoViewIfNeeded()
}
/**
 * Scroll vertically, either within the ancestor chain of `element` or at page level.
 *
 * @param down - Direction: `true` scrolls down, `false` scrolls up.
 * @param scroll_amount - Distance in pixels. Only its magnitude is used; the
 *   direction always comes from `down`, so callers passing signed or unsigned
 *   amounts get the same result.
 * @param element - Optional starting element. When given, the element and up
 *   to nine further ancestors are probed and the first one that actually
 *   moves is scrolled; nothing outside that chain is touched.
 * @returns A human-readable message describing what was scrolled and by how much,
 *   or why nothing moved.
 */
export async function scrollVertically(
	down: boolean,
	scroll_amount: number,
	element?: HTMLElement | null
) {
	const scrollableOverflow = /(auto|scroll|overlay)/
	// Direction comes from the flag, distance from the magnitude.
	const distance = Math.abs(scroll_amount)
	const dy = down ? distance : -distance

	// Element-specific scrolling if element is provided
	if (element) {
		let current: HTMLElement | null = element
		for (let attempts = 0; current && attempts < 10; attempts++) {
			const overflowY = window.getComputedStyle(current).overflowY
			const hasOverflowingContent = current.scrollHeight > current.clientHeight

			if (scrollableOverflow.test(overflowY) && hasOverflowingContent) {
				const before = current.scrollTop
				const maxScroll = current.scrollHeight - current.clientHeight
				// Containers move by a third of the requested distance, clamped to
				// the remaining scroll range in the chosen direction.
				const step = dy > 0 ? Math.min(dy / 3, maxScroll - before) : Math.max(dy / 3, -before)
				current.scrollTop = before + step

				// Read back the position: only a real movement counts as success.
				const delta = current.scrollTop - before
				if (Math.abs(delta) > 0.5) {
					return `Scrolled container (${current.tagName}) by ${delta}px`
				}
			}

			if (current === document.body || current === document.documentElement) {
				break
			}
			current = current.parentElement
		}
		return `No scrollable container found for element (${element.tagName})`
	}

	// Page-level scrolling (default or fallback)
	// A container qualifies when it overflows vertically, allows scrolling,
	// and is at least half as tall as the viewport.
	const canScroll = (el: HTMLElement | null): boolean =>
		!!el &&
		scrollableOverflow.test(getComputedStyle(el).overflowY) &&
		el.scrollHeight > el.clientHeight &&
		el.clientHeight >= window.innerHeight * 0.5

	// Prefer a scrollable ancestor of the focused element.
	let focused = document.activeElement as HTMLElement | null
	while (focused && !canScroll(focused) && focused !== document.body) {
		focused = focused.parentElement
	}

	const target: HTMLElement =
		focused && canScroll(focused)
			? focused
			: (findScrollableContainer(canScroll) ??
				(document.scrollingElement as HTMLElement | null) ??
				document.documentElement)

	if (
		target === document.scrollingElement ||
		target === document.documentElement ||
		target === document.body
	) {
		// Page-level scroll
		const scrollBefore = window.scrollY
		const scrollMax = document.documentElement.scrollHeight - window.innerHeight
		window.scrollBy(0, dy)
		const scrollAfter = window.scrollY
		const scrolled = scrollAfter - scrollBefore

		if (Math.abs(scrolled) < 1) {
			return dy > 0
				? `⚠️ Already at the bottom of the page, cannot scroll down further.`
				: `⚠️ Already at the top of the page, cannot scroll up further.`
		}

		const reachedBottom = dy > 0 && scrollAfter >= scrollMax - 1
		const reachedTop = dy < 0 && scrollAfter <= 1
		if (reachedBottom) return `✅ Scrolled page by ${scrolled}px. Reached the bottom of the page.`
		if (reachedTop) return `✅ Scrolled page by ${scrolled}px. Reached the top of the page.`
		return `✅ Scrolled page by ${scrolled}px.`
	}

	// Container scroll
	const scrollBefore = target.scrollTop
	const scrollMax = target.scrollHeight - target.clientHeight
	target.scrollBy({ top: dy, behavior: 'smooth' })
	// NOTE(review): a smooth scroll may still be animating after 0.1s, in which
	// case the delta reported below is only partial — confirm this is acceptable.
	await waitFor(0.1)
	const scrollAfter = target.scrollTop
	const scrolled = scrollAfter - scrollBefore

	if (Math.abs(scrolled) < 1) {
		return dy > 0
			? `⚠️ Already at the bottom of container (${target.tagName}), cannot scroll down further.`
			: `⚠️ Already at the top of container (${target.tagName}), cannot scroll up further.`
	}

	const reachedBottom = dy > 0 && scrollAfter >= scrollMax - 1
	const reachedTop = dy < 0 && scrollAfter <= 1
	if (reachedBottom)
		return `✅ Scrolled container (${target.tagName}) by ${scrolled}px. Reached the bottom.`
	if (reachedTop)
		return `✅ Scrolled container (${target.tagName}) by ${scrolled}px. Reached the top.`
	return `✅ Scrolled container (${target.tagName}) by ${scrolled}px.`
}
/**
 * Scroll horizontally, either within the ancestor chain of `element` or at page level.
 *
 * @param right - Direction: `true` scrolls right, `false` scrolls left.
 * @param scroll_amount - Distance in pixels. Only its magnitude is used; the
 *   direction always comes from `right`, so a negative amount can no longer
 *   silently invert the flag.
 * @param element - Optional starting element. When given, the element and up
 *   to nine further ancestors are probed and the first one that actually
 *   moves is scrolled; nothing outside that chain is touched.
 * @returns A human-readable message describing what was scrolled and by how much,
 *   or why nothing moved.
 */
export async function scrollHorizontally(
	right: boolean,
	scroll_amount: number,
	element?: HTMLElement | null
) {
	const scrollableOverflow = /(auto|scroll|overlay)/
	// Direction comes from the flag, distance from the magnitude.
	const distance = Math.abs(scroll_amount)
	const dx = right ? distance : -distance

	// Element-specific scrolling if element is provided
	if (element) {
		let current: HTMLElement | null = element
		for (let attempts = 0; current && attempts < 10; attempts++) {
			const overflowX = window.getComputedStyle(current).overflowX
			const hasOverflowingContent = current.scrollWidth > current.clientWidth

			if (scrollableOverflow.test(overflowX) && hasOverflowingContent) {
				const before = current.scrollLeft
				const maxScroll = current.scrollWidth - current.clientWidth
				// Containers move by a third of the requested distance, clamped to
				// the remaining scroll range in the chosen direction.
				const step = dx > 0 ? Math.min(dx / 3, maxScroll - before) : Math.max(dx / 3, -before)
				current.scrollLeft = before + step

				// Read back the position: only a real movement counts as success.
				const delta = current.scrollLeft - before
				if (Math.abs(delta) > 0.5) {
					return `Scrolled container (${current.tagName}) horizontally by ${delta}px`
				}
			}

			if (current === document.body || current === document.documentElement) {
				break
			}
			current = current.parentElement
		}
		return `No horizontally scrollable container found for element (${element.tagName})`
	}

	// Page-level scrolling (default or fallback)
	// A container qualifies when it overflows horizontally, allows scrolling,
	// and is at least half as wide as the viewport.
	const canScroll = (el: HTMLElement | null): boolean =>
		!!el &&
		scrollableOverflow.test(getComputedStyle(el).overflowX) &&
		el.scrollWidth > el.clientWidth &&
		el.clientWidth >= window.innerWidth * 0.5

	// Prefer a scrollable ancestor of the focused element.
	let focused = document.activeElement as HTMLElement | null
	while (focused && !canScroll(focused) && focused !== document.body) {
		focused = focused.parentElement
	}

	const target: HTMLElement =
		focused && canScroll(focused)
			? focused
			: (findScrollableContainer(canScroll) ??
				(document.scrollingElement as HTMLElement | null) ??
				document.documentElement)

	if (
		target === document.scrollingElement ||
		target === document.documentElement ||
		target === document.body
	) {
		// Page-level scroll
		const scrollBefore = window.scrollX
		const scrollMax = document.documentElement.scrollWidth - window.innerWidth
		window.scrollBy(dx, 0)
		const scrollAfter = window.scrollX
		const scrolled = scrollAfter - scrollBefore

		if (Math.abs(scrolled) < 1) {
			return dx > 0
				? `⚠️ Already at the right edge of the page, cannot scroll right further.`
				: `⚠️ Already at the left edge of the page, cannot scroll left further.`
		}

		const reachedRight = dx > 0 && scrollAfter >= scrollMax - 1
		const reachedLeft = dx < 0 && scrollAfter <= 1
		if (reachedRight)
			return `✅ Scrolled page by ${scrolled}px. Reached the right edge of the page.`
		if (reachedLeft) return `✅ Scrolled page by ${scrolled}px. Reached the left edge of the page.`
		return `✅ Scrolled page horizontally by ${scrolled}px.`
	}

	// Container scroll
	const scrollBefore = target.scrollLeft
	const scrollMax = target.scrollWidth - target.clientWidth
	target.scrollBy({ left: dx, behavior: 'smooth' })
	// NOTE(review): a smooth scroll may still be animating after 0.1s, in which
	// case the delta reported below is only partial — confirm this is acceptable.
	await waitFor(0.1)
	const scrollAfter = target.scrollLeft
	const scrolled = scrollAfter - scrollBefore

	if (Math.abs(scrolled) < 1) {
		return dx > 0
			? `⚠️ Already at the right edge of container (${target.tagName}), cannot scroll right further.`
			: `⚠️ Already at the left edge of container (${target.tagName}), cannot scroll left further.`
	}

	const reachedRight = dx > 0 && scrollAfter >= scrollMax - 1
	const reachedLeft = dx < 0 && scrollAfter <= 1
	if (reachedRight)
		return `✅ Scrolled container (${target.tagName}) by ${scrolled}px. Reached the right edge.`
	if (reachedLeft)
		return `✅ Scrolled container (${target.tagName}) by ${scrolled}px. Reached the left edge.`
	return `✅ Scrolled container (${target.tagName}) horizontally by ${scrolled}px.`
}