diff --git a/apps/site/docs/en/faq.md b/apps/site/docs/en/faq.md index 065273111..8164a134a 100644 --- a/apps/site/docs/en/faq.md +++ b/apps/site/docs/en/faq.md @@ -12,7 +12,7 @@ There are some limitations with Midscene. We are still working on them. 1. The interaction types are limited to only tap, type, keyboard press, and scroll. 2. LLM is not 100% stable. Even GPT-4o can't return the right answer all the time. Following the [Prompting Tips](./prompting-tips) will help improve stability. -3. Since we use JavaScript to retrieve elements from the page, the elements inside the iframe cannot be accessed. +3. Since we use JavaScript to retrieve elements from the page, the elements inside the cross-origin iframe cannot be accessed. 4. We cannot access the native elements of Chrome, like the right-click context menu or file upload dialog. 5. Do not use Midscene to bypass CAPTCHA. Some LLM services are set to decline requests that involve CAPTCHA-solving (e.g., OpenAI), while the DOM of some CAPTCHA pages is not accessible by regular web scraping methods. Therefore, using Midscene to bypass CAPTCHA is not a reliable method. diff --git a/apps/site/docs/zh/faq.md b/apps/site/docs/zh/faq.md index ef62fbbf5..65f5550ee 100644 --- a/apps/site/docs/zh/faq.md +++ b/apps/site/docs/zh/faq.md @@ -14,7 +14,7 @@ Midscene 存在一些局限性,我们仍在努力改进。 1. 交互类型有限:目前仅支持点击、输入、键盘和滚动操作。 2. 稳定性风险:即使是 GPT-4o 也无法确保 100% 返回正确答案。遵循 [编写提示词的技巧](./prompting-tips) 可以帮助提高 SDK 稳定性。 -3. 元素访问受限:由于我们使用 JavaScript 从页面提取元素,所以无法访问 iframe 内部的元素。 +3. 元素访问受限:由于我们使用 JavaScript 从页面提取元素,所以无法访问跨域 iframe 内部的元素。 4. 无法访问 Chrome 原生元素:无法访问右键菜单、文件上传对话框等。 5. 无法绕过验证码:有些 LLM 服务会拒绝涉及验证码解决的请求(例如 OpenAI),而有些验证码页面的 DOM 无法通过常规的网页抓取方法访问。因此,使用 Midscene 绕过验证码不是一个可靠的方法。 diff --git a/packages/web-integration/package.json b/packages/web-integration/package.json index 4b08e4c5a..0919008bf 100644 --- a/packages/web-integration/package.json +++ b/packages/web-integration/package.json @@ -145,6 +145,7 @@ "devtools-protocol": "0.0.1380148", "dotenv": "16.4.5", "fs-extra": "11.2.0", + "http-server": "14.1.1", "js-sha256": "0.11.0", "js-yaml": "4.1.0", "playwright": "1.44.1", diff --git a/packages/web-integration/src/extractor/util.ts b/packages/web-integration/src/extractor/util.ts index dfc97fe01..5143f40d9 100644 --- a/packages/web-integration/src/extractor/util.ts +++ b/packages/web-integration/src/extractor/util.ts @@ -33,10 +33,11 @@ function selectorForValue(val: number | string): string { export function setDataForNode( node: HTMLElement | Node, nodeHash: string, - setToParentNode = false, + setToParentNode: boolean, // should be false for default + currentWindow: typeof window, ): string { const taskId = taskIdKey; - if (!(node instanceof Element)) { + if (!(node instanceof currentWindow.HTMLElement)) { return ''; } if (!taskId) { @@ -47,7 +48,7 @@ export function setDataForNode( const selector = selectorForValue(nodeHash); if (getDebugMode()) { if (setToParentNode) { - if (node.parentNode instanceof HTMLElement) { + if (node.parentNode instanceof currentWindow.HTMLElement) { node.parentNode.setAttribute(taskIdKey, nodeHash.toString()); } } else { @@ -57,17 +58,25 @@ export function setDataForNode( return selector; } -function isElementPartiallyInViewport(rect: ReturnType) { +function isElementPartiallyInViewport( + rect: ReturnType, + currentWindow: typeof window, + currentDocument: typeof document, +) { const elementHeight = rect.height; const elementWidth = rect.width; const viewportRect = { left: 0, top: 0, - width: window.innerWidth || document.documentElement.clientWidth, - height: window.innerHeight || document.documentElement.clientHeight, - right: window.innerWidth || document.documentElement.clientWidth, - bottom: window.innerHeight || document.documentElement.clientHeight, + width: + currentWindow.innerWidth || currentDocument.documentElement.clientWidth, + height: + currentWindow.innerHeight || currentDocument.documentElement.clientHeight, + right: + currentWindow.innerWidth || currentDocument.documentElement.clientWidth, + bottom: + currentWindow.innerHeight || currentDocument.documentElement.clientHeight, x: 0, y: 0, zoom: 1, @@ -84,17 +93,20 @@ function isElementPartiallyInViewport(rect: ReturnType) { return visibleArea / totalArea >= 2 / 3; } -export function getPseudoElementContent(element: Node): { +export function getPseudoElementContent( + element: Node, + currentWindow: typeof window, +): { before: string; after: string; } { - if (!(element instanceof HTMLElement)) { + if (!(element instanceof currentWindow.HTMLElement)) { return { before: '', after: '' }; } - const beforeContent = window + const beforeContent = currentWindow .getComputedStyle(element, '::before') .getPropertyValue('content'); - const afterContent = window + const afterContent = currentWindow .getComputedStyle(element, '::after') .getPropertyValue('content'); return { @@ -103,8 +115,11 @@ export function getPseudoElementContent(element: Node): { }; } -export function hasOverflowY(element: HTMLElement): boolean { - const style = window.getComputedStyle(element); +export function hasOverflowY( + element: HTMLElement, + currentWindow: typeof window, +): boolean { + const style = currentWindow.getComputedStyle(element); return ( style.overflowY === 'scroll' || style.overflowY === 'auto' || @@ -149,18 +164,22 @@ export function overlappedRect( return null; } -export function getRect(el: HTMLElement | Node, baseZoom = 1): ExtractedRect { +export function getRect( + el: HTMLElement | Node, + baseZoom: number, // base zoom + currentWindow: typeof window, +): ExtractedRect { let originalRect: DOMRect; let newZoom = 1; - if (!(el instanceof HTMLElement)) { - const range = document.createRange(); + if (!(el instanceof currentWindow.HTMLElement)) { + const range = currentWindow.document.createRange(); range.selectNodeContents(el); originalRect = range.getBoundingClientRect(); } else { originalRect = el.getBoundingClientRect(); // from Chrome v128, the API would return differently https://docs.google.com/document/d/1AcnDShjT-kEuRaMchZPm5uaIgNZ4OiYtM4JI9qiV8Po/edit if (!('currentCSSZoom' in el)) { - newZoom = Number.parseFloat(window.getComputedStyle(el).zoom) || 1; + newZoom = Number.parseFloat(currentWindow.getComputedStyle(el).zoom) || 1; } } @@ -179,13 +198,17 @@ export function getRect(el: HTMLElement | Node, baseZoom = 1): ExtractedRect { }; } -const isElementCovered = (el: HTMLElement | Node, rect: ExtractedRect) => { +const isElementCovered = ( + el: HTMLElement | Node, + rect: ExtractedRect, + currentWindow: typeof window, +) => { // Gets the center coordinates of the element const x = rect.left + rect.width / 2; const y = rect.top + rect.height / 2; // Gets the element above that point - const topElement = document.elementFromPoint(x, y); + const topElement = currentWindow.document.elementFromPoint(x, y); if (!topElement) { return false; // usually because it's outside the screen } @@ -201,7 +224,7 @@ const isElementCovered = (el: HTMLElement | Node, rect: ExtractedRect) => { return false; } - const rectOfTopElement = getRect(topElement as HTMLElement, 1); + const rectOfTopElement = getRect(topElement as HTMLElement, 1, currentWindow); // get the remaining area of the base element const overlapRect = overlappedRect(rect, rectOfTopElement); @@ -232,6 +255,8 @@ const isElementCovered = (el: HTMLElement | Node, rect: ExtractedRect) => { export function visibleRect( el: HTMLElement | Node | null, + currentWindow: typeof window, + currentDocument: typeof document, baseZoom = 1, ): | { left: number; top: number; width: number; height: number; zoom: number } @@ -242,7 +267,7 @@ export function visibleRect( } if ( - !(el instanceof HTMLElement) && + !(el instanceof currentWindow.HTMLElement) && el.nodeType !== Node.TEXT_NODE && el.nodeName.toLowerCase() !== 'svg' ) { @@ -250,8 +275,8 @@ export function visibleRect( return false; } - if (el instanceof HTMLElement) { - const style = window.getComputedStyle(el); + if (el instanceof currentWindow.HTMLElement) { + const style = currentWindow.getComputedStyle(el); if ( style.display === 'none' || style.visibility === 'hidden' || @@ -262,7 +287,7 @@ export function visibleRect( } } - const rect = getRect(el, baseZoom); + const rect = getRect(el, baseZoom, currentWindow); if (rect.width === 0 && rect.height === 0) { logger(el, 'Element has no size'); @@ -271,18 +296,24 @@ export function visibleRect( // check if the element is covered by another element // if the element is zoomed, the coverage check should be done with the original zoom - if (baseZoom === 1 && isElementCovered(el, rect)) { + if (baseZoom === 1 && isElementCovered(el, rect, currentWindow)) { return false; } - const scrollLeft = window.pageXOffset || document.documentElement.scrollLeft; - const scrollTop = window.pageYOffset || document.documentElement.scrollTop; + const scrollLeft = + currentWindow.pageXOffset || currentDocument.documentElement.scrollLeft; + const scrollTop = + currentWindow.pageYOffset || currentDocument.documentElement.scrollTop; const viewportWidth = - window.innerWidth || document.documentElement.clientWidth; + currentWindow.innerWidth || currentDocument.documentElement.clientWidth; const viewportHeight = - window.innerHeight || document.documentElement.clientHeight; + currentWindow.innerHeight || currentDocument.documentElement.clientHeight; - const isPartiallyInViewport = isElementPartiallyInViewport(rect); + const isPartiallyInViewport = isElementPartiallyInViewport( + rect, + currentWindow, + currentDocument, + ); if (!isPartiallyInViewport) { logger(el, 'Element is completely outside the viewport', { @@ -297,14 +328,14 @@ export function visibleRect( // check if the element is hidden by an ancestor let parent: HTMLElement | Node | null = el; - while (parent && parent !== document.body) { - if (!(parent instanceof HTMLElement)) { + while (parent && parent !== currentDocument.body) { + if (!(parent instanceof currentWindow.HTMLElement)) { parent = parent.parentElement; continue; } - const parentStyle = window.getComputedStyle(parent); + const parentStyle = currentWindow.getComputedStyle(parent); if (parentStyle.overflow === 'hidden') { - const parentRect = getRect(parent, 1); + const parentRect = getRect(parent, 1, currentWindow); const tolerance = 10; if ( @@ -348,23 +379,6 @@ export function validTextNodeContent(node: Node): string | false { return false; } - // const everyChildNodeIsText = Array.from(node.childNodes).every((child) => { - // const tagName = ((child as HTMLElement).tagName || '').toLowerCase(); - // if ( - // tagName === 'script' || - // tagName === 'style' || - // tagName === 'link' || - // tagName !== '#text' - // ) { - // return false; - // } - // return true; - // }); - - // if (!everyChildNodeIsText) { - // return false; - // } - const content = node.textContent || (node as HTMLElement).innerText; if (content && !/^\s*$/.test(content)) { return content.trim(); @@ -375,8 +389,13 @@ export function validTextNodeContent(node: Node): string | false { export function getNodeAttributes( node: HTMLElement | Node, + currentWindow: typeof window, ): Record { - if (!node || !(node instanceof HTMLElement) || !node.attributes) { + if ( + !node || + !(node instanceof currentWindow.HTMLElement) || + !node.attributes + ) { return {}; } @@ -464,7 +483,7 @@ export function setExtractTextWithPositionOnWindow() { } } -export function getDocument(): HTMLElement { +export function getTopDocument(): HTMLElement { const container: HTMLElement = document.body || document; return container; } diff --git a/packages/web-integration/src/extractor/web-extractor.ts b/packages/web-integration/src/extractor/web-extractor.ts index ce103ca3d..c63ca20dc 100644 --- a/packages/web-integration/src/extractor/web-extractor.ts +++ b/packages/web-integration/src/extractor/web-extractor.ts @@ -1,3 +1,4 @@ +import type { Point } from '@midscene/core'; import { CONTAINER_MINI_HEIGHT, CONTAINER_MINI_WIDTH, @@ -13,10 +14,10 @@ import { isTextElement, } from './dom-util'; import { - getDocument, getNodeAttributes, getPseudoElementContent, getRect, + getTopDocument, logger, midsceneGenerateHash, resetNodeHashCacheList, @@ -50,9 +51,12 @@ function tagNameOfNode(node: Node): string { function collectElementInfo( node: Node, nodePath: string, + currentWindow: typeof window, + currentDocument: typeof document, baseZoom = 1, + basePoint: Point = { left: 0, top: 0 }, ): WebElementInfo | null { - const rect = visibleRect(node, baseZoom); + const rect = visibleRect(node, currentWindow, currentDocument, baseZoom); if ( !rect || rect.width < CONTAINER_MINI_WIDTH || @@ -60,6 +64,10 @@ function collectElementInfo( ) { return null; } + if (basePoint.left !== 0 || basePoint.top !== 0) { + rect.left += basePoint.left; + rect.top += basePoint.top; + } // Skip elements that cover the entire viewport, as they are likely background containers // rather than meaningful interactive elements if (rect.height >= window.innerHeight && rect.width >= window.innerWidth) { @@ -67,11 +75,11 @@ function collectElementInfo( } if (isFormElement(node)) { - const attributes = getNodeAttributes(node); + const attributes = getNodeAttributes(node, currentWindow); let valueContent = attributes.value || attributes.placeholder || node.textContent || ''; const nodeHashId = midsceneGenerateHash(node, valueContent, rect); - const selector = setDataForNode(node, nodeHashId); + const selector = setDataForNode(node, nodeHashId, false, currentWindow); const tagName = (node as HTMLElement).tagName.toLowerCase(); if ((node as HTMLElement).tagName.toLowerCase() === 'select') { // Get the selected option using the selectedIndex property @@ -110,18 +118,18 @@ function collectElementInfo( Math.round(rect.top + rect.height / 2), ], zoom: rect.zoom, - screenWidth: window.innerWidth, - screenHeight: window.innerHeight, + screenWidth: currentWindow.innerWidth, + screenHeight: currentWindow.innerHeight, }; return elementInfo; } if (isButtonElement(node)) { - const attributes = getNodeAttributes(node); - const pseudo = getPseudoElementContent(node); + const attributes = getNodeAttributes(node, currentWindow); + const pseudo = getPseudoElementContent(node, currentWindow); const content = node.innerText || pseudo.before || pseudo.after || ''; const nodeHashId = midsceneGenerateHash(node, content, rect); - const selector = setDataForNode(node, nodeHashId); + const selector = setDataForNode(node, nodeHashId, false, currentWindow); const elementInfo: WebElementInfo = { id: nodeHashId, indexId: indexId++, @@ -141,16 +149,16 @@ function collectElementInfo( Math.round(rect.top + rect.height / 2), ], zoom: rect.zoom, - screenWidth: window.innerWidth, - screenHeight: window.innerHeight, + screenWidth: currentWindow.innerWidth, + screenHeight: currentWindow.innerHeight, }; return elementInfo; } if (isImgElement(node)) { - const attributes = getNodeAttributes(node); + const attributes = getNodeAttributes(node, currentWindow); const nodeHashId = midsceneGenerateHash(node, '', rect); - const selector = setDataForNode(node, nodeHashId); + const selector = setDataForNode(node, nodeHashId, false, currentWindow); const elementInfo: WebElementInfo = { id: nodeHashId, indexId: indexId++, @@ -175,8 +183,8 @@ function collectElementInfo( Math.round(rect.top + rect.height / 2), ], zoom: rect.zoom, - screenWidth: window.innerWidth, - screenHeight: window.innerHeight, + screenWidth: currentWindow.innerWidth, + screenHeight: currentWindow.innerHeight, }; return elementInfo; } @@ -186,13 +194,13 @@ function collectElementInfo( if (!text) { return null; } - const attributes = getNodeAttributes(node); + const attributes = getNodeAttributes(node, currentWindow); const attributeKeys = Object.keys(attributes); if (!text.trim() && attributeKeys.length === 0) { return null; } const nodeHashId = midsceneGenerateHash(node, text, rect); - const selector = setDataForNode(node, nodeHashId, true); + const selector = setDataForNode(node, nodeHashId, true, currentWindow); const elementInfo: WebElementInfo = { id: nodeHashId, indexId: indexId++, @@ -213,17 +221,17 @@ function collectElementInfo( content: text, rect, zoom: rect.zoom, - screenWidth: window.innerWidth, - screenHeight: window.innerHeight, + screenWidth: currentWindow.innerWidth, + screenHeight: currentWindow.innerHeight, }; return elementInfo; } // else, consider as a container if (isContainerElement(node)) { - const attributes = getNodeAttributes(node); + const attributes = getNodeAttributes(node, currentWindow); const nodeHashId = midsceneGenerateHash(node, '', rect); - const selector = setDataForNode(node, nodeHashId); + const selector = setDataForNode(node, nodeHashId, false, currentWindow); const elementInfo: WebElementInfo = { id: nodeHashId, nodePath, @@ -243,8 +251,8 @@ function collectElementInfo( Math.round(rect.top + rect.height / 2), ], zoom: rect.zoom, - screenWidth: window.innerWidth, - screenHeight: window.innerHeight, + screenWidth: currentWindow.innerWidth, + screenHeight: currentWindow.innerHeight, }; return elementInfo; } @@ -254,10 +262,8 @@ function collectElementInfo( export function extractTextWithPosition( initNode: Node, debugMode = false, - currentFrame = { id: 0, left: 0, top: 0 }, ): WebElementInfo[] { setDebugMode(debugMode); - setFrameId(currentFrame.id); resetNodeHashCacheList(); indexId = 0; const elementInfoArray: WebElementInfo[] = []; @@ -265,7 +271,10 @@ export function extractTextWithPosition( function dfs( node: Node, nodePath: string, + currentWindow: typeof window, + currentDocument: typeof document, baseZoom = 1, + basePoint: Point = { left: 0, top: 0 }, ): WebElementInfo | null { if (!node) { return null; @@ -276,7 +285,25 @@ export function extractTextWithPosition( return null; } - const elementInfo = collectElementInfo(node, nodePath, baseZoom); + const elementInfo = collectElementInfo( + node, + nodePath, + currentWindow, + currentDocument, + baseZoom, + basePoint, + ); + + if (elementInfo && node instanceof currentWindow.HTMLIFrameElement) { + if ( + (node as HTMLIFrameElement).contentWindow && + (node as HTMLIFrameElement).contentWindow + ) { + // other scripts will handle this + return elementInfo; + } + } + // stop collecting if the node is a Button or Image if ( elementInfo?.nodeType === NodeType.BUTTON || @@ -289,10 +316,17 @@ export function extractTextWithPosition( return elementInfo; } - const rect = getRect(node, baseZoom); + const rect = getRect(node, baseZoom, currentWindow); for (let i = 0; i < node.childNodes.length; i++) { logger('will dfs', node.childNodes[i]); - dfs(node.childNodes[i], `${nodePath}-${i}`, rect.zoom); + dfs( + node.childNodes[i], + `${nodePath}-${i}`, + currentWindow, + currentDocument, + rect.zoom, + basePoint, + ); } if (!elementInfo) { @@ -302,18 +336,40 @@ export function extractTextWithPosition( return elementInfo; } - const rootNode = initNode || getDocument(); - dfs(rootNode, '0'); + const topDocument = getTopDocument(); + const rootNode = initNode || topDocument; - if (currentFrame.left !== 0 || currentFrame.top !== 0) { - for (let i = 0; i < elementInfoArray.length; i++) { - elementInfoArray[i].rect.left += currentFrame.left; - elementInfoArray[i].rect.top += currentFrame.top; - elementInfoArray[i].center[0] += currentFrame.left; - elementInfoArray[i].center[1] += currentFrame.top; - elementInfoArray[i].nodePath = - `frame${currentFrame.id}-${elementInfoArray[i].nodePath}`; + dfs(rootNode, '0', window, document, 1, { left: 0, top: 0 }); + if (rootNode === topDocument) { + // find all the same-origin iframes + const iframes = document.querySelectorAll('iframe'); + for (let i = 0; i < iframes.length; i++) { + const iframe = iframes[i]; + if (iframe.contentDocument && iframe.contentWindow) { + const iframeInfo = collectElementInfo( + iframe, + `${i}`, + window, + document, + 1, + ); + if (iframeInfo) { + // it's still in the viewport + dfs( + iframe.contentDocument.body, + `${i}`, + iframe.contentWindow as any, + iframe.contentDocument, + 1, + { + left: iframeInfo.rect.left, + top: iframeInfo.rect.top, + }, + ); + } + } } } + return elementInfoArray; } diff --git a/packages/web-integration/src/puppeteer/agent-launcher.ts b/packages/web-integration/src/puppeteer/agent-launcher.ts index fa90a1df4..45b967cd7 100644 --- a/packages/web-integration/src/puppeteer/agent-launcher.ts +++ b/packages/web-integration/src/puppeteer/agent-launcher.ts @@ -6,8 +6,8 @@ import type { MidsceneYamlScriptEnv } from '@midscene/core'; export const defaultUA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'; -export const defaultViewportWidth = 1280; -export const defaultViewportHeight = 960; +export const defaultViewportWidth = 1440; +export const defaultViewportHeight = 900; export const defaultViewportScale = process.platform === 'darwin' ? 2 : 1; export const defaultWaitForNetworkIdleTimeout = 10 * 1000; @@ -29,7 +29,9 @@ export async function puppeteerAgentForTarget( // prepare the environment const ua = target.userAgent || defaultUA; let width = defaultViewportWidth; + let preferMaximizedWindow = true; if (target.viewportWidth) { + preferMaximizedWindow = false; assert( typeof target.viewportWidth === 'number', 'viewportWidth must be a number', @@ -39,6 +41,7 @@ export async function puppeteerAgentForTarget( } let height = defaultViewportHeight; if (target.viewportHeight) { + preferMaximizedWindow = false; assert( typeof target.viewportHeight === 'number', 'viewportHeight must be a number', @@ -51,6 +54,7 @@ export async function puppeteerAgentForTarget( } let dpr = defaultViewportScale; if (target.viewportScale) { + preferMaximizedWindow = false; assert( typeof target.viewportScale === 'number', 'viewportScale must be a number', @@ -65,6 +69,10 @@ export async function puppeteerAgentForTarget( }; const headed = preference?.headed || preference?.keepWindow; + + // only maximize window in headed mode + preferMaximizedWindow = preferMaximizedWindow && !!headed; + // launch the browser if (headed && process.env.CI === '1') { console.warn( @@ -80,8 +88,9 @@ export async function puppeteerAgentForTarget( ...(isWindows ? [] : ['--no-sandbox', '--disable-setuid-sandbox']), '--disable-features=PasswordLeakDetection', '--disable-save-password-bubble', - '--start-maximized', - `--window-size=${width},${height}`, + preferMaximizedWindow + ? '--start-maximized' + : `--window-size=${width},${height}`, ], }); freeFn.push({ diff --git a/packages/web-integration/src/yaml/player.ts b/packages/web-integration/src/yaml/player.ts index 5d79bb13d..f5aeac66a 100644 --- a/packages/web-integration/src/yaml/player.ts +++ b/packages/web-integration/src/yaml/player.ts @@ -1,7 +1,6 @@ import assert from 'node:assert'; -import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; +import { existsSync, mkdirSync, writeFileSync } from 'node:fs'; import { dirname, join } from 'node:path'; -import { paramStr, typeStr } from '@/common/ui-utils'; import type { PageAgent } from '@/common/agent'; import type { diff --git a/packages/web-integration/tests/ai/web/puppeteer/utils.ts b/packages/web-integration/tests/ai/web/puppeteer/utils.ts index 1a29e85db..491f7fef4 100644 --- a/packages/web-integration/tests/ai/web/puppeteer/utils.ts +++ b/packages/web-integration/tests/ai/web/puppeteer/utils.ts @@ -28,12 +28,18 @@ export async function launchPage( await originPage.setUserAgent( 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', ); - const response = await originPage.goto(url); - await originPage.waitForNetworkIdle({ - concurrency: 1, - }); - if (response?.status) { - assert(response.status() <= 399, `Page load failed: ${response.status()}`); + let response; + try { + response = await originPage.goto(url); + await originPage.waitForNetworkIdle({ concurrency: 1 }); + } catch (e) { + // ignore navigation error + if (response?.status) { + assert( + response.status() <= 399, + `Page load failed: ${response.status()}`, + ); + } } const page = new PuppeteerWebPage(originPage); @@ -44,6 +50,7 @@ export async function launchPage( const pages = await browser.pages(); await Promise.all(pages.map((page) => page.close())); await browser.close(); + await new Promise((resolve) => setTimeout(resolve, 1000)); }, }; } diff --git a/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap b/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap index 650df4766..2af94c84e 100644 --- a/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap +++ b/packages/web-integration/tests/unit-test/__snapshots__/web-extractor.test.ts.snap @@ -596,14 +596,32 @@ exports[`extractor > basic 1`] = ` }, { "attributes": { - "frameborder": "0", - "height": "200px", + "class": ".two-columns", "htmlTagName": "", "nodeType": "CONTAINER Node", - "src": "child.html", - "width": "400px", }, "content": "", }, + { + "attributes": { + "htmlTagName": "", + "nodeType": "TEXT Node", + }, + "content": "Child Page", + }, + { + "attributes": { + "htmlTagName": "", + "nodeType": "TEXT Node", + }, + "content": "This is a child page.", + }, + { + "attributes": { + "htmlTagName": "", + "nodeType": "TEXT Node", + }, + "content": "Click me", + }, ] `; diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/assets/search-dark.svg b/packages/web-integration/tests/unit-test/fixtures/web-extractor/assets/search-dark.svg new file mode 100644 index 000000000..5e58f36e5 --- /dev/null +++ b/packages/web-integration/tests/unit-test/fixtures/web-extractor/assets/search-dark.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/assets/search.svg b/packages/web-integration/tests/unit-test/fixtures/web-extractor/assets/search.svg new file mode 100644 index 000000000..c4abb56f1 --- /dev/null +++ b/packages/web-integration/tests/unit-test/fixtures/web-extractor/assets/search.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/index.html b/packages/web-integration/tests/unit-test/fixtures/web-extractor/index.html index 0b09a2f8b..30a842519 100644 --- a/packages/web-integration/tests/unit-test/fixtures/web-extractor/index.html +++ b/packages/web-integration/tests/unit-test/fixtures/web-extractor/index.html @@ -190,8 +190,8 @@

Form

@@ -324,8 +324,30 @@

Form

content Right - - + + + +
+
+ + +
+
+ + +
+
\ No newline at end of file diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/input.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/input.png index fd1662a45..69dea01a6 100644 Binary files a/packages/web-integration/tests/unit-test/fixtures/web-extractor/input.png and b/packages/web-integration/tests/unit-test/fixtures/web-extractor/input.png differ diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/output.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/output.png index 232c736e1..74280b6f1 100644 Binary files a/packages/web-integration/tests/unit-test/fixtures/web-extractor/output.png and b/packages/web-integration/tests/unit-test/fixtures/web-extractor/output.png differ diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/input.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/input.png index 753b1d86c..335b7ae87 100644 Binary files a/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/input.png and b/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/input.png differ diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/output.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/output.png index 42dea3688..4db1d52be 100644 Binary files a/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/output.png and b/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/output.png differ diff --git a/packages/web-integration/tests/unit-test/http-server.d.ts b/packages/web-integration/tests/unit-test/http-server.d.ts new file mode 100644 index 000000000..32a0eeb1f --- /dev/null +++ b/packages/web-integration/tests/unit-test/http-server.d.ts @@ -0,0 +1,6 @@ +declare module 'http-server' { + export function createServer(options: http.ServerOptions): { + server: http.Server; + listen: (port: number, host: string, callback: () => void) => void; + }; +} diff --git a/packages/web-integration/tests/unit-test/web-extractor.test.ts b/packages/web-integration/tests/unit-test/web-extractor.test.ts index f583e4d3d..7038d9473 100644 --- a/packages/web-integration/tests/unit-test/web-extractor.test.ts +++ b/packages/web-integration/tests/unit-test/web-extractor.test.ts @@ -4,15 +4,33 @@ import { generateExtractData } from '@/debug'; import StaticPage from '@/playground/static-page'; import type { WebElementInfo } from '@/web-element'; import { imageInfoOfBase64 } from '@midscene/shared/img'; -import { describe, expect, it } from 'vitest'; +import { createServer } from 'http-server'; +import { beforeAll, describe, expect, it } from 'vitest'; import { launchPage } from '../ai/web/puppeteer/utils'; -const pagePath = join(__dirname, './fixtures/web-extractor/index.html'); +const pageDir = join(__dirname, './fixtures/web-extractor'); +const pagePath = join(pageDir, 'index.html'); describe( 'extractor', () => { + const port = 8082; + beforeAll(async () => { + const localServer = await new Promise((resolve, reject) => { + const server = createServer({ + root: pageDir, + }); + server.listen(port, '127.0.0.1', () => { + resolve(server); + }); + }); + + return () => { + (localServer as any).server.close(); + }; + }); + it('basic', async () => { - const { page, reset } = await launchPage(`file://${pagePath}`, { + const { page, reset } = await launchPage(`http://127.0.0.1:${port}`, { viewport: { width: 1080, height: 3000, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index cd680bec5..a1277a3d5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -368,6 +368,9 @@ importers: fs-extra: specifier: 11.2.0 version: 11.2.0 + http-server: + specifier: 14.1.1 + version: 14.1.1 js-sha256: specifier: 0.11.0 version: 0.11.0