From f67714d12da23f3e78f4010e6bb65c25f5c52640 Mon Sep 17 00:00:00 2001 From: "zhouxiao.shaw" Date: Fri, 7 Feb 2025 12:05:26 +0800 Subject: [PATCH] refactor(extract): modify dependencies --- .gitignore | 1 + biome.json | 1 + packages/cli/tsconfig.json | 2 +- packages/midscene/src/ai-model/prompt/util.ts | 21 +-- packages/shared/modern.config.ts | 2 + .../modern.inspect.config.ts | 0 packages/shared/package.json | 28 ++- .../src/extractor/client-extractor.ts | 10 +- .../src/extractor/constants.ts | 2 +- .../src/extractor/debug.ts | 0 .../src/extractor/dom-util.ts | 22 ++- .../src/extractor/index.ts | 4 +- packages/shared/src/extractor/tree.ts | 173 ++++++++++++++++++ .../src/extractor/util.ts | 44 ++--- .../src/extractor/web-extractor.ts | 22 +-- packages/shared/src/img/box-select.ts | 2 +- packages/shared/src/img/draw-box.ts | 3 +- packages/shared/src/types/index.ts | 33 +++- .../tests/unit-test/tree.test.ts | 4 +- packages/shared/tsconfig.json | 3 - packages/visualizer/scripts/build-html.ts | 2 +- packages/web-integration/modern.config.ts | 22 +++ packages/web-integration/package.json | 6 +- packages/web-integration/src/appium/page.ts | 5 +- .../src/chrome-extension/page.ts | 4 +- packages/web-integration/src/common/tasks.ts | 2 +- packages/web-integration/src/common/utils.ts | 40 +--- .../web-integration/src/debug/img/index.ts | 149 --------------- .../web-integration/src/debug/img/util.ts | 33 ---- packages/web-integration/src/debug/index.ts | 4 +- packages/web-integration/src/page.ts | 2 +- .../src/puppeteer/base-page.ts | 7 +- packages/web-integration/src/web-element.ts | 3 +- .../tests/unit-test/web-extractor.test.ts | 3 +- 34 files changed, 349 insertions(+), 310 deletions(-) rename packages/{web-integration => shared}/modern.inspect.config.ts (100%) rename packages/{web-integration => shared}/src/extractor/client-extractor.ts (95%) rename packages/{web-integration => shared}/src/extractor/constants.ts (62%) rename packages/{web-integration => shared}/src/extractor/debug.ts (100%) rename packages/{web-integration => shared}/src/extractor/dom-util.ts (77%) rename packages/{web-integration => shared}/src/extractor/index.ts (86%) create mode 100644 packages/shared/src/extractor/tree.ts rename packages/{web-integration => shared}/src/extractor/util.ts (92%) rename packages/{web-integration => shared}/src/extractor/web-extractor.ts (96%) rename packages/{midscene => shared}/tests/unit-test/tree.test.ts (98%) delete mode 100644 packages/web-integration/src/debug/img/index.ts delete mode 100644 packages/web-integration/src/debug/img/util.ts diff --git a/.gitignore b/.gitignore index 7df6cec2f..fa91aa207 100644 --- a/.gitignore +++ b/.gitignore @@ -96,6 +96,7 @@ test-results/ playwright-report/ blob-report/ playwright/.cache/ +iife-script/ # Midscene.js dump files __ai_responses__/ diff --git a/biome.json b/biome.json index aafc4fcc7..12efcf7e9 100644 --- a/biome.json +++ b/biome.json @@ -16,6 +16,7 @@ "**/doc_build", "*-dump.json", "test-results/**", + "iife-script/**", "script_get_all_texts.tmp.js", "**/playwright-report/**", "**/todo-report.spec.ts-snapshots/**", diff --git a/packages/cli/tsconfig.json b/packages/cli/tsconfig.json index 54623c412..231cacd2b 100644 --- a/packages/cli/tsconfig.json +++ b/packages/cli/tsconfig.json @@ -7,7 +7,7 @@ "forceConsistentCasingInFileNames": true, "isolatedModules": true, "jsx": "preserve", - "lib": ["ESNext"], + "lib": ["ESNext", "DOM"], "moduleResolution": "node", "paths": { "@/*": ["./src/*"] diff --git a/packages/midscene/src/ai-model/prompt/util.ts b/packages/midscene/src/ai-model/prompt/util.ts index c8642ef99..a61d45ad3 100644 --- a/packages/midscene/src/ai-model/prompt/util.ts +++ b/packages/midscene/src/ai-model/prompt/util.ts @@ -1,10 +1,10 @@ import assert from 'node:assert'; import { MATCH_BY_POSITION, getAIConfig } from '@/env'; import { imageInfoOfBase64 } from '@/image'; -import { descriptionOfTree } from '@/tree'; import type { BaseElement, ElementTreeNode, Size, UIContext } from '@/types'; import { PromptTemplate } from '@langchain/core/prompts'; import { NodeType } from '@midscene/shared/constants'; +import { descriptionOfTree, treeToList } from '@midscene/shared/extractor'; import { generateHashId } from '@midscene/shared/utils'; import type { ResponseFormatJSONSchema } from 'openai/resources'; @@ -261,21 +261,10 @@ export async function describeUserPage< const treeRoot = context.tree; // dfs tree, save the id and element info const idElementMap: Record = {}; - const flatElements: ElementType[] = []; - function dfsTree(node: ElementTreeNode) { - if (node?.node) { - idElementMap[node.node.id] = node.node; - - if (typeof node.node.indexId !== 'undefined') { - idElementMap[`${node.node.indexId}`] = node.node; - } - flatElements.push(node.node); - } - for (let i = 0; i < (node.children || []).length; i++) { - dfsTree(node.children[i]); - } - } - dfsTree(treeRoot); + const flatElements: ElementType[] = treeToList(treeRoot); + flatElements.forEach((element) => { + idElementMap[element.id] = element; + }); const contentTree = await descriptionOfTree( treeRoot, diff --git a/packages/shared/modern.config.ts b/packages/shared/modern.config.ts index 41680d1e3..5d3980592 100644 --- a/packages/shared/modern.config.ts +++ b/packages/shared/modern.config.ts @@ -5,6 +5,8 @@ const commonConfig = { index: './src/index.ts', img: './src/img/index.ts', constants: './src/constants/index.ts', + extractor: './src/extractor/index.ts', + 'extractor-debug': './src/extractor/debug.ts', fs: './src/fs/index.ts', utils: './src/utils.ts', 'us-keyboard-layout': './src/us-keyboard-layout.ts', diff --git a/packages/web-integration/modern.inspect.config.ts b/packages/shared/modern.inspect.config.ts similarity index 100% rename from packages/web-integration/modern.inspect.config.ts rename to packages/shared/modern.inspect.config.ts diff --git a/packages/shared/package.json b/packages/shared/package.json index a36c8a2c4..c77c85610 100644 --- a/packages/shared/package.json +++ b/packages/shared/package.json @@ -38,6 +38,16 @@ "require": "./dist/lib/utils.js", "import": "./dist/es/utils.js" }, + "./extractor": { + "types": "./src/extractor/index.ts", + "require": "./dist/lib/extractor.js", + "import": "./dist/es/extractor.js" + }, + "./extractor-debug": { + "types": "./src/extractor/debug.ts", + "require": "./dist/lib/extractor-debug.js", + "import": "./dist/es/extractor-debug.js" + }, "./keyboard-layout": { "types": "./src/us-keyboard-layout.ts", "require": "./dist/lib/us-keyboard-layout.js", @@ -52,13 +62,17 @@ "browser/img": ["./src/img/index.ts"], "fs": ["./src/fs/index.ts"], "utils": ["./src/utils.ts"], + "extractor": ["./src/extractor/index.ts"], + "extractor-debug": ["./src/extractor/debug.ts"], "keyboard-layout": ["./src/us-keyboard-layout.ts"] } }, "files": ["dist", "src", "README.md"], "scripts": { "dev": "modern dev", - "build": "modern build", + "build": "npm run build:pkg && npm run build:script", + "build:pkg": "modern build -c ./modern.config.ts", + "build:script": "modern build -c ./modern.inspect.config.ts", "build:watch": "modern build -w", "reset": "rimraf ./**/node_modules", "lint": "modern lint", @@ -120,6 +134,16 @@ "require": "./dist/lib/utils.js", "import": "./dist/es/utils.js" }, + "./extractor": { + "types": "./dist/lib/extractor.d.ts", + "require": "./dist/lib/extractor.js", + "import": "./dist/es/extractor.js" + }, + "./extractor-debug": { + "types": "./dist/lib/extractor-debug.d.ts", + "require": "./dist/lib/extractor-debug.js", + "import": "./dist/es/extractor-debug.js" + }, "./keyboard-layout": { "types": "./dist/lib/us-keyboard-layout.d.ts", "require": "./dist/lib/us-keyboard-layout.js", @@ -134,6 +158,8 @@ "browser/img": ["./dist/browser/img.d.ts"], "fs": ["./dist/lib/fs.d.ts"], "utils": ["./dist/lib/utils.d.ts"], + "extractor": ["./dist/lib/extractor.d.ts"], + "extractor-debug": ["./dist/lib/extractor-debug.d.ts"], "keyboard-layout": ["./dist/lib/us-keyboard-layout.d.ts"] } } diff --git a/packages/web-integration/src/extractor/client-extractor.ts b/packages/shared/src/extractor/client-extractor.ts similarity index 95% rename from packages/web-integration/src/extractor/client-extractor.ts rename to packages/shared/src/extractor/client-extractor.ts index 7d570bc22..34b8dccd5 100644 --- a/packages/web-integration/src/extractor/client-extractor.ts +++ b/packages/shared/src/extractor/client-extractor.ts @@ -5,17 +5,17 @@ import { generateId, midsceneGenerateHash } from './util'; // https://github.com/appium/appium/tree/master/packages/universal-xml-plugin // Definition of NodeDescriptor interface interface NodeDescriptor { - node: Node; + node: globalThis.Node; children: NodeDescriptor[]; } // Retrieve attributes from a node -function getNodeAttributes(node: Node): { [key: string]: string } { +function getNodeAttributes(node: globalThis.Node): { [key: string]: string } { const attrs: { [key: string]: string } = {}; // Check if node exists and its type is ELEMENT_NODE if (node && node.nodeType === 1) { - const element = node as Element; + const element = node as globalThis.Element; for (let i = 0; i < element.attributes.length; i++) { const attr = element.attributes[i]; @@ -115,7 +115,9 @@ function getXPathForElement(element: Node): string { } // Perform DFS traversal and collect element information -export function extractTextWithPosition(initNode: Document): ElementInfo[] { +export function extractTextWithPosition( + initNode: globalThis.Document, +): ElementInfo[] { const elementInfoArray: ElementInfo[] = []; let nodeIndex = 1; diff --git a/packages/web-integration/src/extractor/constants.ts b/packages/shared/src/extractor/constants.ts similarity index 62% rename from packages/web-integration/src/extractor/constants.ts rename to packages/shared/src/extractor/constants.ts index 63704b8c5..2d982f28a 100644 --- a/packages/web-integration/src/extractor/constants.ts +++ b/packages/shared/src/extractor/constants.ts @@ -2,4 +2,4 @@ export { NodeType, TEXT_MAX_SIZE, TEXT_SIZE_THRESHOLD, -} from '@midscene/shared/constants'; +} from '../constants'; diff --git a/packages/web-integration/src/extractor/debug.ts b/packages/shared/src/extractor/debug.ts similarity index 100% rename from packages/web-integration/src/extractor/debug.ts rename to packages/shared/src/extractor/debug.ts diff --git a/packages/web-integration/src/extractor/dom-util.ts b/packages/shared/src/extractor/dom-util.ts similarity index 77% rename from packages/web-integration/src/extractor/dom-util.ts rename to packages/shared/src/extractor/dom-util.ts index 42b08e54b..06e4bcfdc 100644 --- a/packages/web-integration/src/extractor/dom-util.ts +++ b/packages/shared/src/extractor/dom-util.ts @@ -1,4 +1,4 @@ -export function isFormElement(node: Node) { +export function isFormElement(node: globalThis.Node) { return ( node instanceof HTMLElement && (node.tagName.toLowerCase() === 'input' || @@ -8,11 +8,15 @@ export function isFormElement(node: Node) { ); } -export function isButtonElement(node: Node): node is HTMLButtonElement { +export function isButtonElement( + node: globalThis.Node, +): node is globalThis.HTMLButtonElement { return node instanceof HTMLElement && node.tagName.toLowerCase() === 'button'; } -export function isImgElement(node: Node): node is HTMLImageElement { +export function isImgElement( + node: globalThis.Node, +): node is globalThis.HTMLImageElement { // check if the node is an image element if (!includeBaseElement(node) && node instanceof Element) { const computedStyle = window.getComputedStyle(node); @@ -32,7 +36,7 @@ export function isImgElement(node: Node): node is HTMLImageElement { ); } -function isIconfont(node: Node): boolean { +function isIconfont(node: globalThis.Node): boolean { if (node instanceof Element) { const computedStyle = window.getComputedStyle(node); const fontFamilyValue = computedStyle.fontFamily || ''; @@ -42,11 +46,15 @@ function isIconfont(node: Node): boolean { return false; } -export function isTextElement(node: Node): node is HTMLTextAreaElement { +export function isTextElement( + node: globalThis.Node, +): node is globalThis.HTMLTextAreaElement { return node.nodeName.toLowerCase() === '#text' && !isIconfont(node); } -export function isContainerElement(node: Node): node is HTMLElement { +export function isContainerElement( + node: globalThis.Node, +): node is globalThis.HTMLElement { if (!(node instanceof HTMLElement)) return false; // include other base elements @@ -63,7 +71,7 @@ export function isContainerElement(node: Node): node is HTMLElement { return false; } -function includeBaseElement(node: Node) { +function includeBaseElement(node: globalThis.Node) { if (!(node instanceof HTMLElement)) return false; // include text diff --git a/packages/web-integration/src/extractor/index.ts b/packages/shared/src/extractor/index.ts similarity index 86% rename from packages/web-integration/src/extractor/index.ts rename to packages/shared/src/extractor/index.ts index ae3c410d7..83b76439b 100644 --- a/packages/web-integration/src/extractor/index.ts +++ b/packages/shared/src/extractor/index.ts @@ -1,4 +1,4 @@ -import type { NodeType } from '@midscene/shared/constants'; +import type { NodeType } from '../constants'; export interface ElementInfo { id: string; @@ -20,6 +20,8 @@ export interface ElementNode { children: ElementNode[]; } +export { descriptionOfTree, traverseTree, treeToList } from './tree'; + export { extractTextWithPosition as webExtractTextWithPosition } from './web-extractor'; export { extractTextWithPosition as clientExtractTextWithPosition } from './client-extractor'; diff --git a/packages/shared/src/extractor/tree.ts b/packages/shared/src/extractor/tree.ts new file mode 100644 index 000000000..aec6cd775 --- /dev/null +++ b/packages/shared/src/extractor/tree.ts @@ -0,0 +1,173 @@ +import type { BaseElement, ElementTreeNode } from '../types'; + +export function truncateText( + text: string | number | object | undefined, + maxLength = 150, +) { + if (typeof text === 'undefined') { + return ''; + } + + if (typeof text === 'object') { + text = JSON.stringify(text); + } + + if (typeof text === 'number') { + return text.toString(); + } + + if (typeof text === 'string' && text.length > maxLength) { + return `${text.slice(0, maxLength)}...`; + } + + if (typeof text === 'string') { + return text.trim(); + } + + return ''; +} + +export function trimAttributes( + attributes: Record, + truncateTextLength?: number, +) { + const tailorAttributes = Object.keys(attributes).reduce( + (res, currentKey: string) => { + const attributeVal = (attributes as any)[currentKey]; + if ( + currentKey === 'style' || + currentKey === 'src' || + currentKey === 'htmlTagName' || + currentKey === 'nodeType' + ) { + return res; + } + + res[currentKey] = truncateText(attributeVal, truncateTextLength); + return res; + }, + {} as BaseElement['attributes'], + ); + return tailorAttributes; +} + +const nodeSizeThreshold = 4; +export function descriptionOfTree< + ElementType extends BaseElement = BaseElement, +>( + tree: ElementTreeNode, + truncateTextLength?: number, + filterNonTextContent = false, +) { + const attributesString = (kv: Record) => { + return Object.entries(kv) + .map( + ([key, value]) => `${key}="${truncateText(value, truncateTextLength)}"`, + ) + .join(' '); + }; + + function buildContentTree( + node: ElementTreeNode, + indent = 0, + ): string { + let before = ''; + let contentWithIndent = ''; + let after = ''; + let emptyNode = true; + const indentStr = ' '.repeat(indent); + + let children = ''; + for (let i = 0; i < (node.children || []).length; i++) { + const childContent = buildContentTree(node.children[i], indent + 1); + if (childContent) { + children += `\n${childContent}`; + } + } + + if ( + node.node && + node.node.rect.width > nodeSizeThreshold && + node.node.rect.height > nodeSizeThreshold && + (!filterNonTextContent || (filterNonTextContent && node.node.content)) + ) { + emptyNode = false; + let nodeTypeString: string; + if (node.node.attributes?.htmlTagName) { + nodeTypeString = node.node.attributes.htmlTagName.replace(/[<>]/g, ''); + } else { + nodeTypeString = node.node.attributes.nodeType + .replace(/\sNode$/, '') + .toLowerCase(); + } + const markerId = node.node.indexId; + const markerIdString = markerId ? `markerId="${markerId}"` : ''; + const rectAttribute = node.node.rect + ? { + left: node.node.rect.left, + top: node.node.rect.top, + width: node.node.rect.width, + height: node.node.rect.height, + } + : {}; + before = `<${nodeTypeString} id="${node.node.id}" ${markerIdString} ${attributesString(trimAttributes(node.node.attributes || {}, truncateTextLength))} ${attributesString(rectAttribute)}>`; + const content = truncateText(node.node.content, truncateTextLength); + contentWithIndent = content ? `\n${indentStr} ${content}` : ''; + after = ``; + } else if (!filterNonTextContent) { + if (!children.trim().startsWith('<>')) { + before = '<>'; + contentWithIndent = ''; + after = ''; + } + } + + if (emptyNode && !children.trim()) { + return ''; + } + + const result = `${indentStr}${before}${contentWithIndent}${children}\n${indentStr}${after}`; + if (result.trim()) { + return result; + } + return ''; + } + + const result = buildContentTree(tree); + return result.replace(/^\s*\n/gm, ''); +} + +export function treeToList( + tree: ElementTreeNode, +): T[] { + const result: T[] = []; + function dfs(node: ElementTreeNode) { + if (node.node) { + result.push(node.node); + } + for (const child of node.children) { + dfs(child); + } + } + dfs(tree); + return result; +} + +export function traverseTree< + T extends BaseElement, + ReturnNodeType extends BaseElement, +>( + tree: ElementTreeNode, + onNode: (node: T) => ReturnNodeType, +): ElementTreeNode { + function dfs(node: ElementTreeNode) { + if (node.node) { + node.node = onNode(node.node) as any; + } + for (const child of node.children) { + dfs(child); + } + } + dfs(tree); + return tree as any; +} diff --git a/packages/web-integration/src/extractor/util.ts b/packages/shared/src/extractor/util.ts similarity index 92% rename from packages/web-integration/src/extractor/util.ts rename to packages/shared/src/extractor/util.ts index 5143f40d9..5b779d042 100644 --- a/packages/web-integration/src/extractor/util.ts +++ b/packages/shared/src/extractor/util.ts @@ -1,4 +1,4 @@ -import { generateHashId } from '@midscene/shared/utils'; +import { generateHashId } from '../utils'; import { extractTextWithPosition } from './web-extractor'; // import { TEXT_MAX_SIZE } from './constants'; @@ -31,10 +31,10 @@ function selectorForValue(val: number | string): string { } export function setDataForNode( - node: HTMLElement | Node, + node: globalThis.HTMLElement | globalThis.Node, nodeHash: string, setToParentNode: boolean, // should be false for default - currentWindow: typeof window, + currentWindow: typeof globalThis.window, ): string { const taskId = taskIdKey; if (!(node instanceof currentWindow.HTMLElement)) { @@ -94,8 +94,8 @@ function isElementPartiallyInViewport( } export function getPseudoElementContent( - element: Node, - currentWindow: typeof window, + element: globalThis.Node, + currentWindow: typeof globalThis.window, ): { before: string; after: string; @@ -116,8 +116,8 @@ export function getPseudoElementContent( } export function hasOverflowY( - element: HTMLElement, - currentWindow: typeof window, + element: globalThis.HTMLElement, + currentWindow: typeof globalThis.window, ): boolean { const style = currentWindow.getComputedStyle(element); return ( @@ -165,9 +165,9 @@ export function overlappedRect( } export function getRect( - el: HTMLElement | Node, + el: globalThis.HTMLElement | globalThis.Node, baseZoom: number, // base zoom - currentWindow: typeof window, + currentWindow: typeof globalThis.window, ): ExtractedRect { let originalRect: DOMRect; let newZoom = 1; @@ -179,7 +179,9 @@ export function getRect( originalRect = el.getBoundingClientRect(); // from Chrome v128, the API would return differently https://docs.google.com/document/d/1AcnDShjT-kEuRaMchZPm5uaIgNZ4OiYtM4JI9qiV8Po/edit if (!('currentCSSZoom' in el)) { - newZoom = Number.parseFloat(currentWindow.getComputedStyle(el).zoom) || 1; + newZoom = + Number.parseFloat((currentWindow.getComputedStyle(el) as any).zoom) || + 1; } } @@ -199,9 +201,9 @@ export function getRect( } const isElementCovered = ( - el: HTMLElement | Node, + el: globalThis.HTMLElement | globalThis.Node, rect: ExtractedRect, - currentWindow: typeof window, + currentWindow: typeof globalThis.window, ) => { // Gets the center coordinates of the element const x = rect.left + rect.width / 2; @@ -254,9 +256,9 @@ const isElementCovered = ( }; export function visibleRect( - el: HTMLElement | Node | null, - currentWindow: typeof window, - currentDocument: typeof document, + el: globalThis.HTMLElement | globalThis.Node | null, + currentWindow: typeof globalThis.window, + currentDocument: typeof globalThis.document, baseZoom = 1, ): | { left: number; top: number; width: number; height: number; zoom: number } @@ -367,7 +369,7 @@ export function visibleRect( }; } -export function validTextNodeContent(node: Node): string | false { +export function validTextNodeContent(node: globalThis.Node): string | false { if (!node) { return false; } @@ -388,8 +390,8 @@ export function validTextNodeContent(node: Node): string | false { } export function getNodeAttributes( - node: HTMLElement | Node, - currentWindow: typeof window, + node: globalThis.HTMLElement | globalThis.Node, + currentWindow: typeof globalThis.window, ): Record { if ( !node || @@ -440,7 +442,7 @@ export function resetNodeHashCacheList() { } export function midsceneGenerateHash( - node: Node | null, + node: globalThis.Node | null, content: string, rect: any, ): string { @@ -483,7 +485,7 @@ export function setExtractTextWithPositionOnWindow() { } } -export function getTopDocument(): HTMLElement { - const container: HTMLElement = document.body || document; +export function getTopDocument(): globalThis.HTMLElement { + const container: globalThis.HTMLElement = document.body || document; return container; } diff --git a/packages/web-integration/src/extractor/web-extractor.ts b/packages/shared/src/extractor/web-extractor.ts similarity index 96% rename from packages/web-integration/src/extractor/web-extractor.ts rename to packages/shared/src/extractor/web-extractor.ts index 6ce894431..66da69ff4 100644 --- a/packages/web-integration/src/extractor/web-extractor.ts +++ b/packages/shared/src/extractor/web-extractor.ts @@ -1,11 +1,10 @@ -import type { Point } from '@midscene/core'; -import { descriptionOfTree } from '@midscene/core/tree'; +import type { ElementInfo } from '.'; import { CONTAINER_MINI_HEIGHT, CONTAINER_MINI_WIDTH, NodeType, -} from '@midscene/shared/constants'; -import type { ElementInfo } from '.'; +} from '../constants'; +import type { Point } from '../types'; import { isButtonElement, isContainerElement, @@ -13,6 +12,7 @@ import { isImgElement, isTextElement, } from './dom-util'; +import { descriptionOfTree } from './tree'; import { getNodeAttributes, getPseudoElementContent, @@ -34,7 +34,7 @@ interface WebElementInfo extends ElementInfo { let indexId = 0; -function tagNameOfNode(node: Node): string { +function tagNameOfNode(node: globalThis.Node): string { let tagName = ''; if (node instanceof HTMLElement) { tagName = node.tagName.toLowerCase(); @@ -260,7 +260,7 @@ interface WebElementNode { // @deprecated export function extractTextWithPosition( - initNode: Node, + initNode: globalThis.Node, debugMode = false, ): WebElementInfo[] { const elementNode = extractTreeNode(initNode, debugMode); @@ -280,7 +280,7 @@ export function extractTextWithPosition( } export function extractTreeNodeAsString( - initNode: Node, + initNode: globalThis.Node, debugMode = false, ): string { const elementNode = extractTreeNode(initNode, debugMode); @@ -288,7 +288,7 @@ export function extractTreeNodeAsString( } export function extractTreeNode( - initNode: Node, + initNode: globalThis.Node, debugMode = false, ): WebElementNode { setDebugMode(debugMode); @@ -300,9 +300,9 @@ export function extractTreeNode( const topChildren: WebElementNode[] = []; function dfs( - node: Node, - currentWindow: typeof window, - currentDocument: typeof document, + node: globalThis.Node, + currentWindow: typeof globalThis.window, + currentDocument: typeof globalThis.document, baseZoom = 1, basePoint: Point = { left: 0, top: 0 }, ): WebElementNode | null { diff --git a/packages/shared/src/img/box-select.ts b/packages/shared/src/img/box-select.ts index a06de25d5..da5b8f403 100644 --- a/packages/shared/src/img/box-select.ts +++ b/packages/shared/src/img/box-select.ts @@ -1,8 +1,8 @@ import assert from 'node:assert'; import type { Buffer } from 'node:buffer'; -import type { Rect } from '@/types'; import type Jimp from 'jimp'; import type { NodeType } from '../constants'; +import type { Rect } from '../types'; import getJimp from './get-jimp'; import { bufferFromBase64, imageInfo, imageInfoOfBase64 } from './index'; diff --git a/packages/shared/src/img/draw-box.ts b/packages/shared/src/img/draw-box.ts index 2aa5d84ce..e40515bca 100644 --- a/packages/shared/src/img/draw-box.ts +++ b/packages/shared/src/img/draw-box.ts @@ -1,5 +1,4 @@ -import type { Buffer } from 'node:buffer'; -import type { Rect } from '@/types'; +import type { Rect } from '../types'; import getJimp from './get-jimp'; import { bufferFromBase64 } from './info'; import { saveBase64Image } from './transform'; diff --git a/packages/shared/src/types/index.ts b/packages/shared/src/types/index.ts index 0bd15cda5..b5dde8f20 100644 --- a/packages/shared/src/types/index.ts +++ b/packages/shared/src/types/index.ts @@ -1,11 +1,40 @@ +import type { NodeType } from '../constants'; + export interface Point { left: number; top: number; } export interface Size { - width: number; + width: number; // device independent window size height: number; + dpr?: number; // the scale factor of the screenshots } -export type Rect = Point & Size; +export type Rect = Point & Size & { zoom?: number }; + +export abstract class BaseElement { + abstract id: string; + + abstract indexId?: number; // markerId for web + + abstract attributes: { + nodeType: NodeType; + [key: string]: string; + }; + + abstract content: string; + + abstract rect: Rect; + + abstract center: [number, number]; + + abstract locator?: string; +} + +export interface ElementTreeNode< + ElementType extends BaseElement = BaseElement, +> { + node: ElementType | null; + children: ElementTreeNode[]; +} diff --git a/packages/midscene/tests/unit-test/tree.test.ts b/packages/shared/tests/unit-test/tree.test.ts similarity index 98% rename from packages/midscene/tests/unit-test/tree.test.ts rename to packages/shared/tests/unit-test/tree.test.ts index 871608569..08132f212 100644 --- a/packages/midscene/tests/unit-test/tree.test.ts +++ b/packages/shared/tests/unit-test/tree.test.ts @@ -1,6 +1,6 @@ -import { descriptionOfTree } from '@/tree'; -import { NodeType } from '@midscene/shared/constants'; import { describe, expect, it } from 'vitest'; +import { NodeType } from '../../src/constants'; +import { descriptionOfTree } from '../../src/extractor/tree'; describe('utils', () => { const tree = { diff --git a/packages/shared/tsconfig.json b/packages/shared/tsconfig.json index 9c9232283..0ddeb9b58 100644 --- a/packages/shared/tsconfig.json +++ b/packages/shared/tsconfig.json @@ -10,9 +10,6 @@ "jsx": "preserve", "lib": ["DOM", "ESNext"], "moduleResolution": "node", - "paths": { - "@/*": ["./src/*"] - }, "resolveJsonModule": true, "rootDir": "src", "skipLibCheck": true, diff --git a/packages/visualizer/scripts/build-html.ts b/packages/visualizer/scripts/build-html.ts index 3a139117d..2b3fc59c6 100644 --- a/packages/visualizer/scripts/build-html.ts +++ b/packages/visualizer/scripts/build-html.ts @@ -153,7 +153,7 @@ function buildExtension() { // put the htmlElement.js into the extension safeCopyFile( - join(__dirname, '../../web-integration/dist/script/htmlElement.js'), + join(__dirname, '../../web-integration/iife-script/htmlElement.js'), join(__dirname, '../unpacked-extension/lib/htmlElement.js'), ); } diff --git a/packages/web-integration/modern.config.ts b/packages/web-integration/modern.config.ts index 88afdf2d8..953fc6e99 100644 --- a/packages/web-integration/modern.config.ts +++ b/packages/web-integration/modern.config.ts @@ -1,6 +1,28 @@ +import fs from 'node:fs'; +import path from 'node:path'; import { defineConfig, moduleTools } from '@modern-js/module-tools'; import { version } from './package.json'; +// Create directories and copy files +// The file copying functionality in modern.js is not operating correctly. +const files = [ + [ + 'node_modules/@midscene/shared/dist/script/htmlElement.js', + 'iife-script/htmlElement.js', + ], + [ + 'node_modules/@midscene/shared/dist/script/htmlElementDebug.js', + 'iife-script/htmlElementDebug.js', + ], +]; +files.forEach(([src, dest]) => { + // Create parent directory if it doesn't exist + const destDir = path.dirname(path.join(__dirname, dest)); + fs.mkdirSync(destDir, { recursive: true }); + // Copy file + fs.copyFileSync(path.join(__dirname, src), path.join(__dirname, dest)); +}); + export default defineConfig({ plugins: [moduleTools()], buildPreset: 'npm-library', diff --git a/packages/web-integration/package.json b/packages/web-integration/package.json index bee8ccb4a..6a4499acb 100644 --- a/packages/web-integration/package.json +++ b/packages/web-integration/package.json @@ -100,10 +100,8 @@ "scripts": { "dev": "modern dev", "dev:server": "npm run build && ./bin/midscene-playground", - "build": "npm run build:pkg && npm run build:script", - "build:pkg": "modern build -c ./modern.config.ts", - "build:script": "modern build -c ./modern.inspect.config.ts", - "build:watch": "modern build -w -c ./modern.config.ts & modern build -w -c ./modern.inspect.config.ts", + "build": "modern build -c ./modern.config.ts", + "build:watch": "modern build -w -c ./modern.config.ts", "test": "vitest --run", "test:u": "vitest --run -u", "test:ai": "AI_TEST_TYPE=web npm run test", diff --git a/packages/web-integration/src/appium/page.ts b/packages/web-integration/src/appium/page.ts index f7784dc51..e9e901571 100644 --- a/packages/web-integration/src/appium/page.ts +++ b/packages/web-integration/src/appium/page.ts @@ -1,11 +1,14 @@ import fs from 'node:fs'; import type { Point, Size } from '@midscene/core'; import { getTmpFile } from '@midscene/core/utils'; +import { + type ElementInfo, + clientExtractTextWithPosition, +} from '@midscene/shared/extractor'; import { base64Encoded, resizeImg } from '@midscene/shared/img'; import { DOMParser } from '@xmldom/xmldom'; import type { KeyInput as PuppeteerKeyInput } from 'puppeteer'; import type { Browser } from 'webdriverio'; -import { type ElementInfo, clientExtractTextWithPosition } from '../extractor'; import type { AbstractPage, MouseButton } from '../page'; type WebKeyInput = PuppeteerKeyInput; diff --git a/packages/web-integration/src/chrome-extension/page.ts b/packages/web-integration/src/chrome-extension/page.ts index 948e2b47a..b701fb3ca 100644 --- a/packages/web-integration/src/chrome-extension/page.ts +++ b/packages/web-integration/src/chrome-extension/page.ts @@ -7,10 +7,10 @@ import assert from 'node:assert'; import type { WebKeyInput } from '@/common/page'; -import { treeToList } from '@/common/utils'; -import type { ElementInfo } from '@/extractor'; import type { AbstractPage } from '@/page'; import type { BaseElement, ElementTreeNode, Point, Size } from '@midscene/core'; +import type { ElementInfo } from '@midscene/shared/extractor'; +import { treeToList } from '@midscene/shared/extractor'; import type { Protocol as CDPTypes } from 'devtools-protocol'; import { CdpKeyboard } from './cdpInput'; import { diff --git a/packages/web-integration/src/common/tasks.ts b/packages/web-integration/src/common/tasks.ts index 26320cd1d..80e4210a4 100644 --- a/packages/web-integration/src/common/tasks.ts +++ b/packages/web-integration/src/common/tasks.ts @@ -33,8 +33,8 @@ import { vlmPlanning, } from '@midscene/core/ai-model'; import { sleep } from '@midscene/core/utils'; +import type { ElementInfo } from '@midscene/shared/extractor'; import type { KeyInput } from 'puppeteer'; -import type { ElementInfo } from '../extractor'; import type { WebElementInfo } from '../web-element'; import { TaskCache } from './task-cache'; import type { WebUIContext } from './utils'; diff --git a/packages/web-integration/src/common/utils.ts b/packages/web-integration/src/common/utils.ts index a9995cfb9..7c88215b5 100644 --- a/packages/web-integration/src/common/utils.ts +++ b/packages/web-integration/src/common/utils.ts @@ -1,7 +1,6 @@ import assert from 'node:assert'; import { readFileSync } from 'node:fs'; import path from 'node:path'; -import type { ElementInfo } from '@/extractor'; import type { StaticPage } from '@/playground'; import type { BaseElement, @@ -16,6 +15,8 @@ import { } from '@midscene/core/env'; import { uploadTestInfoToServer } from '@midscene/core/utils'; import { NodeType } from '@midscene/shared/constants'; +import type { ElementInfo } from '@midscene/shared/extractor'; +import { traverseTree, treeToList } from '@midscene/shared/extractor'; import { findNearestPackageJson } from '@midscene/shared/fs'; import { compositeElementInfoImg } from '@midscene/shared/img'; import { uuid } from '@midscene/shared/utils'; @@ -104,45 +105,10 @@ export async function parseContextFromWebPage( }; } -export function treeToList( - tree: ElementTreeNode, -): T[] { - const result: T[] = []; - function dfs(node: ElementTreeNode) { - if (node.node) { - result.push(node.node); - } - for (const child of node.children) { - dfs(child); - } - } - dfs(tree); - return result; -} - -export function traverseTree< - T extends BaseElement, - ReturnNodeType extends BaseElement, ->( - tree: ElementTreeNode, - onNode: (node: T) => ReturnNodeType, -): ElementTreeNode { - function dfs(node: ElementTreeNode) { - if (node.node) { - node.node = onNode(node.node) as any; - } - for (const child of node.children) { - dfs(child); - } - } - dfs(tree); - return tree as any; -} - export async function getExtraReturnLogic(tree = false) { const pathDir = findNearestPackageJson(__dirname); assert(pathDir, `can't find pathDir, with ${__dirname}`); - const scriptPath = path.join(pathDir, './dist/script/htmlElement.js'); + const scriptPath = path.join(pathDir, './iife-script/htmlElement.js'); const elementInfosScriptContent = readFileSync(scriptPath, 'utf-8'); if (tree) { return `${elementInfosScriptContent}midscene_element_inspector.webExtractNodeTree()`; diff --git a/packages/web-integration/src/debug/img/index.ts b/packages/web-integration/src/debug/img/index.ts deleted file mode 100644 index 2dec6830e..000000000 --- a/packages/web-integration/src/debug/img/index.ts +++ /dev/null @@ -1,149 +0,0 @@ -import assert from 'node:assert'; -import { Buffer } from 'node:buffer'; -import type { NodeType } from '@/extractor/constants'; -import sharp from 'sharp'; - -// Define picture path -type ElementType = { - x: number; - y: number; - width: number; - height: number; - label: string; - attributes: { - [key: string]: string; - nodeType: NodeType; - }; -}; - -const createSvgOverlay = ( - elements: Array, - imageWidth: number, - imageHeight: number, -) => { - let svgContent = ``; - - // Define color array - const colors = [ - { rect: 'blue', text: 'white' }, - { rect: 'green', text: 'white' }, - ]; - - // Define clipping path - svgContent += ''; - elements.forEach((element, index) => { - svgContent += ` - - - - `; - }); - svgContent += ''; - - elements.forEach((element, index) => { - // Calculate the width and height of the text - const textWidth = element.label.length * 8; // Assume that each character is 8px wide - const textHeight = 12; // Assume that the text height is 20px - - // Calculates the position of the initial color block so that it wraps and centers the text - const rectWidth = textWidth + 5; - const rectHeight = textHeight + 4; - let rectX = element.x - rectWidth; - let rectY = element.y + element.height / 2 - textHeight / 2 - 2; - - // Initial text position - let textX = rectX + rectWidth / 2; - let textY = rectY + rectHeight / 2 + 6; - - // Check to see if it's obscured by the left - if (rectX < 0) { - rectX = element.x; - rectY = element.y - rectHeight; - textX = rectX + rectWidth / 2; - textY = rectY + rectHeight / 2 + 6; - } - - // Choose color - const color = colors[index % colors.length]; - - // Draw boxes and text - svgContent += ` - - - - ${element.label} - - `; - }); - - svgContent += ''; - return Buffer.from(svgContent); -}; - -export const processImageElementInfo = async (options: { - inputImgBase64: string; - elementsPositionInfo: Array; - elementsPositionInfoWithoutText: Array; -}) => { - // Get the size of the original image - const base64Image = options.inputImgBase64.split(';base64,').pop(); - assert(base64Image, 'base64Image is undefined'); - - const imageBuffer = Buffer.from(base64Image, 'base64'); - const metadata = await sharp(imageBuffer).metadata(); - const { width, height } = metadata; - - if (width && height) { - // Create svg overlay - const svgOverlay = createSvgOverlay( - options.elementsPositionInfo, - width, - height, - ); - const svgOverlayWithoutText = createSvgOverlay( - options.elementsPositionInfoWithoutText, - width, - height, - ); - - // Composite picture - const compositeElementInfoImgBase64 = await sharp(imageBuffer) - // .resize(newDimensions.width, newDimensions.height) - .composite([{ input: svgOverlay, blend: 'over' }]) - .toBuffer() - .then((data) => { - // Convert image data to base64 encoding - return data.toString('base64'); - }) - .catch((err) => { - throw err; - }); - - // Composite picture withoutText - const compositeElementInfoImgWithoutTextBase64 = await sharp(imageBuffer) - // .resize(newDimensions.width, newDimensions.height) - .composite([{ input: svgOverlayWithoutText, blend: 'over' }]) - .toBuffer() - .then((data) => { - // Convert image data to base64 encoding - return data.toString('base64'); - }) - .catch((err) => { - throw err; - }); - - return { - compositeElementInfoImgBase64, - compositeElementInfoImgWithoutTextBase64, - }; - } - throw Error('Image processing failed because width or height is undefined'); -}; - -export const compressImageSize = async (buffer: Buffer) => { - const resizedBuffer = await sharp(buffer).resize({ width: 640 }).toBuffer(); - - return resizedBuffer; -}; diff --git a/packages/web-integration/src/debug/img/util.ts b/packages/web-integration/src/debug/img/util.ts deleted file mode 100644 index 8998f57ea..000000000 --- a/packages/web-integration/src/debug/img/util.ts +++ /dev/null @@ -1,33 +0,0 @@ -import type { ElementInfo } from '@/extractor'; -import { NodeType } from '@/extractor/constants'; -import type { WebPage } from '../../common/page'; - -export async function getElementsInfo(page: WebPage) { - const captureElementSnapshot: Array = - await page.getElementsInfo(); - const elementsPositionInfo = captureElementSnapshot.map( - (elementInfo, index) => { - return { - label: elementInfo.indexId, - x: elementInfo.rect.left, - y: elementInfo.rect.top, - width: elementInfo.rect.width, - height: elementInfo.rect.height, - attributes: elementInfo.attributes, - }; - }, - ); - const elementsPositionInfoWithoutText = elementsPositionInfo.filter( - (elementInfo) => { - if (elementInfo.attributes.nodeType === NodeType.TEXT) { - return false; - } - return true; - }, - ); - return { - elementsPositionInfo, - captureElementSnapshot, - elementsPositionInfoWithoutText, - }; -} diff --git a/packages/web-integration/src/debug/index.ts b/packages/web-integration/src/debug/index.ts index 227753cbb..f5bc41221 100644 --- a/packages/web-integration/src/debug/index.ts +++ b/packages/web-integration/src/debug/index.ts @@ -1,9 +1,9 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; import path from 'node:path'; import type { WebPage } from '@/common/page'; -import type { ElementInfo } from '@/extractor'; -import { NodeType } from '@/extractor/constants'; import { descriptionOfTree } from '@midscene/core/tree'; +import { NodeType } from '@midscene/shared/constants'; +import type { ElementInfo } from '@midscene/shared/extractor'; import { processImageElementInfo, resizeImgBase64, diff --git a/packages/web-integration/src/page.ts b/packages/web-integration/src/page.ts index 40f690366..29dfb6733 100644 --- a/packages/web-integration/src/page.ts +++ b/packages/web-integration/src/page.ts @@ -1,7 +1,7 @@ import type { Point, Size } from '@midscene/core'; +import type { ElementInfo, ElementNode } from '@midscene/shared/extractor'; import type { WebKeyInput } from './common/page'; import type { WebUIContext } from './common/utils'; -import type { ElementInfo, ElementNode } from './extractor'; export type MouseButton = 'left' | 'right' | 'middle'; diff --git a/packages/web-integration/src/puppeteer/base-page.ts b/packages/web-integration/src/puppeteer/base-page.ts index a13cb9e2d..f2b0971b2 100644 --- a/packages/web-integration/src/puppeteer/base-page.ts +++ b/packages/web-integration/src/puppeteer/base-page.ts @@ -1,11 +1,12 @@ import type { ElementTreeNode, Point, Size } from '@midscene/core'; import { getTmpFile, sleep } from '@midscene/core/utils'; +import type { ElementInfo } from '@midscene/shared/extractor'; +import { treeToList } from '@midscene/shared/extractor'; import { base64Encoded } from '@midscene/shared/img'; import type { Page as PlaywrightPage } from 'playwright'; -import type { KeyInput, Page as PuppeteerPage } from 'puppeteer'; +import type { Page as PuppeteerPage } from 'puppeteer'; import type { WebKeyInput } from '../common/page'; -import { getExtraReturnLogic, treeToList } from '../common/utils'; -import type { ElementInfo, ElementNode } from '../extractor'; +import { getExtraReturnLogic } from '../common/utils'; import type { AbstractPage } from '../page'; import type { MouseButton } from '../page'; diff --git a/packages/web-integration/src/web-element.ts b/packages/web-integration/src/web-element.ts index 60dce987b..904c2a986 100644 --- a/packages/web-integration/src/web-element.ts +++ b/packages/web-integration/src/web-element.ts @@ -1,6 +1,5 @@ import type { BaseElement, Rect } from '@midscene/core'; -import type { WebPage } from './common/page'; -import type { NodeType } from './extractor/constants'; +import type { NodeType } from '@midscene/shared/constants'; export interface WebElementInfoType extends BaseElement { id: string; diff --git a/packages/web-integration/tests/unit-test/web-extractor.test.ts b/packages/web-integration/tests/unit-test/web-extractor.test.ts index 530488155..e1b860764 100644 --- a/packages/web-integration/tests/unit-test/web-extractor.test.ts +++ b/packages/web-integration/tests/unit-test/web-extractor.test.ts @@ -1,8 +1,9 @@ import path, { join } from 'node:path'; -import { parseContextFromWebPage, traverseTree } from '@/common/utils'; +import { parseContextFromWebPage } from '@/common/utils'; import { generateExtractData } from '@/debug'; import StaticPage from '@/playground/static-page'; import type { WebElementInfo } from '@/web-element'; +import { traverseTree } from '@midscene/shared/extractor'; import { imageInfoOfBase64 } from '@midscene/shared/img'; import { createServer } from 'http-server'; import { beforeAll, describe, expect, it } from 'vitest';