diff --git a/apps/site/docs/visualization/index.mdx b/apps/site/docs/visualization/index.mdx index 15a329775..65d049882 100644 --- a/apps/site/docs/visualization/index.mdx +++ b/apps/site/docs/visualization/index.mdx @@ -3,4 +3,4 @@ pageType: custom --- import Visualizer from '@midscene/visualizer'; - + diff --git a/packages/midscene/src/utils.ts b/packages/midscene/src/utils.ts index 10f6b3f1d..2fcbbbc0b 100644 --- a/packages/midscene/src/utils.ts +++ b/packages/midscene/src/utils.ts @@ -87,8 +87,8 @@ export function getTmpDir() { return path; } -export function getTmpFile(fileExt: string) { - const filename = `${randomUUID()}.${fileExt}`; +export function getTmpFile(fileExtWithoutDot: string) { + const filename = `${randomUUID()}.${fileExtWithoutDot}`; return join(getTmpDir(), filename); } diff --git a/packages/midscene/tests/fixtures/heytea.jpeg b/packages/midscene/tests/fixtures/heytea.jpeg index 9d2ba092d..2f07f0298 100644 Binary files a/packages/midscene/tests/fixtures/heytea.jpeg and b/packages/midscene/tests/fixtures/heytea.jpeg differ diff --git a/packages/midscene/tests/utils.ts b/packages/midscene/tests/utils.ts index 674cd55d2..7823825f1 100644 --- a/packages/midscene/tests/utils.ts +++ b/packages/midscene/tests/utils.ts @@ -23,35 +23,6 @@ export function sleep(ms: number) { return new Promise((resolve) => setTimeout(resolve, ms)); } -// export async function launch(url: string, opt?: { -// viewport?: Viewport, -// }) { -// const browser = await puppeteer.launch(); - -// const page = (await browser.pages())[0]; -// const viewportConfig = { -// width: opt?.viewport?.pixelWidth || 1920, -// height: opt?.viewport?.pixelHeight || 1080, -// deviceScaleFactor: opt?.viewport?.dpr || 1, -// } -// await page.setViewport(viewportConfig); -// await Promise.all([ -// page.waitForNavigation({ -// timeout: 20 * 1000, -// waitUntil: 'networkidle0', -// }), -// (async () => { -// const response = await page.goto(url); -// if (response?.status) { -// assert(response.status() <= 399, `Page load failed: ${response.status()}`); -// } -// })(), -// ]); -// await sleep(2 * 1000); - -// return browser; -// } - export function fakeInsight(content: string) { const screenshot = getFixture('baidu.png'); const basicContext = { diff --git a/packages/visualizer/docs/index.tsx b/packages/visualizer/docs/index.tsx index 14394bc9a..1d1f80190 100644 --- a/packages/visualizer/docs/index.tsx +++ b/packages/visualizer/docs/index.tsx @@ -2,5 +2,6 @@ import React from 'react'; import Tool from '@/index'; export default () => { + // return ; return ; }; diff --git a/packages/visualizer/src/component/common.less b/packages/visualizer/src/component/common.less index caf79cf24..ef044842e 100644 --- a/packages/visualizer/src/component/common.less +++ b/packages/visualizer/src/component/common.less @@ -3,7 +3,7 @@ @main-orange: #F9483E; -@side-bg: #ECECEC; +@side-bg: #f7f7f7; @title-bg: #DDDDDD; @border-color: #CCCCCC; @heavy-border-color: #888; diff --git a/packages/visualizer/src/component/detail-panel.tsx b/packages/visualizer/src/component/detail-panel.tsx index b8b15b262..9fe74a048 100644 --- a/packages/visualizer/src/component/detail-panel.tsx +++ b/packages/visualizer/src/component/detail-panel.tsx @@ -18,15 +18,15 @@ const ScreenshotItem = (props: { time: string; img: string }) => { ); }; +const VIEW_TYPE_BLACKBOARD = 'blackboard'; const VIEW_TYPE_SCREENSHOT = 'screenshot'; const VIEW_TYPE_JSON = 'json'; -const VIEW_TYPE_BLACKBOARD = 'blackboard'; const DetailPanel = (): JSX.Element => { const dumpId = useInsightDump((store) => store._loadId); const blackboardViewAvailable = Boolean(dumpId); const activeTask = useExecutionDump((store) => store.activeTask); - const [preferredViewType, setViewType] = useState(dumpId ? VIEW_TYPE_BLACKBOARD : VIEW_TYPE_SCREENSHOT); + const [preferredViewType, setViewType] = useState(VIEW_TYPE_BLACKBOARD); const viewType = preferredViewType === VIEW_TYPE_BLACKBOARD && !dumpId ? VIEW_TYPE_SCREENSHOT : preferredViewType; diff --git a/packages/visualizer/src/component/detail-side.less b/packages/visualizer/src/component/detail-side.less index 275a35746..a4be658dd 100644 --- a/packages/visualizer/src/component/detail-side.less +++ b/packages/visualizer/src/component/detail-side.less @@ -119,10 +119,8 @@ } } - .context { - pre { - text-wrap: balance; - } + pre { + text-wrap: balance; } .item-list-space-up { diff --git a/packages/visualizer/src/component/detail-side.tsx b/packages/visualizer/src/component/detail-side.tsx index 8abc9d49a..7167a3ac9 100644 --- a/packages/visualizer/src/component/detail-side.tsx +++ b/packages/visualizer/src/component/detail-side.tsx @@ -313,8 +313,14 @@ const DetailSide = (): JSX.Element => { ) : null; const dataCard = dump?.data ? ( - {kv(dump.data)}}> + {JSON.stringify(dump.data, undefined, 2)}} + > ) : null; + console.log('dump is', dump); const plans = (task as ExecutionTaskPlanning)?.output?.plans; let timelineData: TimelineItemProps[] = []; diff --git a/packages/visualizer/src/component/sidebar.tsx b/packages/visualizer/src/component/sidebar.tsx index 089e2b815..47a205fc7 100644 --- a/packages/visualizer/src/component/sidebar.tsx +++ b/packages/visualizer/src/component/sidebar.tsx @@ -8,7 +8,7 @@ import { LogoutOutlined, MinusOutlined, } from '@ant-design/icons'; -import { ExecutionTask } from '@midscene/core'; +import { ExecutionTask, ExecutionTaskInsightQuery } from '@midscene/core'; import { Button } from 'antd'; import PanelTitle from './panel-title'; import { timeCostStrElement } from './misc'; @@ -43,8 +43,17 @@ const SideItem = (props: { statusText = timeCostStrElement(task.timing.cost); } - const contentRow = - task.type === 'Planning' ?
{task.param?.userPrompt}
: null; + let contentRow: JSX.Element | undefined; + if (task.type === 'Planning') { + contentRow =
{task.param?.userPrompt}
; + } else if (task.type === 'Insight' && task.subType === 'Query') { + // debugger; + const demand = (task as ExecutionTaskInsightQuery).param?.dataDemand; + const contentToShow = typeof demand === 'string' ? demand : JSON.stringify(demand); + contentRow =
{contentToShow}
; + } else { + // debugger; + } // add hover listener return (
{ +const Sidebar = (props: { hideLogo?: boolean }): JSX.Element => { const groupedDumps = useExecutionDump((store) => store.dump); const setActiveTask = useExecutionDump((store) => store.setActiveTask); const activeTask = useExecutionDump((store) => store.activeTask); @@ -177,7 +186,7 @@ const Sidebar = (): JSX.Element => { return (
-
+
Logo void; reset: () => void; -}>((set) => { +}>((set, get) => { const initData = { dump: null, activeTask: null, @@ -62,14 +63,15 @@ export const useExecutionDump = create<{ // set the first one as selected for (const item of dump) { if (item.executions.length > 0 && item.executions[0].tasks.length > 0) { - set({ activeTask: item.executions[0].tasks[0] }); + get().setActiveTask(item.executions[0].tasks[0]); break; } } }, setActiveTask(task: ExecutionTask) { set({ activeTask: task }); - if ((task as ExecutionTaskInsightLocate).log?.dump?.matchedElement) { + console.log('task set', task); + if (task.type === 'Insight') { syncToInsightDump((task as ExecutionTaskInsightLocate).log!.dump!); } else { resetInsightDump(); diff --git a/packages/visualizer/src/component/timeline.tsx b/packages/visualizer/src/component/timeline.tsx index 626f36861..0ebdb4c76 100644 --- a/packages/visualizer/src/component/timeline.tsx +++ b/packages/visualizer/src/component/timeline.tsx @@ -67,7 +67,7 @@ const TimelineWidget = (props: { const sizeRatio = 2; const titleBg = 0xdddddd; // @title-bg - const sideBg = 0xececec; + const sideBg = 0xf7f7f7; // @side-bg const gridTextColor = 0; const shotBorderColor = 0x777777; const gridLineColor = 0xcccccc; // @border-color diff --git a/packages/visualizer/src/index.less b/packages/visualizer/src/index.less index 10988249b..126b8007a 100644 --- a/packages/visualizer/src/index.less +++ b/packages/visualizer/src/index.less @@ -88,7 +88,7 @@ footer.mt-8{ .main-canvas-container { flex-grow: 1; height: 100%; - background: #F5F5F5; + background: #ffffff; overflow-x: hidden; overflow-y: scroll; border-left: 1px solid @border-color; diff --git a/packages/visualizer/src/index.tsx b/packages/visualizer/src/index.tsx index d8f356713..a1460ed68 100644 --- a/packages/visualizer/src/index.tsx +++ b/packages/visualizer/src/index.tsx @@ -13,7 +13,7 @@ import DetailSide from '@/component/detail-side'; import Sidebar from '@/component/sidebar'; const { Dragger } = Upload; -const Index = (): JSX.Element => { +const Index = (props: { hideLogo?: boolean }): JSX.Element => { const executionDump = useExecutionDump((store) => store.dump); const setGroupedDump = useExecutionDump((store) => store.setGroupedDump); const reset = useExecutionDump((store) => store.reset); @@ -148,8 +148,8 @@ const Index = (): JSX.Element => { } }} > - - + + { diff --git a/packages/web-integration/modern.inspect.config.ts b/packages/web-integration/modern.inspect.config.ts index aafec190f..2e56939b2 100644 --- a/packages/web-integration/modern.inspect.config.ts +++ b/packages/web-integration/modern.inspect.config.ts @@ -8,7 +8,7 @@ export default defineConfig({ buildType: 'bundle', format: 'iife', input: { - htmlElement:'src/html-element/index.ts', + htmlElement:'src/extractor/index.ts', }, outDir: 'dist/script', esbuildOptions: options => { diff --git a/packages/web-integration/src/common/agent.ts b/packages/web-integration/src/common/agent.ts new file mode 100644 index 000000000..76d60e200 --- /dev/null +++ b/packages/web-integration/src/common/agent.ts @@ -0,0 +1,80 @@ +import { ExecutionDump, GroupedActionDump } from '@midscene/core'; +import { groupedActionDumpFileExt, writeDumpFile } from '@midscene/core/utils'; +import { PageTaskExecutor } from '../common/tasks'; +import { WebPage } from '@/common/page'; + +export class PageAgent { + page: WebPage; + + dumps: GroupedActionDump[]; + + constructor(page: WebPage) { + this.page = page; + this.dumps = []; + } + + appendDump(groupName: string, execution: ExecutionDump) { + let currentDump = this.dumps.find((dump) => dump.groupName === groupName); + if (!currentDump) { + currentDump = { + groupName, + executions: [], + }; + this.dumps.push(currentDump); + } + currentDump.executions.push(execution); + } + + writeOutActionDumps() { + writeDumpFile(`playwright-${process.pid}`, groupedActionDumpFileExt, JSON.stringify(this.dumps)); + } + + async aiAction(taskPrompt: string, dumpCaseName = 'AI Action', dumpGroupName = 'MidScene / Web') { + const actionAgent = new PageTaskExecutor(this.page, { taskName: dumpCaseName }); + let error: Error | undefined; + try { + await actionAgent.action(taskPrompt); + } catch (e: any) { + error = e; + } + if (actionAgent.executionDump) { + this.appendDump(dumpGroupName, actionAgent.executionDump); + this.writeOutActionDumps(); + } + if (error) { + // playwright cli won't print error cause, so we print it here + console.error(error); + throw new Error(error.message, { cause: error }); + } + } + + async aiQuery(demand: any, dumpCaseName = 'AI Query', dumpGroupName = 'MidScene / Web') { + const actionAgent = new PageTaskExecutor(this.page, { taskName: dumpCaseName }); + let error: Error | undefined; + let result: any; + try { + result = await actionAgent.query(demand); + } catch (e: any) { + error = e; + } + if (actionAgent.executionDump) { + this.appendDump(dumpGroupName, actionAgent.executionDump); + this.writeOutActionDumps(); + } + if (error) { + // playwright cli won't print error cause, so we print it here + console.error(error); + throw new Error(error.message, { cause: error }); + } + return result; + } + + async ai(taskPrompt: string, type = 'action', dumpCaseName = 'AI', dumpGroupName = 'MidScene / Web') { + if (type === 'action') { + return this.aiAction(taskPrompt, dumpCaseName, dumpGroupName); + } else if (type === 'query') { + return this.aiQuery(taskPrompt, dumpCaseName, dumpGroupName); + } + throw new Error(`Unknown or Unsupported task type: ${type}, only support 'action' or 'query'`); + } +} diff --git a/packages/web-integration/src/playwright/cdp.ts b/packages/web-integration/src/common/cdp.ts similarity index 100% rename from packages/web-integration/src/playwright/cdp.ts rename to packages/web-integration/src/common/cdp.ts diff --git a/packages/web-integration/src/common/page.d.ts b/packages/web-integration/src/common/page.d.ts new file mode 100644 index 000000000..442939ff4 --- /dev/null +++ b/packages/web-integration/src/common/page.d.ts @@ -0,0 +1,5 @@ +import type { Page as PlaywrightPage } from 'playwright'; +import type { Page as PuppeteerPage, KeyInput } from 'puppeteer'; + +export type WebPage = PlaywrightPage | PuppeteerPage; +export type WebKeyInput = KeyInput; diff --git a/packages/web-integration/src/playwright/actions.ts b/packages/web-integration/src/common/tasks.ts similarity index 83% rename from packages/web-integration/src/playwright/actions.ts rename to packages/web-integration/src/common/tasks.ts index fe4d7e543..0c266977b 100644 --- a/packages/web-integration/src/playwright/actions.ts +++ b/packages/web-integration/src/common/tasks.ts @@ -1,5 +1,4 @@ import assert from 'assert'; -import type { Page as PlaywrightPage } from 'playwright'; import Insight, { DumpSubscriber, ExecutionDump, @@ -21,24 +20,26 @@ import Insight, { } from '@midscene/core'; import { commonScreenshotParam, getTmpFile, sleep } from '@midscene/core/utils'; import { base64Encoded } from '@midscene/core/image'; -import { parseContextFromPlaywrightPage } from './utils'; -import { WebElementInfo } from './element'; +import type { KeyInput, Page as PuppeteerPage } from 'puppeteer'; +import { WebElementInfo } from '../web-element'; +import { parseContextFromWebPage } from './utils'; +import { WebPage } from '@/common/page'; -export class PlayWrightActionAgent { - page: PlaywrightPage; +export class PageTaskExecutor { + page: WebPage; insight: Insight; - executor: Executor; + taskExecutor: Executor; - actionDump?: ExecutionDump; + executionDump?: ExecutionDump; - constructor(page: PlaywrightPage, opt?: { taskName?: string }) { + constructor(page: WebPage, opt?: { taskName?: string }) { this.page = page; this.insight = new Insight(async () => { - return await parseContextFromPlaywrightPage(page); + return await parseContextFromWebPage(page); }); - this.executor = new Executor(opt?.taskName || 'MidScene - PlayWrightAI'); + this.taskExecutor = new Executor(opt?.taskName || 'MidScene - PlayWrightAI'); } private async recordScreenshot(timing: ExecutionRecorderItem['timing']) { @@ -117,7 +118,6 @@ export class PlayWrightActionAgent { await this.page.keyboard.type(taskParam.value); }, }; - // TODO: return a recorder Object return taskActionInput; } else if (plan.type === 'KeyboardPress') { const taskActionKeyboardPress: ExecutionTaskActionApply = { @@ -126,7 +126,7 @@ export class PlayWrightActionAgent { param: plan.param, executor: async (taskParam) => { assert(taskParam.value, 'No key to press'); - await this.page.keyboard.press(taskParam.value); + await this.page.keyboard.press(taskParam.value as KeyInput); }, }; return taskActionKeyboardPress; @@ -158,7 +158,7 @@ export class PlayWrightActionAgent { param: plan.param, executor: async (taskParam) => { const scrollToEventName = taskParam.scrollType; - const innerHeight = await this.page.evaluate(() => window.innerHeight); + const innerHeight = await (this.page as PuppeteerPage).evaluate(() => window.innerHeight); switch (scrollToEventName) { case 'ScrollUntilTop': @@ -193,7 +193,7 @@ export class PlayWrightActionAgent { } async action(userPrompt: string /* , actionInfo?: { actionType?: EventActions[number]['action'] } */) { - this.executor.description = userPrompt; + this.taskExecutor.description = userPrompt; const pageContext = await this.insight.contextRetrieverFn(); let plans: PlanningAction[] = []; @@ -215,32 +215,32 @@ export class PlayWrightActionAgent { try { // plan - await this.executor.append(this.wrapExecutorWithScreenshot(planningTask)); - await this.executor.flush(); - this.actionDump = this.executor.dump(); + await this.taskExecutor.append(this.wrapExecutorWithScreenshot(planningTask)); + await this.taskExecutor.flush(); + this.executionDump = this.taskExecutor.dump(); // append tasks const executables = await this.convertPlanToExecutable(plans); - await this.executor.append(executables); + await this.taskExecutor.append(executables); // flush actions - await this.executor.flush(); - this.actionDump = this.executor.dump(); + await this.taskExecutor.flush(); + this.executionDump = this.taskExecutor.dump(); assert( - this.executor.status !== 'error', - `failed to execute tasks: ${this.executor.status}, msg: ${this.executor.errorMsg || ''}`, + this.taskExecutor.status !== 'error', + `failed to execute tasks: ${this.taskExecutor.status}, msg: ${this.taskExecutor.errorMsg || ''}`, ); } catch (e: any) { // keep the dump before throwing - this.actionDump = this.executor.dump(); + this.executionDump = this.taskExecutor.dump(); const err = new Error(e.message, { cause: e }); throw err; } } async query(demand: InsightExtractParam) { - this.executor.description = JSON.stringify(demand); + this.taskExecutor.description = JSON.stringify(demand); let data: any; const queryTask: ExecutionTaskInsightQueryApply = { type: 'Insight', @@ -262,12 +262,12 @@ export class PlayWrightActionAgent { }, }; try { - await this.executor.append(this.wrapExecutorWithScreenshot(queryTask)); - await this.executor.flush(); - this.actionDump = this.executor.dump(); + await this.taskExecutor.append(this.wrapExecutorWithScreenshot(queryTask)); + await this.taskExecutor.flush(); + this.executionDump = this.taskExecutor.dump(); } catch (e: any) { // keep the dump before throwing - this.actionDump = this.executor.dump(); + this.executionDump = this.taskExecutor.dump(); const err = new Error(e.message, { cause: e }); throw err; } diff --git a/packages/web-integration/src/playwright/utils.ts b/packages/web-integration/src/common/utils.ts similarity index 81% rename from packages/web-integration/src/playwright/utils.ts rename to packages/web-integration/src/common/utils.ts index 8f1d4cbd1..1c8a5e820 100644 --- a/packages/web-integration/src/playwright/utils.ts +++ b/packages/web-integration/src/common/utils.ts @@ -2,19 +2,19 @@ import fs, { readFileSync } from 'fs'; import assert from 'assert'; import { Buffer } from 'buffer'; import path from 'path'; -import type { Page as PlaywrightPage } from 'playwright'; -import { Page } from 'puppeteer'; import { UIContext, PlaywrightParserOpt } from '@midscene/core'; -import { alignCoordByTrim, base64Encoded, imageInfo, imageInfoOfBase64 } from '@midscene/core/image'; +import { alignCoordByTrim, base64Encoded, imageInfoOfBase64 } from '@midscene/core/image'; import { getTmpFile } from '@midscene/core/utils'; -import { WebElementInfo, WebElementInfoType } from './element'; +import { WebElementInfo, WebElementInfoType } from '../web-element'; +import { WebPage } from './page'; -export async function parseContextFromPlaywrightPage( - page: PlaywrightPage, +export async function parseContextFromWebPage( + page: WebPage, _opt?: PlaywrightParserOpt, ): Promise> { assert(page, 'page is required'); - const file = '/Users/bytedance/workspace/midscene/packages/midscene/tests/fixtures/heytea.jpeg'; // getTmpFile('jpeg'); + + const file = getTmpFile('jpeg'); await page.screenshot({ path: file, type: 'jpeg', quality: 75 }); const screenshotBuffer = readFileSync(file); const screenshotBase64 = base64Encoded(file); @@ -30,7 +30,7 @@ export async function parseContextFromPlaywrightPage( }; } -export async function getElementInfosFromPage(page: Page | PlaywrightPage) { +export async function getElementInfosFromPage(page: WebPage) { const pathDir = findNearestPackageJson(__dirname); assert(pathDir, `can't find pathDir, with ${__dirname}`); const scriptPath = path.join(pathDir, './dist/script/htmlElement.js'); @@ -44,7 +44,7 @@ export async function getElementInfosFromPage(page: Page | PlaywrightPage) { async function alignElements( screenshotBuffer: Buffer, elements: WebElementInfoType[], - page: PlaywrightPage, + page: WebPage, ): Promise { const textsAligned: WebElementInfo[] = []; for (const item of elements) { diff --git a/packages/web-integration/src/html-element/constants.ts b/packages/web-integration/src/extractor/constants.ts similarity index 100% rename from packages/web-integration/src/html-element/constants.ts rename to packages/web-integration/src/extractor/constants.ts diff --git a/packages/web-integration/src/html-element/debug.ts b/packages/web-integration/src/extractor/debug.ts similarity index 100% rename from packages/web-integration/src/html-element/debug.ts rename to packages/web-integration/src/extractor/debug.ts diff --git a/packages/web-integration/src/html-element/dom-util.ts b/packages/web-integration/src/extractor/dom-util.ts similarity index 100% rename from packages/web-integration/src/html-element/dom-util.ts rename to packages/web-integration/src/extractor/dom-util.ts diff --git a/packages/web-integration/src/html-element/extractInfo.ts b/packages/web-integration/src/extractor/extractor.ts similarity index 96% rename from packages/web-integration/src/html-element/extractInfo.ts rename to packages/web-integration/src/extractor/extractor.ts index 208cda4b1..5f5082a7d 100644 --- a/packages/web-integration/src/html-element/extractInfo.ts +++ b/packages/web-integration/src/extractor/extractor.ts @@ -11,7 +11,7 @@ import { isButtonElement, isImgElement, isInputElement } from './dom-util'; interface NodeDescriptor { node: Node; - childrens: NodeDescriptor[]; + children: NodeDescriptor[]; } export interface ElementInfo { @@ -39,7 +39,7 @@ function generateId(numberId: number) { export function extractTextWithPositionDFS(initNode: Node = container): ElementInfo[] { const elementInfoArray: ElementInfo[] = []; - const nodeMapTree: NodeDescriptor = { node: initNode, childrens: [] }; + const nodeMapTree: NodeDescriptor = { node: initNode, children: [] }; let nodeIndex = 1; function dfs(node: Node, parentNode: NodeDescriptor | null = null): void { @@ -47,9 +47,9 @@ export function extractTextWithPositionDFS(initNode: Node = container): ElementI return; } - const currentNodeDes: NodeDescriptor = { node, childrens: [] }; - if (parentNode?.childrens) { - parentNode.childrens.push(currentNodeDes); + const currentNodeDes: NodeDescriptor = { node, children: [] }; + if (parentNode?.children) { + parentNode.children.push(currentNodeDes); } collectElementInfo(node); diff --git a/packages/web-integration/src/extractor/index.ts b/packages/web-integration/src/extractor/index.ts new file mode 100644 index 000000000..8d5a8a219 --- /dev/null +++ b/packages/web-integration/src/extractor/index.ts @@ -0,0 +1 @@ +export { extractTextWithPositionDFS } from './extractor'; diff --git a/packages/web-integration/src/html-element/util.ts b/packages/web-integration/src/extractor/util.ts similarity index 100% rename from packages/web-integration/src/html-element/util.ts rename to packages/web-integration/src/extractor/util.ts diff --git a/packages/web-integration/src/html-element/index.ts b/packages/web-integration/src/html-element/index.ts deleted file mode 100644 index a9a124c14..000000000 --- a/packages/web-integration/src/html-element/index.ts +++ /dev/null @@ -1 +0,0 @@ -export { extractTextWithPositionDFS } from './extractInfo'; diff --git a/packages/web-integration/src/img/img.ts b/packages/web-integration/src/img/img.ts index 8b274a348..f53ba5c14 100644 --- a/packages/web-integration/src/img/img.ts +++ b/packages/web-integration/src/img/img.ts @@ -1,7 +1,7 @@ import assert from 'assert'; import { Buffer } from 'node:buffer'; import sharp from 'sharp'; -import { NodeType } from '@/html-element/constants'; +import { NodeType } from '@/extractor/constants'; // Define picture path type ElementType = { @@ -80,8 +80,8 @@ const createSvgOverlay = (elements: Array, imageWidth: number, imag export const processImageElementInfo = async (options: { inputImgBase64: string; - elementsPostionInfo: Array; - elementsPostionInfoWithoutText: Array; + elementsPositionInfo: Array; + elementsPositionInfoWithoutText: Array; }) => { // Get the size of the original image const base64Image = options.inputImgBase64.split(';base64,').pop(); @@ -93,8 +93,8 @@ export const processImageElementInfo = async (options: { if (width && height) { // Create svg overlay - const svgOverlay = createSvgOverlay(options.elementsPostionInfo, width, height); - const svgOverlayWithoutText = createSvgOverlay(options.elementsPostionInfoWithoutText, width, height); + const svgOverlay = createSvgOverlay(options.elementsPositionInfo, width, height); + const svgOverlayWithoutText = createSvgOverlay(options.elementsPositionInfoWithoutText, width, height); // Composite picture const compositeElementInfoImgBase64 = await sharp(imageBuffer) @@ -109,7 +109,7 @@ export const processImageElementInfo = async (options: { throw err; }); - // Composite picture withtoutText + // Composite picture withoutText const compositeElementInfoImgWithoutTextBase64 = await sharp(imageBuffer) // .resize(newDimensions.width, newDimensions.height) .composite([{ input: svgOverlayWithoutText, blend: 'over' }]) diff --git a/packages/web-integration/src/img/util.ts b/packages/web-integration/src/img/util.ts index 6082e8b4a..432773140 100644 --- a/packages/web-integration/src/img/util.ts +++ b/packages/web-integration/src/img/util.ts @@ -1,10 +1,10 @@ -import { getElementInfosFromPage } from '../playwright/utils'; -import { NodeType } from '@/html-element/constants'; -import { ElementInfo } from '@/html-element/extractInfo'; +import { getElementInfosFromPage } from '../common/utils'; +import { NodeType } from '@/extractor/constants'; +import { ElementInfo } from '@/extractor/extractor'; export async function getElementInfos(page: any) { const captureElementSnapshot: Array = await getElementInfosFromPage(page); - const elementsPostionInfo = captureElementSnapshot.map((elementInfo) => { + const elementsPositionInfo = captureElementSnapshot.map((elementInfo) => { return { label: elementInfo.id.toString(), x: elementInfo.rect.left, @@ -14,15 +14,15 @@ export async function getElementInfos(page: any) { attributes: elementInfo.attributes, }; }); - const elementsPostionInfoWithoutText = elementsPostionInfo.filter((elementInfo) => { + const elementsPositionInfoWithoutText = elementsPositionInfo.filter((elementInfo) => { if (elementInfo.attributes.nodeType === NodeType.TEXT) { return false; } return true; }); return { - elementsPostionInfo, + elementsPositionInfo, captureElementSnapshot, - elementsPostionInfoWithoutText, + elementsPositionInfoWithoutText, }; } diff --git a/packages/web-integration/src/index.ts b/packages/web-integration/src/index.ts index e2d10b9b6..508505357 100644 --- a/packages/web-integration/src/index.ts +++ b/packages/web-integration/src/index.ts @@ -1,2 +1,4 @@ export { PlaywrightAiFixture } from './playwright'; export type { PlayWrightAiFixtureType } from './playwright'; + +export { PuppeteerAgent } from './puppeteer'; diff --git a/packages/web-integration/src/playwright/index.ts b/packages/web-integration/src/playwright/index.ts index 8938096df..4861fb545 100644 --- a/packages/web-integration/src/playwright/index.ts +++ b/packages/web-integration/src/playwright/index.ts @@ -1,113 +1,63 @@ +import { randomUUID } from 'crypto'; import { TestInfo, TestType } from '@playwright/test'; -import { ExecutionDump, GroupedActionDump } from '@midscene/core'; -import { groupedActionDumpFileExt, writeDumpFile } from '@midscene/core/utils'; -import { PlayWrightActionAgent } from './actions'; - -export { PlayWrightActionAgent } from './actions'; +import { PageTaskExecutor } from '../common/tasks'; +import { WebPage } from '@/common/page'; +import { PageAgent } from '@/common/agent'; export type APITestType = Pick, 'step'>; -export const PlaywrightAiFixture = () => { - const dumps: GroupedActionDump[] = []; - - const appendDump = (groupName: string, execution: ExecutionDump) => { - let currentDump = dumps.find((dump) => dump.groupName === groupName); - if (!currentDump) { - currentDump = { - groupName, - executions: [], - }; - dumps.push(currentDump); - } - currentDump.executions.push(execution); - }; - - const writeOutActionDumps = () => { - writeDumpFile(`playwright-${process.pid}`, groupedActionDumpFileExt, JSON.stringify(dumps)); - }; - - const groupAndCaseForTest = (testInfo: TestInfo) => { - let groupName: string; - let caseName: string; - const titlePath = [...testInfo.titlePath]; - - if (titlePath.length > 1) { - caseName = titlePath.pop()!; - groupName = titlePath.join(' > '); - } else if (titlePath.length === 1) { - caseName = titlePath[0]; - groupName = caseName; - } else { - caseName = 'unnamed'; - groupName = 'unnamed'; - } - return { groupName, caseName }; - }; - - const aiAction = async (page: any, testInfo: TestInfo, taskPrompt: string) => { - const { groupName, caseName } = groupAndCaseForTest(testInfo); - - const actionAgent = new PlayWrightActionAgent(page, { taskName: caseName }); - let error: Error | undefined; - try { - await actionAgent.action(taskPrompt); - } catch (e: any) { - error = e; - } - if (actionAgent.actionDump) { - appendDump(groupName, actionAgent.actionDump); - writeOutActionDumps(); - } - if (error) { - // playwright cli won't print error cause, so we print it here - console.error(error); - throw new Error(error.message, { cause: error }); - } - }; +const groupAndCaseForTest = (testInfo: TestInfo) => { + let groupName: string; + let caseName: string; + const titlePath = [...testInfo.titlePath]; - const aiQuery = async (page: any, testInfo: TestInfo, demand: any) => { - const { groupName, caseName } = groupAndCaseForTest(testInfo); + if (titlePath.length > 1) { + caseName = titlePath.pop()!; + groupName = titlePath.join(' > '); + } else if (titlePath.length === 1) { + caseName = titlePath[0]; + groupName = caseName; + } else { + caseName = 'unnamed'; + groupName = 'unnamed'; + } + return { groupName, caseName }; +}; - const actionAgent = new PlayWrightActionAgent(page, { taskName: caseName }); - let error: Error | undefined; - let result: any; - try { - result = await actionAgent.query(demand); - } catch (e: any) { - error = e; - } - if (actionAgent.actionDump) { - appendDump(groupName, actionAgent.actionDump); - writeOutActionDumps(); - } - if (error) { - // playwright cli won't print error cause, so we print it here - console.error(error); - throw new Error(error.message, { cause: error }); +const midSceneAgentKeyId = '_midSceneAgentId'; +export const PlaywrightAiFixture = () => { + const pageAgentMap: Record = {}; + const agentForPage = (page: WebPage) => { + let idForPage = (page as any)[midSceneAgentKeyId]; + if (!idForPage) { + idForPage = randomUUID(); + (page as any)[midSceneAgentKeyId] = idForPage; + pageAgentMap[idForPage] = new PageAgent(page); } - return result; + return pageAgentMap[idForPage]; }; return { - // shortcut ai: async ({ page }: any, use: any, testInfo: TestInfo) => { await use(async (taskPrompt: string, type = 'action') => { - if (type === 'action') { - return aiAction(page, testInfo, taskPrompt); - } else if (type === 'query') { - return aiQuery(page, testInfo, taskPrompt); - } - throw new Error(`Unknown or Unsupported task type: ${type}, only support 'action' or 'query'`); + const { groupName, caseName } = groupAndCaseForTest(testInfo); + const agent = agentForPage(page); + return agent.ai(taskPrompt, type, caseName, groupName); }); }, aiAction: async ({ page }: any, use: any, testInfo: TestInfo) => { await use(async (taskPrompt: string) => { - await aiAction(page, testInfo, taskPrompt); + const agent = agentForPage(page); + + const { groupName, caseName } = groupAndCaseForTest(testInfo); + await agent.aiAction(taskPrompt, caseName, groupName); }); }, aiQuery: async ({ page }: any, use: any, testInfo: TestInfo) => { await use(async function (demand: any) { - return aiQuery(page, testInfo, demand); + const agent = agentForPage(page); + const { groupName, caseName } = groupAndCaseForTest(testInfo); + return agent.aiQuery(demand, caseName, groupName); }); }, }; @@ -115,6 +65,6 @@ export const PlaywrightAiFixture = () => { export type PlayWrightAiFixtureType = { ai: (prompt: string, type?: 'action' | 'query') => Promise; - aiAction: (taskPrompt: string) => ReturnType; + aiAction: (taskPrompt: string) => ReturnType; aiQuery: (demand: any) => Promise; }; diff --git a/packages/web-integration/src/puppeteer/element.ts b/packages/web-integration/src/puppeteer/element.ts deleted file mode 100644 index 5bd470f15..000000000 --- a/packages/web-integration/src/puppeteer/element.ts +++ /dev/null @@ -1,49 +0,0 @@ -// import { Page } from 'puppeteer'; -// import { BaseElement, Rect } from '@/types'; - -// export class Element implements BaseElement { -// id: string; - -// attributes: Record; - -// nodeType: string; - -// content: string; - -// locator: string; - -// rect: Rect; - -// center: [number, number]; - -// page: Page; - -// constructor(options: { -// id: string, attributes: Record, nodeType: string, content: string, rect: Rect, page: Page, locator: string -// }) { -// this.id = options.id; -// this.attributes = options.attributes; -// this.nodeType = options.nodeType; -// this.content = options.content; -// this.rect = options.rect; -// this.center = [Math.floor(options.rect.left + options.rect.width / 2), Math.floor(options.rect.top + options.rect.height / 2)]; -// this.page = options.page; -// this.locator = options.locator; -// } - -// async tap() { -// await this.page.mouse.click(this.center[0], this.center[1]); -// } - -// async hover() { -// console.log('hover'); -// } - -// async type(text: string) { -// await this.page.keyboard.type(text, { delay: 100 }); -// } - -// async press(key: string) { -// await this.page.keyboard.press(key as any, { delay: 100 }); -// } -// } diff --git a/packages/web-integration/src/puppeteer/index.ts b/packages/web-integration/src/puppeteer/index.ts index 3cb866c85..ca0787bff 100644 --- a/packages/web-integration/src/puppeteer/index.ts +++ b/packages/web-integration/src/puppeteer/index.ts @@ -1,6 +1 @@ -// export { Element } from './element'; -// export { -// parseContextFromPuppeteerBrowser, -// parseContextFromPuppeteerPage, -// parseContextFromPlaywrightPage, -// } from './utils'; +export { PageAgent as PuppeteerAgent } from '@/common/agent'; diff --git a/packages/web-integration/src/puppeteer/utils.ts b/packages/web-integration/src/puppeteer/utils.ts deleted file mode 100644 index ae1623307..000000000 --- a/packages/web-integration/src/puppeteer/utils.ts +++ /dev/null @@ -1,116 +0,0 @@ -// import { readFileSync } from 'fs'; -// import { Buffer } from 'buffer'; -// import assert from 'assert'; -// import type { Browser, Page } from 'puppeteer'; -// import type { Page as PlaywrightPage } from 'playwright'; -// import { Element } from './index'; -// import { alignCoordByTrim, base64Encoded, imageInfoOfBase64 } from '@/image'; -// import { UIContext, PuppeteerParserOpt, PlaywrightParserOpt, Rect, BaseElement } from '@/types'; -// import { getTmpFile } from '@/utils'; -// import { pageScriptToGetTexts } from '@/query'; -// import { describeUserPage } from '@/insight/prompt'; - -// export interface TextElement { -// content: string; -// rect: Rect; -// center: [number, number]; // center coordinates as [rect.left + rect.width/2, rect.top + rect.height/2], use this for better control of page -// locator: string; -// } - -// export async function alignTextElements( -// screenshotBuffer: Buffer, -// elements: TextElement[], -// ): Promise { -// const textsAligned: TextElement[] = []; -// for (const item of elements) { -// const { rect } = item; -// const aligned = await alignCoordByTrim(screenshotBuffer, rect); -// item.rect = aligned; -// item.center = [ -// Math.round(aligned.left + aligned.width / 2), -// Math.round(aligned.top + aligned.height / 2), -// ]; -// textsAligned.push(item); -// } -// return textsAligned; -// } - -// async function extractDataFromPage(page: Page, opt?: PuppeteerParserOpt): Promise> { -// assert(page, 'page is required'); -// const file = getTmpFile('jpeg'); -// await page.screenshot({ path: file, type: 'jpeg', quality: 75 }); -// const screenshotBuffer = readFileSync(file); -// const screenshotBase64 = base64Encoded(file); -// const size = await imageInfoOfBase64(screenshotBase64); - -// const scripts = pageScriptToGetTexts(opt?.selector); -// const texts = (await page.evaluate(scripts)) as BaseElement[]; - -// // align texts -// const textsAligned = await alignTextElements(screenshotBuffer, texts); - -// const baseElements = textsAligned.map((item) => { -// const { center, ...res } = item; -// return new Element(res); -// }); - -// const basicContext = { -// screenshotBase64, -// size, -// content: baseElements, -// }; - -// return { -// ...basicContext, -// describer: async () => { -// return describeUserPage(basicContext); -// }, -// }; -// } - -// export async function parseContextFromPuppeteerPage( -// page: Page, -// opt?: PuppeteerParserOpt, -// ): Promise> { -// return extractDataFromPage(page, opt); -// } - -// export async function parseContextFromPuppeteerBrowser(browser: Browser): Promise> { -// const pages = await browser.pages(); -// let visiblePage: Page; -// if (!pages.length) { -// throw new Error('No page found in the puppeteer browser'); -// } else if (pages.length === 1) { -// visiblePage = pages[0]; - -// // filter a visible page, otherwise use the last one -// } else { -// const candidates = []; -// for (const page of pages) { -// // eslint-disable-next-line @typescript-eslint/no-loop-func -// const isVisible = await page.evaluate(() => document.visibilityState === 'visible'); -// if (isVisible) { -// candidates.push(page); -// } -// } -// if (candidates.length === 0) { -// const lastUrl = pages[pages.length - 1].url(); -// console.warn(`There are no visible pages, use the last one (${lastUrl})`); -// visiblePage = candidates[candidates.length - 1]; -// } else if (candidates.length === 1) { -// visiblePage = candidates[0]; -// } else { -// const lastUrl = pages[pages.length - 1].url(); -// console.warn(`Multiple visible pages found, use the last one (${lastUrl})`); -// visiblePage = candidates[candidates.length - 1]; -// } -// } -// return parseContextFromPuppeteerPage(visiblePage); -// } - -// export async function parseContextFromPlaywrightPage( -// page: PlaywrightPage, -// opt?: PlaywrightParserOpt, -// ): Promise> { -// return extractDataFromPage(page as any as Page, opt); // seems key APIs are the same ? -// } diff --git a/packages/web-integration/src/playwright/element.ts b/packages/web-integration/src/web-element.ts similarity index 66% rename from packages/web-integration/src/playwright/element.ts rename to packages/web-integration/src/web-element.ts index 5ffd6c55f..2f4e3807c 100644 --- a/packages/web-integration/src/playwright/element.ts +++ b/packages/web-integration/src/web-element.ts @@ -1,6 +1,6 @@ -import { Page } from 'playwright'; import { BaseElement, Rect } from '@midscene/core'; -import { NodeType } from '../html-element/constants'; +import { NodeType } from './extractor/constants'; +import { WebPage } from './common/page'; export interface WebElementInfoType extends BaseElement { id: string; @@ -20,7 +20,7 @@ export class WebElementInfo implements BaseElement { center: [number, number]; - page: Page; + page: WebPage; id: string; @@ -39,7 +39,7 @@ export class WebElementInfo implements BaseElement { }: { content: string; rect: Rect; - page: Page; + page: WebPage; locator: string; id: string; attributes: { @@ -55,20 +55,4 @@ export class WebElementInfo implements BaseElement { this.id = id; this.attributes = attributes; } - - async tap() { - await this.page.mouse.click(this.center[0], this.center[1]); - } - - async hover() { - await this.page.mouse.move(this.center[0], this.center[1]); - } - - async type(text: string) { - await this.page.keyboard.type(text); - } - - async press(key: Parameters[0]) { - await this.page.keyboard.press(key); - } } diff --git a/packages/web-integration/tests/e2e/ai-xicha.spec.ts b/packages/web-integration/tests/e2e/ai-xicha.spec.ts index e094f0ddf..a855cec2e 100644 --- a/packages/web-integration/tests/e2e/ai-xicha.spec.ts +++ b/packages/web-integration/tests/e2e/ai-xicha.spec.ts @@ -1,3 +1,4 @@ +import { expect } from 'playwright/test'; import { test } from './fixture'; test.beforeEach(async ({ page }) => { @@ -6,12 +7,12 @@ test.beforeEach(async ({ page }) => { await page.waitForLoadState('networkidle'); }); -test('ai order', async ({ ai }) => { +test('ai order', async ({ ai, aiQuery }) => { await ai('点击左上角语言切换按钮(英文、中文),在弹出的下拉列表中点击中文'); await ai('在向下滚动一屏'); await ai('直接点击多肉葡萄的规格按钮'); await ai('点击不使用吸管、点击冰沙推荐、点击正常冰推荐'); - await ai('在向下滚动一屏'); + await ai('向下滚动一屏'); await ai('点击标准甜、点击绿妍(推荐)、点击标准口味'); await ai('滚动到最下面'); await ai('点击选好了按钮'); @@ -20,6 +21,14 @@ test('ai order', async ({ ai }) => { // 随便滚动一下 await ai('滚动到最下面'); + const cardDetail = await aiQuery({ + productName: '商品名称,在价格上面', + productPrice: '商品价格, string', + productDescription: '商品描述(饮品的各种参数,吸管、冰沙等),在价格下面', + }); + + expect(cardDetail.productName.indexOf('多肉葡萄')).toBeGreaterThanOrEqual(0); + // const content = await aiQuery(query('购物车商品详情', { // productName: "商品名称,在价格上面", // productPrice: "商品价格", diff --git a/packages/web-integration/tests/e2e/tool.ts b/packages/web-integration/tests/e2e/tool.ts index 4fa2af32a..74582b188 100644 --- a/packages/web-integration/tests/e2e/tool.ts +++ b/packages/web-integration/tests/e2e/tool.ts @@ -6,7 +6,7 @@ import { getElementInfos } from '@/img/util'; import { processImageElementInfo } from '@/img/img'; export async function generateTestData(page: PlaywrightPage, targetDir: string, inputImgBase64: string) { - const { elementsPostionInfo, captureElementSnapshot, elementsPostionInfoWithoutText } = + const { elementsPositionInfo, captureElementSnapshot, elementsPositionInfoWithoutText } = await getElementInfos(page); const inputImagePath = path.join(targetDir, 'input.png'); @@ -17,8 +17,8 @@ export async function generateTestData(page: PlaywrightPage, targetDir: string, const { compositeElementInfoImgBase64, compositeElementInfoImgWithoutTextBase64 } = await processImageElementInfo({ - elementsPostionInfo, - elementsPostionInfoWithoutText, + elementsPositionInfo, + elementsPositionInfoWithoutText, inputImgBase64, }); diff --git a/packages/web-integration/tests/puppeteer/bing.spec.ts b/packages/web-integration/tests/puppeteer/bing.spec.ts new file mode 100644 index 000000000..e4e4a168b --- /dev/null +++ b/packages/web-integration/tests/puppeteer/bing.spec.ts @@ -0,0 +1,23 @@ +import { it, describe, expect, vi } from 'vitest'; +import { sleep } from '@midscene/core/utils'; +import { launchPage } from './utils'; +import { PuppeteerAgent } from '@/puppeteer'; + +vi.setConfig({ + testTimeout: 60 * 1000, +}); + +describe('puppeteer integration', () => { + it('basic launch', async () => { + const page = await launchPage('https://www.bing.com'); + + const agent = new PuppeteerAgent(page); + + await agent.aiAction('type "how much is the ferry ticket in Shanghai" in search box, hit Enter'); + await sleep(5000); + + const relatedSearch = await agent.aiQuery('string[], related search keywords on the right'); + console.log('related search', relatedSearch); + expect(relatedSearch.length).toBeGreaterThan(3); + }); +}); diff --git a/packages/web-integration/tests/puppeteer/utils.ts b/packages/web-integration/tests/puppeteer/utils.ts new file mode 100644 index 000000000..a8a1c4513 --- /dev/null +++ b/packages/web-integration/tests/puppeteer/utils.ts @@ -0,0 +1,35 @@ +import assert from 'assert'; +import puppeteer, { Viewport } from 'puppeteer'; + +export async function launchPage( + url: string, + opt?: { + viewport?: Viewport; + }, +) { + const browser = await puppeteer.launch({ + headless: false, + }); + + const page = (await browser.pages())[0]; + const viewportConfig = { + width: opt?.viewport?.width || 1920, + height: opt?.viewport?.height || 1080, + deviceScaleFactor: opt?.viewport?.deviceScaleFactor || 1, + }; + await page.setViewport(viewportConfig); + await Promise.all([ + page.waitForNavigation({ + timeout: 20 * 1000, + waitUntil: 'networkidle0', + }), + (async () => { + const response = await page.goto(url); + if (response?.status) { + assert(response.status() <= 399, `Page load failed: ${response.status()}`); + } + })(), + ]); + + return page; +} diff --git a/packages/web-integration/vitest.config.ts b/packages/web-integration/vitest.config.ts index 6749bb413..71508d36e 100644 --- a/packages/web-integration/vitest.config.ts +++ b/packages/web-integration/vitest.config.ts @@ -8,7 +8,6 @@ export default defineConfig({ }, }, test: { - // 你的其他配置... - include: [], + include: ['./tests/puppeteer/**/*.spec.ts'], }, });