Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: use different color for annotations #366

Merged
merged 13 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions .github/workflows/ai-evaluation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Runs the AI evaluation suites in packages/evaluation on every push to main,
# or on demand against a chosen branch via workflow_dispatch.
name: AI unit test
on:
  push:
    branches:
      - main
  workflow_dispatch:
    inputs:
      branch:
        description: 'Branch to checkout'
        required: false
        default: 'main'
        type: string

# Least-privilege token: the job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  main:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        node-version: [18.19.0]

    env:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
      MIDSCENE_MODEL_NAME: gpt-4o-2024-11-20
      CI: 1

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          # Push events carry no inputs, so fall back to main.
          ref: ${{ github.event.inputs.branch || 'main' }}

      - name: Setup pnpm
        # Code scanning / CodeQL warning (Medium): unpinned tag for a non-immutable Action.
        # 'pnpm/action-setup' is referenced by ref 'v2', not a pinned commit hash.
        # TODO: pin this step to a full commit SHA.
        uses: pnpm/action-setup@v2
        with:
          version: 9.3.0

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          # Use the version declared in the matrix so the two cannot drift apart.
          node-version: ${{ matrix.node-version }}
          cache: 'pnpm'

      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      - name: Build project
        run: pnpm run build

      - name: Run evaluation
        run: |
          cd packages/evaluation
          pnpm run evaluate:inspect
          pnpm run evaluate:assertion
Comment on lines +16 to +54

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions Job or Workflow does not set permissions
3 changes: 2 additions & 1 deletion biome.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
"**/playwright-report/**",
"**/todo-report.spec.ts-snapshots/**",
"**/visualizer/scripts/fixture/*",
"**/unpacked-extension/*"
"**/unpacked-extension/*",
"**/page-data/**"
]
},
"javascript": {
Expand Down
4 changes: 4 additions & 0 deletions packages/evaluation/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

# Midscene.js dump files
midscene_run/report
midscene_run/tmp
5 changes: 5 additions & 0 deletions packages/evaluation/data-generator/fixture.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import type { PlayWrightAiFixtureType } from '@midscene/web';
import { PlaywrightAiFixture } from '@midscene/web/playwright';
import { test as base } from '@playwright/test';

// Playwright `test` object extended with the fixtures returned by PlaywrightAiFixture().
const aiFixtures = PlaywrightAiFixture();
export const test = base.extend<PlayWrightAiFixtureType>(aiFixtures);
23 changes: 23 additions & 0 deletions packages/evaluation/data-generator/generator-headed.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { PlaywrightWebPage } from '@midscene/web/playwright';
import { test } from './fixture';
import { generateExtractData, generateTestDataPath } from './utils';

/**
 * Pauses for the given number of milliseconds.
 *
 * @param time - Delay in milliseconds before the returned promise settles.
 * @returns A promise that resolves with `0` once the timer fires.
 */
function sleep(time: number) {
  return new Promise((done) => {
    // Extra setTimeout arguments are forwarded to the callback, so `done` receives 0.
    setTimeout(done, time, 0);
  });
}

// Opens the Taobao home page at a 1280x800 viewport, waits five seconds, then
// dumps the extracted page data to the path generated for the 'taobao' data set.
// NOTE(review): `ai` is not used in the body; it is kept in the destructuring
// because removing it may change which fixtures are set up — confirm before dropping.
test('taobao', async ({ page, ai }) => {
  const playwrightPage = new PlaywrightWebPage(page);
  // setViewportSize returns a promise; await it so the viewport is applied
  // before navigation starts and any failure surfaces in this test.
  await page.setViewportSize({ width: 1280, height: 800 });

  await page.goto('https://www.taobao.com/');

  // for --ui
  await sleep(5000);

  await generateExtractData(playwrightPage, generateTestDataPath('taobao'));
});
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { generateExtractData, generateTestDataPath } from '@/debug';
import { PlaywrightWebPage } from '@/playwright';
import { PlaywrightWebPage } from '@midscene/web/playwright';
import { test } from './fixture';
import { generateExtractData, generateTestDataPath } from './utils';

function sleep(time: number) {
return new Promise((resolve) => {
Expand All @@ -10,7 +10,7 @@ function sleep(time: number) {
});
}

test('generate todo test data', async ({ page }) => {
test('todo app', async ({ page }) => {
const playwrightPage = new PlaywrightWebPage(page);
await page.goto('https://todomvc.com/examples/react/dist/');
// Add data
Expand All @@ -33,7 +33,7 @@ test('generate todo test data', async ({ page }) => {
);
});

test('generate visualstudio test data', async ({ page }) => {
test('visualstudio code', async ({ page }) => {
const playwrightPage = new PlaywrightWebPage(page);

await page.goto('https://code.visualstudio.com/');
Expand All @@ -45,7 +45,7 @@ test('generate visualstudio test data', async ({ page }) => {
);
});

test('generate githubstatus test data', async ({ page }) => {
test('github status page', async ({ page }) => {
const playwrightPage = new PlaywrightWebPage(page);

await page.setViewportSize({ width: 1920, height: 1080 });
Expand Down Expand Up @@ -95,7 +95,7 @@ test('antd widget - carousel', async ({ page }) => {
);
});

test('generate online order test data', async ({ page, ai }) => {
test('heytea online order', async ({ page, ai }) => {
const playwrightPage = new PlaywrightWebPage(page);

page.setViewportSize({ width: 400, height: 905 });
Expand All @@ -109,7 +109,7 @@ test('generate online order test data', async ({ page, ai }) => {
);
});

test('generate online order list test data (zh-cn)', async ({ page, ai }) => {
test('heytea online order list (zh-cn)', async ({ page, ai }) => {
const playwrightPage = new PlaywrightWebPage(page);

page.setViewportSize({ width: 400, height: 905 });
Expand All @@ -127,19 +127,7 @@ test('generate online order list test data (zh-cn)', async ({ page, ai }) => {
);
});

test('generate taobao test data', async ({ page, ai }) => {
const playwrightPage = new PlaywrightWebPage(page);
page.setViewportSize({ width: 1280, height: 800 });

await page.goto('https://www.taobao.com/');

// for --ui
await sleep(5000);

await generateExtractData(playwrightPage, generateTestDataPath('taobao'));
});

test('generate douyin test data', async ({ page, ai }) => {
test('douyin', async ({ page, ai }) => {
const playwrightPage = new PlaywrightWebPage(page);

page.setViewportSize({ width: 1280, height: 800 });
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import assert from 'node:assert';
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { NodeType } from '@midscene/shared/constants';

import path from 'node:path';
import type { WebPage } from '@/common/page';
import { descriptionOfTree } from '@midscene/core/tree';
import { NodeType } from '@midscene/shared/constants';
import type { ElementInfo } from '@midscene/shared/extractor';
import {
processImageElementInfo,
resizeImgBase64,
saveBase64Image,
} from '@midscene/shared/img';
import type { WebPage } from '@midscene/web';

import type { ElementInfo } from '@midscene/shared/extractor';

export async function generateExtractData(
page: WebPage,
Expand Down Expand Up @@ -109,13 +112,10 @@ export async function generateExtractData(
}

export function generateTestDataPath(testDataName: string) {
// `dist/lib/index.js` Is the default export path
const modulePath = require
.resolve('@midscene/core')
.replace('dist/lib/index.js', '');
assert(testDataName, 'testDataName is required');
const midsceneTestDataPath = path.join(
modulePath,
`tests/ai/evaluate/test-data/${testDataName}`,
__dirname,
`../page-data/${testDataName}`,
);

return midsceneTestDataPath;
Expand Down
32 changes: 32 additions & 0 deletions packages/evaluation/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"name": "evaluation",
"private": true,
"scripts": {
"update-page-data:headless": "playwright test ./data-generator/generator-headless.spec.ts",
"update-page-data:headed": "playwright test ./data-generator/generator-headed.spec.ts --headed",
"evaluate:inspect": "npx vitest --run tests/llm-inspect.test.ts",
"evaluate:inspect:update": "UPDATE_AI_DATA=true npm run evaluate:inspect",
"evaluate:assertion": "npx vitest --run tests/assertion.test.ts",
"evaluate:assertion:update": "UPDATE_AI_DATA=true npm run evaluate:assertion"
},
"dependencies": {
"@midscene/core": "workspace:*",
"@midscene/shared": "workspace:*",
"@midscene/web": "workspace:*"
},
"devDependencies": {
"dotenv": "16.4.5",
"playwright": "1.44.1",
"@playwright/test": "^1.44.1",
"typescript": "~5.0.4",
"vitest": "^1.6.0"
},
"engines": {
"node": ">=18.0.0"
},
"publishConfig": {
"access": "public",
"registry": "https://registry.npmjs.org"
},
"license": "MIT"
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"testDataPath": "test-data/online_order",
"testDataPath": "online_order",
"testCases": [
{
"prompt": "there are three tabs in the page, named 'Menu', 'Reviews', 'Merchant'",
Expand All @@ -26,4 +26,4 @@
"expected": false
}
]
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"testDataPath": "test-data/aweme-login",
"testDataPath": "aweme-login",
"testCases": [
{
"prompt": "密码登录",
Expand Down Expand Up @@ -72,4 +72,4 @@
]
}
]
}
}
58 changes: 58 additions & 0 deletions packages/evaluation/page-cases/inspect/aweme_play.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"testDataPath": "aweme-play",
"testCases": [
{
"prompt": "左下角暂停按钮",
"response": [
{
"id": "3"
}
]
},
{
"prompt": "点赞(爱心)按钮",
"response": [
{
"id": "afifi",
"indexId": 22
}
]
},
{
"prompt": "评论按钮",
"response": [
{
"id": "jldma",
"indexId": 24
}
]
},
{
"prompt": "书签收藏按钮",
"response": [
{
"id": "nmgcl",
"indexId": 26
}
]
},
{
"prompt": "分享按钮",
"response": [
{
"id": "eabap",
"indexId": 28
}
]
},
{
"prompt": "右下角区域声音按钮",
"response": [
{
"id": "djknm",
"indexId": 9
}
]
}
]
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"testDataPath": "test-data/online_order",
"testDataPath": "online_order",
"testCases": [
{
"prompt": "Top left menu bar icon",
Expand Down Expand Up @@ -62,4 +62,4 @@
]
}
]
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"testDataPath": "test-data/online_order_list",
"testDataPath": "online_order_list",
"testCases": [
{
"prompt": "'清爽不喝腻'下面第二个饮品的名称",
Expand Down Expand Up @@ -42,4 +42,4 @@
]
}
]
}
}
Loading