Skip to content

Commit cd775af

Browse files
authored
feat(web): added ai cache capability (#18)
* chore: optimize test case * chore: optimize test case * chore: optimize report path * chore: add task cache logic * refactor(report): optimize ai report * chore: save cache logic * chore: modify cache logic * chore: modify cache logic * chore: modify cache logic * chore: modify unit test case * chore: modify unit test case * chore: optimize test case * chore: add cache logic * chore: optimize cache logic * chore: optimize cache logic * chore: optimize cache logic * chore: update cache file * chore: update cache file * chore: update cache file * chore: update cache file * chore: Added cache version determination * chore: Added cache version determination * chore: add cache logic * refactor(web): use hash replace index number id * chore: add cache logic * chore: fix unit test snapshot * chore: update snapshot * chore: update snapshot * chore: update snapshot * chore: update snapshot * chore: update snapshot * chore: add cache test * chore: optimize test command * chore: optimize cache logic * chore: optimize cache logic * chore: update snapshot * chore: update cache * chore: delete unless file * chore: update test data * chore: update test data * chore: fix build command * chore: update cache logic
1 parent d2c5491 commit cd775af

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+3095
-708
lines changed

.gitignore

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,6 @@ blob-report/
9797
playwright/.cache/
9898

9999
# MidScene.js dump files
100-
midscene_run/
101-
midscene-report/
102100
__ai_responses__/
103101

104102

packages/midscene/src/ai-model/inspect.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ export async function AiInspectElement<ElementType extends BaseElement = BaseEle
88
context: UIContext<ElementType>;
99
multi: boolean;
1010
findElementDescription: string;
11-
callAI?: typeof callToGetJSONObject;
11+
callAI?: typeof callToGetJSONObject<AIElementParseResponse>;
1212
}) {
1313
const { context, multi, findElementDescription, callAI = callToGetJSONObject } = options;
1414
const { screenshotBase64 } = context;
@@ -35,7 +35,7 @@ export async function AiInspectElement<ElementType extends BaseElement = BaseEle
3535
],
3636
},
3737
];
38-
const parseResult = await callAI<AIElementParseResponse>(msgs);
38+
const parseResult = await callAI(msgs);
3939
return {
4040
parseResult,
4141
elementById,

packages/midscene/src/ai-model/prompt/util.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,15 @@ export async function describeUserPage<ElementType extends BaseElement = BaseEle
199199
context: Omit<UIContext<ElementType>, 'describer'>,
200200
) {
201201
const { screenshotBase64 } = context;
202-
const { width, height } = await imageInfoOfBase64(screenshotBase64);
202+
let width: number;
203+
let height: number;
204+
205+
if (context.size) {
206+
({ width, height } = context.size);
207+
} else {
208+
const imgSize = await imageInfoOfBase64(screenshotBase64);
209+
({ width, height } = imgSize);
210+
}
203211

204212
const elementsInfo = context.content;
205213
const idElementMap: Record<string, ElementType> = {};

packages/midscene/src/automation/planning.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { ChatCompletionMessageParam } from 'openai/resources';
22
import { PlanningAction, PlanningAIResponse, UIContext } from '@/types';
3-
import { callToGetJSONObject as callAI } from '@/ai-model/openai';
3+
import { callToGetJSONObject } from '@/ai-model/openai';
44
import { describeUserPage } from '@/ai-model';
55

66
const characteristic =
@@ -60,7 +60,14 @@ export function systemPromptToTaskPlanning(query: string) {
6060
`;
6161
}
6262

63-
export async function plan(context: UIContext, userPrompt: string): Promise<{ plans: PlanningAction[] }> {
63+
export async function plan(
64+
userPrompt: string,
65+
opts: {
66+
context: UIContext;
67+
callAI?: typeof callToGetJSONObject<PlanningAIResponse>;
68+
},
69+
): Promise<{ plans: PlanningAction[] }> {
70+
const { callAI = callToGetJSONObject<PlanningAIResponse>, context } = opts || {};
6471
const { screenshotBase64 } = context;
6572
const { description } = await describeUserPage(context);
6673
const systemPrompt = systemPromptToTaskPlanning(userPrompt);
@@ -84,7 +91,7 @@ export async function plan(context: UIContext, userPrompt: string): Promise<{ pl
8491
},
8592
];
8693

87-
const planFromAI = await callAI<PlanningAIResponse>(msgs);
94+
const planFromAI = await callAI(msgs);
8895
if (planFromAI.error) {
8996
throw new Error(planFromAI.error);
9097
}

packages/midscene/src/insight/index.ts

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import {
1616
BaseElement,
1717
DumpSubscriber,
1818
InsightExtractParam,
19+
AIElementParseResponse,
1920
} from '@/types';
2021

2122
const sortByOrder = (a: UISection, b: UISection) => {
@@ -26,8 +27,9 @@ const sortByOrder = (a: UISection, b: UISection) => {
2627
}
2728
};
2829

29-
export interface FindElementOptions {
30+
/**
 * Options accepted by `Insight.locate`.
 */
export interface LocateOpts {
3031
/** When true, `locate` is declared to resolve to an array of elements; `locate` treats it as `false` when omitted. */
multi?: boolean;
32+
/** Per-call AI function forwarded to `AiInspectElement`; `locate` falls back to the instance's `aiVendorFn` when omitted. */
callAI?: typeof callAI<AIElementParseResponse>;
3133
}
3234

3335
// export type UnwrapDataShape<T> = T extends EnhancedQuery<infer DataShape> ? DataShape : {};
@@ -36,19 +38,19 @@ export type AnyValue<T> = {
3638
[K in keyof T]: unknown extends T[K] ? any : T[K];
3739
};
3840

39-
export default class Insight<ElementType extends BaseElement = BaseElement> {
40-
contextRetrieverFn: () => Promise<UIContext<ElementType>> | UIContext<ElementType>;
41+
export default class Insight<
42+
ElementType extends BaseElement = BaseElement,
43+
ContextType extends UIContext<ElementType> = UIContext<ElementType>,
44+
> {
45+
contextRetrieverFn: () => Promise<ContextType> | ContextType;
4146

4247
aiVendorFn: typeof callAI = callAI;
4348

4449
onceDumpUpdatedFn?: DumpSubscriber;
4550

4651
taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;
4752

48-
constructor(
49-
context: UIContext<ElementType> | (() => Promise<UIContext<ElementType>> | UIContext<ElementType>),
50-
opt?: InsightOptions,
51-
) {
53+
constructor(context: ContextType | (() => Promise<ContextType> | ContextType), opt?: InsightOptions) {
5254
assert(context, 'context is required for Insight');
5355
if (typeof context === 'function') {
5456
this.contextRetrieverFn = context;
@@ -64,19 +66,20 @@ export default class Insight<ElementType extends BaseElement = BaseElement> {
6466
}
6567
}
6668

67-
async locate(queryPrompt: string): Promise<ElementType | null>;
69+
async locate(queryPrompt: string, opt?: { callAI: LocateOpts['callAI'] }): Promise<ElementType | null>;
6870
async locate(queryPrompt: string, opt: { multi: true }): Promise<ElementType[]>;
69-
async locate(queryPrompt: string, opt?: FindElementOptions) {
71+
async locate(queryPrompt: string, opt?: LocateOpts) {
72+
const { callAI = this.aiVendorFn, multi = false } = opt || {};
7073
assert(queryPrompt, 'query is required for located');
7174
const dumpSubscriber = this.onceDumpUpdatedFn;
7275
this.onceDumpUpdatedFn = undefined;
7376
const context = await this.contextRetrieverFn();
7477

7578
const startTime = Date.now();
7679
const { parseResult, systemPrompt, elementById } = await AiInspectElement({
77-
callAI: this.aiVendorFn,
80+
callAI,
7881
context,
79-
multi: Boolean(opt?.multi),
82+
multi: Boolean(multi),
8083
findElementDescription: queryPrompt,
8184
});
8285
// const parseResult = await this.aiVendorFn<AIElementParseResponse>(msgs);
@@ -282,4 +285,12 @@ export default class Insight<ElementType extends BaseElement = BaseElement> {
282285

283286
return mergedData;
284287
}
288+
289+
/**
 * Replaces the AI function stored on this instance (`aiVendorFn`).
 *
 * @param aiVendorFn - the function to install in place of the current one.
 * @returns a function that, when called, puts back the `aiVendorFn` value
 *   that was in place at the moment `setAiVendorFn` was invoked.
 */
setAiVendorFn<T>(aiVendorFn: typeof callAI<T>) {
290+
// Remember the current function so the returned callback can restore it.
const origin = this.aiVendorFn;
291+
// NOTE(review): `this.aiVendorFn<T>` on the left of `=` looks like an
// instantiation expression (TypeScript 4.7+), which is not an assignable
// target — confirm this type-checks; `this.aiVendorFn = ...` may be intended.
this.aiVendorFn<T> = aiVendorFn;
292+
return () => {
293+
this.aiVendorFn = origin;
294+
};
295+
}
285296
}

packages/midscene/src/insight/utils.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,11 @@ export function writeInsightDump(
5858
const length = logContent.push(dataString);
5959
logIdIndexMap[id] = length - 1;
6060
}
61-
writeDumpFile(logFileName, logFileExt, `[\n${logContent.join(',\n')}\n]`);
61+
writeDumpFile({
62+
fileName: logFileName,
63+
fileExt: logFileExt,
64+
fileContent: `[\n${logContent.join(',\n')}\n]`,
65+
});
6266

6367
return id;
6468
}

packages/midscene/src/utils.ts

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,20 +43,32 @@ export const groupedActionDumpFileExt = 'web-dump.json';
4343
export function getDumpDir() {
4444
return logDir;
4545
}
46+
4647
export function setDumpDir(dir: string) {
4748
logDir = dir;
4849
}
4950

50-
export function writeDumpFile(fileName: string, fileExt: string, fileContent: string) {
51+
/**
 * Builds the path of a typed sub-directory under the current dump directory.
 *
 * @param type - sub-directory name, either `'dump'` or `'cache'`.
 * @returns `getDumpDir()` joined with `type`.
 */
export function getDumpDirPath(type: 'dump' | 'cache') {
52+
return join(getDumpDir(), type);
53+
}
54+
55+
export function writeDumpFile(opts: {
56+
fileName: string;
57+
fileExt: string;
58+
fileContent: string;
59+
type?: 'dump' | 'cache';
60+
}) {
61+
const { fileName, fileExt, fileContent, type = 'dump' } = opts;
62+
const targetDir = getDumpDirPath(type);
63+
if (!existsSync(targetDir)) {
64+
mkdirSync(targetDir, { recursive: true });
65+
}
5166
// One-time setup (guarded by logEnvReady): make sure the dump output is listed in .gitignore
5267
if (!logEnvReady) {
53-
assert(logDir, 'logDir should be set before writing dump file');
54-
if (!existsSync(logDir)) {
55-
mkdirSync(logDir, { recursive: true });
56-
}
68+
assert(targetDir, 'logDir should be set before writing dump file');
5769

5870
// .gitignore in the parent of the dump root directory (two levels above targetDir)
59-
const gitIgnorePath = join(logDir, '../.gitignore');
71+
const gitIgnorePath = join(targetDir, '../../.gitignore');
6072
let gitIgnoreContent = '';
6173
if (existsSync(gitIgnorePath)) {
6274
gitIgnoreContent = readFileSync(gitIgnorePath, 'utf-8');
@@ -67,16 +79,19 @@ export function writeDumpFile(fileName: string, fileExt: string, fileContent: st
6779
if (!gitIgnoreContent.includes(`${logDirName}/`)) {
6880
writeFileSync(
6981
gitIgnorePath,
70-
`${gitIgnoreContent}\n# MidScene.js dump files\n${logDirName}/\n`,
82+
`${gitIgnoreContent}\n# MidScene.js dump files\n${logDirName}/midscene-report\n${logDirName}/dump-logger\n`,
7183
'utf-8',
7284
);
7385
}
7486
logEnvReady = true;
7587
}
7688

77-
const filePath = join(getDumpDir(), `${fileName}.${fileExt}`);
89+
const filePath = join(targetDir, `${fileName}.${fileExt}`);
7890
writeFileSync(filePath, fileContent);
79-
copyFileSync(filePath, join(getDumpDir(), `latest.${fileExt}`));
91+
92+
if (type === 'dump') {
93+
copyFileSync(filePath, join(targetDir, `latest.${fileExt}`));
94+
}
8095

8196
return filePath;
8297
}

packages/midscene/tests/ai-model/inspector/__snapshots__/todo_inspector.test.ts.snap

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
{
33
"elements": [
44
{
5-
"id": "2",
5+
"id": "b0ca2e8c69",
66
},
77
],
88
"error": [],
@@ -11,7 +11,7 @@
1111
{
1212
"elements": [
1313
{
14-
"id": "8",
14+
"id": "b9807d7de6",
1515
},
1616
],
1717
"error": [],
@@ -20,7 +20,7 @@
2020
{
2121
"elements": [
2222
{
23-
"id": "9",
23+
"id": "c5a7702fed",
2424
},
2525
],
2626
"error": [],
@@ -29,7 +29,7 @@
2929
{
3030
"elements": [
3131
{
32-
"id": "10",
32+
"id": "c84a3afdac",
3333
},
3434
],
3535
"error": [],
@@ -38,7 +38,7 @@
3838
{
3939
"elements": [
4040
{
41-
"id": "15",
41+
"id": "defa24dedd",
4242
},
4343
],
4444
"error": [],

packages/midscene/tests/ai-model/inspector/__snapshots__/xicha_inspector.test.ts.snap

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
{
33
"elements": [
44
{
5-
"id": "1",
5+
"id": "922e98a196",
66
},
77
],
88
"error": [],
@@ -11,16 +11,16 @@
1111
{
1212
"elements": [
1313
{
14-
"id": "2",
14+
"id": "83ffa89342",
1515
},
1616
],
1717
"error": [],
18-
"prompt": "Switch language(include:中文、english text)",
18+
"prompt": "Toggle language text button(Could be:中文、english text)",
1919
},
2020
{
2121
"elements": [
2222
{
23-
"id": "4",
23+
"id": "a525985342",
2424
},
2525
],
2626
"error": [],
@@ -29,10 +29,10 @@
2929
{
3030
"elements": [
3131
{
32-
"id": "22",
32+
"id": "3fb89d359f",
3333
},
3434
{
35-
"id": "28",
35+
"id": "c4300a7c45",
3636
},
3737
],
3838
"error": [],
@@ -41,10 +41,10 @@
4141
{
4242
"elements": [
4343
{
44-
"id": "23",
44+
"id": "ae0ba24c99",
4545
},
4646
{
47-
"id": "29",
47+
"id": "a50d88f84c",
4848
},
4949
],
5050
"error": [],
@@ -53,7 +53,7 @@
5353
{
5454
"elements": [
5555
{
56-
"id": "30",
56+
"id": "df4f252aab",
5757
},
5858
],
5959
"error": [],

packages/midscene/tests/ai-model/inspector/xicha_inspector.test.ts renamed to packages/midscene/tests/ai-model/inspector/online_order_inspector.test.ts

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import { test, expect } from 'vitest';
21
import path from 'path';
2+
import { test, expect } from 'vitest';
33
import { getPageTestData, repeat, runTestCases, writeFileSyncWithDir } from './util';
44
import { AiInspectElement } from '@/ai-model';
55

@@ -9,7 +9,7 @@ const testCases = [
99
multi: false,
1010
},
1111
{
12-
description: 'Switch language(include:中文、english text)',
12+
description: 'Toggle language text button(Could be:中文、english text)',
1313
multi: false,
1414
},
1515
{
@@ -31,21 +31,28 @@ const testCases = [
3131
];
3232

3333
repeat(5, (repeatIndex) => {
34-
test('xicha: inspect element', async () => {
35-
const { context } = await getPageTestData(path.join(__dirname, './test-data/xicha'));
34+
test(
35+
'xicha: inspect element',
36+
async () => {
37+
const { context } = await getPageTestData(path.join(__dirname, './test-data/online_order'));
3638

37-
const { aiResponse, filterUnStableinf } = await runTestCases(testCases, async (testCase)=>{
39+
const { aiResponse, filterUnStableinf } = await runTestCases(testCases, async (testCase) => {
3840
const { parseResult } = await AiInspectElement({
3941
context,
4042
multi: testCase.multi,
4143
findElementDescription: testCase.description,
4244
});
4345
return parseResult;
44-
});
45-
writeFileSyncWithDir(path.join(__dirname, `__ai_responses__/xicha-inspector-element-${repeatIndex}.json`), JSON.stringify(aiResponse, null, 2), { encoding: 'utf-8'});
46-
expect(filterUnStableinf).toMatchFileSnapshot('./__snapshots__/xicha_inspector.test.ts.snap');
47-
}, {
48-
timeout: 99999,
49-
});
46+
});
47+
writeFileSyncWithDir(
48+
path.join(__dirname, `__ai_responses__/online_order-inspector-element-${repeatIndex}.json`),
49+
JSON.stringify(aiResponse, null, 2),
50+
{ encoding: 'utf-8' },
51+
);
52+
expect(filterUnStableinf).toMatchFileSnapshot('./__snapshots__/online_order_inspector.test.ts.snap');
53+
},
54+
{
55+
timeout: 99999,
56+
},
57+
);
5058
});
51-

0 commit comments

Comments
 (0)