chore: rename files in core module (#277)
---------

Co-authored-by: zhouxiao.shaw <[email protected]>
yuyutaotao and zhoushaw authored Jan 14, 2025
1 parent 74774c6 commit 68b4004
Showing 25 changed files with 35 additions and 635 deletions.
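This commit is mainly a rename pass over the core module, with a few unused helpers and exports removed along the way. The mapping below is a sketch assembled from the paths visible in this diff (relative to `packages/midscene/src`); it is illustrative rather than an exhaustive list of all 25 changed files.

```ts
// Rename map gathered from this diff (old path → new path), relative to packages/midscene/src:
//
//   ai-model/openai.ts                    → ai-model/service-caller.ts
//   ai-model/prompt/element-inspector.ts  → ai-model/prompt/llm-locator.ts
//   ai-model/automation.ts                → ai-model/llm-planning.ts
//   ai-model/prompt/planning.ts           → ai-model/prompt/llm-planning.ts
//   ai-model/vlm-planning.ts              → ai-model/ui-tars-planning.ts
//   ai-model/prompt/element-point.ts      → ai-model/prompt/ui-tars-locator.ts
//   action/executor.ts                    → ai-model/action-executor.ts
//
// Renamed prompt helpers (same behavior, new names):
//   systemPromptToFindElement         → systemPromptToLocateElement
//   systemPromptToFindElementPosition → systemPromptToLocateElementPosition

// Example: an internal module that previously did
//   import { callToGetJSONObject, checkAIConfig } from './openai';
// now imports the same functions from the renamed file:
import { callToGetJSONObject, checkAIConfig } from './service-caller';
```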
2 changes: 1 addition & 1 deletion apps/site/docs/zh/index.mdx
@@ -1,6 +1,6 @@
# Midscene.js - AI-Powered, Joyful UI Automation

UI automation is too hard to maintain. UI automation scripts are often littered with selectors like `#ids`, `data-test`, and `.selectors`. When it comes time to refactor, this can be a real headache, even though that is exactly when UI automation should prove its value.
Traditional UI automation is too hard to maintain. Automation scripts are often littered with selectors like `#ids`, `data-test`, and `.selectors`. When it comes time to refactor, this can be a real headache, even though that is exactly when UI automation should prove its value.

We're introducing Midscene.js to bring back the joy of coding.

File renamed without changes.
4 changes: 1 addition & 3 deletions packages/midscene/src/ai-model/common.ts
@@ -1,13 +1,11 @@
import assert from 'node:assert';
import { MIDSCENE_MODEL_TEXT_ONLY, getAIConfig } from '@/env';
import type { AIUsageInfo } from '@/types';

import type {
ChatCompletionContentPart,
ChatCompletionSystemMessageParam,
ChatCompletionUserMessageParam,
} from 'openai/resources';
import { callToGetJSONObject, checkAIConfig } from './openai';
import { callToGetJSONObject, checkAIConfig } from './service-caller';

export type AIArgs = [
ChatCompletionSystemMessageParam,
9 changes: 4 additions & 5 deletions packages/midscene/src/ai-model/index.ts
@@ -1,5 +1,5 @@
export { callToGetJSONObject } from './openai';
export { systemPromptToFindElement } from './prompt/element-inspector';
export { callToGetJSONObject } from './service-caller';
export { systemPromptToLocateElement } from './prompt/llm-locator';
export { describeUserPage } from './prompt/util';

export type { ChatCompletionMessageParam } from 'openai/resources';
@@ -11,7 +11,6 @@ export {
transformElementPositionToId,
} from './inspect';

export { plan } from './automation';
export { plan } from './llm-planning';
export { callAiFn } from './common';

export { vlmPlanning } from './vlm-planning';
export { vlmPlanning } from './ui-tars-planning';
18 changes: 5 additions & 13 deletions packages/midscene/src/ai-model/inspect.ts
@@ -1,5 +1,4 @@
import assert from 'node:assert';
import path from 'node:path';
import type {
AIAssertionResponse,
AIElementResponse,
@@ -9,29 +8,26 @@ import type {
AIUsageInfo,
BaseElement,
ElementById,
Point,
Size,
UIContext,
} from '@/types';
import { savePositionImg } from '@midscene/shared/img';
import type {
ChatCompletionSystemMessageParam,
ChatCompletionUserMessageParam,
} from 'openai/resources';
import { AIActionType, callAiFn } from './common';
import { call, callToGetJSONObject } from './openai';
import {
findElementPrompt,
multiDescription,
systemPromptToFindElement,
} from './prompt/element-inspector';
systemPromptToLocateElement,
} from './prompt/llm-locator';
import {
describeUserPage,
elementByPositionWithElementInfo,
extractDataPrompt,
systemPromptToAssert,
systemPromptToExtract,
} from './prompt/util';
import { callToGetJSONObject } from './service-caller';

export type AIArgs = [
ChatCompletionSystemMessageParam,
@@ -69,11 +65,7 @@ export async function transformElementPositionToId(
},
size,
);
// await savePositionImg({
// inputImgBase64: screenshotBase64,
// rect: absolutePosition,
// outputPath: path.join(__dirname, 'test-data', `output-${index++}.png`),
// });

const element = elementByPositionWithElementInfo(
elementsInfo,
absolutePosition,
@@ -182,7 +174,7 @@ export async function AiInspectElement<
targetElementDescription,
multi,
});
const systemPrompt = systemPromptToFindElement();
const systemPrompt = systemPromptToLocateElement();
const msgs: AIArgs = [
{ role: 'system', content: systemPrompt },
{
@@ -6,7 +6,7 @@ import {
automationUserPrompt,
generateTaskBackgroundContext,
systemPromptToTaskPlanning,
} from './prompt/planning';
} from './prompt/llm-planning';
import { describeUserPage } from './prompt/util';

export async function plan(
@@ -1,11 +1,11 @@
import { MATCH_BY_POSITION, getAIConfig } from '@/env';
import { PromptTemplate } from '@langchain/core/prompts';
import type { ResponseFormatJSONSchema } from 'openai/resources';
import { systemPromptToFindElementPosition } from './element-point';
import { systemPromptToLocateElementPosition } from './ui-tars-locator';

export function systemPromptToFindElement() {
export function systemPromptToLocateElement() {
if (getAIConfig(MATCH_BY_POSITION)) {
return systemPromptToFindElementPosition();
return systemPromptToLocateElementPosition();
}
return `
## Role:
@@ -144,12 +144,6 @@ Output Example:
`;
}

export function multiDescription(multi: boolean) {
return multi
? 'multiple elements matching the description (two or more)'
: 'The element closest to the description (only one)';
}

export const findElementSchema: ResponseFormatJSONSchema = {
type: 'json_schema',
json_schema: {
@@ -1,5 +1,5 @@
// Claude 3.5 Sonnet (computer use) is better at understanding image content, but does not provide the element snapshot effect
export function systemPromptToFindElementPosition() {
export function systemPromptToLocateElementPosition() {
return `
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
2 changes: 1 addition & 1 deletion packages/midscene/src/ai-model/prompt/ui-tars-planning.ts
@@ -1,4 +1,4 @@
import type { Action } from '../vlm-planning';
import type { Action } from '../ui-tars-planning';

export const uiTarsPlanningPrompt = `
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
64 changes: 4 additions & 60 deletions packages/midscene/src/ai-model/prompt/util.ts
@@ -13,11 +13,10 @@ const characteristic =
const contextFormatIntro = `
The user will give you a screenshot and some of the text on it. There may be some non-English characters (like Chinese) on it, indicating it's a non-English app. If some text is shown on the screenshot but not introduced by the JSON description, use the information you see on the screenshot.`;

const ONE_ELEMENT_LOCATOR_PREFIX = 'LOCATE_ONE_ELEMENT';
const ELEMENTS_LOCATOR_PREFIX = 'LOCATE_ONE_OR_MORE_ELEMENTS';
const SECTION_MATCHER_FLAG = 'SECTION_MATCHER_FLAG/';

export function systemPromptToFindElement(queryPrompt: string, multi: boolean) {
export function systemPromptToLocateElement(
queryPrompt: string,
multi: boolean,
) {
assert(queryPrompt, 'queryPrompt is required');
return `
${characteristic}
@@ -385,58 +384,3 @@ function cropFieldInformation(
}
return elementInfosDescription;
}

/**
* elements
*/
export function retrieveElement(
prompt: string,
opt?: { multi: boolean },
): string {
if (opt?.multi) {
return `follow ${ELEMENTS_LOCATOR_PREFIX}: ${prompt}`;
}
return `follow ${ONE_ELEMENT_LOCATOR_PREFIX}: ${prompt}`;
}

export function ifElementTypeResponse(response: string): boolean {
if (typeof response !== 'string') {
return false;
}
return (
response.startsWith(ONE_ELEMENT_LOCATOR_PREFIX) ||
response.startsWith(ELEMENTS_LOCATOR_PREFIX)
);
}

export function splitElementResponse(
response: string,
): string | null | string[] {
const oneElementSplitter = `${ONE_ELEMENT_LOCATOR_PREFIX}/`;
if (response.startsWith(oneElementSplitter)) {
const id = response.slice(oneElementSplitter.length);
if (id.indexOf(',') >= 0) {
console.warn(`unexpected comma in one element response: ${id}`);
}
return id ? id : null;
}

const elementsSplitter = `${ELEMENTS_LOCATOR_PREFIX}/`;
if (response.startsWith(elementsSplitter)) {
const idsString = response.slice(elementsSplitter.length);
if (!idsString) {
return [];
}
return idsString.split(',');
}

return null;
}

/**
* sections
*/

export function retrieveSection(prompt: string): string {
return `${SECTION_MATCHER_FLAG}${prompt}`;
}
@@ -16,7 +16,6 @@ import {
AZURE_OPENAI_DEPLOYMENT,
AZURE_OPENAI_ENDPOINT,
AZURE_OPENAI_KEY,
MATCH_BY_POSITION,
MIDSCENE_API_TYPE,
MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
MIDSCENE_AZURE_OPENAI_SCOPE,
@@ -37,8 +36,8 @@ import {
getAIConfigInJson,
} from '../../env';
import { AIActionType } from '../common';
import { findElementSchema } from '../prompt/element-inspector';
import { planSchema } from '../prompt/planning';
import { findElementSchema } from '../prompt/llm-locator';
import { planSchema } from '../prompt/llm-planning';
import { assertSchema } from '../prompt/util';

export function checkAIConfig(preferVendor?: 'openAI') {
@@ -1,14 +1,12 @@
import type { UIContext } from '@/types';
import type { PlanningAction } from '@/types';
import type { ChatCompletionMessageParam } from 'openai/resources';
import { AIActionType } from './common';
import { call, callToGetJSONObject } from './openai';
import {
getSummary,
parseActionFromVlm,
uiTarsPlanningPrompt,
} from './prompt/ui-tars-planning';
import { describeUserPage } from './prompt/util';
import { call } from './service-caller';

type ActionType = 'click' | 'type' | 'hotkey' | 'finished' | 'scroll' | 'wait';

3 changes: 0 additions & 3 deletions packages/midscene/src/env.ts
@@ -11,7 +11,6 @@ export const MIDSCENE_OPENAI_SOCKS_PROXY = 'MIDSCENE_OPENAI_SOCKS_PROXY';
export const OPENAI_API_KEY = 'OPENAI_API_KEY';
export const OPENAI_BASE_URL = 'OPENAI_BASE_URL';
export const OPENAI_MAX_TOKENS = 'OPENAI_MAX_TOKENS';
export const MIDSCENE_MODEL_TEXT_ONLY = 'MIDSCENE_MODEL_TEXT_ONLY';

export const MIDSCENE_CACHE = 'MIDSCENE_CACHE';
export const MIDSCENE_USE_VLM_UI_TARS = 'MIDSCENE_USE_VLM_UI_TARS';
@@ -49,8 +48,6 @@ const allConfigFromEnv = () => {
process.env[MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG] || undefined,
[OPENAI_API_KEY]: process.env[OPENAI_API_KEY] || undefined,
[OPENAI_BASE_URL]: process.env[OPENAI_BASE_URL] || undefined,
[MIDSCENE_MODEL_TEXT_ONLY]:
process.env[MIDSCENE_MODEL_TEXT_ONLY] || undefined,
[OPENAI_MAX_TOKENS]: process.env[OPENAI_MAX_TOKENS] || undefined,
[OPENAI_USE_AZURE]: process.env[OPENAI_USE_AZURE] || undefined,
[MIDSCENE_CACHE]: process.env[MIDSCENE_CACHE] || undefined,
13 changes: 2 additions & 11 deletions packages/midscene/src/index.ts
@@ -1,18 +1,9 @@
import { Executor } from './action/executor';
import { Executor } from './ai-model/action-executor';
import Insight from './insight';
import { getElement, getSection } from './query';
import { getLogDirByType, getVersion, setLogDir } from './utils';

export { plan, transformElementPositionToId } from './ai-model';

export * from './types';
export default Insight;
export {
getElement,
getSection,
Executor,
setLogDir,
getLogDirByType,
Insight,
getVersion,
};
export { Executor, setLogDir, getLogDirByType, Insight, getVersion };
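After this cleanup, the root entry keeps `Insight` (default export), `Executor`, `plan`, `transformElementPositionToId`, and the log/version utilities, while `getElement` and `getSection` are no longer re-exported. Below is a hypothetical consumer-side sketch; the package specifier `@midscene/core` and the zero-argument `getVersion()` call are assumptions for illustration, not confirmed by this diff.

```ts
// Hypothetical consumer-side view of the root exports after this commit.
// The package specifier and the getVersion() signature are assumptions.
import Insight, {
  Executor,
  plan,
  transformElementPositionToId,
  getVersion,
} from '@midscene/core';

console.log(getVersion()); // assumed to return the package version string
```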
30 changes: 3 additions & 27 deletions packages/midscene/src/insight/index.ts
@@ -15,11 +15,7 @@ import type {
PartialInsightDumpFromSDK,
UIContext,
} from '@/types';
import {
ifElementTypeResponse,
splitElementResponse,
} from '../ai-model/prompt/util';
import { idsIntoElements, shallowExpandIds, writeInsightDump } from './utils';
import { writeInsightDump } from './utils';

export interface LocateOpts {
multi?: boolean;
@@ -228,37 +224,17 @@ export default class Insight<
throw new Error(errorLog);
}

let mergedData = data;

// expand elements in object style data
if (data && typeof data === 'object' && !Array.isArray(data)) {
shallowExpandIds(data, ifElementTypeResponse, (id) => {
const idList = splitElementResponse(id);
if (typeof idList === 'string') {
return elementById(idList);
}
if (Array.isArray(idList)) {
return idsIntoElements(idList, elementById);
}
return idList; // i.e. null
});

mergedData = {
...data,
};
}

writeInsightDump(
{
...dumpData,
matchedSection: [],
data: mergedData,
data,
},
logId,
dumpSubscriber,
);

return mergedData;
return data;
}

async assert(assertion: string): Promise<InsightAssertionResponse> {