Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 10 additions & 16 deletions packages/core/src/agent/agent.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';
import { getModelAdapter } from '@/ai-model/models';
import yaml from 'js-yaml';
import type { TUserPrompt } from '../ai-model/index';
import { ScreenshotItem } from '../screenshot-item';
Expand Down Expand Up @@ -132,10 +132,6 @@ const normalizeScrollType = (
return scrollType as ScrollParam['scrollType'];
};

const defaultReplanningCycleLimit = 20;
const defaultVlmUiTarsReplanningCycleLimit = 40;
const defaultAutoGlmReplanningCycleLimit = 100;

export type AiActOptions = {
cacheable?: boolean;
fileChooserAccept?: string | string[];
Expand Down Expand Up @@ -244,11 +240,8 @@ export class Agent<
return this.opts.replanningCycleLimit;
}

return isUITars(modelConfigForPlanning.modelFamily)
? defaultVlmUiTarsReplanningCycleLimit
: isAutoGLM(modelConfigForPlanning.modelFamily)
? defaultAutoGlmReplanningCycleLimit
: defaultReplanningCycleLimit;
return getModelAdapter(modelConfigForPlanning.modelFamily).planning
.defaultReplanningCycleLimit;
}

constructor(interfaceInstance: InterfaceType, opts?: AgentOpt) {
Expand Down Expand Up @@ -927,11 +920,11 @@ export class Agent<
const replanningCycleLimit = this.resolveReplanningCycleLimit(
modelConfigForPlanning,
);
// if vlm-ui-tars or auto-glm, plan cache is not used
const isVlmUiTars = isUITars(modelConfigForPlanning.modelFamily);
const isAutoGlm = isAutoGLM(modelConfigForPlanning.modelFamily);
const planCacheEnabled = getModelAdapter(
modelConfigForPlanning.modelFamily,
).planning.cacheEnabled;
const matchedCache =
isVlmUiTars || isAutoGlm || cacheable === false
!planCacheEnabled || cacheable === false
? undefined
: this.taskCache?.matchPlanCache(taskPrompt);
if (
Expand Down Expand Up @@ -1121,8 +1114,9 @@ export class Agent<
// Don't pass deepLocate to verification locate — the description was generated
// from a cropped view (deepLocate describe), but verification should use regular
// locate on the full screenshot to confirm the description works universally.
// Passing deepLocate here would trigger AiLocateSection with an element-level
// description as a section prompt, which is semantically incorrect.
// Passing deepLocate here would add another first-pass locate and search-area
// crop around an already element-level description, which is not the intent of
// verification.
verifyResult = await this.verifyLocator(
resultPrompt,
undefined,
Expand Down
4 changes: 2 additions & 2 deletions packages/core/src/agent/task-builder.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { findAllMidsceneLocatorField, parseActionParam } from '@/ai-model';
import type { AbstractInterface } from '@/device';
import { createLocateResultElementFromRect } from '@/locate-result-element';
import type Service from '@/service';
import { setTimingFieldOnce } from '@/task-timing';
import type {
Expand All @@ -21,7 +22,6 @@ import type {
import { ServiceError } from '@/types';
import { sleep } from '@/utils';
import type { IModelConfig } from '@midscene/shared/env';
import { generateElementByRect } from '@midscene/shared/extractor';
import { getDebug } from '@midscene/shared/logger';
import { assert } from '@midscene/shared/utils';
import type { TaskCache } from './task-cache';
Expand Down Expand Up @@ -460,7 +460,7 @@ export class TaskBuilder {
}

const elementFromXpath = rectFromXpath
? generateElementByRect(
? createLocateResultElementFromRect(
// rectFromXpath is in logical coordinates, which should be transformed to screenshot coordinates;
transformLogicalRectToScreenshotRect(
rectFromXpath,
Expand Down
23 changes: 8 additions & 15 deletions packages/core/src/agent/tasks.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
import {
AIResponseParseError,
ConversationHistory,
autoGLMPlanning,
plan,
uiTarsPlanning,
} from '@/ai-model';
import { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';
import { AIResponseParseError, ConversationHistory } from '@/ai-model';
import { getModelAdapter } from '@/ai-model/models';
import { genericXmlPlan } from '@/ai-model/workflows/planning';
import {
type TMultimodalPrompt,
type TUserPrompt,
Expand All @@ -21,7 +16,6 @@ import type {
ExecutionTaskInsightQueryApply,
ExecutionTaskPlanningApply,
ExecutionTaskProgressOptions,
InterfaceType,
MidsceneYamlFlowItem,
PlanningAIResponse,
PlanningAction,
Expand Down Expand Up @@ -370,19 +364,18 @@ export class TaskExecutor {
);
}

const planImpl = isUITars(modelFamily)
? uiTarsPlanning
: isAutoGLM(modelFamily)
? autoGLMPlanning
: plan;
const adapter = getModelAdapter(modelFamily);
const planImpl =
adapter.planning.kind === 'custom'
? adapter.planning.planFn
: genericXmlPlan;

let planResult: Awaited<ReturnType<typeof planImpl>>;
try {
setTimingFieldOnce(timing, 'callAiStart');
planResult = await planImpl(param.userInstruction, {
context: uiContext,
actionContext: param.aiActContext,
interfaceType: this.interface.interfaceType as InterfaceType,
actionSpace,
modelConfig: modelConfigForPlanning,
conversationHistory,
Expand Down
4 changes: 2 additions & 2 deletions packages/core/src/agent/utils.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { TMultimodalPrompt, TUserPrompt } from '@/common';
import type { AbstractInterface } from '@/device';
import { createLocateResultElementFromRect } from '@/locate-result-element';
import { ScreenshotItem } from '@/screenshot-item';
import type {
ElementCacheFeature,
Expand All @@ -15,7 +16,6 @@ import {
MIDSCENE_REPORT_TAG_NAME,
globalConfigManager,
} from '@midscene/shared/env';
import { generateElementByRect } from '@midscene/shared/extractor';
import { imageInfoOfBase64, resizeImgBase64 } from '@midscene/shared/img';
import { getDebug } from '@midscene/shared/logger';
import { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';
Expand Down Expand Up @@ -201,7 +201,7 @@ export function matchElementFromPlan(
height: planLocateParam.bbox[3] - planLocateParam.bbox[1] + 1,
};

const element = generateElementByRect(
const element = createLocateResultElementFromRect(
rect,
typeof planLocateParam.prompt === 'string'
? planLocateParam.prompt
Expand Down
10 changes: 0 additions & 10 deletions packages/core/src/ai-model/auto-glm/index.ts

This file was deleted.

23 changes: 0 additions & 23 deletions packages/core/src/ai-model/auto-glm/util.ts

This file was deleted.

18 changes: 8 additions & 10 deletions packages/core/src/ai-model/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ export {
type ConnectivityTestConfig,
type ConnectivityTestResult,
} from './connectivity';
export { systemPromptToLocateElement } from './prompt/llm-locator';
export { systemPromptToLocateElement } from './prompts/llm-locator';
export {
generatePlaywrightTest,
generatePlaywrightTestStream,
} from './prompt/playwright-generator';
} from './workflows/generation/playwright';
export {
generateYamlTest,
generateYamlTestStream,
} from './prompt/yaml-generator';
export type { YamlGenerationOptions } from './prompt/yaml-generator';
} from './workflows/generation/yaml';
export type { YamlGenerationOptions } from './workflows/generation/yaml';

export type { ChatCompletionMessageParam } from 'openai/resources/index';

Expand All @@ -28,19 +28,17 @@ export {
AiExtractElementInfo,
AiLocateSection,
AiJudgeOrderSensitive,
} from './inspect';
} from './workflows/inspect';

export { plan } from './llm-planning';
export { autoGLMPlanning } from './auto-glm/planning';
export { adaptBboxToRect } from '../common';
export { uiTarsPlanning } from './ui-tars-planning';
export { plan } from './workflows/planning/generic';
export { adaptModelLocateResultToRect } from './workflows/inspect/locate-result-rect';
export {
ConversationHistory,
type ConversationHistoryOptions,
} from './conversation-history';
export type { SubGoal, SubGoalStatus } from '@/types';

export type { AIArgs } from '../common';
export type { AIArgs } from './types';

export {
getMidsceneLocationSchema,
Expand Down
Loading
Loading