Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
48eee6b
fix(core): aiTap format declared in the documentation
e790a8 Mar 5, 2026
f0bc8ff
test(web-integration): add test for aiTap nested locate format with i…
quanru Mar 9, 2026
61d9d2f
docs(site): update YAML locate format to sibling style and deprecate …
ottomao Mar 10, 2026
863127f
chore(core): fix lint formatting in yaml player
ottomao Mar 10, 2026
78989b6
Merge branch 'web-infra-dev:main' into main
e790a8 Mar 10, 2026
6691707
Merge branch 'web-infra-dev:main' into main
e790a8 Mar 11, 2026
9a85831
fix(core): Cache flow is empty
e790a8 Mar 11, 2026
b26e8d1
Merge branch 'web-infra-dev:main' into main
e790a8 Mar 11, 2026
9ecafea
fix(core): runAdbShell not available
e790a8 Mar 11, 2026
f621d55
fix(core): fix cache empty flow logic and restore missing else branch
quanru Mar 17, 2026
54422e2
test(core): add unit tests for cache empty flow guard and player acti…
quanru Mar 17, 2026
b5f926d
fix(core): handle unusable plan cache and yaml fallback
quanru Mar 18, 2026
8090fce
test(web): add ai cache regression coverage
quanru Mar 18, 2026
5b16c3d
refactor(core): rename buildSpecialStringActionParam to buildLaunchOr…
ottomao Mar 18, 2026
57502a9
Merge branch 'web-infra-dev:main' into main
e790a8 Mar 24, 2026
62db396
feat(core): aiTap Images does not work
e790a8 Mar 24, 2026
f86cf73
Merge branch 'web-infra-dev:main' into main
e790a8 Mar 24, 2026
23f0bc2
fix(core): aiTap Images does not work
e790a8 Mar 24, 2026
a4997aa
Merge remote-tracking branch 'origin/main'
e790a8 Mar 24, 2026
2be34c2
feat(core): make the images provided by aiTap truly effective
e790a8 Mar 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion packages/core/src/ai-model/inspect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,14 @@ export async function AiLocateElement(options: {
const targetElementDescriptionText = extraTextFromUserPrompt(
targetElementDescription,
);
const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);
const referenceImageNames =
typeof targetElementDescription !== 'string'
? targetElementDescription.images?.map((img) => img.name)
: undefined;
const userInstructionPrompt = findElementPrompt(
targetElementDescriptionText,
referenceImageNames,
);
const systemPrompt = isAutoGLM(modelFamily)
? getAutoGLMLocatePrompt(modelFamily)
: systemPromptToLocateElement(modelFamily);
Expand Down
19 changes: 17 additions & 2 deletions packages/core/src/ai-model/prompt/llm-locator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ You are an AI assistant that helps identify UI elements.
- For example: If an input field is large (both wide and tall) with a placeholder text "Please enter your comment", you should locate only the area where the placeholder text appears, not the entire input field.
- This principle applies to all text-containing elements: focus on the visible text region rather than the full element container.

## When Reference Images Are Provided:
- The FIRST image in the conversation is always the MAIN screenshot — you must return the bounding box of the element found in this MAIN screenshot.
- Any images provided after the main screenshot are reference images (visual examples of the element to find).
- When the task mentions a reference image name (e.g., "参考图片1"), it refers to the provided reference image with that name — do NOT search for that name as text on the page.
- Use the reference image as a visual template and locate the element in the MAIN screenshot that visually resembles it.
- The returned bounding box coordinates must be from the MAIN screenshot, not from the reference image.

## Output Format:
\`\`\`json
{
Expand Down Expand Up @@ -49,5 +56,13 @@ When no element is found:
`;
}

/**
 * Builds the user instruction sent to the model for locating an element.
 *
 * @param targetElementDescription - Text description of the element to find.
 * @returns The description prefixed with `Find: `.
 */
export const findElementPrompt = (targetElementDescription: string) => {
  const instructionPrefix = 'Find: ';
  return instructionPrefix + targetElementDescription;
};
/**
 * Builds the user instruction sent to the model for locating an element.
 *
 * When one or more reference image names are supplied, the instruction asks
 * the model to match those reference images against the MAIN screenshot and
 * appends the text description as a task hint. Otherwise it falls back to the
 * plain `Find: <description>` form.
 *
 * @param targetElementDescription - Text description of the element to find.
 * @param referenceImageNames - Optional names of the reference images; an
 *   omitted or empty list selects the plain form.
 * @returns The instruction string for the locate request.
 */
export const findElementPrompt = (
  targetElementDescription: string,
  referenceImageNames?: string[],
) => {
  // No reference images: keep the plain text-only instruction.
  if (!referenceImageNames || referenceImageNames.length === 0) {
    return `Find: ${targetElementDescription}`;
  }

  const quotedNames = referenceImageNames.map((name) => `'${name}'`);
  const pluralSuffix = referenceImageNames.length > 1 ? 's' : '';
  const matchInstruction = `Find the element in the MAIN screenshot (first image) that visually matches the reference image${pluralSuffix} (${quotedNames.join(', ')}) provided below.`;

  return `${matchInstruction}\nTask hint: ${targetElementDescription}`;
};
6 changes: 2 additions & 4 deletions packages/core/src/yaml/player.ts
Original file line number Diff line number Diff line change
Expand Up @@ -481,10 +481,8 @@ export class ScriptPlayer<T extends MidsceneYamlScriptEnv> {
// User YAML: aiTap: 'search input box'
locatePrompt = aiTap;
} else if (typeof locateObj === 'object' && locateObj?.prompt) {
// buildYamlFlowFromPlans: { aiTap: '', locate: { prompt, deepLocate, cacheable } }
const { prompt: lp, ...locateOpts } = locateObj;
locatePrompt = lp;
opts = { ...locateOpts, ...tapOptions };
// buildYamlFlowFromPlans: { aiTap: { locate: { prompt, images, ... } } } (locate nested in aiTap)
locatePrompt = locateObj;
} else {
// User YAML: aiTap: { prompt: '...' } or aiTap: null + prompt: '...'
locatePrompt = aiTap?.prompt || prompt || locateObj;
Expand Down
Loading