Skip to content

Commit 9d23e6c

Browse files
ZevGit and claude committed
feat(cli): add auto-learn feature for Android app UI mapping
Add --auto-learn flag to automatically discover and map Android app UI structure (navigation tabs, interactive elements, page layouts, jump targets) using VLM-guided agents. Includes code review fixes for security (argument sanitization), error handling, type safety, and timeout protection. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 7986f5a commit 9d23e6c

17 files changed

Lines changed: 1943 additions & 7 deletions
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
import { describe, it, expect } from 'vitest';
6+
import { launchApp, adbClick, adbBack, adbHome, adbWake } from '../app-launcher';
7+
8+
describe('app-launcher argument sanitization', () => {
9+
describe('launchApp', () => {
10+
it('should reject deviceId with shell injection characters', () => {
11+
// Pin the sanitizer's message: a bare toThrow() would also pass on any other failure (e.g. ADB being unavailable).
expect(() => launchApp('device1; rm -rf /', 'com.example.app')).toThrow('Invalid argument');
12+
});
13+
14+
it('should reject pkg with shell injection characters', () => {
15+
// Pin the sanitizer's message: a bare toThrow() would also pass on any other failure (e.g. ADB being unavailable).
expect(() => launchApp('device1', 'com.example.app; rm -rf /')).toThrow('Invalid argument');
16+
});
17+
18+
it('should accept valid package names', () => {
19+
// This will fail at ADB level (no device), but should NOT throw sanitization error
20+
try {
21+
launchApp('emulator-5554', 'com.example.app');
22+
} catch (err: unknown) {
23+
// ADB error is expected, but NOT sanitization error; String() keeps the check valid for non-Error throws too
24+
expect(String(err)).not.toContain('Invalid argument');
25+
}
26+
});
27+
28+
it('should accept valid device IDs with dots and hyphens', () => {
29+
try {
30+
launchApp('192.168.1.1:5555', 'com.app');
31+
} catch (err: unknown) {
32+
// Any failure other than the sanitizer's rejection is tolerated; String() also covers non-Error throws.
expect(String(err)).not.toContain('Invalid argument');
33+
}
34+
});
35+
36+
it('should reject pkg with spaces', () => {
37+
expect(() => launchApp('device1', 'com.example app')).toThrow('Invalid argument');
38+
});
39+
});
40+
41+
describe('adbClick', () => {
42+
it('should reject deviceId with injection characters', () => {
43+
expect(() => adbClick('device; evil', 100, 200)).toThrow('Invalid argument');
44+
});
45+
});
46+
47+
describe('adbBack', () => {
48+
it('should reject deviceId with injection characters', () => {
49+
expect(() => adbBack('device; evil')).toThrow('Invalid argument');
50+
});
51+
});
52+
53+
describe('adbHome', () => {
54+
it('should reject deviceId with injection characters', () => {
55+
expect(() => adbHome('device; evil')).toThrow('Invalid argument');
56+
});
57+
});
58+
59+
describe('adbWake', () => {
60+
it('should reject deviceId with injection characters', () => {
61+
expect(() => adbWake('device; evil')).toThrow('Invalid argument');
62+
});
63+
});
64+
});
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
import { describe, it, expect } from 'vitest';
6+
import {
7+
matchPageSignature,
8+
getLayoutType,
9+
isSecondaryPage,
10+
} from '../page-signatures';
11+
12+
describe('matchPageSignature', () => {
13+
it('should match home page features', () => {
14+
expect(matchPageSignature('推荐卡片和banner轮播')).toBe('首页');
15+
expect(matchPageSignature('banner推荐内容')).toBe('首页');
16+
});
17+
18+
it('should match theater page features', () => {
19+
expect(matchPageSignature('左侧分类筛选列表')).toBe('剧场');
20+
expect(matchPageSignature('剧目卡片和分类筛选')).toBe('剧场');
21+
});
22+
23+
it('should match messages page features', () => {
24+
expect(matchPageSignature('消息列表聊天记录')).toBe('消息');
25+
});
26+
27+
it('should match profile page features', () => {
28+
expect(matchPageSignature('用户头像和钻石余额')).toBe('我的');
29+
expect(matchPageSignature('个人中心设置')).toBe('我的');
30+
});
31+
32+
it('should match secondary pages based on parent context', () => {
33+
expect(matchPageSignature('剧目封面播放按钮', '剧场')).toBe('剧目详情');
34+
expect(matchPageSignature('搜索框搜索结果', '首页')).toBe('搜索页');
35+
expect(matchPageSignature('播放器全屏', '剧目详情')).toBe('播放页');
36+
});
37+
38+
it('should return Unknown for unrecognized pages', () => {
39+
expect(matchPageSignature('随机未知页面')).toBe('Unknown');
40+
});
41+
42+
it('should use notFeatures to reduce false positives', () => {
43+
// Stated intent: a 首页 keyword such as "推荐" should be vetoed by the 首页 notFeature "左侧筛选", leaving 剧场 as the match.
44+
// NOTE(review): the input below contains no "推荐", so the notFeatures path may not be exercised here — confirm against page-signatures.
45+
const result = matchPageSignature('左侧筛选和剧目列表');
46+
expect(result).toBe('剧场');
47+
});
48+
});
49+
50+
describe('getLayoutType', () => {
51+
it('should return correct layout for known pages', () => {
52+
expect(getLayoutType('首页')).toBe('scrollable-feed');
53+
expect(getLayoutType('剧场')).toBe('split-view');
54+
expect(getLayoutType('消息')).toBe('list');
55+
expect(getLayoutType('我的')).toBe('form');
56+
});
57+
58+
it('should return unknown for pages without layout definition', () => {
59+
expect(getLayoutType('AI伴侣')).toBe('unknown');
60+
expect(getLayoutType('Unknown')).toBe('unknown');
61+
});
62+
});
63+
64+
describe('isSecondaryPage', () => {
65+
it('should return true for secondary pages', () => {
66+
expect(isSecondaryPage('剧目详情')).toBe(true);
67+
expect(isSecondaryPage('搜索页')).toBe(true);
68+
expect(isSecondaryPage('播放页')).toBe(true);
69+
expect(isSecondaryPage('设置页')).toBe(true);
70+
});
71+
72+
it('should return false for primary pages', () => {
73+
expect(isSecondaryPage('首页')).toBe(false);
74+
expect(isSecondaryPage('剧场')).toBe(false);
75+
expect(isSecondaryPage('AI伴侣')).toBe(false);
76+
expect(isSecondaryPage('消息')).toBe(false);
77+
expect(isSecondaryPage('我的')).toBe(false);
78+
});
79+
80+
it('should return false for unknown pages', () => {
81+
expect(isSecondaryPage('Unknown')).toBe(false);
82+
});
83+
});
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
import { describe, it, expect, vi, beforeEach } from 'vitest';
6+
import { RecordingOperator } from '../recording-operator';
7+
import type { Operator, ScreenshotOutput, ExecuteParams, ExecuteOutput } from '@ui-tars/sdk/core';
8+
9+
describe('RecordingOperator', () => {
10+
let mockBaseOp: Operator;
11+
let recordingOp: RecordingOperator;
12+
13+
const createScreenshotOutput = (base64 = 'mock-screenshot'): ScreenshotOutput => ({
14+
base64,
15+
scaleFactor: 2,
16+
});
17+
18+
const createExecuteParams = (
19+
actionType = 'click',
20+
coords = [100, 200],
21+
thought = '我要点击【按钮】',
22+
): ExecuteParams =>
23+
({
24+
parsedPrediction: {
25+
action_type: actionType,
26+
action_inputs: { start_coords: coords },
27+
thought,
28+
},
29+
}) as unknown as ExecuteParams;
30+
31+
beforeEach(() => {
32+
mockBaseOp = {
33+
screenshot: vi.fn().mockResolvedValue(createScreenshotOutput()),
34+
execute: vi.fn().mockResolvedValue({} as ExecuteOutput),
35+
} as unknown as Operator;
36+
37+
recordingOp = new RecordingOperator(mockBaseOp);
38+
});
39+
40+
describe('screenshot', () => {
41+
it('should delegate to base operator and cache last screenshot', async () => {
42+
const result = await recordingOp.screenshot();
43+
44+
expect(mockBaseOp.screenshot).toHaveBeenCalled();
45+
expect(result.base64).toBe('mock-screenshot');
46+
expect(recordingOp.getLastScreenshot()).toBe('mock-screenshot');
47+
});
48+
});
49+
50+
describe('execute', () => {
51+
it('should delegate to base operator', async () => {
52+
const params = createExecuteParams();
53+
await recordingOp.execute(params);
54+
55+
expect(mockBaseOp.execute).toHaveBeenCalledWith(params);
56+
});
57+
58+
it('should record click actions with screenshot before', async () => {
59+
await recordingOp.screenshot(); // Set last screenshot
60+
await recordingOp.execute(createExecuteParams('click', [100, 200]));
61+
62+
const actions = recordingOp.getActions();
63+
expect(actions).toHaveLength(1);
64+
expect(actions[0].type).toBe('click');
65+
expect(actions[0].inputs).toEqual({ start_coords: [100, 200] });
66+
expect(actions[0].thought).toBe('我要点击【按钮】');
67+
expect(actions[0].screenshotBefore).toBe('mock-screenshot');
68+
});
69+
70+
it('should record non-click actions', async () => {
71+
await recordingOp.execute(createExecuteParams('scroll', [200, 300], '向上滚动'));
72+
73+
const actions = recordingOp.getActions();
74+
expect(actions[0].type).toBe('scroll');
75+
});
76+
77+
it('should auto-finish after repeated clicks at same position', async () => {
78+
// Click same position 4 times
79+
for (let i = 0; i < 4; i++) {
80+
await recordingOp.execute(createExecuteParams('click', [100, 200]));
81+
}
82+
83+
const actions = recordingOp.getActions();
84+
const finishedAction = actions.find((a) => a.type === 'finished');
85+
expect(finishedAction).toBeDefined();
86+
expect(finishedAction?.thought).toBe(
87+
'Auto-finished due to repeated clicks at same position',
88+
);
89+
});
90+
91+
it('should not auto-finish for different click positions', async () => {
92+
const positions = [
93+
[100, 200],
94+
[200, 300],
95+
[300, 400],
96+
[400, 500],
97+
];
98+
for (const pos of positions) {
99+
await recordingOp.execute(createExecuteParams('click', pos));
100+
}
101+
102+
const actions = recordingOp.getActions();
103+
expect(actions.find((a) => a.type === 'finished')).toBeUndefined();
104+
});
105+
106+
it('should take after-screenshot for click actions', async () => {
107+
await recordingOp.execute(createExecuteParams('click', [100, 200]));
108+
109+
// screenshot() is never called directly in this test, so the single base-operator call counted below comes from execute() (presumably the after-screenshot).
110+
expect(mockBaseOp.screenshot).toHaveBeenCalledTimes(1);
111+
const immediateScreenshots = recordingOp.getImmediateScreenshots();
112+
expect(immediateScreenshots).toHaveLength(1);
113+
expect(immediateScreenshots[0].actionIndex).toBe(0);
114+
});
115+
});
116+
117+
describe('getActions', () => {
118+
it('should return all recorded actions', async () => {
119+
await recordingOp.execute(createExecuteParams('click', [100, 200]));
120+
await recordingOp.execute(createExecuteParams('scroll', [200, 300]));
121+
await recordingOp.execute(createExecuteParams('type'));
122+
123+
expect(recordingOp.getActions()).toHaveLength(3);
124+
});
125+
126+
it('should return empty array initially', () => {
127+
expect(recordingOp.getActions()).toEqual([]);
128+
});
129+
});
130+
});

0 commit comments

Comments
 (0)