Skip to content

Commit

Permalink
feat(browser-event): support drag event (#321)
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhoushaw authored Jan 26, 2025
1 parent 08a159d commit 839dc6c
Show file tree
Hide file tree
Showing 13 changed files with 152 additions and 2 deletions.
30 changes: 29 additions & 1 deletion packages/midscene/src/ai-model/ui-tars-planning.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,14 @@ import {
} from './prompt/ui-tars-planning';
import { call } from './service-caller';

type ActionType = 'click' | 'type' | 'hotkey' | 'finished' | 'scroll' | 'wait';
/**
 * String-literal names of the supported action kinds.
 *
 * These values match the `action_type` tags declared on the action
 * interfaces in this file (for example `'drag'` on `DragAction` and
 * `'wait'` on `WaitAction`).
 */
type ActionType =
| 'click'
| 'drag'
| 'type'
| 'hotkey'
| 'finished'
| 'scroll'
| 'wait';

function capitalize(str: string) {
return str.charAt(0).toUpperCase() + str.slice(1);
Expand Down Expand Up @@ -60,6 +67,18 @@ export async function vlmPlanning(options: {
},
param: action.thought || '',
});
} else if (action.action_type === 'drag') {
const startPoint = getPoint(action.action_inputs.start_box, size);
const endPoint = getPoint(action.action_inputs.end_box, size);
transformActions.push({
type: 'Drag',
param: {
start_box: { x: startPoint[0], y: startPoint[1] },
end_box: { x: endPoint[0], y: endPoint[1] },
},
locate: null,
thought: action.thought || '',
});
} else if (action.action_type === 'type') {
transformActions.push({
type: 'Input',
Expand Down Expand Up @@ -140,6 +159,14 @@ interface ClickAction extends BaseAction {
};
}

/**
 * Action tagged with `action_type: 'drag'`.
 *
 * `vlmPlanning` passes both `start_box` and `end_box` through
 * `getPoint(..., size)` and uses the first two entries of each result as the
 * `x` / `y` of the drag start and end points.
 */
interface DragAction extends BaseAction {
action_type: 'drag';
action_inputs: {
start_box: string; // JSON string of [x, y] coordinates
end_box: string; // JSON string of [x, y] coordinates
};
}

interface WaitAction extends BaseAction {
action_type: 'wait';
action_inputs: {
Expand Down Expand Up @@ -175,6 +202,7 @@ interface FinishedAction extends BaseAction {

export type Action =
| ClickAction
| DragAction
| TypeAction
| HotkeyAction
| ScrollAction
Expand Down
1 change: 1 addition & 0 deletions packages/midscene/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ export interface PlanningAction<ParamType = any> {
type:
| 'Locate'
| 'Tap'
| 'Drag'
| 'Hover'
| 'Input'
| 'KeyboardPress'
Expand Down
3 changes: 2 additions & 1 deletion packages/web-integration/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@
"test": "vitest --run",
"test:u": "vitest --run -u",
"test:ai": "AI_TEST_TYPE=web npm run test",
"test:ai:bridge": "BRIDGE_MODE=true npm run test --inspect packages/web-integration/tests/ai/bridge/agent.test.ts",
"test:ai:temp": "AI_TEST_TYPE=web vitest --run tests/ai/bridge/temp.test.ts",
"test:ai:bridge": "BRIDGE_MODE=true npm run test --inspect tests/ai/bridge/agent.test.ts",
"test:ai:cache": "MIDSCENE_CACHE=true AI_TEST_TYPE=web npm run test",
"test:ai:all": "npm run test:ai:web && npm run test:ai:native",
"test:ai:native": "MIDSCENE_CACHE=true AI_TEST_TYPE=native npm run test",
Expand Down
21 changes: 21 additions & 0 deletions packages/web-integration/src/appium/page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ export class Page implements AbstractPage {
wheel: (deltaX: number, deltaY: number) =>
this.mouseWheel(deltaX, deltaY),
move: (x: number, y: number) => this.mouseMove(x, y),
drag: (from: { x: number; y: number }, to: { x: number; y: number }) =>
this.mouseDrag(from, to),
};
}

Expand Down Expand Up @@ -249,6 +251,25 @@ export class Page implements AbstractPage {
]);
}

/**
 * Drags the pointer from one point to another with a single
 * `browser.performActions` call.
 *
 * The sequence is: move to `from` (duration 0), press button 0, move to `to`
 * (duration 500 — presumably milliseconds; confirm against the driver docs),
 * then release button 0.
 *
 * @param from - Coordinates where the button is pressed.
 * @param to - Coordinates where the button is released.
 */
private async mouseDrag(
  from: { x: number; y: number },
  to: { x: number; y: number },
): Promise<void> {
  const { x: startX, y: startY } = from;
  const { x: endX, y: endY } = to;

  await this.browser.performActions([
    {
      type: 'pointer',
      id: 'mouse',
      parameters: { pointerType: 'mouse' },
      actions: [
        // Jump to the start point, then hold the button down.
        { type: 'pointerMove', duration: 0, x: startX, y: startY },
        { type: 'pointerDown', button: 0 },
        // Travel to the destination over the given duration and let go.
        { type: 'pointerMove', duration: 500, x: endX, y: endY },
        { type: 'pointerUp', button: 0 },
      ],
    },
  ]);
}

private async mouseWheel(
deltaX: number,
deltaY: number,
Expand Down
1 change: 1 addition & 0 deletions packages/web-integration/src/bridge-mode/agent-cli-side.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ export const getBridgePageInCliSide = (): ChromeExtensionPageCliSide => {
click: bridgeCaller(MouseEvent.Click),
wheel: bridgeCaller(MouseEvent.Wheel),
move: bridgeCaller(MouseEvent.Move),
drag: bridgeCaller(MouseEvent.Drag),
};
return mouse;
}
Expand Down
1 change: 1 addition & 0 deletions packages/web-integration/src/bridge-mode/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ export enum MouseEvent {
Click = 'mouse.click',
Wheel = 'mouse.wheel',
Move = 'mouse.move',
Drag = 'mouse.drag',
}

export enum KeyboardEvent {
Expand Down
3 changes: 3 additions & 0 deletions packages/web-integration/src/bridge-mode/page-browser-side.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ export class ChromeExtensionPageBrowserSide extends ChromeExtensionProxyPage {

if (method.startsWith(MouseEvent.PREFIX)) {
const actionName = method.split('.')[1] as keyof MouseAction;
if (actionName === 'drag') {
return this.mouse[actionName].apply(this.mouse, args as any);
}
return this.mouse[actionName].apply(this.mouse, args as any);
}

Expand Down
21 changes: 21 additions & 0 deletions packages/web-integration/src/chrome-extension/page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,27 @@ export default class ChromeExtensionProxyPage implements AbstractPage {
y,
});
},
/**
 * Drags with the left button from `from` to `to`.
 *
 * Order of operations: `mouse.move` to the start, a `mousePressed` debugger
 * event there, `mouse.move` to the end, then a `mouseReleased` debugger
 * event at the end point.
 */
drag: async (
  from: { x: number; y: number },
  to: { x: number; y: number },
) => {
  // Press and release share the same payload shape; only the event type and
  // the coordinates differ.
  const dispatchLeftButton = (
    eventType: 'mousePressed' | 'mouseReleased',
    point: { x: number; y: number },
  ) =>
    this.sendCommandToDebugger('Input.dispatchMouseEvent', {
      type: eventType,
      x: point.x,
      y: point.y,
      button: 'left',
      clickCount: 1,
    });

  await this.mouse.move(from.x, from.y);
  await dispatchLeftButton('mousePressed', from);
  await this.mouse.move(to.x, to.y);
  await dispatchLeftButton('mouseReleased', to);
},
};

keyboard = {
Expand Down
19 changes: 19 additions & 0 deletions packages/web-integration/src/common/tasks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,25 @@ export class PageTaskExecutor {
},
};
tasks.push(taskActionTap);
} else if (plan.type === 'Drag') {
const taskActionDrag: ExecutionTaskActionApply<{
start_box: { x: number; y: number };
end_box: { x: number; y: number };
}> = {
type: 'Action',
subType: 'Drag',
param: plan.param,
thought: plan.thought,
locate: plan.locate,
executor: async (taskParam) => {
assert(
taskParam?.start_box && taskParam?.end_box,
'No start_box or end_box to drag',
);
await this.page.mouse.drag(taskParam.start_box, taskParam.end_box);
},
};
tasks.push(taskActionDrag);
} else if (plan.type === 'Hover') {
const taskActionHover: ExecutionTaskActionApply<PlanningActionParamHover> =
{
Expand Down
8 changes: 8 additions & 0 deletions packages/web-integration/src/page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ export interface MouseAction {
) => Promise<void>;
wheel: (deltaX: number, deltaY: number) => Promise<void>;
move: (x: number, y: number) => Promise<void>;
drag: (
from: { x: number; y: number },
to: { x: number; y: number },
) => Promise<void>;
}

export interface KeyboardAction {
Expand All @@ -36,6 +40,10 @@ export abstract class AbstractPage {
) => {},
wheel: async (deltaX: number, deltaY: number) => {},
move: async (x: number, y: number) => {},
drag: async (
from: { x: number; y: number },
to: { x: number; y: number },
) => {},
};
}

Expand Down
1 change: 1 addition & 0 deletions packages/web-integration/src/playground/static-page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ export default class StaticPage implements AbstractPage {
click: ThrowNotImplemented.bind(null, 'mouse.click'),
wheel: ThrowNotImplemented.bind(null, 'mouse.wheel'),
move: ThrowNotImplemented.bind(null, 'mouse.move'),
drag: ThrowNotImplemented.bind(null, 'mouse.drag'),
};

keyboard = {
Expand Down
26 changes: 26 additions & 0 deletions packages/web-integration/src/puppeteer/base-page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,32 @@ export class Page<
},
move: async (x: number, y: number) =>
this.underlyingPage.mouse.move(x, y),
/**
 * Drags the mouse from `from` to `to` on the underlying page.
 *
 * - `puppeteer`: delegates to the page's own `mouse.drag`.
 * - `playwright`: simulated as move → down → move → up.
 *
 * Any other `pageType` falls through and does nothing.
 */
drag: async (
  from: { x: number; y: number },
  to: { x: number; y: number },
) => {
  switch (this.pageType) {
    case 'puppeteer': {
      const puppeteerPage = this.underlyingPage as PuppeteerPage;
      // NOTE(review): confirm that Puppeteer's `mouse.drag` also releases the
      // button / completes the drop for the pages this is used on.
      await puppeteerPage.mouse.drag(
        { x: from.x, y: from.y },
        { x: to.x, y: to.y },
      );
      break;
    }
    case 'playwright': {
      // Playwright doesn't have a drag method, so we need to simulate it
      const playwrightPage = this.underlyingPage as PlaywrightPage;
      await playwrightPage.mouse.move(from.x, from.y);
      await playwrightPage.mouse.down();
      await playwrightPage.mouse.move(to.x, to.y);
      await playwrightPage.mouse.up();
      break;
    }
  }
},
};
}

Expand Down
19 changes: 19 additions & 0 deletions packages/web-integration/tests/ai/bridge/temp.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import {
AgentOverChromeBridge,
getBridgePageInCliSide,
} from '@/bridge-mode/agent-cli-side';
import { describe, expect, it, vi } from 'vitest';

// AI-driven bridge runs are slow; allow each test up to 260 seconds.
vi.setConfig({
  testTimeout: 260 * 1000,
});

// Only runs when BRIDGE_MODE is set in the environment.
describe.skipIf(!process.env.BRIDGE_MODE)('drag event', () => {
  it('agent in cli side, current tab', async () => {
    const agent = new AgentOverChromeBridge();
    try {
      await agent.connectCurrentTab();
      await agent.ai('Finish dragging the slider');
    } finally {
      // Always tear the agent down, even when connecting or the AI step
      // fails, so a failed run does not leave the bridge agent alive.
      await agent.destroy();
    }
  });
});

0 comments on commit 839dc6c

Please sign in to comment.