Skip to content

Commit 312c403

Browse files
author
Shaw
committed
Merge branch 'develop' of https://github.com/elizaOS/eliza into develop
2 parents fc50c7a + 996d9d3 commit 312c403

12 files changed

Lines changed: 2165 additions & 1 deletion

File tree

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
/**
2+
* Pure-function tests for the dHash + block-grid implementation.
3+
*
4+
* We synthesize tiny PNGs via `node:zlib` and the PNG chunk format so we
5+
* don't need any decoder dependency to exercise both code paths.
6+
*/
7+
8+
import { deflateSync } from "node:zlib";
9+
import { describe, expect, it } from "vitest";
10+
import {
11+
blockGrid,
12+
decodePng,
13+
diffBlocks,
14+
frameDhash,
15+
hamming,
16+
} from "../scene/dhash.js";
17+
18+
/**
 * Bitwise CRC-32 of `bytes`.
 *
 * Starts from an all-ones register, folds each byte in LSB-first (right
 * shifts, polynomial constant 0xEDB88320), and XORs the result with
 * 0xFFFFFFFF. The return value is normalised to an unsigned 32-bit number.
 */
function crc32(bytes: Buffer): number {
  let acc = 0xffffffff;
  for (const byte of bytes) {
    acc = (acc ^ byte) >>> 0;
    for (let bit = 0; bit < 8; bit += 1) {
      const shifted = acc >>> 1;
      // Apply the polynomial only when the bit shifted out was set.
      acc = (acc & 1) === 1 ? shifted ^ 0xedb88320 : shifted;
    }
  }
  return (acc ^ 0xffffffff) >>> 0;
}
29+
30+
/**
 * Assemble a single PNG chunk: a 4-byte big-endian payload length, the ASCII
 * chunk type, the payload itself, and a 4-byte big-endian CRC-32 computed
 * over type + payload.
 */
function pngChunk(type: string, data: Buffer): Buffer {
  const typeBytes = Buffer.from(type, "ascii");

  const lengthField = Buffer.alloc(4);
  lengthField.writeUInt32BE(data.length, 0);

  const checksumField = Buffer.alloc(4);
  checksumField.writeUInt32BE(crc32(Buffer.concat([typeBytes, data])), 0);

  return Buffer.concat([lengthField, typeBytes, data, checksumField]);
}
38+
39+
/**
 * Build a 16×16 8-bit RGB PNG painted in a horizontal gradient unless
 * `solid` is true (in which case every pixel is the same color).
 *
 * Every pixel is gray (R = G = B). For the gradient the channel value is
 * `((x + seed) * 16) % 255`; for a solid frame it is `(seed * 7) % 255`.
 *
 * @param seed  Shifts the gradient, or selects the gray level when `solid`.
 * @param solid Paint a flat image instead of a gradient.
 * @returns The encoded PNG (signature + IHDR + one IDAT + IEND).
 */
function makeTinyPng(seed = 0, solid = false): Buffer {
  const w = 16;
  const h = 16;
  const rows: number[] = [];
  for (let y = 0; y < h; y += 1) {
    rows.push(0); // filter = None
    for (let x = 0; x < w; x += 1) {
      const v = solid ? (seed * 7) % 255 : ((x + seed) * 16) % 255;
      rows.push(v, v, v);
    }
  }
  const idat = deflateSync(Buffer.from(rows));

  const ihdr = Buffer.alloc(13);
  ihdr.writeUInt32BE(w, 0);
  ihdr.writeUInt32BE(h, 4);
  ihdr[8] = 8; // bit depth
  ihdr[9] = 2; // color type RGB
  ihdr[10] = 0; // compression method
  ihdr[11] = 0; // filter method
  ihdr[12] = 0; // interlace method

  const signature = Buffer.from([
    0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a,
  ]);
  return Buffer.concat([
    signature,
    pngChunk("IHDR", ihdr),
    pngChunk("IDAT", idat),
    pngChunk("IEND", Buffer.alloc(0)),
  ]);
}
75+
76+
// Covers the PNG decoder, the whole-frame hash, and the Hamming distance
// helper using synthetic 16×16 frames.
describe("dhash — pure functions", () => {
  it("decodes a minimal RGB PNG", () => {
    const image = decodePng(makeTinyPng());
    expect(image).not.toBeNull();
    expect(image?.width).toBe(16);
    expect(image?.height).toBe(16);
    // Four bytes per pixel are expected even though the source PNG is RGB.
    expect(image?.rgba.length).toBe(16 * 16 * 4);
  });

  it("returns null for non-PNG input", () => {
    const garbage = Buffer.from("not a png");
    expect(decodePng(garbage)).toBeNull();
  });

  it("frameDhash is stable for identical frames", () => {
    const first = frameDhash(makeTinyPng(7));
    const second = frameDhash(makeTinyPng(7));
    expect(first).not.toBeNull();
    expect(second).not.toBeNull();
    expect(first).toBe(second);
  });

  it("frameDhash differs for visually different frames", () => {
    const baseline = frameDhash(makeTinyPng(0));
    const shifted = frameDhash(makeTinyPng(50));
    expect(baseline).not.toBeNull();
    expect(shifted).not.toBeNull();
    const distance = hamming(baseline!, shifted!);
    expect(distance).toBeGreaterThan(0);
  });

  it("hamming(x, x) == 0", () => {
    const sample = 0xdeadbeefcafe0001n;
    expect(hamming(sample, sample)).toBe(0);
  });

  it("hamming(a, b) counts changed bits", () => {
    const allZeros = 0n;
    const allOnes = 0xffffffffffffffffn;
    expect(hamming(allZeros, allOnes)).toBe(64);
  });
});
114+
115+
// Covers per-block hashing (`blockGrid`) and frame-to-frame diffing
// (`diffBlocks`) on synthetic 16×16 frames split into a 4×4 grid.
describe("dhash — block grid", () => {
  it("produces a cols*rows grid", () => {
    const grid = blockGrid(makeTinyPng(0), 4, 4);
    expect(grid).not.toBeNull();
    expect(grid!.cols).toBe(4);
    expect(grid!.rows).toBe(4);
    expect(grid!.hashes.length).toBe(16);
  });

  it("identical frames produce identical block grids and zero dirty blocks", () => {
    const a = blockGrid(makeTinyPng(11), 4, 4)!;
    const b = blockGrid(makeTinyPng(11), 4, 4)!;
    // Check the grids themselves, not only the diff, so the test matches its
    // title.
    expect(b.cols).toBe(a.cols);
    expect(b.rows).toBe(a.rows);
    expect(b.hashes).toEqual(a.hashes);
    const dirty = diffBlocks(a, b);
    expect(dirty.length).toBe(0);
  });

  it("first frame (prev=null) marks every block dirty", () => {
    const grid = blockGrid(makeTinyPng(0), 4, 4)!;
    const dirty = diffBlocks(null, grid);
    expect(dirty.length).toBe(grid.cols * grid.rows);
  });

  it("changed frames produce a non-zero dirty list", () => {
    const a = blockGrid(makeTinyPng(0), 4, 4)!;
    const b = blockGrid(makeTinyPng(120), 4, 4)!;
    const dirty = diffBlocks(a, b);
    expect(dirty.length).toBeGreaterThan(0);
  });

  it("dirty-block bboxes are translated to image pixel space when dims are known", () => {
    const a = blockGrid(makeTinyPng(0), 4, 4)!;
    const b = blockGrid(makeTinyPng(120), 4, 4)!;
    const dirty = diffBlocks(a, b, 16, 16);
    // Without this guard an empty list would make the loop below pass
    // without checking a single bbox.
    expect(dirty.length).toBeGreaterThan(0);
    for (const d of dirty) {
      // The checks treat bbox as origin (indices 0, 1) plus extent
      // (indices 2, 3), all inside the 16×16 image.
      expect(d.bbox[2]).toBeGreaterThan(0);
      expect(d.bbox[3]).toBeGreaterThan(0);
      expect(d.bbox[0]).toBeGreaterThanOrEqual(0);
      expect(d.bbox[1]).toBeGreaterThanOrEqual(0);
      expect(d.bbox[0] + d.bbox[2]).toBeLessThanOrEqual(16);
      expect(d.bbox[1] + d.bbox[3]).toBeLessThanOrEqual(16);
    }
  });
});

plugins/plugin-computeruse/src/index.ts

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import { promoteSubactionsToActions } from "@elizaos/core";
2727
import { useComputerAction } from "./actions/use-computer.js";
2828
import { windowAction } from "./actions/window.js";
2929
import { computerStateProvider } from "./providers/computer-state.js";
30+
import { sceneProvider } from "./providers/scene.js";
3031
import { computerUseRouteHandler } from "./routes/computer-use-compat-routes.js";
3132
import { ComputerUseService } from "./services/computer-use-service.js";
3233

@@ -80,7 +81,7 @@ export const computerUsePlugin: Plugin = {
8081
...promoteSubactionsToActions(windowAction),
8182
],
8283

83-
providers: [computerStateProvider],
84+
providers: [computerStateProvider, sceneProvider],
8485

8586
routes: computerUseRoutes,
8687

@@ -99,6 +100,53 @@ export { ComputerUseService } from "./services/computer-use-service.js";
99100

100101
// iOS computer-use surface. See `docs/IOS_CONSTRAINTS.md` for the honest scope.
101102
export * from "./mobile/index.js";
103+
104+
// WS6: scene-builder surface — consumed by WS7 (Brain) and WS10 verifiers.
105+
export {
106+
_resetDefaultSceneBuilderForTests,
107+
getDefaultSceneBuilder,
108+
SceneBuilder,
109+
type SceneBuilderDeps,
110+
type SceneUpdateEvent,
111+
} from "./scene/scene-builder.js";
112+
export type {
113+
Scene,
114+
SceneApp,
115+
SceneAppWindow,
116+
SceneAxNode,
117+
SceneFocusedWindow,
118+
SceneOcrBox,
119+
SceneVlmElement,
120+
} from "./scene/scene-types.js";
121+
export { serializeSceneForPrompt } from "./scene/serialize.js";
122+
export {
123+
type BlockGrid,
124+
blockGrid,
125+
decodePng,
126+
diffBlocks,
127+
type DirtyBlock,
128+
frameDhash,
129+
hamming,
130+
} from "./scene/dhash.js";
131+
export { enumerateApps, joinAppsAndWindows } from "./scene/apps.js";
132+
export {
133+
type AccessibilityProvider,
134+
DarwinAccessibilityProvider,
135+
LinuxAccessibilityProvider,
136+
NullAccessibilityProvider,
137+
parseHyprlandClients,
138+
parseSwayTree,
139+
resolveAccessibilityProvider,
140+
setAccessibilityProvider,
141+
WindowsAccessibilityProvider,
142+
} from "./scene/a11y-provider.js";
143+
export {
144+
listProcesses,
145+
parsePsOutput,
146+
parseWindowsProcessJson,
147+
type ProcessInfo,
148+
} from "./platform/process-list.js";
149+
export { sceneProvider } from "./providers/scene.js";
102150
export type {
103151
DesktopControlCapabilities,
104152
DesktopControlCapability,
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/**
2+
* Cross-platform process listing.
3+
*
4+
* The WS6 scene-builder joins running processes with windows to produce the
5+
* `apps[]` field of a Scene. The contract is intentionally minimal — pid,
6+
* executable/display name, and a best-effort foreground flag if cheap to
7+
* obtain. Anything richer (memory, cpu, parent pid) is out of scope here
8+
* because the scene-builder runs every active-poll frame and must stay
9+
* cheap.
10+
*
11+
* Per-OS source:
12+
* - Linux : `/proc/<pid>/comm` only. Pure FS read,
13+
* no shell out. ~5ms for 300 processes.
14+
* - macOS : `ps -axco pid=,comm=` (falling back to `ps -axo pid=,comm=`) — built-in BSD ps.
15+
* - Windows: PowerShell `Get-Process | Select Id, ProcessName`.
16+
* - Android: stub returning `[]` — the JS contract for `UsageStatsManager`
17+
* is owned by WS8's native side. We expose the function shape so
18+
* the scene-builder doesn't have to branch.
19+
*
20+
* Failure semantics:
21+
* - A single un-readable process is skipped, not propagated.
22+
* - A complete enumeration failure returns `[]` and the scene-builder logs
23+
* once per platform-mode at warn.
24+
*/
25+
26+
import { execFileSync, execSync } from "node:child_process";
27+
import { readFileSync, readdirSync } from "node:fs";
28+
import { currentPlatform } from "./helpers.js";
29+
30+
/** One running process as reported by `listProcesses`. */
export interface ProcessInfo {
  /** Numeric process id. */
  pid: number;
  /** Process name as reported by the per-platform source. */
  name: string;
}
34+
35+
/**
 * Enumerate running processes for the current platform.
 *
 * Dispatches on `currentPlatform()` to the Linux, macOS, or Windows
 * implementation; any other platform yields an empty list.
 */
export function listProcesses(): ProcessInfo[] {
  switch (currentPlatform()) {
    case "linux":
      return listLinux();
    case "darwin":
      return listDarwin();
    case "win32":
      return listWindows();
    default:
      return [];
  }
}
42+
43+
/**
 * Linux implementation: walk the numeric entries of `/proc` and read each
 * process name from `/proc/<pid>/comm`.
 *
 * Returns `[]` when `/proc` cannot be listed. Entries whose `comm` cannot be
 * read, or is empty after trimming, are skipped.
 */
function listLinux(): ProcessInfo[] {
  let procEntries: string[];
  try {
    procEntries = readdirSync("/proc");
  } catch {
    return [];
  }

  const found: ProcessInfo[] = [];
  for (const dirName of procEntries) {
    // Only all-digit directory names are process ids.
    if (!/^\d+$/.test(dirName)) continue;
    const pid = Number.parseInt(dirName, 10);
    if (!Number.isFinite(pid)) continue;

    let comm: string;
    try {
      // `comm` is the 15-char truncated executable basename. Good enough for
      // join-with-windows; a longer name (cmdline arg0 basename) is overkill.
      comm = readFileSync(`/proc/${pid}/comm`, "utf8").trim();
    } catch {
      continue;
    }
    if (comm.length > 0) {
      found.push({ pid, name: comm });
    }
  }
  return found;
}
68+
69+
/**
 * macOS implementation: run the built-in `ps` and parse its `pid comm`
 * output with `parsePsOutput`.
 *
 * Two invocations are tried in order; the first one that succeeds wins. If
 * both fail the result is `[]`.
 */
function listDarwin(): ProcessInfo[] {
  const argVariants: string[][] = [
    ["-axco", "pid=,comm="],
    // Fallback: BSD ps without `-c` (gives full path in comm).
    ["-axo", "pid=,comm="],
  ];

  for (const psArgs of argVariants) {
    try {
      const stdout = execFileSync("ps", psArgs, {
        timeout: 4000,
        encoding: "utf8",
        stdio: ["ignore", "pipe", "ignore"],
      });
      return parsePsOutput(stdout);
    } catch {
      // Fall through to the next variant, if any.
    }
  }
  return [];
}
91+
92+
/**
 * Parse the output of `ps -ax[c]o pid=,comm=` into process records.
 *
 * Each non-blank line is expected to be `<pid><whitespace><comm>`. Lines that
 * do not match, have a non-positive pid, or whose name is empty once reduced
 * to a basename are skipped, so no entry is ever emitted with an empty
 * `name`.
 *
 * @param text Raw stdout of `ps`; both `\n` and `\r\n` line endings work.
 * @returns One `{ pid, name }` per usable line, in input order.
 */
export function parsePsOutput(text: string): ProcessInfo[] {
  const out: ProcessInfo[] = [];
  for (const line of text.split(/\r?\n/)) {
    const m = /^(\d+)\s+(.+)$/.exec(line.trim());
    if (!m) continue;
    const pid = Number.parseInt(m[1] ?? "", 10);
    if (!Number.isFinite(pid) || pid <= 0) continue;
    const rawName = (m[2] ?? "").trim();
    // For `ps -axo pid=,comm=` without `-c` the comm column holds an absolute
    // path. We strip to the basename so the scene-builder's join key matches
    // the AppleScript window enumerator's `name of proc`.
    const name = rawName.slice(rawName.lastIndexOf("/") + 1);
    // A path ending in "/" has an empty basename; skip it rather than emit a
    // nameless process.
    if (!name) continue;
    out.push({ pid, name });
  }
  return out;
}
111+
112+
/**
 * Windows implementation: ask PowerShell for `Id` and `ProcessName` of every
 * process as compact JSON and hand the text to `parseWindowsProcessJson`.
 *
 * Any failure (PowerShell missing, non-zero exit, timeout after 8 s) yields
 * `[]`.
 */
function listWindows(): ProcessInfo[] {
  try {
    const text = execSync(
      "powershell -NoProfile -Command \"Get-Process | Select-Object Id,ProcessName | ConvertTo-Json -Compress\"",
      {
        timeout: 8000,
        encoding: "utf8",
        stdio: ["ignore", "pipe", "ignore"],
        // Keep the spawned shell from opening a console window; this runs
        // repeatedly while the scene is being polled.
        windowsHide: true,
      },
    );
    return parseWindowsProcessJson(text);
  } catch {
    return [];
  }
}
123+
124+
/** Shape of one row emitted by `Select-Object Id,ProcessName | ConvertTo-Json`. */
interface WinProcessRow {
  Id?: number;
  ProcessName?: string;
}

/**
 * Parse the JSON produced by the PowerShell process query.
 *
 * The payload may be an array of rows or a single row object; both are
 * accepted. A row is kept only when `Id` is a JSON number that is a positive
 * integer and `ProcessName` is a non-empty string. Values that merely coerce
 * to a number (`true`, `"12"`, `[7]`) are rejected instead of being turned
 * into PIDs.
 *
 * @param text Raw stdout of the PowerShell command.
 * @returns Valid rows as `{ pid, name }`, in input order; `[]` when `text`
 *          is not valid JSON.
 */
export function parseWindowsProcessJson(text: string): ProcessInfo[] {
  let raw: unknown;
  try {
    raw = JSON.parse(text);
  } catch {
    return [];
  }
  const rows: unknown[] = Array.isArray(raw) ? raw : [raw];
  const out: ProcessInfo[] = [];
  for (const candidate of rows) {
    if (typeof candidate !== "object" || candidate === null) continue;
    // The fields are still validated below; the assertion only names them.
    const { Id, ProcessName } = candidate as WinProcessRow;
    if (typeof Id !== "number" || !Number.isInteger(Id) || Id <= 0) continue;
    if (typeof ProcessName !== "string" || ProcessName === "") continue;
    out.push({ pid: Id, name: ProcessName });
  }
  return out;
}

0 commit comments

Comments
 (0)