Skip to content

Commit b955644

Browse files
lalalune and claude committed
feat(bench-server): implement MESSAGE umbrella in TS fake-backend (P0-4)
Mirrors the Python _u_message handler so eliza adapter mutations land in TS bench-server state.

Previously eliza scored 0.000 on mail + messages because every MESSAGE.* action no-op'd in TS, breaking the state_hash component.

Per docs/audits/lifeops-2026-05-11/SYNTHESIS-IMPLEMENTATION-PLAN.md P0-4.

Expected: eliza mail score 0.000 → ~0.6.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 8293c92 commit b955644

2 files changed

Lines changed: 883 additions & 51 deletions

File tree

packages/app-core/src/benchmark/__tests__/lifeops-bench-handler.test.ts

Lines changed: 250 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,256 @@ describe("LifeOpsFakeBackend", () => {
261261
expect(result.ok).toBe(true);
262262
expect(result.result).toMatchObject([{ id: "ev1" }]);
263263
});
264+
265+
// -------------------------------------------------------------------
266+
// MESSAGE umbrella (P0-4) — mirrors `_u_message` in the Python runner
267+
// (packages/benchmarks/lifeops-bench/eliza_lifeops_bench/runner.py).
268+
// Previously the TS bench-server no-op'd every MESSAGE.* action, so
269+
// the eliza adapter scored 0.000 on mail + messages domains because
270+
// the state_hash component never advanced.
271+
// -------------------------------------------------------------------
272+
273+
it("MESSAGE send (gmail) writes to email + email_thread stores", () => {
274+
const path = writeFixture();
275+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
276+
const before = backend.stateHash();
277+
const result = backend.applyAction("MESSAGE", {
278+
operation: "send",
279+
source: "gmail",
280+
to_emails: ["alice@example.test"],
281+
subject: "hello",
282+
body: "world",
283+
});
284+
expect(result.ok).toBe(true);
285+
const sent = result.result as { id: string; thread_id: string };
286+
expect(sent.id).toMatch(/^email_auto_[0-9a-f]{12}$/);
287+
expect(sent.thread_id).toMatch(/^thread_auto_[0-9a-f]{12}$/);
288+
const doc = backend.toDocument();
289+
expect(doc.stores.email[sent.id]).toMatchObject({
290+
folder: "sent",
291+
subject: "hello",
292+
to_emails: ["alice@example.test"],
293+
});
294+
expect(doc.stores.email_thread[sent.thread_id]).toMatchObject({
295+
message_ids: [sent.id],
296+
});
297+
expect(backend.stateHash()).not.toEqual(before);
298+
});
299+
300+
it("MESSAGE send (gmail) is deterministic — same kwargs => same id", () => {
301+
const a = LifeOpsFakeBackend.fromJsonFile(writeFixture());
302+
const b = LifeOpsFakeBackend.fromJsonFile(writeFixture());
303+
const kwargs = {
304+
operation: "send",
305+
source: "gmail",
306+
to_emails: ["alice@example.test"],
307+
subject: "hello",
308+
body: "world",
309+
};
310+
const ra = a.applyAction("MESSAGE", kwargs).result as { id: string };
311+
const rb = b.applyAction("MESSAGE", kwargs).result as { id: string };
312+
expect(ra.id).toEqual(rb.id);
313+
});
314+
315+
it("MESSAGE send (imessage contact) creates conversation + chat_message", () => {
316+
const path = writeFixture();
317+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
318+
const result = backend.applyAction("MESSAGE", {
319+
operation: "send",
320+
source: "imessage",
321+
target: "Alice",
322+
message: "hey",
323+
});
324+
expect(result.ok).toBe(true);
325+
const sent = result.result as { id: string; conversation_id: string };
326+
const doc = backend.toDocument();
327+
expect(doc.stores.chat_message[sent.id]).toMatchObject({
328+
text: "hey",
329+
channel: "imessage",
330+
conversation_id: sent.conversation_id,
331+
});
332+
expect(doc.stores.conversation[sent.conversation_id]).toMatchObject({
333+
channel: "imessage",
334+
is_group: false,
335+
title: "Alice",
336+
});
337+
});
338+
339+
it("MESSAGE send (group) requires roomId and creates a group conversation", () => {
340+
const path = writeFixture();
341+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
342+
const result = backend.applyAction("MESSAGE", {
343+
operation: "send",
344+
source: "slack",
345+
targetKind: "group",
346+
roomId: "room-42",
347+
message: "team update",
348+
});
349+
expect(result.ok).toBe(true);
350+
const sent = result.result as { id: string; conversation_id: string };
351+
expect(sent.conversation_id).toBe("room-42");
352+
const doc = backend.toDocument();
353+
expect(doc.stores.conversation["room-42"]).toMatchObject({
354+
is_group: true,
355+
channel: "slack",
356+
});
357+
});
358+
359+
it("MESSAGE manage(archive) by messageId moves email to archive", () => {
360+
const path = writeFixture();
361+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
362+
const result = backend.applyAction("MESSAGE", {
363+
operation: "manage",
364+
manageOperation: "archive",
365+
messageId: "e1",
366+
});
367+
expect(result.ok).toBe(true);
368+
expect(result.result).toMatchObject({ id: "e1", folder: "archive" });
369+
const doc = backend.toDocument();
370+
expect(doc.stores.email.e1.folder).toBe("archive");
371+
});
372+
373+
it("MESSAGE manage(archive) by threadId archives every email in thread", () => {
374+
const path = writeFixture();
375+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
376+
const result = backend.applyAction("MESSAGE", {
377+
operation: "manage",
378+
manageOperation: "archive",
379+
threadId: "t1",
380+
});
381+
expect(result.ok).toBe(true);
382+
expect(result.result).toMatchObject({
383+
thread_id: "t1",
384+
archived_ids: ["e1"],
385+
});
386+
const doc = backend.toDocument();
387+
expect(doc.stores.email.e1.folder).toBe("archive");
388+
});
389+
390+
it("MESSAGE manage(trash) flips folder to trash", () => {
391+
const path = writeFixture();
392+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
393+
const result = backend.applyAction("MESSAGE", {
394+
operation: "manage",
395+
manageOperation: "trash",
396+
messageId: "e1",
397+
});
398+
expect(result.ok).toBe(true);
399+
expect(result.result).toMatchObject({ id: "e1", folder: "trash" });
400+
});
401+
402+
it("MESSAGE manage(star) toggles is_starred and respects `starred`", () => {
403+
const path = writeFixture();
404+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
405+
const r1 = backend.applyAction("MESSAGE", {
406+
operation: "manage",
407+
manageOperation: "star",
408+
messageId: "e1",
409+
});
410+
expect(r1.result).toMatchObject({ id: "e1", is_starred: true });
411+
const r2 = backend.applyAction("MESSAGE", {
412+
operation: "manage",
413+
manageOperation: "star",
414+
messageId: "e1",
415+
starred: false,
416+
});
417+
expect(r2.result).toMatchObject({ id: "e1", is_starred: false });
418+
});
419+
420+
it("MESSAGE manage(mark_read) flips is_read", () => {
421+
const path = writeFixture();
422+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
423+
const result = backend.applyAction("MESSAGE", {
424+
operation: "manage",
425+
manageOperation: "mark_read",
426+
messageId: "e1",
427+
});
428+
expect(result.ok).toBe(true);
429+
expect(result.result).toMatchObject({ id: "e1", is_read: true });
430+
});
431+
432+
it("MESSAGE draft_reply (gmail) creates a draft on the parent thread", () => {
433+
const path = writeFixture();
434+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
435+
const result = backend.applyAction("MESSAGE", {
436+
operation: "draft_reply",
437+
source: "gmail",
438+
messageId: "e1",
439+
body: "ack",
440+
});
441+
expect(result.ok).toBe(true);
442+
const draft = result.result as {
443+
id: string;
444+
folder: string;
445+
thread_id: string;
446+
};
447+
expect(draft.folder).toBe("drafts");
448+
expect(draft.thread_id).toBe("t1");
449+
const doc = backend.toDocument();
450+
expect(doc.stores.email[draft.id]).toMatchObject({
451+
folder: "drafts",
452+
subject: "Re: report status",
453+
to_emails: ["boss@example.test"],
454+
});
455+
});
456+
457+
it("MESSAGE draft_reply on a non-gmail channel is a no-op", () => {
458+
const path = writeFixture();
459+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
460+
const before = backend.stateHash();
461+
const result = backend.applyAction("MESSAGE", {
462+
operation: "draft_reply",
463+
source: "imessage",
464+
messageId: "msg-1",
465+
});
466+
expect(result.ok).toBe(true);
467+
expect(result.result).toMatchObject({
468+
operation: "draft_reply",
469+
source: "imessage",
470+
noop: true,
471+
});
472+
expect(backend.stateHash()).toEqual(before);
473+
});
474+
475+
it("MESSAGE read ops (triage/search_inbox/list_channels/read_channel/read_with_contact) are noop", () => {
476+
const path = writeFixture();
477+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
478+
const before = backend.stateHash();
479+
for (const op of [
480+
"triage",
481+
"search_inbox",
482+
"list_channels",
483+
"read_channel",
484+
"read_with_contact",
485+
]) {
486+
const result = backend.applyAction("MESSAGE", {
487+
operation: op,
488+
source: "gmail",
489+
});
490+
expect(result.ok).toBe(true);
491+
expect(result.result).toMatchObject({
492+
operation: op,
493+
noop: true,
494+
});
495+
}
496+
expect(backend.stateHash()).toEqual(before);
497+
});
498+
499+
it("MESSAGE throws on missing operation", () => {
500+
const path = writeFixture();
501+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
502+
expect(() => backend.applyAction("MESSAGE", {})).toThrow(
503+
/requires `operation`/,
504+
);
505+
});
506+
507+
it("MESSAGE throws on unknown operation", () => {
508+
const path = writeFixture();
509+
const backend = LifeOpsFakeBackend.fromJsonFile(path);
510+
expect(() =>
511+
backend.applyAction("MESSAGE", { operation: "frobnicate" }),
512+
).toThrow(/MESSAGE\/frobnicate/);
513+
});
264514
});
265515

266516
describe("LifeOpsBenchHandler", () => {

0 commit comments

Comments
 (0)