Skip to content

Commit 4d89b51

Browse files
lalaluneclaude
andcommitted
fix(bench-server): translate CALENDAR umbrella -> lifeops.calendar.* (P0-5)
Adds umbrella-unwrap in lifeops-bench-handler.applyAction so eliza's emitted CALENDAR(subaction=create_event, ...) routes to the granular calendar handler instead of silently no-op'ing. Mirrors what the Python runner does at the _u_calendar layer. Per docs/audits/lifeops-2026-05-11/SYNTHESIS-IMPLEMENTATION-PLAN.md P0-5. Unblocks eliza state mutations on calendar write scenarios. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent dc24e51 commit 4d89b51

2 files changed

Lines changed: 231 additions & 3 deletions

File tree

packages/app-core/src/benchmark/__tests__/lifeops-bench-handler.test.ts

Lines changed: 183 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@ import { tmpdir } from "node:os";
1111
import { join } from "node:path";
1212
import { Readable } from "node:stream";
1313
import { describe, expect, it } from "vitest";
14-
import { LifeOpsBenchHandler } from "../lifeops-bench-handler.js";
14+
import {
15+
LifeOpsBenchHandler,
16+
translateUmbrellaAction,
17+
} from "../lifeops-bench-handler.js";
1518
import { LifeOpsFakeBackend } from "../lifeops-fake-backend.js";
1619

1720
// --------------------------------------------------------------------------
@@ -689,3 +692,182 @@ describe("LifeOpsBenchHandler", () => {
689692
expect(parsed.tool_calls[0].error).toMatch(/unsupported/);
690693
});
691694
});
695+
696+
// --------------------------------------------------------------------------
697+
// P0-5: CALENDAR umbrella → calendar.<subaction> translation.
698+
// --------------------------------------------------------------------------
699+
700+
describe("translateUmbrellaAction (P0-5)", () => {
  // A mapped subaction is rewritten to its dotted name and `subaction` is
  // removed from the forwarded kwargs.
  it("maps CALENDAR(subaction=create_event) to calendar.create_event and strips subaction", () => {
    const eventFields = {
      calendar_id: "cal_primary",
      title: "deep work",
      start: "2026-05-11T14:00:00Z",
      end: "2026-05-11T14:30:00Z",
    };
    const { name, kwargs } = translateUmbrellaAction("CALENDAR", {
      subaction: "create_event",
      ...eventFields,
    });
    expect(name).toBe("calendar.create_event");
    expect(kwargs).toEqual(eventFields);
  });

  // `delete_event` is an alias: it resolves to the cancel route.
  it("maps CALENDAR(subaction=delete_event) to calendar.cancel_event and strips subaction", () => {
    const { name, kwargs } = translateUmbrellaAction("CALENDAR", {
      subaction: "delete_event",
      id: "ev1",
    });
    expect(name).toBe("calendar.cancel_event");
    expect(kwargs).toEqual({ id: "ev1" });
  });

  // Pass-through cases must hand back the very same kwargs object (identity,
  // not just structural equality), hence `toBe`.
  it("passes CALENDAR without subaction through unchanged", () => {
    const original = { query: "meeting" };
    const result = translateUmbrellaAction("CALENDAR", original);
    expect(result.name).toBe("CALENDAR");
    expect(result.kwargs).toBe(original);
  });

  it("passes non-CALENDAR umbrellas through unchanged", () => {
    const original = { subaction: "send", text: "hi" };
    const result = translateUmbrellaAction("MESSAGE", original);
    expect(result.name).toBe("MESSAGE");
    expect(result.kwargs).toBe(original);
  });
});
741+
742+
describe("LifeOpsBenchHandler CALENDAR umbrella unwrap (P0-5)", () => {
  const RESET_ROUTE = "/api/benchmark/lifeops_bench/reset";
  const MESSAGE_ROUTE = "/api/benchmark/lifeops_bench/message";

  /**
   * Drives one reset + one message through a fresh handler whose planner
   * always emits a single tool call built from `args`, and returns the
   * backend state hash captured before and after the message turn.
   *
   * Asserts along the way that both endpoints answer 200 and that the first
   * reported tool call carries the name the planner emitted with `ok: true`.
   */
  async function runUmbrellaScenario(args: {
    taskId: string;
    toolName: string;
    toolArguments: Record<string, unknown>;
  }): Promise<{ worldHashBefore: string; worldHashAfter: string }> {
    const { taskId, toolName, toolArguments } = args;
    const snapshotPath = writeFixture();
    const handler = new LifeOpsBenchHandler({
      invokePlanner: async () => ({
        text: "ok",
        toolCalls: [{ id: "c1", name: toolName, arguments: toolArguments }],
      }),
    });

    // Reset the session from the fixture snapshot.
    const resetReq = fakeReq("POST", {
      task_id: taskId,
      world_snapshot_path: snapshotPath,
      now_iso: "2026-05-10T12:00:00Z",
    });
    const resetRes = fakeRes();
    await handler.tryHandle(resetReq, resetRes, RESET_ROUTE);
    expect(resetRes.getStatus()).toBe(200);

    // Capture the hash before any tool call has been applied.
    const session = handler.getSession(taskId);
    if (!session) throw new Error("session missing after reset");
    const worldHashBefore = session.backend.stateHash();

    // Run a single message turn; the planner stub supplies the tool call.
    const messageReq = fakeReq("POST", { task_id: taskId, text: "go" });
    const messageRes = fakeRes();
    await handler.tryHandle(messageReq, messageRes, MESSAGE_ROUTE);
    expect(messageRes.getStatus()).toBe(200);
    const payload = JSON.parse(messageRes.getBody());
    expect(payload.tool_calls[0]).toMatchObject({ name: toolName, ok: true });

    return { worldHashBefore, worldHashAfter: session.backend.stateHash() };
  }

  it("CALENDAR(subaction=create_event, …) produces the same state mutation as calendar.create_event", async () => {
    const eventFields = {
      calendar_id: "cal_primary",
      title: "deep work",
      start: "2026-05-11T14:00:00Z",
      end: "2026-05-11T14:30:00Z",
    };

    const viaUmbrella = await runUmbrellaScenario({
      taskId: "umbrella-create",
      toolName: "CALENDAR",
      toolArguments: { subaction: "create_event", ...eventFields },
    });
    const viaGranular = await runUmbrellaScenario({
      taskId: "granular-create",
      toolName: "calendar.create_event",
      toolArguments: eventFields,
    });

    // Same starting world, same resulting world, and the world actually changed.
    expect(viaUmbrella.worldHashBefore).toEqual(viaGranular.worldHashBefore);
    expect(viaUmbrella.worldHashAfter).toEqual(viaGranular.worldHashAfter);
    expect(viaUmbrella.worldHashAfter).not.toEqual(viaUmbrella.worldHashBefore);
  });

  it("CALENDAR(subaction=delete_event, …) produces the same state mutation as calendar.cancel_event", async () => {
    const target = { id: "ev1" };

    const viaUmbrella = await runUmbrellaScenario({
      taskId: "umbrella-delete",
      toolName: "CALENDAR",
      toolArguments: { subaction: "delete_event", ...target },
    });
    const viaGranular = await runUmbrellaScenario({
      taskId: "granular-delete",
      toolName: "calendar.cancel_event",
      toolArguments: target,
    });

    // Same starting world, same resulting world, and the world actually changed.
    expect(viaUmbrella.worldHashBefore).toEqual(viaGranular.worldHashBefore);
    expect(viaUmbrella.worldHashAfter).toEqual(viaGranular.worldHashAfter);
    expect(viaUmbrella.worldHashAfter).not.toEqual(viaUmbrella.worldHashBefore);
  });

  it("CALENDAR without subaction does not crash and is reported as a tool_call", async () => {
    const snapshotPath = writeFixture();
    const handler = new LifeOpsBenchHandler({
      invokePlanner: async () => ({
        text: "ok",
        toolCalls: [{ id: "c1", name: "CALENDAR", arguments: { query: "meeting" } }],
      }),
    });

    const resetReq = fakeReq("POST", {
      task_id: "umbrella-bare",
      world_snapshot_path: snapshotPath,
      now_iso: "2026-05-10T12:00:00Z",
    });
    const resetRes = fakeRes();
    await handler.tryHandle(resetReq, resetRes, RESET_ROUTE);
    expect(resetRes.getStatus()).toBe(200);

    const messageReq = fakeReq("POST", { task_id: "umbrella-bare", text: "go" });
    const messageRes = fakeRes();
    await handler.tryHandle(messageReq, messageRes, MESSAGE_ROUTE);
    expect(messageRes.getStatus()).toBe(200);
    const payload = JSON.parse(messageRes.getBody());
    expect(payload.tool_calls).toHaveLength(1);
    expect(payload.tool_calls[0].name).toBe("CALENDAR");
  });
});

packages/app-core/src/benchmark/lifeops-bench-handler.ts

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,51 @@ export interface LifeOpsBenchHandlerOptions {
115115
const DEFAULT_MAX_BODY_BYTES = 16 * 1024 * 1024;
116116
const ROUTE_PREFIX = "/api/benchmark/lifeops_bench";
117117

118+
/**
 * Maps `CALENDAR(subaction=X)` to the granular dotted method the fake backend
 * exposes directly. Eliza's planner emits umbrella actions
 * (`CALENDAR(subaction=create_event, …)`) while
 * `LifeOpsFakeBackend.applyAction` keys off dotted names
 * (`calendar.create_event`). Without this translation the calendar umbrella
 * lands on the backend's umbrella router only by coincidence of name and
 * silently no-ops on subactions the umbrella router does not implement, which
 * is the root cause of the calendar state-mutation gap (synthesis plan P0-5).
 *
 * Several subactions are aliases for the same route (`update_event` and
 * `move_event`, `delete_event` and `cancel_event`, `search_events` and
 * `list_events`).
 *
 * Entries here must point at routes that exist in
 * `LifeOpsFakeBackend.SUPPORTED_METHODS`.
 */
const CALENDAR_SUBACTION_TO_GRANULAR: Record<string, string> = {
  create_event: "calendar.create_event",
  update_event: "calendar.move_event",
  move_event: "calendar.move_event",
  delete_event: "calendar.cancel_event",
  cancel_event: "calendar.cancel_event",
  list_events: "calendar.list_events",
  search_events: "calendar.list_events",
};

/**
 * Unwraps an umbrella tool call (currently CALENDAR) into the dotted granular
 * form the fake backend dispatches directly, stripping `subaction` from the
 * forwarded kwargs. When no mapping applies the original `{ name, kwargs }`
 * is returned unchanged so the backend's umbrella router (or its
 * `LifeOpsBackendUnsupportedError`) still owns the response.
 *
 * @param name - Tool name as emitted by the planner.
 * @param kwargs - Tool arguments as emitted by the planner. Never mutated.
 * @returns The translated `{ name, kwargs }`. On pass-through, `kwargs` is the
 *   same object reference that was passed in; on translation it is a shallow
 *   copy without the `subaction` key.
 */
export function translateUmbrellaAction(
  name: string,
  kwargs: Record<string, unknown>,
): { name: string; kwargs: Record<string, unknown> } {
  if (name !== "CALENDAR") return { name, kwargs };

  const subaction = kwargs.subaction;
  if (typeof subaction !== "string" || subaction.length === 0) {
    return { name, kwargs };
  }

  // Only honour keys declared on the table itself. `subaction` comes from
  // planner output, and a bare index would resolve inherited members such as
  // "constructor", "toString" or "__proto__" to truthy non-string values,
  // which would then be forwarded as the action name.
  const granular = Object.prototype.hasOwnProperty.call(
    CALENDAR_SUBACTION_TO_GRANULAR,
    subaction,
  )
    ? CALENDAR_SUBACTION_TO_GRANULAR[subaction]
    : undefined;
  if (!granular) return { name, kwargs };

  const { subaction: _stripped, ...rest } = kwargs;
  return { name: granular, kwargs: rest };
}
162+
118163
export class LifeOpsBenchHandler {
119164
private readonly sessions = new Map<string, LifeOpsBenchSession>();
120165
private readonly invokePlanner: LifeOpsPlannerInvocation;
@@ -287,10 +332,11 @@ export class LifeOpsBenchHandler {
287332
const executed: ToolCallRecord[] = [];
288333
for (const call of plannerResult.toolCalls) {
289334
const id = call.id ?? `call_${executed.length}`;
335+
const translated = translateUmbrellaAction(call.name, call.arguments);
290336
try {
291337
const result: ActionResult = session.backend.applyAction(
292-
call.name,
293-
call.arguments,
338+
translated.name,
339+
translated.kwargs,
294340
);
295341
executed.push({
296342
id,

0 commit comments

Comments
 (0)