@@ -11,7 +11,10 @@ import { tmpdir } from "node:os";
1111import { join } from "node:path" ;
1212import { Readable } from "node:stream" ;
1313import { describe , expect , it } from "vitest" ;
14- import { LifeOpsBenchHandler } from "../lifeops-bench-handler.js" ;
14+ import {
15+ LifeOpsBenchHandler ,
16+ translateUmbrellaAction ,
17+ } from "../lifeops-bench-handler.js" ;
1518import { LifeOpsFakeBackend } from "../lifeops-fake-backend.js" ;
1619
1720// --------------------------------------------------------------------------
@@ -689,3 +692,182 @@ describe("LifeOpsBenchHandler", () => {
689692 expect ( parsed . tool_calls [ 0 ] . error ) . toMatch ( / u n s u p p o r t e d / ) ;
690693 } ) ;
691694} ) ;
695+
696+ // --------------------------------------------------------------------------
697+ // P0-5: CALENDAR umbrella → calendar.<subaction> translation.
698+ // --------------------------------------------------------------------------
699+
describe("translateUmbrellaAction (P0-5)", () => {
  it("maps CALENDAR(subaction=create_event) to calendar.create_event and strips subaction", () => {
    // Everything except `subaction` must survive translation untouched.
    const eventFields = {
      calendar_id: "cal_primary",
      title: "deep work",
      start: "2026-05-11T14:00:00Z",
      end: "2026-05-11T14:30:00Z",
    };

    const { name, kwargs } = translateUmbrellaAction("CALENDAR", {
      subaction: "create_event",
      ...eventFields,
    });

    expect(name).toBe("calendar.create_event");
    expect(kwargs).toEqual(eventFields);
  });

  it("maps CALENDAR(subaction=delete_event) to calendar.cancel_event and strips subaction", () => {
    // Note the rename: the umbrella says "delete", the granular action says "cancel".
    const { name, kwargs } = translateUmbrellaAction("CALENDAR", {
      subaction: "delete_event",
      id: "ev1",
    });

    expect(name).toBe("calendar.cancel_event");
    expect(kwargs).toEqual({ id: "ev1" });
  });

  it("passes CALENDAR without subaction through unchanged", () => {
    const original = { query: "meeting" };

    const result = translateUmbrellaAction("CALENDAR", original);

    expect(result.name).toBe("CALENDAR");
    // Identity check (toBe): the very same kwargs object must come back.
    expect(result.kwargs).toBe(original);
  });

  it("passes non-CALENDAR umbrellas through unchanged", () => {
    const original = { subaction: "send", text: "hi" };

    const result = translateUmbrellaAction("MESSAGE", original);

    expect(result.name).toBe("MESSAGE");
    // Identity check (toBe): the very same kwargs object must come back.
    expect(result.kwargs).toBe(original);
  });
});
741+
describe("LifeOpsBenchHandler CALENDAR umbrella unwrap (P0-5)", () => {
  const RESET_ROUTE = "/api/benchmark/lifeops_bench/reset";
  const MESSAGE_ROUTE = "/api/benchmark/lifeops_bench/message";

  /**
   * POSTs `payload` to `route` through the handler, asserts the response
   * status is 200, and returns the fake response for further inspection.
   */
  async function postExpectingOk(
    handler: LifeOpsBenchHandler,
    route: string,
    payload: Parameters<typeof fakeReq>[1],
  ) {
    const request = fakeReq("POST", payload);
    const response = fakeRes();
    await handler.tryHandle(request, response, route);
    expect(response.getStatus()).toBe(200);
    return response;
  }

  /**
   * Resets a fresh session for `taskId`, has the planner emit a single tool
   * call (`toolName` with `toolArguments`), and returns the backend state
   * hash captured before and after the message round-trip.
   */
  async function runUmbrellaScenario(args: {
    taskId: string;
    toolName: string;
    toolArguments: Record<string, unknown>;
  }): Promise<{ worldHashBefore: string; worldHashAfter: string }> {
    const { taskId, toolName, toolArguments } = args;
    const snapshotPath = writeFixture();
    const handler = new LifeOpsBenchHandler({
      invokePlanner: async () => ({
        text: "ok",
        toolCalls: [{ id: "c1", name: toolName, arguments: toolArguments }],
      }),
    });

    // Step 1: reset the world from the fixture snapshot.
    await postExpectingOk(handler, RESET_ROUTE, {
      task_id: taskId,
      world_snapshot_path: snapshotPath,
      now_iso: "2026-05-10T12:00:00Z",
    });

    // Step 2: record the state hash before any tool call runs.
    const session = handler.getSession(taskId);
    if (!session) throw new Error("session missing after reset");
    const worldHashBefore = session.backend.stateHash();

    // Step 3: send one message; the stubbed planner answers with the tool call.
    const messageRes = await postExpectingOk(handler, MESSAGE_ROUTE, {
      task_id: taskId,
      text: "go",
    });
    const payload = JSON.parse(messageRes.getBody());
    // The call is reported under the name the planner emitted, and must succeed.
    expect(payload.tool_calls[0]).toMatchObject({
      name: toolName,
      ok: true,
    });

    // Step 4: record the state hash after the tool call.
    const worldHashAfter = session.backend.stateHash();
    return { worldHashBefore, worldHashAfter };
  }

  it("CALENDAR(subaction=create_event, …) produces the same state mutation as calendar.create_event", async () => {
    const eventFields = {
      calendar_id: "cal_primary",
      title: "deep work",
      start: "2026-05-11T14:00:00Z",
      end: "2026-05-11T14:30:00Z",
    };

    const viaUmbrella = await runUmbrellaScenario({
      taskId: "umbrella-create",
      toolName: "CALENDAR",
      toolArguments: { subaction: "create_event", ...eventFields },
    });
    const viaGranular = await runUmbrellaScenario({
      taskId: "granular-create",
      toolName: "calendar.create_event",
      toolArguments: eventFields,
    });

    // Same starting world, same resulting world, and the world actually changed.
    expect(viaUmbrella.worldHashBefore).toEqual(viaGranular.worldHashBefore);
    expect(viaUmbrella.worldHashAfter).toEqual(viaGranular.worldHashAfter);
    expect(viaUmbrella.worldHashAfter).not.toEqual(viaUmbrella.worldHashBefore);
  });

  it("CALENDAR(subaction=delete_event, …) produces the same state mutation as calendar.cancel_event", async () => {
    const target = { id: "ev1" };

    const viaUmbrella = await runUmbrellaScenario({
      taskId: "umbrella-delete",
      toolName: "CALENDAR",
      toolArguments: { subaction: "delete_event", ...target },
    });
    const viaGranular = await runUmbrellaScenario({
      taskId: "granular-delete",
      toolName: "calendar.cancel_event",
      toolArguments: target,
    });

    // Same starting world, same resulting world, and the world actually changed.
    expect(viaUmbrella.worldHashBefore).toEqual(viaGranular.worldHashBefore);
    expect(viaUmbrella.worldHashAfter).toEqual(viaGranular.worldHashAfter);
    expect(viaUmbrella.worldHashAfter).not.toEqual(viaUmbrella.worldHashBefore);
  });

  it("CALENDAR without subaction does not crash and is reported as a tool_call", async () => {
    const snapshotPath = writeFixture();
    const handler = new LifeOpsBenchHandler({
      invokePlanner: async () => ({
        text: "ok",
        toolCalls: [
          {
            id: "c1",
            name: "CALENDAR",
            arguments: { query: "meeting" },
          },
        ],
      }),
    });

    await postExpectingOk(handler, RESET_ROUTE, {
      task_id: "umbrella-bare",
      world_snapshot_path: snapshotPath,
      now_iso: "2026-05-10T12:00:00Z",
    });

    const messageRes = await postExpectingOk(handler, MESSAGE_ROUTE, {
      task_id: "umbrella-bare",
      text: "go",
    });
    const payload = JSON.parse(messageRes.getBody());
    // Only the count and reported name are pinned; success is not asserted here.
    expect(payload.tool_calls).toHaveLength(1);
    expect(payload.tool_calls[0].name).toBe("CALENDAR");
  });
});
0 commit comments