Skip to content

Commit ad5f5bd

Browse files
committed
updates
1 parent 70d4704 commit ad5f5bd

31 files changed

Lines changed: 169 additions & 265 deletions

docs/audits/lifeops-2026-05-11/action-collisions.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"schemaVersion": "lifeops-action-collisions-v1",
3-
"generatedAt": "2026-05-12T16:56:02.583Z",
3+
"generatedAt": "2026-05-13T00:18:18.683Z",
44
"threshold": 0.75,
55
"nearMissFloor": 0.5,
66
"population": 128,

docs/audits/lifeops-2026-05-11/action-collisions.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Action description collisions
22

3-
Generated: 2026-05-12T16:56:02.578Z
3+
Generated: 2026-05-13T00:18:18.641Z
44
Threshold: cosine ≥ 0.75
55
Population: 128 action descriptions
66
Pairs above threshold: 0

docs/audits/lifeops-2026-05-11/prompts-manifest.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"schemaVersion": "lifeops-prompt-inventory-v1",
3-
"generatedAt": "2026-05-12T16:56:02.424Z",
3+
"generatedAt": "2026-05-13T00:18:15.387Z",
44
"counts": {
55
"total": 977,
66
"byKind": {

packages/app-core/src/benchmark/plugin.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,7 @@ export function createBenchmarkPlugin(): Plugin {
630630
) => {
631631
const params = extractActionParameters(options);
632632

633-
console.log("[BENCHMARK_ACTION] params:", JSON.stringify(params));
633+
logger.debug("[BENCHMARK_ACTION] params:", JSON.stringify(params));
634634

635635
const capturedAction = recordCapturedAction(
636636
captureBenchmarkAction(params),
@@ -717,7 +717,7 @@ export function createBenchmarkPlugin(): Plugin {
717717
validate: async () => true,
718718
handler: async (_runtime, _message, _state, options) => {
719719
const params = extractActionParameters(options);
720-
console.log(`[${name}] params:`, JSON.stringify(params));
720+
logger.debug(`[${name}] params:`, JSON.stringify(params));
721721
const capturedAction = recordCapturedAction(
722722
captureLifeOpsBenchmarkToolAction(name, params),
723723
);

packages/scenario-runner/src/cerebras-judge.test.ts

Lines changed: 24 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -209,16 +209,14 @@ describe("CerebrasJudge", () => {
209209
});
210210

211211
it("sets response_format when jsonObjectMode is true", async () => {
212-
const fetchSpy = vi
213-
.spyOn(globalThis, "fetch")
214-
.mockResolvedValueOnce(
215-
new Response(
216-
JSON.stringify({
217-
choices: [{ message: { content: '{"score":0.5,"reason":"x"}' } }],
218-
}),
219-
{ status: 200 },
220-
),
221-
);
212+
const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
213+
new Response(
214+
JSON.stringify({
215+
choices: [{ message: { content: '{"score":0.5,"reason":"x"}' } }],
216+
}),
217+
{ status: 200 },
218+
),
219+
);
222220
const judge = new CerebrasJudge();
223221
await judge.judge("test prompt", { jsonObjectMode: true });
224222
const callArgs = fetchSpy.mock.calls[0];
@@ -231,16 +229,14 @@ describe("CerebrasJudge", () => {
231229
});
232230

233231
it("omits response_format by default", async () => {
234-
const fetchSpy = vi
235-
.spyOn(globalThis, "fetch")
236-
.mockResolvedValueOnce(
237-
new Response(
238-
JSON.stringify({
239-
choices: [{ message: { content: '{"score":0.5}' } }],
240-
}),
241-
{ status: 200 },
242-
),
243-
);
232+
const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
233+
new Response(
234+
JSON.stringify({
235+
choices: [{ message: { content: '{"score":0.5}' } }],
236+
}),
237+
{ status: 200 },
238+
),
239+
);
244240
const judge = new CerebrasJudge();
245241
await judge.judge("test prompt");
246242
const initArg = fetchSpy.mock.calls[0]?.[1];
@@ -251,16 +247,14 @@ describe("CerebrasJudge", () => {
251247
});
252248

253249
it("includes systemPrompt when provided", async () => {
254-
const fetchSpy = vi
255-
.spyOn(globalThis, "fetch")
256-
.mockResolvedValueOnce(
257-
new Response(
258-
JSON.stringify({
259-
choices: [{ message: { content: '{"score":1}' } }],
260-
}),
261-
{ status: 200 },
262-
),
263-
);
250+
const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
251+
new Response(
252+
JSON.stringify({
253+
choices: [{ message: { content: '{"score":1}' } }],
254+
}),
255+
{ status: 200 },
256+
),
257+
);
264258
const judge = new CerebrasJudge();
265259
await judge.judge("user prompt", { systemPrompt: "be strict" });
266260
const initArg = fetchSpy.mock.calls[0]?.[1];

packages/scenario-runner/src/cerebras-judge.ts

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,16 +86,16 @@ export function extractBalancedJsonObject(raw: string): string | null {
8686

8787
let depth = 0;
8888
let inString = false;
89-
let escape = false;
89+
let escapeNext = false;
9090

9191
for (let i = start; i < raw.length; i += 1) {
9292
const ch = raw[i];
93-
if (escape) {
94-
escape = false;
93+
if (escapeNext) {
94+
escapeNext = false;
9595
continue;
9696
}
9797
if (ch === "\\" && inString) {
98-
escape = true;
98+
escapeNext = true;
9999
continue;
100100
}
101101
if (ch === '"') {
@@ -326,7 +326,10 @@ export class CerebrasJudge {
326326
return data.choices?.[0]?.message?.content ?? "";
327327
} catch (err) {
328328
if (err instanceof CerebrasJudgeError) {
329-
if (!shouldRetryStatus(err.status ?? 0) || attempt >= this.maxRetries) {
329+
if (
330+
!shouldRetryStatus(err.status ?? 0) ||
331+
attempt >= this.maxRetries
332+
) {
330333
throw err;
331334
}
332335
lastError = err;

packages/scenario-runner/src/judge.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*/
99

1010
import type { IAgentRuntime } from "@elizaos/core";
11-
import { ModelType, logger } from "@elizaos/core";
11+
import { logger, ModelType } from "@elizaos/core";
1212
import { isCerebrasEvalEnabled } from "../../../plugins/app-lifeops/test/helpers/lifeops-eval-model.ts";
1313
import {
1414
CerebrasJudge,
@@ -41,15 +41,14 @@ export interface JudgeResult {
4141
raw?: string;
4242
}
4343

44-
function judgeResponseToResult(
45-
response: JudgeResponse,
46-
): JudgeResult | null {
44+
function judgeResponseToResult(response: JudgeResponse): JudgeResult | null {
4745
if (response.score === undefined) return null;
4846
return {
4947
score: response.score,
50-
reason: response.reason && response.reason.length > 0
51-
? response.reason
52-
: "(no reason)",
48+
reason:
49+
response.reason && response.reason.length > 0
50+
? response.reason
51+
: "(no reason)",
5352
verdict: response.verdict,
5453
raw: response.raw,
5554
};

plugins/app-lifeops/src/actions/brief.ts

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,15 @@
11
/**
22
* `BRIEF` umbrella action — Daily Operations / morning-evening-weekly synthesis.
33
*
4-
* PRD: `prd-lifeops-executive-assistant.md` §Daily Operations and Wave-2
5-
* scenario matrix `plan-lifeops-executive-assistant-scenario-matrix.md`. Today
6-
* the agent composes morning/evening/weekly briefs ad-hoc inside chat;
7-
* `BRIEF` makes it a first-class callable surface so the planner has a clean
8-
* target and the Wave-2 scenarios have a stable verb to dispatch.
9-
*
104
* Subactions:
115
* - `compose_morning` — `period: today` by default
126
* - `compose_evening` — `period: today` by default
137
* - `compose_weekly` — `period: this_week` by default
148
*
15-
* Composition rule: pull from each domain (calendar feed, inbox triage,
16-
* life-domain due items, money recurring charges) per the `include` arg, then
17-
* run a single LLM compose pass to render a narrative on top of the
18-
* structured `LifeOpsBriefing` shape. Wave-2 persists briefings; Wave-1 leaves
19-
* them in-memory.
9+
* Pulls from each domain (calendar feed, inbox triage, life-domain due items,
10+
* money recurring charges) per the `include` arg, then runs a single LLM
11+
* compose pass to render a narrative over the structured `LifeOpsBriefing`
12+
* shape. Briefings are kept in-memory.
2013
*
2114
* Owner-only — `hasLifeOpsAccess` (which delegates to `hasOwnerAccess`).
2215
*/
@@ -101,17 +94,13 @@ interface BriefActionParameters {
10194
}
10295

10396
/**
104-
* Composer hooks — overridable for tests and Wave-2 wiring. Each loader
105-
* returns the structured items the briefing renders; an unavailable source
106-
* returns an empty array (the narrative compose pass mentions missing
107-
* sources explicitly).
97+
* Composer hooks — overridable for tests. Each loader returns the structured
98+
* items the briefing renders; an unavailable source returns an empty array
99+
* (the narrative compose pass mentions missing sources explicitly).
108100
*
109-
* TODO Wave-2: replace these with real composition of CALENDAR.feed,
110-
* MESSAGE.triage, OWNER_TODOS/OWNER_REMINDERS due-today, and
111-
* OWNER_FINANCES.recurring_charges via the umbrella dispatchers rather than
112-
* direct loaders. Wave-1 leaves the seams open so the
113-
* unit tests can mock per-domain inputs without standing up the full
114-
* connector graph.
101+
* TODO: replace these with real composition of CALENDAR.feed, MESSAGE.triage,
102+
* OWNER_TODOS/OWNER_REMINDERS due-today, and OWNER_FINANCES.recurring_charges
103+
* via the umbrella dispatchers rather than direct loaders.
115104
*/
116105
export interface BriefComposers {
117106
loadCalendar: (args: {
@@ -142,7 +131,7 @@ const defaultComposers: BriefComposers = {
142131
let activeComposers: BriefComposers = defaultComposers;
143132

144133
/**
145-
* Override the briefing composers. Wave-2 will inject service-backed loaders
134+
* Override the briefing composers. Service-backed loaders can be injected
146135
* here at plugin init. Test-only callers reset between cases with
147136
* `__resetBriefComposersForTests`.
148137
*/

plugins/app-lifeops/src/actions/calendar.ts

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,18 @@
11
/**
22
* CALENDAR — umbrella action for the owner's calendar surface.
33
*
4-
* Routes to the existing handlers for live calendar reads/writes, availability checks,
5-
* meeting-preference updates, and the bulk-reschedule preview. Decomposed in
6-
* Wave 2 W2-C per `docs/audit/HARDCODING_AUDIT.md` §6 #13 / §7 and
7-
* `docs/audit/IMPLEMENTATION_PLAN.md` §5.3:
4+
* Routes to the existing handlers for live calendar reads/writes, availability
5+
* checks, meeting-preference updates, and the bulk-reschedule preview.
86
*
9-
* - `calendly_*` verbs moved out into a Calendly contribution registered
10-
* through `ConnectorRegistry` (W2-B owns the connector wrapper at
11-
* `src/lifeops/connectors/calendly.ts`). The standalone `calendlyAction`
12-
* in `./lib/calendly-handler.ts` is now a top-level Action — Calendly is a
7+
* - `calendly_*` verbs are a Calendly contribution registered through
8+
* `ConnectorRegistry`. The standalone `calendlyAction` in
9+
* `./lib/calendly-handler.ts` is a top-level Action — Calendly is a
1310
* provider, not a CALENDAR subaction.
14-
* - multi-turn scheduling negotiation is delegated through
15-
* PERSONAL_ASSISTANT action=scheduling. It is a long-running stateful
16-
* actor, not a calendar verb (§7, §8.3).
11+
* - Multi-turn scheduling negotiation is delegated through
12+
* PERSONAL_ASSISTANT action=scheduling (long-running stateful actor).
1713
*
18-
* What stays compound here is exactly the irreducible calendar-provider
19-
* surface plus `bulk_reschedule`, which `HARDCODING_AUDIT.md` §7 explicitly
20-
* keeps as a transactional preview-then-commit step.
14+
* What stays compound here is the irreducible calendar-provider surface plus
15+
* `bulk_reschedule` (a transactional preview-then-commit step).
2116
*/
2217

2318
import type {

plugins/app-lifeops/src/actions/conflict-detect.ts

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,18 @@
11
/**
22
* `CONFLICT_DETECT` umbrella action — proactive calendar conflict scanning.
33
*
4-
* PRD: `prd-lifeops-executive-assistant.md` §Calendar And Scheduling. Today
5-
* the agent only detects conflicts reactively (during create_event); the
6-
* scenario matrix calls for the agent to proactively scan today's calendar /
7-
* the week ahead, and to evaluate a proposed event against an attendee set.
8-
*
94
* Subactions:
105
* - `scan_today` — find overlaps on today's calendar
116
* - `scan_week` — find overlaps in the next seven days
127
* - `scan_event_proposal` — given a proposed start/end (and optionally
138
* attendees), find direct conflicts against the
149
* owner's calendar feed
1510
*
16-
* Behavior: reads the calendar feed via the injectable loader (Wave-2 wires
17-
* `CALENDAR.feed`) and compares event windows for overlap. Attendee freebusy
18-
* is NOT consulted unless the loader injects it — the Wave-1 scaffold falls
19-
* back to feed-vs-feed comparison when freebusy is unavailable.
11+
* Reads the calendar feed via the injectable loader and compares event windows
12+
* for overlap. Attendee freebusy is only consulted if the loader injects it.
2013
*
2114
* Owner-or-admin gating: `hasLifeOpsAccess` covers OWNER; ADMIN is also valid
22-
* for read scans of the owner's calendar in the Wave-2 dispatch surface.
15+
* for read scans.
2316
*/
2417

2518
import type {
@@ -98,11 +91,10 @@ export interface ConflictDetectResult {
9891
}
9992

10093
/**
101-
* Loader hook. Wave-2 wires this to `CALENDAR.feed`. Wave-1 returns an empty
102-
* feed by default so tests can inject scenario data.
94+
* Loader hook. Default returns an empty feed so tests can inject scenario data.
10395
*
104-
* TODO Wave-2: also expose a freebusy loader for attendee conflict checks —
105-
* for now `scan_event_proposal` only considers the owner's own feed.
96+
* TODO: expose a freebusy loader for attendee conflict checks — currently
97+
* `scan_event_proposal` only considers the owner's own feed.
10698
*/
10799
export interface ConflictDetectLoader {
108100
loadFeed: (args: {

0 commit comments

Comments
 (0)