From cfd4eaf7cb4f457e338ea35c3dcad851899e5991 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Sat, 14 Mar 2026 18:28:59 +0800 Subject: [PATCH 01/38] feat: move session memory to Redis hot tier --- .gitignore | 1 + README.md | 272 ++-- deno.json | 1 + deno.lock | 43 + mod.ts | 2 +- plans/ContextOverhaul.md | 708 +++++++++ plans/ContextOverhaulTests.md | 694 +++++++++ src/config.test.ts | 416 ++---- src/config.ts | 216 ++- src/handlers/chat.test.ts | 1304 ++++------------- src/handlers/chat.ts | 321 +--- src/handlers/compacting.test.ts | 571 +------- src/handlers/compacting.ts | 37 +- src/handlers/event.test.ts | 1937 ++++++++++--------------- src/handlers/event.ts | 342 +++-- src/handlers/messages.test.ts | 492 +++++++ src/handlers/messages.ts | 118 +- src/index.test.ts | 79 +- src/index.ts | 118 +- src/services/batch-drain.test.ts | 337 +++++ src/services/batch-drain.ts | 183 +++ src/services/client.test.ts | 121 +- src/services/client.ts | 267 +--- src/services/compaction-utils.test.ts | 2 +- src/services/event-extractor.test.ts | 157 ++ src/services/event-extractor.ts | 871 +++++++++++ src/services/graphiti-async.ts | 94 ++ src/services/graphiti-mcp.ts | 248 ++++ src/services/hot-tier-slice.test.ts | 546 +++++++ src/services/logger.test.ts | 45 + src/services/logger.ts | 53 + src/services/opencode-warning.ts | 122 ++ src/services/redis-cache.test.ts | 94 ++ src/services/redis-cache.ts | 247 ++++ src/services/redis-client.test.ts | 330 +++++ src/services/redis-client.ts | 783 ++++++++++ src/services/redis-events.ts | 410 ++++++ src/services/redis-snapshot.ts | 284 ++++ src/services/render-utils.ts | 113 ++ src/services/runtime-teardown.test.ts | 88 ++ src/services/runtime-teardown.ts | 67 + src/services/session-snapshot.test.ts | 1227 ++++++++++++---- src/session.ts | 530 ++++--- src/types/index.ts | 204 ++- 44 files changed, 10553 insertions(+), 4542 deletions(-) create mode 100644 plans/ContextOverhaul.md create mode 100644 plans/ContextOverhaulTests.md create 
mode 100644 src/handlers/messages.test.ts create mode 100644 src/services/batch-drain.test.ts create mode 100644 src/services/batch-drain.ts create mode 100644 src/services/event-extractor.test.ts create mode 100644 src/services/event-extractor.ts create mode 100644 src/services/graphiti-async.ts create mode 100644 src/services/graphiti-mcp.ts create mode 100644 src/services/hot-tier-slice.test.ts create mode 100644 src/services/opencode-warning.ts create mode 100644 src/services/redis-cache.test.ts create mode 100644 src/services/redis-cache.ts create mode 100644 src/services/redis-client.test.ts create mode 100644 src/services/redis-client.ts create mode 100644 src/services/redis-events.ts create mode 100644 src/services/redis-snapshot.ts create mode 100644 src/services/render-utils.ts create mode 100644 src/services/runtime-teardown.test.ts create mode 100644 src/services/runtime-teardown.ts diff --git a/.gitignore b/.gitignore index 7cfe7de..a3ee37a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .opencode/ +.swarm/ dist/ diff --git a/README.md b/README.md index e999549..c6a07bc 100644 --- a/README.md +++ b/README.md @@ -1,55 +1,86 @@ # opencode-graphiti -OpenCode plugin that provides persistent memory via a -[Graphiti](https://github.com/getzep/graphiti) knowledge graph. +OpenCode plugin that provides persistent memory via +[FalkorDB](https://www.falkordb.com/)/Redis and asynchronous +[Graphiti](https://github.com/getzep/graphiti) knowledge-graph consolidation. ## Motivation Long-running AI coding sessions depend on persistent memory to stay on track. -Graphiti's MCP server is the intended backbone for this, but in practice it is -unreliable — connections drop, queries time out, and ingestion silently fails. -When the context window fills up and OpenCode triggers compaction, the -summarizer discards details that were never persisted. 
The result is **context -rot**: the agent loses track of recent decisions, re-explores solved problems, -and drifts away from the original goal. - -This plugin exists to close that gap. It captures chat histories and project -facts into Graphiti when the server is healthy, then **re-injects them at the -start of every session and before every compaction** so the agent is always -reminded of recent project context — regardless of what survived the summary. +Graphiti's MCP server is a powerful knowledge-graph backend, but synchronous +calls to it on every message add latency and introduce a single point of failure +— connections drop, queries time out, and ingestion silently fails. When the +context window fills up and OpenCode triggers compaction, the summarizer +discards details that were never persisted. The result is **context rot**: the +agent loses track of recent decisions, re-explores solved problems, and drifts +away from the original goal. + +This plugin exists to close that gap. It uses **FalkorDB/Redis as the hot-path +store** for structured session events, priority-tiered snapshots, and cached +memory — all readable in sub-millisecond time. Graphiti remains the long-term +knowledge graph but is accessed **only asynchronously**, off the critical path. +The plugin re-injects session context before every LLM call and before every +compaction so the agent is always reminded of recent project context — +regardless of what survived the summary and regardless of Graphiti availability. 
## Overview -This plugin connects to a Graphiti MCP server and: +This plugin uses a two-tier architecture: -- Searches Graphiti for relevant facts and entities on each user message -- Injects memories into the last user message as a `` block via +**Hot path (FalkorDB/Redis — synchronous, sub-ms):** + +- Stores structured session events, priority-tiered snapshots, and cached + Graphiti results in Redis +- Reads cached memory on each user message and injects it into the last user + message as a `` block via `experimental.chat.messages.transform`, keeping the system prompt static for prefix caching -- Detects context drift using Jaccard similarity and re-injects when the - conversation topic shifts -- Buffers user and assistant messages, flushing them to Graphiti on idle or - before compaction -- Preserves key facts during context compaction +- Composes the same `` envelope for compaction context via + `experimental.session.compacting` +- Detects context drift using Jaccard similarity on cached fact UUIDs and + schedules an async cache refresh when the topic shifts + +**Async tier (Graphiti MCP — fire-and-forget, non-blocking):** + +- Drains buffered session events to Graphiti as episodes on idle or before + compaction +- Refreshes the Redis memory cache from Graphiti search results in the + background +- Provides cross-session recall via vector/graph search, cached in Redis for + chat-time injection - Saves compaction summaries as episodes so knowledge survives across boundaries -- Annotates stale facts and filters expired ones automatically -- Scopes memories per project (and per user) using directory-based group IDs + +No Graphiti call ever blocks a hook return. ## Prerequisites +### FalkorDB / Redis + +A running [FalkorDB](https://www.falkordb.com/) instance accessible via the +Redis protocol. 
The easiest way to start one: + +```bash +docker run -p 6379:6379 falkordb/falkordb:latest +``` + +### Graphiti MCP Server + A running [Graphiti MCP server](https://github.com/getzep/graphiti/tree/main/mcp_server) -accessible over HTTP. The easiest way to set one up: +accessible over HTTP: ```bash -# Clone and start with Docker Compose git clone https://github.com/getzep/graphiti.git cd graphiti/mcp_server docker compose up -d ``` -This starts the MCP server at `http://localhost:8000/mcp` with a FalkorDB -backend. +This starts the MCP server at `http://localhost:8000/mcp`. + +> **Note:** Graphiti is optional for basic operation. If Graphiti is +> unavailable, the plugin continues to function with FalkorDB/Redis-sourced +> session memory; only the `` section (long-term +> cross-session facts) will be empty until Graphiti comes online. ## Installation @@ -101,95 +132,151 @@ automatically. Supported config locations, in lookup order: -1. The provided project directory: `package.json#graphiti`, `.graphitirc`, and other standard `cosmiconfig` `graphiti` filenames -2. Standard global/home `graphiti` config locations discovered by `cosmiconfig` (for example `~/.graphitirc`) +1. The provided project directory: `package.json#graphiti`, `.graphitirc`, and + other standard `cosmiconfig` `graphiti` filenames +2. Standard global/home `graphiti` config locations discovered by `cosmiconfig` + (for example `~/.graphitirc`) 3. Legacy fallback: `~/.config/opencode/.graphitirc` -Example `.graphitirc`: +### Nested Config Shape (recommended) ```jsonc { - // Graphiti MCP server endpoint - "endpoint": "http://localhost:8000/mcp", - - // Prefix for project group IDs (e.g. 
"opencode-my-project") - "groupIdPrefix": "opencode", - - // Jaccard similarity threshold (0–1) below which memory is re-injected - // Lower values mean the topic must drift further before re-injection - "driftThreshold": 0.5, - - // Number of days after which facts are annotated as stale - "factStaleDays": 30 + "falkordb": { + // FalkorDB Redis URL + "redisEndpoint": "redis://localhost:6379", + // Max events per drain batch + "batchSize": 20, + // Max combined body bytes per drain batch + "batchMaxBytes": 51200, + // Session event TTL in seconds (default: 24 h) + "sessionTtlSeconds": 86400, + // Memory cache TTL in seconds (default: 10 min) + "cacheTtlSeconds": 600, + // Max drain retry attempts before dead-lettering + "drainRetryMax": 3 + }, + "graphiti": { + // Graphiti MCP server endpoint + "endpoint": "http://localhost:8000/mcp", + // Prefix for project group IDs (e.g. "opencode-my-project") + "groupIdPrefix": "opencode", + // Jaccard similarity threshold (0–1) below which cache is refreshed + "driftThreshold": 0.5, + // Number of days after which facts are annotated as stale + "factStaleDays": 30 + } } ``` All fields are optional — defaults (shown above) are used for any missing -values. +values. Nested values take precedence when both forms are supplied. 
+ +### Legacy Top-Level Keys + +For backward compatibility, the following top-level keys are still accepted and +map to their nested equivalents: + +| Legacy key | Nested equivalent | +| ------------------- | ---------------------------- | +| `endpoint` | `graphiti.endpoint` | +| `groupIdPrefix` | `graphiti.groupIdPrefix` | +| `driftThreshold` | `graphiti.driftThreshold` | +| `factStaleDays` | `graphiti.factStaleDays` | +| `redisEndpoint` | `falkordb.redisEndpoint` | +| `batchSize` | `falkordb.batchSize` | +| `batchMaxBytes` | `falkordb.batchMaxBytes` | +| `sessionTtlSeconds` | `falkordb.sessionTtlSeconds` | +| `cacheTtlSeconds` | `falkordb.cacheTtlSeconds` | +| `drainRetryMax` | `falkordb.drainRetryMax` | ## How It Works -### Memory Search and Caching (`chat.message`) +### Injection Format + +The plugin injects a single canonical `` XML envelope into the +last user message. This envelope is assembled from Redis hot-tier state and +contains structured sections such as ``, ``, +``, ``, ``, and an optional +``. + +When cached Graphiti results are available, a nested `` +section is included with `fact_uuids` and `node_refs` attributes. On a cold +first turn or when Graphiti is unreachable, `` is simply +absent — the rest of the session memory is always available from FalkorDB/Redis. + +```xml + + Continue the current task. + Implement the new feature. + Use Redis for the hot path. + src/index.ts + No synchronous Graphiti calls. + + + + + +``` + +### Hot-Path Memory Preparation (`chat.message`) -On each user message the plugin searches Graphiti for facts and entities -relevant to the message content. Results are split into project and user scopes -(70% / 30% budget), deduplicated, filtered for validity, annotated with -staleness if older than `factStaleDays`, and formatted as Markdown. The -formatted context is cached on the session state for the messages transform hook -to pick up. 
+On each user message the plugin reads session state from Redis: -On the very first message of a session, the plugin also loads the most recent -session snapshot episode to prime the conversation with prior context. +- Recent structured session events (`session:{id}:events`) +- The priority-tiered snapshot (`session:{id}:snapshot`) +- The cached Graphiti memory (`memory-cache:{groupId}`) -The injection budget is calculated dynamically: 5% of the model's context limit -(resolved from the provider list) multiplied by 4 characters per token. +These are composed into a `` envelope and staged for the +transform hook. All reads are from Redis (sub-ms); no Graphiti call is made on +this path. ### User Message Injection (`experimental.chat.messages.transform`) -A separate hook reads the cached memory context and prepends it to the last user -message as a `` block. The `data-uuids` attribute lists -the fact UUIDs included in the injection, which are tracked in -`visibleFactUuids` so subsequent searches can filter out already-visible facts. -This approach keeps the system prompt static, enabling provider-side prefix -caching, and avoids influencing session titles. The cache is cleared after -injection so stale context is not re-injected on subsequent LLM calls within the -same turn. +The transform hook reads the prepared `` envelope and prepends +it to the last user message. Fact UUIDs from the `` section +are tracked in `visibleFactUuids` so subsequent cache refreshes can filter out +already-visible facts. This approach keeps the system prompt static, enabling +provider-side prefix caching, and avoids influencing session titles. The +prepared injection is cleared after use so stale context is not re-injected on +subsequent LLM calls within the same turn. -### Drift-Based Re-injection (`chat.message`) +### Drift Detection and Async Cache Refresh -After the first injection, the plugin monitors for context drift on every user -message. 
It searches Graphiti for the current message and compares the returned -fact UUIDs against the previously injected set using Jaccard similarity. When -similarity drops below `driftThreshold` (default 0.5), the memory cache is -refreshed with project-scoped results only (no user scope). +On each user message, the plugin compares the current query against the query +that produced the cached memory. When Jaccard similarity on cached fact UUIDs +drops below `driftThreshold` (default 0.5), an **async** cache refresh is +scheduled via Graphiti MCP. The current cached context is still injected +immediately; the refreshed cache becomes available on the next message. This +trades one message of staleness for eliminating synchronous Graphiti latency +entirely. -### Message Buffering (`event`) +### Event Extraction and Buffering (`event`) -User and assistant messages are buffered in memory as they arrive. The plugin -listens on `message.part.updated` to capture assistant text as it streams, and -on `message.updated` to finalize completed assistant replies. Buffered messages -are flushed to Graphiti as episodes: +User and assistant messages are captured as structured `SessionEvent` objects +and stored in Redis (`session:{id}:events`). The plugin listens on +`message.part.updated` to buffer assistant text as it streams, and on +`message.updated` to finalize completed assistant replies. -- **On idle** (`session.idle`): when the session becomes idle with at least 50 - bytes of buffered content. -- **Before compaction** (`session.compacted`): all buffered messages are flushed - immediately (no minimum size) so nothing is lost. +Events are also enqueued for async drain to Graphiti: -If the last buffered message is from the user (i.e. no assistant reply was -captured), the plugin fetches the latest assistant message from the session API -as a fallback before flushing. +- **On idle** (`session.idle`): buffered events are drained and the + priority-tiered snapshot is rebuilt. 
+- **After compaction** (`session.compacted`): all pending events are drained + immediately so nothing is lost. -### Compaction Preservation (`session.compacted` + `experimental.session.compacting`) +### Compaction Preservation Compaction is handled entirely by OpenCode's native compaction mechanism. The plugin participates in two ways: -1. **Before compaction** (`experimental.session.compacting`): The plugin injects - known facts and entities into the compaction context using the same 70% / 30% - project/user budget split, so the summarizer preserves important knowledge. -2. **After compaction** (`session.compacted`): The compaction summary is saved - as an episode to Graphiti, ensuring knowledge survives across compaction - boundaries. +1. **Before compaction** (`experimental.session.compacting`): The plugin reads + the snapshot and cached memory from Redis and composes the same canonical + `` envelope used for chat injection, so the summarizer + preserves important knowledge. No Graphiti call is made. +2. **After compaction** (`session.compacted`): The snapshot is rebuilt from + Redis events and the compaction summary is enqueued for async drain to + Graphiti, ensuring knowledge survives across compaction boundaries. ### Project Scoping @@ -207,7 +294,12 @@ process. MIT -## Acknowledgement +## Acknowledgements + +The structured event extraction, priority-tiered snapshots, and session +continuity design in this plugin are inspired by +[context-mode](https://github.com/mksglu/context-mode) by +[Mert Köseoğlu](https://github.com/mksglu). -This project is inspired by -[opencode-openmemory](https://github.com/happycastle114/opencode-openmemory) +The original plugin concept is inspired by +[opencode-openmemory](https://github.com/happycastle114/opencode-openmemory). 
diff --git a/deno.json b/deno.json index 48caed1..7cbc3ea 100644 --- a/deno.json +++ b/deno.json @@ -33,6 +33,7 @@ "@opencode-ai/plugin": "npm:@opencode-ai/plugin@^1.1.53", "@opencode-ai/sdk": "npm:@opencode-ai/sdk@^1.1.53", "cosmiconfig": "npm:cosmiconfig@9.0.0", + "ioredis": "npm:ioredis@^5.7.0", "zod": "npm:zod@4.3.6" }, "exports": { diff --git a/deno.lock b/deno.lock index 3155228..4bab00e 100644 --- a/deno.lock +++ b/deno.lock @@ -17,6 +17,7 @@ "npm:@opencode-ai/plugin@^1.1.53": "1.1.53", "npm:@opencode-ai/sdk@^1.1.53": "1.1.53", "npm:cosmiconfig@9.0.0": "9.0.0", + "npm:ioredis@^5.7.0": "5.10.0", "npm:zod@4.3.6": "4.3.6" }, "jsr": { @@ -97,6 +98,9 @@ "hono" ] }, + "@ioredis/commands@1.5.1": { + "integrity": "sha512-JH8ZL/ywcJyR9MmJ5BNqZllXNZQqQbnVZOqpPQqE1vHiFgAw4NHbvE0FOduNU8IX9babitBT46571OnPTT0Zcw==" + }, "@modelcontextprotocol/sdk@1.25.2_zod@4.3.6_ajv@8.17.1_express@5.2.1": { "integrity": "sha512-LZFeo4F9M5qOhC/Uc1aQSrBHxMrvxett+9KLHt7OhcExtoiRN9DKgbZffMP/nxjutWDQpfMDfP3nkHI4X9ijww==", "dependencies": [ @@ -190,6 +194,9 @@ "callsites@3.1.0": { "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==" }, + "cluster-key-slot@1.1.2": { + "integrity": "sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==" + }, "content-disposition@1.0.1": { "integrity": "sha512-oIXISMynqSqm241k6kcQ5UwttDILMK4BiurCfGEREw6+X9jkkpEe5T9FZaApyLGGOnFuyMWZpdolTXMtvEJ08Q==" }, @@ -232,6 +239,9 @@ "ms" ] }, + "denque@2.1.0": { + "integrity": "sha512-HVQE3AAb/pxF8fQAoiqpvg9i3evqug3hoiwakOyZAwJm+6vZehbkYXZ0l4JxS+I3QxM97v5aaRNhj8v5oBhekw==" + }, "depd@2.0.0": { "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==" }, @@ -413,6 +423,20 @@ "inherits@2.0.4": { "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" }, + "ioredis@5.10.0": { + "integrity": 
"sha512-HVBe9OFuqs+Z6n64q09PQvP1/R4Bm+30PAyyD4wIEqssh3v9L21QjCVk4kRLucMBcDokJTcLjsGeVRlq/nH6DA==", + "dependencies": [ + "@ioredis/commands", + "cluster-key-slot", + "debug", + "denque", + "lodash.defaults", + "lodash.isarguments", + "redis-errors", + "redis-parser", + "standard-as-callback" + ] + }, "ipaddr.js@1.9.1": { "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==" }, @@ -450,6 +474,12 @@ "lines-and-columns@1.2.4": { "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==" }, + "lodash.defaults@4.2.0": { + "integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==" + }, + "lodash.isarguments@3.1.0": { + "integrity": "sha512-chi4NHZlZqZD18a0imDHnZPrDeBbTtVN7GXMwuGdRH9qotxAjYs3aVLKc7zNOG9eddR5Ksd8rvFEBc9SsggPpg==" + }, "math-intrinsics@1.1.0": { "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==" }, @@ -547,6 +577,15 @@ "unpipe" ] }, + "redis-errors@1.2.0": { + "integrity": "sha512-1qny3OExCf0UvUV/5wpYKf2YwPcOqXzkwKKSmKHiE6ZMQs5heeE/c8eXK+PNllPvmjgAbfnsbpkGZWy8cBpn9w==" + }, + "redis-parser@3.0.0": { + "integrity": "sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A==", + "dependencies": [ + "redis-errors" + ] + }, "require-from-string@2.0.2": { "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==" }, @@ -639,6 +678,9 @@ "side-channel-weakmap" ] }, + "standard-as-callback@2.1.0": { + "integrity": "sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A==" + }, "statuses@2.0.2": { "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==" }, @@ -688,6 +730,7 @@ "npm:@opencode-ai/plugin@^1.1.53", "npm:@opencode-ai/sdk@^1.1.53", "npm:cosmiconfig@9.0.0", + 
"npm:ioredis@^5.7.0", "npm:zod@4.3.6" ] } diff --git a/mod.ts b/mod.ts index 401c73a..9cf8633 100644 --- a/mod.ts +++ b/mod.ts @@ -1 +1 @@ -export * from "./src/index.ts"; +export { graphiti } from "./src/index.ts"; diff --git a/plans/ContextOverhaul.md b/plans/ContextOverhaul.md new file mode 100644 index 0000000..50aeafb --- /dev/null +++ b/plans/ContextOverhaul.md @@ -0,0 +1,708 @@ +# Context Overhaul — FalkorDB Hot Path + Async Graphiti Consolidation + +**Status:** Planning **Date:** 2026-03-13 (revised) + +--- + +## 1 Problem + +The current plugin routes every write and every query through Graphiti (via +MCP). Each `addEpisode` triggers LLM-backed entity extraction (~200–400 ms). +Each `searchFacts`/`searchNodes` issues an embedding + vector search on the hot +path of `chat.message`, adding 100–300 ms of synchronous latency per user +message. Compaction context augmentation also calls Graphiti synchronously with +no timeout. If Graphiti or its backing LLM is slow or down, the session degrades +silently with no local fallback. + +The current design also copies raw message strings rather than extracting +structured session events, and has no local searchable session history. + +--- + +## 2 Goals + +1. **Zero Graphiti on the hot path.** No synchronous Graphiti call may block + `chat.message`, `messages.transform`, `session.compacting`, or any + per-message event hook. All Graphiti interaction is asynchronous. +2. **Session continuity from local state.** FalkorDB/Redis owns verbatim event + history, structured snapshots, and cached memory. Compaction survives + Graphiti outages. +3. **Preserved long-term memory.** Graphiti's vector search, entity extraction, + and cross-session graph remain available — populated asynchronously and + cached in Redis for chat-time injection. +4. **Minimal async backend.** Graphiti MCP is the sole consolidation backend. 
It + is private infrastructure — hidden behind the async worker, never exposed to + users, and never called on the hot path. +5. **Structured event extraction.** Context-mode-style categorised events with + priority-tiered snapshot generation, not raw message copying. + +--- + +## 3 Architecture + +``` +opencode-graphiti plugin (TypeScript / Deno) + │ + ├── Hot path — ioredis → FalkorDB :6379 (Redis protocol) + │ WRITES (every event, sub-ms): + │ LPUSH session:{id}:events + │ SET session:{id}:snapshot + │ LPUSH drain:pending:{groupId} + │ READS (chat.message / compacting, sub-ms): + │ LRANGE session:{id}:events (recent session context) + │ GET session:{id}:snapshot (post-compaction restore) + │ GET memory-cache:{groupId} (cached Graphiti outputs) + │ + └── Async tier — Graphiti MCP (configured via `graphiti.endpoint`) + REQUIRED tool capabilities: + - add_memory + - search_memory_facts + - search_nodes + - get_episodes + - get_status (health check; used to verify MCP reachability) + All calls are async and never block hook returns. +``` + +### 3.1 Connectivity + +| Target | Protocol | Default Port | Connection | +| -------- | --------------- | ------------ | ----------------------------------------------------- | +| FalkorDB | Redis (ioredis) | 6379 | Direct TCP; configured via `falkordb.redisEndpoint` | +| Graphiti | MCP over HTTP | 8000 | Direct MCP client; configured via `graphiti.endpoint` | + +**Integration decision (final):** Graphiti MCP is the async consolidation +backend. Direct Graphiti HTTP is not used; all Graphiti interaction goes through +the configured MCP endpoint. + +**Deployment note:** both FalkorDB and Graphiti MCP are operator-provisioned +services. The plugin connects to whatever addresses are supplied in config. + +**Hot-path rule:** hot-path hooks never talk to MCP or Graphiti synchronously. +All MCP communication is queued, async, and hidden behind the plugin's local hot +path. + +**User-facing invariant:** MCP is private infrastructure. 
Users see only the +plugin's existing memory features and the new context-mode-style resumability — +no extra workflow, no manual sync, no awareness that MCP exists. + +--- + +## 4 Data Model + +### 4.1 Structured Event Schema + +Events are extracted from hooks, not copied verbatim. The taxonomy is designed +to preserve the useful parts of context-mode: active file state, task state, +decisions, blockers, environment changes, and searchable local history. + +```typescript +interface SessionEvent { + id: string; // UUID + ts: number; // epoch ms + category: EventCategory; + priority: 0 | 1 | 2 | 3 | 4; + role: "user" | "assistant" | "tool" | "system"; + summary: string; // <= 200 chars, human-readable + body?: string; // full content, truncated to 4 KB + refs?: string[]; // file paths, task IDs, session IDs, UUIDs + metadata?: Record; // tool name, exit code, cwd, env deltas +} + +type EventCategory = + | "task.create" + | "task.update" + | "task.complete" + | "decision" + | "preference" + | "rule.load" + | "file.read" + | "file.write" + | "file.edit" + | "file.search" + | "cwd.change" + | "env.change" + | "git.activity" + | "error" + | "subagent.start" + | "subagent.finish" + | "integration.call" + | "intent" + | "data.import" + | "discovery" + | "message" + | "session.meta"; +``` + +### 4.1.1 Extraction Targets + +| Context-mode benefit to preserve | SessionEvent categories | Notes | +| --------------------------------- | ----------------------------------------------------- | -------------------------------------------------------------------- | +| Active files and code touchpoints | `file.read`, `file.write`, `file.edit`, `file.search` | Track most recent touched files, not just raw tool output. | +| Task state and progress | `task.create`, `task.update`, `task.complete` | Preserve current goal, checkpoints, and completion markers. | +| Decisions and user corrections | `decision`, `preference` | Highest-priority resumability signal. 
| +| Rules / operating constraints | `rule.load` | Capture AGENTS/plugin rules loaded into the session. | +| Errors and unresolved blockers | `error` | Include failing command/tool, status, and whether resolved. | +| Environment / cwd state | `cwd.change`, `env.change` | Preserve working directory and setup changes. | +| Git milestones | `git.activity` | Branch, commit, merge, push, stash, rebase milestones when present. | +| Subagent orchestration | `subagent.start`, `subagent.finish` | Track dispatched work and returned outcomes. | +| Remote/tool-service usage | `integration.call` | Track Graphiti MCP calls and other remote tool/service interactions. | +| Large pasted/reference data | `data.import`, `discovery` | Store pointers/summaries instead of re-injecting full payloads. | +| Session framing | `intent`, `session.meta`, `message` | Preserve intent, compaction markers, and low-value chat residue. | + +### 4.2 Redis Key Layout + +| Key | Type | Content | TTL | +| ----------------------------- | ------ | ------------------------------------------------ | ------ | +| `session:{id}:events` | List | JSON `SessionEvent` objects | 24 h | +| `session:{id}:snapshot` | String | Priority-tiered XML snapshot (≤ 3 KB) | 48 h | +| `memory-cache:{groupId}` | String | Serialized Graphiti search results | 10 min | +| `memory-cache:{groupId}:meta` | Hash | `lastQuery`, `lastRefresh`, `factUuids` | 10 min | +| `drain:pending:{groupId}` | List | Serialized drain-batch entries awaiting Graphiti | 7 d | +| `drain:cursor:{groupId}` | String | Last successfully drained event ID | 7 d | + +### 4.3 Priority-Tiered Snapshot Format + +Generated at `session.idle` and `session.compacted` from structured Redis +events. Sections are filled in priority order; lower-priority sections are +truncated first when the snapshot budget (3 KB) is exceeded. 
+ +| Priority | Sections | Source categories | +| -------- | ------------------------------------------------ | ------------------------------------------------ | +| P0 | `decisions`, `constraints`, `active_task` | `decision`, `preference`, `rule.load`, `task.*` | +| P1 | `active_files`, `recent_edits`, `subagents_open` | `file.*`, `subagent.start` | +| P2 | `errors`, `blockers`, `environment` | `error`, `cwd.change`, `env.change` | +| P3 | `git_state`, `subagents_done`, `open_questions` | `git.activity`, `subagent.finish`, `task.update` | +| P4 | `discoveries`, `references`, `residual_messages` | `discovery`, `data.import`, `message` | + +```xml + + + Plugin hot path must talk directly to FalkorDB; Graphiti remains async behind MCP. + + + + Graphiti stays off the hot path; Redis owns compaction survival. + + + + Redesign context pipeline around FalkorDB hot path. + Planning revised; Graphiti MCP endpoint confirmed reachable. + + + + plans/ContextOverhaul.md + + + + No open errors at snapshot time. + + + + /workspace/project + + + + Graphiti bulk ingestion is documented, but docs warn it skips edge invalidation. + + +``` + +### 4.4 Cold Tier (Graphiti — unchanged schema) + +No changes to Graphiti's internal entity/fact/node model. The plugin sends the +same semantic payloads through MCP tool calls (`add_memory`, +`search_memory_facts`, `search_nodes`, `get_episodes`). 
+ +--- + +## 5 Hook Mapping + +### 5.1 Hot Path (synchronous, sub-ms) + +| Hook | Action | +| -------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------- | +| `event: message.part.updated` | Buffer assistant part in memory (unchanged) | +| `event: message.updated` (completed) | Extract `SessionEvent` → `LPUSH session:{id}:events` | +| `chat.message` | Extract user `SessionEvent` → `LPUSH`; read `memory-cache:{groupId}` + recent session state from Redis; prepare transform input | +| `event: session.idle` | Build priority-tiered snapshot → `SET session:{id}:snapshot`; trigger async cache refresh + drain | +| `event: session.compacted` | Build snapshot from events → `SET session:{id}:snapshot`; enqueue drain batch | +| `experimental.session.compacting` | Compose the same canonical `` envelope for compaction from Redis snapshot + cached memory | +| `experimental.chat.messages.transform` | Actual chat-time injection point: compose canonical `` with optional `` from Redis-backed state | +| `event: session.created` | `EXPIRE` reset; bootstrap best-effort async warmup / cross-session primer only; cannot inject directly | + +### 5.2 Async Tier (fire-and-forget, non-blocking) + +| Trigger | Action | +| ------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | +| `session.idle` / `session.compacted` / buffer threshold | Drain pending events through Graphiti MCP `add_memory` | +| `session.idle` / first `chat.message` | Refresh `memory-cache:{groupId}` via MCP `search_memory_facts` + `search_nodes` | +| `session.created` | Best-effort async cross-session primer via MCP `get_episodes`; prewarm reusable cache if timing allows | + +**No Graphiti call ever blocks a hook return.** + +### 5.3 Backend Rule + +| Consolidation backend | When used | Constraint | +| 
--------------------- | --------- | ------------------------------------------------------------------------ | +| Graphiti MCP | Always | Used only behind the async consolidation worker; never in hot-path hooks | + +--- + +## 6 Cached Memory Strategy + +### 6.1 Problem + +The current design calls `searchFacts` + `searchNodes` synchronously on every +`chat.message` (or on drift detection). This puts Graphiti + embedding latency +on the critical path. + +### 6.2 Solution: Redis-Resident Memory Cache + +``` +Session starts (`event: session.created`) + ├── [sync] Initialize empty session state; restore reusable cache keys if present + ├── [async] Fire-and-forget: best-effort warm `memory-cache:{groupId}` via MCP `get_episodes` + └── [future option, non-final] Schedule proactive `search_memory_facts`/`search_nodes` refresh for broader project scope + +First user message arrives (`chat.message`) + ├── [sync] Read memory-cache:{groupId} from Redis (sub-ms) + │ If cache hit + not stale → make cached facts/nodes available to the transform hook + │ If cache miss or stale → use last cached value (or empty) + ├── [sync] Read session:{id}:events from Redis for session context + ├── [sync] Prepare Redis-backed inputs for `experimental.chat.messages.transform` + └── [async] Fire-and-forget: refresh cache from Graphiti MCP using this prompt + search_memory_facts + search_nodes → parse results → SET memory-cache:{groupId} +``` + +### 6.3 Cache Lifecycle + +| Event | Cache Action | +| --------------------- | ----------------------------------------------------------------------------------------------- | +| Plugin startup | Restore Redis clients only; no synchronous Graphiti warmup | +| `session.created` | Best-effort async prewarm of reusable cache and cross-session primer | +| first `chat.message` | Read cache (sync); inject if available via transform; schedule prompt-specific refresh | +| later `chat.message` | Read cache (sync); schedule refresh if stale or drifted (async) | +| 
`session.idle` | Refresh cache (async) — incorporates recently drained facts | +| Drain completes | Refresh cache (async) — new facts now searchable | +| Cache miss / cold run | Return empty `persistent_memory`; first injection still includes Redis-sourced `session_memory` | + +### 6.4 New-Session First-Turn Behavior + +Because OpenCode does not expose `SessionStart`, the plan relies on the +combination of `event: session.created`, `chat.message`, and +`experimental.chat.messages.transform`: + +- `event: session.created` cannot inject memories directly. It only bootstraps + async warmup and restores reusable cached state. +- The first actual injection point in a brand-new session is the first + `experimental.chat.messages.transform` after the user's opening message. +- `persistent_memory` on that first reply is **best-effort**, not guaranteed. +- If `memory-cache:{groupId}` was already warm from prior work, or if the + `session.created` bootstrap finishes before the first transform runs, relevant + `persistent_memory` may appear on the first reply. +- If the cache is cold, the first reply still receives `session_memory` from + FalkorDB, while `persistent_memory` may be empty until the async MCP refresh + completes. +- In practice this means long-term memory is often cold-first-turn / warmer on a + later turn, while session continuity remains available immediately. + +### 6.5 Drift Detection (Revised) + +Drift detection currently calls `searchFacts` synchronously. Under the new +design: + +- On each `chat.message`, compare the user's message against the query that + produced the current cache. +- If the topic has drifted (Jaccard on cached fact UUIDs < threshold), schedule + an async cache refresh with the new query. The _current_ cached context is + still injected immediately; the refreshed cache is available for the next + message. +- This trades one message of staleness for eliminating synchronous Graphiti + latency entirely. 
+ +--- + +## 7 Injection Strategy + +Injected continuity context uses one canonical `<session_memory>` envelope with +an optional nested `<persistent_memory>` section. The Session Guide is assembled +from Redis hot-tier state and optional Graphiti cache data. + +Historically, the plugin's Graphiti-derived memory was injected as a standalone +`...` block. This plan keeps the caller's +current naming (`session_memory` + `persistent_memory`) and treats the older +`` shape as a legacy Graphiti-only serialization detail, not +as a separate top-level layer. Its UUID metadata maps cleanly to +`<persistent_memory fact_uuids="...">` in the canonical format below. + +```xml + + Continue the current task without asking for recap. + + + Redesign plugin around FalkorDB hot path. + + + + Keep Graphiti off the hot path; use MCP only in async consolidation. + + + + plans/ContextOverhaul.md + + + + Preserve context-mode-style resumability behavior. + + + + + + + + + + +``` + +### 7.1 Session Guide Sections + +The injected sections intentionally mirror context-mode's continuity model and +should be rendered in this order: + +| Section | Source | Required | Notes | +| ------------------- | -------------------------------- | ---------- | -------------------------------------------------------------- | +| `last_request` | latest user prompt / task intent | Yes | Primary resume anchor. | +| `active_tasks` | structured task events | Yes | Checkbox/task-state style when rendered. | +| `key_decisions` | decision + preference events | Yes | Preserve user corrections and constraints. | +| `files_in_play` | recent file events | Yes | Mirrors context-mode active-files continuity. | +| `project_rules` | loaded AGENTS/rules | Yes | Must survive compaction. | +| `unresolved_errors` | open error events | If present | Show only unresolved blockers. | +| `git_state` | git activity events | If present | Include only meaningful milestones. | +| `subagent_work` | subagent events | If present | Summaries only, not raw logs. 
| +| `session_snapshot` | priority-tiered snapshot | If present | Compact state restore layer. | +| `persistent_memory` | Graphiti cache | Optional | Canonical successor to the legacy `` block. | + +### 7.2 Budget Allocation + +| Section group | Budget | Source | Latency | +| ------------------------------------------------------------------- | ------------------------------ | -------------------------- | ------- | +| Session Guide core (`last_request`, tasks, decisions, files, rules) | up to 1 600 chars | Redis events + snapshot | < 1 ms | +| Session snapshot detail | up to 800 chars | Redis `GET` | < 1 ms | +| Persistent memory | remainder of 5% context budget | Redis `GET memory-cache:*` | < 1 ms | + +`persistent_memory` is empty (not an error) if cache has not been warmed yet, +the session is on its first cold turn, or Graphiti is unreachable. The rest of +the Session Guide is always available because it is sourced from FalkorDB/Redis. + +### 7.3 Compatibility Note + +- **Current plan:** emit one canonical `<session_memory>` envelope with optional + `<persistent_memory>`. +- **Historical implementation:** Graphiti-derived memory previously appeared as + ``. +- **Migration stance:** preserve UUID/fact metadata semantics, but do not + describe or reintroduce the old shape as a separate "layer" in new plan text. 
+ +--- + +## 8 Async Batch Drain + +### 8.1 Drain Policy + +Events are batched in a Redis list (`drain:pending:{groupId}`) and drained to +Graphiti asynchronously: + +| Parameter | Value | Rationale | +| --------------- | ------------------------------------------------------------------ | ---------------------------------------- | +| Max batch size | 20 events | Keeps Graphiti LLM call duration bounded | +| Max batch bytes | 50 KB combined body | Avoids oversized episode payloads | +| Drain triggers | `session.idle`, `session.compacted`, buffer threshold | Natural pause points | +| Retry policy | Exponential backoff, 3 attempts, then dead-letter | Bounded retry cost | +| Idempotency | Each event has a UUID; Graphiti deduplicates by episode name+group | At-least-once safe | + +**Important Graphiti constraint:** the drain path uses standard `add_memory` +sequentially per `groupId` to ensure normal entity invalidation semantics on an +active agent graph. Bulk ingestion (`add_episode_bulk`, if available) is +documented by Graphiti as skipping edge invalidation and is reserved for +bootstrap/backfill scenarios only; it is not part of the current plan. + +### 8.2 Ordering Guarantees + +- Events within a session are appended to the Redis list in order. +- Drain reads from the list head (FIFO). A cursor (`drain:cursor:{groupId}`) + tracks the last successfully drained event ID. +- If a batch partially fails, the cursor is not advanced; the entire batch is + retried. +- Cross-session ordering is best-effort (sessions drain independently). + +### 8.3 Crash Recovery + +- On plugin restart, the drain scheduler reads `drain:pending:{groupId}` and + `drain:cursor:{groupId}` from Redis. +- Events after the cursor are re-drained. Because drain is idempotent + (UUID-keyed), duplicates are harmless. +- If Redis itself is lost, pending events in memory are lost. 
This is acceptable + because they are session-local and Graphiti is the durable store — the lost + events simply won't be consolidated into the knowledge graph. + +### 8.4 Dead-Letter Handling + +After 3 failed drain attempts for a batch: + +- Log a warning with the batch event IDs. +- Move the batch to `drain:dead:{groupId}` (Redis list, 30-day TTL). +- Advance the cursor past the failed batch. +- A manual retry command (or scheduled job) can re-enqueue dead-letter batches. + +--- + +## 9 Compaction Flow (Revised) + +``` +session.compacting hook fires + ├── [sync] GET session:{id}:snapshot from Redis + ├── [sync] GET memory-cache:{groupId} from Redis (cached Graphiti facts) + ├── [sync] Compose the same `` envelope used for chat injection + ├── [sync] Inject into output.context + └── [done] ← no Graphiti call, no timeout risk + +session.compacted event fires (after LLM compaction completes) + ├── [sync] LRANGE session:{id}:events → rebuild snapshot → SET + ├── [async] Enqueue compaction summary to drain batch + └── [async] Refresh memory cache (new context post-compaction) +``` + +No timeout/fallback needed for compaction augmentation because all reads are +from Redis (sub-ms). The Graphiti-sourced context comes from the pre-warmed +Redis cache, not a live query. + +--- + +## 10 Session Lifecycle + +### 10.1 Session ID Semantics + +- `sessionID` from OpenCode hooks is the canonical key for all Redis state. +- Subagent sessions (with `parentID`) are ignored for memory purposes + (unchanged). +- Session state is local to the plugin process; Redis keys provide persistence + across plugin restarts within TTL windows. + +### 10.2 Startup / Bootstrap + +1. Plugin initializes `ioredis` connection to FalkorDB Redis port. +2. If Redis is unreachable: log error, disable hot tier, fall back to in-memory + event buffer (degraded but functional — same as current behavior without + Redis). Retry connection with exponential backoff. +3. 
Plugin initializes the Graphiti MCP client. Graphiti availability is checked + lazily on first drain attempt. +4. Async: if reusable cache context is identifiable, start best-effort warmup of + `memory-cache:{groupId}`. +5. If Graphiti is unreachable at startup: log warning, continue. Memory cache + remains empty until Graphiti comes online and a drain/refresh succeeds. + +### 10.3 Failure Modes + +| Component Down | Impact | Recovery | +| ---------------- | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | +| Redis (FalkorDB) | No session events, no snapshot, no cache. In-memory fallback for current session; no cross-restart persistence. | Auto-reconnect (ioredis built-in). State rebuilds on reconnect. | +| Graphiti | No drain, no cache refresh. Cached memory stales out (10 min TTL). Session continuity unaffected. | Drain retries on next trigger. Cache refreshes when Graphiti returns. | +| Both | Plugin operates with in-memory session buffer only. Equivalent to current plugin without Graphiti, minus cross-session memory. | Both auto-recover independently. | +| Plugin crash | In-memory state lost. Redis state survives within TTL. | On restart, read `drain:pending` + `drain:cursor` from Redis; resume drain. Session snapshot available for next session. | + +--- + +## 11 Searchable Session History + +### 11.1 Local Session Recall (reuse existing stack only) + +Do not introduce a separate SQLite store. Local session recall stays within the +existing FalkorDB/Graphiti stack: + +- **Primary local source:** Redis/FalkorDB hot-tier event log + snapshot keys. +- **Optional secondary index:** if the FalkorDB deployment includes RediSearch, + use it to index `SessionEvent.summary` and selected `body` fields. 
+- **Fallback:** if RediSearch is unavailable, use bounded linear scan over the + hot-tier event list for recent-session diagnostics and compaction recovery. + +### 11.2 Cross-Session Search (Graphiti) + +Cross-session search goes through Graphiti's vector/graph search, but only via +the async cache layer — never as a synchronous hot-path call. + +--- + +## 12 Tradeoffs + +| Tradeoff | Impact | Mitigation | +| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **One-message staleness on topic drift** | After a topic shift, the first message uses the old cached memory; the refresh arrives for the next message. | Acceptable for most conversations. Cache refresh latency is ~200 ms; user won't notice the one-turn delay. | +| **Cold-start empty persistent memory** | First reply in a new or cold session may have no Graphiti-derived `persistent_memory`. | Redis-sourced `session_memory` still provides immediate continuity. Warmup is best-effort and improves later turns when it wins the race. | +| **Redis as SPOF for hot tier** | If Redis is down, session events and snapshots are unavailable. | In-memory fallback provides degraded session continuity. ioredis auto-reconnects. | +| **Eventual consistency of knowledge graph** | Graphiti facts lag behind conversation by drain interval (seconds to minutes). | Acceptable — knowledge graph is for cross-session recall, not intra-session continuity. | +| **Lost events on plugin crash before drain** | Events buffered in-memory but not yet in Redis `drain:pending` are lost. | Use Redis `drain:pending` as the durable queue (write-ahead). Events are written to `drain:pending` at the same time as `session:{id}:events`. 
| +| **10-min cache TTL may serve stale facts** | Facts invalidated in Graphiti may still appear in cache for up to 10 minutes. | Current design has the same staleness issue (search results are point-in-time). Configurable TTL. | +| **No snapshot for very short sessions** | Sessions that end before `session.idle` fires produce no snapshot. | Acceptable — short sessions have minimal context to preserve. | +| **MCP tool-call abstraction** | MCP adds protocol overhead vs direct HTTP and limits control over request shaping. | Overhead is irrelevant on the async path. Direct HTTP remains a future option only if the API surface is later confirmed; it is not part of the current plan. | + +--- + +## 13 Config Changes + +`GraphitiConfig` keeps legacy top-level keys for backward compatibility, but +adds explicit nested sections for FalkorDB and Graphiti. Nested values take +precedence whenever both forms are supplied. + +```typescript +interface GraphitiConfig { + // Preferred nested config + falkordb?: { + redisEndpoint?: string; // FalkorDB Redis URL (default: "redis://localhost:6379") + batchSize?: number; // max events per drain batch (default: 20) + batchMaxBytes?: number; // max combined body bytes per batch (default: 51200) + sessionTtlSeconds?: number; // session:{id}:events TTL (default: 86400) + cacheTtlSeconds?: number; // memory-cache TTL (default: 600) + drainRetryMax?: number; // max drain retry attempts (default: 3) + }; + + graphiti?: { + endpoint?: string; // Graphiti MCP URL (e.g. "http://localhost:8000/mcp") + groupIdPrefix?: string; + driftThreshold?: number; + factStaleDays?: number; + }; + + // Legacy top-level keys still accepted during migration + endpoint?: string; + groupIdPrefix?: string; + driftThreshold?: number; + factStaleDays?: number; +} +``` + +Resolution rules for the implementation: + +1. Read FalkorDB/Redis settings from `falkordb.*` first; fall back to legacy + top-level Redis keys only when the nested value is absent. +2. 
Read Graphiti settings from `graphiti.*` first; fall back to legacy top-level + Graphiti keys only when the nested value is absent. +3. New docs, examples, validation, and runtime lookups should use the nested + shape as canonical; legacy top-level keys exist only for compatibility. + +--- + +## 14 File Changes + +### New Files + +``` +src/services/redis-client.ts — ioredis wrapper, connection management, fallback +src/services/redis-events.ts — SessionEvent extraction, LPUSH/LRANGE helpers +src/services/redis-snapshot.ts — priority-tiered snapshot builder +src/services/redis-cache.ts — memory-cache read/write/refresh logic +src/services/graphiti-mcp.ts — Graphiti MCP client wrapper +src/services/graphiti-async.ts — async consolidation worker backed by Graphiti MCP +src/services/batch-drain.ts — drain scheduler, cursor management, dead-letter +src/services/event-extractor.ts — structured event extraction from hook payloads +``` + +### Modified Files + +``` +src/config.ts — add canonical `falkordb`/`graphiti` sections, legacy top-level fallback, and precedence resolution +src/types/index.ts — add SessionEvent, EventCategory types +src/session.ts — SessionState gains hotTierReady; wire Redis client and async Graphiti consolidation worker; remove direct GraphitiClient dependency +src/services/connection-manager.ts — adapt existing MCP transport lifecycle for the new graphiti-mcp.ts wrapper (reconnect backoff, request queuing already implemented) +src/handlers/event.ts — hot tier writes on all event types, async drain triggers +src/handlers/chat.ts — read from Redis cache instead of sync Graphiti calls +src/handlers/compacting.ts — read snapshot + cache from Redis, no Graphiti calls +src/handlers/messages.ts — compose canonical `session_memory` envelope from Redis-sourced data +src/index.ts — wire Redis client + async Graphiti MCP worker +``` + +### Removed/Deprecated Files + +``` +src/services/client.ts — replaced by graphiti-mcp.ts +``` + +--- + +## 15 Implementation 
Order + +| Phase | Files | Depends On | Acceptance Criteria | +| ------------------------------------- | ----------------------------------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------- | +| 0. Normalize MCP contract | — | — | Confirm tool payload/response handling against a reachable Graphiti MCP endpoint. | +| 1. Consolidation backend | `graphiti-mcp.ts`, `graphiti-async.ts` | Phase 0 | Async worker can drain, refresh cache, and load primers through Graphiti MCP with no hot-path blocking. | +| 2. Redis primitives | `redis-client.ts`, `redis-events.ts` | — | LPUSH/LRANGE/GET/SET work against FalkorDB. Connection retry works. | +| 3. Event extractor | `event-extractor.ts`, `types/index.ts` | — | Hook payloads produce context-mode-equivalent `SessionEvent` categories. Unit tests. | +| 4. Snapshot builder | `redis-snapshot.ts` | Phase 3 | Priority-tiered XML snapshot generated from event list. Budget enforcement. Unit tests. | +| 5. Local search strategy | — | Phases 2, 4 | Redis/FalkorDB-only session recall path works; optional RediSearch path documented if available. | +| 6. Memory cache | `redis-cache.ts` | Phases 1, 2 | Async Graphiti search results written to and read from Redis. TTL expiry. Stale-read behavior. | +| 7. Batch drain | `batch-drain.ts` | Phases 1, 2, 3 | Events drain to Graphiti async with sequential ingest semantics by `groupId`. Cursor tracking. Crash recovery. | +| 8. Wire handlers | `event.ts`, `chat.ts`, `compacting.ts`, `messages.ts` | Phases 2–7 | All hooks use Redis hot path. No synchronous Graphiti calls remain. Existing test assertions hold. | +| 9. Config & bootstrap | `config.ts`, `index.ts`, `session.ts` | Phase 8 | Nested `falkordb`/`graphiti` config is validated, legacy top-level fallback works, and nested values take precedence. | +| 10. 
Docs alignment (future follow-up) | `README.md` | Phase 9 | README incorporates all adopted context-mode feature descriptions and credits the original author/project by name. | +| 11. Integration tests | — | All | End-to-end: message -> Redis event -> snapshot -> async drain -> Graphiti -> cache refresh -> injection. | + +--- + +## 16 Confirmed Decisions, Remaining Validation, and Future Options + +### 16.1 Confirmed decisions for this plan + +- **Hot path:** FalkorDB/Redis (configured via `falkordb.redisEndpoint`, with + legacy fallback to `redisEndpoint`) is the hot path for writes, snapshots, and + cached reads. +- **Cold/async backend:** Graphiti stays off the hot path. The consolidation + backend is Graphiti MCP (configured via `graphiti.endpoint`, with legacy + fallback to `endpoint`). +- **Hook model:** because OpenCode lacks `SessionStart`, first-turn memory must + rely on `event: session.created` bootstrap + `chat.message` + + `experimental.chat.messages.transform`. +- **Naming:** the canonical injected structure remains `session_memory` with + optional `persistent_memory`. +- **Storage scope:** do not add new independent storage such as SQLite. +- **Docs follow-up:** README alignment and attribution are future implementation + work, not already-completed state. + +### 16.2 Remaining implementation validation + +- [ ] **MCP payload/response normalization**: the endpoint is already verified + as reachable; implementation still needs to lock down exact + request/response handling for `add_memory`, `search_memory_facts`, + `search_nodes`, and `get_episodes`. +- [ ] **Graphiti bulk semantics**: official docs warn `add_episode_bulk` skips + edge invalidation. Confirm whether any bootstrap/backfill path here can + safely use bulk, or whether all non-empty-graph traffic must remain + sequential `add_memory`. +- [ ] **RediSearch in FalkorDB**: if the image includes RediSearch, decide + whether to use it for optional local session search over structured + events. 
+- [ ] **Cache key namespacing**: if multiple plugin instances share the same + FalkorDB, cache keys need instance-level namespacing to avoid collisions. + Current `groupId` prefix may suffice. +- [ ] **Drift detection heuristic**: the cached Jaccard approach compares fact + UUID sets rather than issuing a live search. Validate that this is good + enough in practice. +- [ ] **Connection manager reuse**: the existing + `src/services/connection-manager.ts` (from `plans/ConnectionManager.md`) + already implements MCP transport lifecycle, reconnect backoff, and request + queuing. Decide whether `graphiti-mcp.ts` wraps it as-is, adapts it, or + replaces it. +- [ ] **README scope and attribution**: the README update (Phase 10) must + enumerate every context-mode-derived feature this design adopts + (structured event extraction, priority-tiered snapshots, resumable session + state, hidden background consolidation) and credit the original + context-mode author and project by name with a link. This is a hard + requirement, not optional polish. + +### 16.3 Future options (non-final) + +- [ ] **More proactive cache prewarm**: broaden warmup beyond `get_episodes` + into project-scope `search_memory_facts`/`search_nodes` if the extra async + work is worth the cache-hit improvement. +- [ ] **Alternative Graphiti transport**: direct Graphiti HTTP could be + revisited later only if its API surface is confirmed and there is a + concrete reason to move away from MCP. It is not part of the current plan. diff --git a/plans/ContextOverhaulTests.md b/plans/ContextOverhaulTests.md new file mode 100644 index 0000000..175b936 --- /dev/null +++ b/plans/ContextOverhaulTests.md @@ -0,0 +1,694 @@ +# Context Overhaul — Test Plan + +**Status:** Complete **Date:** 2026-03-14 **Canonical design:** +[`plans/ContextOverhaul.md`](ContextOverhaul.md) + +--- + +## 1 Purpose + +Verify that the Context Overhaul implementation delivers on its four core +promises: + +1. 
**Zero Graphiti on the hot path** — no synchronous MCP/Graphiti call blocks + any hook return. +2. **High-quality session continuity** — compact `session_memory` envelopes + restore task state, decisions, files, and rules after compaction or restart. +3. **High-quality cross-session persistent memory** — `persistent_memory` + surfaces relevant project-bound facts from the Graphiti cache without noise. +4. **Graceful degradation** — the plugin remains functional when Redis or + Graphiti is unavailable. + +Secondary goals: + +- Confirm the implementation avoids legacy verbose `` + hot-path injection. +- Confirm context payloads stay within budget and do not regress in size or + latency. +- Produce CI-friendly artifacts (timing logs, payload snapshots, pass/fail exit + codes). + +--- + +## 2 Non-Goals / Scope Boundaries + +- [ ] **Not testing Graphiti internals** — entity extraction quality, vector + search recall, or FalkorDB query plans are out of scope. +- [ ] **Not testing OpenCode core** — compaction summarizer quality, hook + dispatch ordering, or provider prefix caching are assumed correct. +- [ ] **Not testing MCP protocol compliance** — the MCP transport layer is + covered by `ConnectionManager` tests. +- [ ] **Not benchmarking LLM output quality** — we test structural properties of + injected context, not whether the LLM "understands" it. +- [ ] **Not covering UI/UX** — no visual or interactive-shell UX assertions. 
+ +--- + +## 3 Test Environment / Dependencies + +### 3.1 Required Services + +| Service | Purpose | Test mode | +| -------- | --------------------------- | ---------------------------------------------------------- | +| FalkorDB | Redis-protocol hot tier | Real instance (Docker) or `MockRedisClient` for unit tests | +| Graphiti | Async consolidation backend | Real MCP endpoint or stub/mock for isolation tests | +| Deno | Runtime | `deno test` with `--allow-net --allow-env` | + +### 3.2 Test Tiers + +| Tier | Scope | External deps | Speed | +| ----------- | --------------------------- | ------------- | ------- | +| Unit | Pure functions, extractors | None (mocks) | < 5 s | +| Integration | Redis read/write, MCP calls | FalkorDB | < 30 s | +| System | Full hook lifecycle | Both services | < 120 s | +| Regression | Size/latency budgets | Both services | < 60 s | + +### 3.3 CI Matrix + +```yaml +# Suggested GitHub Actions matrix +strategy: + matrix: + tier: [unit, integration, system, regression] + redis: [real, mock] + graphiti: [real, stub] + exclude: + - tier: unit + redis: real + - tier: unit + graphiti: real +``` + +--- + +## 4 Required Fixtures and Seeded Memory Data + +### 4.1 Redis Fixtures + +| Fixture key | Content | Used by suites | +| ------------------------------ | ------------------------------------------------------------- | -------------------------------- | +| `session:test-1:events` | 15 `SessionEvent` objects spanning all `EventCategory` values | Continuity, compaction, snapshot | +| `session:test-1:snapshot` | Pre-built priority-tiered XML snapshot (< 3 KB) | Compaction, restart/recovery | +| `memory-cache:test-group` | Serialized Graphiti search results (3 facts, 2 nodes) | Persistent memory, drift refresh | +| `memory-cache:test-group:meta` | `lastQuery`, `lastRefresh`, `factUuids` hash | Drift detection, staleness | +| `drain:pending:test-group` | 5 serialized drain-batch entries | Drain, crash recovery | +| `drain:cursor:test-group` | Event 
ID of last drained event | Drain resume | + +### 4.2 Graphiti Stub Responses + +| MCP tool call | Stub response | +| --------------------- | ---------------------------------------------------------------------- | +| `search_memory_facts` | 3 facts with UUIDs, validity dates, and relevance scores | +| `search_nodes` | 2 entity nodes with summaries | +| `get_episodes` | 1 recent session snapshot episode | +| `add_memory` | Success acknowledgment (or configurable failure for degradation tests) | +| `get_status` | Health OK (or configurable timeout/error) | + +### 4.3 Legacy Fixture + +A message array containing a +`verbose block...` +part, used to verify migration/compatibility behavior. + +--- + +## 5 Observability / Instrumentation + +Tests must capture and assert on the following observable signals: + +### 5.1 Timing + +- [ ] Wall-clock time of every hook return (`chat.message`, + `messages.transform`, `session.compacting`). +- [ ] Async operation durations (drain batch, cache refresh) logged but not on + the critical path. + +### 5.2 Payload Snapshots + +- [ ] Serialized `session_memory` envelope captured as a CI artifact on every + injection. +- [ ] Snapshot XML captured on every `session.idle` and `session.compacted` + event. +- [ ] Byte size of each injected payload recorded for regression tracking. + +### 5.3 Structured Logs + +- [ ] All Redis reads/writes logged with key name and byte size. +- [ ] All async MCP calls logged with tool name, duration, and success/failure. +- [ ] Drift detection decisions logged with Jaccard score and refresh trigger. 
+ +### 5.4 CI Artifact Collection + +``` +artifacts/ + timing-report.json # per-hook wall-clock times + payload-snapshots/ # serialized XML/envelope per test case + size-regression.csv # payload byte sizes across runs + coverage-report/ # deno test --coverage output +``` + +--- + +## 6 Test Suites + +### Suite A: Hot-Path No-Graphiti Guarantee + +**Goal:** Prove that no synchronous Graphiti/MCP call occurs during any hot-path +hook. + +**Tier:** Unit + Integration + +**Method:** Instrument the MCP client with a call counter. Assert the counter is +zero after each hot-path hook completes. + +#### Checklist + +- [ ] A-1: `chat.message` handler completes without any MCP `callTool` + invocation. +- [ ] A-2: `experimental.chat.messages.transform` completes without any MCP + `callTool` invocation. +- [ ] A-3: `experimental.session.compacting` completes without any MCP + `callTool` invocation. +- [ ] A-4: `event: message.updated` handler completes without any MCP `callTool` + invocation. +- [ ] A-5: `event: session.compacted` synchronous portion completes without any + MCP `callTool` invocation. +- [ ] A-6: `event: session.idle` synchronous portion completes without any MCP + `callTool` invocation. +- [ ] A-7: All hot-path hooks return within 5 ms when Redis is available + (wall-clock assertion). +- [ ] A-8: Async MCP calls (drain, cache refresh) are confirmed to fire _after_ + the hook returns, via event ordering in the log. + +**Automation:** Fully automatable with mock MCP client and `MockRedisClient`. + +--- + +### Suite B: Compact Memory Payloads + +**Goal:** Verify injected `session_memory` envelopes are compact, structured, +and within budget. + +**Tier:** Unit + +#### Checklist + +- [ ] B-1: `session_memory` envelope size is <= 2 400 chars (1 600 session + guide + 800 snapshot). +- [ ] B-2: `persistent_memory` section, when present, fits within the remainder + of the 5% context budget. 
+- [ ] B-3: Total injected payload (session + persistent) does not exceed 5% of a + 128k-token model context (≈ 25 600 chars). +- [ ] B-4: Snapshot XML conforms to the priority-tiered schema from + `ContextOverhaul.md` §4.3. +- [ ] B-5: Snapshot respects the 3 KB budget — lower-priority sections are + truncated first. +- [ ] B-6: Each `session_memory` contains required sections: `last_request`, + `active_tasks`, `key_decisions`, `files_in_play`, `project_rules`. +- [ ] B-7: Optional sections (`unresolved_errors`, `git_state`, `subagent_work`, + `session_snapshot`, `persistent_memory`) appear only when source data + exists. +- [ ] B-8: No raw tool output, raw transcript text, or multi-KB body content + appears in the injected envelope. + +**Automation:** Fully automatable — parse XML, measure byte sizes, assert +structure. + +--- + +### Suite C: No Raw Tool/Transcript Dumps in Hot-Tier State + +**Goal:** Confirm the implementation follows the context-mode strategy of +capturing structured events rather than raw transcripts. + +**Tier:** Unit + +#### Checklist + +- [ ] C-1: `SessionEvent.body` field is truncated to <= 4 KB per the schema. +- [ ] C-2: Events extracted from tool-result messages store a summary (≤ 200 + chars) and metadata, not the full tool output. +- [ ] C-3: `session:{id}:events` list entries do not contain raw assistant + message text longer than the `body` limit. +- [ ] C-4: The priority-tiered snapshot contains no raw tool output — only + summaries, file paths, and structured state. +- [ ] C-5: Compaction context (`session.compacting` output) contains no raw + transcript replay — only the canonical `session_memory` envelope. +- [ ] C-6: `memory-cache:{groupId}` stores parsed/structured Graphiti results, + not raw MCP response JSON. + +**Automation:** Fully automatable — inspect serialized Redis values and hook +outputs. 
+ +--- + +### Suite D: Session Continuity Quality + +**Goal:** Verify that within a single session, the injected context accurately +reflects the conversation state. + +**Tier:** Integration + +#### Checklist + +- [ ] D-1: After 5 user/assistant exchanges, `session_memory` reflects the + current task, recent decisions, and touched files. +- [ ] D-2: After a user correction ("actually, use X instead of Y"), the next + `session_memory` includes the correction in `key_decisions`. +- [ ] D-3: After a file edit event, `files_in_play` lists the edited file. +- [ ] D-4: After an error event, `unresolved_errors` appears in the envelope. +- [ ] D-5: After the error is resolved, `unresolved_errors` is removed from + subsequent envelopes. +- [ ] D-6: `last_request` always reflects the most recent user message intent, + not a stale prior message. +- [ ] D-7: Session events are ordered chronologically in Redis (`LRANGE` returns + FIFO order). +- [ ] D-8: The `session_memory` envelope is idempotent — calling + `prepareInjection` twice with the same state produces identical output. + +**Automation:** Automatable with simulated hook sequences against +`MockRedisClient`. + +--- + +### Suite E: Compaction Continuity + +**Goal:** Verify that context survives compaction with no loss of critical +state. + +**Tier:** Integration + +#### Checklist + +- [ ] E-1: `session.compacting` hook injects a `session_memory` envelope into + `output.context`. +- [ ] E-2: The compaction-injected envelope contains the same required sections + as chat-time injection (B-6). +- [ ] E-3: After `session.compacted` fires, a new snapshot is built from + surviving events and stored in Redis. +- [ ] E-4: The post-compaction snapshot preserves P0 content (decisions, + constraints, active task) even when lower-priority sections are truncated. +- [ ] E-5: A `chat.message` arriving after compaction produces a + `session_memory` that includes the post-compaction snapshot. 
+- [ ] E-6: Compaction summary is enqueued to `drain:pending:{groupId}` for async + Graphiti ingestion. +- [ ] E-7: Multiple sequential compactions do not cause snapshot drift — each + rebuild uses the current event list. +- [ ] E-8: Compaction with an empty `memory-cache` (cold Graphiti) still + produces a valid `session_memory` with empty `persistent_memory`. + +**Automation:** Automatable with simulated compaction lifecycle against mocks. + +--- + +### Suite F: Cross-Session Project-Bound Persistent Memory + +**Goal:** Verify that `persistent_memory` surfaces relevant project-scoped facts +from the Graphiti cache and that cross-session recall works. + +**Tier:** Integration + System + +#### Checklist + +- [ ] F-1: On a new session with a warm `memory-cache:{groupId}`, the first + `messages.transform` includes `persistent_memory` with cached facts. +- [ ] F-2: On a new session with a cold cache, the first turn has empty + `persistent_memory`; subsequent turns include it after async warmup + completes. +- [ ] F-3: `persistent_memory` includes `fact_uuids` attribute listing the + injected fact UUIDs. +- [ ] F-4: Facts from a different `groupId` (different project) do not appear in + `persistent_memory`. +- [ ] F-5: Stale facts (older than `factStaleDays`) are annotated or filtered + per configuration. +- [ ] F-6: `persistent_memory` content is a structured summary, not raw Graphiti + JSON. +- [ ] F-7: After draining events to Graphiti and refreshing the cache, newly + created facts appear in `persistent_memory` on subsequent sessions. +- [ ] F-8: The `node_refs` attribute in `persistent_memory` lists entity node + references when present. + +**Automation:** F-1 through F-6 automatable with mocks. F-7 requires a real +Graphiti endpoint (system tier). F-8 automatable with stub responses. + +--- + +### Suite G: Memory Relevance / Anti-Noise + +**Goal:** Confirm that injected memory is relevant to the current conversation +and does not include noise. 
+ +**Tier:** Unit + Integration + +#### Checklist + +- [ ] G-1: When the user asks about "Redis configuration", `persistent_memory` + does not include facts about unrelated topics (e.g., "CSS styling + preferences"). +- [ ] G-2: Duplicate facts (same UUID) are never injected twice in a single + envelope. +- [ ] G-3: The `visibleFactUuids` tracking prevents re-injection of + already-visible facts within the same session. +- [ ] G-4: `persistent_memory` respects the budget remainder — it does not crowd + out `session_memory` core sections. +- [ ] G-5: When Graphiti returns zero relevant results, `persistent_memory` is + omitted entirely (not rendered as an empty tag). +- [ ] G-6: The legacy `<memory data-uuids="...">` block is never emitted by the new + implementation — only `<session_memory>` with optional + `<persistent_memory>`. + +**Automation:** G-1 requires semantic evaluation (semi-automated with keyword +matching on stub data). G-2 through G-6 fully automatable. + +--- + +### Suite H: Drift Refresh Behavior + +**Goal:** Verify that topic drift triggers an async cache refresh and that the +refreshed cache is used on the next turn. + +**Tier:** Integration + +#### Checklist + +- [ ] H-1: When Jaccard similarity between current and cached fact UUIDs drops + below `driftThreshold`, an async cache refresh is scheduled. +- [ ] H-2: The current (stale) cache is still injected on the drift-triggering + message (one-message staleness tradeoff). +- [ ] H-3: On the next `chat.message` after the refresh completes, the updated + cache is injected. +- [ ] H-4: When Jaccard similarity is above `driftThreshold`, no refresh is + scheduled. +- [ ] H-5: Drift detection uses the `factUuids` field from + `memory-cache:{groupId}:meta`, not a live Graphiti query. +- [ ] H-6: Rapid successive messages with different topics do not cause + thundering-herd refresh calls — only one refresh is in flight at a time. + +**Automation:** Fully automatable with mock MCP client tracking call counts and +timing.
+ +--- + +### Suite I: Restart / Recovery Behavior + +**Goal:** Verify that plugin restart recovers state from Redis and resumes +normal operation. + +**Tier:** Integration + +#### Checklist + +- [ ] I-1: After plugin restart, `drain:pending:{groupId}` is read and pending + events are re-drained. +- [ ] I-2: After plugin restart, `drain:cursor:{groupId}` is read and only + events after the cursor are drained. +- [ ] I-3: After plugin restart, `session:{id}:snapshot` is available for the + next session's compaction context. +- [ ] I-4: Duplicate drain (events re-sent due to cursor not advancing) is + handled idempotently by Graphiti (UUID-keyed). +- [ ] I-5: After plugin restart with Redis available but Graphiti down, the + plugin operates in degraded mode (session continuity works, drain queues + up). +- [ ] I-6: TTL expiry of session keys (24h for events, 48h for snapshots) does + not cause errors — the plugin handles missing keys gracefully. +- [ ] I-7: `memory-cache:{groupId}` TTL expiry (10 min) results in empty + `persistent_memory`, not an error. + +**Automation:** Automatable by resetting plugin state and re-initializing +against pre-seeded Redis fixtures. + +--- + +### Suite J: Redis Outage / Graphiti Outage Degradation + +**Goal:** Verify graceful degradation when one or both backends are unavailable. + +**Tier:** Integration + System + +#### Checklist + +- [ ] J-1: **Redis down at startup:** plugin logs error, falls back to in-memory + event buffer, hooks still fire. +- [ ] J-2: **Redis down at startup:** `session_memory` is still produced from + in-memory state (degraded but functional). +- [ ] J-3: **Redis down mid-session:** ioredis auto-reconnect fires; events + buffered in memory during outage. +- [ ] J-4: **Redis down mid-session:** after reconnect, state rebuilds and + subsequent hooks use Redis again. +- [ ] J-5: **Graphiti down at startup:** plugin logs warning, continues; + `persistent_memory` is empty. 
+- [ ] J-6: **Graphiti down mid-session:** drain retries with exponential + backoff; cache stales out after TTL. +- [ ] J-7: **Graphiti down mid-session:** `session_memory` (Redis-sourced) is + unaffected. +- [ ] J-8: **Both down:** plugin operates with in-memory buffer only; equivalent + to no-plugin-at-all baseline. +- [ ] J-9: **Graphiti returns after outage:** drain resumes; cache refreshes on + next trigger. +- [ ] J-10: **Redis returns after outage:** state rebuilds; no duplicate events + from the in-memory buffer period. +- [ ] J-11: Dead-letter batches (`drain:dead:{groupId}`) are created after 3 + failed drain attempts. +- [ ] J-12: No hook throws an unhandled exception during any outage scenario — + all failures are caught and logged. + +**Automation:** J-1 through J-8 automatable by controlling mock service +availability. J-9, J-10 require timed reconnection simulation. J-11, J-12 fully +automatable. + +--- + +### Suite K: Context-Size / Latency Regression Detection + +**Goal:** Detect regressions in injected payload size and hook latency across +commits. + +**Tier:** Regression + +#### Checklist + +- [ ] K-1: `session_memory` envelope byte size is recorded per test run and + compared against a baseline. +- [ ] K-2: A > 20% increase in envelope size from baseline fails the regression + check. +- [ ] K-3: `chat.message` hook wall-clock time is recorded and compared against + a 5 ms threshold (Redis available). +- [ ] K-4: `messages.transform` hook wall-clock time is recorded and compared + against a 3 ms threshold. +- [ ] K-5: `session.compacting` hook wall-clock time is recorded and compared + against a 5 ms threshold. +- [ ] K-6: Async drain batch duration is recorded (informational, no hard + threshold — Graphiti latency varies). +- [ ] K-7: Payload size CSV is published as a CI artifact for trend analysis. +- [ ] K-8: Latency percentiles (p50, p95, p99) are computed over 100 iterations + of each hook. + +**Automation:** Fully automatable. 
Requires a baseline file checked into the +repo (`tests/baselines/payload-sizes.json`). + +--- + +### Suite L: Migration / Compatibility — Legacy `data-uuids` + +**Goal:** Verify that the new implementation correctly handles legacy +`<memory data-uuids="...">` blocks and does not emit them. + +**Tier:** Unit + +#### Checklist + +- [ ] L-1: The `messages.transform` handler extracts `fact_uuids` from legacy + `<memory data-uuids="...">` blocks found in existing message history. +- [ ] L-2: Extracted legacy UUIDs are added to `visibleFactUuids` to prevent + re-injection. +- [ ] L-3: The new implementation never emits a `<memory data-uuids="...">` + block — only `<session_memory>` with `<persistent_memory>`. +- [ ] L-4: A message array containing both legacy `<memory data-uuids="...">` and new + `<session_memory>` blocks is handled without errors. +- [ ] L-5: The `fact_uuids` attribute in `<persistent_memory>` preserves the + same UUID semantics as the legacy `data-uuids` attribute. +- [ ] L-6: Legacy config keys (`endpoint`, `groupIdPrefix`, `driftThreshold`, + `factStaleDays`) at the top level are resolved correctly when nested + `graphiti.*` keys are absent. +- [ ] L-7: When both legacy top-level and nested config keys are present, nested + values take precedence. +- [ ] L-8: No verbose multi-paragraph memory block (characteristic of the legacy + Graphiti injection) appears in any hot-path output. + +**Automation:** Fully automatable — existing test in `messages.test.ts` already +covers L-1/L-2 partially.
+ +--- + +## 7 Metrics and Thresholds + +| Metric | Threshold | Source | Action on breach | +| -------------------------------------- | ---------------------- | ---------------------- | -------------------- | +| Hot-path hook wall-clock (p95) | < 5 ms (Redis up) | Timing instrumentation | Fail CI | +| `session_memory` envelope size | <= 2 400 chars | Payload snapshot | Fail CI | +| Total injected payload size | <= 5% of context limit | Payload snapshot | Fail CI | +| Snapshot XML size | <= 3 072 bytes (3 KB) | Redis `GET` | Fail CI | +| `SessionEvent.summary` length | <= 200 chars | Event extractor output | Fail CI | +| `SessionEvent.body` length | <= 4 096 bytes (4 KB) | Event extractor output | Fail CI | +| Async drain batch duration (p95) | < 5 000 ms | Async timing log | Warn (informational) | +| Cache refresh duration (p95) | < 2 000 ms | Async timing log | Warn (informational) | +| MCP calls during hot-path hooks | 0 | Call counter | Fail CI | +| Payload size regression (vs. baseline) | < 20% increase | Size regression CSV | Fail CI | +| Dead-letter batches per session | 0 (healthy run) | Redis key count | Warn (informational) | + +--- + +## 8 Pass / Fail Criteria + +### 8.1 Overall Pass + +All of the following must be true: + +- [ ] All Suite A checks pass (zero Graphiti on hot path). +- [ ] All Suite B checks pass (compact payloads within budget). +- [ ] All Suite C checks pass (no raw tool/transcript dumps). +- [ ] All Suite L checks pass (no legacy `data-uuids` emission). +- [ ] All Suite K thresholds are within bounds (no regressions). +- [ ] No unhandled exceptions in any degradation scenario (Suite J-12). +- [ ] Test coverage for hot-path code paths >= 90%. + +### 8.2 Conditional Pass (with known gaps) + +The following suites may have items that require manual verification or a real +interactive shell lifecycle: + +- Suite D (session continuity quality) — D-1 through D-5 require multi-turn + simulation. 
+- Suite F (cross-session persistent memory) — F-7 requires real Graphiti. +- Suite J (degradation) — J-9, J-10 require timed reconnection. + +These items are tracked as known gaps (see §10) and do not block CI pass if the +automatable subset passes. + +### 8.3 Fail + +Any of the following triggers a fail: + +- Any MCP call detected during a hot-path hook (Suite A). +- Injected payload exceeds budget (Suite B, K). +- Legacy `<memory data-uuids="...">` block emitted by new code (Suite L-3). +- Unhandled exception during degradation (Suite J-12). +- Hot-path hook latency exceeds 5 ms p95 (Suite K-3 through K-5). + +--- + +## 9 CI/CD Automation Strategy + +### 9.1 Test Execution + +```bash +# Unit tests (no external deps) +deno test --allow-env --filter "/suite-[a-cgl]/" src/ + +# Integration tests (requires FalkorDB) +docker compose -f tests/docker-compose.yml up -d falkordb +deno test --allow-net --allow-env --filter "/suite-[d-fh-j]/" src/ + +# Regression tests (requires both services) +docker compose -f tests/docker-compose.yml up -d +deno test --allow-net --allow-env --filter "suite-k" src/ + +# Full run +docker compose -f tests/docker-compose.yml up -d +deno test --allow-net --allow-env src/ +``` + +### 9.2 CI Artifacts to Collect + +| Artifact | Format | Purpose | +| ------------------------- | ------ | ----------------------------------------- | +| `timing-report.json` | JSON | Per-hook latency data for trend analysis | +| `payload-snapshots/*.xml` | XML | Injected envelopes for manual review | +| `size-regression.csv` | CSV | Payload sizes for cross-commit comparison | +| `coverage-report/` | HTML | Deno test coverage output | +| `test-results.json` | JSON | Structured pass/fail per checklist item | +| `dead-letter-report.json` | JSON | Dead-letter batches created during run | + +### 9.3 Suggested CI Pipeline + +```mermaid +graph LR + A[Push / PR] --> B[Unit Tests
no deps] + B --> C{Pass?} + C -->|Yes| D[Start FalkorDB
Docker] + C -->|No| X[Fail] + D --> E[Integration Tests] + E --> F{Pass?} + F -->|Yes| G[Start Graphiti
Docker] + F -->|No| X + G --> H[System + Regression] + H --> I{Pass?} + I -->|Yes| J[Collect Artifacts
Publish Report] + I -->|No| X +``` + +### 9.4 Baseline Management + +- Payload size baselines are stored in `tests/baselines/payload-sizes.json`. +- Baselines are updated manually via `deno task update-baselines` after + intentional size changes. +- CI compares current sizes against the checked-in baseline and fails on > 20% + regression. + +--- + +## 10 Remaining Gaps / Hard-to-Automate Tests + +### 10.1 Tests Requiring a True Interactive Shell Lifecycle + +The following tests cannot be fully automated within the current OpenCode plugin +test harness because they require a real OpenCode session lifecycle (hook +dispatch, compaction trigger, multi-turn LLM interaction): + +| Test ID | Description | Approximation strategy | +| ------- | ------------------------------------------- | -------------------------------------------------------------------------------- | +| D-1 | Multi-turn continuity after 5 exchanges | Simulate by calling hook handlers sequentially with synthetic payloads. | +| D-2 | User correction reflected in next injection | Simulate with synthetic `decision` event insertion. | +| E-5 | Post-compaction chat uses new snapshot | Simulate by calling compaction handler then chat handler in sequence. | +| F-2 | Cold-start first turn, warm second turn | Simulate with timed async warmup and sequential handler calls. | +| F-7 | Cross-session fact recall after drain | Requires real Graphiti; approximate with stub that returns pre-seeded facts. | +| J-9 | Graphiti recovery triggers drain resume | Simulate by toggling mock MCP availability and advancing timers. | +| J-10 | Redis recovery rebuilds state | Simulate by toggling mock Redis availability and verifying event list integrity. 
| + +### 10.2 Tests Requiring Real Services + +| Test ID | Description | Why | +| ------- | --------------------------------------- | -------------------------------------------------------------- | +| F-7 | End-to-end cross-session recall | Needs real Graphiti entity extraction and vector search. | +| K-6 | Async drain batch duration | Meaningful only against real Graphiti (LLM-backed extraction). | +| K-8 | Latency percentiles over 100 iterations | Meaningful only against real services under realistic load. | + +### 10.3 Tests Requiring Manual / Exploratory Verification + +| Area | What to verify | +| ----------------------------- | ---------------------------------------------------------------------------------- | +| LLM continuity quality | Does the LLM actually "feel" continuous after compaction? Requires human judgment. | +| Memory relevance (semantic) | Are the right facts surfaced for a given topic? Keyword matching approximates. | +| Multi-agent orchestration | Subagent events in a real swarm session. | +| Long-running session (> 1 hr) | TTL expiry, cache staleness, and drift behavior over extended use. | + +### 10.4 OpenCode Shell Model Limitations + +The current OpenCode plugin architecture has these constraints for test +automation: + +1. **No programmatic session creation** — tests cannot create a real OpenCode + session; they must simulate hook calls. +2. **No compaction trigger API** — compaction is triggered by OpenCode + internally; tests simulate `session.compacting` and `session.compacted` + events. +3. **No multi-session orchestration** — testing cross-session behavior requires + separate test runs or simulated session boundaries. +4. **Hook dispatch is synchronous in tests** — async fire-and-forget behavior + must be verified by awaiting explicit flush/drain calls rather than relying + on event-loop timing. + +**Mitigation:** The test harness simulates the hook lifecycle by calling handler +functions directly with synthetic inputs. 
This covers ~85% of the test plan. The +remaining ~15% (marked in §10.1–10.3) requires either real services, real +OpenCode sessions, or human judgment. diff --git a/src/config.test.ts b/src/config.test.ts index d277e5f..5bf2114 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -1,357 +1,121 @@ -import { - assertEquals, - assertFalse, - assertStrictEquals, -} from "jsr:@std/assert@^1.0.0"; -import { - afterEach, - beforeEach, - describe, - it, -} from "jsr:@std/testing@^1.0.0/bdd"; -import { stub } from "jsr:@std/testing@^1.0.0/mock"; +import { assertEquals } from "jsr:@std/assert@^1.0.0"; +import { afterEach, describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import os from "node:os"; +import { stub } from "jsr:@std/testing@^1.0.0/mock"; import { type ConfigExplorerAdapter, loadConfig, resetConfigExplorerAdapterForTesting, setConfigExplorerAdapterForTesting, } from "./config.ts"; -import type { GraphitiConfig } from "./types/index.ts"; - -function assertConfigValues( - config: GraphitiConfig, - expected: Pick< - GraphitiConfig, - "endpoint" | "groupIdPrefix" | "driftThreshold" | "factStaleDays" - >, -) { - assertStrictEquals(config.endpoint, expected.endpoint); - assertStrictEquals(config.groupIdPrefix, expected.groupIdPrefix); - assertStrictEquals(config.driftThreshold, expected.driftThreshold); - assertStrictEquals(config.factStaleDays, expected.factStaleDays); -} function makeAdapter(options?: { - searchByDirectory?: Record; - loadResult?: Record; - searchErrorByDirectory?: Record; - loadError?: Record; - onSearch?: (from?: string) => void; - onLoad?: (filePath: string) => void; + searchResult?: unknown | null; + loadResult?: unknown | null; }): ConfigExplorerAdapter { return { - search(from) { - options?.onSearch?.(from); - - const directory = from ?? 
"__undefined__"; - const error = options?.searchErrorByDirectory?.[directory]; - if (error) throw error; - - const result = options?.searchByDirectory?.[directory]; - return result === undefined || result === null + search() { + return options?.searchResult == null ? null - : { config: result }; + : { config: options.searchResult }; }, - load(filePath) { - options?.onLoad?.(filePath); - - const error = options?.loadError?.[filePath]; - if (error) throw error; - - const result = options?.loadResult?.[filePath]; - return result === undefined || result === null + load() { + return options?.loadResult == null ? null - : { config: result }; + : { config: options.loadResult }; }, }; } describe("config", () => { - let originalError: typeof console.error; + afterEach(() => resetConfigExplorerAdapterForTesting()); - beforeEach(() => { - originalError = console.error; - console.error = () => {}; - }); + it("returns defaults when no config is found", () => { + setConfigExplorerAdapterForTesting(() => makeAdapter()); + const config = loadConfig(); - afterEach(() => { - console.error = originalError; - resetConfigExplorerAdapterForTesting(); + assertEquals(config.graphiti.endpoint, "http://localhost:8000/mcp"); + assertEquals(config.graphiti.groupIdPrefix, "opencode"); + assertEquals(config.graphiti.driftThreshold, 0.5); + assertEquals(config.falkordb.redisEndpoint, "redis://localhost:6379"); + assertEquals(config.falkordb.batchSize, 20); }); - describe("loadConfig", () => { - it("uses cosmiconfig global search from Deno.cwd() when no directory is provided", () => { - const fakeCwd = "/users/tester/workspace/project/subdir"; - const searchCalls: Array = []; - using _cwd = stub(Deno, "cwd", () => fakeCwd); - setConfigExplorerAdapterForTesting(() => - makeAdapter({ - searchByDirectory: { - __undefined__: { - endpoint: "http://cwd-global.local/mcp", - driftThreshold: 0.3, - factStaleDays: 14, - }, - }, - onSearch(from) { - searchCalls.push(from); + it("prefers nested graphiti and 
falkordb values over legacy top-level keys", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + endpoint: "http://legacy.example/mcp", + groupIdPrefix: "legacy", + redisEndpoint: "redis://legacy:6379", + graphiti: { + endpoint: "http://nested.example/mcp", + groupIdPrefix: "nested", + driftThreshold: 0.75, }, - }) - ); - - const config = loadConfig(); - - assertEquals(searchCalls, [undefined]); - assertConfigValues(config, { - endpoint: "http://cwd-global.local/mcp", - groupIdPrefix: "opencode", - driftThreshold: 0.3, - factStaleDays: 14, - }); - }); - - it("uses the explicit directory as the cosmiconfig global-search start", () => { - const explicitDir = "/users/tester/workspace/project"; - const searchCalls: Array = []; - setConfigExplorerAdapterForTesting(() => - makeAdapter({ - searchByDirectory: { - "/users/tester/workspace/project": { - endpoint: "http://home.local/mcp", - driftThreshold: 0.7, - factStaleDays: 21, - }, + falkordb: { + redisEndpoint: "redis://nested:6379", + batchSize: 9, }, - onSearch(from) { - searchCalls.push(from); - }, - }) - ); - - const config = loadConfig(explicitDir); - - assertEquals(searchCalls, ["/users/tester/workspace/project"]); - assertConfigValues(config, { - endpoint: "http://home.local/mcp", - groupIdPrefix: "opencode", - driftThreshold: 0.7, - factStaleDays: 21, - }); - }); - - it("uses legacy fallback only after cosmiconfig search returns no config", () => { - const fakeHome = "/users/tester"; - const explicitDir = "/users/tester/workspace/project"; - const searchCalls: Array = []; - const loadCalls: string[] = []; - using _homedir = stub(os, "homedir", () => fakeHome); - setConfigExplorerAdapterForTesting(() => - makeAdapter({ - loadResult: { - "/users/tester/.config/opencode/.graphitirc": { - endpoint: "http://legacy.local/mcp", - driftThreshold: 0.8, - factStaleDays: 42, - }, - }, - onSearch(from) { - searchCalls.push(from); - }, - onLoad(filePath) { - loadCalls.push(filePath); - }, - 
}) - ); - - const config = loadConfig(explicitDir); - - assertEquals(searchCalls, ["/users/tester/workspace/project"]); - assertEquals(loadCalls, [ - "/users/tester/.config/opencode/.graphitirc", - ]); - assertConfigValues(config, { - endpoint: "http://legacy.local/mcp", - groupIdPrefix: "opencode", - driftThreshold: 0.8, - factStaleDays: 42, - }); - }); - - it("does not use legacy fallback when traversal already found config", () => { - const loadCalls: string[] = []; - using _cwd = stub(Deno, "cwd", () => "/users/tester/workspace/project"); - setConfigExplorerAdapterForTesting(() => - makeAdapter({ - searchByDirectory: { - __undefined__: { - endpoint: "http://discovered.local/mcp", - }, - }, - onLoad(filePath) { - loadCalls.push(filePath); - }, - }) - ); - - const config = loadConfig(); - - assertStrictEquals(loadCalls.length, 0); - assertStrictEquals(config.endpoint, "http://discovered.local/mcp"); - }); - - it("fails open when creating the explorer adapter throws", () => { - setConfigExplorerAdapterForTesting(() => { - throw new Deno.errors.PermissionDenied("Denied"); - }); - - const config = loadConfig(); - - assertConfigValues(config, { - endpoint: "http://localhost:8000/mcp", - groupIdPrefix: "opencode", - driftThreshold: 0.5, - factStaleDays: 30, - }); - }); - - it("uses legacy fallback when Deno.cwd() throws and cosmiconfig search returns no config", () => { - const fakeHome = "/users/tester"; - using _cwd = stub(Deno, "cwd", () => { - throw new Deno.errors.PermissionDenied("Denied"); - }); - using _homedir = stub(os, "homedir", () => fakeHome); - setConfigExplorerAdapterForTesting(() => - makeAdapter({ - loadResult: { - "/users/tester/.config/opencode/.graphitirc": { - endpoint: "http://legacy.local/mcp", - }, - }, - }) - ); - - const config = loadConfig(); - - assertStrictEquals(config.endpoint, "http://legacy.local/mcp"); - }); - - it("fails open when os.homedir() throws during legacy fallback", () => { - using _homedir = stub(os, "homedir", () => { - 
throw new Deno.errors.PermissionDenied("Denied"); - }); - setConfigExplorerAdapterForTesting(() => makeAdapter()); - - const config = loadConfig(); - - assertConfigValues(config, { - endpoint: "http://localhost:8000/mcp", - groupIdPrefix: "opencode", - driftThreshold: 0.5, - factStaleDays: 30, - }); - }); - - it("fails open when cosmiconfig search throws", () => { - const explicitDir = "/users/tester/workspace/project"; - const searchCalls: Array = []; - setConfigExplorerAdapterForTesting(() => - makeAdapter({ - searchErrorByDirectory: { - "/users/tester/workspace/project": new Deno.errors.PermissionDenied( - "Denied", - ), - }, - onSearch(from) { - searchCalls.push(from); - }, - }) - ); - - const config = loadConfig(explicitDir); - - assertEquals(searchCalls, ["/users/tester/workspace/project"]); - assertConfigValues(config, { - endpoint: "http://localhost:8000/mcp", - groupIdPrefix: "opencode", - driftThreshold: 0.5, - factStaleDays: 30, - }); - }); - - it("fails open when the legacy fallback load throws", () => { - const fakeHome = "/users/tester"; - using _homedir = stub(os, "homedir", () => fakeHome); - setConfigExplorerAdapterForTesting(() => - makeAdapter({ - loadError: { - "/users/tester/.config/opencode/.graphitirc": new Deno.errors - .PermissionDenied("Denied"), - }, - }) - ); - - const config = loadConfig("/users/tester/workspace/project"); + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.graphiti.endpoint, "http://nested.example/mcp"); + assertEquals(config.graphiti.groupIdPrefix, "nested"); + assertEquals(config.graphiti.driftThreshold, 0.75); + assertEquals(config.falkordb.redisEndpoint, "redis://nested:6379"); + assertEquals(config.falkordb.batchSize, 9); + assertEquals(config.endpoint, "http://nested.example/mcp"); + assertEquals(config.driftThreshold, 0.75); + assertEquals(config.redisEndpoint, "redis://nested:6379"); + }); - assertConfigValues(config, { - endpoint: "http://localhost:8000/mcp", - groupIdPrefix: "opencode", - 
driftThreshold: 0.5, - factStaleDays: 30, - }); - }); + it("uses legacy fallback file when discovery finds nothing", () => { + using _homedir = stub(os, "homedir", () => "/users/tester"); + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + loadResult: { + endpoint: "http://legacy.example/mcp", + redisEndpoint: "redis://legacy:6379", + }, + }) + ); + + const config = loadConfig(); + assertEquals(config.graphiti.endpoint, "http://legacy.example/mcp"); + assertEquals(config.falkordb.redisEndpoint, "redis://legacy:6379"); + }); - it("merges partial discovered config with defaults", () => { - using _cwd = stub(Deno, "cwd", () => "/users/tester/workspace/project"); - setConfigExplorerAdapterForTesting(() => - makeAdapter({ - searchByDirectory: { - __undefined__: { - endpoint: "http://partial.local/mcp", - }, + it("falls back to defaults for invalid numeric config values", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + graphiti: { + driftThreshold: 2, + factStaleDays: 0, }, - }) - ); - - const config = loadConfig(); - - assertStrictEquals(config.endpoint, "http://partial.local/mcp"); - assertStrictEquals(config.groupIdPrefix, "opencode"); - assertStrictEquals(config.driftThreshold, 0.5); - assertStrictEquals(config.factStaleDays, 30); - }); - - it("merges partial legacy fallback config with defaults", () => { - const fakeHome = "/users/tester"; - using _homedir = stub(os, "homedir", () => fakeHome); - setConfigExplorerAdapterForTesting(() => - makeAdapter({ - loadResult: { - "/users/tester/.config/opencode/.graphitirc": { - endpoint: "http://partial-legacy.local/mcp", - }, + falkordb: { + batchSize: 0, + batchMaxBytes: -10, + sessionTtlSeconds: -1, + cacheTtlSeconds: 0, + drainRetryMax: -1, }, - }) - ); - - const config = loadConfig("/users/tester/workspace/project"); - - assertStrictEquals(config.endpoint, "http://partial-legacy.local/mcp"); - assertStrictEquals(config.groupIdPrefix, "opencode"); - 
assertStrictEquals(config.driftThreshold, 0.5); - assertStrictEquals(config.factStaleDays, 30); - }); - - it("returns a complete GraphitiConfig shape", () => { - using _cwd = stub(Deno, "cwd", () => "/users/tester/workspace/project"); - setConfigExplorerAdapterForTesting(() => makeAdapter()); - - const config = loadConfig(); - - assertFalse(config.endpoint === undefined); - assertFalse(config.groupIdPrefix === undefined); - assertFalse(config.driftThreshold === undefined); - assertFalse(config.factStaleDays === undefined); - }); + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.graphiti.driftThreshold, 0.5); + assertEquals(config.graphiti.factStaleDays, 30); + assertEquals(config.falkordb.batchSize, 20); + assertEquals(config.falkordb.batchMaxBytes, 51_200); + assertEquals(config.falkordb.sessionTtlSeconds, 86_400); + assertEquals(config.falkordb.cacheTtlSeconds, 600); + assertEquals(config.falkordb.drainRetryMax, 3); }); }); diff --git a/src/config.ts b/src/config.ts index 2519b3b..b5f379b 100644 --- a/src/config.ts +++ b/src/config.ts @@ -4,13 +4,46 @@ import { join } from "node:path"; import type { GraphitiConfig } from "./types/index.ts"; const DEFAULT_CONFIG: GraphitiConfig = { + falkordb: { + redisEndpoint: "redis://localhost:6379", + batchSize: 20, + batchMaxBytes: 51_200, + sessionTtlSeconds: 86_400, + cacheTtlSeconds: 600, + drainRetryMax: 3, + }, + graphiti: { + endpoint: "http://localhost:8000/mcp", + groupIdPrefix: "opencode", + driftThreshold: 0.5, + factStaleDays: 30, + }, endpoint: "http://localhost:8000/mcp", groupIdPrefix: "opencode", driftThreshold: 0.5, factStaleDays: 30, + redisEndpoint: "redis://localhost:6379", + batchSize: 20, + batchMaxBytes: 51_200, + sessionTtlSeconds: 86_400, + cacheTtlSeconds: 600, + drainRetryMax: 3, }; -type PartialGraphitiConfig = Partial; +type PartialGraphitiConfig = { + falkordb?: Partial; + graphiti?: Partial; + endpoint?: string; + groupIdPrefix?: string; + driftThreshold?: number; + 
factStaleDays?: number; + redisEndpoint?: string; + batchSize?: number; + batchMaxBytes?: number; + sessionTtlSeconds?: number; + cacheTtlSeconds?: number; + drainRetryMax?: number; +}; type ConfigLoadResult = { config: unknown } | null; @@ -30,25 +63,163 @@ const require = createRequire(import.meta.url); const isRecord = (value: unknown): value is Record => !!value && typeof value === "object" && !Array.isArray(value); +const readString = ( + value: Record, + key: string, +): string | undefined => + typeof value[key] === "string" ? value[key] as string : undefined; + +const readNumber = ( + value: Record, + key: string, +): number | undefined => + typeof value[key] === "number" ? value[key] as number : undefined; + const normalizeConfig = (value: unknown): PartialGraphitiConfig => { if (!isRecord(value)) return {}; - const config: PartialGraphitiConfig = {}; + const compact = >(input: T): Partial => + Object.fromEntries( + Object.entries(input).filter(([_, entry]) => entry !== undefined), + ) as Partial; - if (typeof value.endpoint === "string") config.endpoint = value.endpoint; - if (typeof value.groupIdPrefix === "string") { - config.groupIdPrefix = value.groupIdPrefix; - } - if (typeof value.driftThreshold === "number") { - config.driftThreshold = value.driftThreshold; + const config: PartialGraphitiConfig = { + endpoint: readString(value, "endpoint"), + groupIdPrefix: readString(value, "groupIdPrefix"), + driftThreshold: readNumber(value, "driftThreshold"), + factStaleDays: readNumber(value, "factStaleDays"), + redisEndpoint: readString(value, "redisEndpoint"), + batchSize: readNumber(value, "batchSize"), + batchMaxBytes: readNumber(value, "batchMaxBytes"), + sessionTtlSeconds: readNumber(value, "sessionTtlSeconds"), + cacheTtlSeconds: readNumber(value, "cacheTtlSeconds"), + drainRetryMax: readNumber(value, "drainRetryMax"), + }; + + if (isRecord(value.falkordb)) { + config.falkordb = compact({ + redisEndpoint: readString(value.falkordb, "redisEndpoint"), + 
batchSize: readNumber(value.falkordb, "batchSize"), + batchMaxBytes: readNumber(value.falkordb, "batchMaxBytes"), + sessionTtlSeconds: readNumber(value.falkordb, "sessionTtlSeconds"), + cacheTtlSeconds: readNumber(value.falkordb, "cacheTtlSeconds"), + drainRetryMax: readNumber(value.falkordb, "drainRetryMax"), + }); } - if (typeof value.factStaleDays === "number") { - config.factStaleDays = value.factStaleDays; + + if (isRecord(value.graphiti)) { + config.graphiti = compact({ + endpoint: readString(value.graphiti, "endpoint"), + groupIdPrefix: readString(value.graphiti, "groupIdPrefix"), + driftThreshold: readNumber(value.graphiti, "driftThreshold"), + factStaleDays: readNumber(value.graphiti, "factStaleDays"), + }); } return config; }; +const isPositiveInteger = (value: number | undefined): value is number => + typeof value === "number" && Number.isInteger(value) && value > 0; + +const isPositiveNumber = (value: number | undefined): value is number => + typeof value === "number" && Number.isFinite(value) && value > 0; + +const isUnitInterval = (value: number | undefined): value is number => + typeof value === "number" && Number.isFinite(value) && value >= 0 && + value <= 1; + +const resolveNumber = ( + ...candidates: Array +): number | undefined => candidates.find((value) => value !== undefined); + +const resolveConfig = (value: PartialGraphitiConfig | null): GraphitiConfig => { + const raw = value ?? {}; + + const resolvedRedisEndpoint = raw.falkordb?.redisEndpoint ?? + raw.redisEndpoint ?? 
+ DEFAULT_CONFIG.falkordb.redisEndpoint; + const resolvedBatchSize = resolveNumber( + raw.falkordb?.batchSize, + raw.batchSize, + ); + const resolvedBatchMaxBytes = resolveNumber( + raw.falkordb?.batchMaxBytes, + raw.batchMaxBytes, + ); + const resolvedSessionTtlSeconds = resolveNumber( + raw.falkordb?.sessionTtlSeconds, + raw.sessionTtlSeconds, + ); + const resolvedCacheTtlSeconds = resolveNumber( + raw.falkordb?.cacheTtlSeconds, + raw.cacheTtlSeconds, + ); + const resolvedDrainRetryMax = resolveNumber( + raw.falkordb?.drainRetryMax, + raw.drainRetryMax, + ); + const resolvedGraphitiEndpoint = raw.graphiti?.endpoint ?? raw.endpoint ?? + DEFAULT_CONFIG.graphiti.endpoint; + const resolvedGroupIdPrefix = raw.graphiti?.groupIdPrefix ?? + raw.groupIdPrefix ?? + DEFAULT_CONFIG.graphiti.groupIdPrefix; + const resolvedDriftThreshold = resolveNumber( + raw.graphiti?.driftThreshold, + raw.driftThreshold, + ); + const resolvedFactStaleDays = resolveNumber( + raw.graphiti?.factStaleDays, + raw.factStaleDays, + ); + + const falkordb = { + redisEndpoint: resolvedRedisEndpoint, + batchSize: isPositiveInteger(resolvedBatchSize) + ? resolvedBatchSize + : DEFAULT_CONFIG.falkordb.batchSize, + batchMaxBytes: isPositiveInteger(resolvedBatchMaxBytes) + ? resolvedBatchMaxBytes + : DEFAULT_CONFIG.falkordb.batchMaxBytes, + sessionTtlSeconds: isPositiveInteger(resolvedSessionTtlSeconds) + ? resolvedSessionTtlSeconds + : DEFAULT_CONFIG.falkordb.sessionTtlSeconds, + cacheTtlSeconds: isPositiveInteger(resolvedCacheTtlSeconds) + ? resolvedCacheTtlSeconds + : DEFAULT_CONFIG.falkordb.cacheTtlSeconds, + drainRetryMax: isPositiveInteger(resolvedDrainRetryMax) + ? resolvedDrainRetryMax + : DEFAULT_CONFIG.falkordb.drainRetryMax, + }; + + const graphiti = { + endpoint: resolvedGraphitiEndpoint, + groupIdPrefix: resolvedGroupIdPrefix, + driftThreshold: isUnitInterval(resolvedDriftThreshold) + ? 
resolvedDriftThreshold + : DEFAULT_CONFIG.graphiti.driftThreshold, + factStaleDays: isPositiveNumber(resolvedFactStaleDays) + ? resolvedFactStaleDays + : DEFAULT_CONFIG.graphiti.factStaleDays, + }; + + return { + ...raw, + falkordb, + graphiti, + endpoint: graphiti.endpoint, + groupIdPrefix: graphiti.groupIdPrefix, + driftThreshold: graphiti.driftThreshold, + factStaleDays: graphiti.factStaleDays, + redisEndpoint: falkordb.redisEndpoint, + batchSize: falkordb.batchSize, + batchMaxBytes: falkordb.batchMaxBytes, + sessionTtlSeconds: falkordb.sessionTtlSeconds, + cacheTtlSeconds: falkordb.cacheTtlSeconds, + drainRetryMax: falkordb.drainRetryMax, + }; +}; + const createCosmiconfigAdapter = (): ConfigExplorerAdapter => { const { cosmiconfigSync } = require("cosmiconfig") as { cosmiconfigSync: ( @@ -109,20 +280,12 @@ const getHomeDir = (): string | undefined => { } }; -const getSearchStartDir = (directory?: string): string | undefined => { - try { - return directory === undefined ? undefined : directory; - } catch { - return undefined; - } -}; - const searchConfig = ( adapter: ConfigExplorerAdapter, directory?: string, ): ConfigSearchOutcome => { try { - const loaded = adapter.search(getSearchStartDir(directory)); + const loaded = adapter.search(directory); return { ok: true, config: loaded ? normalizeConfig(loaded.config) : null, @@ -144,22 +307,13 @@ const loadLegacyConfig = ( ); }; -/** - * Load Graphiti configuration via cosmiconfig discovery, with a legacy fallback - * to `~/.config/opencode/.graphitirc` only when discovery succeeds and returns - * no result. - */ export function loadConfig(directory?: string): GraphitiConfig { const adapter = getConfigExplorerAdapter(); - if (!adapter) return { ...DEFAULT_CONFIG }; + if (!adapter) return structuredClone(DEFAULT_CONFIG); const searched = searchConfig(adapter, directory); - if (!searched.ok) return { ...DEFAULT_CONFIG }; + if (!searched.ok) return structuredClone(DEFAULT_CONFIG); const loaded = searched.config ?? 
loadLegacyConfig(adapter); - - return { - ...DEFAULT_CONFIG, - ...(loaded ?? {}), - }; + return resolveConfig(loaded); } diff --git a/src/handlers/chat.test.ts b/src/handlers/chat.test.ts index c5f58db..0d78578 100644 --- a/src/handlers/chat.test.ts +++ b/src/handlers/chat.test.ts @@ -1,1115 +1,307 @@ -import { assertEquals, assertStrictEquals } from "jsr:@std/assert@^1.0.0"; +import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import { setLoggerSilentOverride } from "../services/logger.ts"; -import type { GraphitiFact, GraphitiNode } from "../types/index.ts"; -import type { SessionManager } from "../session.ts"; -import type { GraphitiClient } from "../services/client.ts"; -import { normalizeEpisode } from "../services/sdk-normalize.ts"; import { createChatHandler } from "./chat.ts"; -// Mock SessionManager -class MockSessionManager implements Partial { - private sessions = new Map(); - private parentIds = new Map(); - - async resolveSessionState(sessionId: string) { - const parentId = this.parentIds.get(sessionId); - if (parentId === undefined) return { state: null, resolved: false }; - if (parentId) { - this.sessions.delete(sessionId); - return { state: null, resolved: true }; - } - - let state = this.sessions.get(sessionId); - if (!state) { - state = { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - cachedMemoryContext: undefined, - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, +class MockSessionManager { + prepareInjectionResult: + | { + envelope: string; + factUuids: string[]; + nodeRefs: string[]; + refreshDecision: { + classification: string; + shouldRefresh: boolean; + similarity: number; + threshold: number; + cachedQuery: string | null; }; - this.sessions.set(sessionId, state); } - return { state, resolved: true }; - } - - setParentId(sessionId: string, parentId: string 
| null) { - this.parentIds.set(sessionId, parentId); + | null + | undefined = undefined; + nextRefreshDecision: { + classification: string; + shouldRefresh: boolean; + similarity: number; + threshold: number; + cachedQuery: string | null; + } = { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }; + prepareInjectionCalls: Array<{ sessionId: string; lastRequest?: string }> = + []; + state = { + groupId: "group-1", + userGroupId: "user-1", + injectedMemories: false, + lastInjectionFactUuids: [], + visibleFactUuids: [], + messageCount: 0, + pendingMessages: [] as string[], + contextLimit: 200_000, + isMain: true, + hotTierReady: false, + pendingInjection: undefined as { + envelope: string; + factUuids: string[]; + nodeRefs: string[]; + refreshDecision: { + classification: string; + shouldRefresh: boolean; + similarity: number; + threshold: number; + cachedQuery: string | null; + }; + } | undefined, + pendingInjectionGeneration: 0, + latestUserRequest: undefined as string | undefined, + }; + markSessionActive(_sessionId: string): void { + // no-op for tests: activity tracking is not under test here } - setState(sessionId: string, state: any) { - this.sessions.set(sessionId, state); + resolveSessionState() { + return { state: this.state, resolved: true }; } - getState(sessionId: string) { - return this.sessions.get(sessionId); + prepareInjection(_sessionId: string, lastRequest?: string) { + this.prepareInjectionCalls.push({ + sessionId: _sessionId, + lastRequest, + }); + const prepared = this.prepareInjectionResult === undefined + ? { + envelope: + `${lastRequest}`, + factUuids: [], + nodeRefs: [], + refreshDecision: this.nextRefreshDecision, + } + : this.prepareInjectionResult; + this.state.pendingInjection = prepared ?? undefined; + this.state.hotTierReady = true; + return prepared ?? 
null; } } -// Mock GraphitiClient -class MockGraphitiClient implements Partial { - public searchFactsResult: GraphitiFact[] = []; - public searchNodesResult: GraphitiNode[] = []; - public episodesResult: any[] = []; - public searchFactsCalls: Array<{ - query: string; - groupIds: string[]; - maxFacts: number; - }> = []; - public searchNodesCalls: Array<{ - query: string; - groupIds: string[]; - maxNodes: number; - }> = []; - public getEpisodesCalls: Array<{ groupId: string; lastN: number }> = []; +class MockRedisEvents { + calls: Array<{ sessionId: string; groupId: string; summary: string }> = []; - async searchFacts(params: { - query: string; - groupIds?: string[]; - maxFacts?: number; - }): Promise { - this.searchFactsCalls.push({ - query: params.query, - groupIds: params.groupIds || [], - maxFacts: params.maxFacts || 10, - }); - return Promise.resolve(this.searchFactsResult); + recordEvent( + sessionId: string, + groupId: string, + event: { summary: string }, + ) { + this.calls.push({ sessionId, groupId, summary: event.summary }); + return this.calls.length; } +} - async searchNodes(params: { - query: string; - groupIds?: string[]; - maxNodes?: number; - }): Promise { - this.searchNodesCalls.push({ - query: params.query, - groupIds: params.groupIds || [], - maxNodes: params.maxNodes || 10, - }); - return Promise.resolve(this.searchNodesResult); +class MockGraphitiAsync { + refreshCalls: Array<{ groupId: string; query: string }> = []; + drainCalls: string[] = []; + + scheduleCacheRefresh(groupId: string, query: string) { + this.refreshCalls.push({ groupId, query }); } - async getEpisodes(params: { - groupId?: string; - lastN?: number; - }): Promise { - this.getEpisodesCalls.push({ - groupId: params.groupId || "", - lastN: params.lastN || 10, - }); - // Mirror the real GraphitiClient boundary: normalize casing so tests - // that supply snake_case source_description are handled correctly. 
- return Promise.resolve(this.episodesResult.map(normalizeEpisode)); + scheduleDrain(groupId: string) { + this.drainCalls.push(groupId); } } -describe("chat handler integration", () => { - describe("initial injection", () => { - it("should inject on first message with facts and nodes", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = [ - { uuid: "f1", fact: "Test fact 1" }, - { uuid: "f2", fact: "Test fact 2" }, - ]; - client.searchNodesResult = [ - { uuid: "n1", name: "Node 1" }, - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello world" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - assertEquals(state.injectedMemories, true); - assertEquals(state.cachedMemoryContext !== undefined, true); - assertEquals(state.messageCount, 1); - assertEquals(state.pendingMessages.length, 1); - assertEquals(state.pendingMessages[0], "User: Hello world"); - - // Should search project and user contexts - assertEquals(client.searchFactsCalls.length, 2); - assertEquals(client.searchNodesCalls.length, 2); - - // First call: project facts - assertEquals(client.searchFactsCalls[0].groupIds, ["test:project"]); - assertEquals(client.searchFactsCalls[0].maxFacts, 50); - - // Second call: user facts - assertEquals(client.searchFactsCalls[1].groupIds, ["test:user"]); - assertEquals(client.searchFactsCalls[1].maxFacts, 20); - }); - - it("should not inject when no facts or nodes found", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = []; - client.searchNodesResult = []; - - const handler = createChatHandler({ - sessionManager: sessionManager as 
any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - assertEquals(state.injectedMemories, true); - assertEquals(state.cachedMemoryContext, undefined); - }); - - it("should load and include session snapshot on first injection", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = [ - { uuid: "f1", fact: "Test fact" }, - ]; - client.episodesResult = [ - { - uuid: "e1", - name: "Snapshot", - content: "Session snapshot content with strategy and questions", - sourceDescription: "session-snapshot", - created_at: "2026-02-14T12:00:00Z", - }, - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - assertEquals( - state.cachedMemoryContext?.includes("Session Snapshot"), - true, - ); - assertEquals( - state.cachedMemoryContext?.includes("Session snapshot content"), - true, - ); - assertEquals(client.getEpisodesCalls.length, 1); - assertEquals(client.getEpisodesCalls[0].lastN, 10); +describe("chat handler", () => { + it("records a user event, prepares session_memory, and schedules async refresh on cache miss", async () => { + const sessionManager = new MockSessionManager(); + const redisEvents = new MockRedisEvents(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createChatHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + graphitiAsync: graphitiAsync as never, + 
drainTriggerSize: 2, }); - it("should prefer most recent snapshot when multiple exist", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = [ - { uuid: "f1", fact: "Test fact" }, - ]; - client.episodesResult = [ - { - uuid: "e1", - content: "Old snapshot", - sourceDescription: "session-snapshot", - created_at: "2026-02-01T12:00:00Z", - }, - { - uuid: "e2", - content: "Recent snapshot", - sourceDescription: "session-snapshot", - created_at: "2026-02-14T12:00:00Z", - }, - { - uuid: "e3", - content: "Middle snapshot", - sourceDescription: "session-snapshot", - created_at: "2026-02-10T12:00:00Z", - }, - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); + await handler( + { sessionID: "session-1" }, + { parts: [{ type: "text", text: "Continue the migration" }] } as never, + ); + + assertEquals(redisEvents.calls.length >= 1, true); + assertEquals(redisEvents.calls[0].sessionId, "session-1"); + assertEquals(sessionManager.state.messageCount, 1); + assertEquals(sessionManager.state.injectedMemories, true); + assertEquals(sessionManager.state.pendingMessages, [ + "User: Continue the migration", + ]); + assertStringIncludes( + sessionManager.state.pendingInjection?.envelope ?? 
"", + " { + const sessionManager = new MockSessionManager(); + const redisEvents = new MockRedisEvents(); + const graphitiAsync = new MockGraphitiAsync(); - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - assertEquals( - state.cachedMemoryContext?.includes("Recent snapshot"), - true, - ); - assertEquals(state.cachedMemoryContext?.includes("Old snapshot"), false); + const handler = createChatHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, }); - it("should handle snake_case source_description field", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = [ - { uuid: "f1", fact: "Test fact" }, - ]; - client.episodesResult = [ - { - uuid: "e1", - content: "Snapshot content", - source_description: "session-snapshot", // snake_case - created_at: "2026-02-14T12:00:00Z", - }, - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); + await handler( + { sessionID: "session-1" }, + { + parts: [{ + type: "text", + text: "Please keep Graphiti off the hot path", + }], + } as never, + ); + + assertEquals(redisEvents.calls.length, 3); + }); - const state = sessionManager.getState("session-1"); - assertEquals( - state.cachedMemoryContext?.includes("Snapshot content"), - true, - ); - }); + it("schedules a drain when the pending queue reaches the trigger threshold", async () => { + const sessionManager = new MockSessionManager(); + const _redisEvents = new MockRedisEvents(); + const graphitiAsync = new MockGraphitiAsync(); 
- it("should truncate snapshot to budget (1200 chars)", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = [ - { uuid: "f1", fact: "Test fact" }, - ]; - const longContent = "A".repeat(2000); - client.episodesResult = [ - { - uuid: "e1", - content: longContent, - sourceDescription: "session-snapshot", - created_at: "2026-02-14T12:00:00Z", + const handler = createChatHandler({ + sessionManager: sessionManager as never, + redisEvents: { + recordEvent() { + return 3; }, - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - const snapshotSection = state.cachedMemoryContext?.match( - /## Session Snapshot[\s\S]*?(?=\n\n#|$)/, - )?.[0]; - // Snapshot budget is min(characterBudget, 1200), so should be capped - // Header is ~110 chars + 1200 content = ~1310 total - assertStrictEquals( - (snapshotSection?.length || 0) <= 1320, - true, - ); + } as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 2, }); - it("should handle getEpisodes error gracefully", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = [ - { uuid: "f1", fact: "Test fact" }, - ]; - client.getEpisodes = async () => { - throw new Error("Network error"); - }; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); + await handler( + { sessionID: "session-1" }, + { parts: [{ type: "text", text: "Queue enough work" }] } as never, + ); - try { - 
setLoggerSilentOverride(true); - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - } finally { - setLoggerSilentOverride(false); - } - - const state = sessionManager.getState("session-1"); - // Should still inject without snapshot - assertEquals(state.injectedMemories, true); - assertEquals( - state.cachedMemoryContext?.includes("Session Snapshot"), - false, - ); - }); + assertEquals(graphitiAsync.drainCalls, ["group-1"]); }); - describe("drift detection", () => { - it("should trigger reinjection when similarity is below threshold", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - // First message - initial injection - client.searchFactsResult = [ - { uuid: "f1", fact: "Fact 1" }, - { uuid: "f2", fact: "Fact 2" }, - ]; - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "First message" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - assertEquals(state.lastInjectionFactUuids.length, 2); - - // Second message - different facts (low similarity) - client.searchFactsResult = [ - { uuid: "f3", fact: "Fact 3" }, - { uuid: "f4", fact: "Fact 4" }, - ]; - client.searchNodesResult = []; - - const callsBefore = client.searchFactsCalls.length; - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Second message" }] } as any, - ); - - // Drift-check result is reused as project facts, so only 1 new call total. 
- assertEquals(client.searchFactsCalls.length, callsBefore + 1); - assertEquals(client.searchFactsCalls.at(-1)?.maxFacts, 50); - - // Should have updated cached context - const updatedState = sessionManager.getState("session-1"); - assertEquals(updatedState.cachedMemoryContext !== undefined, true); - }); - - it("should NOT reinjection when similarity is above threshold", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - // First message - initial injection - client.searchFactsResult = [ - { uuid: "f1", fact: "Fact 1" }, - { uuid: "f2", fact: "Fact 2" }, - ]; - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "First message" }] } as any, - ); - - // Second message - same facts (high similarity) - client.searchFactsResult = [ - { uuid: "f1", fact: "Fact 1" }, - { uuid: "f2", fact: "Fact 2" }, - ]; - - const callsBefore = client.searchFactsCalls.length; - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Second message" }] } as any, - ); - - // Should only perform drift check (1 call), no full search - assertEquals(client.searchFactsCalls.length, callsBefore + 1); - }); - - it("should compute Jaccard similarity correctly", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.4, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - // First injection: {f1, f2, f3} - client.searchFactsResult = [ - { uuid: "f1", fact: "Fact 1" }, - { uuid: "f2", fact: "Fact 2" }, - { uuid: "f3", fact: "Fact 3" }, - ]; - await handler( - { sessionID: 
"session-1" }, - { parts: [{ type: "text", text: "First message" }] } as any, - ); - - // Second message: {f2, f3, f4} - // Intersection: {f2, f3} = 2 - // Union: {f1, f2, f3, f4} = 4 - // Jaccard = 2/4 = 0.5 > 0.4 threshold - client.searchFactsResult = [ - { uuid: "f2", fact: "Fact 2" }, - { uuid: "f3", fact: "Fact 3" }, - { uuid: "f4", fact: "Fact 4" }, - ]; - - const callsBefore = client.searchFactsCalls.length; - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Second message" }] } as any, - ); - - // Similarity 0.5 > 0.4, should NOT reinjection - assertEquals(client.searchFactsCalls.length, callsBefore + 1); + it("skips async refresh when cache is fresh and aligned", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.nextRefreshDecision = { + classification: "aligned", + shouldRefresh: false, + similarity: 0.5, + threshold: 0.5, + cachedQuery: "continue migration", + }; + const redisEvents = new MockRedisEvents(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createChatHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, }); - it("should handle empty fact sets correctly", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); + await handler( + { sessionID: "session-1" }, + { parts: [{ type: "text", text: "Continue migration" }] } as never, + ); - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - // First injection with facts - client.searchFactsResult = [ - { uuid: "f1", fact: "Fact 1" }, - ]; - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "First message" }] } as any, - ); - - // Second message with no facts - client.searchFactsResult 
= []; - - const callsBefore = client.searchFactsCalls.length; - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Second message" }] } as any, - ); - - // Empty current vs non-empty last = similarity 0 < threshold - // Task 8: drift-check result is reused as project facts, so only 1 new call total. - assertEquals(client.searchFactsCalls.length, callsBefore + 1); - }); - - it("should handle both empty fact sets (edge case)", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - // First injection with no facts - client.searchFactsResult = []; - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "First message" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - assertEquals(state.lastInjectionFactUuids.length, 0); - - // Second message also with no facts - client.searchFactsResult = []; - - const callsBefore = client.searchFactsCalls.length; - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Second message" }] } as any, - ); - - // Empty vs empty = similarity 1.0 > threshold - // Should NOT trigger reinjection - assertEquals(client.searchFactsCalls.length, callsBefore + 1); - }); + assertEquals(graphitiAsync.refreshCalls, []); }); - describe("edge cases", () => { - it("should ignore subagent sessions", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("subagent-1", "parent-session"); - - await handler( - { sessionID: "subagent-1" }, - { parts: [{ 
type: "text", text: "Subagent message" }] } as any, - ); - - // Should not search or inject - assertEquals(client.searchFactsCalls.length, 0); - assertEquals(sessionManager.getState("subagent-1"), undefined); - }); - - it("should ignore messages without text content", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "tool_use", name: "test" }] } as any, - ); - - // Should not search or inject - assertEquals(client.searchFactsCalls.length, 0); - }); - - it("should handle messages with multiple text parts", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = [ - { uuid: "f1", fact: "Test fact" }, - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { - parts: [ - { type: "text", text: "First part" }, - { type: "text", text: "Second part" }, - ], - } as any, - ); - - const state = sessionManager.getState("session-1"); - assertEquals(state.pendingMessages[0], "User: First part Second part"); - }); - - it("should handle session resolution failure", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - // Don't set parent ID, so resolution fails - await handler( - { sessionID: "unknown-session" }, - { parts: [{ type: "text", text: "Message" 
}] } as any, - ); - - // Should not crash or search - assertEquals(client.searchFactsCalls.length, 0); - }); - - it("should handle search failures gracefully", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFacts = async () => { - throw new Error("Search failed"); - }; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - try { - setLoggerSilentOverride(true); - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - } finally { - setLoggerSilentOverride(false); - } - - const state = sessionManager.getState("session-1"); - // Should NOT mark as injected on search failure - assertEquals(state.injectedMemories, false); - assertEquals(state.cachedMemoryContext, undefined); - }); - - it("should deduplicate facts from project and user scopes", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - let callCount = 0; - client.searchFacts = async (params) => { - callCount++; - client.searchFactsCalls.push(params as any); - if (callCount === 1) { - // Project facts - return [ - { uuid: "f1", fact: "Fact 1" }, - { uuid: "f2", fact: "Fact 2" }, - ]; - } else { - // User facts - includes duplicate - return [ - { uuid: "f2", fact: "Fact 2" }, - { uuid: "f3", fact: "Fact 3" }, - ]; - } - }; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - // Should have deduplicated f2, so only {f1, f2, f3} - 
assertEquals(state.lastInjectionFactUuids.length, 3); - assertEquals(state.lastInjectionFactUuids.includes("f1"), true); - assertEquals(state.lastInjectionFactUuids.includes("f2"), true); - assertEquals(state.lastInjectionFactUuids.includes("f3"), true); + it("does not schedule async refresh when prepareInjection returns null during a race", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.prepareInjectionResult = null; + const redisEvents = new MockRedisEvents(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createChatHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, }); - it("should remove orphan nodes (nodes referenced by facts)", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = [ - { - uuid: "f1", - fact: "Fact 1", - source_node: { uuid: "n1", name: "Node 1" }, - }, - ]; - client.searchNodesResult = [ - { uuid: "n1", name: "Node 1" }, // Referenced by fact - { uuid: "n2", name: "Node 2" }, // Orphan - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - // Should only include Node 2 (orphan), Node 1 is referenced - assertEquals(state.cachedMemoryContext?.includes("Node 2"), true); - // Node 1 should not appear in nodes section (only in fact edge) - }); - - it("should filter out invalid facts (invalid_at in past)", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = [ - { - uuid: "f1", - fact: "Valid fact", - 
valid_at: "2026-02-01T00:00:00Z", - }, - { - uuid: "f2", - fact: "Invalid fact", - invalid_at: "2026-01-01T00:00:00Z", // Already invalid - }, - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - assertEquals(state.cachedMemoryContext?.includes("Valid fact"), true); - assertEquals(state.cachedMemoryContext?.includes("Invalid fact"), false); - }); - - it("should filter out future facts (valid_at in future)", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); + await handler( + { sessionID: "session-1" }, + { parts: [{ type: "text", text: "Race the refresh" }] } as never, + ); + + assertEquals(sessionManager.prepareInjectionCalls, [{ + sessionId: "session-1", + lastRequest: "Race the refresh", + }]); + assertEquals(sessionManager.state.injectedMemories, false); + assertEquals(sessionManager.state.pendingInjection, undefined); + assertEquals(graphitiAsync.refreshCalls, []); + }); - client.searchFactsResult = [ + it("refreshes stale cache, primer-only cache, and drifted cache", async () => { + for ( + const decision of [ { - uuid: "f1", - fact: "Current fact", - valid_at: "2026-02-01T00:00:00Z", + classification: "stale", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: "older query", }, { - uuid: "f2", - fact: "Future fact", - valid_at: "2026-12-01T00:00:00Z", // Future + classification: "primer-only", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: "primer", }, - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - 
sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - assertEquals(state.cachedMemoryContext?.includes("Current fact"), true); - assertEquals(state.cachedMemoryContext?.includes("Future fact"), false); - }); - - it("should annotate stale facts", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - // Fact from 60 days ago (stale if factStaleDays=30) - const sixtyDaysAgo = new Date(); - sixtyDaysAgo.setDate(sixtyDaysAgo.getDate() - 60); - - client.searchFactsResult = [ { - uuid: "f1", - fact: "Old fact", - valid_at: sixtyDaysAgo.toISOString(), + classification: "drifted", + shouldRefresh: true, + similarity: 0.2, + threshold: 0.5, + cachedQuery: "old topic", }, - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - assertEquals(state.cachedMemoryContext?.includes("[stale:"), true); - assertEquals(state.cachedMemoryContext?.includes("days ago]"), true); - }); - - it("should respect character budget from context limit", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - // Create many facts that would exceed budget - client.searchFactsResult = Array.from({ length: 100 }, (_, i) => ({ - uuid: `f${i}`, - fact: `This is test fact number ${i} with some content to fill space`, - })); - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", 
null); - - const state = await sessionManager.resolveSessionState("session-1"); - state.state!.contextLimit = 10_000; // Small context limit - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - - const finalState = sessionManager.getState("session-1"); - // Budget = 10_000 * 0.05 * 4 = 2000 chars - const budget = 10_000 * 0.05 * 4; - assertStrictEquals( - (finalState.cachedMemoryContext?.length || 0) <= budget, - true, - ); - }); - }); - - describe("budget allocation", () => { - it("should allocate 70% to project and 30% to user on first injection", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - let callCount = 0; - client.searchFacts = async (params) => { - callCount++; - client.searchFactsCalls.push(params as any); - return [ - { uuid: `f${callCount}`, fact: "A".repeat(1000) }, - ]; - }; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - const state = await sessionManager.resolveSessionState("session-1"); - state.state!.contextLimit = 10_000; - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Hello" }] } as any, - ); - - const finalState = sessionManager.getState("session-1"); - // Total budget = 2000 chars - // Should be split 70/30 between project and user - assertStrictEquals( - (finalState.cachedMemoryContext?.length || 0) <= 2000, - true, - ); - }); - - it("should not search user scope on reinjection", async () => { + ] + ) { const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); + sessionManager.nextRefreshDecision = decision; + const graphitiAsync = new MockGraphitiAsync(); const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client 
as any, + sessionManager: sessionManager as never, + redisEvents: new MockRedisEvents() as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, }); - sessionManager.setParentId("session-1", null); - - // First injection - client.searchFactsResult = [ - { uuid: "f1", fact: "Fact 1" }, - ]; - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "First message" }] } as any, - ); - - const callsAfterFirst = client.searchFactsCalls.length; - - // Second message - trigger reinjection - client.searchFactsResult = [ - { uuid: "f2", fact: "Fact 2" }, - ]; await handler( { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Second message" }] } as any, + { parts: [{ type: "text", text: "Need a refresh" }] } as never, ); - // Should have exactly one new project-scope facts call on reinjection. - // The drift-check result is reused as project facts, and user scope is skipped. - const newCalls = client.searchFactsCalls.length - callsAfterFirst; - assertEquals(newCalls, 1); - assertEquals(client.searchFactsCalls.at(-1)?.maxFacts, 50); - }); - }); - - describe("message counting", () => { - it("should increment message count on each message", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = [ - { uuid: "f1", fact: "Fact 1" }, - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Message 1" }] } as any, - ); - - let state = sessionManager.getState("session-1"); - assertEquals(state.messageCount, 1); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Message 2" }] } as any, - ); - - state = sessionManager.getState("session-1"); - assertEquals(state.messageCount, 2); - }); 
- - it("should buffer pending messages", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - client.searchFactsResult = [ - { uuid: "f1", fact: "Fact 1" }, - ]; - - const handler = createChatHandler({ - sessionManager: sessionManager as any, - driftThreshold: 0.5, - factStaleDays: 30, - client: client as any, - }); - - sessionManager.setParentId("session-1", null); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "First message" }] } as any, - ); - - await handler( - { sessionID: "session-1" }, - { parts: [{ type: "text", text: "Second message" }] } as any, - ); - - const state = sessionManager.getState("session-1"); - assertEquals(state.pendingMessages.length, 2); - assertEquals(state.pendingMessages[0], "User: First message"); - assertEquals(state.pendingMessages[1], "User: Second message"); - }); + assertEquals(graphitiAsync.refreshCalls, [{ + groupId: "group-1", + query: "Need a refresh", + }]); + } }); }); diff --git a/src/handlers/chat.ts b/src/handlers/chat.ts index 2c41805..f680af4 100644 --- a/src/handlers/chat.ts +++ b/src/handlers/chat.ts @@ -1,306 +1,75 @@ import type { Hooks } from "@opencode-ai/plugin"; -import type { GraphitiClient } from "../services/client.ts"; -import { calculateInjectionBudget } from "../services/context-limit.ts"; -import { PROJECT_MAX_FACTS } from "../services/constants.ts"; -import { - formatMemoryContext, - resolveProjectUserContext, -} from "../services/context.ts"; +import type { GraphitiAsyncService } from "../services/graphiti-async.ts"; +import { extractStructuredEvents } from "../services/event-extractor.ts"; +import type { RedisEventsService } from "../services/redis-events.ts"; import { logger } from "../services/logger.ts"; import type { SessionManager } from "../session.ts"; -import { extractTextFromParts, truncateAtLineBoundary } from "../utils.ts"; +import { extractTextFromParts } from "../utils.ts"; type ChatMessageHook 
= NonNullable; type ChatMessageInput = Parameters[0]; type ChatMessageOutput = Parameters[1]; -type SearchFactsResult = Awaited>; -/** Dependencies for the chat message handler. */ export interface ChatHandlerDeps { sessionManager: SessionManager; - driftThreshold: number; - factStaleDays: number; - client: GraphitiClient; + redisEvents: RedisEventsService; + graphitiAsync: GraphitiAsyncService; + drainTriggerSize: number; } -/** Creates the `chat.message` hook handler. */ export function createChatHandler(deps: ChatHandlerDeps) { - const { sessionManager, driftThreshold, factStaleDays, client } = deps; - - /** - * Fetch project facts (and optionally user facts/nodes) then build and cache - * the formatted memory context string. - * - * Task 8: When `seedProjectFacts` is supplied (from the drift check), those - * facts are used directly for the project scope so we avoid a redundant - * second searchFacts query. - */ - const searchAndCacheMemoryContext = async ( - state: { - groupId: string; - userGroupId: string; - contextLimit: number; - lastInjectionFactUuids: string[]; - cachedMemoryContext?: string; - cachedFactUuids?: string[]; - visibleFactUuids?: string[]; - }, - messageText: string, - useUserScope: boolean, - characterBudget: number, - seedProjectFacts?: SearchFactsResult, - ) => { - const userGroupId = state.userGroupId; - - // Task 8: reuse drift-check project facts when available; only issue a new - // project searchFacts call when we don't already have them. - const projectFactsPromise: Promise = - seedProjectFacts != null - ? Promise.resolve(seedProjectFacts) - : client.searchFacts({ - query: messageText, - groupIds: [state.groupId], - maxFacts: PROJECT_MAX_FACTS, - }); - - const projectNodesPromise = client.searchNodes({ - query: messageText, - groupIds: [state.groupId], - maxNodes: 30, - }); - const userFactsPromise = useUserScope && userGroupId - ? 
client.searchFacts({ - query: messageText, - groupIds: [userGroupId], - maxFacts: 20, - }) - : Promise.resolve([]); - const userNodesPromise = useUserScope && userGroupId - ? client.searchNodes({ - query: messageText, - groupIds: [userGroupId], - maxNodes: 10, - }) - : Promise.resolve([]); - - const { - projectContext, - userContext, - projectFacts, - projectNodes, - userFacts, - userNodes, - } = await resolveProjectUserContext({ - projectFacts: projectFactsPromise, - projectNodes: projectNodesPromise, - userFacts: userFactsPromise, - userNodes: userNodesPromise, - }); - - const visibleSet = new Set(state.visibleFactUuids ?? []); - const beforeProjectFacts = projectContext.facts.length; - const beforeUserFacts = userContext.facts.length; - projectContext.facts = projectContext.facts.filter((fact) => - !visibleSet.has(fact.uuid) - ); - userContext.facts = userContext.facts.filter((fact) => - !visibleSet.has(fact.uuid) - ); - logger.debug("Filtered visible facts from injection", { - visibleCount: visibleSet.size, - filteredProjectFacts: beforeProjectFacts - projectContext.facts.length, - filteredUserFacts: beforeUserFacts - userContext.facts.length, - remainingProjectFacts: projectContext.facts.length, - remainingUserFacts: userContext.facts.length, - }); - - if ( - projectContext.facts.length === 0 && - userContext.facts.length === 0 && - projectContext.nodes.length === 0 && - userContext.nodes.length === 0 - ) { - logger.debug("All facts filtered; skipping context cache", { - groupId: state.groupId, - userGroupId: state.userGroupId, - }); - return; - } - const projectContextString = formatMemoryContext( - projectContext.facts, - projectContext.nodes, - { factStaleDays }, - ); - const userContextString = formatMemoryContext( - userContext.facts, - userContext.nodes, - { factStaleDays }, - ); - if (!projectContextString && !userContextString) return; - - let snapshotPrimer = ""; - if (useUserScope && characterBudget > 0) { - try { - const episodes = await 
client.getEpisodes({ - groupId: state.groupId, - lastN: 10, - }); - const snapshot = episodes - .filter((episode) => { - const description = episode.sourceDescription ?? ""; - return description === "session-snapshot"; - }) - .sort((a, b) => { - const aTime = a.created_at ? Date.parse(a.created_at) : 0; - const bTime = b.created_at ? Date.parse(b.created_at) : 0; - return bTime - aTime; - })[0]; - if (snapshot?.content) { - // Task 2: truncate snapshot at a line boundary. - const snapshotBudget = Math.min(characterBudget, 1200); - const snapshotBody = truncateAtLineBoundary( - snapshot.content, - snapshotBudget, - ); - snapshotPrimer = [ - "## Session Snapshot", - "> Most recent session snapshot; use to restore active strategy and open questions.", - "", - snapshotBody, - ].join("\n"); - } - } catch (err) { - logger.error("Failed to load session snapshot", { err }); - } - } - - // Task 2: truncate project/user context strings at line boundaries. - const projectBudget = useUserScope - ? Math.floor(characterBudget * 0.7) - : characterBudget; - const userBudget = characterBudget - projectBudget; - const truncatedProject = truncateAtLineBoundary( - projectContextString, - projectBudget, - ); - const truncatedUser = useUserScope - ? truncateAtLineBoundary(userContextString, userBudget) - : ""; - - // Task 2: final combined context also truncated at a line boundary. 
- const combined = [snapshotPrimer, truncatedProject, truncatedUser] - .filter((section) => section.trim().length > 0) - .join("\n\n"); - const memoryContext = truncateAtLineBoundary(combined, characterBudget); - if (!memoryContext) return; - - const allFactUuids = [ - ...projectContext.facts.map((fact) => fact.uuid), - ...userContext.facts.map((fact) => fact.uuid), - ]; - const factUuids = Array.from(new Set(allFactUuids)); - state.cachedMemoryContext = memoryContext; - state.cachedFactUuids = factUuids; - logger.info( - `Cached ${projectFacts.length + userFacts.length} facts and ${ - projectNodes.length + userNodes.length - } nodes for user message injection`, - ); - state.lastInjectionFactUuids = factUuids; - }; - - const computeJaccardSimilarity = ( - left: string[], - right: string[], - ): number => { - if (left.length === 0 && right.length === 0) return 1; - const leftSet = new Set(left); - const rightSet = new Set(right); - let intersection = 0; - for (const value of leftSet) { - if (rightSet.has(value)) intersection += 1; - } - const union = leftSet.size + rightSet.size - intersection; - return union === 0 ? 
1 : intersection / union; - }; + const { sessionManager, redisEvents, graphitiAsync, drainTriggerSize } = deps; return async ({ sessionID }: ChatMessageInput, output: ChatMessageOutput) => { + sessionManager.markSessionActive(sessionID); const { state, resolved } = await sessionManager.resolveSessionState( sessionID, ); - if (!resolved) { - logger.debug("Unable to resolve session for message:", { sessionID }); - return; - } - - if (!state?.isMain) { - logger.debug("Ignoring subagent chat message:", sessionID); - return; - } + if (!resolved || !state?.isMain) return; - state.messageCount++; const messageText = extractTextFromParts(output.parts); if (!messageText) return; + state.messageCount += 1; + state.latestUserRequest = messageText; + state.latestRefreshQuery = messageText; state.pendingMessages.push(`User: ${messageText}`); - logger.info("Buffered user message", { - hook: "chat.message", - sessionID, - messageLength: messageText.length, - }); - - const shouldInjectOnFirst = !state.injectedMemories; - - // Task 8: driftFacts from the drift check are passed into - // searchAndCacheMemoryContext so the project searchFacts is not repeated. 
- let driftProjectFacts: SearchFactsResult | null = null; - - if (!shouldInjectOnFirst) { - try { - const fetched = await client.searchFacts({ - query: messageText, - groupIds: [state.groupId], - maxFacts: PROJECT_MAX_FACTS, - }); - driftProjectFacts = fetched; - const currentFactUuids = fetched.map((fact) => fact.uuid); - const similarity = computeJaccardSimilarity( - currentFactUuids, - state.lastInjectionFactUuids, - ); - const shouldReinject = similarity < driftThreshold; - if (!shouldReinject) { - logger.debug("Skipping reinjection; similarity above threshold", { - sessionID, - similarity, - }); - return; - } - } catch (err) { - logger.error("Failed to check topic drift, skipping reinjection", { - sessionID, - err, - }); - return; - } - } - try { - const useUserScope = shouldInjectOnFirst; - const characterBudget = calculateInjectionBudget(state.contextLimit); - await searchAndCacheMemoryContext( - state, + let queueLength = 0; + for ( + const event of extractStructuredEvents({ + eventType: "chat.message", + sessionId: sessionID, messageText, - useUserScope, - characterBudget, - // Task 8: on reinjection, pass the drift facts so the project query is - // not duplicated. On first injection driftProjectFacts is null, which - // triggers a full maxFacts=PROJECT_MAX_FACTS project search. - driftProjectFacts ?? 
undefined, + messageCount: state.messageCount, + role: "user", + }) + ) { + queueLength = await redisEvents.recordEvent( + sessionID, + state.groupId, + event, ); + } + + const prepared = await sessionManager.prepareInjection( + sessionID, + messageText, + ); + if (prepared) { state.injectedMemories = true; - } catch (err) { - logger.error("Failed to inject memories:", err); + } + logger.info("Prepared local session memory for chat transform", { + sessionID, + hotTierReady: state.hotTierReady, + refreshClassification: prepared?.refreshDecision.classification, + }); + + if (prepared && prepared.refreshDecision.shouldRefresh) { + graphitiAsync.scheduleCacheRefresh(state.groupId, messageText); + } + if (queueLength >= drainTriggerSize) { + graphitiAsync.scheduleDrain(state.groupId); } }; } diff --git a/src/handlers/compacting.test.ts b/src/handlers/compacting.test.ts index 0110660..c70f0ec 100644 --- a/src/handlers/compacting.test.ts +++ b/src/handlers/compacting.test.ts @@ -1,562 +1,29 @@ -import { assert, assertEquals } from "jsr:@std/assert@^1.0.0"; +import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import { setLoggerSilentOverride } from "../services/logger.ts"; -import type { GraphitiFact, GraphitiNode } from "../types/index.ts"; -import type { SessionManager, SessionState } from "../session.ts"; -import type { GraphitiClient } from "../services/client.ts"; import { createCompactingHandler } from "./compacting.ts"; -// Mock SessionManager -class MockSessionManager implements Partial { - private sessions = new Map(); - - setState(sessionId: string, state: SessionState): void { - this.sessions.set(sessionId, state); - } - - getState(sessionId: string): SessionState | undefined { - return this.sessions.get(sessionId); - } -} - -// Mock GraphitiClient -class MockGraphitiClient implements Partial { - public searchFactsCalls: Array<{ - query: string; - groupIds?: string[]; - 
maxFacts?: number; - }> = []; - - public searchNodesCalls: Array<{ - query: string; - groupIds?: string[]; - maxNodes?: number; - }> = []; - - private mockFacts: GraphitiFact[] = []; - private mockNodes: GraphitiNode[] = []; - - setMockFacts(facts: GraphitiFact[]): void { - this.mockFacts = facts; - } - - setMockNodes(nodes: GraphitiNode[]): void { - this.mockNodes = nodes; - } - - async searchFacts(params: { - query: string; - groupIds?: string[]; - maxFacts?: number; - }): Promise { - this.searchFactsCalls.push(params); - return this.mockFacts; - } - - async searchNodes(params: { - query: string; - groupIds?: string[]; - maxNodes?: number; - }): Promise { - this.searchNodesCalls.push(params); - return this.mockNodes; - } - - reset(): void { - this.searchFactsCalls = []; - this.searchNodesCalls = []; - this.mockFacts = []; - this.mockNodes = []; - } -} - -describe("compacting handler integration", () => { - describe("basic functionality", () => { - it("should inject compaction context for main session", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - // Set up mock facts - client.setMockFacts([ - { - uuid: "fact-1", - fact: "User decided to use TypeScript", - valid_at: new Date().toISOString(), - }, - ]); - - const handler = createCompactingHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - factStaleDays: 30, - }); - - const output = { context: ["Some existing context"] }; - await handler({ sessionID: "session-1" }, output); - - // Should have added context - assert(output.context.length > 1); - // Should have called searchFacts - assert(client.searchFactsCalls.length > 0); - 
}); - - it("should ignore non-main sessions", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: false, // Non-main session - }); - - const handler = createCompactingHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - factStaleDays: 30, - }); - - const output = { context: ["Some existing context"] }; - await handler({ sessionID: "session-1" }, output); - - // Should not have added context - assertEquals(output.context.length, 1); - assertEquals(output.context[0], "Some existing context"); - // Should not have called searchFacts - assertEquals(client.searchFactsCalls.length, 0); - }); - - it("should handle missing session state", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - const handler = createCompactingHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - factStaleDays: 30, - }); - - const output = { context: ["Some existing context"] }; - await handler({ sessionID: "non-existent" }, output); - - // Should not have added context - assertEquals(output.context.length, 1); - // Should not have called searchFacts - assertEquals(client.searchFactsCalls.length, 0); - }); - - it("should handle empty context strings gracefully", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - 
isMain: true, - }); - - const handler = createCompactingHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - factStaleDays: 30, - }); - - const output = { context: [] }; - await handler({ sessionID: "session-1" }, output); - - // Should not crash, no context added (empty query) - assertEquals(output.context.length, 0); - }); - }); - - describe("fact classification and budgeting", () => { - it("should classify facts into decisions, active, and background", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - const now = new Date(); - const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000); - const thirtyDaysAgo = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000); - - // Set up facts with different classifications - client.setMockFacts([ - { - uuid: "decision-1", - fact: "Team decided to use Deno for this project", - valid_at: thirtyDaysAgo.toISOString(), - }, - { - uuid: "active-1", - fact: "User is working on authentication module", - valid_at: now.toISOString(), +describe("compacting handler", () => { + it("injects locally prepared session_memory without Graphiti reads", async () => { + const handler = createCompactingHandler({ + sessionManager: { + getState() { + return { isMain: true, hotTierReady: true }; }, - { - uuid: "background-1", - fact: "Project started in January", - valid_at: thirtyDaysAgo.toISOString(), + prepareInjection() { + return { + envelope: + '', + factUuids: [], + nodeRefs: [], + }; }, - ]); - - const handler = createCompactingHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - 
factStaleDays: 30, - }); - - const output = { context: ["Some query text for searching"] }; - await handler({ sessionID: "session-1" }, output); - - // Should have added context with classification - assert(output.context.length > 1); - const injectedContext = output.context[1]; - - // Check for XML tags - assert(injectedContext.includes("")); - assert(injectedContext.includes("")); - assert(injectedContext.includes("")); - assert(injectedContext.includes("")); - assert(injectedContext.includes("")); - assert(injectedContext.includes("")); + } as never, }); - it("should allocate budget 40/35/25 for decisions/active/background", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 10_000, // Small budget to test allocation - isMain: true, - }); - - const now = new Date(); - const facts: GraphitiFact[] = []; - - // Create many facts to test budget allocation - for (let i = 0; i < 20; i++) { - facts.push({ - uuid: `decision-${i}`, - fact: `Team decided to ${i} use pattern ${i}`, - valid_at: now.toISOString(), - }); - } - - for (let i = 0; i < 20; i++) { - facts.push({ - uuid: `active-${i}`, - fact: `User is working on feature ${i}`, - valid_at: now.toISOString(), - }); - } - - for (let i = 0; i < 20; i++) { - const oldDate = new Date( - now.getTime() - 30 * 24 * 60 * 60 * 1000, - ); - facts.push({ - uuid: `background-${i}`, - fact: `Historical context ${i}`, - valid_at: oldDate.toISOString(), - }); - } - - client.setMockFacts(facts); - - const handler = createCompactingHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - factStaleDays: 30, - }); + const output = { context: ["existing"] }; + await handler({ sessionID: 
"session-1" }, output as never); - const output = { context: ["Some query text for searching"] }; - await handler({ sessionID: "session-1" }, output); - - // Should have added context - assert(output.context.length > 1); - const injectedContext = output.context[1]; - - // All three sections should be present - assert(injectedContext.includes("")); - assert(injectedContext.includes("")); - assert(injectedContext.includes("")); - }); - }); - - describe("XML output format", () => { - it("should wrap output in proper XML tags", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - client.setMockFacts([ - { - uuid: "fact-1", - fact: "User decided to use TypeScript", - valid_at: new Date().toISOString(), - }, - ]); - - const handler = createCompactingHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - factStaleDays: 30, - }); - - const output = { context: ["Some query text"] }; - await handler({ sessionID: "session-1" }, output); - - assert(output.context.length > 1); - const injectedContext = output.context[1]; - - // Check for XML structure - assert(injectedContext.includes("")); - assert(injectedContext.includes("")); - assert(injectedContext.includes("")); - assert(injectedContext.includes("")); - assert(injectedContext.includes('')); - assert(injectedContext.includes("")); - }); - - it("should include instruction for background context", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - 
lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - client.setMockFacts([ - { - uuid: "fact-1", - fact: "Some background fact", - valid_at: new Date().toISOString(), - }, - ]); - - const handler = createCompactingHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - factStaleDays: 30, - }); - - const output = { context: ["Some query text"] }; - await handler({ sessionID: "session-1" }, output); - - assert(output.context.length > 1); - const injectedContext = output.context[1]; - - // Check for instruction tag - assert( - injectedContext.includes( - "Background context only; do not reference in titles, summaries, or opening responses unless directly relevant.", - ), - ); - }); - }); - - describe("user and project context", () => { - it("should query both project and user groups", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - client.setMockFacts([ - { - uuid: "project-fact", - fact: "Project uses TypeScript", - valid_at: new Date().toISOString(), - }, - { - uuid: "user-fact", - fact: "User prefers tabs over spaces", - valid_at: new Date().toISOString(), - }, - ]); - - const handler = createCompactingHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - factStaleDays: 30, - }); - - const output = { context: ["Some query text"] }; - await handler({ sessionID: "session-1" }, output); - - // Should have queried both project and user groups - assert(client.searchFactsCalls.length >= 2); - - const projectCalls = 
client.searchFactsCalls.filter((call) => - call.groupIds?.includes("test:project") - ); - const userCalls = client.searchFactsCalls.filter((call) => - call.groupIds?.includes("test:user") - ); - - assert(projectCalls.length > 0); - assert(userCalls.length > 0); - }); - - it("should handle sessions with only project groupId", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "", // Empty user group (not configured) - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - client.setMockFacts([ - { - uuid: "project-fact", - fact: "Project uses TypeScript", - valid_at: new Date().toISOString(), - }, - ]); - - const handler = createCompactingHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - factStaleDays: 30, - }); - - const output = { context: ["Some query text"] }; - await handler({ sessionID: "session-1" }, output); - - // Should still work with only project group - assert(output.context.length > 1); - }); - }); - - describe("error handling", () => { - it("should handle searchFacts errors gracefully", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - - // Override searchFacts to throw error - client.searchFacts = async () => { - throw new Error("Network error"); - }; - - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - const handler = createCompactingHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - factStaleDays: 
30, - }); - - const output = { context: ["Some query text"] }; - - // Should not throw - try { - setLoggerSilentOverride(true); - await handler({ sessionID: "session-1" }, output); - } finally { - setLoggerSilentOverride(false); - } - - // Should not have added context (error occurred) - assertEquals(output.context.length, 1); - }); + assertEquals(output.context.length, 2); + assertStringIncludes(output.context[1], "; type CompactingInput = Parameters[0]; type CompactingOutput = Parameters[1]; -/** Dependencies for the compacting handler. */ export interface CompactingHandlerDeps { sessionManager: SessionManager; - client: GraphitiClient; - defaultGroupId: string; - factStaleDays: number; } -/** Creates the `experimental.session.compacting` hook handler. */ export function createCompactingHandler(deps: CompactingHandlerDeps) { - const { sessionManager, client, defaultGroupId, factStaleDays } = deps; + const { sessionManager } = deps; return async ( { sessionID }: CompactingInput, output: CompactingOutput, ) => { const state = sessionManager.getState(sessionID); - if (!state?.isMain) { - logger.debug("Ignoring non-main compaction context:", sessionID); - return; - } + if (!state?.isMain) return; - const groupId = state.groupId || defaultGroupId; - const characterBudget = calculateInjectionBudget(state.contextLimit); - const additionalContext = await getCompactionContext({ - client, - characterBudget, - groupIds: { - project: groupId, - user: state.userGroupId, - }, - contextStrings: output.context, - factStaleDays, + const prepared = await sessionManager.prepareInjection(sessionID); + if (!prepared?.envelope) return; + output.context.push(prepared.envelope); + logger.info("Injected local session_memory into compaction context", { + sessionID, + hotTierReady: state.hotTierReady, }); - - if (additionalContext.length > 0) { - output.context.push(...additionalContext); - logger.info("Injected persistent knowledge into compaction context"); - } }; } diff --git 
a/src/handlers/event.test.ts b/src/handlers/event.test.ts index 1a34216..ffbbce0 100644 --- a/src/handlers/event.test.ts +++ b/src/handlers/event.test.ts @@ -1,53 +1,66 @@ -import { assertEquals, assertStrictEquals } from "jsr:@std/assert@^1.0.0"; +import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import type { GraphitiFact, GraphitiNode } from "../types/index.ts"; -import { setLoggerSilentOverride } from "../services/logger.ts"; -import type { SessionManager, SessionState } from "../session.ts"; -import type { GraphitiClient } from "../services/client.ts"; -import type { OpencodeClient } from "@opencode-ai/sdk"; import { createEventHandler } from "./event.ts"; - -// Mock SessionManager -class MockSessionManager implements Partial { - private sessions = new Map(); - private parentIds = new Map(); - public flushCalls: Array<{ - sessionId: string; - sourceDescription: string; - minBytes: number; - }> = []; - - async resolveSessionState(sessionId: string) { - const parentId = this.parentIds.get(sessionId); - if (parentId === undefined) return { state: null, resolved: false }; - if (parentId) { - this.sessions.delete(sessionId); - return { state: null, resolved: true }; +import type { SessionState } from "../session.ts"; + +class FakeClock { + now = 0; + nextId = 1; + timers = new Map void }>(); + + setTimer = (callback: () => void, delayMs: number): number => { + const id = this.nextId++; + this.timers.set(id, { at: this.now + delayMs, callback }); + return id; + }; + + clearTimer = (id: number): void => { + this.timers.delete(id); + }; + + tick(delayMs: number): void { + const target = this.now + delayMs; + while (true) { + const next = [...this.timers.entries()].sort((a, b) => a[1].at - b[1].at) + .find(([, timer]) => timer.at <= target); + if (!next) break; + const [id, timer] = next; + this.timers.delete(id); + this.now = timer.at; + timer.callback(); } - - const state = 
this.sessions.get(sessionId); - if (!state) return { state: null, resolved: false }; - return { state, resolved: true }; - } - - setParentId(sessionId: string, parentId: string | null) { - this.parentIds.set(sessionId, parentId); - } - - setState(sessionId: string, state: SessionState) { - this.sessions.set(sessionId, state); - } - - getState(sessionId: string): SessionState | undefined { - return this.sessions.get(sessionId); + this.now = target; } +} - async flushPendingMessages( - sessionId: string, - sourceDescription: string, - minBytes: number, - ): Promise { - this.flushCalls.push({ sessionId, sourceDescription, minBytes }); +class MockSessionManager { + sessions = new Map(); + parentIds = new Map(); + buffered = new Map(); + deletedSessions: string[] = []; + activeMarks: string[] = []; + idleCleanupCalls: string[] = []; + private readonly idleRetentionMs: number; + private readonly setTimerImpl: ( + callback: () => void, + delayMs: number, + ) => number; + private readonly clearTimerImpl: (timer: number) => void; + private lifecycles = new Map< + string, + { generation: number; timerId: number | null } + >(); + + constructor( + options: { + idleRetentionMs?: number; + setTimer?: (callback: () => void, delayMs: number) => number; + clearTimer?: (timer: number) => void; + } = {}, + ) { + this.idleRetentionMs = options.idleRetentionMs ?? 0; + this.setTimerImpl = options.setTimer ?? (() => 0); + this.clearTimerImpl = options.clearTimer ?? 
(() => {}); } createDefaultState(groupId: string, userGroupId: string): SessionState { @@ -56,21 +69,84 @@ class MockSessionManager implements Partial { userGroupId, injectedMemories: false, lastInjectionFactUuids: [], - cachedMemoryContext: undefined, - cachedFactUuids: undefined, visibleFactUuids: [], messageCount: 0, pendingMessages: [], contextLimit: 200_000, isMain: true, + hotTierReady: false, + latestUserRequest: undefined, + latestRefreshQuery: undefined, + pendingInjection: undefined, + pendingInjectionGeneration: 0, }; } + setParentId(sessionId: string, parentId: string | null) { + this.parentIds.set(sessionId, parentId); + } + + setState(sessionId: string, state: SessionState) { + this.sessions.set(sessionId, state); + } + + markSessionActive(sessionId: string) { + this.activeMarks.push(sessionId); + const lifecycle = this.getLifecycle(sessionId); + lifecycle.generation += 1; + if (lifecycle.timerId !== null) { + this.clearTimerImpl(lifecycle.timerId); + lifecycle.timerId = null; + } + } + + captureIdleCleanupGeneration(sessionId: string) { + const state = this.sessions.get(sessionId); + if (!state?.isMain) return null; + return this.getLifecycle(sessionId).generation; + } + + scheduleIdleSessionCleanup(sessionId: string, expectedGeneration?: number) { + this.idleCleanupCalls.push(sessionId); + const state = this.sessions.get(sessionId); + if (!state?.isMain) { + this.deleteSession(sessionId); + return; + } + const lifecycle = this.getLifecycle(sessionId); + if ( + expectedGeneration !== undefined && + lifecycle.generation !== expectedGeneration + ) { + return; + } + if (this.idleRetentionMs <= 0) { + this.deleteSession(sessionId); + return; + } + if (lifecycle.timerId !== null) this.clearTimerImpl(lifecycle.timerId); + const generation = expectedGeneration ?? 
lifecycle.generation; + lifecycle.timerId = this.setTimerImpl(() => { + const current = this.lifecycles.get(sessionId); + if (!current) return; + if (current.generation !== generation) return; + this.deleteSession(sessionId); + }, this.idleRetentionMs); + } + + getState(sessionId: string) { + return this.sessions.get(sessionId); + } + + resolveSessionState(sessionId: string) { + return { state: this.sessions.get(sessionId) ?? null, resolved: true }; + } + bufferAssistantPart(sessionId: string, messageId: string, text: string) { - // Simple mock implementation + this.buffered.set(`${sessionId}:${messageId}`, text); } - isAssistantBuffered(sessionId: string, messageId: string): boolean { + isAssistantBuffered() { return false; } @@ -78,1240 +154,701 @@ class MockSessionManager implements Partial { state: SessionState, sessionId: string, messageId: string, - source: string, - ): void { - // Simple mock implementation + ) { + const text = this.buffered.get(`${sessionId}:${messageId}`) ?? ""; + if (!text) return null; + state.pendingMessages.push(`Assistant: ${text}`); + return text; + } + + deletePendingAssistant() {} + + deleteSession(sessionId: string) { + this.deletedSessions.push(sessionId); + const lifecycle = this.lifecycles.get(sessionId); + if (lifecycle?.timerId != null) this.clearTimerImpl(lifecycle.timerId); + this.lifecycles.delete(sessionId); + this.sessions.delete(sessionId); + this.parentIds.delete(sessionId); + for (const key of [...this.buffered.keys()]) { + if (key.startsWith(`${sessionId}:`)) this.buffered.delete(key); + } } - deletePendingAssistant(sessionId: string, messageId: string): void { - // Simple mock implementation + private getLifecycle(sessionId: string) { + let lifecycle = this.lifecycles.get(sessionId); + if (!lifecycle) { + lifecycle = { generation: 0, timerId: null }; + this.lifecycles.set(sessionId, lifecycle); + } + return lifecycle; } } -// Mock GraphitiClient -class MockGraphitiClient implements Partial { - public 
addEpisodeCalls: Array<{ - name: string; - episodeBody: string; - groupId?: string; - source?: "text" | "json" | "message"; - sourceDescription?: string; +class MockRedisEvents { + calls: Array<{ + sessionId: string; + groupId: string; + summary: string; + category?: string; + body?: string; + continuityText?: string; }> = []; + touchedSessionIds: string[] = []; - async addEpisode(params: { - name: string; - episodeBody: string; - groupId?: string; - source?: "text" | "json" | "message"; - sourceDescription?: string; - }): Promise { - this.addEpisodeCalls.push(params); + recordEvent( + sessionId: string, + groupId: string, + event: { summary: string; category?: string }, + ) { + this.calls.push({ + sessionId, + groupId, + summary: event.summary, + category: event.category, + body: (event as { body?: string }).body, + continuityText: (event as { continuityText?: string }).continuityText, + }); + return 1; } - async searchFacts(params: { - query: string; - groupIds?: string[]; - maxFacts?: number; - }): Promise { - return []; + async getRecentSessionEvents() { + await Promise.resolve(); + return [ + { + id: "1", + ts: Date.now(), + category: "intent", + priority: 0, + role: "user", + summary: "Finish the overhaul", + }, + ]; } - async searchNodes(params: { - query: string; - groupIds?: string[]; - maxNodes?: number; - }): Promise { - return []; + async touchSessionEvents(sessionId: string) { + await Promise.resolve(); + this.touchedSessionIds.push(sessionId); } } -// Mock OpencodeClient -class MockSdkClient implements Partial { - // Minimal mock for now -} +class DeferredRedisEvents extends MockRedisEvents { + resume!: () => void; -describe("event handler integration", () => { - describe("session.created", () => { - it("should initialize state for main session", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - const handler = createEventHandler({ - sessionManager: 
sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "session.created", - properties: { - info: { - id: "session-1", - parentID: null, - }, - }, - } as any, - }); - - const state = sessionManager.getState("session-1"); - assertEquals(state?.groupId, "test:project"); - // userGroupId is passed directly from defaultUserGroupId - assertEquals(state?.userGroupId, "test:user"); - assertEquals(state?.injectedMemories, false); - assertEquals(state?.lastInjectionFactUuids, []); - assertEquals(state?.messageCount, 0); - assertEquals(state?.pendingMessages, []); - assertEquals(state?.contextLimit, 200_000); - assertEquals(state?.isMain, true); + override async getRecentSessionEvents() { + await new Promise((resolve) => { + this.resume = resolve; }); + return super.getRecentSessionEvents(); + } +} - it("should not initialize state for subagent session", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "session.created", - properties: { - info: { - id: "session-2", - parentID: "session-1", - }, - }, - } as any, - }); +class MockRedisSnapshot { + saved: Array<{ sessionId: string; snapshot: string }> = []; + touchedSessionIds: string[] = []; - const state = sessionManager.getState("session-2"); - assertEquals(state, undefined); - }); + rebuildAndSave(sessionId: string) { + const snapshot = ``; + this.saved.push({ sessionId, snapshot }); + return snapshot; + } - it("should cache parentId correctly", async () => { - const sessionManager = new 
MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "session.created", - properties: { - info: { - id: "session-1", - parentID: null, - }, - }, - } as any, - }); + async touchSnapshot(sessionId: string) { + await Promise.resolve(); + this.touchedSessionIds.push(sessionId); + } +} - const { state } = await sessionManager.resolveSessionState("session-1"); - assertEquals(state?.isMain, true); - }); - }); +class MockRedisCache { + touchedGroupIds: string[] = []; + metaByGroupId = new Map< + string, + { lastQuery?: string; lastRefresh?: number; factUuids: string[] } + >(); - describe("session.idle", () => { - it("should generate and save snapshot with buildSessionSnapshot", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: true, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 5, - pendingMessages: [ - "User: What is TypeScript?", - "Assistant: TypeScript is a strongly typed programming language.", - "User: How does it work?", - "Assistant: It compiles to JavaScript and adds type checking.", - ], - contextLimit: 200_000, - isMain: true, - }); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - try { - setLoggerSilentOverride(true); - await handler({ - event: { - type: 
"session.idle", - properties: { - sessionID: "session-1", - }, - } as any, - }); - } finally { - setLoggerSilentOverride(false); - } - - // Should call addEpisode with snapshot - assertEquals(client.addEpisodeCalls.length, 1); - assertEquals(client.addEpisodeCalls[0].name, "Snapshot: session-1"); - assertEquals( - client.addEpisodeCalls[0].sourceDescription, - "session-snapshot", - ); - assertEquals(client.addEpisodeCalls[0].groupId, "test:project"); - assertEquals(client.addEpisodeCalls[0].source, "text"); - - // Verify snapshot content includes recent messages - const snapshot = client.addEpisodeCalls[0].episodeBody; - assertStrictEquals(snapshot.includes("session-1"), true); - assertStrictEquals(snapshot.includes("Recent user focus:"), true); - assertStrictEquals(snapshot.includes("Recent assistant focus:"), true); - - // Should flush messages after snapshot - assertEquals(sessionManager.flushCalls.length, 1); - assertEquals(sessionManager.flushCalls[0].sessionId, "session-1"); - assertEquals( - sessionManager.flushCalls[0].sourceDescription, - "Buffered messages from OpenCode session", - ); - assertEquals(sessionManager.flushCalls[0].minBytes, 50); - }); + async touch(groupId: string) { + await Promise.resolve(); + this.touchedGroupIds.push(groupId); + } - it("should extract questions from messages", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 2, - pendingMessages: [ - "User: What is Deno?", - "Assistant: Deno is a JavaScript runtime.", - "User: How is it different from Node.js?", - ], - contextLimit: 200_000, - isMain: true, - }); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - 
client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - try { - setLoggerSilentOverride(true); - await handler({ - event: { - type: "session.idle", - properties: { - sessionID: "session-1", - }, - } as any, - }); - } finally { - setLoggerSilentOverride(false); - } - - const snapshot = client.addEpisodeCalls[0].episodeBody; - assertStrictEquals(snapshot.includes("Open questions:"), true); - assertStrictEquals(snapshot.includes("What is Deno?"), true); - assertStrictEquals( - snapshot.includes("How is it different from Node.js?"), - true, - ); - }); + async getMeta(groupId: string) { + await Promise.resolve(); + return this.metaByGroupId.get(groupId) ?? null; + } +} - it("should handle empty pending messages", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "session.idle", - properties: { - sessionID: "session-1", - }, - } as any, - }); +class MockGraphitiAsync { + primerCalls: string[] = []; + drainCalls: string[] = []; + refreshCalls: Array<{ groupId: string; query: string }> = []; + + schedulePrimer(groupId: string) { + this.primerCalls.push(groupId); + } - // With empty pendingMessages, snapshot is skipped - assertEquals(client.addEpisodeCalls.length, 0); + scheduleDrain(groupId: 
string) { + this.drainCalls.push(groupId); + } + + scheduleCacheRefresh(groupId: string, query: string) { + this.refreshCalls.push({ groupId, query }); + } +} + +const createHandler = (sessionManager: MockSessionManager) => { + const redisEvents = new MockRedisEvents(); + const redisSnapshot = new MockRedisSnapshot(); + const redisCache = new MockRedisCache(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createEventHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + redisCache: redisCache as never, + redisSnapshot: redisSnapshot as never, + graphitiAsync: graphitiAsync as never, + defaultGroupId: "group-1", + defaultUserGroupId: "user-1", + sdkClient: { provider: { list: () => ({ data: [] }) } } as never, + directory: "/tmp/project", + }); + + return { handler, redisEvents, redisCache, redisSnapshot, graphitiAsync }; +}; + +describe("event handler", () => { + it("bootstraps main sessions and schedules primer on session.created", async () => { + const sessionManager = new MockSessionManager(); + const { handler, redisEvents, redisCache, redisSnapshot, graphitiAsync } = + createHandler( + sessionManager, + ); - // Should still flush (though nothing to flush) - assertEquals(sessionManager.flushCalls.length, 1); + await handler({ + event: { + type: "session.created", + properties: { info: { id: "session-1", parentID: null } }, + } as never, }); - it("should ignore non-main sessions", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-2", "session-1"); - sessionManager.setState("session-2", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: ["User: Hello"], - contextLimit: 200_000, - isMain: false, - }); - - const handler = createEventHandler({ - 
sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "session.idle", - properties: { - sessionID: "session-2", - }, - } as any, - }); + assertEquals(sessionManager.getState("session-1")?.groupId, "group-1"); + assertEquals(redisEvents.calls.length, 1); + assertEquals(redisEvents.touchedSessionIds, ["session-1"]); + assertEquals(redisSnapshot.touchedSessionIds, ["session-1"]); + assertEquals(redisCache.touchedGroupIds, ["group-1"]); + assertEquals(graphitiAsync.primerCalls, ["group-1"]); + }); - // Should not save snapshot for non-main - assertEquals(client.addEpisodeCalls.length, 0); - assertEquals(sessionManager.flushCalls.length, 0); + it("preserves assistant buffering and writes the completed assistant event on message.updated", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + const redisEvents = new MockRedisEvents(); + const redisSnapshot = new MockRedisSnapshot(); + const redisCache = new MockRedisCache(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createEventHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + redisCache: redisCache as never, + redisSnapshot: redisSnapshot as never, + graphitiAsync: graphitiAsync as never, + defaultGroupId: "group-1", + defaultUserGroupId: "user-1", + sdkClient: { provider: { list: () => ({ data: [] }) } } as never, + directory: "/tmp/project", }); - it("should handle unresolved session gracefully", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - 
defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "session.idle", - properties: { - sessionID: "unknown-session", + await handler({ + event: { + type: "message.part.updated", + properties: { + part: { + type: "text", + sessionID: "session-1", + messageID: "m1", + text: "Buffered answer", }, - } as any, - }); - - // Should not crash, just skip - assertEquals(client.addEpisodeCalls.length, 0); - assertEquals(sessionManager.flushCalls.length, 0); + }, + } as never, }); - it("should handle addEpisode error gracefully", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - client.addEpisode = async () => { - throw new Error("Network error"); - }; - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 1, - pendingMessages: ["User: Hello"], - contextLimit: 200_000, - isMain: true, - }); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "session.idle", - properties: { + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m1", sessionID: "session-1", + role: "assistant", + time: { created: 1, completed: 2 }, }, - } as any, - }); - - // Should still flush despite error - assertEquals(sessionManager.flushCalls.length, 1); - assertEquals( - sessionManager.getState("session-1")?.lastSnapshotBody, - undefined, - ); + }, + } as never, }); - it("snapshot dedup: first snapshot is always saved", async () => { - const 
sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 1, - pendingMessages: ["User: Hello there"], - contextLimit: 200_000, - isMain: true, - }); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - try { - setLoggerSilentOverride(true); - await handler({ - event: { - type: "session.idle", - properties: { sessionID: "session-1" }, - } as any, - }); - } finally { - setLoggerSilentOverride(false); - } - - assertEquals(client.addEpisodeCalls.length, 1); - assertEquals(client.addEpisodeCalls[0].name, "Snapshot: session-1"); + assertEquals(sessionManager.getState("session-1")?.pendingMessages, [ + "Assistant: Buffered answer", + ]); + assertEquals(redisEvents.calls.length >= 1, true); + assertStringIncludes(redisEvents.calls[0].summary, "Buffered answer"); + assertEquals(redisEvents.calls[0].body, undefined); + assertEquals(typeof redisEvents.calls[0].continuityText, "string"); + }); + + it("records the compaction summary as a structured event before rebuilding the snapshot", async () => { + const sessionManager = new MockSessionManager(); + const state = sessionManager.createDefaultState("group-1", "user-1"); + sessionManager.setState("session-1", state); + const redisEvents = new MockRedisEvents(); + const redisSnapshot = new MockRedisSnapshot(); + const redisCache = new MockRedisCache(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createEventHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + redisCache: 
redisCache as never, + redisSnapshot: redisSnapshot as never, + graphitiAsync: graphitiAsync as never, + defaultGroupId: "group-1", + defaultUserGroupId: "user-1", + sdkClient: { provider: { list: () => ({ data: [] }) } } as never, + directory: "/tmp/project", }); - it("snapshot dedup: identical subsequent snapshot is skipped", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 1, - pendingMessages: ["User: Same content"], - contextLimit: 200_000, - isMain: true, - }); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - // First idle — saved - try { - setLoggerSilentOverride(true); - await handler({ - event: { - type: "session.idle", - properties: { sessionID: "session-1" }, - } as any, - }); - } finally { - setLoggerSilentOverride(false); - } - assertEquals(client.addEpisodeCalls.length, 1); - - // Second idle with identical pendingMessages — skipped - try { - setLoggerSilentOverride(true); - await handler({ - event: { - type: "session.idle", - properties: { sessionID: "session-1" }, - } as any, - }); - } finally { - setLoggerSilentOverride(false); - } - assertEquals(client.addEpisodeCalls.length, 1); + await handler({ + event: { + type: "session.compacted", + properties: { sessionID: "session-1", summary: "Compaction summary" }, + } as never, }); - it("snapshot dedup: changed snapshot content is saved again", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new 
MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 1, - pendingMessages: ["User: First message"], - contextLimit: 200_000, - isMain: true, - }); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - // First idle — saved - await handler({ - event: { - type: "session.idle", - properties: { sessionID: "session-1" }, - } as any, - }); - assertEquals(client.addEpisodeCalls.length, 1); - - // Change the session messages - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 2, - pendingMessages: [ - "User: First message", - "Assistant: Here is my answer.", - "User: Follow-up question", - ], - contextLimit: 200_000, - isMain: true, - }); - - // Second idle with different content — saved again - await handler({ - event: { - type: "session.idle", - properties: { sessionID: "session-1" }, - } as any, - }); - assertEquals(client.addEpisodeCalls.length, 2); + assertEquals( + redisEvents.calls.some((call) => + call.summary.includes("Compaction summary") + ), + true, + ); + assertEquals(redisSnapshot.saved.length, 1); + }); + + it("rebuilds the local snapshot and schedules async drain on session.idle", async () => { + const clock = new FakeClock(); + const sessionManager = new MockSessionManager({ + idleRetentionMs: 100, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + const state = sessionManager.createDefaultState("group-1", "user-1"); + state.latestUserRequest = "Refresh the cache"; + sessionManager.setState("session-1", state); 
+ const redisEvents = new MockRedisEvents(); + const redisSnapshot = new MockRedisSnapshot(); + const redisCache = new MockRedisCache(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createEventHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + redisCache: redisCache as never, + redisSnapshot: redisSnapshot as never, + graphitiAsync: graphitiAsync as never, + defaultGroupId: "group-1", + defaultUserGroupId: "user-1", + sdkClient: { provider: { list: () => ({ data: [] }) } } as never, + directory: "/tmp/project", }); - it("snapshot dedup: failed addEpisode does not poison dedupe state", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 1, - pendingMessages: ["User: Retry me"], - contextLimit: 200_000, - isMain: true, - }); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - // First idle — addEpisode throws - client.addEpisode = async () => { - throw new Error("Transient failure"); - }; - - await handler({ - event: { - type: "session.idle", - properties: { sessionID: "session-1" }, - } as any, - }); - - // Second idle with same content — should retry (not skipped) - let savedBody = ""; - client.addEpisode = async (params) => { - savedBody = params.episodeBody; - }; - - await handler({ - event: { - type: "session.idle", - properties: { sessionID: "session-1" }, - } as any, - }); - - // The retry succeeded — body was written - assertStrictEquals(savedBody.includes("Retry me"), 
true); + await handler({ + event: { + type: "session.idle", + properties: { sessionID: "session-1" }, + } as never, }); + + assertEquals(redisSnapshot.saved.length, 1); + assertEquals(graphitiAsync.drainCalls, ["group-1"]); + assertEquals(sessionManager.idleCleanupCalls, ["session-1"]); + assertEquals(graphitiAsync.refreshCalls, [{ + groupId: "group-1", + query: "Refresh the cache", + }]); }); - describe("session.compacted", () => { - it("should flush messages before compaction", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: true, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 3, - pendingMessages: ["User: Test message"], - contextLimit: 200_000, - isMain: true, - }); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "session.compacted", - properties: { - sessionID: "session-1", - summary: "Discussion about testing", - }, - } as any, - }); - - // Should flush with compaction description and minBytes 0 - assertEquals(sessionManager.flushCalls.length, 1); - assertEquals(sessionManager.flushCalls[0].sessionId, "session-1"); - assertEquals( - sessionManager.flushCalls[0].sourceDescription, - "Buffered messages flushed before compaction", - ); - assertEquals(sessionManager.flushCalls[0].minBytes, 0); + it("uses Redis-backed refresh query fallback on session.idle after restart", async () => { + const sessionManager = new MockSessionManager({ idleRetentionMs: 100 }); + const state = sessionManager.createDefaultState("group-1", "user-1"); + 
sessionManager.setState("session-1", state); + const redisEvents = new MockRedisEvents(); + const redisSnapshot = new MockRedisSnapshot(); + const redisCache = new MockRedisCache(); + redisCache.metaByGroupId.set("group-1", { + lastQuery: "resume refresh from redis", + factUuids: [], + }); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createEventHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + redisCache: redisCache as never, + redisSnapshot: redisSnapshot as never, + graphitiAsync: graphitiAsync as never, + defaultGroupId: "group-1", + defaultUserGroupId: "user-1", + sdkClient: { provider: { list: () => ({ data: [] }) } } as never, + directory: "/tmp/project", }); - it("should ignore non-main sessions", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-2", "session-1"); - sessionManager.setState("session-2", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: ["User: Hello"], - contextLimit: 200_000, - isMain: false, - }); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "session.compacted", - properties: { - sessionID: "session-2", - summary: "Test summary", - }, - } as any, - }); + await handler({ + event: { + type: "session.idle", + properties: { sessionID: "session-1" }, + } as never, + }); - // Should not flush for non-main - assertEquals(sessionManager.flushCalls.length, 0); + assertEquals(graphitiAsync.refreshCalls, [{ + groupId: "group-1", + query: "resume refresh from redis", + }]); + assertEquals( + 
sessionManager.getState("session-1")?.latestRefreshQuery, + "resume refresh from redis", + ); + }); + + it("cleans session state immediately on session.deleted", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + const { handler } = createHandler(sessionManager); + + await handler({ + event: { + type: "session.deleted", + properties: { sessionID: "session-1" }, + } as never, }); - it("should handle unresolved session gracefully", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "session.compacted", - properties: { - sessionID: "unknown-session", - summary: "Test summary", - }, - } as any, - }); + assertEquals(sessionManager.getState("session-1"), undefined); + assertEquals(sessionManager.deletedSessions, ["session-1"]); + }); - // Should not crash - assertEquals(sessionManager.flushCalls.length, 0); + it("keeps reactivated sessions from being deleted by stale idle cleanup", async () => { + const clock = new FakeClock(); + const sessionManager = new MockSessionManager({ + idleRetentionMs: 100, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + const { handler } = createHandler(sessionManager); + + await handler({ + event: { + type: "session.idle", + properties: { sessionID: "session-1" }, + } as never, }); - it("should skip when summary is empty", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - 
const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "session.compacted", - properties: { + clock.tick(50); + + await handler({ + event: { + type: "message.part.updated", + properties: { + part: { + type: "text", sessionID: "session-1", - summary: "", + messageID: "m1", + text: "reactivated", }, - } as any, - }); + }, + } as never, + }); - // Should flush but not call handleCompaction - assertEquals(sessionManager.flushCalls.length, 1); + clock.tick(60); + assertEquals(sessionManager.getState("session-1")?.groupId, "group-1"); + assertEquals(sessionManager.deletedSessions, []); + + await handler({ + event: { + type: "session.idle", + properties: { sessionID: "session-1" }, + } as never, }); + + clock.tick(100); + assertEquals(sessionManager.getState("session-1"), undefined); + assertEquals(sessionManager.deletedSessions, ["session-1"]); }); - describe("message.updated", () => { - it("should finalize completed assistant message", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - let 
finalizeCalled = false; - sessionManager.finalizeAssistantMessage = ( - state, - sessionId, - messageId, - source, - ) => { - finalizeCalled = true; - assertEquals(sessionId, "session-1"); - assertEquals(messageId, "msg-1"); - assertEquals(source, "message.updated"); - }; - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - try { - setLoggerSilentOverride(true); - await handler({ - event: { - type: "message.updated", - properties: { - info: { - id: "msg-1", - sessionID: "session-1", - role: "assistant", - time: { created: 1000, completed: 2000 }, - tokens: { input: 10, output: 20 }, - providerID: "openai", - modelID: "gpt-4", - }, - }, - } as any, - }); - } finally { - setLoggerSilentOverride(false); - } - - assertEquals(finalizeCalled, true); + it("does not schedule stale idle cleanup when reactivated during async idle work", async () => { + const clock = new FakeClock(); + const sessionManager = new MockSessionManager({ + idleRetentionMs: 100, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + const redisEvents = new DeferredRedisEvents(); + const redisSnapshot = new MockRedisSnapshot(); + const redisCache = new MockRedisCache(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createEventHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + redisCache: redisCache as never, + redisSnapshot: redisSnapshot as never, + graphitiAsync: graphitiAsync as never, + defaultGroupId: "group-1", + defaultUserGroupId: "user-1", + sdkClient: { provider: { list: () => ({ data: [] }) } } as never, + directory: "/tmp/project", }); - it("should delete pending assistant for non-assistant messages", async () => { - const 
sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - let deleteCalled = false; - sessionManager.deletePendingAssistant = (sessionId, messageId) => { - deleteCalled = true; - assertEquals(sessionId, "session-1"); - assertEquals(messageId, "msg-1"); - }; - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "message.updated", - properties: { - info: { - id: "msg-1", - sessionID: "session-1", - role: "user", - time: { created: 1000, completed: 2000 }, - }, - }, - } as any, - }); - - assertEquals(deleteCalled, true); + const idleRun = handler({ + event: { + type: "session.idle", + properties: { sessionID: "session-1" }, + } as never, }); - it("should skip if message is not completed", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - let finalizeCalled = false; - sessionManager.finalizeAssistantMessage = () => { - finalizeCalled = true; - }; - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - 
defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "message.updated", - properties: { - info: { - id: "msg-1", - sessionID: "session-1", - role: "assistant", - time: { created: 1000 }, // No completed time - }, - }, - } as any, - }); + await Promise.resolve(); + sessionManager.markSessionActive("session-1"); + redisEvents.resume(); + await idleRun; - assertEquals(finalizeCalled, false); - }); + clock.tick(150); + assertEquals(sessionManager.getState("session-1")?.groupId, "group-1"); + assertEquals(sessionManager.deletedSessions, []); + assertEquals(clock.timers.size, 0); + }); - it("should skip if already buffered", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-1", null); - sessionManager.setState("session-1", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: true, - }); - - sessionManager.isAssistantBuffered = () => true; - - let finalizeCalled = false; - sessionManager.finalizeAssistantMessage = () => { - finalizeCalled = true; - }; - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "message.updated", - properties: { - info: { - id: "msg-1", - sessionID: "session-1", - role: "assistant", - time: { created: 1000, completed: 2000 }, - }, - }, - } as any, - }); + it("uses Redis-backed refresh query fallback on session.compacted after restart", async () => { + const sessionManager = new MockSessionManager(); + const state = 
sessionManager.createDefaultState("group-1", "user-1"); + sessionManager.setState("session-1", state); + const redisEvents = new MockRedisEvents(); + const redisSnapshot = new MockRedisSnapshot(); + const redisCache = new MockRedisCache(); + redisCache.metaByGroupId.set("group-1", { + lastQuery: "refresh after compact restart", + factUuids: [], + }); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createEventHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + redisCache: redisCache as never, + redisSnapshot: redisSnapshot as never, + graphitiAsync: graphitiAsync as never, + defaultGroupId: "group-1", + defaultUserGroupId: "user-1", + sdkClient: { provider: { list: () => ({ data: [] }) } } as never, + directory: "/tmp/project", + }); - assertEquals(finalizeCalled, false); + await handler({ + event: { + type: "session.compacted", + properties: { sessionID: "session-1", summary: "Compacted state" }, + } as never, }); - it("should ignore non-main sessions", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - sessionManager.setParentId("session-2", "session-1"); - sessionManager.setState("session-2", { - groupId: "test:project", - userGroupId: "test:user", - injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], - messageCount: 0, - pendingMessages: [], - contextLimit: 200_000, - isMain: false, - }); - - let finalizeCalled = false; - sessionManager.finalizeAssistantMessage = () => { - finalizeCalled = true; - }; - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "message.updated", - properties: { - info: { - id: "msg-1", - sessionID: "session-2", - role: 
"assistant", - time: { created: 1000, completed: 2000 }, - }, + assertEquals(graphitiAsync.refreshCalls, [{ + groupId: "group-1", + query: "refresh after compact restart", + }]); + assertEquals( + sessionManager.getState("session-1")?.latestRefreshQuery, + "refresh after compact restart", + ); + }); + + it("records supported non-special events into the hot-tier log for main sessions", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + const { handler, redisEvents, graphitiAsync } = createHandler( + sessionManager, + ); + + await handler({ + event: { + type: "task.updated", + properties: { + sessionID: "session-1", + task: { + id: "task-1", + path: "src/handlers/event.ts", + summary: "Implement event handler blocker fix", }, - } as any, - }); + }, + } as never, + }); - assertEquals(finalizeCalled, false); + await handler({ + event: { + type: "rules.loaded", + properties: { + sessionID: "session-1", + name: "CodingGuideline", + path: "docs/CodingGuideline.md", + }, + } as never, }); - }); - describe("message.part.updated", () => { - it("should buffer text parts for assistant messages", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - let bufferCalled = false; - sessionManager.bufferAssistantPart = (sessionId, messageId, text) => { - bufferCalled = true; - assertEquals(sessionId, "session-1"); - assertEquals(messageId, "msg-1"); - assertEquals(text, "Hello world"); - }; - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "message.part.updated", - properties: { - part: { - type: "text", - text: "Hello world", - sessionID: 
"session-1", - messageID: "msg-1", - }, - }, - } as any, - }); + await handler({ + event: { + type: "environment.updated", + properties: { + sessionID: "session-1", + cwd: "/tmp/project", + summary: "Working directory changed to /tmp/project", + }, + } as never, + }); - assertEquals(bufferCalled, true); + await handler({ + event: { + type: "tool.called", + properties: { + sessionID: "session-1", + tool: "Read", + path: "src/handlers/event.ts", + summary: "Read file src/handlers/event.ts", + }, + } as never, }); - it("should ignore non-text parts", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - let bufferCalled = false; - sessionManager.bufferAssistantPart = () => { - bufferCalled = true; - }; - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "message.part.updated", - properties: { - part: { - type: "tool_call", - sessionID: "session-1", - messageID: "msg-1", - }, - }, - } as any, - }); + await handler({ + event: { + type: "tool.completed", + properties: { + sessionID: "session-1", + tool: "git status", + summary: "Checked branch status before commit", + }, + } as never, + }); - assertEquals(bufferCalled, false); + await handler({ + event: { + type: "subagent.started", + properties: { + sessionID: "session-1", + agentId: "agent-1", + summary: "Started review subagent", + }, + } as never, }); - it("should ignore synthetic text parts", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - let bufferCalled = false; - sessionManager.bufferAssistantPart = () => { - bufferCalled = true; - }; - - const handler = createEventHandler({ - 
sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - await handler({ - event: { - type: "message.part.updated", - properties: { - part: { - type: "text", - text: "Synthetic text", - synthetic: true, - sessionID: "session-1", - messageID: "msg-1", - }, - }, - } as any, - }); + await handler({ + event: { + type: "subagent.finished", + properties: { + sessionID: "session-1", + agentId: "agent-1", + summary: "Finished review subagent", + }, + } as never, + }); + + assertEquals( + redisEvents.calls.map((call) => call.category), + [ + "task.create", + "rule.load", + "cwd.change", + "env.change", + "file.read", + "git.activity", + "subagent.start", + "subagent.finish", + ], + ); + assertEquals( + redisEvents.calls.every((call) => call.groupId === "group-1"), + true, + ); + assertEquals(graphitiAsync.primerCalls.length, 0); + assertEquals(graphitiAsync.drainCalls.length, 0); + assertEquals(graphitiAsync.refreshCalls.length, 0); + }); - assertEquals(bufferCalled, false); + it("avoids durably storing raw tool output bodies for normal tool activity", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + const { handler, redisEvents } = createHandler(sessionManager); + + await handler({ + event: { + type: "tool.completed", + properties: { + sessionID: "session-1", + tool: "Read", + path: "src/session.ts", + summary: + "Read src/session.ts and inspected continuity fields without retaining the raw output transcript", + }, + } as never, }); + + assertEquals(redisEvents.calls.length, 1); + assertEquals(redisEvents.calls[0].category, "file.read"); + assertEquals(redisEvents.calls[0].body, undefined); + assertEquals(typeof redisEvents.calls[0].continuityText, "string"); }); - describe("error handling", () => { - it("should 
catch and log errors without crashing", async () => { - const sessionManager = new MockSessionManager(); - const client = new MockGraphitiClient(); - const sdkClient = new MockSdkClient(); - - // Make resolveSessionState throw - sessionManager.resolveSessionState = async () => { - throw new Error("Test error"); - }; - - const handler = createEventHandler({ - sessionManager: sessionManager as any, - client: client as any, - defaultGroupId: "test:project", - defaultUserGroupId: "test:user", - sdkClient: sdkClient as any, - directory: "/test/dir", - }); - - // Should not throw - try { - setLoggerSilentOverride(true); - await handler({ - event: { - type: "session.idle", - properties: { - sessionID: "session-1", - }, - } as any, - }); - } finally { - setLoggerSilentOverride(false); - } - - // Test passed if no error thrown + it("skips the catch-all for events without a resolvable main session", async () => { + const sessionManager = new MockSessionManager(); + const childState = sessionManager.createDefaultState("group-1", "user-1"); + childState.isMain = false; + sessionManager.setState("child-session", childState); + const { handler, redisEvents } = createHandler(sessionManager); + + await handler({ + event: { + type: "tool.called", + properties: { + sessionID: "missing-session", + tool: "Read", + summary: "Read file src/handlers/event.ts", + }, + } as never, + }); + + await handler({ + event: { + type: "tool.called", + properties: { + sessionID: "child-session", + tool: "Read", + summary: "Read file src/handlers/event.ts", + }, + } as never, }); + + assertEquals(redisEvents.calls.length, 0); }); }); diff --git a/src/handlers/event.ts b/src/handlers/event.ts index e62398c..c735b6f 100644 --- a/src/handlers/event.ts +++ b/src/handlers/event.ts @@ -1,8 +1,11 @@ import type { Hooks } from "@opencode-ai/plugin"; import type { OpencodeClient } from "@opencode-ai/sdk"; -import type { GraphitiClient } from "../services/client.ts"; -import { handleCompaction } from 
"../services/compaction.ts"; import { resolveContextLimit } from "../services/context-limit.ts"; +import { extractStructuredEvents } from "../services/event-extractor.ts"; +import type { GraphitiAsyncService } from "../services/graphiti-async.ts"; +import type { RedisCacheService } from "../services/redis-cache.ts"; +import type { RedisEventsService } from "../services/redis-events.ts"; +import type { RedisSnapshotService } from "../services/redis-snapshot.ts"; import { logger } from "../services/logger.ts"; import type { SessionManager } from "../session.ts"; import { isTextPart } from "../utils.ts"; @@ -10,67 +13,82 @@ import { isTextPart } from "../utils.ts"; type EventHook = NonNullable; type EventInput = Parameters[0]; -/** Dependencies for the event handler. */ export interface EventHandlerDeps { sessionManager: SessionManager; - client: GraphitiClient; + redisEvents: RedisEventsService; + redisCache: RedisCacheService; + redisSnapshot: RedisSnapshotService; + graphitiAsync: GraphitiAsyncService; defaultGroupId: string; defaultUserGroupId: string; sdkClient: OpencodeClient; directory: string; } -/** Creates the `event` hook handler. */ +const asRecord = (value: unknown): Record | undefined => + value && typeof value === "object" && !Array.isArray(value) + ? value as Record + : undefined; + +const asString = (value: unknown): string | undefined => + typeof value === "string" && value.trim() ? 
value.trim() : undefined; + +const passthroughEventTypes = new Set([ + "task.updated", + "rules.loaded", + "environment.updated", + "subagent.started", + "subagent.finished", + "tool.called", + "tool.completed", +]); + +const getEventSessionId = (value: unknown, depth = 0): string | undefined => { + if (depth > 4) return undefined; + + if (Array.isArray(value)) { + for (const item of value) { + const sessionId = getEventSessionId(item, depth + 1); + if (sessionId) return sessionId; + } + return undefined; + } + + const record = asRecord(value); + if (!record) return undefined; + + const directSessionId = asString(record.sessionID) ?? + asString(record.sessionId); + if (directSessionId) return directSessionId; + + for (const nested of Object.values(record)) { + const sessionId = getEventSessionId(nested, depth + 1); + if (sessionId) return sessionId; + } + + return undefined; +}; + +const getCompactionSummary = (value: unknown): string => { + const summary = asRecord(value)?.summary; + return typeof summary === "string" ? summary : ""; +}; + export function createEventHandler(deps: EventHandlerDeps) { const { sessionManager, - client, + redisEvents, + redisCache, + redisSnapshot, + graphitiAsync, defaultGroupId, defaultUserGroupId, sdkClient, directory, } = deps; - /** Per-handler context-limit cache — no cross-instance sharing. 
*/ const contextLimitCache = new Map(); - const buildSessionSnapshot = ( - sessionId: string, - messages: string[], - ): string => { - const recentMessages = messages.slice(-12); - const recentAssistant = recentMessages - .findLast((message) => message.startsWith("Assistant:")) - ?.replace(/^Assistant:\s*/, "") - .trim(); - const recentUser = recentMessages - .findLast((message) => message.startsWith("User:")) - ?.replace(/^User:\s*/, "") - .trim(); - const questionRegex = /[^\n\r?]{3,200}\?/g; - const questions = recentMessages - .flatMap((message) => { - const text = message.replace(/^(User|Assistant):\s*/, ""); - return text.match(questionRegex) ?? []; - }) - .map((question) => question.trim()); - - const uniqueQuestions = Array.from(new Set(questions)).slice(0, 6); - const lines: string[] = []; - lines.push(`Session ${sessionId} working snapshot`); - if (recentUser) lines.push(`Recent user focus: ${recentUser}`); - if (recentAssistant) { - lines.push(`Recent assistant focus: ${recentAssistant}`); - } - if (uniqueQuestions.length > 0) { - lines.push("Open questions:"); - for (const question of uniqueQuestions) { - lines.push(`- ${question}`); - } - } - return lines.join("\n"); - }; - return async ({ event }: EventInput) => { try { if (event.type === "session.created") { @@ -79,141 +97,130 @@ export function createEventHandler(deps: EventHandlerDeps) { const parentId = info.parentID ?? 
null; const isMain = !parentId; sessionManager.setParentId(sessionId, parentId); - - logger.info("Session created:", { - sessionId, - isMain, - parentID: info.parentID, - }); + sessionManager.markSessionActive(sessionId); if (isMain) { + const nextState = sessionManager.createDefaultState( + defaultGroupId, + defaultUserGroupId, + ); sessionManager.setState( sessionId, - sessionManager.createDefaultState( - defaultGroupId, - defaultUserGroupId, - ), + nextState, ); - } else { - logger.debug("Ignoring subagent session:", sessionId); + for ( + const structured of extractStructuredEvents({ + eventType: event.type, + sessionId, + properties: event.properties as Record, + role: "system", + }) + ) { + await redisEvents.recordEvent( + sessionId, + defaultGroupId, + structured, + ); + } + await Promise.all([ + redisEvents.touchSessionEvents(sessionId), + redisSnapshot.touchSnapshot(sessionId), + redisCache.touch(defaultGroupId), + ]); + graphitiAsync.schedulePrimer(defaultGroupId); } return; } - if (event.type === "session.compacted") { + if (event.type === "session.idle") { const sessionId = event.properties.sessionID; const { state, resolved } = await sessionManager.resolveSessionState( sessionId, ); - if (!resolved) { - logger.debug("Unable to resolve session compaction:", sessionId); - return; - } - if (!state?.isMain) { - logger.debug("Ignoring non-main compaction:", sessionId); - return; - } - - const summary = - ((event.properties as Record).summary as string) || - ""; - - await sessionManager.flushPendingMessages( + if (!resolved || !state?.isMain) return; + const idleGeneration = sessionManager.captureIdleCleanupGeneration( sessionId, - "Buffered messages flushed before compaction", - 0, ); + if (idleGeneration === null) return; - if (summary) { - await handleCompaction({ - client, - groupId: state.groupId, - summary, - sessionId, - }); + const events = await redisEvents.getRecentSessionEvents( + sessionId, + 40, + true, + ); + await 
redisSnapshot.rebuildAndSave(sessionId, events); + state.hotTierReady = true; + graphitiAsync.scheduleDrain(state.groupId); + const refreshQuery = state.latestUserRequest ?? + state.latestRefreshQuery ?? + (await redisCache.getMeta(state.groupId))?.lastQuery; + if (refreshQuery) { + state.latestRefreshQuery = refreshQuery; + graphitiAsync.scheduleCacheRefresh( + state.groupId, + refreshQuery, + ); } + sessionManager.scheduleIdleSessionCleanup(sessionId, idleGeneration); return; } - if (event.type === "session.idle") { + if (event.type === "session.deleted") { + const sessionId = (event.properties as unknown as { sessionID: string }) + .sessionID; + sessionManager.deleteSession(sessionId); + return; + } + + if (event.type === "session.compacted") { const sessionId = event.properties.sessionID; const { state, resolved } = await sessionManager.resolveSessionState( sessionId, ); - if (!resolved) { - logger.debug("Unable to resolve idle session:", sessionId); - return; - } - if (!state?.isMain) { - logger.debug("Ignoring non-main idle session:", sessionId); - return; - } + if (!resolved || !state?.isMain) return; - try { - if (state.pendingMessages.length > 0) { - const snapshotContent = buildSessionSnapshot( - sessionId, - state.pendingMessages, - ); - if (snapshotContent.trim()) { - if (state.lastSnapshotBody === snapshotContent) { - logger.debug("Skipping duplicate session snapshot", { - sessionId, - }); - } else { - await client.addEpisode({ - name: `Snapshot: ${sessionId}`, - episodeBody: snapshotContent, - groupId: state.groupId, - source: "text", - sourceDescription: "session-snapshot", - }); - state.lastSnapshotBody = snapshotContent; - logger.info("Saved session snapshot", { sessionId }); - } - } - } else { - logger.debug("Skipping idle snapshot: no pending messages", { - sessionId, - }); - } - } catch (err) { - logger.error("Failed to save session snapshot", { sessionId, err }); + const structured = extractStructuredEvents({ + eventType: event.type, + 
sessionId, + properties: event.properties as Record, + messageText: getCompactionSummary(event.properties), + role: "system", + }); + for (const item of structured) { + await redisEvents.recordEvent(sessionId, state.groupId, item); } - - await sessionManager.flushPendingMessages( + const events = await redisEvents.getRecentSessionEvents( sessionId, - "Buffered messages from OpenCode session", - 50, + 40, + true, ); + await redisSnapshot.rebuildAndSave( + sessionId, + events, + ); + graphitiAsync.scheduleDrain(state.groupId); + const refreshQuery = state.latestUserRequest ?? + state.latestRefreshQuery ?? + (await redisCache.getMeta(state.groupId))?.lastQuery; + if (refreshQuery) { + state.latestRefreshQuery = refreshQuery; + graphitiAsync.scheduleCacheRefresh( + state.groupId, + refreshQuery, + ); + } return; } if (event.type === "message.updated") { const info = event.properties.info; const sessionId = info.sessionID; - logger.info("Message event fired", { - hook: "message.updated", - eventType: "message.updated", - sessionId, - role: info.role, - messageID: info.id, - }); + sessionManager.markSessionActive(sessionId); const { state, resolved } = await sessionManager.resolveSessionState( sessionId, ); - if (!resolved) { - logger.debug("Unable to resolve session for message update:", { - sessionId, - messageID: info.id, - role: info.role, - }); - return; - } - if (!state?.isMain) { - logger.debug("Ignoring non-main message update:", sessionId); - return; - } + if (!resolved || !state?.isMain) return; + if (info.role !== "assistant") { sessionManager.deletePendingAssistant(sessionId, info.id); return; @@ -223,19 +230,27 @@ export function createEventHandler(deps: EventHandlerDeps) { if (!time?.completed) return; if (sessionManager.isAssistantBuffered(sessionId, info.id)) return; - sessionManager.finalizeAssistantMessage( + const assistantText = sessionManager.finalizeAssistantMessage( state, sessionId, info.id, "message.updated", ); + if (assistantText) { + for ( + 
const structured of extractStructuredEvents({ + eventType: event.type, + sessionId, + properties: event.properties as Record, + messageText: assistantText, + role: "assistant", + }) + ) { + await redisEvents.recordEvent(sessionId, state.groupId, structured); + } + } if (info.tokens && info.providerID && info.modelID) { - // Fire-and-forget: update contextLimit asynchronously without - // blocking event responsiveness. The state update is eventually - // consistent — a missed update only affects injection budget sizing, - // not correctness. We snapshot `state` here; if the session is - // deleted before the promise resolves the write is a harmless no-op. const capturedState = state; resolveContextLimit( info.providerID as string, @@ -255,10 +270,35 @@ export function createEventHandler(deps: EventHandlerDeps) { if (event.type === "message.part.updated") { const part = event.properties.part; if (!isTextPart(part)) return; + sessionManager.markSessionActive(part.sessionID); + sessionManager.bufferAssistantPart( + part.sessionID, + part.messageID, + part.text, + ); + return; + } - const sessionId = part.sessionID; - const messageId = part.messageID; - sessionManager.bufferAssistantPart(sessionId, messageId, part.text); + if (!passthroughEventTypes.has(event.type)) { + return; + } + + const sessionId = getEventSessionId(event.properties); + if (!sessionId) return; + + const { state, resolved } = await sessionManager.resolveSessionState( + sessionId, + ); + if (!resolved || !state?.isMain) return; + + for ( + const structured of extractStructuredEvents({ + eventType: event.type, + sessionId, + properties: event.properties as Record, + }) + ) { + await redisEvents.recordEvent(sessionId, state.groupId, structured); } } catch (err) { logger.error("Event handler error", { type: event.type, err }); diff --git a/src/handlers/messages.test.ts b/src/handlers/messages.test.ts new file mode 100644 index 0000000..ec2c45e --- /dev/null +++ b/src/handlers/messages.test.ts @@ -0,0 
+1,492 @@ +import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { createMessagesHandler } from "./messages.ts"; + +describe("messages handler", () => { + it("injects pending session memory into the latest user message", async () => { + const state = { + isMain: true, + visibleFactUuids: [], + pendingInjection: { + envelope: + 'fresh', + factUuids: ["fact-1"], + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "fresh", + }, + }, + }; + const handler = createMessagesHandler({ + sessionManager: { + getState() { + return state; + }, + prepareInjection() { + throw new Error("should not be called"); + }, + } as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "Continue work" }], + }], + }; + await handler({}, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, " { + const state = { + isMain: true, + visibleFactUuids: [] as string[], + pendingInjection: undefined as unknown, + }; + const handler = createMessagesHandler({ + sessionManager: { + getState() { + return state; + }, + prepareInjection(sessionId: string, lastRequest?: string) { + assertEquals(sessionId, "session-1"); + assertEquals(lastRequest, "fallback request"); + return { + envelope: + 'fallback request', + factUuids: [], + nodeRefs: [], + refreshDecision: { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }, + }; + }, + } as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "fallback request" }], + }], + }; + await handler({ message: "fallback request" } as never, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, " { + const state = { + isMain: true, + 
visibleFactUuids: [] as string[], + pendingInjection: undefined as unknown, + }; + const handler = createMessagesHandler({ + sessionManager: { + getState() { + return state; + }, + prepareInjection(sessionId: string, lastRequest?: string) { + assertEquals(sessionId, "session-1"); + assertEquals(lastRequest, "fallback request"); + return { + envelope: '', + factUuids: [], + nodeRefs: [], + refreshDecision: { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }, + }; + }, + } as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "fallback request" }], + }], + }; + await handler( + { message: { text: "fallback request" } } as never, + output as never, + ); + + assertStringIncludes(output.messages[0].parts[0].text, " { + const state = { + isMain: true, + visibleFactUuids: [] as string[], + pendingInjection: undefined as unknown, + }; + const handler = createMessagesHandler({ + sessionManager: { + getState() { + return state; + }, + prepareInjection(sessionId: string, lastRequest?: string) { + assertEquals(sessionId, "session-1"); + assertEquals(lastRequest, "message body query"); + return { + envelope: + 'message body query', + factUuids: [], + nodeRefs: [], + refreshDecision: { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }, + }; + }, + } as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "message body query" }], + }], + }; + await handler({} as never, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, " { + const state = { + isMain: true, + visibleFactUuids: [] as string[], + pendingInjection: { + envelope: '', + factUuids: [], + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: 
"next", + }, + }, + }; + const handler = createMessagesHandler({ + sessionManager: { + getState() { + return state; + }, + prepareInjection() { + return state.pendingInjection; + }, + } as never, + }); + + const output = { + messages: [ + { + info: { role: "assistant", sessionID: "session-1" }, + parts: [{ + type: "text", + text: + '', + }], + }, + { + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "next" }], + }, + ], + }; + await handler({}, output as never); + + assertEquals(state.visibleFactUuids, ["fact-1", "fact-2"]); + }); + + it("preserves legacy Graphiti memory data-uuids extraction semantics", async () => { + const state = { + isMain: true, + visibleFactUuids: [] as string[], + pendingInjection: undefined as unknown, + }; + const handler = createMessagesHandler({ + sessionManager: { + getState() { + return state; + }, + prepareInjection() { + return { + envelope: '', + factUuids: [], + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "next", + }, + }; + }, + } as never, + }); + + const output = { + messages: [ + { + info: { role: "assistant", sessionID: "session-1" }, + parts: [{ + type: "text", + text: '', + }], + }, + { + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "next" }], + }, + ], + }; + + await handler({}, output as never); + + assertEquals(state.visibleFactUuids, ["fact-legacy-1", "fact-legacy-2"]); + }); + + it("passes current-turn visible fact uuids into prepareInjection", async () => { + const state = { + isMain: true, + visibleFactUuids: ["stale-fact"] as string[], + pendingInjection: undefined as unknown, + }; + const handler = createMessagesHandler({ + sessionManager: { + getState() { + return state; + }, + prepareInjection( + sessionId: string, + lastRequest?: string, + visibleFactUuids?: string[], + ) { + assertEquals(sessionId, "session-1"); + assertEquals(lastRequest, "next"); + 
assertEquals(visibleFactUuids, ["fact-1", "fact-2"]); + return { + envelope: '', + factUuids: [], + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "next", + }, + }; + }, + } as never, + }); + + const output = { + messages: [ + { + info: { role: "assistant", sessionID: "session-1" }, + parts: [{ + type: "text", + text: + '', + }], + }, + { + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "next" }], + }, + ], + }; + + await handler({} as never, output as never); + + assertEquals(state.visibleFactUuids, ["fact-1", "fact-2"]); + assertStringIncludes(output.messages[1].parts[0].text, " { + const state = { + isMain: true, + visibleFactUuids: [] as string[], + pendingInjection: undefined as unknown, + }; + const handler = createMessagesHandler({ + sessionManager: { + getState() { + return state; + }, + prepareInjection( + sessionId: string, + lastRequest?: string, + visibleFactUuids?: string[], + ) { + assertEquals(sessionId, "session-1"); + assertEquals(lastRequest, "continue"); + assertEquals(visibleFactUuids, ["fact-1", "fact-2", "fact-3"]); + return { + envelope: + 'continue', + factUuids: [], + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "continue", + }, + }; + }, + } as never, + }); + + const output = { + messages: [ + { + info: { role: "assistant", sessionID: "session-1" }, + parts: [{ + type: "text", + text: + '', + }], + }, + { + info: { role: "assistant", sessionID: "session-1" }, + parts: [{ + type: "text", + text: '', + }], + }, + { + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "continue" }], + }, + ], + }; + + await handler({}, output as never); + + assertEquals(state.visibleFactUuids, ["fact-1", "fact-2", "fact-3"]); + assertStringIncludes(output.messages[2].parts[0].text, " { + const newerPrepared = { + 
envelope: + 'newer', + factUuids: ["fact-2"], + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "newer", + }, + }; + const state = { + isMain: true, + visibleFactUuids: [] as string[], + pendingInjection: undefined as typeof newerPrepared | undefined, + }; + const handler = createMessagesHandler({ + sessionManager: { + getState() { + return state; + }, + prepareInjection() { + state.pendingInjection = newerPrepared; + return { + envelope: + 'older', + factUuids: ["fact-1"], + nodeRefs: [], + refreshDecision: { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }, + }; + }, + } as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "current request" }], + }], + }; + await handler({ message: "current request" } as never, output as never); + + assertEquals(state.pendingInjection, newerPrepared); + assertStringIncludes(output.messages[0].parts[0].text, "older"); + }); + + it("remains compatible with extended prepareInjection results", async () => { + const prepared = { + envelope: '', + factUuids: ["fact-1"], + nodeRefs: ["node-1"], + refreshDecision: { + classification: "drifted", + shouldRefresh: true, + similarity: 0.25, + threshold: 0.5, + cachedQuery: "prior topic", + }, + }; + const state = { + isMain: true, + visibleFactUuids: [] as string[], + pendingInjection: prepared, + }; + const handler = createMessagesHandler({ + sessionManager: { + getState() { + return state; + }, + prepareInjection() { + return prepared; + }, + } as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "compatibility check" }], + }], + }; + await handler({}, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, " | undefined => + value && typeof value === "object" && 
!Array.isArray(value) + ? value as Record + : undefined; + +const getTransformMessage = (input: unknown): string | undefined => { + const message = asRecord(input)?.message; + return typeof message === "string" ? message : undefined; +}; + +const getLatestUserText = ( + output: MessagesTransformOutput, +): string | undefined => { + const lastUserEntry = output.messages + .findLast((message) => message.info.role === "user"); + const textPart = lastUserEntry?.parts.find(isTextPart); + return textPart?.text; +}; + +const extractVisibleUuids = (text: string): string[] => { + const uuids: string[] = []; + for ( + const regex of [ + /]*\bdata-uuids="([^"]*)"[^>]*>/g, + /]*\bfact_uuids="([^"]*)"[^>]*>/g, + ] + ) { + let match: RegExpExecArray | null; + while ((match = regex.exec(text)) !== null) { + if (match[1]) uuids.push(...match[1].split(",").filter(Boolean)); + } + } + return uuids; +}; + export function createMessagesHandler(deps: MessagesHandlerDeps) { const { sessionManager } = deps; - // deno-lint-ignore require-await return async ( - _input: MessagesTransformInput, + input: MessagesTransformInput, output: MessagesTransformOutput, ) => { const lastUserEntry = output.messages @@ -27,77 +61,43 @@ export function createMessagesHandler(deps: MessagesHandlerDeps) { const sessionID = lastUserEntry.info.sessionID; const state = sessionManager.getState(sessionID); - if (!state?.isMain) { - logger.debug("Skipping memory injection; not main session", { - sessionID, - }); - return; - } + if (!state?.isMain) return; const allVisibleUuids: string[] = []; for (const entry of output.messages) { for (const part of entry.parts) { - if (part.type === "text" && "text" in part) { - const uuids = extractVisibleUuids((part as { text: string }).text); - if (uuids.length > 0) { - logger.debug("Found block UUIDs", { - sessionID, - uuids, - messageID: entry.info.id, - }); - } - allVisibleUuids.push(...uuids); + if (isTextPart(part)) { + 
allVisibleUuids.push(...extractVisibleUuids(part.text)); } } } state.visibleFactUuids = [...new Set(allVisibleUuids)]; - logger.debug("Updated visibleFactUuids from message scan", { - sessionID, - visibleCount: state.visibleFactUuids.length, - }); - - if (!state.cachedMemoryContext) { - logger.debug("Skipping memory injection; no cached context", { - sessionID, - }); - return; - } - const textPart = lastUserEntry.parts.find( - (part): part is typeof part & { type: "text"; text: string } => - part.type === "text" && "text" in part, - ); - if (!textPart) { - logger.debug("Skipping memory injection; no text part", { + const recallQuery = getTransformMessage(input) ?? getLatestUserText(output); + const prepared = state.pendingInjection ?? + await sessionManager.prepareInjection( sessionID, - }); - return; - } + recallQuery, + state.visibleFactUuids, + ); + if (!prepared) return; - if (textPart.text.includes(" 0 ? ` data-uuids="${uuids.join(",")}"` : ""; - const memoryBlock = - `\n${state.cachedMemoryContext}\n`; - - textPart.text = `${memoryBlock}\n\n${textPart.text}`; - - logger.info("Injected memory context into last user message", { + textPart.text = `${prepared.envelope}\n\n${textPart.text}`; + logger.info("Injected canonical session_memory block", { sessionID, - factCount: uuids.length, - blockLength: memoryBlock.length, - preview: state.cachedMemoryContext.slice(0, 100), + factCount: prepared.factUuids.length, }); - - state.cachedMemoryContext = undefined; - state.cachedFactUuids = undefined; + if (state.pendingInjection === prepared) { + state.pendingInjection = undefined; + } }; } diff --git a/src/index.test.ts b/src/index.test.ts index 3a0a877..b6be074 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ -1,8 +1,18 @@ import { assertEquals } from "jsr:@std/assert@^1.0.0"; -import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { afterEach, describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { graphiti, warnOnGraphitiStartupUnavailable 
} from "./index.ts"; +import { + setOpenCodeClient, + setWarningTaskScheduler, +} from "./services/opencode-warning.ts"; import { makeGroupId, makeUserGroupId } from "./utils.ts"; describe("index", () => { + afterEach(() => { + setOpenCodeClient(undefined); + setWarningTaskScheduler(undefined); + }); + describe("makeGroupId", () => { it("should omit undefined prefix text when prefix is missing", () => { const groupId = makeGroupId(undefined, "/home/user/my-project"); @@ -107,6 +117,73 @@ describe("index", () => { }); }); + describe("warnOnGraphitiStartupUnavailable", () => { + it("shows a native warning toast and structured log when Graphiti is unavailable", () => { + const appLogCalls: unknown[] = []; + const toastCalls: unknown[] = []; + const scheduledTasks: Array<() => void> = []; + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: (input: unknown) => { + appLogCalls.push(input); + }, + }, + tui: { + showToast: (input: unknown) => { + toastCalls.push(input); + }, + }, + }); + + warnOnGraphitiStartupUnavailable(false, "http://graphiti.test/mcp"); + + assertEquals(appLogCalls.length, 0); + assertEquals(toastCalls.length, 0); + assertEquals(scheduledTasks.length, 2); + for (const task of scheduledTasks) task(); + + assertEquals(appLogCalls.length, 1); + assertEquals(toastCalls, [{ + body: { + message: + "Graphiti MCP unavailable at http://graphiti.test/mcp; continuing without persistent memory.", + variant: "warning", + }, + }]); + }); + + it("does nothing when Graphiti is connected", () => { + const appLogCalls: unknown[] = []; + const toastCalls: unknown[] = []; + setOpenCodeClient({ + app: { + log: (input: unknown) => { + appLogCalls.push(input); + }, + }, + tui: { + showToast: (input: unknown) => { + toastCalls.push(input); + }, + }, + }); + + warnOnGraphitiStartupUnavailable(true, "http://graphiti.test/mcp"); + + assertEquals(appLogCalls.length, 0); + assertEquals(toastCalls.length, 0); + }); 
+ }); + + describe("plugin export shape", () => { + it("exports graphiti as the plugin entrypoint", () => { + assertEquals(typeof graphiti, "function"); + }); + }); + // NOTE: The main `graphiti()` plugin function requires a live Graphiti MCP // server and cannot be integration-tested here without mocking the MCP // transport layer. All testable units are covered in the files listed below: diff --git a/src/index.ts b/src/index.ts index fe33bc5..a5e85ac 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,76 +4,126 @@ import { createChatHandler } from "./handlers/chat.ts"; import { createCompactingHandler } from "./handlers/compacting.ts"; import { createEventHandler } from "./handlers/event.ts"; import { createMessagesHandler } from "./handlers/messages.ts"; -import { GraphitiClient } from "./services/client.ts"; -import { GraphitiConnectionManager } from "./services/connection-manager.ts"; -import { logger } from "./services/logger.ts"; import { SessionManager } from "./session.ts"; +import { BatchDrainService } from "./services/batch-drain.ts"; +import { GraphitiConnectionManager } from "./services/connection-manager.ts"; +import { GraphitiAsyncService } from "./services/graphiti-async.ts"; +import { GraphitiMcpClient } from "./services/graphiti-mcp.ts"; +import { + notifyGraphitiAvailabilityIssue, + setOpenCodeClient, +} from "./services/opencode-warning.ts"; +import { RedisCacheService } from "./services/redis-cache.ts"; +import { RedisClient } from "./services/redis-client.ts"; +import { RedisEventsService } from "./services/redis-events.ts"; +import { RedisSnapshotService } from "./services/redis-snapshot.ts"; +import { registerRuntimeTeardown } from "./services/runtime-teardown.ts"; import { makeGroupId, makeUserGroupId } from "./utils.ts"; -/** - * OpenCode plugin entry point for Graphiti memory integration. 
- */ -export const graphiti: Plugin = async (input: PluginInput) => { +export const warnOnGraphitiStartupUnavailable = ( + connected: boolean, + endpoint: string, +): void => { + if (connected) return; + notifyGraphitiAvailabilityIssue( + `Graphiti MCP unavailable at ${endpoint}; continuing without persistent memory.`, + { endpoint }, + ); +}; + +export const graphiti: Plugin = (input: PluginInput) => { const config = loadConfig(input.directory); + setOpenCodeClient(input.client); + const connectionManager = new GraphitiConnectionManager({ - endpoint: config.endpoint, + endpoint: config.graphiti.endpoint, }); connectionManager.start(); void connectionManager.ready().then((connected) => { - if (!connected) { - logger.warn( - "Could not connect to Graphiti MCP server at", - config.endpoint, - ); - logger.warn( - "Memory features will be unavailable until connection is established", - ); - } + warnOnGraphitiStartupUnavailable(connected, config.graphiti.endpoint); }); - const client = new GraphitiClient(connectionManager); - const sdkClient = input.client; + const redisClient = new RedisClient({ + endpoint: config.falkordb.redisEndpoint, + }); + void redisClient.connect(); + registerRuntimeTeardown([ + { + name: "redis", + run: () => redisClient.close(), + }, + { + name: "graphiti", + run: () => connectionManager.stop(), + }, + ]); + + const graphitiClient = new GraphitiMcpClient(connectionManager); + const redisEvents = new RedisEventsService(redisClient, { + sessionTtlSeconds: config.falkordb.sessionTtlSeconds, + }); + const redisSnapshot = new RedisSnapshotService(redisClient, { + ttlSeconds: config.falkordb.sessionTtlSeconds * 2, + }); + const redisCache = new RedisCacheService(redisClient, { + ttlSeconds: config.falkordb.cacheTtlSeconds, + driftThreshold: config.graphiti.driftThreshold, + }); + const batchDrain = new BatchDrainService(redisClient, redisEvents, { + batchSize: config.falkordb.batchSize, + batchMaxBytes: config.falkordb.batchMaxBytes, + 
drainRetryMax: config.falkordb.drainRetryMax, + }); + const graphitiAsync = new GraphitiAsyncService( + graphitiClient, + redisCache, + batchDrain, + ); const defaultGroupId = makeGroupId( - config.groupIdPrefix, + config.graphiti.groupIdPrefix, input.directory, ); const defaultUserGroupId = makeUserGroupId( - config.groupIdPrefix, + config.graphiti.groupIdPrefix, input.directory, ); - logger.info("Plugin initialized. Group ID:", defaultGroupId); const sessionManager = new SessionManager( defaultGroupId, defaultUserGroupId, - sdkClient, - client, + input.client, + redisEvents, + redisSnapshot, + redisCache, + { + idleRetentionMs: config.falkordb.sessionTtlSeconds * 1000, + }, ); - return { + return Promise.resolve({ event: createEventHandler({ sessionManager, - client, + redisEvents, + redisCache, + redisSnapshot, + graphitiAsync, defaultGroupId, defaultUserGroupId, - sdkClient, + sdkClient: input.client, directory: input.directory, }), "chat.message": createChatHandler({ sessionManager, - driftThreshold: config.driftThreshold, - factStaleDays: config.factStaleDays, - client, + redisEvents, + graphitiAsync, + drainTriggerSize: config.falkordb.batchSize, }), "experimental.session.compacting": createCompactingHandler({ sessionManager, - client, - defaultGroupId, - factStaleDays: config.factStaleDays, }), "experimental.chat.messages.transform": createMessagesHandler({ sessionManager, }), - }; + }); }; diff --git a/src/services/batch-drain.test.ts b/src/services/batch-drain.test.ts new file mode 100644 index 0000000..6410f0f --- /dev/null +++ b/src/services/batch-drain.test.ts @@ -0,0 +1,337 @@ +import { assertEquals } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { BatchDrainService } from "./batch-drain.ts"; +import { createSessionEvent } from "./event-extractor.ts"; +import { RedisClient } from "./redis-client.ts"; +import { + drainClaimActiveKey, + drainClaimKey, + drainClaimLockKey, + drainDeadKey, + 
drainPendingKey, + drainRetryKey, + RedisEventsService, +} from "./redis-events.ts"; + +const createDeps = () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const events = new RedisEventsService(redis, { + sessionTtlSeconds: 60, + claimLockTtlSeconds: 1, + }); + const drain = new BatchDrainService(redis, events, { + batchSize: 2, + batchMaxBytes: 20_000, + drainRetryMax: 2, + claimHeartbeatIntervalMs: 100, + }); + return { redis, events, drain }; +}; + +describe("batch drain", () => { + it("claims oldest events, drains them FIFO, and leaves newer items pending", async () => { + const { redis, events, drain } = createDeps(); + const added: string[] = []; + const recorded = []; + for (const summary of ["first", "second", "third"]) { + const event = createSessionEvent("message", "user", { + summary, + body: summary, + }); + recorded.push(event); + await events.recordEvent( + "session-1", + "group-1", + event, + ); + } + + const result = await drain.drainGroup("group-1", { + addMemory(input: { name: string }) { + added.push(input.name); + }, + } as never); + + assertEquals(result, { status: "success", drained: 2 }); + assertEquals( + added, + [ + `message:${recorded[0].id}`, + `message:${recorded[1].id}`, + ], + ); + const remaining = await redis.getListLength(drainPendingKey("group-1")); + assertEquals(remaining, 1); + + const pendingRaw = await redis.getListRange( + drainPendingKey("group-1"), + 0, + -1, + ); + assertEquals( + pendingRaw.map((item) => JSON.parse(item).event.id), + [recorded[2].id], + ); + }); + + it("keeps FIFO order across claim interleaving and does not lose newer enqueues", async () => { + const { redis, events } = createDeps(); + const first = createSessionEvent("message", "user", { + summary: "first", + body: "first", + }); + const second = createSessionEvent("message", "user", { + summary: "second", + body: "second", + }); + + await events.recordEvent("session-1", "group-1", first); + await 
events.recordEvent("session-1", "group-1", second); + + const claimed = await events.getPendingBatch("group-1", 2, 20_000); + assertEquals(claimed?.entries.map((entry) => entry.event.id), [ + first.id, + second.id, + ]); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); + assertEquals( + await redis.getListLength(drainClaimKey("group-1", claimed!.claimToken)), + 2, + ); + + const third = createSessionEvent("message", "user", { + summary: "third", + body: "third", + }); + await events.recordEvent("session-2", "group-1", third); + + const concurrentClaim = await events.getPendingBatch("group-1", 2, 20_000); + assertEquals(concurrentClaim, null); + + await events.releaseClaim("group-1", claimed!.claimToken); + + const pendingAfterRelease = await redis.getListRange( + drainPendingKey("group-1"), + 0, + -1, + ); + assertEquals( + pendingAfterRelease.map((item) => JSON.parse(item).event.id), + [third.id, second.id, first.id], + ); + + const reclaimed = await events.getPendingBatch("group-1", 3, 20_000); + assertEquals(reclaimed?.entries.map((entry) => entry.event.id), [ + first.id, + second.id, + third.id, + ]); + }); + + it("releases claims on retry and dead-letters after max attempts", async () => { + const { redis, events, drain } = createDeps(); + const event = createSessionEvent("error", "tool", { + summary: "failing batch", + body: "failing batch", + metadata: { resolved: false }, + }); + await events.recordEvent("session-1", "group-1", event); + + const failingGraphiti = { + addMemory() { + throw new Error("boom"); + }, + }; + + const first = await drain.drainGroup("group-1", failingGraphiti as never); + assertEquals(first.status, "retry"); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 1); + + await redis.setString( + drainRetryKey("group-1", `${event.id}:${event.id}`), + JSON.stringify({ attempts: 1, nextAttemptAt: 0 }), + 60, + ); + + const second = await drain.drainGroup("group-1", failingGraphiti as never); + 
assertEquals(second.status, "dead-letter"); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); + assertEquals(await redis.getListLength(drainDeadKey("group-1")), 1); + }); + + it("requeues abandoned claimed batches after lock loss and drains them", async () => { + const { redis, events, drain } = createDeps(); + const first = createSessionEvent("message", "user", { + summary: "first", + body: "first", + }); + const second = createSessionEvent("message", "user", { + summary: "second", + body: "second", + }); + + await events.recordEvent("session-1", "group-1", first); + await events.recordEvent("session-1", "group-1", second); + + const claimed = await events.getPendingBatch("group-1", 2, 20_000); + assertEquals(claimed?.entries.map((entry) => entry.event.id), [ + first.id, + second.id, + ]); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); + + await redis.deleteKey(drainClaimLockKey("group-1")); + + const added: string[] = []; + const result = await drain.drainGroup("group-1", { + addMemory(input: { name: string }) { + added.push(input.name); + }, + } as never); + + assertEquals(result, { status: "success", drained: 2 }); + assertEquals(added, [`message:${first.id}`, `message:${second.id}`]); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); + assertEquals( + await redis.getListLength(drainClaimKey("group-1", claimed!.claimToken)), + 0, + ); + assertEquals(await redis.getString(drainClaimActiveKey("group-1")), null); + }); + + it("can recover an abandoned claim before the next drain attempt", async () => { + const { redis, events } = createDeps(); + const first = createSessionEvent("message", "user", { + summary: "first", + body: "first", + }); + const second = createSessionEvent("message", "user", { + summary: "second", + body: "second", + }); + + await events.recordEvent("session-1", "group-1", first); + await events.recordEvent("session-1", "group-1", second); + + const claimed = await 
events.getPendingBatch("group-1", 2, 20_000); + await redis.deleteKey(drainClaimLockKey("group-1")); + + const recovered = await events.recoverAbandonedClaim("group-1"); + + assertEquals(recovered, true); + assertEquals( + await redis.getListLength(drainClaimKey("group-1", claimed!.claimToken)), + 0, + ); + const pendingRaw = await redis.getListRange( + drainPendingKey("group-1"), + 0, + -1, + ); + assertEquals( + pendingRaw.map((item) => JSON.parse(item).event.id), + [second.id, first.id], + ); + }); + + it("keeps an active long-running drain claim alive so recovery cannot steal it", async () => { + const { redis, events, drain } = createDeps(); + const first = createSessionEvent("message", "user", { + summary: "first", + body: "first", + }); + const second = createSessionEvent("message", "user", { + summary: "second", + body: "second", + }); + + await events.recordEvent("session-1", "group-1", first); + await events.recordEvent("session-1", "group-1", second); + + let firstAddStarted!: () => void; + let finishFirstAdd!: () => void; + const firstAddStartedPromise = new Promise((resolve) => { + firstAddStarted = resolve; + }); + const finishFirstAddPromise = new Promise((resolve) => { + finishFirstAdd = resolve; + }); + + const added: string[] = []; + const drainPromise = drain.drainGroup("group-1", { + async addMemory(input: { name: string }) { + added.push(input.name); + if (added.length === 1) { + firstAddStarted(); + await finishFirstAddPromise; + } + }, + } as never); + + await firstAddStartedPromise; + await new Promise((resolve) => setTimeout(resolve, 1_200)); + + const recoveredWhileActive = await events.recoverAbandonedClaim("group-1"); + const concurrentClaim = await events.getPendingBatch("group-1", 2, 20_000); + + assertEquals(recoveredWhileActive, false); + assertEquals(concurrentClaim, null); + + finishFirstAdd(); + + const result = await drainPromise; + assertEquals(result, { status: "success", drained: 2 }); + assertEquals(added, 
[`message:${first.id}`, `message:${second.id}`]); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); + assertEquals(await redis.getString(drainClaimActiveKey("group-1")), null); + }); + + it("fails and requeues when heartbeat loses ownership during a long drain", async () => { + const { redis, events, drain } = createDeps(); + const event = createSessionEvent("message", "user", { + summary: "first", + body: "first", + }); + + await events.recordEvent("session-1", "group-1", event); + + let started!: () => void; + let release!: () => void; + const startedPromise = new Promise((resolve) => { + started = resolve; + }); + const releasePromise = new Promise((resolve) => { + release = resolve; + }); + + const drainPromise = drain.drainGroup("group-1", { + async addMemory() { + started(); + await releasePromise; + }, + } as never); + + await startedPromise; + const activeToken = await redis.getString(drainClaimActiveKey("group-1")); + assertEquals(typeof activeToken, "string"); + + await redis.deleteKey(drainClaimLockKey("group-1")); + await new Promise((resolve) => setTimeout(resolve, 250)); + release(); + + const result = await drainPromise; + assertEquals(result.status, "retry"); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 1); + assertEquals(await redis.getString(drainClaimActiveKey("group-1")), null); + assertEquals( + await redis.getString( + drainRetryKey("group-1", `${event.id}:${event.id}`), + ) !== + null, + true, + ); + + const recovered = await events.recoverAbandonedClaim("group-1"); + assertEquals(recovered, false); + }); +}); diff --git a/src/services/batch-drain.ts b/src/services/batch-drain.ts new file mode 100644 index 0000000..b244f37 --- /dev/null +++ b/src/services/batch-drain.ts @@ -0,0 +1,183 @@ +import { + type DrainQueueEntry, + getSessionEventRecallText, +} from "../types/index.ts"; +import type { GraphitiMcpClient } from "./graphiti-mcp.ts"; +import { drainRetryKey } from "./redis-events.ts"; 
+import type { RedisEventsService } from "./redis-events.ts"; +import type { RedisClient } from "./redis-client.ts"; +import { logger } from "./logger.ts"; + +export interface BatchDrainServiceOptions { + batchSize: number; + batchMaxBytes: number; + drainRetryMax: number; + claimHeartbeatIntervalMs?: number; +} + +type RetryState = { attempts: number; nextAttemptAt: number }; + +class DrainClaimLostError extends Error { + constructor() { + super("Drain claim lease lost during batch processing"); + this.name = "DrainClaimLostError"; + } +} + +const makeBatchKey = (entries: DrainQueueEntry[]): string => + `${entries[0]?.event.id ?? "empty"}:${entries.at(-1)?.event.id ?? "empty"}`; + +const buildEpisodeBody = (entry: DrainQueueEntry): string => { + const refs = entry.event.refs?.length + ? `\nRefs: ${entry.event.refs.join(", ")}` + : ""; + const keywords = entry.event.keywords?.length + ? `\nKeywords: ${entry.event.keywords.join(", ")}` + : ""; + return [ + `Category: ${entry.event.category}`, + `Role: ${entry.event.role}`, + `Summary: ${entry.event.summary}`, + entry.event.detail ? `Detail: ${entry.event.detail}` : "", + entry.event.continuityText + ? `Continuity: ${entry.event.continuityText}` + : getSessionEventRecallText(entry.event), + entry.event.body ? `Body: ${entry.event.body}` : "", + keywords, + refs, + ].filter(Boolean).join("\n"); +}; + +export class BatchDrainService { + constructor( + private readonly redis: RedisClient, + private readonly events: RedisEventsService, + private readonly options: BatchDrainServiceOptions, + ) {} + + private getClaimHeartbeatIntervalMs(lockTtlSeconds: number): number { + return this.options.claimHeartbeatIntervalMs ?? 
+ Math.max(1_000, Math.floor((lockTtlSeconds * 1000) / 3)); + } + + private async getRetryState( + groupId: string, + batchKey: string, + ): Promise { + const raw = await this.redis.getString(drainRetryKey(groupId, batchKey)); + if (!raw) return null; + try { + return JSON.parse(raw) as RetryState; + } catch { + return null; + } + } + + private async setRetryState( + groupId: string, + batchKey: string, + state: RetryState, + ): Promise { + await this.redis.setString( + drainRetryKey(groupId, batchKey), + JSON.stringify(state), + 7 * 24 * 60 * 60, + ); + } + + async drainGroup( + groupId: string, + graphiti: GraphitiMcpClient, + ): Promise< + { + status: "empty" | "backoff" | "success" | "dead-letter" | "retry"; + drained: number; + } + > { + const claimed = await this.events.getPendingBatch( + groupId, + this.options.batchSize, + this.options.batchMaxBytes, + ); + if (!claimed || claimed.entries.length === 0) { + return { status: "empty", drained: 0 }; + } + + const batch = claimed.entries; + + const batchKey = makeBatchKey(batch); + const retryState = await this.getRetryState(groupId, batchKey); + if (retryState && retryState.nextAttemptAt > Date.now()) { + await this.events.releaseClaim(groupId, claimed.claimToken); + return { status: "backoff", drained: 0 }; + } + + let lostClaim = false; + const refreshClaimHeartbeat = async (): Promise => { + try { + const refreshed = await this.events.refreshClaimLease( + groupId, + claimed.claimToken, + claimed.lockTtlSeconds, + ); + if (!refreshed) lostClaim = true; + } catch { + lostClaim = true; + } + }; + const heartbeatInterval = setInterval(() => { + void refreshClaimHeartbeat(); + }, this.getClaimHeartbeatIntervalMs(claimed.lockTtlSeconds)); + + try { + for (const entry of batch) { + await graphiti.addMemory({ + name: `${entry.event.category}:${entry.event.id}`, + episodeBody: buildEpisodeBody(entry), + groupId, + source: "text", + sourceDescription: `session-event:${entry.event.category}`, + }); + if (lostClaim) 
throw new DrainClaimLostError(); + } + clearInterval(heartbeatInterval); + const stillOwned = await this.events.refreshClaimLease( + groupId, + claimed.claimToken, + claimed.lockTtlSeconds, + ); + if (lostClaim || !stillOwned) throw new DrainClaimLostError(); + await this.events.markBatchSuccess(groupId, claimed.claimToken, batch); + await this.redis.deleteKey(drainRetryKey(groupId, batchKey)); + return { status: "success", drained: batch.length }; + } catch (err) { + if (err instanceof DrainClaimLostError) { + logger.warn("Drain claim heartbeat lost ownership", { + groupId, + eventIds: batch.map((entry) => entry.event.id), + }); + } + const attempts = (retryState?.attempts ?? 0) + 1; + if (attempts >= this.options.drainRetryMax) { + logger.warn("Moving drain batch to dead-letter", { + groupId, + eventIds: batch.map((entry) => entry.event.id), + }); + await this.events.moveBatchToDeadLetter(groupId, batch); + await this.events.markBatchSuccess(groupId, claimed.claimToken, batch); + await this.redis.deleteKey(drainRetryKey(groupId, batchKey)); + return { status: "dead-letter", drained: batch.length }; + } + + await this.events.releaseClaim(groupId, claimed.claimToken); + await this.setRetryState(groupId, batchKey, { + attempts, + nextAttemptAt: Date.now() + 1_000 * (2 ** (attempts - 1)), + }); + logger.warn("Drain batch failed; will retry later", { groupId, err }); + return { status: "retry", drained: 0 }; + } finally { + clearInterval(heartbeatInterval); + } + } +} diff --git a/src/services/client.test.ts b/src/services/client.test.ts index 38c2dc6..22b3367 100644 --- a/src/services/client.test.ts +++ b/src/services/client.test.ts @@ -3,7 +3,7 @@ import { assertRejects, assertStrictEquals, } from "jsr:@std/assert@^1.0.0"; -import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { afterEach, describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import { GraphitiClient } from "./client.ts"; import { GraphitiOfflineError, @@ -11,6 +11,10 @@ import { type 
GraphitiToolCaller, } from "./connection-manager.ts"; import { logger } from "./logger.ts"; +import { + setOpenCodeClient, + setWarningTaskScheduler, +} from "./opencode-warning.ts"; const originalLogger = { ...logger }; logger.info = () => {}; @@ -23,6 +27,8 @@ addEventListener("unload", () => { logger.warn = originalLogger.warn; logger.error = originalLogger.error; logger.debug = originalLogger.debug; + setOpenCodeClient(undefined); + setWarningTaskScheduler(undefined); }); class FakeToolCaller implements GraphitiToolCaller { @@ -53,6 +59,11 @@ class FakeToolCaller implements GraphitiToolCaller { } describe("client", () => { + afterEach(() => { + setOpenCodeClient(undefined); + setWarningTaskScheduler(undefined); + }); + describe("parseToolResult", () => { const client = new GraphitiClient(new FakeToolCaller()); @@ -142,6 +153,56 @@ describe("client", () => { assertEquals(await client.searchNodes({ query: "test" }), []); assertEquals(await client.getEpisodes({ groupId: "g" }), []); }); + + it("emits native warning toast and structured log on fail-open reads", async () => { + const appLogCalls: unknown[] = []; + const toastCalls: unknown[] = []; + const scheduledTasks: Array<() => void> = []; + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: (input: unknown) => { + appLogCalls.push(input); + }, + }, + tui: { + showToast: (input: unknown) => { + toastCalls.push(input); + }, + }, + }); + + const tools = new FakeToolCaller(); + const err = new GraphitiOfflineError("offline"); + tools.callToolImpl = () => Promise.reject(err); + const client = new GraphitiClient(tools); + + assertEquals(await client.searchFacts({ query: "test" }), []); + assertEquals(appLogCalls.length, 0); + assertEquals(toastCalls.length, 0); + assertEquals(scheduledTasks.length, 2); + for (const task of scheduledTasks) task(); + assertEquals(appLogCalls.length, 1); + assertEquals(appLogCalls[0], { + body: { + service: "graphiti", + 
level: "warn", + message: "Graphiti unavailable; continuing without memory facts.", + extra: { + operation: "searchMemoryFacts", + err, + }, + }, + }); + assertEquals(toastCalls, [{ + body: { + message: "Graphiti unavailable; continuing without memory facts.", + variant: "warning", + }, + }]); + }); }); describe("write error propagation", () => { @@ -160,6 +221,64 @@ describe("client", () => { GraphitiOfflineError, ); }); + + it("emits native warning toast on write availability errors", async () => { + const appLogCalls: unknown[] = []; + const toastCalls: unknown[] = []; + const scheduledTasks: Array<() => void> = []; + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: (input: unknown) => { + appLogCalls.push(input); + }, + }, + tui: { + showToast: (input: unknown) => { + toastCalls.push(input); + }, + }, + }); + + const tools = new FakeToolCaller(); + const err = new GraphitiOfflineError("offline"); + tools.callToolImpl = () => Promise.reject(err); + const client = new GraphitiClient(tools); + + await assertRejects( + () => + client.addEpisode({ + name: "episode", + episodeBody: "body", + }), + GraphitiOfflineError, + ); + + assertEquals(appLogCalls.length, 0); + assertEquals(toastCalls.length, 0); + assertEquals(scheduledTasks.length, 2); + for (const task of scheduledTasks) task(); + assertEquals(appLogCalls.length, 1); + assertEquals(appLogCalls[0], { + body: { + service: "graphiti", + level: "warn", + message: "Graphiti unavailable; memory was not saved.", + extra: { + operation: "addMemory", + err, + }, + }, + }); + assertEquals(toastCalls, [{ + body: { + message: "Graphiti unavailable; memory was not saved.", + variant: "warning", + }, + }]); + }); }); describe("manager passthroughs", () => { diff --git a/src/services/client.ts b/src/services/client.ts index ca52757..03de5b3 100644 --- a/src/services/client.ts +++ b/src/services/client.ts @@ -1,265 +1,6 @@ -import { - 
GraphitiConnectionManager, - GraphitiSessionExpiredError, - type GraphitiToolCaller, - GraphitiTransportError, - isGraphitiOfflineError, - isGraphitiTimeoutError, -} from "./connection-manager.ts"; -import type { - GraphitiEpisode, - GraphitiFact, - GraphitiNode, -} from "../types/index.ts"; -import { logger } from "./logger.ts"; -import { normalizeEpisode } from "./sdk-normalize.ts"; - /** - * Graphiti domain adapter over the connection manager. + * @deprecated Compatibility re-export only. Import GraphitiMcpClient directly + * from ./graphiti-mcp.ts in new code. Kept temporarily to avoid breaking older + * imports of GraphitiClient. */ -export class GraphitiClient { - private readonly toolCaller: GraphitiToolCaller; - - constructor(endpointOrManager: string | GraphitiToolCaller) { - if (typeof endpointOrManager === "string") { - this.toolCaller = new GraphitiConnectionManager({ - endpoint: endpointOrManager, - }); - } else { - this.toolCaller = endpointOrManager; - } - } - - start(): void { - this.toolCaller.start(); - } - - async stop(): Promise { - await this.toolCaller.stop(); - } - - async connect(): Promise { - this.toolCaller.start(); - return await this.toolCaller.ready(); - } - - async ready(timeoutMs?: number): Promise { - return await this.toolCaller.ready(timeoutMs); - } - - /** - * Parse MCP tool results into JSON when possible. - * Public for testing. 
- */ - parseToolResult(result: unknown): unknown { - const typedResult = result as { - content?: Array<{ type?: string; text?: unknown }>; - }; - const content = typedResult.content; - if (!Array.isArray(content) || content.length === 0) return result; - - const text = content.find((item) => item?.type === "text")?.text; - if (text === undefined) return result; - - if (typeof text !== "string") { - try { - return JSON.parse(String(text)); - } catch { - return text; - } - } - - try { - return JSON.parse(text); - } catch { - return text; - } - } - - /** - * Extract an array from a tool result that may be a bare array or a - * wrapped-array response object (`{ [key]: T[] }`). - * Returns the array when found, otherwise `null`. - * Public for testing. - */ - parseWrappedArray(result: unknown, wrappedKey: string): T[] | null { - if (Array.isArray(result)) return result as T[]; - if ( - result && - typeof result === "object" && - Array.isArray((result as Record)[wrappedKey]) - ) { - return (result as Record)[wrappedKey] as T[]; - } - return null; - } - - /** - * Add an episode to Graphiti memory. - */ - async addEpisode(params: { - name: string; - episodeBody: string; - groupId?: string; - source?: "text" | "json" | "message"; - sourceDescription?: string; - }): Promise { - try { - await this.callTool("add_memory", { - name: params.name, - episode_body: params.episodeBody, - group_id: params.groupId, - source: params.source || "text", - source_description: params.sourceDescription || "", - }); - logger.debug("Added episode:", params.name); - } catch (err) { - if ( - isGraphitiOfflineError(err) || - isGraphitiTimeoutError(err) || - err instanceof GraphitiTransportError || - err instanceof GraphitiSessionExpiredError - ) { - logger.warn( - "addEpisode failed due to Graphiti availability issue", - err, - ); - } - throw err; - } - } - - /** - * Search Graphiti facts matching the provided query. 
- */ - async searchFacts(params: { - query: string; - groupIds?: string[]; - maxFacts?: number; - }): Promise { - try { - const result = await this.callTool("search_memory_facts", { - query: params.query, - group_ids: params.groupIds, - max_facts: params.maxFacts || 10, - }); - return this.parseWrappedArray(result, "facts") ?? []; - } catch (err) { - if (isGraphitiTimeoutError(err)) { - logger.warn("searchFacts request timed out; returning no facts"); - return []; - } - if (isGraphitiOfflineError(err)) { - logger.warn("searchFacts unavailable; returning no facts"); - return []; - } - if ( - err instanceof GraphitiTransportError || - err instanceof GraphitiSessionExpiredError - ) { - logger.warn( - "searchFacts unavailable during reconnect; returning no facts", - ); - return []; - } - logger.error("searchFacts error:", err); - return []; - } - } - - /** - * Search Graphiti nodes matching the provided query. - */ - async searchNodes(params: { - query: string; - groupIds?: string[]; - maxNodes?: number; - }): Promise { - try { - const result = await this.callTool("search_nodes", { - query: params.query, - group_ids: params.groupIds, - max_nodes: params.maxNodes || 10, - }); - return this.parseWrappedArray(result, "nodes") ?? []; - } catch (err) { - if (isGraphitiTimeoutError(err)) { - logger.warn("searchNodes request timed out; returning no nodes"); - return []; - } - if (isGraphitiOfflineError(err)) { - logger.warn("searchNodes unavailable; returning no nodes"); - return []; - } - if ( - err instanceof GraphitiTransportError || - err instanceof GraphitiSessionExpiredError - ) { - logger.warn( - "searchNodes unavailable during reconnect; returning no nodes", - ); - return []; - } - logger.error("searchNodes error:", err); - return []; - } - } - - /** - * Retrieve recent episodes for a group. 
- */ - async getEpisodes(params: { - groupId?: string; - lastN?: number; - }): Promise { - try { - const result = await this.callTool("get_episodes", { - group_id: params.groupId, - last_n: params.lastN, - }); - const raw = this.parseWrappedArray(result, "episodes") ?? - []; - return raw.map(normalizeEpisode); - } catch (err) { - if (isGraphitiTimeoutError(err)) { - logger.warn("getEpisodes request timed out; returning no episodes"); - return []; - } - if (isGraphitiOfflineError(err)) { - logger.warn("getEpisodes unavailable; returning no episodes"); - return []; - } - if ( - err instanceof GraphitiTransportError || - err instanceof GraphitiSessionExpiredError - ) { - logger.warn( - "getEpisodes unavailable during reconnect; returning no episodes", - ); - return []; - } - logger.error("getEpisodes error:", err); - return []; - } - } - - /** - * Check whether the Graphiti MCP server is reachable. - */ - async getStatus(): Promise { - try { - await this.callTool("get_status", {}); - return true; - } catch { - return false; - } - } - - private async callTool( - name: string, - args: Record, - ): Promise { - const result = await this.toolCaller.callTool(name, args); - return this.parseToolResult(result); - } -} +export { GraphitiMcpClient as GraphitiClient } from "./graphiti-mcp.ts"; diff --git a/src/services/compaction-utils.test.ts b/src/services/compaction-utils.test.ts index a2fc02a..6ca7163 100644 --- a/src/services/compaction-utils.test.ts +++ b/src/services/compaction-utils.test.ts @@ -1,4 +1,4 @@ -import { assertEquals, assertStrictEquals } from "jsr:@std/assert@^1.0.0"; +import { assertEquals } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import type { GraphitiFact, GraphitiNode } from "../types/index.ts"; import { classifyFacts, takeFactsWithinBudget } from "./compaction.ts"; diff --git a/src/services/event-extractor.test.ts b/src/services/event-extractor.test.ts new file mode 100644 index 0000000..5867d6d --- 
/dev/null +++ b/src/services/event-extractor.test.ts @@ -0,0 +1,157 @@ +import { assert, assertEquals } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { extractStructuredEvents } from "./event-extractor.ts"; + +describe("event-extractor", () => { + it("extracts intent, preference, decision, and data.import from chat input", () => { + const events = extractStructuredEvents({ + eventType: "chat.message", + sessionId: "session-1", + messageCount: 1, + role: "user", + messageText: + "Please keep Graphiti off the hot path and import this json dataset from src/data.json", + }); + + assertEquals(events.map((event) => event.category), [ + "intent", + "preference", + "decision", + "data.import", + ]); + assert(events.some((event) => event.refs?.includes("src/data.json"))); + }); + + it("extracts task lifecycle categories from task updates", () => { + const created = extractStructuredEvents({ + eventType: "task.updated", + properties: { + task: { + id: "t1", + path: "plans/ContextOverhaul.md", + summary: "Start implementing the overhaul", + }, + }, + }); + const completed = extractStructuredEvents({ + eventType: "task.updated", + properties: { + task: { id: "t1", summary: "Completed the overhaul fixes" }, + }, + }); + + assertEquals(created[0].category, "task.create"); + assertEquals(completed[0].category, "task.complete"); + assert(created[0].refs?.includes("plans/ContextOverhaul.md")); + }); + + it("extracts file, git, integration, and error activity from tool events", () => { + const fileEdit = extractStructuredEvents({ + eventType: "tool.completed", + properties: { + tool: "apply_patch", + path: "src/session.ts", + summary: "edited src/session.ts", + }, + }); + const gitActivity = extractStructuredEvents({ + eventType: "tool.completed", + properties: { + tool: "shell", + summary: "branch status and commit inspection", + }, + }); + const integration = extractStructuredEvents({ + eventType: "tool.called", + properties: { 
tool: "graphiti-mcp", summary: "Graphiti MCP search" }, + }); + const error = extractStructuredEvents({ + eventType: "tool.completed", + properties: { tool: "shell", summary: "command failed with error" }, + }); + + assertEquals(fileEdit[0].category, "file.edit"); + assertEquals(gitActivity[0].category, "git.activity"); + assertEquals(integration[0].category, "integration.call"); + assertEquals(error[0].category, "error"); + }); + + it("stores continuity for assistant/tool events without transcript-heavy bodies by default", () => { + const assistant = extractStructuredEvents({ + eventType: "message.updated", + role: "assistant", + messageText: + "Implemented structured continuity extraction for hot-tier snapshots and recall.", + }); + const tool = extractStructuredEvents({ + eventType: "tool.completed", + messageText: + "Read src/session.ts and extracted continuity fields from the current implementation without storing the raw output transcript.", + properties: { + tool: "Read", + path: "src/session.ts", + summary: "Read src/session.ts", + }, + }); + + assertEquals(assistant[0].category, "message"); + assertEquals(assistant[0].body, undefined); + assertEquals(typeof assistant[0].continuityText, "string"); + assertEquals(tool[0].category, "file.read"); + assertEquals(tool[0].body, undefined); + assertEquals(typeof tool[0].continuityText, "string"); + }); + + it("extracts rules, environment, subagent, discovery, and assistant error signals", () => { + const rules = extractStructuredEvents({ + eventType: "rules.loaded", + properties: { + path: "AGENTS.md", + source: "workspace", + name: "project rules", + }, + }); + const env = extractStructuredEvents({ + eventType: "environment.updated", + properties: { + cwd: "/workspace/project", + summary: + "working directory moved to /workspace/project and env updated", + }, + }); + const started = extractStructuredEvents({ + eventType: "subagent.started", + properties: { + agentId: "agent-1", + sessionId: "child-1", + summary: 
"Spawned subagent for tests", + }, + }); + const finished = extractStructuredEvents({ + eventType: "subagent.finished", + properties: { + agentId: "agent-1", + sessionId: "child-1", + summary: "Subagent finished tests", + }, + }); + const assistant = extractStructuredEvents({ + eventType: "message.updated", + role: "assistant", + messageText: "I discovered a blocker and cannot complete the task", + }); + + assertEquals(rules[0].category, "rule.load"); + assertEquals(env.map((event) => event.category), [ + "cwd.change", + "env.change", + ]); + assertEquals(started[0].category, "subagent.start"); + assertEquals(finished[0].category, "subagent.finish"); + assertEquals(assistant.map((event) => event.category), [ + "message", + "discovery", + "error", + ]); + }); +}); diff --git a/src/services/event-extractor.ts b/src/services/event-extractor.ts new file mode 100644 index 0000000..e7eb83f --- /dev/null +++ b/src/services/event-extractor.ts @@ -0,0 +1,871 @@ +import type { + EventCategory, + SessionEvent, + SessionEventSourceKind, +} from "../types/index.ts"; + +const MAX_SUMMARY = 200; +const MAX_BODY = 4096; + +const priorityByCategory: Record = { + decision: 0, + preference: 0, + "rule.load": 0, + "task.create": 0, + "task.update": 1, + "task.complete": 1, + "file.read": 1, + "file.write": 1, + "file.edit": 1, + "file.search": 2, + "cwd.change": 2, + "env.change": 2, + error: 2, + "git.activity": 3, + "subagent.start": 1, + "subagent.finish": 3, + "integration.call": 3, + intent: 0, + "data.import": 4, + discovery: 4, + message: 4, + "session.meta": 3, +}; + +type EventRole = SessionEvent["role"]; + +type EventContext = { + summary: string; + body?: string; + detail?: string; + continuityText?: string; + keywords?: string[]; + sourceKind?: SessionEventSourceKind; + refs?: string[]; + metadata?: Record; +}; + +type ExtractedEventInput = { + eventType: string; + properties?: Record; + sessionId?: string; + messageText?: string; + messageCount?: number; + role?: 
EventRole; +}; + +type NormalizedEventInput = { + eventType: string; + props: Record; + sessionId?: string; + text: string; + refs: string[]; + role: EventRole; + messageCount: number; +}; + +const textEncoder = new TextEncoder(); +const eventRoles = new Set(["user", "assistant", "tool", "system"]); + +const normalizeWhitespace = (text: string): string => + text.replace(/\s+/g, " ").trim(); + +const summarize = (text: string): string => + normalizeWhitespace(text).slice(0, MAX_SUMMARY); + +const truncateBody = (text: string): string => text.slice(0, MAX_BODY); + +const truncateDetail = (text: string): string => text.slice(0, 600); + +const truncateContinuity = (text: string): string => text.slice(0, 800); + +const makeId = (): string => + crypto.randomUUID?.() ?? + `${Date.now()}-${Math.random().toString(16).slice(2)}`; + +const asRecord = (value: unknown): Record | undefined => + value && typeof value === "object" && !Array.isArray(value) + ? value as Record + : undefined; + +const asString = (value: unknown): string | undefined => + typeof value === "string" && value.trim() ? value.trim() : undefined; + +const asEventRole = (value: unknown): EventRole | undefined => { + const role = asString(value); + return role && eventRoles.has(role as EventRole) + ? role as EventRole + : undefined; +}; + +const asNumber = (value: unknown): number | undefined => + typeof value === "number" && Number.isFinite(value) ? 
value : undefined; + +const toText = (value: unknown): string | undefined => { + if (typeof value === "string") { + const normalized = normalizeWhitespace(value); + return normalized || undefined; + } + if (typeof value === "number" || typeof value === "boolean") { + return String(value); + } + if (Array.isArray(value)) { + const joined = value.map((item) => toText(item)).filter(Boolean).join(" "); + return joined || undefined; + } + const record = asRecord(value); + if (!record) return undefined; + for ( + const key of [ + "text", + "summary", + "message", + "content", + "body", + "description", + "prompt", + "query", + "title", + "name", + "value", + "reason", + "goal", + "status", + "intent", + ] + ) { + const result = toText(record[key]); + if (result) return result; + } + return undefined; +}; + +const pickStrings = ( + values: Array, + limit = 8, +): string[] => { + const seen = new Set(); + const result: string[] = []; + for (const value of values) { + const text = toText(value); + if (!text || seen.has(text)) continue; + seen.add(text); + result.push(text); + if (result.length >= limit) break; + } + return result; +}; + +const pickKeywords = ( + values: Array, + limit = 8, +): string[] => pickStrings(values, limit).map((value) => summarize(value)); + +const collectInlinePathRefs = (text: string): string[] => { + const refs = new Set(); + for ( + const match of text.matchAll( + /(?:[A-Za-z0-9._-]+\/)+[A-Za-z0-9._-]+(?:\.[A-Za-z0-9]{1,8})?/g, + ) + ) { + const value = match[0]?.trim(); + if (value) refs.add(value); + } + return [...refs]; +}; + +const collectPathRefs = ( + value: unknown, + refs = new Set(), +): string[] => { + if (!value) return [...refs]; + if (typeof value === "string") { + const trimmed = value.trim(); + if ( + trimmed && + (trimmed.includes("/") || trimmed.includes("\\") || + /\.[A-Za-z0-9]{1,8}$/.test(trimmed)) + ) { + refs.add(trimmed); + } + return [...refs]; + } + if (Array.isArray(value)) { + for (const item of value) 
collectPathRefs(item, refs); + return [...refs]; + } + const record = asRecord(value); + if (!record) return [...refs]; + for (const [key, item] of Object.entries(record)) { + if (/(path|paths|file|files|ref|refs|cwd|directory)/i.test(key)) { + collectPathRefs(item, refs); + } + } + return [...refs]; +}; + +const hasLowerKeyword = ( + haystack: string | undefined, + ...needles: string[] +): boolean => { + if (!haystack) return false; + return needles.some((needle) => haystack.includes(needle)); +}; + +const hasKeyword = ( + haystack: string | undefined, + ...needles: string[] +): boolean => hasLowerKeyword(haystack?.toLowerCase(), ...needles); + +const compactParts = ( + ...parts: Array +): string | undefined => { + const compact = parts + .map((part) => part ? normalizeWhitespace(part) : "") + .filter(Boolean) + .join(" — "); + return compact || undefined; +}; + +const collectMetadataKeywords = (props: Record): string[] => + pickKeywords([ + props.tool, + props.name, + props.integration, + props.status, + props.result, + props.reason, + props.cwd, + ]); + +const compactToolMetadata = ( + props: Record, + extra: Record = {}, +): Record => { + const metadata: Record = {}; + for ( + const [key, value] of Object.entries({ + tool: props.tool, + name: props.name, + integration: props.integration, + status: props.status, + result: props.result, + exitCode: props.exitCode, + cwd: props.cwd, + blocking: props.blocking, + resolved: props.resolved, + ...extra, + }) + ) { + if ( + typeof value === "string" || typeof value === "number" || + typeof value === "boolean" + ) { + metadata[key] = value; + } + } + return metadata; +}; + +const buildContinuityText = ( + summary: string, + detail?: string, + refs?: string[], + keywords?: string[], +): string | undefined => { + const continuity = [ + summary, + detail, + refs?.join(" "), + keywords?.join(" "), + ] + .map((value) => value ? normalizeWhitespace(value) : "") + .filter(Boolean) + .join(" "); + return continuity ? 
truncateContinuity(continuity) : undefined; +}; + +const compactMessageBody = (text: string): string | undefined => { + const normalized = normalizeWhitespace(text); + if (!normalized) return undefined; + return truncateBody(normalized.slice(0, 480)); +}; + +const buildToolActivityContext = ( + tool: string, + text: string, + refs: string[], + props: Record, + options: { + summaryPrefix?: string; + sourceKind?: SessionEventSourceKind; + preserveBody?: boolean; + extraKeywords?: string[]; + extraMetadata?: Record; + } = {}, +): EventContext => { + const normalizedText = normalizeWhitespace(text); + const refSummary = refs.slice(0, 3).join(", "); + const statusSummary = compactParts( + asString(props.status), + asString(props.result), + typeof props.exitCode === "number" ? `exit ${props.exitCode}` : undefined, + ); + const summary = compactParts( + options.summaryPrefix ?? tool, + refSummary, + statusSummary, + ) ?? `${tool} activity`; + const detail = compactParts( + summarize(normalizedText), + statusSummary, + refs.length > 0 ? `refs ${refs.slice(0, 4).join(", ")}` : undefined, + ); + const keywords = pickKeywords([ + tool, + ...refs, + ...collectMetadataKeywords(props), + ...(options.extraKeywords ?? []), + ]); + return { + summary, + body: options.preserveBody ? compactMessageBody(normalizedText) : undefined, + detail, + continuityText: buildContinuityText(summary, detail, refs, keywords), + keywords, + sourceKind: options.sourceKind ?? "tool-activity", + refs, + metadata: compactToolMetadata(props, options.extraMetadata), + }; +}; + +const normalizeInput = ( + input: ExtractedEventInput, +): NormalizedEventInput => { + const props = input.properties ?? {}; + const text = input.messageText ?? toText(props) ?? ""; + const refs = [ + ...new Set([...collectPathRefs(props), ...collectInlinePathRefs(text)]), + ]; + + return { + eventType: input.eventType, + props, + sessionId: input.sessionId, + text, + refs, + role: input.role ?? 
"system", + messageCount: asNumber(input.messageCount) ?? 1, + }; +}; + +const createEvent = ( + category: EventCategory, + role: EventRole, + context: EventContext, +): SessionEvent => ({ + id: makeId(), + ts: Date.now(), + category, + priority: priorityByCategory[category], + role, + summary: summarize(context.summary), + body: context.body ? truncateBody(context.body) : undefined, + detail: context.detail ? truncateDetail(context.detail) : undefined, + continuityText: context.continuityText + ? truncateContinuity(context.continuityText) + : undefined, + keywords: context.keywords?.filter(Boolean).slice(0, 8), + sourceKind: context.sourceKind, + refs: context.refs?.filter(Boolean), + metadata: context.metadata, +}); + +export const createSessionEvent = ( + category: EventCategory, + role: EventRole, + context: EventContext, +): SessionEvent => createEvent(category, role, context); + +export const extractUserMessageEvent = ( + text: string, + messageCount: number, +): SessionEvent => + createEvent(messageCount <= 1 ? "intent" : "message", "user", { + summary: text, + body: compactMessageBody(text), + detail: summarize(text), + continuityText: buildContinuityText(text, summarize(text)), + keywords: pickKeywords([text]), + sourceKind: "user-request", + }); + +export const extractAssistantMessageEvent = (text: string): SessionEvent => + createEvent("message", "assistant", { + summary: summarize(text), + detail: compactParts("Assistant response", summarize(text)), + continuityText: buildContinuityText(summarize(text), summarize(text)), + keywords: pickKeywords([text]), + sourceKind: "assistant-response", + }); + +export const extractSessionCreatedEvent = (sessionId?: string): SessionEvent => + createEvent("session.meta", "system", { + summary: `Session created${sessionId ? `: ${sessionId}` : ""}`, + detail: sessionId + ? `Session ${sessionId} initialized` + : "Session initialized", + continuityText: sessionId + ? 
`session created ${sessionId}` + : "session created", + keywords: pickKeywords([sessionId, "session", "created"]), + sourceKind: "system-state", + refs: sessionId ? [sessionId] : undefined, + metadata: sessionId ? { sessionId } : undefined, + }); + +export const extractCompactionEvent = (summary: string): SessionEvent => + createEvent("task.update", "system", { + summary: `Session compacted: ${summary}`, + detail: summarize(summary), + continuityText: buildContinuityText( + `Session compacted: ${summary}`, + summary, + ), + keywords: pickKeywords([summary, "compacted"]), + sourceKind: "system-state", + metadata: { compacted: true }, + }); + +const inferTaskCategory = (text: string): EventCategory => { + if ( + hasKeyword( + text, + "complete", + "completed", + "done", + "finished", + "resolved", + "fixed", + ) + ) { + return "task.complete"; + } + if ( + hasKeyword(text, "start", "create", "begin", "plan", "goal", "implement") + ) { + return "task.create"; + } + return "task.update"; +}; + +const extractFromHookPayload = ( + input: ExtractedEventInput, +): SessionEvent[] => { + const normalized = normalizeInput(input); + const { eventType, props, sessionId, text, refs, role, messageCount } = + normalized; + + if (eventType === "session.created") { + return [ + extractSessionCreatedEvent( + sessionId ?? 
asString(asRecord(props.info)?.id), + ), + ]; + } + + if (eventType === "session.compacted" && text) { + return [extractCompactionEvent(text)]; + } + + if (eventType === "message.updated" && role === "assistant" && text) { + return [extractAssistantMessageEvent(text)]; + } + + if (eventType === "chat.message" && text) { + return [extractUserMessageEvent(text, messageCount)]; + } + + const genericSummary = text || eventType; + return [createEvent("session.meta", role, { + summary: genericSummary, + detail: summarize(text), + continuityText: buildContinuityText(genericSummary, summarize(text), refs), + keywords: pickKeywords([eventType, text, ...refs]), + sourceKind: role === "tool" + ? "tool-activity" + : role === "assistant" + ? "assistant-response" + : role === "user" + ? "user-request" + : "system-state", + refs, + metadata: { eventType }, + })]; +}; + +export const extractStructuredEvents = ( + input: ExtractedEventInput, +): SessionEvent[] => { + const normalized = normalizeInput(input); + const { eventType, props, text, refs, role, messageCount } = normalized; + + if (eventType === "chat.message") { + const events = [extractUserMessageEvent(text, messageCount)]; + const lower = text.toLowerCase(); + if (hasLowerKeyword(lower, "prefer", "please", "always", "never")) { + events.push( + createEvent("preference", "user", { + summary: text, + detail: summarize(text), + continuityText: buildContinuityText(text, summarize(text)), + keywords: pickKeywords([text, "preference"]), + sourceKind: "user-request", + }), + ); + } + if ( + hasLowerKeyword(lower, "decide", "decision", "must", "should", "keep ") + ) { + events.push( + createEvent("decision", "user", { + summary: text, + detail: summarize(text), + continuityText: buildContinuityText(text, summarize(text)), + keywords: pickKeywords([text, "decision"]), + sourceKind: "user-request", + }), + ); + } + if ( + hasLowerKeyword( + lower, + "import", + "paste", + "uploaded", + "dataset", + "csv", + "json", + ) + ) { + 
events.push( + createEvent("data.import", "user", { + summary: text, + detail: compactParts("Imported or referenced data", summarize(text)), + continuityText: buildContinuityText(text, summarize(text), refs), + keywords: pickKeywords([text, ...refs, "data"]), + sourceKind: "user-request", + refs, + }), + ); + } + return events; + } + + if (eventType === "message.updated") { + const resolvedRole = input.role ?? asEventRole(asRecord(props.info)?.role); + if (resolvedRole === "assistant" && text) { + const events = [extractAssistantMessageEvent(text)]; + if (hasKeyword(text, "discovered", "found", "identified", "confirmed")) { + events.push( + createEvent("discovery", "assistant", { + summary: text, + detail: summarize(text), + continuityText: buildContinuityText(text, summarize(text), refs), + keywords: pickKeywords([text, ...refs, "discovery"]), + sourceKind: "assistant-response", + refs, + }), + ); + } + if (hasKeyword(text, "error", "failed", "blocker", "cannot", "unable")) { + events.push(createEvent("error", "assistant", { + summary: text, + detail: summarize(text), + continuityText: buildContinuityText(text, summarize(text), refs), + keywords: pickKeywords([text, ...refs, "error", "blocker"]), + sourceKind: "assistant-response", + refs, + metadata: { resolved: false, eventType }, + })); + } + return events; + } + } + + if (eventType === "task.updated") { + const task = asRecord(props.task) ?? props; + const summary = toText(task) ?? 
"Task updated"; + return [createEvent(inferTaskCategory(summary), "system", { + summary, + detail: compactParts("Task update", summarize(summary)), + continuityText: buildContinuityText(summary, summarize(summary), refs), + keywords: pickKeywords([summary, task.id, task.path, ...refs]), + sourceKind: "system-state", + refs: pickStrings([task.id, task.path, ...refs]), + metadata: compactToolMetadata(task), + })]; + } + + if (eventType === "rules.loaded") { + const summary = + pickStrings([props.name, props.path, props.source, text]).join(" — ") || + "Rules loaded"; + return [createEvent("rule.load", "system", { + summary, + detail: compactParts("Rules loaded", text || summary), + continuityText: buildContinuityText(summary, text || summary, refs), + keywords: pickKeywords([summary, ...refs, "rules"]), + sourceKind: "system-state", + refs, + metadata: compactToolMetadata(props), + })]; + } + + if (eventType === "tool.called" || eventType === "tool.completed") { + const tool = asString(props.tool) ?? asString(props.name) ?? + toText(asRecord(props.call)?.tool) ?? 
"tool"; + const summaryText = text || `${tool} activity`; + const lowerTool = tool.toLowerCase(); + const lowerText = summaryText.toLowerCase(); + + if ( + hasLowerKeyword(lowerTool, "read", "open") || + hasLowerKeyword(lowerText, "read file", "opened") + ) { + return [ + createEvent( + "file.read", + "tool", + buildToolActivityContext(tool, summaryText, refs, props, { + summaryPrefix: "Read", + extraKeywords: ["file", "read"], + }), + ), + ]; + } + if ( + hasLowerKeyword(lowerTool, "write", "create") || + hasLowerKeyword(lowerText, "wrote", "created file") + ) { + return [ + createEvent( + "file.write", + "tool", + buildToolActivityContext(tool, summaryText, refs, props, { + summaryPrefix: "Wrote", + extraKeywords: ["file", "write"], + }), + ), + ]; + } + if ( + hasLowerKeyword(lowerTool, "edit", "patch", "replace") || + hasLowerKeyword(lowerText, "updated file", "edited") + ) { + return [ + createEvent( + "file.edit", + "tool", + buildToolActivityContext(tool, summaryText, refs, props, { + summaryPrefix: "Edited", + extraKeywords: ["file", "edit"], + }), + ), + ]; + } + if ( + hasLowerKeyword(lowerTool, "grep", "search", "glob") || + hasLowerKeyword(lowerText, "searched", "query") + ) { + return [ + createEvent( + "file.search", + "tool", + buildToolActivityContext(tool, summaryText, refs, props, { + summaryPrefix: "Searched", + extraKeywords: ["search"], + }), + ), + ]; + } + if ( + hasLowerKeyword(lowerTool, "git") || + hasLowerKeyword( + lowerText, + "branch", + "commit", + "merge", + "rebase", + "push", + "stash", + ) + ) { + return [ + createEvent( + "git.activity", + "tool", + buildToolActivityContext(tool, summaryText, refs, props, { + summaryPrefix: "Git", + extraKeywords: ["git"], + preserveBody: true, + }), + ), + ]; + } + if ( + hasLowerKeyword(lowerTool, "graphiti", "mcp", "redis", "http") || + asString(props.integration) + ) { + return [ + createEvent( + "integration.call", + "tool", + buildToolActivityContext(tool, summaryText, refs, props, { + 
summaryPrefix: "Integration", + extraKeywords: ["integration"], + }), + ), + ]; + } + if (hasLowerKeyword(lowerText, "error", "failed", "exception", "unable")) { + return [createEvent("error", "tool", { + ...buildToolActivityContext(tool, summaryText, refs, props, { + summaryPrefix: "Tool error", + preserveBody: true, + extraKeywords: ["error", "failed"], + extraMetadata: { resolved: false }, + }), + })]; + } + } + + if (eventType === "environment.updated") { + const summary = text || "Environment updated"; + const entries: SessionEvent[] = []; + if (hasKeyword(summary, "cwd", "directory", "working directory")) { + entries.push( + createEvent("cwd.change", "system", { + summary, + detail: compactParts("Working directory updated", text), + continuityText: buildContinuityText(summary, text, refs), + keywords: pickKeywords([summary, ...refs, "cwd"]), + sourceKind: "system-state", + refs, + metadata: compactToolMetadata(props), + }), + ); + } + entries.push( + createEvent("env.change", "system", { + summary, + detail: compactParts("Environment updated", text), + continuityText: buildContinuityText(summary, text, refs), + keywords: pickKeywords([summary, ...refs, "environment"]), + sourceKind: "system-state", + refs, + metadata: compactToolMetadata(props), + }), + ); + return entries; + } + + if (eventType === "subagent.started" || eventType === "subagent.finished") { + return [ + createEvent( + eventType === "subagent.started" ? "subagent.start" : "subagent.finish", + "system", + { + summary: text || eventType, + detail: compactParts( + eventType === "subagent.started" + ? 
"Subagent started" + : "Subagent finished", + text, + ), + continuityText: buildContinuityText(text || eventType, text, refs), + keywords: pickKeywords([ + text, + props.agentId, + props.sessionId, + ...refs, + ]), + sourceKind: "system-state", + refs: pickStrings([props.agentId, props.sessionId, ...refs]), + metadata: compactToolMetadata(props), + }, + ), + ]; + } + + if (eventType === "session.idle") { + return [createEvent("session.meta", "system", { + summary: text || "Session idle", + detail: compactParts("Session idle", text), + continuityText: buildContinuityText(text || "Session idle", text, refs), + keywords: pickKeywords([text, eventType, ...refs]), + sourceKind: "system-state", + refs, + metadata: { ...props, eventType }, + })]; + } + + if (text) { + const lower = text.toLowerCase(); + if (hasLowerKeyword(lower, "error", "failed", "exception", "blocker")) { + return [createEvent("error", role, { + summary: text, + detail: summarize(text), + continuityText: buildContinuityText(text, summarize(text), refs), + keywords: pickKeywords([text, ...refs, "error"]), + sourceKind: role === "assistant" + ? "assistant-response" + : role === "user" + ? "user-request" + : role === "tool" + ? "tool-activity" + : "system-state", + refs, + metadata: { ...props, resolved: false, eventType }, + })]; + } + if (hasLowerKeyword(lower, "discover", "found", "inspect", "observed")) { + return [createEvent("discovery", role, { + summary: text, + detail: summarize(text), + continuityText: buildContinuityText(text, summarize(text), refs), + keywords: pickKeywords([text, ...refs, "discovery"]), + sourceKind: role === "assistant" + ? "assistant-response" + : role === "user" + ? "user-request" + : role === "tool" + ? "tool-activity" + : "system-state", + refs, + metadata: { ...props, eventType }, + })]; + } + return [createEvent("message", role, { + summary: text, + body: role === "user" ? 
compactMessageBody(text) : undefined, + detail: summarize(text), + continuityText: buildContinuityText(text, summarize(text), refs), + keywords: pickKeywords([text, ...refs]), + sourceKind: role === "assistant" + ? "assistant-response" + : role === "user" + ? "user-request" + : role === "tool" + ? "tool-activity" + : "system-state", + refs, + metadata: { ...props, eventType }, + })]; + } + + return extractFromHookPayload(input); +}; + +export const estimateEventSize = (event: SessionEvent): number => + textEncoder.encode(JSON.stringify(event)).length; diff --git a/src/services/graphiti-async.ts b/src/services/graphiti-async.ts new file mode 100644 index 0000000..06e3c6f --- /dev/null +++ b/src/services/graphiti-async.ts @@ -0,0 +1,94 @@ +import type { PersistentMemoryCacheEntry } from "../types/index.ts"; +import type { BatchDrainService } from "./batch-drain.ts"; +import type { GraphitiMcpClient } from "./graphiti-mcp.ts"; +import type { RedisCacheService } from "./redis-cache.ts"; +import { logger } from "./logger.ts"; + +export class GraphitiAsyncService { + private readonly drainInFlight = new Map>(); + private readonly refreshInFlight = new Map>(); + private readonly primerInFlight = new Map>(); + + constructor( + private readonly graphiti: GraphitiMcpClient, + private readonly cache: RedisCacheService, + private readonly drain: BatchDrainService, + ) {} + + schedulePrimer(groupId: string): void { + if (this.primerInFlight.has(groupId)) return; + const run = (async () => { + const existing = await this.cache.get(groupId); + if (existing) return; + const episodes = await this.graphiti.getEpisodes({ groupId, lastN: 5 }); + if (episodes.length === 0) return; + const entry: PersistentMemoryCacheEntry = { + query: "primer", + refreshedAt: Date.now(), + facts: [], + nodes: [], + factUuids: [], + nodeRefs: [], + episodeSummaries: episodes.map((episode) => + `${episode.name}: ${episode.content}`.slice(0, 240) + ), + }; + await this.cache.set(groupId, entry); + 
})().catch((err) => logger.debug("Graphiti primer failed", err)).finally( + () => this.primerInFlight.delete(groupId), + ); + this.primerInFlight.set(groupId, run); + } + + scheduleCacheRefresh(groupId: string, query: string): void { + const normalized = query.trim(); + if (!normalized) return; + const key = `${groupId}:${normalized.toLowerCase()}`; + if (this.refreshInFlight.has(key)) return; + + const run = (async () => { + await this.cache.rememberRefreshQuery(groupId, normalized); + const [facts, nodes] = await Promise.all([ + this.graphiti.searchMemoryFacts({ + query: normalized, + groupIds: [groupId], + maxFacts: 20, + }), + this.graphiti.searchNodes({ + query: normalized, + groupIds: [groupId], + maxNodes: 12, + }), + ]); + await this.cache.set(groupId, { + query: normalized, + refreshedAt: Date.now(), + facts, + nodes, + factUuids: facts.map((fact) => fact.uuid), + nodeRefs: nodes.map((node) => node.uuid), + }); + })().catch((err) => logger.debug("Graphiti cache refresh failed", err)) + .finally(() => this.refreshInFlight.delete(key)); + + this.refreshInFlight.set(key, run); + } + + scheduleDrain(groupId: string): void { + if (this.drainInFlight.has(groupId)) return; + const run = (async () => { + const result = await this.drain.drainGroup(groupId, this.graphiti); + if (result.status === "success" || result.status === "dead-letter") { + const [current, meta] = await Promise.all([ + this.cache.get(groupId), + this.cache.getMeta(groupId), + ]); + const refreshQuery = current?.query || meta?.lastQuery; + if (refreshQuery) this.scheduleCacheRefresh(groupId, refreshQuery); + } + })().catch((err) => logger.debug("Graphiti drain failed", err)).finally( + () => this.drainInFlight.delete(groupId), + ); + this.drainInFlight.set(groupId, run); + } +} diff --git a/src/services/graphiti-mcp.ts b/src/services/graphiti-mcp.ts new file mode 100644 index 0000000..917512c --- /dev/null +++ b/src/services/graphiti-mcp.ts @@ -0,0 +1,248 @@ +import { + 
GraphitiConnectionManager, + GraphitiSessionExpiredError, + type GraphitiToolCaller, + GraphitiTransportError, + isGraphitiOfflineError, + isGraphitiTimeoutError, +} from "./connection-manager.ts"; +import type { + GraphitiEpisode, + GraphitiFact, + GraphitiNode, +} from "../types/index.ts"; +import { logger } from "./logger.ts"; +import { notifyGraphitiAvailabilityIssue } from "./opencode-warning.ts"; +import { normalizeEpisode } from "./sdk-normalize.ts"; + +export class GraphitiMcpClient { + private readonly toolCaller: GraphitiToolCaller; + + constructor(endpointOrManager: string | GraphitiToolCaller) { + this.toolCaller = typeof endpointOrManager === "string" + ? new GraphitiConnectionManager({ endpoint: endpointOrManager }) + : endpointOrManager; + } + + start(): void { + this.toolCaller.start(); + } + + async stop(): Promise { + await this.toolCaller.stop(); + } + + async connect(): Promise { + this.toolCaller.start(); + return await this.toolCaller.ready(); + } + + async ready(timeoutMs?: number): Promise { + return await this.toolCaller.ready(timeoutMs); + } + + parseToolResult(result: unknown): unknown { + const typedResult = result as { + content?: Array<{ type?: string; text?: unknown }>; + }; + const content = typedResult.content; + if (!Array.isArray(content) || content.length === 0) return result; + + const text = content.find((item) => item?.type === "text")?.text; + if (text === undefined) return result; + + if (typeof text !== "string") { + try { + return JSON.parse(String(text)); + } catch { + return text; + } + } + + try { + return JSON.parse(text); + } catch { + return text; + } + } + + parseWrappedArray(result: unknown, wrappedKey: string): T[] | null { + if (Array.isArray(result)) return result as T[]; + if ( + result && + typeof result === "object" && + Array.isArray((result as Record)[wrappedKey]) + ) { + return (result as Record)[wrappedKey] as T[]; + } + return null; + } + + async addMemory(params: { + name: string; + episodeBody: string; 
+ groupId?: string; + source?: "text" | "json" | "message"; + sourceDescription?: string; + }): Promise { + try { + await this.callTool("add_memory", { + name: params.name, + episode_body: params.episodeBody, + group_id: params.groupId, + source: params.source ?? "text", + source_description: params.sourceDescription ?? "", + }); + } catch (err) { + if ( + isGraphitiOfflineError(err) || + isGraphitiTimeoutError(err) || + err instanceof GraphitiTransportError || + err instanceof GraphitiSessionExpiredError + ) { + notifyGraphitiAvailabilityIssue( + "Graphiti unavailable; memory was not saved.", + { + operation: "addMemory", + err, + }, + ); + } + throw err; + } + } + + async addEpisode(params: { + name: string; + episodeBody: string; + groupId?: string; + source?: "text" | "json" | "message"; + sourceDescription?: string; + }): Promise { + await this.addMemory(params); + } + + async searchMemoryFacts(params: { + query: string; + groupIds?: string[]; + maxFacts?: number; + }): Promise { + try { + const result = await this.callTool("search_memory_facts", { + query: params.query, + group_ids: params.groupIds, + max_facts: params.maxFacts ?? 10, + }); + return this.parseWrappedArray(result, "facts") ?? 
[]; + } catch (err) { + if ( + isGraphitiTimeoutError(err) || + isGraphitiOfflineError(err) || + err instanceof GraphitiTransportError || + err instanceof GraphitiSessionExpiredError + ) { + notifyGraphitiAvailabilityIssue( + "Graphiti unavailable; continuing without memory facts.", + { + operation: "searchMemoryFacts", + err, + }, + ); + return []; + } + logger.error("searchMemoryFacts error", err); + return []; + } + } + + async searchFacts(params: { + query: string; + groupIds?: string[]; + maxFacts?: number; + }): Promise { + return await this.searchMemoryFacts(params); + } + + async searchNodes(params: { + query: string; + groupIds?: string[]; + maxNodes?: number; + }): Promise { + try { + const result = await this.callTool("search_nodes", { + query: params.query, + group_ids: params.groupIds, + max_nodes: params.maxNodes ?? 10, + }); + return this.parseWrappedArray(result, "nodes") ?? []; + } catch (err) { + if ( + isGraphitiTimeoutError(err) || + isGraphitiOfflineError(err) || + err instanceof GraphitiTransportError || + err instanceof GraphitiSessionExpiredError + ) { + notifyGraphitiAvailabilityIssue( + "Graphiti unavailable; continuing without memory nodes.", + { + operation: "searchNodes", + err, + }, + ); + return []; + } + logger.error("searchNodes error", err); + return []; + } + } + + async getEpisodes(params: { + groupId?: string; + lastN?: number; + }): Promise { + try { + const result = await this.callTool("get_episodes", { + group_id: params.groupId, + last_n: params.lastN, + }); + const raw = this.parseWrappedArray(result, "episodes") ?? 
+ []; + return raw.map(normalizeEpisode); + } catch (err) { + if ( + isGraphitiTimeoutError(err) || + isGraphitiOfflineError(err) || + err instanceof GraphitiTransportError || + err instanceof GraphitiSessionExpiredError + ) { + notifyGraphitiAvailabilityIssue( + "Graphiti unavailable; continuing without episode history.", + { + operation: "getEpisodes", + err, + }, + ); + return []; + } + logger.error("getEpisodes error", err); + return []; + } + } + + async getStatus(): Promise { + try { + await this.callTool("get_status", {}); + return true; + } catch { + return false; + } + } + + private async callTool( + name: string, + args: Record, + ): Promise { + const result = await this.toolCaller.callTool(name, args); + return this.parseToolResult(result); + } +} diff --git a/src/services/hot-tier-slice.test.ts b/src/services/hot-tier-slice.test.ts new file mode 100644 index 0000000..dc2972e --- /dev/null +++ b/src/services/hot-tier-slice.test.ts @@ -0,0 +1,546 @@ +import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { createChatHandler } from "../handlers/chat.ts"; +import { createCompactingHandler } from "../handlers/compacting.ts"; +import { createMessagesHandler } from "../handlers/messages.ts"; +import { SessionManager } from "../session.ts"; +import { BatchDrainService } from "./batch-drain.ts"; +import { RedisCacheService } from "./redis-cache.ts"; +import { RedisClient } from "./redis-client.ts"; +import { RedisEventsService } from "./redis-events.ts"; +import { RedisSnapshotService } from "./redis-snapshot.ts"; + +describe("hot-tier vertical slice", () => { + it("records local state, prepares injection, transforms messages, and serves compaction context without live MCP", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 300, + }); + const redisSnapshot = new 
RedisSnapshotService(redis, { ttlSeconds: 600 }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + await redisCache.set("group-1", { + query: "Continue the overhaul", + refreshedAt: Date.now(), + facts: [{ uuid: "fact-1", fact: "Graphiti remains async" }], + nodes: [{ uuid: "node-1", name: "ContextOverhaul" }], + factUuids: ["fact-1"], + nodeRefs: ["node-1"], + }); + + const manager = new SessionManager( + "group-1", + "user-1", + { session: { get: () => ({ parentID: null }) } } as never, + redisEvents, + redisSnapshot, + redisCache, + ); + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + + const graphitiAsync = { + scheduleCacheRefresh() {}, + scheduleDrain() {}, + }; + + const chat = createChatHandler({ + sessionManager: manager, + redisEvents, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, + }); + const transform = createMessagesHandler({ sessionManager: manager }); + const compacting = createCompactingHandler({ sessionManager: manager }); + + await chat( + { sessionID: "session-1" } as never, + { + parts: [{ + type: "text", + text: "Please keep Graphiti off the hot path", + }], + } as never, + ); + + const transformOutput = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: "Please keep Graphiti off the hot path", + }], + }], + }; + await transform( + { message: "Please keep Graphiti off the hot path" } as never, + transformOutput as never, + ); + + assertStringIncludes( + transformOutput.messages[0].parts[0].text, + ""); + + const compactOutput = { context: [] as string[] }; + await compacting( + { sessionID: "session-1" } as never, + compactOutput as never, + ); + assertEquals(compactOutput.context.length, 1); + assertStringIncludes(compactOutput.context[0], " { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisEvents = new 
RedisEventsService(redis, { + sessionTtlSeconds: 300, + }); + const redisSnapshot = new RedisSnapshotService(redis, { ttlSeconds: 600 }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + const manager = new SessionManager( + "group-1", + "user-1", + { session: { get: () => ({ parentID: null }) } } as never, + redisEvents, + redisSnapshot, + redisCache, + ); + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + + await redisEvents.recordEvent("session-1", "group-1", { + id: "older-decision", + ts: Date.now() - 60_000, + category: "decision", + priority: 0, + role: "user", + summary: "Use deterministic merge behavior for recall", + continuityText: + "Use deterministic merge behavior for recall when injecting session memory", + }); + + for (let index = 0; index < 25; index += 1) { + await redisEvents.recordEvent("session-1", "group-1", { + id: `recent-${index}`, + ts: Date.now() - 1_000 + index, + category: "message", + priority: 4, + role: "assistant", + summary: `Recent unrelated event ${index}`, + continuityText: `Recent unrelated event ${index}`, + }); + } + + const refreshCalls: Array<{ groupId: string; query: string }> = []; + const graphitiAsync = { + scheduleCacheRefresh(groupId: string, query: string) { + refreshCalls.push({ groupId, query }); + }, + scheduleDrain() {}, + }; + + const chat = createChatHandler({ + sessionManager: manager, + redisEvents, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 999, + }); + const transform = createMessagesHandler({ sessionManager: manager }); + + await chat( + { sessionID: "session-1" } as never, + { + parts: [{ + type: "text", + text: "Can you revisit deterministic merge behavior for recall?", + }], + } as never, + ); + + const transformOutput = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: "Can you revisit 
deterministic merge behavior for recall?", + }], + }], + }; + await transform( + { + message: "Can you revisit deterministic merge behavior for recall?", + } as never, + transformOutput as never, + ); + + assertStringIncludes( + transformOutput.messages[0].parts[0].text, + "Use deterministic merge behavior for recall", + ); + assertEquals(refreshCalls, [{ + groupId: "group-1", + query: "Can you revisit deterministic merge behavior for recall?", + }]); + }); + + it("recalls continuity-rich events without relying on transcript bodies", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 300, + }); + + await redisEvents.recordEvent("session-1", "group-1", { + id: "decision-1", + ts: Date.now(), + category: "decision", + priority: 0, + role: "user", + summary: "Use continuity-first injection", + continuityText: + "Use continuity-first injection for hot-tier recall and session memory selection", + }); + + const recalled = await redisEvents.recallSessionEvents( + "session-1", + "continuity-first injection", + ); + + assertEquals(recalled.length, 1); + assertEquals(recalled[0].id, "decision-1"); + assertEquals(recalled[0].body, undefined); + }); + + it("drains structured semantic payloads to Graphiti asynchronously", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 300, + }); + const drain = new BatchDrainService(redis, redisEvents, { + batchSize: 8, + batchMaxBytes: 8_192, + drainRetryMax: 2, + }); + + await redisEvents.recordEvent("session-1", "group-1", { + id: "event-1", + ts: Date.now(), + category: "file.edit", + priority: 1, + role: "tool", + summary: "Edited src/session.ts", + detail: "Updated session injection selection", + continuityText: + "Edited src/session.ts to prefer continuity fields during session-memory injection", + refs: ["src/session.ts"], + 
keywords: ["session", "continuity", "injection"], + }); + + const calls: Array<{ name: string; episodeBody: string }> = []; + const result = await drain.drainGroup("group-1", { + addMemory(input: { name: string; episodeBody: string }) { + calls.push(input); + return Promise.resolve(); + }, + } as never); + + assertEquals(result.status, "success"); + assertEquals(calls.length, 1); + assertStringIncludes(calls[0].name, "file.edit:event-1"); + assertStringIncludes( + calls[0].episodeBody, + "Summary: Edited src/session.ts", + ); + assertStringIncludes( + calls[0].episodeBody, + "Continuity: Edited src/session.ts to prefer continuity fields during session-memory injection", + ); + assertEquals(calls[0].episodeBody.includes("Body:"), false); + }); + + it("updates only the refresh query field without clobbering cache metadata", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redis.setHashFields("memory-cache:group-1:meta", { + lastQuery: "previous query", + lastRefresh: 123, + factUuids: "fact-1,fact-2", + }, 300); + + await redisCache.rememberRefreshQuery("group-1", "next query"); + + assertEquals(await redis.getHashAll("memory-cache:group-1:meta"), { + lastQuery: "next query", + lastRefresh: "123", + factUuids: "fact-1,fact-2", + }); + }); + + it("classifies drift deterministically at the configured threshold boundary", () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + const aligned = redisCache.classifyRefresh({ + query: "alpha beta", + refreshedAt: Date.now(), + facts: [], + nodes: [], + factUuids: [], + nodeRefs: [], + }, "alpha beta gamma delta"); + const drifted = redisCache.classifyRefresh({ + query: "alpha beta", + refreshedAt: Date.now(), + facts: [], + nodes: [], + factUuids: [], + nodeRefs: [], + 
}, "alpha delta epsilon"); + + assertEquals(aligned.classification, "aligned"); + assertEquals(aligned.shouldRefresh, false); + assertEquals(aligned.similarity, 0.5); + assertEquals(drifted.classification, "drifted"); + assertEquals(drifted.shouldRefresh, true); + }); + + it("detects primer-only and stale cache states while preserving injection", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 300, + }); + const redisSnapshot = new RedisSnapshotService(redis, { ttlSeconds: 600 }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + const manager = new SessionManager( + "group-1", + "user-1", + { session: { get: () => ({ parentID: null }) } } as never, + redisEvents, + redisSnapshot, + redisCache, + ); + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + + await redisCache.set("group-1", { + query: "primer", + refreshedAt: Date.now(), + facts: [], + nodes: [], + factUuids: [], + nodeRefs: [], + episodeSummaries: ["Primer episode"], + }); + const primerPrepared = await manager.prepareInjection( + "session-1", + "real query", + ); + assertEquals(primerPrepared?.refreshDecision.classification, "primer-only"); + assertStringIncludes(primerPrepared?.envelope ?? 
"", " { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 300, + }); + const redisSnapshot = new RedisSnapshotService(redis, { ttlSeconds: 600 }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redisCache.set("group-1", { + query: "architecture token", + refreshedAt: Date.now(), + facts: [{ + uuid: "fact-1", + fact: + "Exact token ALPHA-RECALL-42 identifies the architecture decision", + }], + nodes: [], + factUuids: ["fact-1"], + nodeRefs: [], + }); + + const sameGroupManager = new SessionManager( + "group-1", + "user-1", + { session: { get: () => ({ parentID: null }) } } as never, + redisEvents, + redisSnapshot, + redisCache, + ); + sameGroupManager.setParentId("session-b", null); + sameGroupManager.setState( + "session-b", + sameGroupManager.createDefaultState("group-1", "user-1"), + ); + + const otherGroupManager = new SessionManager( + "group-2", + "user-2", + { session: { get: () => ({ parentID: null }) } } as never, + redisEvents, + redisSnapshot, + redisCache, + ); + otherGroupManager.setParentId("session-c", null); + otherGroupManager.setState( + "session-c", + otherGroupManager.createDefaultState("group-2", "user-2"), + ); + + const sameGroupPrepared = await sameGroupManager.prepareInjection( + "session-b", + "architecture token", + ); + const otherGroupPrepared = await otherGroupManager.prepareInjection( + "session-c", + "architecture token", + ); + + assertStringIncludes(sameGroupPrepared?.envelope ?? "", "ALPHA-RECALL-42"); + assertEquals( + (otherGroupPrepared?.envelope ?? "").includes("ALPHA-RECALL-42"), + false, + ); + assertEquals(otherGroupPrepared?.factUuids ?? 
[], []); + }); + + it("injects stale cached memory immediately while scheduling async refresh", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 300, + }); + const redisSnapshot = new RedisSnapshotService(redis, { ttlSeconds: 600 }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redisCache.set("group-1", { + query: "old recall topic", + refreshedAt: Date.now() - 301_000, + facts: [{ uuid: "fact-1", fact: "Stale but still useful recall fact" }], + nodes: [], + factUuids: ["fact-1"], + nodeRefs: [], + }); + + const manager = new SessionManager( + "group-1", + "user-1", + { session: { get: () => ({ parentID: null }) } } as never, + redisEvents, + redisSnapshot, + redisCache, + ); + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + + const refreshCalls: Array<{ groupId: string; query: string }> = []; + const graphitiAsync = { + scheduleCacheRefresh(groupId: string, query: string) { + refreshCalls.push({ groupId, query }); + }, + scheduleDrain() {}, + }; + + const chat = createChatHandler({ + sessionManager: manager, + redisEvents, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 999, + }); + const transform = createMessagesHandler({ sessionManager: manager }); + + await chat( + { sessionID: "session-1" } as never, + { + parts: [{ + type: "text", + text: "new recall topic", + }], + } as never, + ); + + const transformOutput = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: "new recall topic", + }], + }], + }; + await transform( + { message: "new recall topic" } as never, + transformOutput as never, + ); + + assertEquals(refreshCalls, [{ + groupId: "group-1", + query: "new recall topic", + }]); + assertStringIncludes( + 
transformOutput.messages[0].parts[0].text, + "Stale but still useful recall fact", + ); + }); +}); diff --git a/src/services/logger.test.ts b/src/services/logger.test.ts index f54f0ee..01c66b0 100644 --- a/src/services/logger.test.ts +++ b/src/services/logger.test.ts @@ -6,6 +6,10 @@ import { it, } from "jsr:@std/testing@^1.0.0/bdd"; import { spy } from "jsr:@std/testing@^1.0.0/mock"; +import { + setOpenCodeClient, + setWarningTaskScheduler, +} from "./opencode-warning.ts"; import { setLoggerDebugOverride, setLoggerSilentOverride } from "./logger.ts"; describe("logger", () => { @@ -32,6 +36,8 @@ describe("logger", () => { consoleDebugSpy.restore(); setLoggerDebugOverride(undefined); setLoggerSilentOverride(false); + setOpenCodeClient(undefined); + setWarningTaskScheduler(undefined); }); describe("when GRAPHITI_DEBUG is set", () => { @@ -100,6 +106,39 @@ describe("logger", () => { ]); }); + it("should use structured app logging for warn when client is available", async () => { + const appLogCalls: unknown[] = []; + const scheduledTasks: Array<() => void> = []; + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: (input: unknown) => { + appLogCalls.push(input); + }, + }, + }); + + const { logger } = await import("./logger.ts"); + logger.warn("warning", { code: 42 }, ["array"]); + + assertEquals(appLogCalls.length, 0); + assertEquals(consoleWarnSpy.calls.length, 0); + assertEquals(scheduledTasks.length, 1); + for (const task of scheduledTasks) task(); + assertEquals(appLogCalls, [{ + body: { + service: "graphiti", + level: "warn", + message: "warning", + extra: { + data: [{ code: 42 }, ["array"]], + }, + }, + }]); + }); + it("should forward multiple arguments to error", async () => { const { logger } = await import("./logger.ts"); const error = new Error("test"); @@ -147,6 +186,12 @@ describe("logger", () => { ]); }); + it("warn falls back to console when no client is available", async () => { + const { 
logger } = await import("./logger.ts"); + logger.warn("warning message"); + assertEquals(consoleWarnSpy.calls.length, 1); + }); + it("error always emits regardless of GRAPHITI_DEBUG", async () => { const { logger } = await import("./logger.ts"); logger.error("error message"); diff --git a/src/services/logger.ts b/src/services/logger.ts index e261757..d1f0417 100644 --- a/src/services/logger.ts +++ b/src/services/logger.ts @@ -1,3 +1,5 @@ +import { logStructuredWarning } from "./opencode-warning.ts"; + const console = globalThis.console as { log: (...args: unknown[]) => void; warn: (...args: unknown[]) => void; @@ -9,6 +11,55 @@ const PREFIX = "[graphiti]"; let debugOverride: boolean | undefined; let silentOverride = false; +const serializeLogArg = (value: unknown): unknown => { + if (value instanceof Error) { + return { + name: value.name, + message: value.message, + stack: value.stack, + }; + } + return value; +}; + +const stringifyLogArg = (value: unknown): string => { + if (typeof value === "string") return value; + if (value instanceof Error) return value.message; + if ( + typeof value === "number" || typeof value === "boolean" || + typeof value === "bigint" + ) { + return String(value); + } + if (value === null) return "null"; + if (value === undefined) return "undefined"; + try { + return JSON.stringify(value); + } catch { + return String(value); + } +}; + +const toWarningPayload = ( + args: unknown[], +): { message: string; extra?: unknown } => { + if (args.length === 0) return { message: "Graphiti warning" }; + const [first, ...rest] = args; + if (typeof first === "string") { + return rest.length === 0 + ? { message: first } + : { message: first, extra: { data: rest.map(serializeLogArg) } }; + } + return { + message: stringifyLogArg(first), + ...(rest.length === 0 ? 
{} : { + extra: { + data: [serializeLogArg(first), ...rest.map(serializeLogArg)], + }, + }), + }; +}; + const isDebugEnabled = (): boolean => { if (debugOverride !== undefined) return debugOverride; try { @@ -33,6 +84,8 @@ export const logger = { }, warn: (...args: unknown[]) => { if (silentOverride) return; + const payload = toWarningPayload(args); + if (logStructuredWarning(payload.message, payload.extra)) return; console.warn(PREFIX, ...args); }, error: (...args: unknown[]) => { diff --git a/src/services/opencode-warning.ts b/src/services/opencode-warning.ts new file mode 100644 index 0000000..be4c30e --- /dev/null +++ b/src/services/opencode-warning.ts @@ -0,0 +1,122 @@ +const console = globalThis.console as { + warn: (...args: unknown[]) => void; +}; + +type OpenCodeLogLevel = "debug" | "info" | "warn" | "error"; +type OpenCodeToastVariant = "info" | "success" | "warning" | "error"; + +type OpenCodeClientLike = { + app?: { + log: (input: { + body: { + service: string; + level: OpenCodeLogLevel; + message: string; + extra?: Record; + }; + }) => Promise | unknown; + }; + tui?: { + showToast: (input: { + body: { + message: string; + variant: OpenCodeToastVariant; + }; + }) => Promise | unknown; + }; +}; + +const PREFIX = "[graphiti]"; + +let openCodeClient: unknown; +let scheduleTask: (callback: () => void) => void = (callback) => { + setTimeout(callback, 0); +}; + +const asRecord = (value: unknown): Record => { + if (value && typeof value === "object" && !Array.isArray(value)) { + return value as Record; + } + return { data: value }; +}; + +const getClient = (): OpenCodeClientLike | undefined => { + return openCodeClient as OpenCodeClientLike | undefined; +}; + +const runSafely = ( + task: () => Promise | unknown, + onError?: (err: unknown) => void, +): void => { + scheduleTask(() => { + try { + void Promise.resolve(task()).catch((err) => onError?.(err)); + } catch (err) { + onError?.(err); + } + }); +}; + +export const setOpenCodeClient = ( + client: unknown, 
+): void => { + openCodeClient = client; +}; + +export const setWarningTaskScheduler = ( + scheduler: ((callback: () => void) => void) | undefined, +): void => { + scheduleTask = scheduler ?? ((callback) => { + setTimeout(callback, 0); + }); +}; + +export const logStructuredWarning = ( + message: string, + extra?: unknown, +): boolean => { + const client = getClient(); + if (!client?.app?.log) return false; + + runSafely(() => + client.app!.log({ + body: { + service: "graphiti", + level: "warn", + message, + ...(extra === undefined ? {} : { extra: asRecord(extra) }), + }, + }) + ); + return true; +}; + +export const showWarningToast = (message: string): boolean => { + const client = getClient(); + if (!client?.tui?.showToast) return false; + + runSafely(() => + client.tui!.showToast({ + body: { + message, + variant: "warning", + }, + }) + ); + return true; +}; + +export const notifyGraphitiAvailabilityIssue = ( + message: string, + extra?: unknown, +): void => { + const logged = logStructuredWarning(message, extra); + const toasted = showWarningToast(message); + if (!logged && !toasted) { + if (extra === undefined) { + console.warn(PREFIX, message); + return; + } + console.warn(PREFIX, message, extra); + } +}; diff --git a/src/services/redis-cache.test.ts b/src/services/redis-cache.test.ts new file mode 100644 index 0000000..53488f1 --- /dev/null +++ b/src/services/redis-cache.test.ts @@ -0,0 +1,94 @@ +import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { RedisCacheService } from "./redis-cache.ts"; +import { RedisClient } from "./redis-client.ts"; + +describe("redis cache", () => { + it("stores cache entries per group without leaking across groups", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const cache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await cache.set("group-1", { + query: "project 
alpha policy", + refreshedAt: Date.now(), + facts: [{ uuid: "fact-1", fact: "Alpha uses kebab-case config names" }], + nodes: [], + factUuids: ["fact-1"], + nodeRefs: [], + }); + await cache.set("group-2", { + query: "project beta policy", + refreshedAt: Date.now(), + facts: [{ uuid: "fact-2", fact: "Beta uses snake_case env names" }], + nodes: [], + factUuids: ["fact-2"], + nodeRefs: [], + }); + + assertEquals((await cache.get("group-1"))?.factUuids, ["fact-1"]); + assertEquals((await cache.get("group-2"))?.factUuids, ["fact-2"]); + assertEquals(await cache.get("group-3"), null); + }); + + it("filters already visible facts and returns little or no persistent memory for noise-only remainder", () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const cache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + const rendered = cache.renderPersistentMemory({ + query: "naming policy", + refreshedAt: Date.now(), + facts: [{ uuid: "fact-1", fact: "Use kebab-case route names" }], + nodes: [], + factUuids: ["fact-1"], + nodeRefs: [], + }, ["fact-1"]); + + assertEquals(rendered.body, ""); + assertEquals(rendered.factUuids, []); + assertEquals(rendered.nodeRefs, []); + }); + + it("renders bounded persistent memory with deduped visible facts and truncated long content", () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const cache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + const huge = "RAW-TRANSCRIPT-CHUNK ".repeat(200); + const rendered = cache.renderPersistentMemory({ + query: "context overhaul policy", + refreshedAt: Date.now(), + facts: Array.from({ length: 10 }, (_, index) => ({ + uuid: `fact-${index + 1}`, + fact: `Fact ${index + 1} ${huge}`, + })), + nodes: Array.from({ length: 8 }, (_, index) => ({ + uuid: `node-${index + 1}`, + name: `Node ${index + 1}`, + summary: huge, + })), + episodeSummaries: Array.from( + { length: 6 }, + (_, index) => 
`Episode ${index + 1} ${huge}`, + ), + factUuids: Array.from({ length: 10 }, (_, index) => `fact-${index + 1}`), + nodeRefs: Array.from({ length: 8 }, (_, index) => `node-${index + 1}`), + }, ["fact-1", "fact-2", "fact-3"]); + + assertEquals(rendered.factUuids.includes("fact-1"), false); + assertEquals(rendered.factUuids.includes("fact-2"), false); + assertEquals(rendered.factUuids.includes("fact-3"), false); + assertEquals(rendered.factUuids.length <= 7, true); + assertEquals(rendered.nodeRefs.length <= 6, true); + assertEquals(rendered.body.length <= 1800, true); + assertStringIncludes(rendered.body, "Fact 4"); + assertEquals(rendered.body.includes(huge), false); + }); +}); diff --git a/src/services/redis-cache.ts b/src/services/redis-cache.ts new file mode 100644 index 0000000..feb52db --- /dev/null +++ b/src/services/redis-cache.ts @@ -0,0 +1,247 @@ +import type { + CacheRefreshDecision, + GraphitiFact, + GraphitiNode, + PersistentMemoryCacheEntry, + PersistentMemoryCacheMeta, +} from "../types/index.ts"; +import { escapeXml } from "./render-utils.ts"; +import type { RedisClient } from "./redis-client.ts"; +import { memoryCacheKey, memoryCacheMetaKey } from "./redis-events.ts"; + +const formatFact = (fact: GraphitiFact): string => { + const refs = [fact.source_node?.name, fact.target_node?.name] + .filter(Boolean) + .join(" → "); + return refs ? `${fact.fact} (${refs})` : fact.fact; +}; + +const formatNode = (node: GraphitiNode): string => + node.summary ? 
`${node.name}: ${node.summary}` : node.name; + +export interface RedisCacheServiceOptions { + ttlSeconds: number; + driftThreshold: number; +} + +const TOKEN_PATTERN = /[a-z0-9._/-]{2,}/g; +const FACT_RENDER_LIMIT = 220; +const NODE_RENDER_LIMIT = 180; +const EPISODE_RENDER_LIMIT = 180; +const PERSISTENT_MEMORY_BODY_BUDGET = 1_800; + +const normalizeQuery = (query: string): string => query.trim().toLowerCase(); + +const tokenizeQuery = (query: string): Set => { + const normalized = normalizeQuery(query); + return new Set(normalized.match(TOKEN_PATTERN) ?? []); +}; + +const jaccardSimilarity = (left: string, right: string): number => { + const leftTokens = tokenizeQuery(left); + const rightTokens = tokenizeQuery(right); + + if (leftTokens.size === 0 && rightTokens.size === 0) return 1; + if (leftTokens.size === 0 || rightTokens.size === 0) return 0; + + let intersection = 0; + for (const token of leftTokens) { + if (rightTokens.has(token)) intersection += 1; + } + const union = new Set([...leftTokens, ...rightTokens]).size; + return union === 0 ? 0 : intersection / union; +}; + +export class RedisCacheService { + constructor( + private readonly redis: RedisClient, + private readonly options: RedisCacheServiceOptions, + ) {} + + async get(groupId: string): Promise { + const raw = await this.redis.getString(memoryCacheKey(groupId)); + if (!raw) return null; + try { + return JSON.parse(raw) as PersistentMemoryCacheEntry; + } catch { + return null; + } + } + + async getMeta(groupId: string): Promise { + const raw = await this.redis.getHashAll(memoryCacheMetaKey(groupId)); + if (Object.keys(raw).length === 0) return null; + + return { + lastQuery: raw.lastQuery?.trim() || undefined, + lastRefresh: raw.lastRefresh && Number.isFinite(Number(raw.lastRefresh)) + ? Number(raw.lastRefresh) + : undefined, + factUuids: raw.factUuids + ? 
raw.factUuids.split(",").map((value) => value.trim()).filter(Boolean) + : [], + }; + } + + async rememberRefreshQuery(groupId: string, query: string): Promise { + const normalized = query.trim(); + if (!normalized) return; + + await this.redis.setHashFields( + memoryCacheMetaKey(groupId), + { lastQuery: normalized }, + this.options.ttlSeconds, + ); + } + + async touchEntry(groupId: string): Promise { + await this.redis.touch(memoryCacheKey(groupId), this.options.ttlSeconds); + } + + async touchMeta(groupId: string): Promise { + await this.redis.touch( + memoryCacheMetaKey(groupId), + this.options.ttlSeconds, + ); + } + + async touch(groupId: string): Promise { + await Promise.all([ + this.touchEntry(groupId), + this.touchMeta(groupId), + ]); + } + + async set( + groupId: string, + entry: PersistentMemoryCacheEntry, + ): Promise { + await this.redis.setString( + memoryCacheKey(groupId), + JSON.stringify(entry), + this.options.ttlSeconds, + ); + await this.redis.setHashFields( + memoryCacheMetaKey(groupId), + { + lastQuery: entry.query, + lastRefresh: entry.refreshedAt, + factUuids: entry.factUuids.join(","), + }, + this.options.ttlSeconds, + ); + } + + isStale(entry: PersistentMemoryCacheEntry): boolean { + return Date.now() - entry.refreshedAt > this.options.ttlSeconds * 1000; + } + + classifyRefresh( + entry: PersistentMemoryCacheEntry | null, + query: string, + ): CacheRefreshDecision { + if (!entry) { + return { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: this.options.driftThreshold, + cachedQuery: null, + }; + } + + if (this.isStale(entry)) { + return { + classification: "stale", + shouldRefresh: true, + similarity: 0, + threshold: this.options.driftThreshold, + cachedQuery: entry.query, + }; + } + + const normalizedQuery = normalizeQuery(query); + const normalizedCachedQuery = normalizeQuery(entry.query); + const hasPrimerEpisodes = (entry.episodeSummaries?.length ?? 
0) > 0; + const hasFactsOrNodes = entry.facts.length > 0 || entry.nodes.length > 0; + if ( + normalizedCachedQuery === "primer" && + normalizedQuery && + hasPrimerEpisodes && + !hasFactsOrNodes + ) { + return { + classification: "primer-only", + shouldRefresh: true, + similarity: 0, + threshold: this.options.driftThreshold, + cachedQuery: entry.query, + }; + } + + const similarity = jaccardSimilarity(entry.query, query); + const aligned = similarity >= this.options.driftThreshold; + return { + classification: aligned ? "aligned" : "drifted", + shouldRefresh: !aligned, + similarity, + threshold: this.options.driftThreshold, + cachedQuery: entry.query, + }; + } + + shouldRefresh( + entry: PersistentMemoryCacheEntry | null, + query: string, + ): boolean { + return this.classifyRefresh(entry, query).shouldRefresh; + } + + renderPersistentMemory( + entry: PersistentMemoryCacheEntry | null, + visibleFactUuids: string[] = [], + ): { body: string; factUuids: string[]; nodeRefs: string[] } { + if (!entry) return { body: "", factUuids: [], nodeRefs: [] }; + const visible = new Set(visibleFactUuids); + const facts = entry.facts.filter((fact) => !visible.has(fact.uuid)); + + const sections: string[] = []; + const factUuids: string[] = []; + const nodeRefs: string[] = []; + let remaining = PERSISTENT_MEMORY_BODY_BUDGET; + for (const fact of facts.slice(0, 8)) { + const section = `${ + escapeXml( + formatFact(fact).slice(0, FACT_RENDER_LIMIT), + ) + }`; + if (section.length > remaining) break; + sections.push(section); + factUuids.push(fact.uuid); + remaining -= section.length; + } + for (const node of entry.nodes.slice(0, 6)) { + const section = `${ + escapeXml( + formatNode(node).slice(0, NODE_RENDER_LIMIT), + ) + }`; + if (section.length > remaining) break; + sections.push(section); + nodeRefs.push(node.uuid); + remaining -= section.length; + } + for (const episode of entry.episodeSummaries?.slice(0, 4) ?? 
[]) { + const section = `${ + escapeXml( + episode.slice(0, EPISODE_RENDER_LIMIT), + ) + }`; + if (section.length > remaining) break; + sections.push(section); + remaining -= section.length; + } + + return { body: sections.join(""), factUuids, nodeRefs }; + } +} diff --git a/src/services/redis-client.test.ts b/src/services/redis-client.test.ts new file mode 100644 index 0000000..0c80b4b --- /dev/null +++ b/src/services/redis-client.test.ts @@ -0,0 +1,330 @@ +import { assert, assertEquals, assertRejects } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { RedisClient } from "./redis-client.ts"; + +type RedisEvent = "close" | "end" | "error" | "ready"; + +class FakeRedisRuntime { + private readonly values = new Map(); + private readonly listeners = new Map< + RedisEvent, + Set<(...args: unknown[]) => void> + >(); + + constructor(private readonly state: { available: boolean }) {} + + connect(): Promise { + if (!this.state.available) { + return Promise.reject(new Error("redis unavailable")); + } + this.emit("ready"); + return Promise.resolve(); + } + + ping(): Promise<"PONG"> { + if (!this.state.available) { + return Promise.reject(new Error("redis unavailable")); + } + return Promise.resolve("PONG"); + } + + quit(): Promise<"OK"> { + return Promise.resolve("OK"); + } + + lpush(_key: string, _value: string): Promise { + throw new Error("not implemented"); + } + + rpush(_key: string, _value: string): Promise { + throw new Error("not implemented"); + } + + lmove( + _source: string, + _destination: string, + _sourceSide: "LEFT" | "RIGHT", + _destinationSide: "LEFT" | "RIGHT", + ): Promise { + throw new Error("not implemented"); + } + + lrange(_key: string, _start: number, _stop: number): Promise { + throw new Error("not implemented"); + } + + llen(_key: string): Promise { + throw new Error("not implemented"); + } + + ltrim(_key: string, _start: number, _stop: number): Promise { + throw new Error("not implemented"); + } + 
+ lindex(_key: string, _index: number): Promise { + throw new Error("not implemented"); + } + + lset(_key: string, _index: number, _value: string): Promise { + throw new Error("not implemented"); + } + + get(key: string): Promise { + if (!this.state.available) { + return Promise.reject(new Error("redis unavailable")); + } + return Promise.resolve(this.values.get(key) ?? null); + } + + set( + key: string, + value: string, + ..._args: Array + ): Promise<"OK"> { + if (!this.state.available) { + return Promise.reject(new Error("redis unavailable")); + } + this.values.set(key, value); + return Promise.resolve("OK"); + } + + expire(_key: string, _ttlSeconds: number): Promise { + if (!this.state.available) { + return Promise.reject(new Error("redis unavailable")); + } + return Promise.resolve(1); + } + + del(key: string): Promise { + if (!this.state.available) { + return Promise.reject(new Error("redis unavailable")); + } + return Promise.resolve(this.values.delete(key) ? 1 : 0); + } + + eval(_script: string, _numKeys: number, ..._args: string[]): Promise { + throw new Error("not implemented"); + } + + on(event: RedisEvent, listener: (...args: unknown[]) => void): void { + const set = this.listeners.get(event) ?? new Set(); + set.add(listener); + this.listeners.set(event, set); + } + + off(event: RedisEvent, listener: (...args: unknown[]) => void): void { + this.listeners.get(event)?.delete(listener); + } + + emit(event: RedisEvent, ...args: unknown[]): void { + for (const listener of this.listeners.get(event) ?? 
[]) { + listener(...args); + } + } +} + +class DeferredConnectRedisRuntime extends FakeRedisRuntime { + private resolveConnect?: () => void; + private readonly connectGate = new Promise((resolve) => { + this.resolveConnect = resolve; + }); + + override async connect(): Promise { + await this.connectGate; + await super.connect(); + } + + resumeConnect(): void { + this.resolveConnect?.(); + } +} + +class ObservableDeferredConnectRedisRuntime + extends DeferredConnectRedisRuntime { + quitCalls = 0; + + override quit(): Promise<"OK"> { + this.quitCalls += 1; + return super.quit(); + } +} + +async function waitFor( + condition: () => boolean, + timeoutMs = 200, +): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + if (condition()) return; + await new Promise((resolve) => setTimeout(resolve, 5)); + } + assert(condition(), "condition not met before timeout"); +} + +describe("redis client", () => { + it("honors NX semantics in the in-memory fallback store", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const memory = (redis as unknown as { + memory: { + set( + key: string, + value: string, + ...args: Array + ): Promise<"OK" | null>; + }; + }).memory; + + assertEquals(await memory.set("lock", "first", "NX", "EX", 30), "OK"); + assertEquals(await memory.set("lock", "second", "NX", "EX", 30), null); + assertEquals(await redis.getString("lock"), "first"); + }); + + it("touches only when the stored token matches", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + + await redis.setString("lock", "first", 1); + + assertEquals(await redis.compareAndTouch("lock", "second", 60), false); + assertEquals(await redis.getString("lock"), "first"); + + assertEquals(await redis.compareAndTouch("lock", "first", 60), true); + assertEquals(await redis.getString("lock"), "first"); + }); + + it("matches Redis WRONGTYPE behavior in the in-memory fallback list helpers", async () => { + const 
redis = new RedisClient({ endpoint: "redis://unused" }); + + await redis.setString("queue", "not-a-list"); + + await assertRejects( + () => redis.appendToList("queue", "value"), + Error, + "WRONGTYPE", + ); + }); + + it("stores cache metadata in hashes and reads it back", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + + await redis.setHashFields("memory-cache:group-1:meta", { + lastQuery: "Continue overhaul", + lastRefresh: 123, + factUuids: "fact-1,fact-2", + }, 60); + + assertEquals(await redis.getHashAll("memory-cache:group-1:meta"), { + lastQuery: "Continue overhaul", + lastRefresh: "123", + factUuids: "fact-1,fact-2", + }); + }); + + it("reconnects after startup failure and swaps back to live redis", async () => { + const state = { available: false }; + const runtime = new FakeRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + + await redis.connect(); + assertEquals(redis.isConnected(), false); + + await redis.setString("key", "memory-value"); + assertEquals(await redis.getString("key"), "memory-value"); + + state.available = true; + await waitFor(() => redis.isConnected()); + + await redis.setString("key", "redis-value"); + assertEquals(await redis.getString("key"), "redis-value"); + + await redis.close(); + }); + + it("reconnects after a transient disconnect and resumes live redis reads", async () => { + const state = { available: true }; + const runtime = new FakeRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + + await redis.connect(); + assertEquals(redis.isConnected(), true); + + await redis.setString("key", "before-disconnect"); + assertEquals(await redis.getString("key"), "before-disconnect"); + + state.available = false; + runtime.emit("close"); + 
assertEquals(redis.isConnected(), false); + + await redis.setString("key", "memory-during-outage"); + assertEquals(await redis.getString("key"), "memory-during-outage"); + + state.available = true; + await waitFor(() => redis.isConnected()); + + assertEquals(await redis.getString("key"), "before-disconnect"); + await redis.setString("key", "after-reconnect"); + assertEquals(await redis.getString("key"), "after-reconnect"); + + await redis.close(); + }); + + it("ignores stale runtime ready events during reconnect", async () => { + const firstState = { available: true }; + const secondState = { available: true }; + const firstRuntime = new FakeRedisRuntime(firstState); + const secondRuntime = new DeferredConnectRedisRuntime(secondState); + let factoryCalls = 0; + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => { + factoryCalls += 1; + return factoryCalls === 1 ? firstRuntime : secondRuntime; + }, + }); + + await redis.connect(); + assertEquals(redis.isConnected(), true); + + firstState.available = false; + firstRuntime.emit("close"); + assertEquals(redis.isConnected(), false); + + await waitFor(() => factoryCalls === 2); + firstRuntime.emit("ready"); + assertEquals(redis.isConnected(), false); + + secondRuntime.resumeConnect(); + await waitFor(() => redis.isConnected()); + assertEquals(redis.isConnected(), true); + + await redis.close(); + }); + + it("does not reinstall a runtime after close during an in-flight connect", async () => { + const state = { available: true }; + const runtime = new ObservableDeferredConnectRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => runtime, + }); + + const connectPromise = redis.connect(); + await redis.close(); + runtime.resumeConnect(); + await connectPromise; + + assertEquals(redis.isConnected(), false); + assertEquals(runtime.quitCalls, 1); + }); +}); diff --git 
a/src/services/redis-client.ts b/src/services/redis-client.ts new file mode 100644 index 0000000..8bbae16 --- /dev/null +++ b/src/services/redis-client.ts @@ -0,0 +1,783 @@ +import { logger } from "./logger.ts"; + +type TimerValue = string | string[] | Map; + +type TimerHandle = ReturnType; + +type RedisEvent = "close" | "end" | "error" | "ready"; + +type RedisRuntime = { + ping(): Promise; + quit(): Promise; + lpush(key: string, value: string): Promise; + rpush(key: string, value: string): Promise; + lmove( + source: string, + destination: string, + sourceSide: "LEFT" | "RIGHT", + destinationSide: "LEFT" | "RIGHT", + ): Promise; + lrange(key: string, start: number, stop: number): Promise; + llen(key: string): Promise; + ltrim(key: string, start: number, stop: number): Promise; + lindex(key: string, index: number): Promise; + lset(key: string, index: number, value: string): Promise; + get(key: string): Promise; + hset?(key: string, values: Record): Promise; + hgetall?(key: string): Promise>; + set( + key: string, + value: string, + ...args: Array + ): Promise; + expire(key: string, ttlSeconds: number): Promise; + del(key: string): Promise; + eval?(script: string, numKeys: number, ...args: string[]): Promise; + connect?(): Promise; + on?(event: RedisEvent, listener: (...args: unknown[]) => void): unknown; + off?(event: RedisEvent, listener: (...args: unknown[]) => void): unknown; +}; + +type RedisRuntimeFactory = ( + endpoint: string, +) => Promise | RedisRuntime; + +type RuntimeListeners = { + close: () => void; + end: () => void; + error: (error: unknown) => void; + ready: () => void; +}; + +type StoredValue = { + value: TimerValue; + expiresAt?: number; +}; + +class InMemoryRedisStore implements RedisRuntime { + private readonly values = new Map(); + + ping(): Promise<"PONG"> { + return Promise.resolve("PONG"); + } + + quit(): Promise<"OK"> { + return Promise.resolve("OK"); + } + + private cleanup(key: string): void { + const value = this.values.get(key); + if 
(!value?.expiresAt) return; + if (value.expiresAt <= Date.now()) this.values.delete(key); + } + + private ensureList(key: string): string[] { + this.cleanup(key); + const existing = this.values.get(key); + if (existing) { + if (!Array.isArray(existing.value)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + return existing.value; + } + const list: string[] = []; + this.values.set(key, { value: list }); + return list; + } + + private ensureHash(key: string): Map { + this.cleanup(key); + const existing = this.values.get(key); + if (existing) { + if (!(existing.value instanceof Map)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + return existing.value; + } + const hash = new Map(); + this.values.set(key, { value: hash }); + return hash; + } + + private parseSetArgs(args: Array): { + onlyIfAbsent: boolean; + ttlSeconds?: number; + } { + let onlyIfAbsent = false; + let ttlSeconds: number | undefined; + + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]; + if (arg === "NX") { + onlyIfAbsent = true; + continue; + } + if (arg === "EX") { + const next = args[index + 1]; + if (typeof next !== "number") { + throw new Error("ERR unsupported in-memory Redis SET arguments"); + } + ttlSeconds = next; + index += 1; + continue; + } + throw new Error("ERR unsupported in-memory Redis SET arguments"); + } + + return { onlyIfAbsent, ttlSeconds }; + } + + lpush(key: string, value: string): Promise { + const list = this.ensureList(key); + list.unshift(value); + return Promise.resolve(list.length); + } + + rpush(key: string, value: string): Promise { + const list = this.ensureList(key); + list.push(value); + return Promise.resolve(list.length); + } + + lmove( + source: string, + destination: string, + sourceSide: "LEFT" | "RIGHT", + destinationSide: "LEFT" | "RIGHT", + ): Promise { + this.cleanup(source); + this.cleanup(destination); + const 
existing = this.values.get(source); + if ( + !existing || !Array.isArray(existing.value) || existing.value.length === 0 + ) { + return Promise.resolve(null); + } + + const sourceList = existing.value; + const value = sourceSide === "LEFT" ? sourceList.shift() : sourceList.pop(); + if (value === undefined) return Promise.resolve(null); + + const destinationList = this.ensureList(destination); + if (destinationSide === "LEFT") { + destinationList.unshift(value); + } else { + destinationList.push(value); + } + + return Promise.resolve(value); + } + + lrange(key: string, start: number, stop: number): Promise { + this.cleanup(key); + const existing = this.values.get(key); + if (!existing || !Array.isArray(existing.value)) return Promise.resolve([]); + const list = existing.value; + const normalizeIndex = (index: number): number => + index < 0 ? Math.max(list.length + index, 0) : index; + const from = normalizeIndex(start); + const to = stop < 0 ? list.length + stop : stop; + return Promise.resolve(list.slice(from, to + 1)); + } + + llen(key: string): Promise { + this.cleanup(key); + const existing = this.values.get(key); + return Promise.resolve( + existing && Array.isArray(existing.value) ? existing.value.length : 0, + ); + } + + lindex(key: string, index: number): Promise { + this.cleanup(key); + const existing = this.values.get(key); + if (!existing || !Array.isArray(existing.value)) { + return Promise.resolve(null); + } + const list = existing.value; + const normalized = index < 0 ? list.length + index : index; + return Promise.resolve(list[normalized] ?? null); + } + + lset(key: string, index: number, value: string): Promise { + this.cleanup(key); + const existing = this.values.get(key); + if (!existing || !Array.isArray(existing.value)) { + return Promise.reject(new Error("ERR no such key")); + } + const list = existing.value; + const normalized = index < 0 ? 
list.length + index : index; + if (normalized < 0 || normalized >= list.length) { + return Promise.reject(new Error("ERR index out of range")); + } + list[normalized] = value; + return Promise.resolve(); + } + + ltrim(key: string, start: number, stop: number): Promise { + this.cleanup(key); + const existing = this.values.get(key); + if (!existing || !Array.isArray(existing.value)) return Promise.resolve(); + const list = existing.value; + const normalizeIndex = (index: number): number => + index < 0 ? Math.max(list.length + index, 0) : index; + const trimmed = list.slice( + normalizeIndex(start), + stop < 0 ? list.length + stop + 1 : stop + 1, + ); + existing.value = trimmed; + return Promise.resolve(); + } + + get(key: string): Promise { + this.cleanup(key); + const existing = this.values.get(key); + return Promise.resolve( + existing && typeof existing.value === "string" ? existing.value : null, + ); + } + + hset(key: string, values: Record): Promise { + const hash = this.ensureHash(key); + let added = 0; + for (const [field, value] of Object.entries(values)) { + if (!hash.has(field)) added += 1; + hash.set(field, value); + } + return Promise.resolve(added); + } + + hgetall(key: string): Promise> { + this.cleanup(key); + const existing = this.values.get(key); + if (!existing) return Promise.resolve({}); + if (!(existing.value instanceof Map)) { + return Promise.reject( + new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ), + ); + } + return Promise.resolve(Object.fromEntries(existing.value.entries())); + } + + set( + key: string, + value: string, + ...args: Array + ): Promise<"OK" | null> { + this.cleanup(key); + const { onlyIfAbsent, ttlSeconds } = this.parseSetArgs(args); + if (onlyIfAbsent && this.values.has(key)) return Promise.resolve(null); + this.values.set(key, { + value, + expiresAt: ttlSeconds ? 
Date.now() + ttlSeconds * 1000 : undefined, + }); + return Promise.resolve("OK"); + } + + expire(key: string, ttlSeconds: number): Promise { + this.cleanup(key); + const existing = this.values.get(key); + if (!existing) return Promise.resolve(0); + existing.expiresAt = Date.now() + ttlSeconds * 1000; + return Promise.resolve(1); + } + + del(key: string): Promise { + return Promise.resolve(this.values.delete(key) ? 1 : 0); + } + + setIfAbsent( + key: string, + value: string, + ttlSeconds?: number, + ): Promise { + this.cleanup(key); + if (this.values.has(key)) return Promise.resolve(false); + this.values.set(key, { + value, + expiresAt: ttlSeconds ? Date.now() + ttlSeconds * 1000 : undefined, + }); + return Promise.resolve(true); + } + + deleteIfValue(key: string, expectedValue: string): Promise { + this.cleanup(key); + const existing = this.values.get(key); + if (!existing || typeof existing.value !== "string") { + return Promise.resolve(false); + } + if (existing.value !== expectedValue) return Promise.resolve(false); + this.values.delete(key); + return Promise.resolve(true); + } + + compareAndExpire( + key: string, + expectedValue: string, + ttlSeconds: number, + ): Promise { + this.cleanup(key); + const existing = this.values.get(key); + if (!existing || typeof existing.value !== "string") { + return Promise.resolve(false); + } + if (existing.value !== expectedValue) return Promise.resolve(false); + existing.expiresAt = Date.now() + ttlSeconds * 1000; + return Promise.resolve(true); + } +} + +export interface RedisClientOptions { + endpoint: string; + reconnectBaseDelayMs?: number; + reconnectMaxDelayMs?: number; + runtimeFactory?: RedisRuntimeFactory; +} + +export class RedisClient { + private readonly memory = new InMemoryRedisStore(); + private readonly runtimeListeners = new WeakMap< + RedisRuntime, + RuntimeListeners + >(); + private redis: RedisRuntime | null = null; + private connected = false; + private closed = false; + private reconnectTimer: 
TimerHandle | null = null; + private reconnectAttempts = 0; + private connectAttempt: Promise | null = null; + + constructor(private readonly options: RedisClientOptions) {} + + async connect(): Promise { + this.closed = false; + await this.tryConnectOnce(); + } + + isConnected(): boolean { + return this.connected; + } + + async close(): Promise { + this.closed = true; + this.clearReconnectTimer(); + const runtime = this.redis; + if (!runtime) return; + + this.detachRuntimeListeners(runtime); + try { + await runtime.quit(); + } finally { + this.redis = null; + this.connected = false; + } + } + + private getReconnectDelayMs(): number { + const baseDelay = this.options.reconnectBaseDelayMs ?? 1_000; + const maxDelay = this.options.reconnectMaxDelayMs ?? 30_000; + return Math.min( + baseDelay * (2 ** Math.max(this.reconnectAttempts - 1, 0)), + maxDelay, + ); + } + + private clearReconnectTimer(): void { + if (this.reconnectTimer === null) return; + clearTimeout(this.reconnectTimer); + this.reconnectTimer = null; + } + + private scheduleReconnect(): void { + if (this.closed || this.connected || this.reconnectTimer !== null) return; + const delayMs = this.getReconnectDelayMs(); + this.reconnectTimer = setTimeout(() => { + this.reconnectTimer = null; + void this.tryConnectOnce(); + }, delayMs); + } + + private async createRuntime(): Promise { + if (this.options.runtimeFactory) { + return await this.options.runtimeFactory(this.options.endpoint); + } + + const module = await import("npm:ioredis@^5.7.0"); + const RedisCtor = (module as unknown as { + default: new ( + endpoint: string, + options: { + lazyConnect: boolean; + maxRetriesPerRequest: number; + retryStrategy: () => null; + }, + ) => RedisRuntime & { connect(): Promise }; + }).default; + + return new RedisCtor(this.options.endpoint, { + lazyConnect: true, + maxRetriesPerRequest: 1, + retryStrategy: () => null, + }); + } + + private attachRuntimeListeners(runtime: RedisRuntime): void { + const listeners: 
RuntimeListeners = { + close: () => { + this.handleDisconnect(runtime); + }, + end: () => { + this.handleDisconnect(runtime); + }, + error: (error: unknown) => { + this.handleDisconnect(runtime, error); + }, + ready: () => { + if (runtime !== this.redis) return; + this.connected = true; + this.reconnectAttempts = 0; + this.clearReconnectTimer(); + }, + }; + + this.runtimeListeners.set(runtime, listeners); + + runtime.on?.("close", listeners.close); + runtime.on?.("end", listeners.end); + runtime.on?.("error", listeners.error); + runtime.on?.("ready", listeners.ready); + } + + private detachRuntimeListeners(runtime: RedisRuntime): void { + const listeners = this.runtimeListeners.get(runtime); + if (!listeners) return; + + runtime.off?.("close", listeners.close); + runtime.off?.("end", listeners.end); + runtime.off?.("error", listeners.error); + runtime.off?.("ready", listeners.ready); + this.runtimeListeners.delete(runtime); + } + + private async replaceRuntime(runtime: RedisRuntime): Promise { + if (this.closed) { + await this.disposeFailedRuntime(runtime); + this.connected = false; + return; + } + + const previous = this.redis; + if (previous === runtime) return; + + this.redis = runtime; + this.connected = true; + this.reconnectAttempts = 0; + this.clearReconnectTimer(); + + if (!previous) return; + + this.detachRuntimeListeners(previous); + try { + await previous.quit(); + } catch { + // Ignore teardown errors for stale runtimes. + } + } + + private async disposeFailedRuntime(runtime: RedisRuntime): Promise { + this.detachRuntimeListeners(runtime); + try { + await runtime.quit(); + } catch { + // Best-effort cleanup only. 
+ } + } + + private handleDisconnect( + runtime: RedisRuntime | null, + error?: unknown, + ): void { + if (this.closed) return; + if (runtime && runtime !== this.redis) return; + if (error) { + logger.warn( + "Redis hot tier unavailable; using in-memory fallback", + error, + ); + } + + if (runtime && this.redis === runtime) { + this.redis = null; + this.detachRuntimeListeners(runtime); + void runtime.quit().catch(() => { + // Ignore teardown errors for disconnected runtimes. + }); + } + + this.connected = false; + this.scheduleReconnect(); + } + + private async tryConnectOnce(): Promise { + if (this.closed) return false; + if (this.connected && this.redis) return true; + if (this.connectAttempt) return await this.connectAttempt; + + this.connectAttempt = (async () => { + let runtime: RedisRuntime | null = null; + try { + runtime = await this.createRuntime(); + this.attachRuntimeListeners(runtime); + await runtime.connect?.(); + await runtime.ping(); + await this.replaceRuntime(runtime); + return true; + } catch (error) { + if (runtime) { + await this.disposeFailedRuntime(runtime); + } + + this.redis = null; + this.connected = false; + this.reconnectAttempts += 1; + logger.warn( + "Redis hot tier unavailable; using in-memory fallback", + error, + ); + this.scheduleReconnect(); + return false; + } finally { + this.connectAttempt = null; + } + })(); + + return await this.connectAttempt; + } + + private async useRuntime( + operation: (runtime: RedisRuntime) => Promise, + ): Promise { + const runtime = this.redis; + if (this.connected && runtime) { + try { + return await operation(runtime); + } catch (error) { + this.handleDisconnect(runtime, error); + } + } + + return await operation(this.memory); + } + + async prependToList( + key: string, + value: string, + ttlSeconds?: number, + ): Promise { + return await this.useRuntime(async (runtime) => { + const length = await runtime.lpush(key, value); + if (ttlSeconds) await runtime.expire(key, ttlSeconds); + return length; + 
}); + } + + async appendToList( + key: string, + value: string, + ttlSeconds?: number, + ): Promise { + return await this.useRuntime(async (runtime) => { + const length = await runtime.rpush(key, value); + if (ttlSeconds) await runtime.expire(key, ttlSeconds); + return length; + }); + } + + async getRecentList(key: string, limit: number): Promise { + return await this.useRuntime((runtime) => + runtime.lrange(key, 0, Math.max(limit - 1, 0)) + ); + } + + async getOldestList(key: string, limit: number): Promise { + return await this.useRuntime(async (runtime) => { + const length = await runtime.llen(key); + if (length === 0) return []; + const start = Math.max(length - limit, 0); + return await runtime.lrange(key, start, length - 1); + }); + } + + async getListRange( + key: string, + start: number, + stop: number, + ): Promise { + return await this.useRuntime((runtime) => runtime.lrange(key, start, stop)); + } + + async getListItem(key: string, index: number): Promise { + return await this.useRuntime((runtime) => runtime.lindex(key, index)); + } + + async setListItem(key: string, index: number, value: string): Promise { + await this.useRuntime((runtime) => runtime.lset(key, index, value)); + } + + async getListLength(key: string): Promise { + return await this.useRuntime((runtime) => runtime.llen(key)); + } + + async moveListItem( + source: string, + destination: string, + sourceSide: "LEFT" | "RIGHT", + destinationSide: "LEFT" | "RIGHT", + ): Promise { + return await this.useRuntime((runtime) => + runtime.lmove(source, destination, sourceSide, destinationSide) + ); + } + + async trimOldest(key: string, count: number): Promise { + if (count <= 0) return; + await this.useRuntime(async (runtime) => { + const length = await runtime.llen(key); + if (length <= count) { + await runtime.del(key); + return; + } + await runtime.ltrim(key, 0, length - count - 1); + }); + } + + async getString(key: string): Promise { + return await this.useRuntime((runtime) => runtime.get(key)); 
+ } + + async setString( + key: string, + value: string, + ttlSeconds?: number, + ): Promise { + await this.useRuntime(async (runtime) => { + if (ttlSeconds) { + await runtime.set(key, value, "EX", ttlSeconds); + return; + } + await runtime.set(key, value); + }); + } + + async setStringIfAbsent( + key: string, + value: string, + ttlSeconds?: number, + ): Promise { + return await this.useRuntime(async (runtime) => { + if (runtime === this.memory) { + return await this.memory.setIfAbsent(key, value, ttlSeconds); + } + + const result = ttlSeconds + ? await runtime.set(key, value, "NX", "EX", ttlSeconds) + : await runtime.set(key, value, "NX"); + return result === "OK"; + }); + } + + async touch(key: string, ttlSeconds: number): Promise { + await this.useRuntime((runtime) => runtime.expire(key, ttlSeconds)); + } + + async getHashAll(key: string): Promise> { + return await this.useRuntime(async (runtime) => { + if (runtime === this.memory) { + return await this.memory.hgetall(key); + } + return await runtime.hgetall?.(key) ?? 
{}; + }); + } + + async setHashFields( + key: string, + values: Record, + ttlSeconds?: number, + ): Promise { + const serialized = Object.fromEntries( + Object.entries(values) + .filter(([, value]) => value !== undefined) + .map(([field, value]) => [field, String(value)]), + ); + if (Object.keys(serialized).length === 0) return; + + await this.useRuntime(async (runtime) => { + if (runtime === this.memory) { + await this.memory.hset(key, serialized); + } else if (runtime.hset) { + await runtime.hset(key, serialized); + } else { + const existing = await runtime.get(key); + if (existing !== null) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + await this.memory.hset(key, serialized); + } + + if (ttlSeconds) await runtime.expire(key, ttlSeconds); + }); + } + + async compareAndTouch( + key: string, + expectedValue: string, + ttlSeconds: number, + ): Promise { + return await this.useRuntime(async (runtime) => { + if (runtime === this.memory) { + return await this.memory.compareAndExpire( + key, + expectedValue, + ttlSeconds, + ); + } + + const extended = await runtime.eval?.( + "if redis.call('GET', KEYS[1]) == ARGV[1] then return redis.call('EXPIRE', KEYS[1], ARGV[2]) else return 0 end", + 1, + key, + expectedValue, + String(ttlSeconds), + ) ?? 0; + return extended === 1; + }); + } + + async deleteKey(key: string): Promise { + await this.useRuntime((runtime) => runtime.del(key)); + } + + async deleteKeyIfValue(key: string, expectedValue: string): Promise { + return await this.useRuntime(async (runtime) => { + if (runtime === this.memory) { + return await this.memory.deleteIfValue(key, expectedValue); + } + + const deleted = await runtime.eval?.( + "if redis.call('GET', KEYS[1]) == ARGV[1] then return redis.call('DEL', KEYS[1]) else return 0 end", + 1, + key, + expectedValue, + ) ?? 
0; + return deleted === 1; + }); + } +} diff --git a/src/services/redis-events.ts b/src/services/redis-events.ts new file mode 100644 index 0000000..abc597e --- /dev/null +++ b/src/services/redis-events.ts @@ -0,0 +1,410 @@ +import type { + ClaimedDrainBatch, + DrainQueueEntry, + SessionEvent, +} from "../types/index.ts"; +import { getSessionEventRecallText } from "../types/index.ts"; +import type { RedisClient } from "./redis-client.ts"; + +const SESSION_EVENT_LIMIT = 40; +const SESSION_RECALL_SCAN_LIMIT = 120; +const SESSION_RECALL_RESULT_LIMIT = 12; +const DRAIN_TTL_SECONDS = 7 * 24 * 60 * 60; +const DEAD_LETTER_TTL_SECONDS = 30 * 24 * 60 * 60; +const CLAIM_LOCK_TTL_SECONDS = 60; + +const RECALL_ELIGIBLE_CATEGORIES = new Set([ + "task.create", + "task.update", + "task.complete", + "decision", + "preference", + "rule.load", + "file.read", + "file.write", + "file.edit", + "file.search", + "error", + "git.activity", + "subagent.start", + "subagent.finish", + "intent", +]); + +const RECALL_STOP_WORDS = new Set([ + "about", + "after", + "again", + "always", + "been", + "before", + "between", + "could", + "from", + "have", + "into", + "keep", + "more", + "please", + "should", + "that", + "the", + "their", + "them", + "there", + "these", + "this", + "those", + "with", + "without", +]); + +export const sessionEventsKey = (sessionId: string): string => + `session:${sessionId}:events`; +export const sessionSnapshotKey = (sessionId: string): string => + `session:${sessionId}:snapshot`; +export const memoryCacheKey = (groupId: string): string => + `memory-cache:${groupId}`; +export const memoryCacheMetaKey = (groupId: string): string => + `memory-cache:${groupId}:meta`; +export const drainPendingKey = (groupId: string): string => + `drain:pending:${groupId}`; +export const drainCursorKey = (groupId: string): string => + `drain:cursor:${groupId}`; +export const drainDeadKey = (groupId: string): string => + `drain:dead:${groupId}`; +export const drainRetryKey = (groupId: 
string, batchKey: string): string => + `drain:retry:${groupId}:${batchKey}`; +export const drainClaimKey = (groupId: string, claimToken: string): string => + `drain:claim:${groupId}:${claimToken}`; +export const drainClaimActiveKey = (groupId: string): string => + `drain:claim-active:${groupId}`; +export const drainClaimLockKey = (groupId: string): string => + `drain:claim-lock:${groupId}`; + +const makeClaimToken = (): string => crypto.randomUUID(); + +const parseEntry = (raw: string): DrainQueueEntry | null => { + try { + return JSON.parse(raw) as DrainQueueEntry; + } catch { + return null; + } +}; + +const parseSessionEvent = (raw: string): SessionEvent | null => { + try { + return JSON.parse(raw) as SessionEvent; + } catch { + return null; + } +}; + +const tokenizeRecallQuery = (query: string): string[] => { + const matches = query.toLowerCase().match(/[a-z0-9._/-]{3,}/g) ?? []; + return [...new Set(matches.filter((token) => !RECALL_STOP_WORDS.has(token)))]; +}; + +const scoreSessionEventRecall = ( + event: SessionEvent, + query: string, + tokens: string[], +): number => { + if (!RECALL_ELIGIBLE_CATEGORIES.has(event.category)) return 0; + + const summary = event.summary.toLowerCase(); + const continuity = (event.continuityText ?? "").toLowerCase(); + const detail = (event.detail ?? "").toLowerCase(); + const refs = (event.refs ?? []).join(" ").toLowerCase(); + const keywords = (event.keywords ?? 
[]).join(" ").toLowerCase(); + const recallText = getSessionEventRecallText(event).toLowerCase(); + let score = 0; + + if (summary.includes(query)) score += 8; + else if (continuity.includes(query)) score += 7; + else if (detail.includes(query)) score += 5; + else if (recallText.includes(query)) score += 4; + + for (const token of tokens) { + if (summary.includes(token)) score += 4; + if (continuity.includes(token)) score += 4; + if (detail.includes(token)) score += 3; + if (refs.includes(token)) score += 3; + if (keywords.includes(token)) score += 3; + if (recallText.includes(token)) score += 1; + } + + return score; +}; + +export interface RedisEventsServiceOptions { + sessionTtlSeconds: number; + claimLockTtlSeconds?: number; +} + +export class RedisEventsService { + constructor( + private readonly redis: RedisClient, + private readonly options: RedisEventsServiceOptions, + ) {} + + getClaimLockTtlSeconds(): number { + return this.options.claimLockTtlSeconds ?? CLAIM_LOCK_TTL_SECONDS; + } + + async recordEvent( + sessionId: string, + groupId: string, + event: SessionEvent, + ): Promise { + const queueEntry: DrainQueueEntry = { sessionId, groupId, event }; + await this.redis.prependToList( + sessionEventsKey(sessionId), + JSON.stringify(event), + this.options.sessionTtlSeconds, + ); + return await this.redis.prependToList( + drainPendingKey(groupId), + JSON.stringify(queueEntry), + DRAIN_TTL_SECONDS, + ); + } + + async getRecentSessionEvents( + sessionId: string, + limit = SESSION_EVENT_LIMIT, + chronological = true, + ): Promise { + const raw = await this.redis.getRecentList( + sessionEventsKey(sessionId), + limit, + ); + const events = raw.flatMap((item) => { + try { + return [JSON.parse(item) as SessionEvent]; + } catch { + return []; + } + }); + return chronological ? 
[...events].reverse() : events; + } + + async touchSessionEvents(sessionId: string): Promise { + await this.redis.touch( + sessionEventsKey(sessionId), + this.options.sessionTtlSeconds, + ); + } + + async recallSessionEvents( + sessionId: string, + query: string, + options: { + scanLimit?: number; + resultLimit?: number; + } = {}, + ): Promise { + const normalizedQuery = query.trim().toLowerCase(); + if (!normalizedQuery) return []; + + const tokens = tokenizeRecallQuery(normalizedQuery); + if (tokens.length === 0 && normalizedQuery.length < 3) return []; + + const raw = await this.redis.getListRange( + sessionEventsKey(sessionId), + 0, + Math.max((options.scanLimit ?? SESSION_RECALL_SCAN_LIMIT) - 1, 0), + ); + + return raw + .flatMap((item) => { + const event = parseSessionEvent(item); + if (!event) return []; + const score = scoreSessionEventRecall(event, normalizedQuery, tokens); + return score > 0 ? [{ event, score }] : []; + }) + .sort((left, right) => { + if (right.score !== left.score) return right.score - left.score; + if (right.event.ts !== left.event.ts) { + return right.event.ts - left.event.ts; + } + return left.event.id.localeCompare(right.event.id); + }) + .slice(0, options.resultLimit ?? 
SESSION_RECALL_RESULT_LIMIT) + .map(({ event }) => event); + } + + async getPendingCount(groupId: string): Promise { + return await this.redis.getListLength(drainPendingKey(groupId)); + } + + async getPendingBatch( + groupId: string, + maxItems: number, + maxBytes: number, + ): Promise { + if (maxItems <= 0) return null; + + await this.recoverAbandonedClaim(groupId); + + const pendingKey = drainPendingKey(groupId); + if (await this.redis.getListLength(pendingKey) === 0) return null; + + const claimToken = makeClaimToken(); + const claimKey = drainClaimKey(groupId, claimToken); + const lockAcquired = await this.redis.setStringIfAbsent( + drainClaimLockKey(groupId), + claimToken, + this.getClaimLockTtlSeconds(), + ); + if (!lockAcquired) return null; + + await this.redis.setString( + drainClaimActiveKey(groupId), + claimToken, + DRAIN_TTL_SECONDS, + ); + + const selected: DrainQueueEntry[] = []; + let totalBytes = 0; + + try { + while (selected.length < maxItems) { + const raw = await this.redis.moveListItem( + pendingKey, + claimKey, + "RIGHT", + "RIGHT", + ); + if (!raw) break; + + await this.redis.touch(claimKey, DRAIN_TTL_SECONDS); + const entry = parseEntry(raw); + if (!entry) continue; + + const bytes = new TextEncoder().encode( + getSessionEventRecallText(entry.event), + ).length; + if (selected.length > 0 && totalBytes + bytes > maxBytes) { + await this.redis.moveListItem(claimKey, pendingKey, "RIGHT", "RIGHT"); + break; + } + + selected.push(entry); + totalBytes += bytes; + } + + if (selected.length === 0) { + await this.redis.deleteKey(claimKey); + await this.redis.deleteKeyIfValue( + drainClaimActiveKey(groupId), + claimToken, + ); + await this.redis.deleteKeyIfValue( + drainClaimLockKey(groupId), + claimToken, + ); + return null; + } + + return { + claimToken, + claimKey, + lockTtlSeconds: this.getClaimLockTtlSeconds(), + entries: selected, + }; + } catch (err) { + await this.releaseClaim(groupId, claimToken); + throw err; + } + } + + async 
refreshClaimLease( + groupId: string, + claimToken: string, + ttlSeconds = this.getClaimLockTtlSeconds(), + ): Promise { + const lockRefreshed = await this.redis.compareAndTouch( + drainClaimLockKey(groupId), + claimToken, + ttlSeconds, + ); + if (!lockRefreshed) return false; + + const activeRefreshed = await this.redis.compareAndTouch( + drainClaimActiveKey(groupId), + claimToken, + DRAIN_TTL_SECONDS, + ); + if (!activeRefreshed) return false; + + await this.redis.touch( + drainClaimKey(groupId, claimToken), + DRAIN_TTL_SECONDS, + ); + return true; + } + + async markBatchSuccess( + groupId: string, + claimToken: string, + entries: DrainQueueEntry[], + ): Promise { + if (entries.length === 0) return; + + await this.redis.deleteKey(drainClaimKey(groupId, claimToken)); + await this.redis.deleteKeyIfValue(drainClaimActiveKey(groupId), claimToken); + await this.redis.deleteKeyIfValue(drainClaimLockKey(groupId), claimToken); + await this.redis.setString( + drainCursorKey(groupId), + entries.at(-1)?.event.id ?? 
"", + DRAIN_TTL_SECONDS, + ); + } + + async moveBatchToDeadLetter( + groupId: string, + entries: DrainQueueEntry[], + ): Promise { + for (const entry of entries) { + await this.redis.appendToList( + drainDeadKey(groupId), + JSON.stringify(entry), + DEAD_LETTER_TTL_SECONDS, + ); + } + } + + async releaseClaim( + groupId: string, + claimToken: string, + ): Promise { + const pendingKey = drainPendingKey(groupId); + const claimKey = drainClaimKey(groupId, claimToken); + + while (true) { + const raw = await this.redis.moveListItem( + claimKey, + pendingKey, + "RIGHT", + "RIGHT", + ); + if (!raw) break; + } + + await this.redis.deleteKey(claimKey); + await this.redis.deleteKeyIfValue(drainClaimActiveKey(groupId), claimToken); + await this.redis.deleteKeyIfValue(drainClaimLockKey(groupId), claimToken); + } + + async recoverAbandonedClaim(groupId: string): Promise { + const claimToken = await this.redis.getString(drainClaimActiveKey(groupId)); + if (!claimToken) return false; + + const lockToken = await this.redis.getString(drainClaimLockKey(groupId)); + if (lockToken) return false; + + await this.releaseClaim(groupId, claimToken); + return true; + } +} diff --git a/src/services/redis-snapshot.ts b/src/services/redis-snapshot.ts new file mode 100644 index 0000000..775cd5d --- /dev/null +++ b/src/services/redis-snapshot.ts @@ -0,0 +1,284 @@ +import { + getSessionEventPrimaryText, + type SessionEvent, +} from "../types/index.ts"; +import { + escapeXml, + renderXmlListSection, + renderXmlSingleSection, + uniqueValues, +} from "./render-utils.ts"; +import type { RedisClient } from "./redis-client.ts"; +import { sessionSnapshotKey } from "./redis-events.ts"; + +const SNAPSHOT_BUDGET = 3_000; +const BLOCKER_PATTERN = /\b(blocker|blocked|blocking)\b/i; + +const selectRecent = ( + events: SessionEvent[], + predicate: (event: SessionEvent) => boolean, + map: (event: SessionEvent) => string | string[] | undefined, + limit: number, +): string[] => + uniqueValues( + 
events.flatMap((event) => { + if (!predicate(event)) return []; + const value = map(event); + if (!value) return []; + return Array.isArray(value) ? value : [value]; + }).reverse(), + limit, + ); + +export const buildSessionSnapshotXml = ( + sessionId: string, + events: SessionEvent[], +): string => { + const decisions = selectRecent( + events, + (event) => ["decision", "preference"].includes(event.category), + (event) => getSessionEventPrimaryText(event), + 5, + ); + const constraints = selectRecent( + events, + (event) => event.category === "rule.load", + (event) => getSessionEventPrimaryText(event), + 5, + ); + const latestUserRequest = getSessionEventPrimaryText( + events.findLast((event) => event.role === "user") ?? { + id: "", + ts: 0, + category: "message", + priority: 4, + role: "user", + summary: "", + }, + ) || undefined; + const activeTask = + events.findLast((event) => + ["task.create", "task.update", "intent"].includes(event.category) + )?.summary ?? latestUserRequest; + const activeFiles = selectRecent( + events, + (event) => event.category.startsWith("file."), + (event) => event.refs ?? 
[], + 6, + ); + const recentEdits = selectRecent( + events, + (event) => + event.category === "file.write" || event.category === "file.edit", + (event) => getSessionEventPrimaryText(event), + 5, + ); + const subagentsOpen = selectRecent( + events, + (event) => event.category === "subagent.start", + (event) => getSessionEventPrimaryText(event), + 4, + ); + const unresolvedErrors = events.filter((event) => + event.category === "error" && event.metadata?.resolved !== true + ); + const errors = uniqueValues( + unresolvedErrors.map((event) => getSessionEventPrimaryText(event)) + .reverse(), + 4, + ); + const blockers = uniqueValues( + unresolvedErrors.flatMap((event) => { + const blockerText = event.detail?.trim() || + event.continuityText?.trim() || + event.body?.trim(); + if (!blockerText || blockerText === event.summary) return []; + if ( + event.metadata?.blocking === true || + BLOCKER_PATTERN.test(blockerText) || + BLOCKER_PATTERN.test(event.summary) + ) { + return [blockerText]; + } + return []; + }).reverse(), + 3, + ); + const environment = selectRecent( + events, + (event) => + event.category === "cwd.change" || event.category === "env.change", + (event) => getSessionEventPrimaryText(event), + 4, + ); + const gitState = selectRecent( + events, + (event) => event.category === "git.activity", + (event) => getSessionEventPrimaryText(event), + 4, + ); + const subagentsDone = selectRecent( + events, + (event) => event.category === "subagent.finish", + (event) => getSessionEventPrimaryText(event), + 4, + ); + const openQuestions = selectRecent( + events, + (event) => event.category === "task.update", + (event) => getSessionEventPrimaryText(event), + 4, + ); + const discoveries = selectRecent( + events, + (event) => event.category === "discovery", + (event) => getSessionEventPrimaryText(event), + 4, + ); + const references = selectRecent( + events, + (event) => event.category === "data.import", + (event) => getSessionEventPrimaryText(event), + 4, + ); + const 
residualMessages = selectRecent( + events, + (event) => event.category === "message", + (event) => getSessionEventPrimaryText(event), + 3, + ); + + const open = ``; + const close = ``; + let xml = open; + let remaining = SNAPSHOT_BUDGET - open.length - close.length; + + const sectionBuilders = [ + () => + renderXmlListSection("decisions", "d", decisions, { + itemCharLimit: 240, + remaining, + }), + () => + renderXmlListSection("constraints", "c", constraints, { + itemCharLimit: 240, + remaining, + }), + () => + renderXmlSingleSection("active_task", "goal", activeTask, { + valueCharLimit: 320, + remaining, + }), + () => + renderXmlListSection("active_files", "f", activeFiles, { + itemCharLimit: 240, + remaining, + }), + () => + renderXmlListSection("recent_edits", "e", recentEdits, { + itemCharLimit: 220, + remaining, + }), + () => + renderXmlListSection("subagents_open", "s", subagentsOpen, { + itemCharLimit: 220, + remaining, + }), + () => + renderXmlListSection("errors", "e", errors, { + itemCharLimit: 240, + remaining, + }), + () => + renderXmlListSection("blockers", "b", blockers, { + itemCharLimit: 220, + remaining, + }), + () => + renderXmlListSection("environment", "e", environment, { + itemCharLimit: 240, + remaining, + }), + () => + renderXmlListSection("git_state", "g", gitState, { + itemCharLimit: 220, + remaining, + }), + () => + renderXmlListSection("subagents_done", "s", subagentsDone, { + itemCharLimit: 220, + remaining, + }), + () => + renderXmlListSection("open_questions", "q", openQuestions, { + itemCharLimit: 220, + remaining, + }), + () => + renderXmlListSection("discoveries", "d", discoveries, { + itemCharLimit: 240, + remaining, + }), + () => + renderXmlListSection("references", "r", references, { + itemCharLimit: 220, + remaining, + }), + () => + renderXmlListSection("residual_messages", "m", residualMessages, { + itemCharLimit: 180, + remaining, + }), + ]; + + for (const buildSection of sectionBuilders) { + const section = buildSection(); + 
if (!section) continue; + if (section.length > remaining) break; + xml += section; + remaining -= section.length; + } + + return `${xml}${close}`; +}; + +export interface RedisSnapshotServiceOptions { + ttlSeconds: number; +} + +export class RedisSnapshotService { + constructor( + private readonly redis: RedisClient, + private readonly options: RedisSnapshotServiceOptions, + ) {} + + async getSnapshot(sessionId: string): Promise { + return await this.redis.getString(sessionSnapshotKey(sessionId)); + } + + async saveSnapshot(sessionId: string, snapshot: string): Promise { + await this.redis.setString( + sessionSnapshotKey(sessionId), + snapshot, + this.options.ttlSeconds, + ); + } + + async touchSnapshot(sessionId: string): Promise { + await this.redis.touch( + sessionSnapshotKey(sessionId), + this.options.ttlSeconds, + ); + } + + async rebuildAndSave( + sessionId: string, + events: SessionEvent[], + ): Promise { + const snapshot = buildSessionSnapshotXml(sessionId, events); + await this.saveSnapshot(sessionId, snapshot); + return snapshot; + } +} diff --git a/src/services/render-utils.ts b/src/services/render-utils.ts new file mode 100644 index 0000000..2925e71 --- /dev/null +++ b/src/services/render-utils.ts @@ -0,0 +1,113 @@ +export const escapeXml = (value: string): string => + value.replaceAll("&", "&") + .replaceAll("<", "<") + .replaceAll(">", ">") + .replaceAll('"', """) + .replaceAll("'", "'"); + +export const uniqueValues = (values: string[], limit: number): string[] => { + const seen = new Set(); + const result: string[] = []; + for (const value of values) { + const cleaned = value.trim(); + if (!cleaned || seen.has(cleaned)) continue; + seen.add(cleaned); + result.push(cleaned); + if (result.length >= limit) break; + } + return result; +}; + +const fitEscapedText = (value: string, maxEscapedLength: number): string => { + const source = value.trim(); + if (!source || maxEscapedLength <= 0) return ""; + if (escapeXml(source).length <= maxEscapedLength) 
return source; + + let low = 0; + let high = source.length; + while (low < high) { + const mid = Math.ceil((low + high) / 2); + if (escapeXml(source.slice(0, mid)).length <= maxEscapedLength) { + low = mid; + } else { + high = mid - 1; + } + } + + return source.slice(0, low).trimEnd(); +}; + +export interface RenderXmlListSectionOptions { + itemCharLimit?: number; + remaining?: number; + includeEmpty?: boolean; +} + +export const renderXmlListSection = ( + tag: string, + itemTag: string, + values: string[], + options: RenderXmlListSectionOptions = {}, +): string => { + const { itemCharLimit, remaining, includeEmpty = false } = options; + const open = `<${tag}>`; + const close = ``; + + if (remaining !== undefined && open.length + close.length > remaining) { + return ""; + } + + let body = ""; + for (const value of values) { + const normalized = value.trim(); + if (!normalized) continue; + + const limited = itemCharLimit + ? normalized.slice(0, itemCharLimit) + : normalized; + const itemOpen = `<${itemTag}>`; + const itemClose = ``; + const content = remaining === undefined ? limited : fitEscapedText( + limited, + remaining - open.length - close.length - body.length - itemOpen.length - + itemClose.length, + ); + if (!content) break; + + body += `${itemOpen}${escapeXml(content)}`; + } + + if (!body) return includeEmpty ? `${open}${close}` : ""; + return `${open}${body}${close}`; +}; + +export interface RenderXmlSingleSectionOptions { + valueCharLimit?: number; + remaining?: number; +} + +export const renderXmlSingleSection = ( + tag: string, + itemTag: string, + value: string | undefined, + options: RenderXmlSingleSectionOptions = {}, +): string => { + if (!value) return ""; + + const { valueCharLimit, remaining } = options; + const normalized = value.trim(); + if (!normalized) return ""; + + const limited = valueCharLimit + ? 
normalized.slice(0, valueCharLimit) + : normalized; + const open = `<${tag}><${itemTag}>`; + const close = ``; + const content = remaining === undefined + ? limited + : fitEscapedText(limited, remaining - open.length - close.length); + if (!content) return ""; + + const section = `${open}${escapeXml(content)}${close}`; + return remaining === undefined || section.length <= remaining ? section : ""; +}; diff --git a/src/services/runtime-teardown.test.ts b/src/services/runtime-teardown.test.ts new file mode 100644 index 0000000..88dbaf6 --- /dev/null +++ b/src/services/runtime-teardown.test.ts @@ -0,0 +1,88 @@ +import { assertEquals } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { logger } from "./logger.ts"; +import { registerRuntimeTeardown } from "./runtime-teardown.ts"; + +describe("runtime teardown", () => { + it("runs teardown tasks only once even when invoked repeatedly", async () => { + const calls: string[] = []; + const registration = registerRuntimeTeardown([ + { name: "redis", run: () => void calls.push("redis") }, + { name: "graphiti", run: () => void calls.push("graphiti") }, + ], {}); + + await Promise.all([ + registration.run(), + registration.run(), + registration.run(), + ]); + + assertEquals(calls, ["redis", "graphiti"]); + }); + + it("registers best-effort unload and signal handlers that share the same idempotent path", async () => { + const eventHandlers = new Map void>(); + const signalHandlers = new Map<"SIGINT" | "SIGTERM", () => void>(); + const calls: string[] = []; + const registration = registerRuntimeTeardown([ + { + name: "runtime", + run: () => { + calls.push("runtime"); + }, + }, + ], { + addEventListener(type, listener) { + eventHandlers.set(type, listener as () => void); + }, + Deno: { + addSignalListener(signal, handler) { + signalHandlers.set(signal, handler); + }, + }, + }); + + assertEquals([...eventHandlers.keys()].sort(), ["beforeunload", "unload"]); + 
assertEquals([...signalHandlers.keys()].sort(), ["SIGINT", "SIGTERM"]); + + eventHandlers.get("unload")?.(); + signalHandlers.get("SIGINT")?.(); + await registration.run(); + + assertEquals(calls, ["runtime"]); + }); + + it("continues teardown after a task failure", async () => { + const warnings: unknown[] = []; + const originalWarn = logger.warn; + logger.warn = (...args: unknown[]) => { + warnings.push(args); + }; + + try { + const calls: string[] = []; + const registration = registerRuntimeTeardown([ + { + name: "redis", + run: () => { + calls.push("redis"); + throw new Error("boom"); + }, + }, + { + name: "graphiti", + run: () => { + calls.push("graphiti"); + }, + }, + ], {}); + + await registration.run(); + + assertEquals(calls, ["redis", "graphiti"]); + assertEquals(warnings.length, 1); + } finally { + logger.warn = originalWarn; + } + }); +}); diff --git a/src/services/runtime-teardown.ts b/src/services/runtime-teardown.ts new file mode 100644 index 0000000..97779c5 --- /dev/null +++ b/src/services/runtime-teardown.ts @@ -0,0 +1,67 @@ +import { logger } from "./logger.ts"; + +export type RuntimeTeardownTask = { + name: string; + run: () => void | Promise; +}; + +export interface RuntimeTeardownRegistration { + run(): Promise; +} + +type ShutdownRegistrationAdapter = { + addEventListener?: ( + type: string, + listener: (event?: Event) => void, + options?: boolean | { once?: boolean; capture?: boolean }, + ) => void; + Deno?: { + addSignalListener?: ( + signal: "SIGINT" | "SIGTERM", + handler: () => void, + ) => void; + }; +}; + +const SHUTDOWN_EVENTS = ["unload", "beforeunload"] as const; +const SHUTDOWN_SIGNALS = ["SIGINT", "SIGTERM"] as const; + +export function registerRuntimeTeardown( + tasks: RuntimeTeardownTask[], + runtime: ShutdownRegistrationAdapter = globalThis, +): RuntimeTeardownRegistration { + let teardownPromise: Promise | null = null; + + const run = (): Promise => { + if (teardownPromise) return teardownPromise; + + teardownPromise = (async 
() => { + for (const task of tasks) { + try { + await task.run(); + } catch (err) { + logger.warn("Runtime teardown failed", { + resource: task.name, + err, + }); + } + } + })(); + + return teardownPromise; + }; + + for (const eventType of SHUTDOWN_EVENTS) { + runtime.addEventListener?.(eventType, () => { + void run(); + }, { once: true }); + } + + for (const signal of SHUTDOWN_SIGNALS) { + runtime.Deno?.addSignalListener?.(signal, () => { + void run(); + }); + } + + return { run }; +} diff --git a/src/services/session-snapshot.test.ts b/src/services/session-snapshot.test.ts index 0aa18cb..4b9f194 100644 --- a/src/services/session-snapshot.test.ts +++ b/src/services/session-snapshot.test.ts @@ -1,282 +1,1021 @@ -import { assertEquals, assertStrictEquals } from "jsr:@std/assert@^1.0.0"; +import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import type { OpencodeClient } from "@opencode-ai/sdk"; -import type { GraphitiClient } from "./client.ts"; import { SessionManager } from "../session.ts"; +import type { PersistentMemoryCacheEntry } from "../types/index.ts"; +import { buildSessionSnapshotXml } from "./redis-snapshot.ts"; -// --------------------------------------------------------------------------- -// Minimal stub clients – only the methods exercised by the tests are needed. -// --------------------------------------------------------------------------- - -function makeStubSdk( - parentID?: string | null, -): Pick { - return { - session: { - get: async () => ({ data: { parentID: parentID ?? 
null } }), - messages: async () => ({ data: [] }), - } as unknown as OpencodeClient["session"], +class FakeClock { + now = 0; + nextId = 1; + timers = new Map void }>(); + + setTimer = (callback: () => void, delayMs: number): number => { + const id = this.nextId++; + this.timers.set(id, { at: this.now + delayMs, callback }); + return id; }; -} -function makeStubGraphiti( - addEpisodeSpy?: (p: unknown) => Promise, -): Pick { - return { - addEpisode: addEpisodeSpy ?? (async () => {}), - } as unknown as Pick; + clearTimer = (id: number): void => { + this.timers.delete(id); + }; + + tick(delayMs: number): void { + const target = this.now + delayMs; + while (true) { + const next = [...this.timers.entries()].sort((a, b) => a[1].at - b[1].at) + .find(([, timer]) => timer.at <= target); + if (!next) break; + const [id, timer] = next; + this.timers.delete(id); + this.now = timer.at; + timer.callback(); + } + this.now = target; + } } -// --------------------------------------------------------------------------- -// createDefaultState -// --------------------------------------------------------------------------- -describe("SessionManager.createDefaultState", () => { - it("returns correct default state shape", () => { - const sm = new SessionManager( - "proj-group", - "user-group", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti() as unknown as GraphitiClient, - ); - const state = sm.createDefaultState("proj-group", "user-group"); - assertEquals(state.groupId, "proj-group"); - assertEquals(state.userGroupId, "user-group"); - assertEquals(state.injectedMemories, false); - assertEquals(state.lastInjectionFactUuids, []); - assertEquals(state.pendingMessages, []); - assertEquals(state.messageCount, 0); - assertEquals(state.contextLimit, 200_000); - assertEquals(state.isMain, true); - }); -}); +describe("SessionManager", () => { + it("createDefaultState includes the new hot-tier fields", () => { + const manager = new SessionManager( + "group-1", + "user-1", + { 
session: {} } as never, + {} as never, + {} as never, + {} as never, + ); -// --------------------------------------------------------------------------- -// getState / setState -// --------------------------------------------------------------------------- -describe("SessionManager.getState / setState", () => { - it("returns undefined for unknown session", () => { - const sm = new SessionManager( - "g", - "u", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti() as unknown as GraphitiClient, - ); - assertEquals(sm.getState("missing"), undefined); + const state = manager.createDefaultState("group-1", "user-1"); + assertEquals(state.hotTierReady, false); + assertEquals(state.pendingInjection, undefined); + assertEquals(state.latestUserRequest, undefined); + assertEquals(state.latestRefreshQuery, undefined); + assertEquals(state.pendingInjectionGeneration, 0); }); - it("round-trips state through setState / getState", () => { - const sm = new SessionManager( - "g", - "u", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti() as unknown as GraphitiClient, + it("prepareInjection builds canonical session_memory with optional persistent_memory", async () => { + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + { + recallSessionEvents() { + return []; + }, + getRecentSessionEvents() { + return [ + { + id: "1", + ts: Date.now(), + category: "intent", + priority: 0, + role: "user", + summary: "Continue the overhaul", + continuityText: + "Continue the overhaul using structured continuity fields", + }, + { + id: "2", + ts: Date.now(), + category: "decision", + priority: 0, + role: "user", + summary: "Keep Graphiti off the hot path", + continuityText: + "Keep Graphiti off the hot path and rely on structured continuity in session memory", + }, + ]; + }, + } as never, + { + getSnapshot() { + return ''; + }, + } as never, + { + get() { + return { + query: "Continue the overhaul", + refreshedAt: Date.now(), + 
facts: [{ + uuid: "fact-1", + fact: "The user prefers local injection", + }], + nodes: [{ uuid: "node-1", name: "Context Overhaul" }], + factUuids: ["fact-1"], + nodeRefs: ["node-1"], + }; + }, + renderPersistentMemory() { + return { + body: "The user prefers local injection", + factUuids: ["fact-1"], + nodeRefs: ["node-1"], + }; + }, + getMeta() { + return null; + }, + classifyRefresh() { + return { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "Continue the overhaul", + }; + }, + } as never, + ); + + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + const prepared = await manager.prepareInjection( + "session-1", + "Continue the overhaul", + ); + + assertStringIncludes(prepared?.envelope ?? "", " { - it("returns resolved=false when parentId lookup fails", async () => { - const sdk = { - session: { - get: async () => { - throw new Error("network"); - }, - messages: async () => ({ data: [] }), + it("snapshot and injection preserve continuity from structured fields without body text", async () => { + const decisionText = + "Keep structured continuity summaries in session memory instead of transcript bodies"; + const snapshot = buildSessionSnapshotXml("session-1", [ + { + id: "1", + ts: Date.now() - 1, + category: "decision", + priority: 0, + role: "user", + summary: "Keep structured continuity summaries", + continuityText: decisionText, + }, + { + id: "2", + ts: Date.now(), + category: "message", + priority: 4, + role: "user", + summary: "continue", + continuityText: + "continue with continuity-first session memory injection semantics", }, - } as unknown as OpencodeClient; - const sm = new SessionManager( - "g", - "u", - sdk, - makeStubGraphiti() as unknown as GraphitiClient, - ); - const result = await sm.resolveSessionState("unknown"); - assertEquals(result.resolved, false); - assertEquals(result.state, null); + ]); + + 
assertStringIncludes(snapshot, decisionText); + + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + { + recallSessionEvents() { + return []; + }, + getRecentSessionEvents() { + return [{ + id: "1", + ts: Date.now(), + category: "decision", + priority: 0, + role: "user", + summary: "Keep structured continuity summaries", + continuityText: decisionText, + }, { + id: "2", + ts: Date.now() + 1, + category: "intent", + priority: 0, + role: "user", + summary: "continue", + continuityText: + "continue with continuity-first session memory injection semantics", + }]; + }, + } as never, + { + getSnapshot() { + return snapshot; + }, + } as never, + { + get() { + return null; + }, + getMeta() { + return null; + }, + renderPersistentMemory() { + return { body: "", factUuids: [], nodeRefs: [] }; + }, + classifyRefresh() { + return { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }; + }, + } as never, + ); + + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + const prepared = await manager.prepareInjection("session-1", "continue"); + + assertStringIncludes(prepared?.envelope ?? 
"", decisionText); + assertEquals(prepared?.envelope.includes(""), true); + }); + + it("prepareInjection prefers the freshest user event over stale fallback", async () => { + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + { + recallSessionEvents() { + return []; + }, + getRecentSessionEvents() { + return [{ + id: "1", + ts: Date.now(), + category: "message", + priority: 4, + role: "user", + summary: "fresh request", + body: "fresh request", + }]; + }, + } as never, + { + getSnapshot() { + return null; + }, + } as never, + { + get() { + return null; + }, + getMeta() { + return null; + }, + renderPersistentMemory() { + return { body: "", factUuids: [], nodeRefs: [] }; + }, + classifyRefresh() { + return { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }; + }, + } as never, + ); + + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + const prepared = await manager.prepareInjection( + "session-1", + "stale fallback", + ); + + assertStringIncludes( + prepared?.envelope ?? 
"", + "fresh request", + ); + assertEquals(prepared?.refreshDecision.classification, "miss"); }); - it("returns resolved=true, state=null for subagent (has parentId)", async () => { - const sm = new SessionManager( - "g", - "u", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti() as unknown as GraphitiClient, - ); - sm.setParentId("sub1", "parent-session"); - const result = await sm.resolveSessionState("sub1"); - assertEquals(result.resolved, true); - assertEquals(result.state, null); + it("prepareInjection recalls older relevant events and merges them deterministically", async () => { + const olderDecisionTs = Date.now() - 10_000; + const recentIntentTs = Date.now(); + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + { + getRecentSessionEvents() { + return [{ + id: "recent-intent", + ts: recentIntentTs, + category: "intent", + priority: 0, + role: "user", + summary: "Investigate recall behavior", + body: "Investigate recall behavior", + }]; + }, + recallSessionEvents() { + return [{ + id: "older-decision", + ts: olderDecisionTs, + category: "decision", + priority: 0, + role: "user", + summary: "Prefer recalled decisions for injection", + }, { + id: "recent-intent", + ts: recentIntentTs, + category: "intent", + priority: 0, + role: "user", + summary: "Investigate recall behavior", + body: "Investigate recall behavior", + }]; + }, + } as never, + { + getSnapshot() { + return null; + }, + } as never, + { + get() { + return null; + }, + getMeta() { + return null; + }, + renderPersistentMemory() { + return { body: "", factUuids: [], nodeRefs: [] }; + }, + classifyRefresh() { + return { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }; + }, + } as never, + ); + + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + const prepared = await manager.prepareInjection( + 
"session-1", + "Investigate recall behavior", + ); + + assertStringIncludes( + prepared?.envelope ?? "", + "Prefer recalled decisions for injection", + ); + assertEquals( + prepared?.envelope.match(/Investigate recall behavior/g)?.length, + 2, + ); }); - it("creates and returns default state for main session", async () => { - const sm = new SessionManager( - "proj", - "user", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti() as unknown as GraphitiClient, - ); - sm.setParentId("main1", null); - const result = await sm.resolveSessionState("main1"); - assertEquals(result.resolved, true); - assertEquals(result.state?.groupId, "proj"); - assertEquals(result.state?.isMain, true); + it("prepareInjection drops stale late completions after a newer prepare wins", async () => { + let resolveFirst!: () => void; + let resolveSecond!: () => void; + let recentCallCount = 0; + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + { + recallSessionEvents() { + return []; + }, + async getRecentSessionEvents() { + recentCallCount += 1; + if (recentCallCount === 1) { + await new Promise((resolve) => { + resolveFirst = resolve; + }); + return [{ + id: "1", + ts: Date.now(), + category: "message", + priority: 4, + role: "user", + summary: "stale request", + body: "stale request", + }]; + } + await new Promise((resolve) => { + resolveSecond = resolve; + }); + return [{ + id: "2", + ts: Date.now(), + category: "message", + priority: 4, + role: "user", + summary: "fresh request", + body: "fresh request", + }]; + }, + } as never, + { + getSnapshot() { + return null; + }, + } as never, + { + get() { + return null; + }, + getMeta() { + return null; + }, + renderPersistentMemory() { + return { body: "", factUuids: [], nodeRefs: [] }; + }, + classifyRefresh() { + return { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }; + }, + } as never, + ); + + manager.setParentId("session-1", 
null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + + const firstPrepare = manager.prepareInjection("session-1", "stale request"); + const secondPrepare = manager.prepareInjection( + "session-1", + "fresh request", + ); + + resolveSecond(); + const freshPrepared = await secondPrepare; + resolveFirst(); + const stalePrepared = await firstPrepare; + + const state = manager.getState("session-1"); + assertStringIncludes( + freshPrepared?.envelope ?? "", + "fresh request", + ); + assertEquals(stalePrepared, null); + assertEquals(state?.pendingInjection, freshPrepared); + assertEquals(freshPrepared?.refreshDecision.classification, "miss"); }); - it("returns existing state on second call", async () => { - const sm = new SessionManager( - "proj", - "user", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti() as unknown as GraphitiClient, - ); - sm.setParentId("main1", null); - const first = await sm.resolveSessionState("main1"); - first.state!.messageCount = 5; - const second = await sm.resolveSessionState("main1"); - assertEquals(second.state?.messageCount, 5); + it("prepareInjection preserves required continuity sections after restore", async () => { + const snapshot = buildSessionSnapshotXml("session-1", [{ + id: "snap-1", + ts: Date.now() - 10, + category: "decision", + priority: 0, + role: "user", + summary: "Keep Graphiti off the hot path", + }]); + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + { + recallSessionEvents() { + return []; + }, + getRecentSessionEvents() { + return [{ + id: "1", + ts: Date.now(), + category: "intent", + priority: 0, + role: "user", + summary: "Continue the context overhaul", + }, { + id: "2", + ts: Date.now() + 1, + category: "task.update", + priority: 0, + role: "user", + summary: "Implement deterministic CI-safe tests", + }, { + id: "3", + ts: Date.now() + 2, + category: "decision", + priority: 0, + role: "user", + summary: "Keep 
Graphiti off the hot path", + }, { + id: "4", + ts: Date.now() + 3, + category: "file.edit", + priority: 1, + role: "tool", + summary: "Edited src/session.ts", + refs: ["src/session.ts"], + }, { + id: "5", + ts: Date.now() + 4, + category: "rule.load", + priority: 0, + role: "system", + summary: "Stay within scoped tests only", + }, { + id: "6", + ts: Date.now() + 5, + category: "error", + priority: 2, + role: "tool", + summary: "Redis refresh blocked", + continuityText: "Redis refresh blocked until reconnect succeeds", + metadata: { resolved: false, blocking: true }, + }, { + id: "7", + ts: Date.now() + 6, + category: "git.activity", + priority: 3, + role: "tool", + summary: "Working tree has local changes", + }, { + id: "8", + ts: Date.now() + 7, + category: "subagent.finish", + priority: 1, + role: "system", + summary: "Reviewer subagent finished", + }]; + }, + } as never, + { + getSnapshot() { + return snapshot; + }, + } as never, + { + get() { + return null; + }, + getMeta() { + return null; + }, + renderPersistentMemory() { + return { body: "", factUuids: [], nodeRefs: [] }; + }, + classifyRefresh() { + return { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }; + }, + } as never, + ); + + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + const prepared = await manager.prepareInjection( + "session-1", + "Continue the context overhaul", + ); + + assertStringIncludes(prepared?.envelope ?? "", ""); + assertStringIncludes(prepared?.envelope ?? "", ""); + assertStringIncludes(prepared?.envelope ?? "", ""); + assertStringIncludes(prepared?.envelope ?? "", ""); + assertStringIncludes(prepared?.envelope ?? "", ""); + assertStringIncludes(prepared?.envelope ?? "", ""); + assertStringIncludes(prepared?.envelope ?? "", ""); + assertStringIncludes(prepared?.envelope ?? "", ""); + assertStringIncludes(prepared?.envelope ?? 
"", ""); }); -}); -// --------------------------------------------------------------------------- -// bufferAssistantPart / isAssistantBuffered / finalizeAssistantMessage -// --------------------------------------------------------------------------- -describe("SessionManager assistant message buffering", () => { - it("bufferAssistantPart stores text; isAssistantBuffered returns false before finalize", () => { - const sm = new SessionManager( - "g", - "u", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti() as unknown as GraphitiClient, - ); - sm.bufferAssistantPart("s1", "m1", "Hello"); - assertEquals(sm.isAssistantBuffered("s1", "m1"), false); + it("prepareInjection stays compact and avoids raw transcript dumps under large tool output", async () => { + const hugeTranscript = "TOOL-OUTPUT ".repeat(1200); + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + { + recallSessionEvents() { + return []; + }, + getRecentSessionEvents() { + return [{ + id: "1", + ts: Date.now(), + category: "intent", + priority: 0, + role: "user", + summary: "Continue compact session memory work", + }, { + id: "2", + ts: Date.now() + 1, + category: "file.edit", + priority: 1, + role: "tool", + summary: "Edited src/services/redis-cache.ts", + continuityText: + "Edited src/services/redis-cache.ts to keep persistent memory compact", + body: hugeTranscript, + refs: ["src/services/redis-cache.ts"], + }]; + }, + } as never, + { + getSnapshot() { + return buildSessionSnapshotXml("session-1", [{ + id: "snap-1", + ts: Date.now(), + category: "file.edit", + priority: 1, + role: "tool", + summary: "Edited src/services/redis-cache.ts", + continuityText: + "Edited src/services/redis-cache.ts to keep persistent memory compact", + body: hugeTranscript, + refs: ["src/services/redis-cache.ts"], + }]); + }, + } as never, + { + get() { + return { + query: "compact session memory", + refreshedAt: Date.now(), + facts: [{ uuid: "fact-1", fact: 
hugeTranscript }], + nodes: [{ + uuid: "node-1", + name: "Context Overhaul", + summary: hugeTranscript, + }], + factUuids: ["fact-1"], + nodeRefs: ["node-1"], + }; + }, + getMeta() { + return null; + }, + renderPersistentMemory(cache: PersistentMemoryCacheEntry | null) { + return { + body: cache + ? `${cache.facts[0].fact.slice(0, 220)}` + : "", + factUuids: cache ? ["fact-1"] : [], + nodeRefs: cache ? ["node-1"] : [], + }; + }, + classifyRefresh() { + return { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "compact session memory", + }; + }, + } as never, + ); + + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + const prepared = await manager.prepareInjection( + "session-1", + "Continue compact session memory work", + ); + + assertEquals((prepared?.envelope.length ?? 0) < 5000, true); + assertStringIncludes( + prepared?.envelope ?? "", + "Edited src/services/redis-cache.ts to keep persistent memory compact", + ); + assertEquals((prepared?.envelope ?? 
"").includes(hugeTranscript), false); }); - it("finalizeAssistantMessage appends to pendingMessages", () => { - const sm = new SessionManager( - "g", - "u", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti() as unknown as GraphitiClient, - ); - const state = sm.createDefaultState("g", "u"); - sm.setState("s1", state); - sm.bufferAssistantPart("s1", "m1", "World"); - sm.finalizeAssistantMessage(state, "s1", "m1", "test"); - assertEquals(state.pendingMessages[0], "Assistant: World"); - assertEquals(sm.isAssistantBuffered("s1", "m1"), true); + it("deletes idle sessions after retention when still inactive", () => { + const clock = new FakeClock(); + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + {} as never, + {} as never, + {} as never, + { + idleRetentionMs: 100, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }, + ); + + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + + manager.scheduleIdleSessionCleanup("session-1"); + clock.tick(99); + assertEquals(manager.getState("session-1")?.groupId, "group-1"); + + clock.tick(1); + assertEquals(manager.getState("session-1"), undefined); }); - it("finalizeAssistantMessage is idempotent", () => { - const sm = new SessionManager( - "g", - "u", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti() as unknown as GraphitiClient, - ); - const state = sm.createDefaultState("g", "u"); - sm.setState("s1", state); - sm.bufferAssistantPart("s1", "m1", "Hi"); - sm.finalizeAssistantMessage(state, "s1", "m1", "test"); - sm.finalizeAssistantMessage(state, "s1", "m1", "test"); - assertEquals(state.pendingMessages.length, 1); + it("cancels stale idle cleanup when the session is reactivated", () => { + const clock = new FakeClock(); + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + {} as never, + {} as never, + {} as never, + { + 
idleRetentionMs: 100, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }, + ); + + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + + manager.scheduleIdleSessionCleanup("session-1"); + clock.tick(50); + manager.markSessionActive("session-1"); + + clock.tick(60); + assertEquals(manager.getState("session-1")?.groupId, "group-1"); + + manager.scheduleIdleSessionCleanup("session-1"); + clock.tick(100); + assertEquals(manager.getState("session-1"), undefined); }); - it("does not append empty buffered text", () => { - const sm = new SessionManager( - "g", - "u", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti() as unknown as GraphitiClient, - ); - const state = sm.createDefaultState("g", "u"); - sm.setState("s1", state); - sm.bufferAssistantPart("s1", "m1", " "); // whitespace only - sm.finalizeAssistantMessage(state, "s1", "m1", "test"); - assertEquals(state.pendingMessages.length, 0); + it("rejects stale idle scheduling when the captured generation is outdated", () => { + const clock = new FakeClock(); + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + {} as never, + {} as never, + {} as never, + { + idleRetentionMs: 100, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }, + ); + + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + + const idleGeneration = manager.captureIdleCleanupGeneration("session-1"); + manager.markSessionActive("session-1"); + manager.scheduleIdleSessionCleanup( + "session-1", + idleGeneration ?? 
undefined, + ); + + clock.tick(150); + assertEquals(manager.getState("session-1")?.groupId, "group-1"); }); -}); -// --------------------------------------------------------------------------- -// flushPendingMessages -// --------------------------------------------------------------------------- -describe("SessionManager.flushPendingMessages", () => { - it("does nothing when pendingMessages is empty", async () => { - const calls: unknown[] = []; - const sm = new SessionManager( - "g", - "u", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti(async (p) => { - calls.push(p); - }) as unknown as GraphitiClient, - ); - const state = sm.createDefaultState("g", "u"); - sm.setState("s1", state); - await sm.flushPendingMessages("s1", "test", 0); - assertEquals(calls.length, 0); + it("snapshot builder admits sections against the current remaining budget", () => { + const long = "x".repeat(500); + const snapshot = buildSessionSnapshotXml("session-1", [ + { + id: "1", + ts: Date.now(), + category: "decision", + priority: 0, + role: "user", + summary: long, + }, + { + id: "2", + ts: Date.now(), + category: "rule.load", + priority: 0, + role: "system", + summary: long, + }, + { + id: "3", + ts: Date.now(), + category: "intent", + priority: 0, + role: "user", + summary: long, + }, + { + id: "4", + ts: Date.now(), + category: "file.edit", + priority: 1, + role: "tool", + summary: "edited", + refs: ["src/session.ts"], + }, + { + id: "5", + ts: Date.now(), + category: "error", + priority: 2, + role: "tool", + summary: "error", + metadata: { resolved: false }, + }, + ]); + + assertEquals(snapshot.length <= 3000, true); + assertStringIncludes(snapshot, ""); + assertStringIncludes(snapshot, ""); }); - it("does not flush when combined text is below minBytes", async () => { - const calls: unknown[] = []; - const sm = new SessionManager( - "g", - "u", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti(async (p) => { - calls.push(p); - }) as unknown as 
GraphitiClient, - ); - const state = sm.createDefaultState("g", "u"); - sm.setState("s1", state); - state.pendingMessages = ["Assistant: Hi"]; - await sm.flushPendingMessages("s1", "test", 10_000); - assertEquals(calls.length, 0); - // Message was preserved (not consumed) - assertEquals(state.pendingMessages.length, 1); + it("snapshot keeps an active_task section by falling back to the latest user request", () => { + const long = "plan ".repeat(120); + const snapshot = buildSessionSnapshotXml("session-1", [ + ...Array.from({ length: 5 }, (_, index) => ({ + id: `d-${index}`, + ts: Date.now(), + category: "decision" as const, + priority: 0 as const, + role: "user" as const, + summary: `${index} ${long}`, + })), + ...Array.from({ length: 5 }, (_, index) => ({ + id: `r-${index}`, + ts: Date.now(), + category: "rule.load" as const, + priority: 0 as const, + role: "system" as const, + summary: `${index} ${long}`, + })), + { + id: "m-1", + ts: Date.now(), + category: "message", + priority: 4, + role: "user", + summary: long, + body: long, + }, + ]); + + assertStringIncludes(snapshot, ""); + assertEquals(snapshot.length <= 3000, true); }); - it("flushes messages above minBytes threshold", async () => { - const calls: Array<{ episodeBody: string }> = []; - const sm = new SessionManager( - "g", - "u", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti(async (p) => { - calls.push(p as { episodeBody: string }); - }) as unknown as GraphitiClient, - ); - const state = sm.createDefaultState("g", "u"); - sm.setState("s1", state); - state.pendingMessages = ["User: Hello", "Assistant: World"]; - await sm.flushPendingMessages("s1", "my-source", 0); - assertEquals(calls.length, 1); - assertEquals(calls[0].episodeBody.includes("User: Hello"), true); - assertEquals(state.pendingMessages.length, 0); + it("snapshot keeps blockers distinct from summary-only errors", () => { + const snapshot = buildSessionSnapshotXml("session-1", [ + { + id: "1", + ts: Date.now() - 2, + 
category: "error", + priority: 2, + role: "tool", + summary: "Command failed", + continuityText: "Command failed", + metadata: { resolved: false }, + }, + { + id: "2", + ts: Date.now() - 1, + category: "error", + priority: 2, + role: "tool", + summary: "Refresh blocked", + continuityText: "Refresh blocked while waiting on Redis lock", + metadata: { resolved: false }, + }, + ]); + + assertStringIncludes(snapshot, ""); + assertStringIncludes(snapshot, "Command failed"); + assertStringIncludes( + snapshot, + "Refresh blocked while waiting on Redis lock", + ); + assertStringIncludes( + snapshot, + "Refresh blocked while waiting on Redis lock", + ); + assertEquals(snapshot.includes("Command failed"), false); }); -}); -// --------------------------------------------------------------------------- -// deleteSession -// --------------------------------------------------------------------------- -describe("SessionManager.deleteSession", () => { - it("removes session state and parentId cache", () => { - const sm = new SessionManager( - "g", - "u", - makeStubSdk() as unknown as OpencodeClient, - makeStubGraphiti() as unknown as GraphitiClient, - ); - const state = sm.createDefaultState("g", "u"); - sm.setState("s1", state); - sm.setParentId("s1", null); - sm.deleteSession("s1"); - assertEquals(sm.getState("s1"), undefined); + it("snapshot renders the expanded context sections when those events exist", () => { + const snapshot = buildSessionSnapshotXml("session-1", [ + { + id: "1", + ts: Date.now() - 8, + category: "env.change", + priority: 0, + role: "system", + summary: "Environment switched to staging", + }, + { + id: "2", + ts: Date.now() - 7, + category: "git.activity", + priority: 0, + role: "tool", + summary: "Working tree has local changes", + }, + { + id: "3", + ts: Date.now() - 6, + category: "subagent.start", + priority: 1, + role: "system", + summary: "Started reviewer subagent", + }, + { + id: "4", + ts: Date.now() - 5, + category: "subagent.finish", + priority: 
1, + role: "system", + summary: "Reviewer subagent finished cleanly", + }, + { + id: "5", + ts: Date.now() - 4, + category: "task.update", + priority: 0, + role: "user", + summary: "Need confirmation on restart-safe refresh scheduling", + }, + { + id: "6", + ts: Date.now() - 3, + category: "discovery", + priority: 0, + role: "assistant", + summary: "Redis metadata already stores the last refresh query", + }, + { + id: "7", + ts: Date.now() - 2, + category: "data.import", + priority: 0, + role: "system", + summary: "Imported prior refresh hints", + }, + { + id: "8", + ts: Date.now() - 1, + category: "message", + priority: 4, + role: "assistant", + summary: "Residual assistant summary", + }, + ]); + + assertStringIncludes(snapshot, ""); + assertStringIncludes(snapshot, ""); + assertStringIncludes(snapshot, ""); + assertStringIncludes(snapshot, ""); + assertStringIncludes(snapshot, ""); + assertStringIncludes(snapshot, ""); + assertStringIncludes(snapshot, ""); + assertStringIncludes(snapshot, ""); }); }); diff --git a/src/session.ts b/src/session.ts index e3f69f7..dd18f98 100644 --- a/src/session.ts +++ b/src/session.ts @@ -1,44 +1,91 @@ import type { OpencodeClient } from "@opencode-ai/sdk"; -import type { GraphitiClient } from "./services/client.ts"; -import { extractSdkMessages } from "./services/sdk-normalize.ts"; import { DEFAULT_CONTEXT_LIMIT } from "./services/constants.ts"; import { logger } from "./services/logger.ts"; -import { extractTextFromParts } from "./utils.ts"; +import type { RedisCacheService } from "./services/redis-cache.ts"; +import type { RedisEventsService } from "./services/redis-events.ts"; +import { + escapeXml, + renderXmlListSection, + uniqueValues, +} from "./services/render-utils.ts"; +import type { RedisSnapshotService } from "./services/redis-snapshot.ts"; +import { + getSessionEventPrimaryText, + type PreparedSessionMemory, + type SessionEvent, +} from "./types/index.ts"; + +const findLatestUserRequest = ( + events: SessionEvent[], + 
fallback?: string, +): string => { + const lastUser = events.findLast((event) => event.role === "user"); + return lastUser + ? getSessionEventPrimaryText(lastUser, fallback) + : fallback ?? ""; +}; + +const RECENT_BASELINE_LIMIT = 20; +const RECALL_RESULT_LIMIT = 12; + +const mergeSessionEvents = ( + recentEvents: SessionEvent[], + recalledEvents: SessionEvent[], +): SessionEvent[] => { + const merged = new Map(); + for (const event of [...recentEvents, ...recalledEvents]) { + if (!merged.has(event.id)) merged.set(event.id, event); + } + return [...merged.values()].sort((left, right) => { + if (left.ts !== right.ts) return left.ts - right.ts; + return left.id.localeCompare(right.id); + }); +}; + +const collectRecentUniqueValues = ( + events: SessionEvent[], + collect: (event: SessionEvent) => string | string[] | null | undefined, + limit: number, +): string[] => + uniqueValues( + events.flatMap((event) => { + const value = collect(event); + if (value === null || value === undefined) return []; + return Array.isArray(value) ? value : [value]; + }).reverse(), + limit, + ); -/** - * Per-session state tracked by the plugin. - */ export type SessionState = { - /** Graphiti group ID for this session. */ groupId: string; - /** Graphiti group ID for user-scoped memories. */ userGroupId: string; - /** Whether memories have been injected into this session yet. */ injectedMemories: boolean; - /** Fact UUIDs included in the last memory injection. */ lastInjectionFactUuids: string[]; - /** Cached formatted memory context for user message injection. */ - cachedMemoryContext?: string; - /** Fact UUIDs from cached context, for embedding in tag. */ - cachedFactUuids?: string[]; - /** Fact UUIDs currently visible in blocks across all messages. */ visibleFactUuids: string[]; - /** Count of messages observed in this session. */ messageCount: number; - /** Buffered message strings awaiting flush. */ pendingMessages: string[]; - /** Last successfully saved idle-session snapshot body. 
*/ - lastSnapshotBody?: string; - /** Context window limit in tokens. */ contextLimit: number; - /** True when this session is the primary (non-subagent) session. */ isMain: boolean; + hotTierReady: boolean; + latestUserRequest?: string; + latestRefreshQuery?: string; + pendingInjection?: PreparedSessionMemory; + pendingInjectionGeneration: number; +}; + +type TimerHandle = ReturnType | number; + +export interface SessionManagerOptions { + idleRetentionMs?: number; + setTimer?: (callback: () => void, delayMs: number) => TimerHandle; + clearTimer?: (timer: TimerHandle) => void; +} + +type SessionLifecycle = { + activityGeneration: number; + idleCleanupTimer: TimerHandle | null; }; -/** - * Tracks per-session state, parent resolution, message buffering, - * and flushing pending messages to Graphiti. - */ export class SessionManager { private sessions = new Map(); private parentIdCache = new Map(); @@ -47,48 +94,117 @@ export class SessionManager { { sessionId: string; text: string } >(); private bufferedAssistantMessageIds = new Set(); + private sessionLifecycles = new Map(); + private readonly idleRetentionMs: number; + private readonly setTimerImpl: ( + callback: () => void, + delayMs: number, + ) => TimerHandle; + private readonly clearTimerImpl: (timer: TimerHandle) => void; constructor( private readonly defaultGroupId: string, private readonly defaultUserGroupId: string, private readonly sdkClient: OpencodeClient, - private readonly graphitiClient: GraphitiClient, - ) {} + private readonly redisEvents: RedisEventsService, + private readonly redisSnapshot: RedisSnapshotService, + private readonly redisCache: RedisCacheService, + options: SessionManagerOptions = {}, + ) { + this.idleRetentionMs = Math.max(0, options.idleRetentionMs ?? 0); + this.setTimerImpl = options.setTimer ?? + ((callback, delayMs) => setTimeout(callback, delayMs)); + this.clearTimerImpl = options.clearTimer ?? 
+ ((timer) => clearTimeout(timer)); + } - /** Create a default main-session state for the given group IDs. */ createDefaultState(groupId: string, userGroupId: string): SessionState { return { groupId, userGroupId, injectedMemories: false, lastInjectionFactUuids: [], - cachedMemoryContext: undefined, - cachedFactUuids: undefined, visibleFactUuids: [], messageCount: 0, pendingMessages: [], - lastSnapshotBody: undefined, contextLimit: DEFAULT_CONTEXT_LIMIT, isMain: true, + hotTierReady: false, + latestUserRequest: undefined, + latestRefreshQuery: undefined, + pendingInjection: undefined, + pendingInjectionGeneration: 0, }; } - /** Get the current session state, if present. */ getState(sessionId: string): SessionState | undefined { return this.sessions.get(sessionId); } - /** Persist session state for the given session ID. */ setState(sessionId: string, state: SessionState): void { this.sessions.set(sessionId, state); } - /** Cache a resolved parent ID for a session. */ + markSessionActive(sessionId: string): void { + const lifecycle = this.getLifecycle(sessionId); + lifecycle.activityGeneration += 1; + if (lifecycle.idleCleanupTimer !== null) { + this.clearTimerImpl(lifecycle.idleCleanupTimer); + lifecycle.idleCleanupTimer = null; + } + } + + captureIdleCleanupGeneration(sessionId: string): number | null { + const state = this.sessions.get(sessionId); + if (!state?.isMain) return null; + return this.getLifecycle(sessionId).activityGeneration; + } + + scheduleIdleSessionCleanup( + sessionId: string, + expectedActivityGeneration?: number, + ): void { + const state = this.sessions.get(sessionId); + if (!state?.isMain) { + this.deleteSession(sessionId); + return; + } + + const lifecycle = this.getLifecycle(sessionId); + if ( + expectedActivityGeneration !== undefined && + lifecycle.activityGeneration !== expectedActivityGeneration + ) { + return; + } + + if (this.idleRetentionMs <= 0) { + this.deleteSession(sessionId); + return; + } + + if (lifecycle.idleCleanupTimer !== 
null) { + this.clearTimerImpl(lifecycle.idleCleanupTimer); + lifecycle.idleCleanupTimer = null; + } + + const activityGeneration = expectedActivityGeneration ?? + lifecycle.activityGeneration; + const timerHandle = this.setTimerImpl(() => { + const currentLifecycle = this.sessionLifecycles.get(sessionId); + if (!currentLifecycle) return; + if (currentLifecycle.idleCleanupTimer !== timerHandle) return; + if (currentLifecycle.activityGeneration !== activityGeneration) return; + this.deleteSession(sessionId); + }, this.idleRetentionMs); + + lifecycle.idleCleanupTimer = timerHandle; + } + setParentId(sessionId: string, parentId: string | null): void { this.parentIdCache.set(sessionId, parentId); } - /** Resolve and cache the parent ID for a session. */ async resolveParentId( sessionId: string, ): Promise { @@ -113,14 +229,13 @@ export class SessionManager { } } - /** Resolve the session state, initializing if needed. */ async resolveSessionState( sessionId: string, ): Promise<{ state: SessionState | null; resolved: boolean }> { const parentId = await this.resolveParentId(sessionId); if (parentId === undefined) return { state: null, resolved: false }; if (parentId) { - this.sessions.delete(sessionId); + this.deleteSession(sessionId); return { state: null, resolved: true }; } @@ -135,7 +250,6 @@ export class SessionManager { return { state, resolved: true }; } - /** Buffer partial assistant text for a streaming message. */ bufferAssistantPart( sessionId: string, messageId: string, @@ -145,199 +259,231 @@ export class SessionManager { this.pendingAssistantMessages.set(key, { sessionId, text }); } - /** Check if an assistant message has already been finalized. 
*/ isAssistantBuffered(sessionId: string, messageId: string): boolean { - const key = `${sessionId}:${messageId}`; - return this.bufferedAssistantMessageIds.has(key); + return this.bufferedAssistantMessageIds.has(`${sessionId}:${messageId}`); } - /** - * Finalize a buffered assistant message and append it to pending messages. - */ finalizeAssistantMessage( state: SessionState, sessionId: string, messageId: string, source: string, - ): void { + ): string | null { const key = `${sessionId}:${messageId}`; - if (this.bufferedAssistantMessageIds.has(key)) return; + if (this.bufferedAssistantMessageIds.has(key)) return null; const buffered = this.pendingAssistantMessages.get(key); this.pendingAssistantMessages.delete(key); this.bufferedAssistantMessageIds.add(key); const messageText = buffered?.text?.trim() ?? ""; - const messagePreview = messageText.slice(0, 120); + if (!messageText) return null; + state.pendingMessages.push(`Assistant: ${messageText}`); logger.info("Assistant message completed", { hook: source, sessionId, messageID: messageId, - source, messageLength: messageText.length, - preview: messagePreview, }); + return messageText; + } - if (!messageText) { - logger.debug("Assistant message completed without buffered text", { - hook: source, - sessionId, - messageID: messageId, - source, - }); - return; - } - - state.pendingMessages.push(`Assistant: ${messageText}`); - logger.info("Buffered assistant reply", { - hook: source, - sessionId, - messageID: messageId, - source, - messageLength: messageText.length, - preview: messagePreview, - }); + deletePendingAssistant(sessionId: string, messageId: string): void { + this.pendingAssistantMessages.delete(`${sessionId}:${messageId}`); } - /** Flush pending buffered messages to Graphiti when size thresholds permit. 
*/ - async flushPendingMessages( + async prepareInjection( sessionId: string, - sourceDescription: string, - minBytes: number, - ): Promise { + lastRequest?: string, + visibleFactUuids?: string[], + ): Promise { const state = this.sessions.get(sessionId); - if (!state || state.pendingMessages.length === 0) return; - - const lastMessage = state.pendingMessages.at(-1); - if (lastMessage) { - const separatorIndex = lastMessage.indexOf(":"); - const role = separatorIndex === -1 - ? lastMessage.trim().toLowerCase() - : lastMessage.slice(0, separatorIndex).trim().toLowerCase(); - if (role === "user") { - const fallback = await this.fetchLatestAssistantMessage(sessionId); - if (fallback?.text) { - const fallbackKey = fallback.id - ? `${sessionId}:${fallback.id}` - : undefined; - const alreadyBuffered = fallbackKey - ? this.bufferedAssistantMessageIds.has(fallbackKey) - : state.pendingMessages.some((message) => - message.startsWith("Assistant:") && - message.includes(fallback.text) - ); - if (!alreadyBuffered) { - state.pendingMessages.push(`Assistant: ${fallback.text}`); - if (fallbackKey) { - this.bufferedAssistantMessageIds.add(fallbackKey); - } - logger.info("Fallback assistant fetch used", { - sessionId, - messageID: fallback.id, - messageLength: fallback.text.length, - }); - } - } - } - } + if (!state?.isMain) return null; + const generation = state.pendingInjectionGeneration + 1; + state.pendingInjectionGeneration = generation; - const combined = state.pendingMessages.join("\n\n"); - if (combined.length < minBytes) return; - - const messagesToFlush = [...state.pendingMessages]; - state.pendingMessages = []; - const messageLines = messagesToFlush.map((message) => { - const separatorIndex = message.indexOf(":"); - const role = separatorIndex === -1 - ? "Unknown" - : message.slice(0, separatorIndex).trim(); - const text = separatorIndex === -1 - ? 
message - : message.slice(separatorIndex + 1).trim(); - return `${role}: ${text}`; - }); + const [recentEvents, snapshot, cache, cacheMeta] = await Promise.all([ + this.redisEvents.getRecentSessionEvents( + sessionId, + RECENT_BASELINE_LIMIT, + true, + ), + this.redisSnapshot.getSnapshot(sessionId), + this.redisCache.get(state.groupId), + this.redisCache.getMeta(state.groupId), + ]); - try { - const name = combined.slice(0, 80).replace(/\n/g, " "); - logger.info(`Flushing ${messagesToFlush.length} buffered message(s).`); - logger.info( - `Buffered message contents:\n${messageLines.join("\n")}`, - { sessionId }, - ); - await this.graphitiClient.addEpisode({ - name: `Buffered messages: ${name}`, - episodeBody: combined, - groupId: state.groupId, - source: "text", - sourceDescription, - }); - logger.info("Flushed buffered messages to Graphiti"); - } catch (err) { - logger.error(`Failed to flush messages for ${sessionId}:`, err); - const currentState = this.sessions.get(sessionId); - if (currentState) { - currentState.pendingMessages = [ - ...messagesToFlush, - ...currentState.pendingMessages, - ]; - } - } - } + const latestRequest = findLatestUserRequest( + recentEvents, + lastRequest ?? state.latestUserRequest ?? state.latestRefreshQuery ?? + cacheMeta?.lastQuery, + ); + const recalledEvents = latestRequest + ? await this.redisEvents.recallSessionEvents(sessionId, latestRequest, { + resultLimit: RECALL_RESULT_LIMIT, + }) + : []; + const events = mergeSessionEvents(recentEvents, recalledEvents); + const activeTasks = collectRecentUniqueValues( + events, + (event) => + ["task.create", "task.update", "intent"].includes(event.category) + ? getSessionEventPrimaryText(event) + : null, + 4, + ); + const decisions = collectRecentUniqueValues( + events, + (event) => + ["decision", "preference"].includes(event.category) + ? 
getSessionEventPrimaryText(event) + : null, + 5, + ); + const files = collectRecentUniqueValues( + events, + (event) => event.category.startsWith("file.") ? event.refs ?? [] : [], + 6, + ); + const rules = collectRecentUniqueValues( + events, + (event) => + event.category === "rule.load" + ? getSessionEventPrimaryText(event) + : null, + 6, + ); + const unresolvedErrors = collectRecentUniqueValues( + events, + (event) => + event.category === "error" && event.metadata?.resolved !== true + ? getSessionEventPrimaryText(event) + : null, + 4, + ); + const gitState = collectRecentUniqueValues( + events, + (event) => + event.category === "git.activity" + ? getSessionEventPrimaryText(event) + : null, + 4, + ); + const subagentWork = collectRecentUniqueValues( + events, + (event) => + event.category === "subagent.start" || + event.category === "subagent.finish" + ? getSessionEventPrimaryText(event) + : null, + 4, + ); + const persistent = this.redisCache.renderPersistentMemory( + cache, + visibleFactUuids ?? state.visibleFactUuids, + ); + const refreshDecision = this.redisCache.classifyRefresh( + cache, + latestRequest, + ); - /** Remove a pending assistant message by key. */ - deletePendingAssistant(sessionId: string, messageId: string): void { - const key = `${sessionId}:${messageId}`; - this.pendingAssistantMessages.delete(key); + const sections = [ + `${escapeXml(latestRequest)}`, + renderXmlListSection( + "active_tasks", + "task", + activeTasks.length > 0 + ? activeTasks + : latestRequest + ? [latestRequest] + : [], + { itemCharLimit: 280, includeEmpty: true }, + ), + renderXmlListSection("key_decisions", "decision", decisions, { + itemCharLimit: 280, + includeEmpty: true, + }), + renderXmlListSection("files_in_play", "file", files, { + itemCharLimit: 280, + includeEmpty: true, + }), + renderXmlListSection("project_rules", "rule", rules, { + itemCharLimit: 280, + includeEmpty: true, + }), + unresolvedErrors.length > 0 + ? 
renderXmlListSection("unresolved_errors", "error", unresolvedErrors, { + itemCharLimit: 280, + }) + : "", + gitState.length > 0 + ? renderXmlListSection("git_state", "item", gitState, { + itemCharLimit: 280, + }) + : "", + subagentWork.length > 0 + ? renderXmlListSection("subagent_work", "item", subagentWork, { + itemCharLimit: 280, + }) + : "", + snapshot ? `${snapshot}` : "", + persistent.body + ? `${persistent.body}` + : "", + ].filter(Boolean); + + const envelope = + `${ + sections.join("") + }`; + const prepared = { + envelope, + factUuids: persistent.factUuids, + nodeRefs: persistent.nodeRefs, + refreshDecision, + }; + + const currentState = this.sessions.get(sessionId); + if (currentState !== state || !currentState.isMain) return null; + if (state.pendingInjectionGeneration !== generation) return null; + + state.pendingInjection = prepared; + state.lastInjectionFactUuids = persistent.factUuids; + state.hotTierReady = true; + state.latestRefreshQuery = latestRequest || cacheMeta?.lastQuery; + return prepared; } - /** Clear cached data for a session. */ deleteSession(sessionId: string): void { + const lifecycle = this.sessionLifecycles.get(sessionId); + if (lifecycle?.idleCleanupTimer != null) { + this.clearTimerImpl(lifecycle.idleCleanupTimer); + } + this.sessionLifecycles.delete(sessionId); this.sessions.delete(sessionId); this.parentIdCache.delete(sessionId); - - // Collect matching keys first, then delete in a second pass to avoid - // mutating a Map/Set while iterating over its live iterator. 
const prefix = `${sessionId}:`; - - const pendingToDelete: string[] = []; - for (const key of this.pendingAssistantMessages.keys()) { - if (key.startsWith(prefix)) pendingToDelete.push(key); + for (const key of [...this.pendingAssistantMessages.keys()]) { + if (key.startsWith(prefix)) this.pendingAssistantMessages.delete(key); } - for (const key of pendingToDelete) { - this.pendingAssistantMessages.delete(key); - } - - const bufferedToDelete: string[] = []; - for (const key of this.bufferedAssistantMessageIds) { - if (key.startsWith(prefix)) bufferedToDelete.push(key); - } - for (const key of bufferedToDelete) { - this.bufferedAssistantMessageIds.delete(key); + for (const key of [...this.bufferedAssistantMessageIds]) { + if (key.startsWith(prefix)) this.bufferedAssistantMessageIds.delete(key); } } - private async fetchLatestAssistantMessage( - sessionId: string, - ): Promise<{ id?: string; text: string } | null> { - try { - const response = await this.sdkClient.session.messages({ - path: { id: sessionId }, - query: { limit: 20 }, - }); - const messages = extractSdkMessages(response); - if (messages.length === 0) return null; - const lastAssistant = messages - .findLast((message) => message.info?.role === "assistant"); - if (!lastAssistant) return null; - const text = extractTextFromParts(lastAssistant.parts); - if (!text) return null; - return { id: lastAssistant.info?.id, text }; - } catch (err) { - logger.debug("Failed to list session messages for fallback", { - sessionId, - err, - }); - return null; + private getLifecycle(sessionId: string): SessionLifecycle { + let lifecycle = this.sessionLifecycles.get(sessionId); + if (!lifecycle) { + lifecycle = { + activityGeneration: 0, + idleCleanupTimer: null, + }; + this.sessionLifecycles.set(sessionId, lifecycle); } + return lifecycle; } } diff --git a/src/types/index.ts b/src/types/index.ts index 21d2d45..bb5a8e9 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -1,68 +1,202 @@ -/** Plugin configuration for 
Graphiti memory integration. */ -export interface GraphitiConfig { - /** URL of the Graphiti MCP server endpoint. */ +/** FalkorDB/Redis hot-tier configuration. */ +export interface FalkorDbConfig { + redisEndpoint: string; + batchSize: number; + batchMaxBytes: number; + sessionTtlSeconds: number; + cacheTtlSeconds: number; + drainRetryMax: number; +} + +/** Graphiti async-tier configuration. */ +export interface GraphitiServiceConfig { endpoint: string; - /** Prefix for group IDs to namespace project memories. */ groupIdPrefix: string; - /** Jaccard similarity threshold below which reinjection occurs. */ driftThreshold: number; - /** Number of days after which facts are considered stale. */ factStaleDays: number; } +/** Plugin configuration for hot-tier + Graphiti async integration. */ +export interface GraphitiConfig { + falkordb: FalkorDbConfig; + graphiti: GraphitiServiceConfig; + + // Legacy top-level keys retained for compatibility. + endpoint?: string; + groupIdPrefix?: string; + driftThreshold?: number; + factStaleDays?: number; + redisEndpoint?: string; + batchSize?: number; + batchMaxBytes?: number; + sessionTtlSeconds?: number; + cacheTtlSeconds?: number; + drainRetryMax?: number; +} + /** A fact retrieved from the Graphiti knowledge graph. */ export interface GraphitiFact { - /** Unique identifier for the fact. */ uuid: string; - /** Human-readable fact content. */ fact: string; - /** Timestamp when the fact becomes valid. */ valid_at?: string; - /** Timestamp when the fact becomes invalid. */ invalid_at?: string; - /** Source entity for the fact edge. */ source_node?: { name: string; uuid: string }; - /** Target entity for the fact edge. */ target_node?: { name: string; uuid: string }; } /** A node retrieved from the Graphiti knowledge graph. */ export interface GraphitiNode { - /** Unique identifier for the node. */ uuid: string; - /** Display name of the node. */ name: string; - /** Optional summary describing the node. 
*/ summary?: string; - /** Optional labels associated with the node. */ labels?: string[]; } -/** - * An episode retrieved from Graphiti memory. - * - * `sourceDescription` is the canonical field. Raw payloads may carry either - * `sourceDescription` (camelCase) or `source_description` (snake_case); the - * boundary helper `normalizeEpisode()` in `src/services/sdk-normalize.ts` - * collapses both into `sourceDescription` so downstream consumers only need to - * check one field. - */ +/** A recent episode retrieved from Graphiti memory. */ export interface GraphitiEpisode { - /** Unique identifier for the episode. */ uuid: string; - /** Episode title or name. */ name: string; - /** Episode content body. */ content: string; - /** Optional episode source type. */ source?: string; - /** - * Canonical source description (normalized from either camelCase or - * snake_case payload). Always populated by `normalizeEpisode()`. - */ sourceDescription?: string; - /** Optional episode creation timestamp. */ created_at?: string; - /** Optional labels associated with the episode. 
*/ labels?: string[]; } + +export type EventCategory = + | "task.create" + | "task.update" + | "task.complete" + | "decision" + | "preference" + | "rule.load" + | "file.read" + | "file.write" + | "file.edit" + | "file.search" + | "cwd.change" + | "env.change" + | "git.activity" + | "error" + | "subagent.start" + | "subagent.finish" + | "integration.call" + | "intent" + | "data.import" + | "discovery" + | "message" + | "session.meta"; + +export type SessionEventSourceKind = + | "user-request" + | "assistant-response" + | "tool-activity" + | "system-state"; + +export interface SessionEvent { + id: string; + ts: number; + category: EventCategory; + priority: 0 | 1 | 2 | 3 | 4; + role: "user" | "assistant" | "tool" | "system"; + summary: string; + body?: string; + detail?: string; + continuityText?: string; + keywords?: string[]; + sourceKind?: SessionEventSourceKind; + refs?: string[]; + metadata?: Record; +} + +const compactEventText = (values: Array): string => + [...new Set(values.map((value) => value?.trim()).filter(Boolean) as string[])] + .join(" ") + .trim(); + +const metadataRecallText = (metadata?: Record): string => { + if (!metadata) return ""; + const values: string[] = []; + for ( + const [key, value] of Object.entries(metadata).filter(([, value]) => + typeof value === "string" || typeof value === "number" || + typeof value === "boolean" + ) + ) { + if (/^(eventType|tool|integration|cwd|status|result|reason)$/i.test(key)) { + values.push(String(value)); + } + } + return values.join(" "); +}; + +export const getSessionEventPrimaryText = ( + event: SessionEvent, + fallback?: string, +): string => + event.continuityText?.trim() || event.detail?.trim() || + event.summary.trim() || + event.body?.trim() || fallback || ""; + +export const getSessionEventRecallText = (event: SessionEvent): string => + compactEventText([ + event.summary, + event.continuityText, + event.detail, + event.refs?.join(" "), + event.keywords?.join(" "), + 
metadataRecallText(event.metadata), + event.category, + event.sourceKind, + ]); + +export interface PersistentMemoryCacheEntry { + query: string; + refreshedAt: number; + facts: GraphitiFact[]; + nodes: GraphitiNode[]; + episodeSummaries?: string[]; + factUuids: string[]; + nodeRefs: string[]; +} + +export interface PersistentMemoryCacheMeta { + lastQuery?: string; + lastRefresh?: number; + factUuids: string[]; +} + +export type CacheRefreshClassification = + | "miss" + | "stale" + | "primer-only" + | "aligned" + | "drifted"; + +export interface CacheRefreshDecision { + classification: CacheRefreshClassification; + shouldRefresh: boolean; + similarity: number; + threshold: number; + cachedQuery: string | null; +} + +export interface DrainQueueEntry { + sessionId: string; + groupId: string; + event: SessionEvent; +} + +export interface ClaimedDrainBatch { + claimToken: string; + claimKey: string; + lockTtlSeconds: number; + entries: DrainQueueEntry[]; +} + +export interface PreparedSessionMemory { + envelope: string; + factUuids: string[]; + nodeRefs: string[]; + refreshDecision: CacheRefreshDecision; +} From b9ca275dc99a947c0d07243210b5679a453c174f Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Sat, 14 Mar 2026 18:30:29 +0800 Subject: [PATCH 02/38] feat!: mark context overhaul as 0.2.0 release Release-As: 0.2.0 --- .github/scripts/version.test.ts | 317 +++++ .github/scripts/version.ts | 217 ++- .github/workflows/publish.yml | 35 +- AGENTS.md | 176 +++ CONTRIBUTING.md | 19 +- README.md | 308 +++-- deno.json | 11 +- deno.lock | 16 +- dnt.ts | 6 +- docs/ReviewProtocol.md | 127 ++ mod.test.ts | 6 + plans/ConnectionManager.md | 27 +- plans/ContextOverhaul.md | 562 ++++++-- plans/ContextOverhaulTests.md | 137 +- scripts/bench-falkordb.ts | 131 ++ src/config.test.ts | 249 +++- src/config.ts | 280 ++-- src/handlers/chat.test.ts | 144 +- src/handlers/chat.ts | 104 +- src/handlers/compacting.test.ts | 124 +- src/handlers/compacting.ts | 39 +- src/handlers/event.test.ts | 1428 ++++++++++++++++++-- src/handlers/event.ts | 484 ++++--- src/handlers/messages.test.ts | 999 ++++++++++---- src/handlers/messages.ts | 132 +- src/index.test.ts | 790 ++++++++++- src/index.ts | 297 +++-- src/services/batch-drain.test.ts | 794 ++++++++++- src/services/batch-drain.ts | 302 ++++- src/services/client.test.ts | 305 ----- src/services/client.ts | 6 - src/services/compaction-utils.test.ts | 227 ---- src/services/compaction.test.ts | 443 ------- src/services/compaction.ts | 320 ----- src/services/connection-manager.test.ts | 517 +++++++- src/services/connection-manager.ts | 278 +++- src/services/context-limit.test.ts | 197 +++ src/services/context-limit.ts | 64 +- src/services/context-utils.test.ts | 418 ------ src/services/context.test.ts | 372 ------ src/services/context.ts | 243 ---- src/services/event-extractor.test.ts | 132 +- src/services/event-extractor.ts | 240 +++- src/services/graphiti-async.test.ts | 433 ++++++ src/services/graphiti-async.ts | 237 +++- src/services/graphiti-mcp.test.ts | 119 ++ src/services/graphiti-mcp.ts | 39 +- src/services/hot-tier-slice.test.ts | 1603 +++++++++++++++++++++-- src/services/logger.test.ts | 80 +- src/services/logger.ts | 12 +- 
src/services/opencode-warning.test.ts | 217 +++ src/services/opencode-warning.ts | 138 +- src/services/redis-cache.test.ts | 352 ++++- src/services/redis-cache.ts | 154 ++- src/services/redis-client.test.ts | 650 ++++++++- src/services/redis-client.ts | 392 +++++- src/services/redis-events.test.ts | 1020 ++++++++++++++ src/services/redis-events.ts | 405 +++++- src/services/redis-snapshot.ts | 154 ++- src/services/render-utils.test.ts | 41 + src/services/render-utils.ts | 115 +- src/services/runtime-teardown.test.ts | 355 ++++- src/services/runtime-teardown.ts | 166 ++- src/services/session-snapshot.test.ts | 998 +++++++++++--- src/session.ts | 1049 +++++++++++---- src/types/index.ts | 37 +- src/utils.test.ts | 41 +- src/utils.ts | 51 +- 68 files changed, 16169 insertions(+), 4712 deletions(-) create mode 100644 AGENTS.md create mode 100644 docs/ReviewProtocol.md create mode 100644 mod.test.ts create mode 100644 scripts/bench-falkordb.ts delete mode 100644 src/services/client.test.ts delete mode 100644 src/services/client.ts delete mode 100644 src/services/compaction-utils.test.ts delete mode 100644 src/services/compaction.test.ts delete mode 100644 src/services/compaction.ts create mode 100644 src/services/context-limit.test.ts delete mode 100644 src/services/context-utils.test.ts delete mode 100644 src/services/context.test.ts delete mode 100644 src/services/context.ts create mode 100644 src/services/graphiti-async.test.ts create mode 100644 src/services/graphiti-mcp.test.ts create mode 100644 src/services/opencode-warning.test.ts create mode 100644 src/services/redis-events.test.ts create mode 100644 src/services/render-utils.test.ts diff --git a/.github/scripts/version.test.ts b/.github/scripts/version.test.ts index 2b54426..dca3423 100644 --- a/.github/scripts/version.test.ts +++ b/.github/scripts/version.test.ts @@ -11,10 +11,54 @@ import { applyBump, calculateVersion, findReleaseAs, + hasBreakingChangeBody, hasNonTestChanges, + parseChangedFiles, parseSemver, 
+ run, } from "./version.ts"; +const makeCliDeps = (options: { + env?: Record; + files?: Record; + commands?: Record; + now?: Date; +}) => { + const outputs: string[] = []; + const logs: string[] = []; + const calls: string[] = []; + + return { + deps: { + cmd: (...command: string[]) => { + const key = command.join(" "); + calls.push(key); + const result = options.commands?.[key]; + if (result instanceof Error) return Promise.reject(result); + return Promise.resolve(result ?? ""); + }, + readTextFile: (filePath: string) => { + const result = options.files?.[filePath]; + if (result === undefined) { + return Promise.reject(new Error(`ENOENT: ${filePath}`)); + } + return Promise.resolve(result); + }, + envGet: (name: string) => options.env?.[name], + appendFile: (_filePath: string, text: string) => { + outputs.push(text); + }, + log: (message: string) => { + logs.push(message); + }, + now: () => options.now ?? new Date("2026-02-12T09:14:29Z"), + }, + outputs, + logs, + calls, + }; +}; + describe("analyzeCommits", () => { it("returns 'none' for empty array", () => { assertEquals(analyzeCommits([]), "none"); @@ -45,6 +89,7 @@ describe("analyzeCommits", () => { it("returns 'major' for breaking change with ! 
suffix", () => { assertEquals(analyzeCommits(["feat!: breaking change"]), "major"); + assertEquals(analyzeCommits(["fix!: breaking fix"]), "major"); }); it("returns 'major' for breaking change with BREAKING CHANGE in subject", () => { @@ -61,6 +106,15 @@ describe("analyzeCommits", () => { ); }); + it("returns 'major' when a commit body contains BREAKING CHANGE", () => { + assertEquals( + analyzeCommits(["feat: keep subject normal"], [ + "BREAKING CHANGE: api changed", + ]), + "major", + ); + }); + it("returns highest bump when mixed commits (feat + fix → minor)", () => { const subjects = [ "fix: bug fix", @@ -116,6 +170,24 @@ describe("analyzeCommits", () => { }); }); +describe("hasBreakingChangeBody", () => { + it("returns true for semantic-release style breaking change bodies", () => { + assertEquals( + hasBreakingChangeBody([ + "Some text\n\nBREAKING CHANGE: changed output format", + ]), + true, + ); + }); + + it("returns false when commit bodies do not include the breaking footer", () => { + assertEquals( + hasBreakingChangeBody(["Regular body", "Another body"]), + false, + ); + }); +}); + describe("findReleaseAs", () => { it("returns undefined for empty array", () => { assertEquals(findReleaseAs([]), undefined); @@ -310,6 +382,15 @@ describe("hasNonTestChanges", () => { }); }); +describe("parseChangedFiles", () => { + it("returns unique trimmed changed paths", () => { + assertEquals( + parseChangedFiles("src/mod.ts\n\nsrc/mod.ts\n src/util.ts \n"), + ["src/mod.ts", "src/util.ts"], + ); + }); +}); + describe("calculateVersion", () => { const baseOpts = { currentVersion: "1.0.0", @@ -349,6 +430,16 @@ describe("calculateVersion", () => { assertEquals(result, { skip: false, version: "2.0.0", tag: "latest" }); }); + it("creates release version for BREAKING CHANGE in commit body", () => { + const result = calculateVersion({ + ...baseOpts, + subjects: ["feat: keep subject stable"], + bodies: ["BREAKING CHANGE: api changed"], + eventName: "push", + }); + 
assertEquals(result, { skip: false, version: "2.0.0", tag: "latest" }); + }); + it("skips when no triggering commits", () => { const result = calculateVersion({ ...baseOpts, @@ -440,6 +531,21 @@ describe("calculateVersion", () => { }); }); + it("creates a 0.x canary with an exact Release-As override", () => { + const result = calculateVersion({ + ...baseOpts, + currentVersion: "0.1.12", + subjects: ["feat!: context overhaul"], + bodies: ["Release-As: 0.2.0"], + eventName: "pull_request", + }); + assertEquals(result, { + skip: false, + version: "0.2.0-canary.abc123d.20260212091429", + tag: "canary", + }); + }); + it("creates canary for breaking change", () => { const result = calculateVersion({ ...baseOpts, @@ -453,6 +559,20 @@ describe("calculateVersion", () => { }); }); + it("creates canary for BREAKING CHANGE in commit body", () => { + const result = calculateVersion({ + ...baseOpts, + subjects: ["fix: preserve subject format"], + bodies: ["BREAKING CHANGE: cache schema changed"], + eventName: "pull_request", + }); + assertEquals(result, { + skip: false, + version: "2.0.0-canary.abc123d.20260212091429", + tag: "canary", + }); + }); + it("shortens commit SHA to 7 characters", () => { const result = calculateVersion({ ...baseOpts, @@ -514,6 +634,30 @@ describe("calculateVersion", () => { tag: "canary", }); }); + + it("skips push release when no git tags and only test files changed", () => { + const result = calculateVersion({ + ...baseOpts, + currentVersion: "0.0.0", + subjects: ["chore: initial"], + changedFiles: ["src/foo.test.ts", ".github/scripts/version.test.ts"], + noGitTags: true, + eventName: "push", + }); + assertEquals(result, { skip: true }); + }); + + it("skips canary publish when no git tags and only test files changed", () => { + const result = calculateVersion({ + ...baseOpts, + currentVersion: "0.0.0", + subjects: ["docs: update"], + changedFiles: ["src/foo.test.ts"], + noGitTags: true, + eventName: "pull_request", + }); + assertEquals(result, { 
skip: true }); + }); }); describe("edge cases", () => { @@ -603,5 +747,178 @@ describe("calculateVersion", () => { tag: "canary", }); }); + + it("creates a 0.x canary minor bump from BREAKING CHANGE in body", () => { + const result = calculateVersion({ + ...baseOpts, + currentVersion: "0.1.12", + subjects: ["feat: keep subject stable"], + bodies: ["BREAKING CHANGE: overhaul session-memory semantics"], + eventName: "pull_request", + }); + assertEquals(result, { + skip: false, + version: "0.2.0-canary.abc123d.20260212091429", + tag: "canary", + }); + }); + }); +}); + +describe("run", () => { + it("writes release outputs for the git-tag CLI path used by GitHub Actions", async () => { + const cli = makeCliDeps({ + env: { + GITHUB_EVENT_NAME: "push", + GITHUB_OUTPUT: "/tmp/github-output", + COMMIT_SHA: "override-sha-1234567", + }, + files: { + "deno.json": JSON.stringify({ name: "opencode-graphiti" }), + }, + commands: { + "git describe --tags --abbrev=0 --match v*": "v1.2.3", + "git log v1.2.3..HEAD --format=%s": "feat: ship cli coverage", + "git log v1.2.3..HEAD --format=%b": "", + "git diff --name-only v1.2.3..HEAD": ".github/scripts/version.ts\n", + }, + }); + + await run([], cli.deps); + + assertEquals(cli.outputs, ["version=1.3.0\n", "tag=latest\n"]); + assertEquals(cli.logs, [ + "version=1.3.0", + "tag=latest", + "Release version: 1.3.0", + ]); + assertEquals( + cli.calls.includes("git describe --tags --abbrev=0 --match v*"), + true, + ); + }); + + it("covers the no-tag fallback path, package discovery, args fallback, and canary output", async () => { + const cli = makeCliDeps({ + env: { + GITHUB_OUTPUT: "/tmp/github-output", + }, + files: { + "package.json": JSON.stringify({ name: "fallback-package" }), + }, + commands: { + "git describe --tags --abbrev=0 --match v*": new Error("no tags"), + "npm view fallback-package version": "0.1.0", + "git log --format=%s": "docs: note fallback behavior", + "git log --format=%b": "", + "git show --format= --name-only HEAD": 
"src/mod.ts\n", + }, + now: new Date("2026-02-12T09:14:29Z"), + }); + + await run(["pull_request", "abcdef1234567890"], cli.deps); + + assertEquals(cli.outputs, [ + "version=0.1.1-canary.abcdef1.20260212091429\n", + "tag=canary\n", + ]); + assertEquals( + cli.logs.at(-1), + "Canary version: 0.1.1-canary.abcdef1.20260212091429", + ); + assertEquals(cli.calls.includes("npm view fallback-package version"), true); + }); + + it("reads the package name from commented deno.jsonc content", async () => { + const cli = makeCliDeps({ + env: { + GITHUB_OUTPUT: "/tmp/github-output", + }, + files: { + "deno.jsonc": `{ + // Package metadata for release automation. + "name": "commented-package", + /* Keep the rest of the manifest commented-friendly. */ + "version": "0.0.0-development" +}`, + }, + commands: { + "git describe --tags --abbrev=0 --match v*": new Error("no tags"), + "npm view commented-package version": "0.2.0", + "git log --format=%s": "docs: note jsonc support", + "git log --format=%b": "", + "git show --format= --name-only HEAD": ".github/scripts/version.ts\n", + }, + now: new Date("2026-02-12T09:14:29Z"), + }); + + await run(["pull_request", "abcdef1234567890"], cli.deps); + + assertEquals(cli.outputs, [ + "version=0.2.1-canary.abcdef1.20260212091429\n", + "tag=canary\n", + ]); + assertEquals( + cli.calls.includes("npm view commented-package version"), + true, + ); + }); + + it("emits skip=true when only test files changed", async () => { + const cli = makeCliDeps({ + env: { + GITHUB_EVENT_NAME: "push", + GITHUB_OUTPUT: "/tmp/github-output", + }, + files: { + "deno.json": JSON.stringify({ name: "opencode-graphiti" }), + }, + commands: { + "git rev-parse HEAD": "abc123def4567890", + "git describe --tags --abbrev=0 --match v*": "v1.2.3", + "git log v1.2.3..HEAD --format=%s": "test: add cli coverage", + "git log v1.2.3..HEAD --format=%b": "", + "git diff --name-only v1.2.3..HEAD": + ".github/scripts/version.test.ts\n", + }, + }); + + await run([], cli.deps); + + 
assertEquals(cli.outputs, ["skip=true\n"]); + assertEquals(cli.logs, [ + "skip=true", + "No release-triggering commits since v1.2.3, skipping", + ]); + }); + + it("emits skip=true in the no-tag fallback when only the current commit changes test files", async () => { + const cli = makeCliDeps({ + env: { + GITHUB_EVENT_NAME: "pull_request", + GITHUB_OUTPUT: "/tmp/github-output", + }, + files: { + "package.json": JSON.stringify({ name: "fallback-package" }), + }, + commands: { + "git rev-parse HEAD": "abcdef1234567890", + "git describe --tags --abbrev=0 --match v*": new Error("no tags"), + "npm view fallback-package version": "0.1.0", + "git log --format=%s": "docs: note fallback behavior", + "git log --format=%b": "", + "git show --format= --name-only HEAD": + ".github/scripts/version.test.ts\n", + }, + now: new Date("2026-02-12T09:14:29Z"), + }); + + await run([], cli.deps); + + assertEquals(cli.outputs, ["skip=true\n"]); + assertEquals(cli.logs, [ + "skip=true", + "No release-triggering commits since initial, skipping", + ]); }); }); diff --git a/.github/scripts/version.ts b/.github/scripts/version.ts index 7352835..4a25011 100644 --- a/.github/scripts/version.ts +++ b/.github/scripts/version.ts @@ -18,16 +18,140 @@ export type VersionResult = | { skip: true } | { skip: false; version: string; tag: "latest" | "canary" }; +export interface VersionCliDeps { + cmd: (...command: string[]) => Promise; + readTextFile: (filePath: string) => Promise; + envGet: (name: string) => string | undefined; + appendFile: (filePath: string, text: string) => void; + log: (message: string) => void; + now: () => Date; +} + +function stripJsonComments(text: string): string { + let result = ""; + let inString = false; + let escaped = false; + + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const nextChar = text[index + 1]; + + if (inString) { + result += char; + if (escaped) { + escaped = false; + } else if (char === "\\") { + escaped = true; + } 
else if (char === '"') { + inString = false; + } + continue; + } + + if (char === '"') { + inString = true; + result += char; + continue; + } + + if (char === "/" && nextChar === "/") { + index += 2; + while (index < text.length && text[index] !== "\n") { + index += 1; + } + if (index < text.length) { + result += text[index]; + } + continue; + } + + if (char === "/" && nextChar === "*") { + index += 2; + while ( + index < text.length - 1 && + !(text[index] === "*" && text[index + 1] === "/") + ) { + if (text[index] === "\n") { + result += "\n"; + } + index += 1; + } + index += 1; + continue; + } + + result += char; + } + + return result; +} + +function parsePackageManifest(text: string, filePath: string): unknown { + if (filePath.endsWith(".jsonc")) { + return JSON.parse(stripJsonComments(text)); + } + + return JSON.parse(text); +} + +function getPackageNameFromManifest(manifest: unknown): string | undefined { + if ( + manifest && + typeof manifest === "object" && + "name" in manifest && + typeof manifest.name === "string" + ) { + return manifest.name; + } + + return undefined; +} + +const defaultVersionCliDeps: VersionCliDeps = { + cmd: async (...command: string[]): Promise => { + const proc = new Deno.Command(command[0], { + args: command.slice(1), + stdout: "piped", + stderr: "piped", + }); + const { stdout } = await proc.output(); + return new TextDecoder().decode(stdout).trim(); + }, + readTextFile: (filePath) => Deno.readTextFile(filePath), + envGet: (name) => Deno.env.get(name), + appendFile: (filePath, text) => { + Deno.writeTextFileSync(filePath, text, { append: true }); + }, + log: (message) => console.log(message), + now: () => new Date(), +}; + +/** + * Returns true when any commit body contains a semantic-release style breaking + * change footer/header such as `BREAKING CHANGE: details`. 
+ */ +export function hasBreakingChangeBody(bodies: string[]): boolean { + return bodies.some((body) => /^BREAKING CHANGE:/im.test(body)); +} + /** - * Analyze conventional commit subjects and return the highest bump type. + * Analyze conventional commits and return the highest bump type. + * + * Supported formats: + * - `feat: add feature` -> minor + * - `fix: resolve bug` / `perf: speed up path` -> patch + * - `feat!: breaking api change` / `fix!: breaking bugfix` -> major + * - `BREAKING CHANGE: explanation` in a commit body -> major + * - `Release-As: x.y.z` is handled separately as an exact override * - * Rules: - * - `BREAKING CHANGE` in body or `type!:` → major - * - `feat:` → minor - * - `fix:` / `perf:` → patch - * - Anything else → none + * In `0.x`, a major bump resolves to the next minor version. */ -export function analyzeCommits(subjects: string[]): Bump { +export function analyzeCommits( + subjects: string[], + bodies: string[] = [], +): Bump { + if (hasBreakingChangeBody(bodies)) return "major"; + let bump: Bump = "none"; for (const msg of subjects) { @@ -111,6 +235,17 @@ export function hasNonTestChanges(changedFiles: string[]): boolean { return changedFiles.some((file) => file && !file.endsWith(".test.ts")); } +/** Parse newline-separated changed-file output into a stable unique list. */ +export function parseChangedFiles(output: string): string[] { + return [ + ...new Set( + output.split("\n").map((line) => line.trim()).filter( + Boolean, + ), + ), + ]; +} + /** * Calculate the next version given all inputs. * @@ -121,7 +256,7 @@ export function calculateVersion(opts: { currentVersion: string; /** Conventional commit subjects since last release. */ subjects: string[]; - /** Commit bodies (for Release-As detection). */ + /** Commit bodies (for Release-As and BREAKING CHANGE detection). */ bodies: string[]; /** Whether this is a "push" (release) or "pull_request" (canary). 
*/ eventName: "push" | "pull_request"; @@ -151,8 +286,8 @@ export function calculateVersion(opts: { return { skip: false, version, tag } as const; } - // Analyze commits - let bump = analyzeCommits(opts.subjects); + // Analyze commits using subjects plus semantic-release style body footers. + let bump = analyzeCommits(opts.subjects, opts.bodies); // When no git tags, default to patch bump from npm baseline if (opts.noGitTags && bump === "none") { @@ -185,33 +320,28 @@ export function calculateVersion(opts: { // CLI entry point // --------------------------------------------------------------------------- -async function run(args: string[]): Promise { - const cmd = async (...command: string[]): Promise => { - const proc = new Deno.Command(command[0], { - args: command.slice(1), - stdout: "piped", - stderr: "piped", - }); - const { stdout } = await proc.output(); - return new TextDecoder().decode(stdout).trim(); - }; - +export async function run( + args: string[], + deps: VersionCliDeps = defaultVersionCliDeps, +): Promise { + const { cmd, readTextFile, envGet, appendFile, log, now } = deps; const output = (key: string, value: string): void => { - const ghOutput = Deno.env.get("GITHUB_OUTPUT"); + const ghOutput = envGet("GITHUB_OUTPUT"); if (ghOutput) { - Deno.writeTextFileSync(ghOutput, `${key}=${value}\n`, { append: true }); + appendFile(ghOutput, `${key}=${value}\n`); } - console.log(`${key}=${value}`); + log(`${key}=${value}`); }; // Read package name from deno.json or package.json let packageName = "unknown"; for (const file of ["deno.json", "deno.jsonc", "package.json"]) { try { - const text = await Deno.readTextFile(file); - const json = JSON.parse(text); - if (json.name) { - packageName = json.name; + const text = await readTextFile(file); + const manifest = parsePackageManifest(text, file); + const manifestPackageName = getPackageNameFromManifest(manifest); + if (manifestPackageName) { + packageName = manifestPackageName; break; } } catch { @@ -219,11 
+349,11 @@ async function run(args: string[]): Promise { } } - const eventName = (Deno.env.get("GITHUB_EVENT_NAME") ?? args[0] ?? "push") as + const eventName = (envGet("GITHUB_EVENT_NAME") ?? args[0] ?? "push") as | "push" | "pull_request"; - const commitSha = Deno.env.get("COMMIT_SHA") ?? - Deno.env.get("GITHUB_SHA") ?? + const commitSha = envGet("COMMIT_SHA") ?? + envGet("GITHUB_SHA") ?? args[1] ?? await cmd("git", "rev-parse", "HEAD"); @@ -250,8 +380,9 @@ async function run(args: string[]): Promise { currentVersion = npmVersion || "0.0.0"; subjects = (await cmd("git", "log", "--format=%s")).split("\n"); bodies = (await cmd("git", "log", "--format=%b")).split("\n"); - changedFiles = (await cmd("git", "ls-tree", "-r", "--name-only", "HEAD")) - .split("\n"); + changedFiles = parseChangedFiles( + await cmd("git", "show", "--format=", "--name-only", "HEAD"), + ); noGitTags = true; } else { currentVersion = latestTag.replace(/^v/, ""); @@ -267,16 +398,18 @@ async function run(args: string[]): Promise { `${latestTag}..HEAD`, "--format=%b", )).split("\n"); - changedFiles = (await cmd( - "git", - "diff", - "--name-only", - `${latestTag}..HEAD`, - )).split("\n"); + changedFiles = parseChangedFiles( + await cmd( + "git", + "diff", + "--name-only", + `${latestTag}..HEAD`, + ), + ); noGitTags = false; } - const timestamp = new Date().toISOString().replace(/[-:T]/g, "").slice(0, 14); + const timestamp = now().toISOString().replace(/[-:T]/g, "").slice(0, 14); const result = calculateVersion({ currentVersion, @@ -291,13 +424,13 @@ async function run(args: string[]): Promise { if (result.skip) { output("skip", "true"); - console.log( + log( `No release-triggering commits since ${latestTag || "initial"}, skipping`, ); } else { output("version", result.version); output("tag", result.tag); - console.log( + log( `${ result.tag === "canary" ? 
"Canary" : "Release" } version: ${result.version}`, diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index c13dd82..6201ece 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -42,27 +42,42 @@ jobs: with: node-version: 24 - - name: Check if version exists + - name: Check if version exists on npm if: steps.version.outputs.skip != 'true' - id: check + id: npm run: | if npm view "opencode-graphiti@${{ steps.version.outputs.version }}" version 2>/dev/null; then - echo "skip=true" >> "$GITHUB_OUTPUT" - echo "Version ${{ steps.version.outputs.version }} already exists, skipping" + echo "publish=false" >> "$GITHUB_OUTPUT" + echo "Version ${{ steps.version.outputs.version }} already exists on npm, skipping publish" else - echo "skip=false" >> "$GITHUB_OUTPUT" + echo "publish=true" >> "$GITHUB_OUTPUT" fi - name: Publish - if: steps.version.outputs.skip != 'true' && steps.check.outputs.skip != 'true' + if: steps.version.outputs.skip != 'true' && steps.npm.outputs.publish == 'true' working-directory: dist run: npm publish --provenance --access public --tag ${{ steps.version.outputs.tag }} - name: Tag and Release - if: github.event_name == 'push' && steps.version.outputs.skip != 'true' && steps.check.outputs.skip != 'true' + if: github.event_name == 'push' && steps.version.outputs.skip != 'true' run: | - git tag "v${{ steps.version.outputs.version }}" - git push origin "v${{ steps.version.outputs.version }}" - gh release create "v${{ steps.version.outputs.version }}" --generate-notes + set -euo pipefail + tag="v${{ steps.version.outputs.version }}" + + if git show-ref --verify --quiet "refs/tags/$tag"; then + echo "Tag $tag already exists locally" + elif git ls-remote --exit-code --tags origin "refs/tags/$tag" >/dev/null 2>&1; then + git fetch --tags origin + echo "Tag $tag already exists on origin" + else + git tag "$tag" + git push origin "$tag" + fi + + if gh release view "$tag" >/dev/null 2>&1; then + echo "Release 
$tag already exists" + else + gh release create "$tag" --generate-notes + fi env: GH_TOKEN: ${{ github.token }} diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..0b5c531 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,176 @@ +# opencode-graphiti: Repository Anti-Drift Guide + +## What This Repository Does + +**opencode-graphiti** is an OpenCode plugin that provides persistent memory for +AI agent sessions. It is a two-layer architecture: + +- **Short-term memory**: Continuously extracts structured session events + (decisions, tasks, file edits, errors) and rebuilds a priority-tiered snapshot + stored in Redis/FalkorDB. This snapshot survives compaction and is re-injected + before every LLM call. +- **Long-term memory**: Asynchronously sends buffered events to Graphiti (a + knowledge graph MCP server) in the background. Graphiti results are cached + locally in Redis and injected alongside the short-term snapshot for + cross-session recall. + +**Key invariant**: Graphiti is never on the hot path. All writes and queries for +chat/compaction hooks use only Redis/FalkorDB; Graphiti updates happen +asynchronously on idle or after compaction. + +## Critical Architecture Boundaries + +### Hot Path + +- **Redis/FalkorDB** only. ioredis TCP client at `localhost:6379` + (configurable). +- Stores: session events, snapshots, memory cache, pending drain batches. +- Used by: `chat.message`, `messages.transform`, `session.compacting`, event + handlers. + +### Async Tier (Background) + +- **Graphiti MCP** HTTP endpoint (default `localhost:8000/mcp`, configurable). +- Async drain service: batches buffered events, retries on failure, flushes on + idle or post-compaction. +- Background cache refresh: searches Graphiti when topic drift is detected, + updates Redis cache. +- Never blocks hook return time. + +### Session Continuity Across Delegation + +- Child/subagent sessions resolve to root sessionID via `parentID` chain. 
+- All child events are recorded in the root session's event log. +- Snapshots and `` injection reflect combined parent + child + activity. +- Deleting a child session preserves root state and events. + +## Workflows + +### Session Memory Injection + +1. **chat.message**: Session events + snapshot loaded from Redis, cached + Graphiti facts retrieved, composed into `` XML, staged for + transform hook. +2. **messages.transform**: `` prepended to last user message + (right before LLM call). +3. **Drift detection**: Current query vs. cached query; if Jaccard similarity < + `driftThreshold` (default 0.5), schedule Graphiti cache refresh for next + turn. +4. **session.compacting**: Same `` envelope injected into + compaction summary (no fresh Graphiti call). + +### Event Extraction and Buffering + +- User/assistant messages captured as `SessionEvent` objects, stored in Redis as + `session:{id}:events`. +- Events queued for async drain to Graphiti. +- **On idle** (`session.idle`): drain pending events to Graphiti, rebuild + snapshot. +- **Post-compaction** (`session.compacted`): schedule async drain and snapshot + rebuild. + +### GitHub PR Review Handling + +- **See `docs/ReviewProtocol.md`** for the complete workflow. +- Detect active PR → fetch unresolved review comments → spawn concurrent swarm + sessions per item → verify claims → apply narrow fixes → resolve threads → + push → request fresh review. + +## Validation Expectations + +- **Config loading**: Supports `cosmiconfig` discovery + nested `redis.*` and + `graphiti.*` keys. See `src/config.ts`. +- **Redis connectivity**: When available, Redis/FalkorDB stores events, + snapshots, and cache. If Redis is unavailable, the plugin degrades to + in-memory fallback. Graphiti is optional; plugin continues with local-only + mode if unavailable. +- **Compaction survival**: Snapshots and events must persist across compaction + cycles. Test via `plans/ContextOverhaulTests.md`. 
+- **Concurrency**: Multiple child sessions should not corrupt root snapshot. + Serialize child event writes to avoid race conditions. + +## Risky Areas + +1. **Session root resolution**: Parent ID chain walk must not infinite-loop; + validate chain structure to avoid cycles. +2. **Event ordering**: Redis LPUSH/LRANGE preserve order, but concurrent writes + risk out-of-order injection if not serialized. +3. **Snapshot budget**: Priority-tiered snapshot has hard limits + (`SNAPSHOT_BODY_BUDGET`, `PERSISTENT_MEMORY_BODY_BUDGET`). Oversized events + may be truncated; monitor via test suite. +4. **Drain batch retry logic**: Failed Graphiti writes retry up to + `drainRetryMax` times (default 3). Dead-lettered entries are retained in + Redis dead-letter storage but not automatically recovered. +5. **Cache stale reads**: On Graphiti unavailability, cached facts may be stale; + no explicit cache invalidation exists. + +## Resume-Reading Order + +When starting work, read in this order: + +1. **This file** (AGENTS.md) — overview and boundaries. +2. **README.md** (§1–4) — detailed motivation, architecture, injection format, + workflows. +3. **docs/ReviewProtocol.md** — if handling PR reviews. +4. **src/index.ts** — plugin entry point; see which services are instantiated + and how. +5. **src/session.ts** — session ID resolution, memory composition, root-finding + logic. +6. **src/handlers/** — event capture, chat injection, compaction, message + transform. +7. **src/services/** — Redis clients, batch drain, Graphiti async worker, cache + management. +8. **plans/ContextOverhaul.md** — full design rationale (especially for async + decisions and event taxonomy). +9. **plans/ContextOverhaulTests.md** — test expectations and validation + checklist. +10. **deno.json** — dependencies and build tasks. 
+ +## Configuration + +Default config file locations (cosmiconfig order): + +- Project: `package.json#graphiti`, `.graphitirc`, `graphiti.config.*` +- Home: `~/.graphitirc`, `~/.config/graphiti/*` +- Legacy: `~/.config/opencode/.graphitirc` + +Canonical shape (nested): + +```jsonc +{ + "redis": { + "endpoint": "redis://localhost:6379", + "batchSize": 20, + "sessionTtlSeconds": 86400, + "cacheTtlSeconds": 600 + }, + "graphiti": { + "endpoint": "http://localhost:8000/mcp", + "groupIdPrefix": "opencode", + "driftThreshold": 0.5 + } +} +``` + +## Key Files & Their Scope + +| File | Purpose | +| ------------------------------------ | ----------------------------------------------------------- | +| `src/index.ts` | Plugin factory; wires all services. | +| `src/session.ts` | Session root resolution, memory composition, XML rendering. | +| `src/handlers/chat.ts` | `chat.message` hook; prepares ``. | +| `src/handlers/messages.ts` | `messages.transform` hook; injects into LLM message. | +| `src/handlers/compacting.ts` | `session.compacting` hook; injects for summarization. | +| `src/handlers/event.ts` | Event capture from all message hooks. | +| `src/services/redis-cache.ts` | Graphiti cache, drift detection, TTL. | +| `src/services/redis-events.ts` | Event list storage, cleanup. | +| `src/services/graphiti-async.ts` | Async drain worker, Graphiti interaction. | +| `src/services/connection-manager.ts` | Graphiti MCP health checks. | +| `src/services/batch-drain.ts` | Event batching, retry logic. | +| `plans/ContextOverhaul.md` | Full design document. | +| `docs/ReviewProtocol.md` | PR review handling workflow. | + +--- + +**Last Updated:** 2026-03-19 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fe37785..4592ab3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,6 +19,13 @@ deno test -A deno task build ``` +## Benchmarking + +The Redis benchmark helper lives at `scripts/bench-falkordb.ts`. 
It targets a +Redis/FalkorDB endpoint, defaults to `redis://localhost:6379` when no argument +is provided, and is intended for ad hoc local measurement rather than routine +CI/development. + ## Releasing Releases are fully automated via CI. The version in `deno.json` stays at @@ -37,13 +44,17 @@ tag to determine the next semver version: | `BREAKING CHANGE` or `type!:` | minor | major | CI creates a git tag (`v*`), publishes to npm under the `latest` dist-tag, and -creates a GitHub Release — all automatically. npm Trusted Publishers (OIDC) is -used, so no `NPM_TOKEN` secret is needed. +creates a GitHub Release — all automatically. If a rerun finds that the npm +version already exists, it skips only the publish step and still backfills any +missing tag or GitHub Release metadata for that version. npm Trusted Publishers +(OIDC) is used, so no `NPM_TOKEN` secret is needed. ### Canary releases -Opening a PR against `main` publishes a canary version under the `canary` npm -dist-tag (e.g. `1.2.3-canary.abc1234.20260212091429`). +Opening a PR against `main` runs the same publish workflow but publishes only a +canary version under the `canary` npm dist-tag (e.g. +`1.2.3-canary.abc1234.20260212091429`). PR runs do not create git tags or GitHub +Releases. ### Force a specific version diff --git a/README.md b/README.md index c6a07bc..28a8c75 100644 --- a/README.md +++ b/README.md @@ -1,73 +1,87 @@ # opencode-graphiti -OpenCode plugin that provides persistent memory via -[FalkorDB](https://www.falkordb.com/)/Redis and asynchronous -[Graphiti](https://github.com/getzep/graphiti) knowledge-graph consolidation. +OpenCode plugin that gives your AI agent **short-term memory** and **long-term +memory**. + +**Short-term memory** continuously summarizes and compacts every meaningful +session event — decisions, active tasks, file edits, errors, and more — into a +priority-tiered snapshot that is re-injected before every LLM call and every +compaction. 
The result is a rolling window of session continuity that +effectively extends the usable context far beyond the model's native limit: the +agent always knows what it was doing, even after the conversation is compacted. + +**Long-term memory** persists knowledge across sessions via a +[Graphiti](https://github.com/getzep/graphiti) knowledge graph, so the agent can +recall project facts, past decisions, and learned preferences from earlier work +— not just the current session. ## Motivation Long-running AI coding sessions depend on persistent memory to stay on track. -Graphiti's MCP server is a powerful knowledge-graph backend, but synchronous -calls to it on every message add latency and introduce a single point of failure -— connections drop, queries time out, and ingestion silently fails. When the -context window fills up and OpenCode triggers compaction, the summarizer -discards details that were never persisted. The result is **context rot**: the -agent loses track of recent decisions, re-explores solved problems, and drifts -away from the original goal. - -This plugin exists to close that gap. It uses **FalkorDB/Redis as the hot-path -store** for structured session events, priority-tiered snapshots, and cached -memory — all readable in sub-millisecond time. Graphiti remains the long-term -knowledge graph but is accessed **only asynchronously**, off the critical path. -The plugin re-injects session context before every LLM call and before every -compaction so the agent is always reminded of recent project context — -regardless of what survived the summary and regardless of Graphiti availability. +When the context window fills up and OpenCode triggers compaction, the +summarizer discards details that were never captured outside the conversation. +The result is **context rot**: the agent loses track of recent decisions, +re-explores solved problems, and drifts away from the original goal. 
+ +Graphiti's MCP server is a powerful knowledge-graph backend, but calling it on +every message adds latency and introduces a single point of failure — +connections drop, queries time out, and ingestion silently fails. + +This plugin exists to close both gaps. + +**Short-term memory** captures every meaningful event during the session — +decisions, task progress, file edits, errors, environment changes — and +continuously summarizes them into a compact, priority-tiered snapshot. That +snapshot is re-injected before every LLM call and before every compaction, so +the agent always retains a coherent picture of the active workstream. Because +the snapshot is continuously rebuilt from structured events rather than raw +conversation text, it survives compaction intact: the model picks up exactly +where it left off, no matter how many times the conversation has been +summarized. In practice, this creates a rolling session memory that extends the +effective context window well beyond the model's native limit. + +**Long-term memory** lives in Graphiti's knowledge graph, which is updated in +the background so it never slows down your conversation. It provides +cross-session recall — project facts, past decisions, and learned preferences +from earlier sessions — cached locally for instant injection alongside the +short-term snapshot. 
## Overview -This plugin uses a two-tier architecture: +This plugin uses a two-layer memory architecture: -**Hot path (FalkorDB/Redis — synchronous, sub-ms):** +**Short-term memory — continuously summarized session continuity:** -- Stores structured session events, priority-tiered snapshots, and cached - Graphiti results in Redis -- Reads cached memory on each user message and injects it into the last user - message as a `` block via - `experimental.chat.messages.transform`, keeping the system prompt static for - prefix caching -- Composes the same `` envelope for compaction context via - `experimental.session.compacting` -- Detects context drift using Jaccard similarity on cached fact UUIDs and - schedules an async cache refresh when the topic shifts +- Captures every meaningful event (decisions, tasks, file edits, errors, + environment changes) as structured session events +- Continuously rebuilds a priority-tiered snapshot from those events, keeping + the most important context within a tight budget +- Re-injects the snapshot before every LLM call and every compaction as a + `` block, so the agent never loses track of the active + workstream — even after repeated compactions +- Detects topic drift and schedules a background refresh of cached long-term + facts when the conversation shifts -**Async tier (Graphiti MCP — fire-and-forget, non-blocking):** +**Long-term memory — persistent cross-session recall via Graphiti:** -- Drains buffered session events to Graphiti as episodes on idle or before +- Sends buffered session events to Graphiti as episodes on idle or before compaction -- Refreshes the Redis memory cache from Graphiti search results in the +- Refreshes the local memory cache from Graphiti search results in the background -- Provides cross-session recall via vector/graph search, cached in Redis for - chat-time injection -- Saves compaction summaries as episodes so knowledge survives across boundaries +- Provides cross-session recall via vector/graph 
search, cached locally for + instant injection alongside the short-term snapshot +- Saves compaction summaries as episodes so knowledge survives across session + boundaries -No Graphiti call ever blocks a hook return. +Graphiti stays off the steady-state hook path entirely: hook-time injection uses +only Redis/local cached recall, while fresh Graphiti data arrives through the +existing background refresh path on later turns. ## Prerequisites -### FalkorDB / Redis - -A running [FalkorDB](https://www.falkordb.com/) instance accessible via the -Redis protocol. The easiest way to start one: - -```bash -docker run -p 6379:6379 falkordb/falkordb:latest -``` - -### Graphiti MCP Server - -A running +Start the [Graphiti MCP server](https://github.com/getzep/graphiti/tree/main/mcp_server) -accessible over HTTP: +with its default [FalkorDB](https://www.falkordb.com/) backend: ```bash git clone https://github.com/getzep/graphiti.git @@ -75,12 +89,17 @@ cd graphiti/mcp_server docker compose up -d ``` -This starts the MCP server at `http://localhost:8000/mcp`. +This starts Graphiti at `http://localhost:8000/mcp` and FalkorDB/Redis on +`localhost:6379`. + +This plugin reuses that same FalkorDB/Redis storage layer alongside Graphiti: it +keeps short-term memory locally for every turn, while Graphiti builds the +long-term knowledge graph on top of the same backend. > **Note:** Graphiti is optional for basic operation. If Graphiti is > unavailable, the plugin continues to function with FalkorDB/Redis-sourced > session memory; only the `` section (long-term -> cross-session facts) will be empty until Graphiti comes online. +> cross-session facts) will be absent until Graphiti comes online. 
## Installation @@ -96,19 +115,14 @@ Add the plugin to your `opencode.json` (or `opencode.jsonc`): ### Option B: Local build -Clone and build, then reference the built file: - -```bash -git clone https://github.com/vicary/opencode-graphiti.git -cd opencode-graphiti -deno task build -``` - -Then add to your `opencode.json`: +Local distributable builds are not a routine local setup step: `deno task +build` +requires an explicit `VERSION` via `dnt.ts`. If you already have a built +artifact, add it to your `opencode.json`: ```jsonc { - "plugin": ["file:///absolute/path/to/opencode-graphiti/dist/index.js"] + "plugin": ["file:///absolute/path/to/opencode-graphiti/dist/esm/mod.js"] } ``` @@ -118,11 +132,11 @@ Copy the built plugin into OpenCode's auto-loaded plugin directory: ```bash # Global (all projects) -cp dist/index.js ~/.config/opencode/plugins/opencode-graphiti.js +cp dist/esm/mod.js ~/.config/opencode/plugins/opencode-graphiti.js # Or project-level mkdir -p .opencode/plugins -cp dist/index.js .opencode/plugins/opencode-graphiti.js +cp dist/esm/mod.js .opencode/plugins/opencode-graphiti.js ``` No config entry needed — OpenCode loads plugins from these directories @@ -142,9 +156,9 @@ Supported config locations, in lookup order: ```jsonc { - "falkordb": { - // FalkorDB Redis URL - "redisEndpoint": "redis://localhost:6379", + "redis": { + // Redis endpoint used for the plugin hot tier + "endpoint": "redis://localhost:6379", // Max events per drain batch "batchSize": 20, // Max combined body bytes per drain batch @@ -162,94 +176,99 @@ Supported config locations, in lookup order: // Prefix for project group IDs (e.g. "opencode-my-project") "groupIdPrefix": "opencode", // Jaccard similarity threshold (0–1) below which cache is refreshed - "driftThreshold": 0.5, - // Number of days after which facts are annotated as stale - "factStaleDays": 30 + "driftThreshold": 0.5 } } ``` All fields are optional — defaults (shown above) are used for any missing -values. 
Nested values take precedence when both forms are supplied. +values. Canonical nested values take precedence when both forms are supplied. + +### Retained Compatibility + +The canonical hot-tier config shape is `redis.*`. Only the original Graphiti +top-level aliases remain supported for backward compatibility. Precedence is: + +1. `redis.*` (canonical) +2. top-level Graphiti aliases such as `endpoint` and `groupIdPrefix` ### Legacy Top-Level Keys -For backward compatibility, the following top-level keys are still accepted and -map to their nested equivalents: - -| Legacy key | Nested equivalent | -| ------------------- | ---------------------------- | -| `endpoint` | `graphiti.endpoint` | -| `groupIdPrefix` | `graphiti.groupIdPrefix` | -| `driftThreshold` | `graphiti.driftThreshold` | -| `factStaleDays` | `graphiti.factStaleDays` | -| `redisEndpoint` | `falkordb.redisEndpoint` | -| `batchSize` | `falkordb.batchSize` | -| `batchMaxBytes` | `falkordb.batchMaxBytes` | -| `sessionTtlSeconds` | `falkordb.sessionTtlSeconds` | -| `cacheTtlSeconds` | `falkordb.cacheTtlSeconds` | -| `drainRetryMax` | `falkordb.drainRetryMax` | +For backward compatibility, the following original Graphiti top-level keys are +still accepted and map to their nested equivalents: + +| Legacy key | Nested equivalent | +| ---------------- | ------------------------- | +| `endpoint` | `graphiti.endpoint` | +| `groupIdPrefix` | `graphiti.groupIdPrefix` | +| `driftThreshold` | `graphiti.driftThreshold` | + +Removed top-level Redis aliases are no longer supported. ## How It Works ### Injection Format -The plugin injects a single canonical `` XML envelope into the -last user message. This envelope is assembled from Redis hot-tier state and -contains structured sections such as ``, ``, +The plugin currently injects a `` XML envelope into the last +user message. This envelope is assembled from short-term memory in Redis and can +contain structured sections such as ``, ``, ``, ``, ``, and an optional ``. 
-When cached Graphiti results are available, a nested `` -section is included with `fact_uuids` and `node_refs` attributes. On a cold -first turn or when Graphiti is unreachable, `` is simply -absent — the rest of the session memory is always available from FalkorDB/Redis. +When long-term memory is available, a nested `` section is +included with a `node_refs` attribute naming the emitted cached entities. On a +cold first turn or when Graphiti is unreachable, `` is simply +absent — the rest of the session memory is always available from short-term +storage in FalkorDB/Redis. ```xml - + Continue the current task. Implement the new feature. - Use Redis for the hot path. + Use Redis for short-term memory. src/index.ts - No synchronous Graphiti calls. + Graphiti runs in the background only. - - + + ``` -### Hot-Path Memory Preparation (`chat.message`) +### Session Memory Preparation (`chat.message`) -On each user message the plugin reads session state from Redis: +On each user message the plugin assembles the current session memory from three +sources: -- Recent structured session events (`session:{id}:events`) -- The priority-tiered snapshot (`session:{id}:snapshot`) -- The cached Graphiti memory (`memory-cache:{groupId}`) +- Recent structured session events +- The continuously rebuilt priority-tiered snapshot +- Cached long-term facts from Graphiti These are composed into a `` envelope and staged for the -transform hook. All reads are from Redis (sub-ms); no Graphiti call is made on -this path. +transform hook. The hook-time reads are local/cache-backed only; any fresh +Graphiti lookup remains on the existing background refresh path and benefits the +next turn instead of blocking the current one. ### User Message Injection (`experimental.chat.messages.transform`) The transform hook reads the prepared `` envelope and prepends -it to the last user message. 
Fact UUIDs from the `` section -are tracked in `visibleFactUuids` so subsequent cache refreshes can filter out -already-visible facts. This approach keeps the system prompt static, enabling -provider-side prefix caching, and avoids influencing session titles. The -prepared injection is cleared after use so stale context is not re-injected on -subsequent LLM calls within the same turn. +it to the last user message. Legacy `` and older +`` blocks are still scrubbed and parsed for +compatibility, while current `` output uses `node_refs`. This +approach keeps the system prompt static, enabling provider-side prefix caching, +and avoids influencing session titles. The prepared injection is cleared after +use so stale context is not re-injected on subsequent LLM calls within the same +turn. -### Drift Detection and Async Cache Refresh +### Drift Detection and Background Cache Refresh On each user message, the plugin compares the current query against the query -that produced the cached memory. When Jaccard similarity on cached fact UUIDs -drops below `driftThreshold` (default 0.5), an **async** cache refresh is -scheduled via Graphiti MCP. The current cached context is still injected -immediately; the refreshed cache becomes available on the next message. This -trades one message of staleness for eliminating synchronous Graphiti latency -entirely. +that produced the cached memory. When Jaccard similarity between the current +query text and cached query text drops below `driftThreshold` (default 0.5), a +background cache refresh is scheduled via Graphiti. The current cached context +is still injected immediately; the refreshed cache becomes available on the next +message. This trades one message of staleness for keeping most long-term memory +refresh work off the response-time path. ### Event Extraction and Buffering (`event`) @@ -258,25 +277,54 @@ and stored in Redis (`session:{id}:events`). 
The plugin listens on `message.part.updated` to buffer assistant text as it streams, and on `message.updated` to finalize completed assistant replies. -Events are also enqueued for async drain to Graphiti: +Events are also queued for background ingestion into long-term memory: -- **On idle** (`session.idle`): buffered events are drained and the +- **On idle** (`session.idle`): buffered events are sent to Graphiti and the priority-tiered snapshot is rebuilt. -- **Before compaction** (`session.compacted`): all pending events are drained +- **After compaction** (`session.compacted`): all pending events are sent immediately so nothing is lost. ### Compaction Preservation Compaction is handled entirely by OpenCode's native compaction mechanism. The -plugin participates in two ways: +plugin ensures session continuity survives each compaction cycle: -1. **Before compaction** (`experimental.session.compacting`): The plugin reads - the snapshot and cached memory from Redis and composes the same canonical - `` envelope used for chat injection, so the summarizer +1. **Before compaction** (`experimental.session.compacting`): The plugin injects + the same `` envelope used for chat — including the + priority-tiered snapshot and cached long-term facts — so the summarizer preserves important knowledge. No Graphiti call is made. 2. **After compaction** (`session.compacted`): The snapshot is rebuilt from - Redis events and the compaction summary is enqueued for async drain to - Graphiti, ensuring knowledge survives across compaction boundaries. + structured events and the compaction summary is sent to Graphiti in the + background, ensuring knowledge survives across compaction boundaries. + +Because the snapshot is rebuilt from structured events rather than raw +conversation text, the agent retains a coherent picture of the workstream +regardless of how aggressively the conversation was summarized.
+ +### Child / Subagent Session Handling + +> **Note:** This behavior intentionally diverges from +> [context-mode](https://github.com/mksglu/context-mode), which records subagent +> work as summarized tool events. This plugin promotes child sessions to +> first-class participants in the root session's state so that decisions, file +> edits, and errors from delegated work are fully visible to the parent session. +> See `plans/ContextOverhaul.md` §10.1 for the design rationale. + +When OpenCode spawns a child session (e.g. a subagent or delegated task), the +plugin resolves the child's `sessionID` to the root/parent session by walking +the `parentID` chain. All event storage, snapshot builds, and `` +injection then operate on the canonical root session, so child activity is +treated identically to parent activity: + +- Child prompts and responses are recorded in the same event log as the parent. +- The priority-tiered snapshot includes child-derived events when it is rebuilt. +- Future `` injections — for both parent and child turns — + reflect the combined activity of the entire session lineage. +- Deleting a child session removes only that child's local bookkeeping; the root + session's state, events, and snapshot are preserved. + +This means the agent retains full continuity across delegation boundaries +without any special configuration. ### Project Scoping @@ -288,7 +336,8 @@ projects stay isolated. ## Contributing See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and release -process. +process. In CI, pushes to `main` publish `latest` releases, while pull requests +targeting `main` publish canary builds under the `canary` dist-tag. ## License @@ -298,8 +347,7 @@ MIT The structured event extraction, priority-tiered snapshots, and session continuity design in this plugin are inspired by -[context-mode](https://github.com/mksglu/context-mode) by -[Mert Köseoğlu](https://github.com/mksglu). +[context-mode](https://github.com/mksglu/context-mode). 
The original plugin concept is inspired by [opencode-openmemory](https://github.com/happycastle114/opencode-openmemory). diff --git a/deno.json b/deno.json index 7cbc3ea..4142565 100644 --- a/deno.json +++ b/deno.json @@ -5,12 +5,9 @@ "license": "MIT", "tasks": { "build": "deno run -A dnt.ts", - "deploy": { - "command": "cd dist/ && npm publish", - "dependencies": ["build"] - }, - "dev": "deno run --allow-all src/index.ts", "check": "deno check src/index.ts", + "deploy": "deno eval \"throw new Error('Manual publish is disabled. Use the GitHub Actions publish workflow for releases.')\"", + "dev": "deno run --allow-all src/index.ts", "lint": "deno lint", "fmt": "deno fmt" }, @@ -30,8 +27,8 @@ "nodeModulesDir": "auto", "imports": { "@modelcontextprotocol/sdk": "npm:@modelcontextprotocol/sdk@^1.25.2", - "@opencode-ai/plugin": "npm:@opencode-ai/plugin@^1.1.53", - "@opencode-ai/sdk": "npm:@opencode-ai/sdk@^1.1.53", + "@opencode-ai/plugin": "npm:@opencode-ai/plugin@1.2.26", + "@opencode-ai/sdk": "npm:@opencode-ai/sdk@1.2.26", "cosmiconfig": "npm:cosmiconfig@9.0.0", "ioredis": "npm:ioredis@^5.7.0", "zod": "npm:zod@4.3.6" diff --git a/deno.lock b/deno.lock index 4bab00e..0ede3b8 100644 --- a/deno.lock +++ b/deno.lock @@ -14,8 +14,8 @@ "jsr:@ts-morph/bootstrap@0.27": "0.27.0", "jsr:@ts-morph/common@0.27": "0.27.0", "npm:@modelcontextprotocol/sdk@^1.25.2": "1.25.2_zod@4.3.6_ajv@8.17.1_express@5.2.1", - "npm:@opencode-ai/plugin@^1.1.53": "1.1.53", - "npm:@opencode-ai/sdk@^1.1.53": "1.1.53", + "npm:@opencode-ai/plugin@1.2.26": "1.2.26", + "npm:@opencode-ai/sdk@1.2.26": "1.2.26", "npm:cosmiconfig@9.0.0": "9.0.0", "npm:ioredis@^5.7.0": "5.10.0", "npm:zod@4.3.6": "4.3.6" @@ -122,15 +122,15 @@ "zod-to-json-schema" ] }, - "@opencode-ai/plugin@1.1.53": { - "integrity": "sha512-9ye7Wz2kESgt02AUDaMea4hXxj6XhWwKAG8NwFhrw09Ux54bGaMJFt1eIS8QQGIMaD+Lp11X4QdyEg96etEBJw==", + "@opencode-ai/plugin@1.2.26": { + "integrity": 
"sha512-pC71KGAI9T0+S84KpbEq9THp5pT7KOq+GmfdXkvQ7KSH5zi+iASWRhqorir73sKmEj2MQfpbe1BxdcU5qbeOwA==", "dependencies": [ "@opencode-ai/sdk", "zod@4.1.8" ] }, - "@opencode-ai/sdk@1.1.53": { - "integrity": "sha512-RUIVnPOP1CyyU32FrOOYuE7Ge51lOBuhaFp2NSX98ncApT7ffoNetmwzqrhOiJQgZB1KrbCHLYOCK6AZfacxag==" + "@opencode-ai/sdk@1.2.26": { + "integrity": "sha512-HPB+0pfvTMPj2KEjNLF3oqgldKW8koTJ7ssqXwzndazqxS+gUynzvdIKIQP4+QIInNcc5nJMG9JtfLcePGgTLQ==" }, "accepts@2.0.0": { "integrity": "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==", @@ -727,8 +727,8 @@ "workspace": { "dependencies": [ "npm:@modelcontextprotocol/sdk@^1.25.2", - "npm:@opencode-ai/plugin@^1.1.53", - "npm:@opencode-ai/sdk@^1.1.53", + "npm:@opencode-ai/plugin@1.2.26", + "npm:@opencode-ai/sdk@1.2.26", "npm:cosmiconfig@9.0.0", "npm:ioredis@^5.7.0", "npm:zod@4.3.6" diff --git a/dnt.ts b/dnt.ts index 2f15c0f..502a1f9 100644 --- a/dnt.ts +++ b/dnt.ts @@ -1,7 +1,11 @@ import { build } from "jsr:@deno/dnt@^0.42.3"; import manifest from "./deno.json" with { type: "json" }; -const version = Deno.env.get("VERSION") || manifest.version; +const version = Deno.env.get("VERSION")?.trim() || manifest.version?.trim(); +if (!version) { + throw new Error('Specify $VERSION or set "version" in deno.json.'); +} + const outDir = "dist/"; await Deno.remove(outDir, { recursive: true }).catch(() => undefined); diff --git a/docs/ReviewProtocol.md b/docs/ReviewProtocol.md new file mode 100644 index 0000000..366ccf2 --- /dev/null +++ b/docs/ReviewProtocol.md @@ -0,0 +1,127 @@ +# GitHub PR Review Protocol + +Use this protocol when the current branch is linked to an open GitHub pull +request and review feedback needs to be handled systematically. 
+ +## Purpose + +- use live GitHub review state as the source of truth +- verify each review claim before changing code +- keep fixes narrow and scoped to the verified issue +- resolve handled review threads and request a fresh review cycle + +## Required Unresolved-Batch Query + +Use this command exactly as written for metadata-first traversal across +review-thread pages until it collects the first 10 unresolved threads. The 10 +unresolved items may be sparse, non-contiguous, and spread across multiple +pages. After that metadata pass, fetch narrow details only for that unresolved +batch. Do not rewrite, broaden, or replace it with an equivalent query. + +If this command fails for any reason, stop and report the failure explicitly +before taking any further review-handling action. + +```bash +deno eval 'const o="OWNER",r="REPO",n="PR_NUMBER",maxUnresolved=10,mq="query($o:String!,$r:String!,$n:Int!,$a:String){repository(owner:$o,name:$r){pullRequest(number:$n){reviewThreads(first:20,after:$a){pageInfo{hasNextPage endCursor}nodes{id isResolved isOutdated path}}}}}",dq="query($ids:[ID!]!){nodes(ids:$ids){... 
on PullRequestReviewThread{id path isResolved isOutdated comments(first:10){nodes{author{login}body url createdAt}}}}}";let a=null,t={pageInfo:{hasNextPage:false,endCursor:null},nodes:[]},u=[];for(;;){const c=new Deno.Command("gh",{args:["api","graphql","-f",`query=${mq}`,"-F",`o=${o}`,"-F",`r=${r}`,"-F",`n=${n}`,...(a?["-F",`a=${a}`]:[])]});const x=await c.output();if(!x.success){console.error(new TextDecoder().decode(x.stderr));Deno.exit(x.code)}t=JSON.parse(new TextDecoder().decode(x.stdout)).data.repository.pullRequest.reviewThreads;for(const node of t.nodes){if(!node.isResolved)u.push(node);if(u.length===maxUnresolved)break}if(u.length===maxUnresolved||!t.pageInfo.hasNextPage)break;a=t.pageInfo.endCursor}const ids=u.slice(0,maxUnresolved).map(x=>x.id);let d=[];if(ids.length){const c=new Deno.Command("gh",{args:["api","graphql","-f",`query=${dq}`,...ids.flatMap(id=>["-F",`ids[]=${id}`])]});const x=await c.output();if(!x.success){console.error(new TextDecoder().decode(x.stderr));Deno.exit(x.code)}d=JSON.parse(new TextDecoder().decode(x.stdout)).data.nodes.filter(Boolean)}console.log(JSON.stringify({pageInfo:t.pageInfo,batchSize:ids.length,exhausted:!t.pageInfo.hasNextPage&&ids.length { + assertEquals(Object.keys(pluginModule).sort(), ["graphiti"]); +}); diff --git a/plans/ConnectionManager.md b/plans/ConnectionManager.md index 74b8107..d8a3ff7 100644 --- a/plans/ConnectionManager.md +++ b/plans/ConnectionManager.md @@ -38,8 +38,9 @@ Responsibilities: - Reject requests that arrive while state is `offline` with a typed error, allowing higher-level APIs to degrade gracefully instead of stalling. - Expose a readiness signal (`ready(): Promise`) that resolves when the - first connection succeeds or a caller-supplied timeout elapses, so - first-message hooks can bound their wait. + first connection succeeds or a caller-supplied timeout elapses, for + diagnostics and background coordination only — never to gate hot-path memory + injection. 
- Expose a single request API for tool execution so `GraphitiClient` becomes a thin domain adapter. @@ -76,8 +77,9 @@ Create `src/services/connection-manager.ts` with: the MCP client, cancel any pending reconnect timer, then become inert. After `stop()` all subsequent `callTool` calls reject immediately. - `ready(timeoutMs?)` — returns a promise that resolves `true` when the - manager reaches `connected`, or `false` if the timeout elapses first. - Callers such as first-message hooks can use this to bound their wait. + manager reaches `connected`, or `false` if the timeout elapses first. This + is an observability/background coordination helper, not a hot-path gating + primitive. - `callTool(name, args, deadlineMs?)` — route requests according to current state; accepts an optional per-request deadline. - `reconnect()` — rebuild client and transport after disconnect/session loss. @@ -179,14 +181,14 @@ re-queue path still triggers. should continue to catch and log failures; no behavioral change beyond receiving typed errors instead of raw transport errors. -**`src/handlers/chat.ts`** — calls `searchFacts`, `searchNodes` during memory -injection. These are read operations that already return empty on failure. -Optionally, the chat handler can call `connectionManager.ready(timeoutMs)` -before the first memory injection to avoid injecting empty context when the -connection is still warming up. +**`src/handlers/chat.ts`** — hot-path memory injection must remain Redis/cache +only. The chat handler should not call `searchFacts`, `searchNodes`, or +`connectionManager.ready(timeoutMs)` before injection; Graphiti warmup and +refresh remain background-only. -**`src/handlers/compacting.ts`** — calls `searchFacts` and `getEpisodes` via -`getCompactionContext`. Read-path only; same fail-open behavior as today. +**`src/handlers/compacting.ts`** — compaction injection should use the same +Redis snapshot + cached-memory inputs as chat-time injection. 
It must not make +synchronous Graphiti reads on the hot path. **`src/services/client.ts`** — refactored as described in section 2. @@ -240,8 +242,7 @@ rejecting requests when the manager is offline. the new typed offline error correctly (re-queue path). 5. Verify `src/handlers/event.ts`, `src/handlers/chat.ts`, and `src/handlers/compacting.ts` — confirm read-path fail-open behavior is - unchanged. Optionally add `ready()` call in `chat.ts` before first memory - injection. + unchanged, and do not add a pre-injection `ready()` call in `chat.ts`. 6. Update tests in `src/services/client.test.ts` and add focused tests for the connection manager (see [Testing Plan](#testing-plan)). 7. Run `deno test`, `deno check src/index.ts`, and any relevant linting. diff --git a/plans/ContextOverhaul.md b/plans/ContextOverhaul.md index 50aeafb..16f9ddb 100644 --- a/plans/ContextOverhaul.md +++ b/plans/ContextOverhaul.md @@ -1,6 +1,7 @@ # Context Overhaul — FalkorDB Hot Path + Async Graphiti Consolidation -**Status:** Planning **Date:** 2026-03-13 (revised) +**Status:** In Implementation **Date:** 2026-03-13 (revised) | README overhaul +completed 2026-03-15 | Child-session routing documented 2026-03-15 --- @@ -67,7 +68,7 @@ opencode-graphiti plugin (TypeScript / Deno) | Target | Protocol | Default Port | Connection | | -------- | --------------- | ------------ | ----------------------------------------------------- | -| FalkorDB | Redis (ioredis) | 6379 | Direct TCP; configured via `falkordb.redisEndpoint` | +| Redis | Redis (ioredis) | 6379 | Direct TCP; configured via `redis.endpoint` | | Graphiti | MCP over HTTP | 8000 | Direct MCP client; configured via `graphiti.endpoint` | **Integration decision (final):** Graphiti MCP is the async consolidation @@ -151,14 +152,14 @@ type EventCategory = ### 4.2 Redis Key Layout -| Key | Type | Content | TTL | -| ----------------------------- | ------ | ------------------------------------------------ | ------ | -| `session:{id}:events` | List | 
JSON `SessionEvent` objects | 24 h | -| `session:{id}:snapshot` | String | Priority-tiered XML snapshot (≤ 3 KB) | 48 h | -| `memory-cache:{groupId}` | String | Serialized Graphiti search results | 10 min | -| `memory-cache:{groupId}:meta` | Hash | `lastQuery`, `lastRefresh`, `factUuids` | 10 min | -| `drain:pending:{groupId}` | List | Serialized drain-batch entries awaiting Graphiti | 7 d | -| `drain:cursor:{groupId}` | String | Last successfully drained event ID | 7 d | +| Key | Type | Content | TTL | +| ----------------------------- | ------ | ------------------------------------------------------ | ------ | +| `session:{id}:events` | List | JSON `SessionEvent` objects | 24 h | +| `session:{id}:snapshot` | String | Priority-tiered XML snapshot (≤ 3 KB) | 48 h | +| `memory-cache:{groupId}` | String | Serialized Graphiti search results | 10 min | +| `memory-cache:{groupId}:meta` | Hash | `lastQuery`, `lastRefresh` (+ optional extra metadata) | 10 min | +| `drain:pending:{groupId}` | List | Serialized drain-batch entries awaiting Graphiti | 7 d | +| `drain:cursor:{groupId}` | String | Last successfully drained event ID | 7 d | ### 4.3 Priority-Tiered Snapshot Format @@ -219,16 +220,21 @@ same semantic payloads through MCP tool calls (`add_memory`, ### 5.1 Hot Path (synchronous, sub-ms) +All hooks resolve the incoming `sessionID` to the canonical (root) session ID +before accessing state, events, or snapshots. Child/subagent sessions are routed +to the parent session's state transparently (see §10.1). 
+ | Hook | Action | | -------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| `event: message.part.updated` | Buffer assistant part in memory (unchanged) | -| `event: message.updated` (completed) | Extract `SessionEvent` → `LPUSH session:{id}:events` | +| `event: session.created` | Cache parent/child linkage; resolve canonical ID; `EXPIRE` reset; bootstrap best-effort async warmup / cross-session primer | +| `event: message.part.updated` | Buffer assistant part under canonical session ID | +| `event: message.updated` (completed) | Extract `SessionEvent` → `LPUSH session:{canonicalId}:events` | | `chat.message` | Extract user `SessionEvent` → `LPUSH`; read `memory-cache:{groupId}` + recent session state from Redis; prepare transform input | -| `event: session.idle` | Build priority-tiered snapshot → `SET session:{id}:snapshot`; trigger async cache refresh + drain | -| `event: session.compacted` | Build snapshot from events → `SET session:{id}:snapshot`; enqueue drain batch | +| `event: session.idle` | Build priority-tiered snapshot → `SET session:{canonicalId}:snapshot`; trigger async cache refresh + drain | +| `event: session.compacted` | Build snapshot from events → `SET session:{canonicalId}:snapshot`; enqueue drain batch | +| `event: session.deleted` | Delete only the reported session's local bookkeeping; canonical/root session state is preserved (see §10.1) | | `experimental.session.compacting` | Compose the same canonical `<session_memory>` envelope for compaction from Redis snapshot + cached memory | | `experimental.chat.messages.transform` | Actual chat-time injection point: compose canonical `<session_memory>` with optional `<persistent_memory>` from Redis-backed state | -| `event: session.created` | `EXPIRE` reset; bootstrap best-effort async warmup / cross-session primer only; cannot inject directly | ### 5.2 Async Tier (fire-and-forget, non-blocking) @@ -276,15 +282,15 @@ First user 
message arrives (`chat.message`) ### 6.3 Cache Lifecycle -| Event | Cache Action | -| --------------------- | ----------------------------------------------------------------------------------------------- | -| Plugin startup | Restore Redis clients only; no synchronous Graphiti warmup | -| `session.created` | Best-effort async prewarm of reusable cache and cross-session primer | -| first `chat.message` | Read cache (sync); inject if available via transform; schedule prompt-specific refresh | -| later `chat.message` | Read cache (sync); schedule refresh if stale or drifted (async) | -| `session.idle` | Refresh cache (async) — incorporates recently drained facts | -| Drain completes | Refresh cache (async) — new facts now searchable | -| Cache miss / cold run | Return empty `persistent_memory`; first injection still includes Redis-sourced `session_memory` | +| Event | Cache Action | +| --------------------- | --------------------------------------------------------------------------------------- | +| Plugin startup | Restore Redis clients only; no synchronous Graphiti warmup | +| `session.created` | Best-effort async prewarm of reusable cache and cross-session primer | +| first `chat.message` | Read cache (sync); inject if available via transform; schedule prompt-specific refresh | +| later `chat.message` | Read cache (sync); schedule refresh if stale or drifted (async) | +| `session.idle` | Refresh cache (async) — incorporates recently drained facts | +| Drain completes | Refresh cache (async) — new facts now searchable | +| Cache miss / cold run | Omit `persistent_memory`; first injection still includes Redis-sourced `session_memory` | ### 6.4 New-Session First-Turn Behavior @@ -301,7 +307,7 @@ combination of `event: session.created`, `chat.message`, and `session.created` bootstrap finishes before the first transform runs, relevant `persistent_memory` may appear on the first reply. 
- If the cache is cold, the first reply still receives `session_memory` from - FalkorDB, while `persistent_memory` may be empty until the async MCP refresh + FalkorDB, while `persistent_memory` may be absent until the async MCP refresh completes. - In practice this means long-term memory is often cold-first-turn / warmer on a later turn, while session continuity remains available immediately. @@ -313,10 +319,10 @@ design: - On each `chat.message`, compare the user's message against the query that produced the current cache. -- If the topic has drifted (Jaccard on cached fact UUIDs < threshold), schedule - an async cache refresh with the new query. The _current_ cached context is - still injected immediately; the refreshed cache is available for the next - message. +- If the topic has drifted (Jaccard on current query text vs cached query text < + threshold), schedule an async cache refresh with the new query. The _current_ + cached context is still injected immediately; the refreshed cache is available + for the next message. - This trades one message of staleness for eliminating synchronous Graphiti latency entirely. @@ -331,9 +337,8 @@ from Redis hot-tier state and optional Graphiti cache data. Historically, the plugin's Graphiti-derived memory was injected as a standalone `...` block. This plan keeps the caller's current naming (`session_memory` + `persistent_memory`) and treats the older -`` shape as a legacy Graphiti-only serialization detail, not -as a separate top-level layer. Its UUID metadata maps cleanly to -`` in the canonical format below. +UUID-bearing shapes as legacy compatibility details, not as a separate top-level +layer. ```xml @@ -359,8 +364,8 @@ as a separate top-level layer. Its UUID metadata maps cleanly to - - + + ``` @@ -370,18 +375,18 @@ as a separate top-level layer. 
Its UUID metadata maps cleanly to The injected sections intentionally mirror context-mode's continuity model and should be rendered in this order: -| Section | Source | Required | Notes | -| ------------------- | -------------------------------- | ---------- | -------------------------------------------------------------- | -| `last_request` | latest user prompt / task intent | Yes | Primary resume anchor. | -| `active_tasks` | structured task events | Yes | Checkbox/task-state style when rendered. | -| `key_decisions` | decision + preference events | Yes | Preserve user corrections and constraints. | -| `files_in_play` | recent file events | Yes | Mirrors context-mode active-files continuity. | -| `project_rules` | loaded AGENTS/rules | Yes | Must survive compaction. | -| `unresolved_errors` | open error events | If present | Show only unresolved blockers. | -| `git_state` | git activity events | If present | Include only meaningful milestones. | -| `subagent_work` | subagent events | If present | Summaries only, not raw logs. | -| `session_snapshot` | priority-tiered snapshot | If present | Compact state restore layer. | -| `persistent_memory` | Graphiti cache | Optional | Canonical successor to the legacy `` block. | +| Section | Source | Required | Notes | +| ------------------- | -------------------------------- | ---------- | ------------------------------------------------------------------------------------------------------ | +| `last_request` | latest user prompt / task intent | Yes | Primary resume anchor. | +| `active_tasks` | structured task events | If present | Omitted when empty. Checkbox/task-state style when rendered. | +| `key_decisions` | decision + preference events | If present | Omitted when empty. Preserve user corrections and constraints. | +| `files_in_play` | recent file events | If present | Omitted when empty. Mirrors context-mode active-files continuity. | +| `project_rules` | loaded AGENTS/rules | If present | Omitted when empty. 
Must survive compaction. | +| `unresolved_errors` | open error events | If present | Show only unresolved blockers. | +| `git_state` | git activity events | If present | Include only meaningful milestones. | +| `subagent_work` | subagent events | If present | Summaries only, not raw logs. | +| `session_snapshot` | priority-tiered snapshot | If present | Compact state restore layer. | +| `persistent_memory` | Graphiti cache | Optional | Current emitted shape carries `node_refs`; legacy UUID-bearing blocks remain parse-only compatibility. | ### 7.2 Budget Allocation @@ -391,7 +396,7 @@ should be rendered in this order: | Session snapshot detail | up to 800 chars | Redis `GET` | < 1 ms | | Persistent memory | remainder of 5% context budget | Redis `GET memory-cache:*` | < 1 ms | -`persistent_memory` is empty (not an error) if cache has not been warmed yet, +`persistent_memory` is omitted (not an error) if cache has not been warmed yet, the session is on its first cold turn, or Graphiti is unreachable. The rest of the Session Guide is always available because it is sourced from FalkorDB/Redis. @@ -481,11 +486,62 @@ Redis cache, not a live query. ## 10 Session Lifecycle -### 10.1 Session ID Semantics +### 10.1 Session ID Semantics and Child-Session Routing + +> **Intentional divergence from `mksglu/context-mode`.** Official context-mode +> treats subagent activity as summarized `Agent` tool events within the parent +> session's continuity log — the child session is not a first-class participant +> in session state; only its summary is recorded. +> +> This plugin intentionally promotes child sessions to first-class participants +> in the root session's state. The rationale is that OpenCode's multi-agent +> orchestration (swarm, delegated tasks) produces work that is semantically part +> of the parent session — decisions made, files edited, and errors encountered +> by a child agent are as important to session continuity as parent-originated +> events. 
Treating them as opaque summaries would lose actionable detail that +> the parent (or a sibling child) needs on the next turn. +> +> **Do not "fix" this by reverting to summarized-only subagent tracking unless +> the design decision is explicitly revisited.** Future alignment sessions with +> upstream context-mode should treat this section as the canonical record of the +> divergence. + +Every hook-provided `sessionID` is resolved to a **canonical (root) session ID** +before any state access. When a session has a `parentID`, the plugin walks the +parent chain to find the root session and uses that root ID for all Redis keys, +event storage, snapshot builds, `<session_memory>` injection, and compaction +context. This means child/subagent sessions are first-class participants in the +parent session's memory: + +- **Event log**: child prompts, responses, tool calls, and structured events are + recorded under the root session's `session:{canonicalId}:events` key. +- **Snapshot**: child activity is included when the priority-tiered snapshot is + rebuilt at `session.idle` or `session.compacted`. +- **`<session_memory>` injection**: the same prepared envelope is used + regardless of whether the triggering hook fires from a parent or child + session. `chat.message`, `experimental.chat.messages.transform`, and + `experimental.session.compacting` all resolve to the canonical session before + reading or writing state. +- **Compaction**: child-derived events survive compaction because they live in + the same event list and snapshot as the parent. +- **Future `<session_memory>` injections**: because child events are stored + alongside parent events, they are included in later snapshot rebuilds and + appear in subsequent `<session_memory>` injections for any session in the same + lineage. + +Parent/child linkage is established at `session.created` time via +`setParentId()` and cached for the process lifetime. The canonical ID is +resolved lazily (with an SDK lookup fallback) and cached once resolved. 
Cycle +detection prevents infinite loops in malformed parent chains. + +#### Child-Session Deletion Semantics + +When a `session.deleted` event fires for a child session, **only that child's +local bookkeeping is removed** (parent-ID cache entry, canonical-ID cache entry, +buffered assistant messages scoped to the child). The canonical/root session's +state, event log, snapshot, and lifecycle are **not** deleted. This prevents a +child session teardown from accidentally wiping the parent's accumulated memory. -- `sessionID` from OpenCode hooks is the canonical key for all Redis state. -- Subagent sessions (with `parentID`) are ignored for memory purposes - (unchanged). - Session state is local to the plugin process; Redis keys provide persistence across plugin restarts within TTL windows. @@ -550,15 +606,15 @@ the async cache layer — never as a synchronous hot-path call. ## 13 Config Changes -`GraphitiConfig` keeps legacy top-level keys for backward compatibility, but -adds explicit nested sections for FalkorDB and Graphiti. Nested values take -precedence whenever both forms are supplied. +`GraphitiConfig` keeps only the original top-level Graphiti keys for backward +compatibility, while using explicit nested sections for Redis and Graphiti. +Canonical nested values take precedence whenever both forms are supplied. ```typescript interface GraphitiConfig { // Preferred nested config - falkordb?: { - redisEndpoint?: string; // FalkorDB Redis URL (default: "redis://localhost:6379") + redis?: { + endpoint?: string; // Redis URL for the plugin hot tier (default: "redis://localhost:6379") batchSize?: number; // max events per drain batch (default: 20) batchMaxBytes?: number; // max combined body bytes per batch (default: 51200) sessionTtlSeconds?: number; // session:{id}:events TTL (default: 86400) @@ -570,25 +626,33 @@ interface GraphitiConfig { endpoint?: string; // Graphiti MCP URL (e.g. 
"http://localhost:8000/mcp") groupIdPrefix?: string; driftThreshold?: number; - factStaleDays?: number; }; - // Legacy top-level keys still accepted during migration + // Legacy top-level keys still accepted during migration (Graphiti settings) endpoint?: string; groupIdPrefix?: string; driftThreshold?: number; - factStaleDays?: number; + + // Legacy nested compatibility during migration + falkordb?: { + redisEndpoint?: string; + batchSize?: number; + batchMaxBytes?: number; + sessionTtlSeconds?: number; + cacheTtlSeconds?: number; + drainRetryMax?: number; + }; } ``` Resolution rules for the implementation: -1. Read FalkorDB/Redis settings from `falkordb.*` first; fall back to legacy - top-level Redis keys only when the nested value is absent. +1. Read Redis settings from `redis.*` first; fall back to legacy nested + `falkordb.*` only when the higher-precedence value is absent. 2. Read Graphiti settings from `graphiti.*` first; fall back to legacy top-level Graphiti keys only when the nested value is absent. 3. New docs, examples, validation, and runtime lookups should use the nested - shape as canonical; legacy top-level keys exist only for compatibility. + shape as canonical; only Graphiti top-level keys remain for compatibility. 
--- @@ -610,14 +674,14 @@ src/services/event-extractor.ts — structured event extraction from hook paylo ### Modified Files ``` -src/config.ts — add canonical `falkordb`/`graphiti` sections, legacy top-level fallback, and precedence resolution +src/config.ts — add canonical `redis`/`graphiti` sections, retain nested `falkordb` compatibility and top-level Graphiti compatibility, and resolve precedence src/types/index.ts — add SessionEvent, EventCategory types -src/session.ts — SessionState gains hotTierReady; wire Redis client and async Graphiti consolidation worker; remove direct GraphitiClient dependency +src/session.ts — SessionState gains hotTierReady; wire Redis client and async Graphiti consolidation worker; remove direct GraphitiClient dependency; add canonical session ID resolution, parent/child linkage cache, and child-safe deletion src/services/connection-manager.ts — adapt existing MCP transport lifecycle for the new graphiti-mcp.ts wrapper (reconnect backoff, request queuing already implemented) -src/handlers/event.ts — hot tier writes on all event types, async drain triggers -src/handlers/chat.ts — read from Redis cache instead of sync Graphiti calls -src/handlers/compacting.ts — read snapshot + cache from Redis, no Graphiti calls -src/handlers/messages.ts — compose canonical `session_memory` envelope from Redis-sourced data +src/handlers/event.ts — hot tier writes on all event types, async drain triggers; all hooks resolve to canonical session ID; child deletion preserves parent state +src/handlers/chat.ts — read from Redis cache instead of sync Graphiti calls; resolves to canonical session ID for child sessions +src/handlers/compacting.ts — read snapshot + cache from Redis, no Graphiti calls; resolves to canonical session ID for child sessions +src/handlers/messages.ts — compose canonical `session_memory` envelope from Redis-sourced data; resolves to canonical session ID for child sessions src/index.ts — wire Redis client + async Graphiti MCP worker 
``` @@ -631,20 +695,20 @@ src/services/client.ts — replaced by graphiti-mcp.ts ## 15 Implementation Order -| Phase | Files | Depends On | Acceptance Criteria | -| ------------------------------------- | ----------------------------------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------- | -| 0. Normalize MCP contract | — | — | Confirm tool payload/response handling against a reachable Graphiti MCP endpoint. | -| 1. Consolidation backend | `graphiti-mcp.ts`, `graphiti-async.ts` | Phase 0 | Async worker can drain, refresh cache, and load primers through Graphiti MCP with no hot-path blocking. | -| 2. Redis primitives | `redis-client.ts`, `redis-events.ts` | — | LPUSH/LRANGE/GET/SET work against FalkorDB. Connection retry works. | -| 3. Event extractor | `event-extractor.ts`, `types/index.ts` | — | Hook payloads produce context-mode-equivalent `SessionEvent` categories. Unit tests. | -| 4. Snapshot builder | `redis-snapshot.ts` | Phase 3 | Priority-tiered XML snapshot generated from event list. Budget enforcement. Unit tests. | -| 5. Local search strategy | — | Phases 2, 4 | Redis/FalkorDB-only session recall path works; optional RediSearch path documented if available. | -| 6. Memory cache | `redis-cache.ts` | Phases 1, 2 | Async Graphiti search results written to and read from Redis. TTL expiry. Stale-read behavior. | -| 7. Batch drain | `batch-drain.ts` | Phases 1, 2, 3 | Events drain to Graphiti async with sequential ingest semantics by `groupId`. Cursor tracking. Crash recovery. | -| 8. Wire handlers | `event.ts`, `chat.ts`, `compacting.ts`, `messages.ts` | Phases 2–7 | All hooks use Redis hot path. No synchronous Graphiti calls remain. Existing test assertions hold. | -| 9. 
Config & bootstrap | `config.ts`, `index.ts`, `session.ts` | Phase 8 | Nested `falkordb`/`graphiti` config is validated, legacy top-level fallback works, and nested values take precedence. | -| 10. Docs alignment (future follow-up) | `README.md` | Phase 9 | README incorporates all adopted context-mode feature descriptions and credits the original author/project by name. | -| 11. Integration tests | — | All | End-to-end: message -> Redis event -> snapshot -> async drain -> Graphiti -> cache refresh -> injection. | +| Phase | Files | Depends On | Acceptance Criteria | +| -------------------------------- | ----------------------------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| 0. Normalize MCP contract | — | — | Confirm tool payload/response handling against a reachable Graphiti MCP endpoint. | +| 1. Consolidation backend | `graphiti-mcp.ts`, `graphiti-async.ts` | Phase 0 | Async worker can drain, refresh cache, and load primers through Graphiti MCP with no hot-path blocking. | +| 2. Redis primitives | `redis-client.ts`, `redis-events.ts` | — | LPUSH/LRANGE/GET/SET work against FalkorDB. Connection retry works. | +| 3. Event extractor | `event-extractor.ts`, `types/index.ts` | — | Hook payloads produce context-mode-equivalent `SessionEvent` categories. Unit tests. | +| 4. Snapshot builder | `redis-snapshot.ts` | Phase 3 | Priority-tiered XML snapshot generated from event list. Budget enforcement. Unit tests. | +| 5. Local search strategy | — | Phases 2, 4 | Redis/FalkorDB-only session recall path works; optional RediSearch path documented if available. | +| 6. Memory cache | `redis-cache.ts` | Phases 1, 2 | Async Graphiti search results written to and read from Redis. TTL expiry. Stale-read behavior. | +| 7. 
Batch drain | `batch-drain.ts` | Phases 1, 2, 3 | Events drain to Graphiti async with sequential ingest semantics by `groupId`. Cursor tracking. Crash recovery. | +| 8. Wire handlers | `event.ts`, `chat.ts`, `compacting.ts`, `messages.ts` | Phases 2–7 | All hooks use Redis hot path. No synchronous Graphiti calls remain. Existing test assertions hold. | +| 9. Config & bootstrap | `config.ts`, `index.ts`, `session.ts` | Phase 8 | Nested `redis`/`graphiti` config is validated, legacy nested `falkordb` compatibility remains, top-level Graphiti fallback works, and canonical nested values take precedence. | +| 10. Docs alignment ✓ (completed) | `README.md` | Phase 9 | ✓ README incorporates all adopted context-mode feature descriptions and credits the original author/project by name. | +| 11. Integration tests | — | All | End-to-end: message -> Redis event -> snapshot -> async drain -> Graphiti -> cache refresh -> injection. | --- @@ -652,9 +716,9 @@ src/services/client.ts — replaced by graphiti-mcp.ts ### 16.1 Confirmed decisions for this plan -- **Hot path:** FalkorDB/Redis (configured via `falkordb.redisEndpoint`, with - legacy fallback to `redisEndpoint`) is the hot path for writes, snapshots, and - cached reads. +- **Hot path:** FalkorDB/Redis (configured canonically via `redis.endpoint`, + with legacy fallback to nested `falkordb.*`) is the hot path for writes, + snapshots, and cached reads. - **Cold/async backend:** Graphiti stays off the hot path. The consolidation backend is Graphiti MCP (configured via `graphiti.endpoint`, with legacy fallback to `endpoint`). @@ -664,8 +728,15 @@ src/services/client.ts — replaced by graphiti-mcp.ts - **Naming:** the canonical injected structure remains `session_memory` with optional `persistent_memory`. - **Storage scope:** do not add new independent storage such as SQLite. -- **Docs follow-up:** README alignment and attribution are future implementation - work, not already-completed state. 
+- **Docs alignment:** README has been updated to reflect the two-layer + architecture design and includes acknowledgement of the context-mode + inspiration with proper attribution. +- **Child-session routing diverges from context-mode (intentional):** official + context-mode records subagent work as summarized `Agent` tool events. This + plugin instead resolves every child/subagent session to the canonical root + session and treats child events as first-class entries in the shared event + log, snapshot, and `<session_memory>` injection. See §10.1 for the full + rationale. This is a deliberate design choice, not an alignment gap. ### 16.2 Remaining implementation validation @@ -683,7 +754,7 @@ src/services/client.ts — replaced by graphiti-mcp.ts - [ ] **Cache key namespacing**: if multiple plugin instances share the same FalkorDB, cache keys need instance-level namespacing to avoid collisions. Current `groupId` prefix may suffice. -- [ ] **Drift detection heuristic**: the cached Jaccard approach compares fact +- [ ] **Drift detection heuristic**: the cached Jaccard approach compares query UUID sets rather than issuing a live search. Validate that this is good enough in practice. - [ ] **Connection manager reuse**: the existing @@ -691,14 +762,313 @@ src/services/client.ts — replaced by graphiti-mcp.ts already implements MCP transport lifecycle, reconnect backoff, and request queuing. Decide whether `graphiti-mcp.ts` wraps it as-is, adapts it, or replaces it. -- [ ] **README scope and attribution**: the README update (Phase 10) must - enumerate every context-mode-derived feature this design adopts - (structured event extraction, priority-tiered snapshots, resumable session - state, hidden background consolidation) and credit the original - context-mode author and project by name with a link. This is a hard - requirement, not optional polish. 
- -### 16.3 Future options (non-final) + +### 16.3 Pending: Memory Hygiene and Legacy Injection Cleanup + +**Status:** Implemented and verified in repo tests (live-session +cleanup/validation still pending) + +The current implementation still has a serious memory-quality problem even +though the hot-path architecture itself has been migrated to FalkorDB/Redis + +async Graphiti MCP. In live sessions, the canonical `` envelope +is being polluted by duplicated user text, assistant operational chatter, +tool-call scaffolding, and transcript-heavy residue that should never be treated +as durable continuity state. The same user instruction is often copied into +multiple sections such as `last_request`, `active_tasks`, and `key_decisions`, +which wastes prompt budget and weakens the signal that these sections are +supposed to carry. Assistant-authored analysis and planning text is also being +promoted into `unresolved_errors`, `discoveries`, and `residual_messages`, +causing the plugin to remember its own commentary rather than the user's actual +goals, decisions, blockers, and file work. + +The problem is broader than simple duplication. Raw tool transcript content is +still entering the memory pipeline: `Read` output dumps, wrapper tags such as +`` and ``, agent/tool orchestration text, and previously injected +memory blocks are being re-consumed as fresh session evidence. This creates a +feedback loop where memory injection becomes self-referential: old injected +memory is parsed again, assistant summaries are stored as facts, and the next +turn receives an even noisier envelope. The result is a prompt that is larger, +less stable, and less representative of the true session state than the +context-mode-style continuity model this overhaul is trying to preserve. + +Persistent memory quality is also compromised by stale or low-value Graphiti +facts. 
Instead of surfacing durable project knowledge, the current +`persistent_memory` block can include meta-facts about planning files, assistant +actions, prior phrasing suggestions, and historical implementation chatter that +is no longer relevant to the active turn. At the same time, the legacy top-level +`...` format is still appearing alongside the +canonical `` path in some live runs, which indicates that +compatibility handling is still leaking into effective prompt output. Until +these hygiene issues are fixed, the architecture change is only partially +successful: Graphiti is off the hot path, but the injected continuity state is +still too noisy, too repetitive, and too contaminated by assistant/tool +artifacts to deliver the intended resumability benefits. + +#### 16.3.1 Alignment target + +This cleanup should intentionally move the hot path closer to context-mode's +session-continuity behavior. The design goal is not simply "less verbose" +memory; it is a narrower contract for what counts as durable working state. +Context-mode's implementation works because it primarily stores compact, +category-specific events and reconstructs a small resume snapshot from those +events rather than replaying transcripts. The same principle should govern this +plugin's hot tier. 
+ +The target behavior is: + +- event storage is compact, typed, and continuity-oriented rather than + transcript-oriented +- tool outputs are used to infer structure, not replayed as durable memory text +- assistant operational prose is not treated as project memory +- injected memory is stable, small, and semantically partitioned +- Graphiti acts as an optional background knowledge source, not a second + transcript channel + +In practice, that means the hot path should remember things like the user's last +request, active tasks, files in play, key decisions, and concrete blockers, but +not the raw `Read` result, not the assistant's planning narration, and not the +XML/text wrappers of previously injected memory. + +#### 16.3.2 Revised hot-tier data contract + +The hot-path pipeline should enforce a stricter contract at each stage: + +1. **Sanitize before extraction**: remove injected memory blocks and obvious + wrapper text before any new event extraction occurs. +2. **Extract compact events**: store concise, typed continuity events with hard + length limits and category-specific schemas. +3. **Build a conservative snapshot**: synthesize only high-value continuity + sections; treat everything else as discardable. +4. **Render a stable envelope**: produce a deterministic `<session_memory>` + block whose sections do not duplicate each other. +5. **Drain only semantic episodes**: send Graphiti compact facts about work + state, not conversational residue. + +Each stage should be allowed to throw away information aggressively. The point +of the hot tier is resumability, not archival completeness. + +#### 16.3.3 Input sanitization and reinjection prevention + +The first concrete change should be to prevent the pipeline from re-consuming +its own output. + +Planned implementation details: + +- In `src/handlers/chat.ts` and any extraction entrypoint, strip leading + canonical `<session_memory>...</session_memory>` blocks before deriving + `last_request` or user events. 
+- In `src/handlers/messages.ts`, continue parsing visible UUID metadata from + legacy `` blocks for compatibility, but strip legacy block + text from the effective user content before it can be re-extracted. +- Add a shared sanitizer utility that removes: + - canonical injected memory blocks + - legacy injected memory blocks + - wrapper lines such as ``, ``, and similar tool-output tags + when they are part of replayed tool transcript rather than true user input +- Ensure this sanitizer runs before both hot-tier event extraction and async + Graphiti drain preparation. + +This stage is required to break the self-referential loop visible in live +sessions, where injected memory and tool transcript wrappers become fresh memory +material on the next turn. + +#### 16.3.4 Extraction redesign around context-mode-like compact events + +`src/services/event-extractor.ts` should be narrowed so it behaves more like +context-mode's compact event extraction model. + +Planned extraction policy by source: + +- **User message events** + - Keep: explicit request/intent, user decisions, preferences, task updates, + user-pasted data references when genuinely user-originated. + - Reject: repeated injected memory text, quoted assistant prose, copied tool + output, and orchestration chatter. +- **Read/search tool events** + - Keep: file path, query, maybe a tiny summary derived from metadata. + - Reject: full returned content, wrapper blocks, and long bodies. +- **Edit/write tool events** + - Keep: touched file path plus a short semantic summary if one is reliably + derivable. +- **Error events** + - Keep: concrete failing command/tool name, status, concise failure text. + - Reject: assistant hypotheses, debugging commentary, and narrative status + updates. +- **Subagent events** + - Keep: launch intent and terse completion result. + - Reject: full delegated report bodies. +- **Integration/MCP events** + - Keep: service call occurred, optional tool name, success/failure signal. 
+ - Reject: request/response payload bodies. + +This redesign should also reduce the default payload size of each stored event. +By default, event bodies should be one sentence or one path-like datum, not an +open-ended transcript field. + +#### 16.3.5 Section-specific rendering rules and dedupe + +The canonical `<session_memory>` envelope should follow a more rigid section +contract so the same sentence cannot be repeated across multiple sections. + +Planned section semantics: + +- `last_request` + - exactly one normalized user request from the latest turn + - never duplicated verbatim in any other section +- `active_tasks` + - only explicit task-state items or inferred work items with task-like shape + - should not restate `last_request` if no real task structure exists +- `key_decisions` + - only user decisions/preferences/corrections that materially changed the + direction of work +- `files_in_play` + - paths only +- `project_rules` + - rule paths or compact rule summaries only +- `unresolved_errors` + - concrete unresolved blockers only +- `session_snapshot` + - compact secondary restore layer only; never a replay of upper sections + +Implementation should normalize candidate strings and use explicit precedence +when deduping: + +- `last_request` outranks `active_tasks` +- `active_tasks` outrank `key_decisions` when text is effectively the same work + item +- explicit user decisions outrank generic discoveries +- `session_snapshot` must not restate text already emitted in top-level fields + +This is the direct fix for the failure mode where one user sentence currently +lands in `last_request`, `active_tasks`, and `key_decisions` simultaneously. + +#### 16.3.6 Snapshot simplification + +`src/services/redis-snapshot.ts` should become more conservative and closer to +context-mode's priority-tiered snapshot builder. 
+ +Planned changes: + +- preserve a small number of high-value sections only: + - decisions / constraints + - active task state + - active files / recent edits + - concrete blockers / unresolved errors + - environment / git state +- heavily cap or omit low-value sections such as: + - `discoveries` + - `references` + - `residual_messages` +- make omission the default for weak sections rather than filling them with + low-quality text +- enforce deterministic ordering and small fixed limits so the same session + state renders similarly across turns + +The snapshot should be boring and durable. If a section cannot be represented in +compact, high-signal form, it should not be injected. + +#### 16.3.7 Graphiti drain and cache filtering + +The async Graphiti tier should inherit the same compact-memory discipline; +otherwise `persistent_memory` will remain polluted even if the hot-tier snapshot +improves. + +Planned changes: + +- Drain only semantic episodes built from structured events, not raw transcript + fragments. +- Reject drain entries dominated by: + - tool scaffolding + - injected memory text + - assistant operational narration + - agent-control syntax + - file-content dumps +- During cache refresh, prefer durable facts about: + - architecture decisions + - constraints + - explicit user preferences + - major work milestones + - meaningful project entities +- Filter out stale or low-value facts about: + - prior phrasing suggestions + - assistant planning chatter + - tool routing advice + - historical meta-discussion unrelated to active work +- Prefer rendering facts over nodes, and render nodes only when they add unique + value. + +This should make `persistent_memory` act like sparse background knowledge, +closer to context-mode's retrieval posture, rather than an echo chamber of old +agent conversation. 
+ +#### 16.3.8 Rollout and cleanup + +Because existing Redis and Graphiti data are already polluted, the rollout must +include a cleanup step after the code-level hygiene fixes land. + +Planned rollout steps: + +- land sanitization, extraction, snapshot, and drain filtering changes first +- validate behavior in unit tests and targeted integration tests +- reset or namespace polluted Redis hot-tier keys for the affected project +- reset or namespace Graphiti group data so stale low-value facts stop + repopulating cache +- verify fresh-session behavior after cleanup, not just behavior in an already + poisoned namespace + +Without this cleanup, old low-value facts may continue to dominate recall and +hide whether the new extraction rules are actually working. + +#### 16.3.9 Required verification + +This work should only be considered complete when both code-level and live-run +verification show that the hot path now behaves more like context-mode's compact +continuity model. + +Required verification targets: + +- sanitizer tests proving injected memory cannot be re-consumed as new input +- extraction tests proving `Read`/search outputs store refs rather than bodies +- section-dedupe tests proving the same normalized text cannot occupy + `last_request`, `active_tasks`, and `key_decisions` together +- transform tests proving canonical and legacy memory blocks cannot coexist in + final injection +- Graphiti drain/cache tests proving assistant chatter and transcript wrappers + are rejected +- live-session validation proving assistant planning text no longer appears in + `unresolved_errors`, `discoveries`, or `persistent_memory` +- live-session validation proving the injected envelope is smaller, more stable, + and more continuity-focused across turns + +- [x] **Strip injected memory before extraction**: before processing a new user + turn, remove leading legacy `...` and canonical + `...` blocks so injected context is + not re-learned as fresh content. 
+- [x] **Harden memory hygiene filters**: never persist raw tool payloads, `Read` + output dumps, XML-like wrappers, assistant operational chatter, or agent + orchestration text into hot-tier summaries or Graphiti drain batches. +- [x] **Make extraction allowlist-based**: only promote durable continuity + signals such as user intent, explicit decisions, active tasks, file + edits/writes, meaningful git milestones, and real unresolved errors. +- [x] **Stop storing transcript-heavy tool bodies**: keep refs and compact + summaries for file reads/searches, but do not retain full returned file + contents in session memory or Graphiti episodes. +- [x] **Gate async Graphiti writes more aggressively**: skip semantic drain + entries whose content is primarily tool-call scaffolding, injected memory, + assistant self-narration, or agent-control text. +- [x] **Shrink the injected envelope**: favor `last_request`, `active_tasks`, + `key_decisions`, and `files_in_play`; heavily cap or suppress noisy + `discoveries`, `residual_messages`, and assistant-originated + `unresolved_errors`. +- [x] **Add regression coverage**: verify that legacy `` does not leak + into new injections, duplicated text does not land across multiple + sections, assistant chatter is not stored as errors, and noisy persistent + memory facts are filtered out. +- [ ] **Plan one-time cleanup of poisoned state**: after code fixes land, reset + or namespace polluted Redis hot-tier keys and Graphiti group data so stale + low-value memories stop resurfacing. 
+ +### 16.4 Future options (non-final) - [ ] **More proactive cache prewarm**: broaden warmup beyond `get_episodes` into project-scope `search_memory_facts`/`search_nodes` if the extra async diff --git a/plans/ContextOverhaulTests.md b/plans/ContextOverhaulTests.md index 175b936..be1dc9e 100644 --- a/plans/ContextOverhaulTests.md +++ b/plans/ContextOverhaulTests.md @@ -1,7 +1,12 @@ # Context Overhaul — Test Plan -**Status:** Complete **Date:** 2026-03-14 **Canonical design:** -[`plans/ContextOverhaul.md`](plans/ContextOverhaul.md) +**Status:** Draft (planned automation not yet implemented) **Date:** 2026-03-14 +**Canonical design:** [`plans/ContextOverhaul.md`](plans/ContextOverhaul.md) + +> **Note:** This document outlines the _intended_ test strategy. The test +> infrastructure (Docker Compose fixtures, baseline files, deno task runner) is +> not yet in the repo. Current runnable tasks: +> `deno task build|deploy|dev|check|lint|fmt`. Full automation is aspirational. --- @@ -203,8 +208,9 @@ and within budget. `ContextOverhaul.md` §4.3. - [ ] B-5: Snapshot respects the 3 KB budget — lower-priority sections are truncated first. -- [ ] B-6: Each `session_memory` contains required sections: `last_request`, - `active_tasks`, `key_decisions`, `files_in_play`, `project_rules`. +- [ ] B-6: Each `session_memory` always contains `last_request`; list sections + (`active_tasks`, `key_decisions`, `files_in_play`, `project_rules`) are + present only when they have content and are omitted when empty. - [ ] B-7: Optional sections (`unresolved_errors`, `git_state`, `subagent_work`, `session_snapshot`, `persistent_memory`) appear only when source data exists. @@ -295,7 +301,7 @@ state. - [ ] E-7: Multiple sequential compactions do not cause snapshot drift — each rebuild uses the current event list. - [ ] E-8: Compaction with an empty `memory-cache` (cold Graphiti) still - produces a valid `session_memory` with empty `persistent_memory`. 
+ produces a valid `session_memory` and omits `<persistent_memory>`. **Automation:** Automatable with simulated compaction lifecycle against mocks. @@ -312,11 +318,11 @@ from the Graphiti cache and that cross-session recall works. - [ ] F-1: On a new session with a warm `memory-cache:{groupId}`, the first `messages.transform` includes `persistent_memory` with cached facts. -- [ ] F-2: On a new session with a cold cache, the first turn has empty +- [ ] F-2: On a new session with a cold cache, the first turn omits `persistent_memory`; subsequent turns include it after async warmup completes. -- [ ] F-3: `persistent_memory` includes `fact_uuids` attribute listing the - injected fact UUIDs. +- [ ] F-3: `persistent_memory` omits legacy `fact_uuids`; the emitted shape uses + `node_refs` only. - [ ] F-4: Facts from a different `groupId` (different project) do not appear in `persistent_memory`. - [ ] F-5: Stale facts (older than `factStaleDays`) are annotated or filtered @@ -324,7 +330,8 @@ from the Graphiti cache and that cross-session recall works. - [ ] F-6: `persistent_memory` content is a structured summary, not raw Graphiti JSON. - [ ] F-7: After draining events to Graphiti and refreshing the cache, newly - created facts appear in `persistent_memory` on subsequent sessions. + created fact/node summaries appear in `persistent_memory` on subsequent + sessions. - [ ] F-8: The `node_refs` attribute in `persistent_memory` lists entity node references when present. @@ -351,8 +358,8 @@ and does not include noise. already-visible facts within the same session. - [ ] G-4: `persistent_memory` respects the budget remainder — it does not crowd out `session_memory` core sections. -- [ ] G-5: When Graphiti returns zero relevant results, `persistent_memory` is - omitted entirely (not rendered as an empty tag). +- [ ] G-5: When cached persistent memory has zero relevant results, + `persistent_memory` is omitted entirely (not rendered as an empty tag). 
- [ ] G-6: The legacy `` block is never emitted by the new implementation — only `` with optional ``. @@ -371,18 +378,19 @@ refreshed cache is used on the next turn. #### Checklist -- [ ] H-1: When Jaccard similarity between current and cached fact UUIDs drops - below `driftThreshold`, an async cache refresh is scheduled. +- [ ] H-1: When Jaccard similarity between current query text and cached query + text drops below `driftThreshold`, an async cache refresh is scheduled. - [ ] H-2: The current (stale) cache is still injected on the drift-triggering message (one-message staleness tradeoff). - [ ] H-3: On the next `chat.message` after the refresh completes, the updated cache is injected. - [ ] H-4: When Jaccard similarity is above `driftThreshold`, no refresh is scheduled. -- [ ] H-5: Drift detection uses the `factUuids` field from +- [ ] H-5: Drift detection uses the cached query metadata in `memory-cache:{groupId}:meta`, not a live Graphiti query. - [ ] H-6: Rapid successive messages with different topics do not cause - thundering-herd refresh calls — only one refresh is in flight at a time. + thundering-herd refresh calls — only one refresh is in flight per group at + a time, with newer queries picked up after the in-flight refresh settles. **Automation:** Fully automatable with mock MCP client tracking call counts and timing. @@ -411,7 +419,7 @@ normal operation. up). - [ ] I-6: TTL expiry of session keys (24h for events, 48h for snapshots) does not cause errors — the plugin handles missing keys gracefully. -- [ ] I-7: `memory-cache:{groupId}` TTL expiry (10 min) results in empty +- [ ] I-7: `memory-cache:{groupId}` TTL expiry (10 min) results in omitted `persistent_memory`, not an error. **Automation:** Automatable by resetting plugin state and re-initializing @@ -436,7 +444,7 @@ against pre-seeded Redis fixtures. - [ ] J-4: **Redis down mid-session:** after reconnect, state rebuilds and subsequent hooks use Redis again. 
- [ ] J-5: **Graphiti down at startup:** plugin logs warning, continues; - `persistent_memory` is empty. + `persistent_memory` is omitted. - [ ] J-6: **Graphiti down mid-session:** drain retries with exponential backoff; cache stales out after TTL. - [ ] J-7: **Graphiti down mid-session:** `session_memory` (Redis-sourced) is @@ -483,8 +491,9 @@ commits. - [ ] K-8: Latency percentiles (p50, p95, p99) are computed over 100 iterations of each hook. -**Automation:** Fully automatable. Requires a baseline file checked into the -repo (`tests/baselines/payload-sizes.json`). +**Automation:** Fully automatable once a baseline file +(`tests/baselines/payload-sizes.json`) is created and checked into the repo +(proposed infrastructure). --- @@ -505,11 +514,11 @@ repo (`tests/baselines/payload-sizes.json`). block — only `` with ``. - [ ] L-4: A message array containing both legacy `` and new `` blocks is handled without errors. -- [ ] L-5: The `fact_uuids` attribute in `` preserves the - same UUID semantics as the legacy `data-uuids` attribute. -- [ ] L-6: Legacy config keys (`endpoint`, `groupIdPrefix`, `driftThreshold`, - `factStaleDays`) at the top level are resolved correctly when nested - `graphiti.*` keys are absent. +- [ ] L-5: Legacy `data-uuids` remain parse-only compatibility input; + `` itself emits `node_refs` only. +- [ ] L-6: Legacy config keys (`endpoint`, `groupIdPrefix`, `driftThreshold`) at + the top level are resolved correctly when nested `graphiti.*` keys are + absent. - [ ] L-7: When both legacy top-level and nested config keys are present, nested values take precedence. - [ ] L-8: No verbose multi-paragraph memory block (characteristic of the legacy @@ -520,6 +529,51 @@ covers L-1/L-2 partially. --- +### Suite M: Child / Subagent Session Routing + +**Goal:** Verify that child/subagent sessions are resolved to the canonical root +session and that their activity flows through the same memory pipeline as the +parent. 
+ +**Tier:** Unit + Integration + +**Canonical design reference:** `plans/ContextOverhaul.md` §10.1 + +**Divergence note:** This behavior intentionally differs from official +`mksglu/context-mode`, which treats subagent work as summarized tool events +rather than first-class session participants. See §10.1 of the design doc for +the rationale and alignment guidance. + +#### Checklist + +- [x] M-1: `session.created` with a `parentID` caches the parent/child linkage + and resolves the canonical (root) session ID. +- [x] M-2: `chat.message` from a child session records events under the + canonical root session's `session:{canonicalId}:events` key. +- [x] M-3: `experimental.chat.messages.transform` from a child session injects + the root session's `<session_memory>` envelope. +- [x] M-4: `experimental.session.compacting` from a child session uses the root + session's state and snapshot. +- [x] M-5: `message.updated` from a child session finalizes the assistant + message under the canonical root session. +- [x] M-6: `message.part.updated` from a child session buffers assistant text + under the canonical root session ID. +- [x] M-7: `session.deleted` for a child session removes only the child's local + bookkeeping (parent-ID cache, canonical-ID cache, buffered messages) and + does **not** delete the root session's state, events, or snapshot. +- [x] M-8: Child-derived events appear in the priority-tiered snapshot when it + is rebuilt at `session.idle` or `session.compacted`. +- [x] M-9: Future `<session_memory>` injections for the parent session include + events that originated from child sessions. +- [x] M-10: Canonical ID resolution handles multi-level nesting (grandchild → + child → root) and detects cycles without infinite loops. + +**Automation:** Fully automatable with mock SDK client and `MockRedisClient`. +Tests exist in `event.test.ts`, `chat.test.ts`, `messages.test.ts`, +`compacting.test.ts`, and `session-snapshot.test.ts`. 
+ +--- + ## 7 Metrics and Thresholds | Metric | Threshold | Source | Action on breach | @@ -577,9 +631,15 @@ Any of the following triggers a fail: --- -## 9 CI/CD Automation Strategy +## 9 CI/CD Automation Strategy (Proposed) + +> **Status:** Not yet implemented. The following sections describe the +> _intended_ CI/CD flow. Docker Compose fixtures (`tests/docker-compose.yml`) +> and baseline files (`tests/baselines/payload-sizes.json`) do not yet exist. +> Current runnable tasks available in `deno.json`: `build`, `deploy`, `dev`, +> `check`, `lint`, `fmt`. -### 9.1 Test Execution +### 9.1 Test Execution (Proposed) ```bash # Unit tests (no external deps) @@ -627,13 +687,14 @@ graph LR I -->|No| X ``` -### 9.4 Baseline Management +### 9.4 Baseline Management (Proposed) -- Payload size baselines are stored in `tests/baselines/payload-sizes.json`. -- Baselines are updated manually via `deno task update-baselines` after - intentional size changes. -- CI compares current sizes against the checked-in baseline and fails on > 20% - regression. +- Payload size baselines _would be_ stored in + `tests/baselines/payload-sizes.json` (file does not yet exist). +- Baselines _would be_ updated manually via `deno task update-baselines` (task + not yet available) after intentional size changes. +- CI _would_ compare current sizes against the checked-in baseline and fail on > + 20% regression once infrastructure is available. --- @@ -665,12 +726,12 @@ dispatch, compaction trigger, multi-turn LLM interaction): ### 10.3 Tests Requiring Manual / Exploratory Verification -| Area | What to verify | -| ----------------------------- | ---------------------------------------------------------------------------------- | -| LLM continuity quality | Does the LLM actually "feel" continuous after compaction? Requires human judgment. | -| Memory relevance (semantic) | Are the right facts surfaced for a given topic? Keyword matching approximates. 
| -| Multi-agent orchestration | Subagent events in a real swarm session. | -| Long-running session (> 1 hr) | TTL expiry, cache staleness, and drift behavior over extended use. | +| Area | What to verify | +| ----------------------------- | ------------------------------------------------------------------------------------------------ | +| LLM continuity quality | Does the LLM actually "feel" continuous after compaction? Requires human judgment. | +| Memory relevance (semantic) | Are the right facts surfaced for a given topic? Keyword matching approximates. | +| Multi-agent orchestration | Subagent events in a real swarm session. Unit-level child-session routing is covered by Suite M. | +| Long-running session (> 1 hr) | TTL expiry, cache staleness, and drift behavior over extended use. | ### 10.4 OpenCode Shell Model Limitations diff --git a/scripts/bench-falkordb.ts b/scripts/bench-falkordb.ts new file mode 100644 index 0000000..01cf934 --- /dev/null +++ b/scripts/bench-falkordb.ts @@ -0,0 +1,131 @@ +import RedisModule from "ioredis"; + +const Redis = RedisModule as unknown as typeof import("ioredis").default; + +type Stats = { + min: number; + max: number; + avg: number; + p50: number; + p95: number; + p99: number; +}; + +type SampleMode = "set" | "get" | "del" | "ping"; + +// Default to localhost for safe contributor use. +// Pass an explicit endpoint argument to target a different Redis host. +const endpoint = Deno.args[0] ?? "redis://localhost:6379"; +const iterationsArg = Number(Deno.args[1] ?? "200"); +const iterations = Number.isFinite(iterationsArg) && iterationsArg > 0 + ? Math.floor(iterationsArg) + : 200; + +const percentile = (values: number[], ratio: number): number => { + const index = Math.min( + values.length - 1, + Math.max(0, Math.ceil(values.length * ratio) - 1), + ); + return values[index] ?? 
0; +}; + +const summarize = (values: number[]): Stats => { + const sorted = [...values].sort((a, b) => a - b); + const total = sorted.reduce((sum, value) => sum + value, 0); + return { + min: sorted[0] ?? 0, + max: sorted.at(-1) ?? 0, + avg: sorted.length ? total / sorted.length : 0, + p50: percentile(sorted, 0.5), + p95: percentile(sorted, 0.95), + p99: percentile(sorted, 0.99), + }; +}; + +const fmt = (value: number): string => `${value.toFixed(3)} ms`; + +const run = async () => { + const redis = new Redis(endpoint, { + lazyConnect: true, + maxRetriesPerRequest: 1, + enableAutoPipelining: false, + }); + let connected = false; + + const keyPrefix = `bench:opencode-graphiti:${Date.now()}`; + const samples: Record = { + ping: [], + set: [], + get: [], + del: [], + }; + + try { + await redis.connect(); + connected = true; + await redis.ping(); + + for (let index = 0; index < iterations; index += 1) { + const key = `${keyPrefix}:${index}`; + const value = `value-${index}`; + + let started = performance.now(); + await redis.ping(); + samples.ping.push(performance.now() - started); + + started = performance.now(); + await redis.set(key, value); + samples.set.push(performance.now() - started); + + started = performance.now(); + await redis.get(key); + samples.get.push(performance.now() - started); + + started = performance.now(); + await redis.del(key); + samples.del.push(performance.now() - started); + } + + console.log(`Endpoint: ${endpoint}`); + console.log(`Iterations: ${iterations}`); + console.log(""); + + for (const mode of ["ping", "set", "get", "del"] as const) { + const stats = summarize(samples[mode]); + console.log(`${mode.toUpperCase()}`); + console.log(` min: ${fmt(stats.min)}`); + console.log(` p50: ${fmt(stats.p50)}`); + console.log(` p95: ${fmt(stats.p95)}`); + console.log(` p99: ${fmt(stats.p99)}`); + console.log(` avg: ${fmt(stats.avg)}`); + console.log(` max: ${fmt(stats.max)}`); + console.log(""); + } + } finally { + if (connected) { + try { + const 
cleanupKeys = Array.from( + { length: iterations }, + (_, index) => `${keyPrefix}:${index}`, + ); + if (cleanupKeys.length) { + await redis.del(...cleanupKeys); + } + } catch { + // ignore cleanup failures in benchmarking utility + } + + try { + await redis.quit(); + } catch { + redis.disconnect(); + } + } else { + redis.disconnect(); + } + } +}; + +if (import.meta.main) { + await run(); +} diff --git a/src/config.test.ts b/src/config.test.ts index 5bf2114..547172a 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -1,9 +1,10 @@ -import { assertEquals } from "jsr:@std/assert@^1.0.0"; +import { assert, assertEquals, assertThrows } from "jsr:@std/assert@^1.0.0"; import { afterEach, describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import os from "node:os"; import { stub } from "jsr:@std/testing@^1.0.0/mock"; import { type ConfigExplorerAdapter, + ConfigLoadError, loadConfig, resetConfigExplorerAdapterForTesting, setConfigExplorerAdapterForTesting, @@ -12,14 +13,18 @@ import { function makeAdapter(options?: { searchResult?: unknown | null; loadResult?: unknown | null; + searchError?: Error; + loadError?: Error; }): ConfigExplorerAdapter { return { search() { + if (options?.searchError) throw options.searchError; return options?.searchResult == null ? null : { config: options.searchResult }; }, load() { + if (options?.loadError) throw options.loadError; return options?.loadResult == null ? 
null : { config: options.loadResult }; @@ -37,26 +42,26 @@ describe("config", () => { assertEquals(config.graphiti.endpoint, "http://localhost:8000/mcp"); assertEquals(config.graphiti.groupIdPrefix, "opencode"); assertEquals(config.graphiti.driftThreshold, 0.5); - assertEquals(config.falkordb.redisEndpoint, "redis://localhost:6379"); - assertEquals(config.falkordb.batchSize, 20); + assertEquals(config.redis.endpoint, "redis://localhost:6379"); + assertEquals(config.redis.batchSize, 20); }); - it("prefers nested graphiti and falkordb values over legacy top-level keys", () => { + it("prefers nested graphiti and redis values over legacy top-level graphiti keys", () => { setConfigExplorerAdapterForTesting(() => makeAdapter({ searchResult: { endpoint: "http://legacy.example/mcp", groupIdPrefix: "legacy", - redisEndpoint: "redis://legacy:6379", + redis: { + endpoint: "redis://canonical:6379", + batchSize: 9, + batchMaxBytes: 40_000, + }, graphiti: { endpoint: "http://nested.example/mcp", groupIdPrefix: "nested", driftThreshold: 0.75, }, - falkordb: { - redisEndpoint: "redis://nested:6379", - batchSize: 9, - }, }, }) ); @@ -66,11 +71,77 @@ describe("config", () => { assertEquals(config.graphiti.endpoint, "http://nested.example/mcp"); assertEquals(config.graphiti.groupIdPrefix, "nested"); assertEquals(config.graphiti.driftThreshold, 0.75); - assertEquals(config.falkordb.redisEndpoint, "redis://nested:6379"); - assertEquals(config.falkordb.batchSize, 9); + assertEquals(config.redis.endpoint, "redis://canonical:6379"); + assertEquals(config.redis.batchSize, 9); + assertEquals(config.redis.batchMaxBytes, 40_000); assertEquals(config.endpoint, "http://nested.example/mcp"); assertEquals(config.driftThreshold, 0.75); - assertEquals(config.redisEndpoint, "redis://nested:6379"); + }); + + it("falls back to redis defaults when unsupported falkordb values are provided", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + falkordb: { + 
redisEndpoint: "redis://compat-only:6379", + batchSize: 11, + }, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.redis.endpoint, "redis://localhost:6379"); + assertEquals(config.redis.batchSize, 20); + }); + + it("ignores removed top-level redis aliases", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + redisEndpoint: "redis://toplevel:6379", + batchSize: 5, + batchMaxBytes: 10_000, + sessionTtlSeconds: 3600, + cacheTtlSeconds: 300, + drainRetryMax: 1, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.redis.endpoint, "redis://localhost:6379"); + assertEquals(config.redis.batchSize, 20); + assertEquals(config.redis.batchMaxBytes, 51_200); + assertEquals(config.redis.sessionTtlSeconds, 86_400); + assertEquals(config.redis.cacheTtlSeconds, 600); + assertEquals(config.redis.drainRetryMax, 3); + }); + + it("falls back to defaults when only removed top-level redis aliases are provided", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + redisEndpoint: "redis://removed:6379", + batchSize: 5, + batchMaxBytes: 10_000, + sessionTtlSeconds: 3600, + cacheTtlSeconds: 300, + drainRetryMax: 1, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.redis.endpoint, "redis://localhost:6379"); + assertEquals(config.redis.batchSize, 20); + assertEquals(config.redis.batchMaxBytes, 51_200); + assertEquals(config.redis.sessionTtlSeconds, 86_400); + assertEquals(config.redis.cacheTtlSeconds, 600); + assertEquals(config.redis.drainRetryMax, 3); }); it("uses legacy fallback file when discovery finds nothing", () => { @@ -79,14 +150,14 @@ describe("config", () => { makeAdapter({ loadResult: { endpoint: "http://legacy.example/mcp", - redisEndpoint: "redis://legacy:6379", + redis: { endpoint: "redis://legacy:6379" }, }, }) ); const config = loadConfig(); assertEquals(config.graphiti.endpoint, "http://legacy.example/mcp"); - 
assertEquals(config.falkordb.redisEndpoint, "redis://legacy:6379"); + assertEquals(config.redis.endpoint, "redis://legacy:6379"); }); it("falls back to defaults for invalid numeric config values", () => { @@ -95,10 +166,11 @@ describe("config", () => { searchResult: { graphiti: { driftThreshold: 2, - factStaleDays: 0, }, - falkordb: { + redis: { batchSize: 0, + }, + falkordb: { batchMaxBytes: -10, sessionTtlSeconds: -1, cacheTtlSeconds: 0, @@ -111,11 +183,144 @@ describe("config", () => { const config = loadConfig(); assertEquals(config.graphiti.driftThreshold, 0.5); - assertEquals(config.graphiti.factStaleDays, 30); - assertEquals(config.falkordb.batchSize, 20); - assertEquals(config.falkordb.batchMaxBytes, 51_200); - assertEquals(config.falkordb.sessionTtlSeconds, 86_400); - assertEquals(config.falkordb.cacheTtlSeconds, 600); - assertEquals(config.falkordb.drainRetryMax, 3); + assertEquals(config.redis.batchSize, 20); + assertEquals(config.redis.batchMaxBytes, 51_200); + assertEquals(config.redis.sessionTtlSeconds, 86_400); + assertEquals(config.redis.cacheTtlSeconds, 600); + assertEquals(config.redis.drainRetryMax, 3); + }); + + it("prefers defaults when canonical redis values are invalid", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + redis: { + batchSize: 0, + }, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.redis.batchSize, 20); + }); + + it("throws when a configured graphiti endpoint is invalid", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + graphiti: { + endpoint: "not a valid url", + }, + }, + }) + ); + + assertThrows( + () => loadConfig(), + ConfigLoadError, + "Invalid Graphiti config value for graphiti.endpoint", + ); + }); + + it("accepts endpoint-like config values with incidental surrounding whitespace", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + endpoint: " http://legacy.example/mcp ", + redis: { + 
endpoint: " redis://trimmed:6379 ", + }, + graphiti: { + endpoint: " http://nested.example/mcp ", + }, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.endpoint, "http://nested.example/mcp"); + assertEquals(config.graphiti.endpoint, "http://nested.example/mcp"); + assertEquals(config.redis.endpoint, "redis://trimmed:6379"); + }); + + it("fails open to defaults when config discovery search fails", () => { + using _homedir = stub(os, "homedir", () => "/users/tester"); + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchError: new Error("search failed"), + loadResult: { + endpoint: "http://legacy.example/mcp", + redis: { endpoint: "redis://legacy:6379" }, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.graphiti.endpoint, "http://localhost:8000/mcp"); + assertEquals(config.graphiti.groupIdPrefix, "opencode"); + assertEquals(config.graphiti.driftThreshold, 0.5); + assertEquals(config.redis.endpoint, "redis://localhost:6379"); + assertEquals(config.redis.batchSize, 20); + }); + + it("fails open to defaults when the legacy config file cannot be loaded", () => { + using _homedir = stub(os, "homedir", () => "/users/tester"); + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + loadError: new Error("legacy load failed"), + }) + ); + + const config = loadConfig(); + + assertEquals(config.graphiti.endpoint, "http://localhost:8000/mcp"); + assertEquals(config.graphiti.groupIdPrefix, "opencode"); + assertEquals(config.graphiti.driftThreshold, 0.5); + assertEquals(config.redis.endpoint, "redis://localhost:6379"); + assertEquals(config.redis.batchSize, 20); + }); + + it("fails open to defaults when config discovery initialization fails", () => { + setConfigExplorerAdapterForTesting(() => { + throw new Error("cosmiconfig unavailable"); + }); + + const config = loadConfig(); + + assertEquals(config.graphiti.endpoint, "http://localhost:8000/mcp"); + assertEquals(config.redis.endpoint, "redis://localhost:6379"); + 
}); + + it("fails open based on stable discovery error code instead of message text", () => { + setConfigExplorerAdapterForTesting(() => ({ + search() { + throw new ConfigLoadError("different discovery wording", { + code: "config-discovery-search", + }); + }, + load() { + return null; + }, + })); + + const config = loadConfig(); + + assertEquals(config.graphiti.endpoint, "http://localhost:8000/mcp"); + assertEquals(config.redis.endpoint, "redis://localhost:6379"); + }); + + it("uses standard Error.cause when wrapping config load failures", () => { + const cause = new Error("search failed"); + const error = new ConfigLoadError("Unable to discover Graphiti config", { + cause, + code: "config-discovery-search", + }); + + assertEquals(error.cause, cause); + assert(!Object.prototype.propertyIsEnumerable.call(error, "cause")); }); }); diff --git a/src/config.ts b/src/config.ts index b5f379b..eb9b873 100644 --- a/src/config.ts +++ b/src/config.ts @@ -1,11 +1,12 @@ import os from "node:os"; import { createRequire } from "node:module"; import { join } from "node:path"; -import type { GraphitiConfig } from "./types/index.ts"; +import { logger } from "./services/logger.ts"; +import type { GraphitiConfig, RawGraphitiConfig } from "./types/index.ts"; -const DEFAULT_CONFIG: GraphitiConfig = { - falkordb: { - redisEndpoint: "redis://localhost:6379", +const DEFAULT_CONFIG = { + redis: { + endpoint: "redis://localhost:6379", batchSize: 20, batchMaxBytes: 51_200, sessionTtlSeconds: 86_400, @@ -16,40 +17,37 @@ const DEFAULT_CONFIG: GraphitiConfig = { endpoint: "http://localhost:8000/mcp", groupIdPrefix: "opencode", driftThreshold: 0.5, - factStaleDays: 30, }, - endpoint: "http://localhost:8000/mcp", - groupIdPrefix: "opencode", - driftThreshold: 0.5, - factStaleDays: 30, - redisEndpoint: "redis://localhost:6379", - batchSize: 20, - batchMaxBytes: 51_200, - sessionTtlSeconds: 86_400, - cacheTtlSeconds: 600, - drainRetryMax: 3, -}; - -type PartialGraphitiConfig = { - falkordb?: 
Partial; - graphiti?: Partial; - endpoint?: string; - groupIdPrefix?: string; - driftThreshold?: number; - factStaleDays?: number; - redisEndpoint?: string; - batchSize?: number; - batchMaxBytes?: number; - sessionTtlSeconds?: number; - cacheTtlSeconds?: number; - drainRetryMax?: number; -}; +} satisfies Pick; type ConfigLoadResult = { config: unknown } | null; -type ConfigSearchOutcome = - | { ok: true; config: PartialGraphitiConfig | null } - | { ok: false }; +type ConfigLoadErrorCode = + | "config-discovery-init" + | "config-discovery-search" + | "config-file-load" + | "config-invalid"; + +export class ConfigLoadError extends Error { + readonly code: ConfigLoadErrorCode; + + constructor( + message: string, + options: { cause?: unknown; code: ConfigLoadErrorCode }, + ) { + super(message); + this.name = "ConfigLoadError"; + this.code = options.code; + if (options.cause !== undefined) { + Object.defineProperty(this, "cause", { + value: options.cause, + writable: true, + configurable: true, + enumerable: false, + }); + } + } +} export interface ConfigExplorerAdapter { search(from?: string): ConfigLoadResult; @@ -69,13 +67,21 @@ const readString = ( ): string | undefined => typeof value[key] === "string" ? value[key] as string : undefined; +const readTrimmedString = ( + value: Record, + key: string, +): string | undefined => { + const entry = readString(value, key); + return entry?.trim() || undefined; +}; + const readNumber = ( value: Record, key: string, ): number | undefined => typeof value[key] === "number" ? 
value[key] as number : undefined; -const normalizeConfig = (value: unknown): PartialGraphitiConfig => { +const normalizeConfig = (value: unknown): RawGraphitiConfig => { if (!isRecord(value)) return {}; const compact = >(input: T): Partial => @@ -83,36 +89,28 @@ const normalizeConfig = (value: unknown): PartialGraphitiConfig => { Object.entries(input).filter(([_, entry]) => entry !== undefined), ) as Partial; - const config: PartialGraphitiConfig = { - endpoint: readString(value, "endpoint"), + const config: RawGraphitiConfig = { + endpoint: readTrimmedString(value, "endpoint"), groupIdPrefix: readString(value, "groupIdPrefix"), driftThreshold: readNumber(value, "driftThreshold"), - factStaleDays: readNumber(value, "factStaleDays"), - redisEndpoint: readString(value, "redisEndpoint"), - batchSize: readNumber(value, "batchSize"), - batchMaxBytes: readNumber(value, "batchMaxBytes"), - sessionTtlSeconds: readNumber(value, "sessionTtlSeconds"), - cacheTtlSeconds: readNumber(value, "cacheTtlSeconds"), - drainRetryMax: readNumber(value, "drainRetryMax"), }; - if (isRecord(value.falkordb)) { - config.falkordb = compact({ - redisEndpoint: readString(value.falkordb, "redisEndpoint"), - batchSize: readNumber(value.falkordb, "batchSize"), - batchMaxBytes: readNumber(value.falkordb, "batchMaxBytes"), - sessionTtlSeconds: readNumber(value.falkordb, "sessionTtlSeconds"), - cacheTtlSeconds: readNumber(value.falkordb, "cacheTtlSeconds"), - drainRetryMax: readNumber(value.falkordb, "drainRetryMax"), + if (isRecord(value.redis)) { + config.redis = compact({ + endpoint: readTrimmedString(value.redis, "endpoint"), + batchSize: readNumber(value.redis, "batchSize"), + batchMaxBytes: readNumber(value.redis, "batchMaxBytes"), + sessionTtlSeconds: readNumber(value.redis, "sessionTtlSeconds"), + cacheTtlSeconds: readNumber(value.redis, "cacheTtlSeconds"), + drainRetryMax: readNumber(value.redis, "drainRetryMax"), }); } if (isRecord(value.graphiti)) { config.graphiti = compact({ - endpoint: 
readString(value.graphiti, "endpoint"), + endpoint: readTrimmedString(value.graphiti, "endpoint"), groupIdPrefix: readString(value.graphiti, "groupIdPrefix"), driftThreshold: readNumber(value.graphiti, "driftThreshold"), - factStaleDays: readNumber(value.graphiti, "factStaleDays"), }); } @@ -122,45 +120,57 @@ const normalizeConfig = (value: unknown): PartialGraphitiConfig => { const isPositiveInteger = (value: number | undefined): value is number => typeof value === "number" && Number.isInteger(value) && value > 0; -const isPositiveNumber = (value: number | undefined): value is number => - typeof value === "number" && Number.isFinite(value) && value > 0; - const isUnitInterval = (value: number | undefined): value is number => typeof value === "number" && Number.isFinite(value) && value >= 0 && value <= 1; +const isValidUrlString = (value: string | undefined): value is string => { + if (!value) return false; + try { + new URL(value); + return true; + } catch { + return false; + } +}; + +const assertExplicitUrl = ( + value: string | undefined, + fieldName: string, +): void => { + if (value === undefined) return; + if (isValidUrlString(value)) return; + throw new ConfigLoadError( + `Invalid Graphiti config value for ${fieldName}: expected a valid URL`, + { code: "config-invalid" }, + ); +}; + +const validateExplicitConfig = (value: RawGraphitiConfig | null): void => { + if (!value) return; + assertExplicitUrl(value.endpoint, "endpoint"); + assertExplicitUrl(value.graphiti?.endpoint, "graphiti.endpoint"); + assertExplicitUrl(value.redis?.endpoint, "redis.endpoint"); +}; + const resolveNumber = ( ...candidates: Array ): number | undefined => candidates.find((value) => value !== undefined); -const resolveConfig = (value: PartialGraphitiConfig | null): GraphitiConfig => { +const resolveConfig = (value: RawGraphitiConfig | null): GraphitiConfig => { const raw = value ?? {}; - const resolvedRedisEndpoint = raw.falkordb?.redisEndpoint ?? - raw.redisEndpoint ?? 
- DEFAULT_CONFIG.falkordb.redisEndpoint; - const resolvedBatchSize = resolveNumber( - raw.falkordb?.batchSize, - raw.batchSize, - ); - const resolvedBatchMaxBytes = resolveNumber( - raw.falkordb?.batchMaxBytes, - raw.batchMaxBytes, - ); - const resolvedSessionTtlSeconds = resolveNumber( - raw.falkordb?.sessionTtlSeconds, - raw.sessionTtlSeconds, - ); - const resolvedCacheTtlSeconds = resolveNumber( - raw.falkordb?.cacheTtlSeconds, - raw.cacheTtlSeconds, - ); - const resolvedDrainRetryMax = resolveNumber( - raw.falkordb?.drainRetryMax, - raw.drainRetryMax, - ); - const resolvedGraphitiEndpoint = raw.graphiti?.endpoint ?? raw.endpoint ?? - DEFAULT_CONFIG.graphiti.endpoint; + const resolvedRedisEndpoint = raw.redis?.endpoint ?? + DEFAULT_CONFIG.redis.endpoint; + const resolvedBatchSize = resolveNumber(raw.redis?.batchSize); + const resolvedBatchMaxBytes = resolveNumber(raw.redis?.batchMaxBytes); + const resolvedSessionTtlSeconds = resolveNumber(raw.redis?.sessionTtlSeconds); + const resolvedCacheTtlSeconds = resolveNumber(raw.redis?.cacheTtlSeconds); + const resolvedDrainRetryMax = resolveNumber(raw.redis?.drainRetryMax); + const requestedGraphitiEndpoint = raw.graphiti?.endpoint ?? raw.endpoint; + const resolvedGraphitiEndpoint = isValidUrlString(requestedGraphitiEndpoint) + ? requestedGraphitiEndpoint + : DEFAULT_CONFIG.graphiti.endpoint; const resolvedGroupIdPrefix = raw.graphiti?.groupIdPrefix ?? raw.groupIdPrefix ?? DEFAULT_CONFIG.graphiti.groupIdPrefix; @@ -168,28 +178,23 @@ const resolveConfig = (value: PartialGraphitiConfig | null): GraphitiConfig => { raw.graphiti?.driftThreshold, raw.driftThreshold, ); - const resolvedFactStaleDays = resolveNumber( - raw.graphiti?.factStaleDays, - raw.factStaleDays, - ); - - const falkordb = { - redisEndpoint: resolvedRedisEndpoint, + const redis = { + endpoint: resolvedRedisEndpoint, batchSize: isPositiveInteger(resolvedBatchSize) ? 
resolvedBatchSize - : DEFAULT_CONFIG.falkordb.batchSize, + : DEFAULT_CONFIG.redis.batchSize, batchMaxBytes: isPositiveInteger(resolvedBatchMaxBytes) ? resolvedBatchMaxBytes - : DEFAULT_CONFIG.falkordb.batchMaxBytes, + : DEFAULT_CONFIG.redis.batchMaxBytes, sessionTtlSeconds: isPositiveInteger(resolvedSessionTtlSeconds) ? resolvedSessionTtlSeconds - : DEFAULT_CONFIG.falkordb.sessionTtlSeconds, + : DEFAULT_CONFIG.redis.sessionTtlSeconds, cacheTtlSeconds: isPositiveInteger(resolvedCacheTtlSeconds) ? resolvedCacheTtlSeconds - : DEFAULT_CONFIG.falkordb.cacheTtlSeconds, + : DEFAULT_CONFIG.redis.cacheTtlSeconds, drainRetryMax: isPositiveInteger(resolvedDrainRetryMax) ? resolvedDrainRetryMax - : DEFAULT_CONFIG.falkordb.drainRetryMax, + : DEFAULT_CONFIG.redis.drainRetryMax, }; const graphiti = { @@ -198,25 +203,14 @@ const resolveConfig = (value: PartialGraphitiConfig | null): GraphitiConfig => { driftThreshold: isUnitInterval(resolvedDriftThreshold) ? resolvedDriftThreshold : DEFAULT_CONFIG.graphiti.driftThreshold, - factStaleDays: isPositiveNumber(resolvedFactStaleDays) - ? 
resolvedFactStaleDays - : DEFAULT_CONFIG.graphiti.factStaleDays, }; return { - ...raw, - falkordb, + redis, graphiti, endpoint: graphiti.endpoint, groupIdPrefix: graphiti.groupIdPrefix, driftThreshold: graphiti.driftThreshold, - factStaleDays: graphiti.factStaleDays, - redisEndpoint: falkordb.redisEndpoint, - batchSize: falkordb.batchSize, - batchMaxBytes: falkordb.batchMaxBytes, - sessionTtlSeconds: falkordb.sessionTtlSeconds, - cacheTtlSeconds: falkordb.cacheTtlSeconds, - drainRetryMax: falkordb.drainRetryMax, }; }; @@ -252,23 +246,32 @@ export const resetConfigExplorerAdapterForTesting = (): void => { configExplorerFactory = createCosmiconfigAdapter; }; -const getConfigExplorerAdapter = (): ConfigExplorerAdapter | null => { +const getConfigExplorerAdapter = (): ConfigExplorerAdapter => { try { return configExplorerFactory(); - } catch { - return null; + } catch (err) { + throw new ConfigLoadError( + "Unable to initialize Graphiti config discovery", + { cause: err, code: "config-discovery-init" }, + ); } }; const loadConfigFile = ( adapter: ConfigExplorerAdapter | null, filePath: string, -): PartialGraphitiConfig | null => { +): RawGraphitiConfig | null => { try { const loaded = adapter?.load(filePath); - return loaded ? normalizeConfig(loaded.config) : null; - } catch { - return null; + const normalized = loaded ? normalizeConfig(loaded.config) : null; + validateExplicitConfig(normalized); + return normalized; + } catch (err) { + if (err instanceof ConfigLoadError) throw err; + throw new ConfigLoadError( + `Unable to load Graphiti config file: ${filePath}`, + { cause: err, code: "config-file-load" }, + ); } }; @@ -283,21 +286,24 @@ const getHomeDir = (): string | undefined => { const searchConfig = ( adapter: ConfigExplorerAdapter, directory?: string, -): ConfigSearchOutcome => { +): RawGraphitiConfig | null => { try { const loaded = adapter.search(directory); - return { - ok: true, - config: loaded ? 
normalizeConfig(loaded.config) : null, - }; - } catch { - return { ok: false }; + const normalized = loaded ? normalizeConfig(loaded.config) : null; + validateExplicitConfig(normalized); + return normalized; + } catch (err) { + if (err instanceof ConfigLoadError) throw err; + throw new ConfigLoadError("Unable to discover Graphiti config", { + cause: err, + code: "config-discovery-search", + }); } }; const loadLegacyConfig = ( adapter: ConfigExplorerAdapter, -): PartialGraphitiConfig | null => { +): RawGraphitiConfig | null => { const homeDir = getHomeDir(); if (!homeDir) return null; @@ -307,13 +313,27 @@ const loadLegacyConfig = ( ); }; -export function loadConfig(directory?: string): GraphitiConfig { - const adapter = getConfigExplorerAdapter(); - if (!adapter) return structuredClone(DEFAULT_CONFIG); +const isRecoverableConfigLoadFailure = (error: unknown): boolean => + error instanceof ConfigLoadError && + (error.code === "config-discovery-init" || + error.code === "config-discovery-search" || + error.code === "config-file-load"); - const searched = searchConfig(adapter, directory); - if (!searched.ok) return structuredClone(DEFAULT_CONFIG); - - const loaded = searched.config ?? loadLegacyConfig(adapter); - return resolveConfig(loaded); +export function loadConfig(directory?: string): GraphitiConfig { + try { + const adapter = getConfigExplorerAdapter(); + const loaded = searchConfig(adapter, directory); + const resolved = loaded ?? 
loadLegacyConfig(adapter); + validateExplicitConfig(resolved); + return resolveConfig(resolved); + } catch (error) { + if ( + !(error instanceof ConfigLoadError) || + !isRecoverableConfigLoadFailure(error) + ) { + throw error; + } + logger.warn(error.message, error); + return resolveConfig(null); + } } diff --git a/src/handlers/chat.test.ts b/src/handlers/chat.test.ts index 0d78578..b45ce06 100644 --- a/src/handlers/chat.test.ts +++ b/src/handlers/chat.test.ts @@ -1,12 +1,14 @@ import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { setSuppressConsoleWarningsDuringTestsOverride } from "../services/opencode-warning.ts"; import { createChatHandler } from "./chat.ts"; class MockSessionManager { + canonicalSessionId = "session-1"; + activeCalls: Array<{ sessionId: string; canonicalSessionId?: string }> = []; prepareInjectionResult: | { envelope: string; - factUuids: string[]; nodeRefs: string[]; refreshDecision: { classification: string; @@ -37,16 +39,12 @@ class MockSessionManager { groupId: "group-1", userGroupId: "user-1", injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], messageCount: 0, - pendingMessages: [] as string[], contextLimit: 200_000, isMain: true, hotTierReady: false, pendingInjection: undefined as { envelope: string; - factUuids: string[]; nodeRefs: string[]; refreshDecision: { classification: string; @@ -63,8 +61,19 @@ class MockSessionManager { // no-op for tests: activity tracking is not under test here } + markResolvedSessionActive( + sessionId: string, + canonicalSessionId?: string, + ): void { + this.activeCalls.push({ sessionId, canonicalSessionId }); + } + resolveSessionState() { - return { state: this.state, resolved: true }; + return { + state: this.state, + resolved: true, + canonicalSessionId: this.canonicalSessionId, + }; } prepareInjection(_sessionId: string, lastRequest?: string) { @@ -76,7 +85,6 @@ class 
MockSessionManager { ? { envelope: `${lastRequest}`, - factUuids: [], nodeRefs: [], refreshDecision: this.nextRefreshDecision, } @@ -114,6 +122,8 @@ class MockGraphitiAsync { } describe("chat handler", () => { + setSuppressConsoleWarningsDuringTestsOverride(true); + it("records a user event, prepares session_memory, and schedules async refresh on cache miss", async () => { const sessionManager = new MockSessionManager(); const redisEvents = new MockRedisEvents(); @@ -135,9 +145,10 @@ describe("chat handler", () => { assertEquals(redisEvents.calls[0].sessionId, "session-1"); assertEquals(sessionManager.state.messageCount, 1); assertEquals(sessionManager.state.injectedMemories, true); - assertEquals(sessionManager.state.pendingMessages, [ - "User: Continue the migration", - ]); + assertEquals( + sessionManager.state.latestUserRequest, + "Continue the migration", + ); assertStringIncludes( sessionManager.state.pendingInjection?.envelope ?? "", " { assertEquals(redisEvents.calls.length, 3); }); + it("routes child-session user prompts through the canonical parent session", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.canonicalSessionId = "parent-session"; + const redisEvents = new MockRedisEvents(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createChatHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, + }); + + await handler( + { sessionID: "child-session" }, + { parts: [{ type: "text", text: "Continue the child task" }] } as never, + ); + + assertEquals(redisEvents.calls[0].sessionId, "parent-session"); + assertEquals(sessionManager.activeCalls, [{ + sessionId: "child-session", + canonicalSessionId: "parent-session", + }]); + assertEquals(sessionManager.prepareInjectionCalls, [{ + sessionId: "parent-session", + lastRequest: "Continue the child task", + }]); + }); + + it("sanitizes injected memory from the 
user request before recording and refresh", async () => { + const sessionManager = new MockSessionManager(); + const redisEvents = new MockRedisEvents(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createChatHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, + }); + + await handler( + { sessionID: "session-1" }, + { + parts: [{ + type: "text", + text: + 'old\n\nContinue the migration', + }], + } as never, + ); + + assertEquals( + sessionManager.state.latestUserRequest, + "Continue the migration", + ); + assertEquals(redisEvents.calls[0].summary, "Continue the migration"); + assertEquals(graphitiAsync.refreshCalls, [{ + groupId: "group-1", + query: "Continue the migration", + }]); + }); + it("schedules a drain when the pending queue reaches the trigger threshold", async () => { const sessionManager = new MockSessionManager(); const _redisEvents = new MockRedisEvents(); @@ -304,4 +378,54 @@ describe("chat handler", () => { }]); } }); + + it("swallows prepareInjection failures so chat hooks degrade gracefully", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.prepareInjection = () => { + throw new Error("redis unavailable"); + }; + const redisEvents = new MockRedisEvents(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createChatHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, + }); + + await handler( + { sessionID: "session-1" }, + { parts: [{ type: "text", text: "Degrade gracefully" }] } as never, + ); + + assertEquals(redisEvents.calls.length >= 1, true); + assertEquals(sessionManager.state.injectedMemories, false); + assertEquals(graphitiAsync.refreshCalls, []); + assertEquals(graphitiAsync.drainCalls, []); + }); + + it("skips session resolution and hot-tier work when no text 
prompt is present", async () => { + const sessionManager = new MockSessionManager(); + const redisEvents = new MockRedisEvents(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createChatHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, + }); + + await handler( + { sessionID: "session-1" }, + { parts: [{ type: "file", path: "src/index.ts" }] } as never, + ); + + assertEquals(sessionManager.activeCalls, []); + assertEquals(sessionManager.prepareInjectionCalls, []); + assertEquals(redisEvents.calls, []); + assertEquals(graphitiAsync.refreshCalls, []); + assertEquals(graphitiAsync.drainCalls, []); + }); }); diff --git a/src/handlers/chat.ts b/src/handlers/chat.ts index f680af4..7dcb7e4 100644 --- a/src/handlers/chat.ts +++ b/src/handlers/chat.ts @@ -3,6 +3,7 @@ import type { GraphitiAsyncService } from "../services/graphiti-async.ts"; import { extractStructuredEvents } from "../services/event-extractor.ts"; import type { RedisEventsService } from "../services/redis-events.ts"; import { logger } from "../services/logger.ts"; +import { sanitizeMemoryInput } from "../services/render-utils.ts"; import type { SessionManager } from "../session.ts"; import { extractTextFromParts } from "../utils.ts"; @@ -17,59 +18,72 @@ export interface ChatHandlerDeps { drainTriggerSize: number; } -export function createChatHandler(deps: ChatHandlerDeps) { +export function createChatHandler(deps: ChatHandlerDeps): ChatMessageHook { const { sessionManager, redisEvents, graphitiAsync, drainTriggerSize } = deps; return async ({ sessionID }: ChatMessageInput, output: ChatMessageOutput) => { - sessionManager.markSessionActive(sessionID); - const { state, resolved } = await sessionManager.resolveSessionState( - sessionID, - ); - if (!resolved || !state?.isMain) return; + try { + sessionManager.markSessionActive(sessionID); - const messageText = 
extractTextFromParts(output.parts); - if (!messageText) return; + const messageText = extractTextFromParts(output.parts); + if (!messageText) return; + const sanitizedMessageText = sanitizeMemoryInput(messageText); + if (!sanitizedMessageText) return; - state.messageCount += 1; - state.latestUserRequest = messageText; - state.latestRefreshQuery = messageText; - state.pendingMessages.push(`User: ${messageText}`); + const { state, resolved, canonicalSessionId } = await sessionManager + .resolveSessionState( + sessionID, + ); + if (!resolved || !state?.isMain) return; + if (!canonicalSessionId) return; + sessionManager.markResolvedSessionActive(sessionID, canonicalSessionId); - let queueLength = 0; - for ( - const event of extractStructuredEvents({ - eventType: "chat.message", - sessionId: sessionID, - messageText, - messageCount: state.messageCount, - role: "user", - }) - ) { - queueLength = await redisEvents.recordEvent( - sessionID, - state.groupId, - event, - ); - } + state.messageCount += 1; + state.latestUserRequest = sanitizedMessageText; + state.latestRefreshQuery = sanitizedMessageText; - const prepared = await sessionManager.prepareInjection( - sessionID, - messageText, - ); - if (prepared) { - state.injectedMemories = true; - } - logger.info("Prepared local session memory for chat transform", { - sessionID, - hotTierReady: state.hotTierReady, - refreshClassification: prepared?.refreshDecision.classification, - }); + let queueLength = 0; + for ( + const event of extractStructuredEvents({ + eventType: "chat.message", + sessionId: sessionID, + messageText: sanitizedMessageText, + messageCount: state.messageCount, + role: "user", + }) + ) { + queueLength = await redisEvents.recordEvent( + canonicalSessionId, + state.groupId, + event, + ); + } - if (prepared && prepared.refreshDecision.shouldRefresh) { - graphitiAsync.scheduleCacheRefresh(state.groupId, messageText); - } - if (queueLength >= drainTriggerSize) { - graphitiAsync.scheduleDrain(state.groupId); + 
const prepared = await sessionManager.prepareInjection( + canonicalSessionId, + sanitizedMessageText, + ); + if (prepared) { + state.injectedMemories = true; + } + logger.info("Prepared local session memory for chat transform", { + sessionID: canonicalSessionId, + sourceSessionID: sessionID, + hotTierReady: state.hotTierReady, + refreshClassification: prepared?.refreshDecision.classification, + }); + + if (prepared && prepared.refreshDecision.shouldRefresh) { + graphitiAsync.scheduleCacheRefresh(state.groupId, sanitizedMessageText); + } + if (queueLength >= drainTriggerSize) { + graphitiAsync.scheduleDrain(state.groupId); + } + } catch (error) { + logger.warn("Unable to prepare local session memory for chat transform", { + sessionID, + error, + }); } }; } diff --git a/src/handlers/compacting.test.ts b/src/handlers/compacting.test.ts index c70f0ec..cceb797 100644 --- a/src/handlers/compacting.test.ts +++ b/src/handlers/compacting.test.ts @@ -1,29 +1,135 @@ import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { setSuppressConsoleWarningsDuringTestsOverride } from "../services/opencode-warning.ts"; import { createCompactingHandler } from "./compacting.ts"; +class MockSessionManager { + canonicalSessionId = "session-1"; + state = { + isMain: true, + hotTierReady: true, + pendingInjection: undefined as unknown, + }; + prepareInjectionCalls: string[] = []; + clearPendingInjectionCalls = 0; + activeCalls: Array<{ sessionId: string; canonicalSessionId?: string }> = []; + + resolveSessionState() { + return { + state: this.state, + resolved: true, + canonicalSessionId: this.canonicalSessionId, + }; + } + + prepareInjection(sessionId: string) { + this.prepareInjectionCalls.push(sessionId); + const prepared = { + envelope: + '', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "continue", + }, + 
}; + this.state.pendingInjection = prepared; + return prepared; + } + + markResolvedSessionActive(sessionId: string, canonicalSessionId?: string) { + this.activeCalls.push({ sessionId, canonicalSessionId }); + } + + clearPendingInjection() { + this.clearPendingInjectionCalls += 1; + this.state.pendingInjection = undefined; + } +} + describe("compacting handler", () => { + setSuppressConsoleWarningsDuringTestsOverride(true); + it("injects locally prepared session_memory without Graphiti reads", async () => { + const sessionManager = new MockSessionManager(); + const handler = createCompactingHandler({ + sessionManager: sessionManager as never, + }); + + const output = { context: ["existing"] }; + await handler({ sessionID: "session-1" }, output as never); + + assertEquals(output.context.length, 2); + assertStringIncludes(output.context[1], " { + const sessionManager = new MockSessionManager(); + sessionManager.canonicalSessionId = "parent-session"; + const handler = createCompactingHandler({ + sessionManager: sessionManager as never, + }); + + const output = { context: ["existing"] }; + await handler({ sessionID: "child-session" }, output as never); + + assertEquals(output.context.length, 2); + assertStringIncludes(output.context[1], " { const handler = createCompactingHandler({ sessionManager: { - getState() { - return { isMain: true, hotTierReady: true }; + resolveSessionState() { + return { + state: { isMain: true, hotTierReady: false }, + resolved: true, + canonicalSessionId: "session-1", + }; }, prepareInjection() { + throw new Error("redis unavailable"); + }, + } as never, + }); + + const output = { context: ["existing"] }; + await handler({ sessionID: "session-1" }, output as never); + + assertEquals(output.context, ["existing"]); + }); + + it("skips compaction injection when the canonical session cannot be resolved", async () => { + const handler = createCompactingHandler({ + sessionManager: { + resolveSessionState() { return { - envelope: - '', - factUuids: 
[], - nodeRefs: [], + state: null, + resolved: false, + canonicalSessionId: undefined, }; }, } as never, }); const output = { context: ["existing"] }; - await handler({ sessionID: "session-1" }, output as never); + await handler({ sessionID: "unknown-session" }, output as never); - assertEquals(output.context.length, 2); - assertStringIncludes(output.context[1], " { - const state = sessionManager.getState(sessionID); - if (!state?.isMain) return; + try { + const { + state, + resolved, + canonicalSessionId, + } = await sessionManager.resolveSessionState(sessionID); + if (!resolved || !canonicalSessionId) return; + if (!state?.isMain) return; + sessionManager.markResolvedSessionActive(sessionID, canonicalSessionId); - const prepared = await sessionManager.prepareInjection(sessionID); - if (!prepared?.envelope) return; - output.context.push(prepared.envelope); - logger.info("Injected local session_memory into compaction context", { - sessionID, - hotTierReady: state.hotTierReady, - }); + const prepared = await sessionManager.prepareInjection( + canonicalSessionId, + ); + if (!prepared?.envelope) return; + output.context.push(prepared.envelope); + sessionManager.clearPendingInjection(state, prepared); + logger.info("Injected local session_memory into compaction context", { + sessionID: canonicalSessionId, + sourceSessionID: sessionID, + hotTierReady: state.hotTierReady, + }); + } catch (error) { + logger.warn("Unable to prepare local session memory for compaction", { + sessionID, + error, + }); + } }; } diff --git a/src/handlers/event.test.ts b/src/handlers/event.test.ts index ffbbce0..576f544 100644 --- a/src/handlers/event.test.ts +++ b/src/handlers/event.test.ts @@ -1,6 +1,8 @@ -import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; +import { assertEquals } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import { createEventHandler } from "./event.ts"; +import { resolveContextLimit } from 
"../services/context-limit.ts"; +import { setLoggerSilentOverride } from "../services/logger.ts"; import type { SessionState } from "../session.ts"; class FakeClock { @@ -36,7 +38,9 @@ class FakeClock { class MockSessionManager { sessions = new Map(); parentIds = new Map(); - buffered = new Map(); + canonicalIds = new Map(); + buffered = new Map(); + pendingAssistantCompletions = new Set(); deletedSessions: string[] = []; activeMarks: string[] = []; idleCleanupCalls: string[] = []; @@ -68,10 +72,7 @@ class MockSessionManager { groupId, userGroupId, injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], messageCount: 0, - pendingMessages: [], contextLimit: 200_000, isMain: true, hotTierReady: false, @@ -83,20 +84,44 @@ class MockSessionManager { } setParentId(sessionId: string, parentId: string | null) { + const previousCanonicalId = this.canonicalIds.get(sessionId) ?? + (this.parentIds.get(sessionId) === null ? sessionId : undefined); this.parentIds.set(sessionId, parentId); + if (!parentId) { + this.canonicalIds.set(sessionId, sessionId); + return; + } + const canonicalId = this.canonicalIds.get(parentId) ?? 
parentId; + this.canonicalIds.set(sessionId, canonicalId); + if (previousCanonicalId && previousCanonicalId !== canonicalId) { + this.migrateState(sessionId, canonicalId); + } } setState(sessionId: string, state: SessionState) { this.sessions.set(sessionId, state); + if (!this.parentIds.has(sessionId) && !this.canonicalIds.has(sessionId)) { + this.parentIds.set(sessionId, null); + this.canonicalIds.set(sessionId, sessionId); + } } markSessionActive(sessionId: string) { this.activeMarks.push(sessionId); - const lifecycle = this.getLifecycle(sessionId); - lifecycle.generation += 1; - if (lifecycle.timerId !== null) { - this.clearTimerImpl(lifecycle.timerId); - lifecycle.timerId = null; + this.markLifecycleActive(sessionId); + const canonicalId = this.canonicalIds.get(sessionId); + if (canonicalId && canonicalId !== sessionId) { + this.activeMarks.push(canonicalId); + this.markLifecycleActive(canonicalId); + } + } + + markResolvedSessionActive(sessionId: string, canonicalSessionId?: string) { + this.activeMarks.push(sessionId); + this.markLifecycleActive(sessionId); + if (canonicalSessionId && canonicalSessionId !== sessionId) { + this.activeMarks.push(canonicalSessionId); + this.markLifecycleActive(canonicalSessionId); } } @@ -138,12 +163,39 @@ class MockSessionManager { return this.sessions.get(sessionId); } + resolveCanonicalSessionId(sessionId: string) { + const parentId = this.parentIds.get(sessionId); + if (parentId === null) return sessionId; + if (parentId === undefined) return this.canonicalIds.get(sessionId); + const canonicalId = this.canonicalIds.get(parentId) ?? parentId; + this.canonicalIds.set(sessionId, canonicalId); + this.markLifecycleActive(canonicalId); + return canonicalId; + } + resolveSessionState(sessionId: string) { - return { state: this.sessions.get(sessionId) ?? null, resolved: true }; + const canonicalSessionId = this.resolveCanonicalSessionId(sessionId); + let state = canonicalSessionId + ? this.sessions.get(canonicalSessionId) ?? 
null + : null; + if (canonicalSessionId && !state) { + state = this.createDefaultState("group-1", "user-1"); + this.sessions.set(canonicalSessionId, state); + } + return { + state, + resolved: canonicalSessionId !== undefined, + canonicalSessionId, + }; } - bufferAssistantPart(sessionId: string, messageId: string, text: string) { - this.buffered.set(`${sessionId}:${messageId}`, text); + bufferAssistantPart( + sessionId: string, + messageId: string, + text: string, + sourceSessionId = sessionId, + ) { + this.buffered.set(`${sessionId}:${messageId}`, { text, sourceSessionId }); } isAssistantBuffered() { @@ -151,17 +203,40 @@ class MockSessionManager { } finalizeAssistantMessage( - state: SessionState, + _state: SessionState, sessionId: string, messageId: string, ) { - const text = this.buffered.get(`${sessionId}:${messageId}`) ?? ""; - if (!text) return null; - state.pendingMessages.push(`Assistant: ${text}`); + const key = `${sessionId}:${messageId}`; + const buffered = this.buffered.get(key); + const text = buffered?.text ?? 
""; + if (!text) { + this.pendingAssistantCompletions.add(key); + return null; + } + this.pendingAssistantCompletions.delete(key); + this.buffered.delete(key); return text; } - deletePendingAssistant() {} + hasPendingAssistantCompletion(sessionId: string, messageId: string) { + return this.pendingAssistantCompletions.has(`${sessionId}:${messageId}`); + } + + deletePendingAssistant(sessionId: string, messageId: string) { + const key = `${sessionId}:${messageId}`; + this.buffered.delete(key); + this.pendingAssistantCompletions.delete(key); + } + + purgeAssistantBufferSource(sourceSessionId: string) { + for (const [key, buffered] of [...this.buffered.entries()]) { + if (buffered.sourceSessionId === sourceSessionId) { + this.buffered.delete(key); + this.pendingAssistantCompletions.delete(key); + } + } + } deleteSession(sessionId: string) { this.deletedSessions.push(sessionId); @@ -170,8 +245,29 @@ class MockSessionManager { this.lifecycles.delete(sessionId); this.sessions.delete(sessionId); this.parentIds.delete(sessionId); + this.canonicalIds.delete(sessionId); + for (const [childSessionId, parentId] of [...this.parentIds.entries()]) { + if (parentId === sessionId) this.parentIds.delete(childSessionId); + } + for ( + const [childSessionId, canonicalId] of [...this.canonicalIds.entries()] + ) { + if (canonicalId === sessionId) this.canonicalIds.delete(childSessionId); + } for (const key of [...this.buffered.keys()]) { - if (key.startsWith(`${sessionId}:`)) this.buffered.delete(key); + if (key.startsWith(`${sessionId}:`)) { + this.buffered.delete(key); + this.pendingAssistantCompletions.delete(key); + } + } + } + + private markLifecycleActive(sessionId: string) { + const lifecycle = this.getLifecycle(sessionId); + lifecycle.generation += 1; + if (lifecycle.timerId !== null) { + this.clearTimerImpl(lifecycle.timerId); + lifecycle.timerId = null; } } @@ -183,6 +279,39 @@ class MockSessionManager { } return lifecycle; } + + private migrateState(sessionId: string, 
canonicalSessionId: string) { + if (sessionId === canonicalSessionId) return; + const sourceState = this.sessions.get(sessionId); + if (!sourceState) return; + const targetState = this.sessions.get(canonicalSessionId); + if (targetState) { + targetState.injectedMemories ||= sourceState.injectedMemories; + targetState.messageCount += sourceState.messageCount; + targetState.contextLimit = Math.max( + targetState.contextLimit, + sourceState.contextLimit, + ); + targetState.isMain ||= sourceState.isMain; + targetState.hotTierReady ||= sourceState.hotTierReady; + if (sourceState.latestUserRequest) { + targetState.latestUserRequest = sourceState.latestUserRequest; + } + if (sourceState.latestRefreshQuery) { + targetState.latestRefreshQuery = sourceState.latestRefreshQuery; + } + if (sourceState.pendingInjection !== undefined) { + targetState.pendingInjection = sourceState.pendingInjection; + } + targetState.pendingInjectionGeneration = Math.max( + targetState.pendingInjectionGeneration, + sourceState.pendingInjectionGeneration, + ); + } else { + this.sessions.set(canonicalSessionId, sourceState); + } + this.sessions.delete(sessionId); + } } class MockRedisEvents { @@ -263,7 +392,7 @@ class MockRedisCache { touchedGroupIds: string[] = []; metaByGroupId = new Map< string, - { lastQuery?: string; lastRefresh?: number; factUuids: string[] } + { lastQuery?: string; lastRefresh?: number } >(); async touch(groupId: string) { @@ -295,7 +424,12 @@ class MockGraphitiAsync { } } -const createHandler = (sessionManager: MockSessionManager) => { +const createHandler = ( + sessionManager: MockSessionManager, + options: { + sdkClient?: { provider: { list: () => unknown | Promise } }; + } = {}, +) => { const redisEvents = new MockRedisEvents(); const redisSnapshot = new MockRedisSnapshot(); const redisCache = new MockRedisCache(); @@ -309,13 +443,19 @@ const createHandler = (sessionManager: MockSessionManager) => { graphitiAsync: graphitiAsync as never, defaultGroupId: "group-1", 
defaultUserGroupId: "user-1", - sdkClient: { provider: { list: () => ({ data: [] }) } } as never, + sdkClient: (options.sdkClient ?? + { provider: { list: () => ({ data: [] }) } }) as never, directory: "/tmp/project", }); return { handler, redisEvents, redisCache, redisSnapshot, graphitiAsync }; }; +const flushPromises = async () => { + await Promise.resolve(); + await Promise.resolve(); +}; + describe("event handler", () => { it("bootstraps main sessions and schedules primer on session.created", async () => { const sessionManager = new MockSessionManager(); @@ -339,7 +479,73 @@ describe("event handler", () => { assertEquals(graphitiAsync.primerCalls, ["group-1"]); }); - it("preserves assistant buffering and writes the completed assistant event on message.updated", async () => { + it("preserves existing canonical root state on duplicate session.created", async () => { + const sessionManager = new MockSessionManager(); + const existingState = sessionManager.createDefaultState( + "group-existing", + "user-existing", + ); + existingState.latestUserRequest = "preserve me"; + existingState.contextLimit = 123_456; + sessionManager.setParentId("session-1", null); + sessionManager.setState("session-1", existingState); + const { handler, redisEvents, redisCache, redisSnapshot, graphitiAsync } = + createHandler(sessionManager); + + await handler({ + event: { + type: "session.created", + properties: { info: { id: "session-1", parentID: null } }, + } as never, + }); + + assertEquals(sessionManager.getState("session-1"), existingState); + assertEquals( + sessionManager.getState("session-1")?.latestUserRequest, + "preserve me", + ); + assertEquals(sessionManager.getState("session-1")?.contextLimit, 123_456); + assertEquals(redisEvents.calls.length, 1); + assertEquals(redisEvents.calls[0].sessionId, "session-1"); + assertEquals(redisEvents.calls[0].groupId, "group-existing"); + assertEquals(redisEvents.calls[0].category, "session.meta"); + 
assertEquals(redisEvents.calls[0].summary, "Session created: session-1"); + assertEquals( + redisEvents.calls[0].continuityText, + "session created session-1", + ); + assertEquals(redisEvents.touchedSessionIds, ["session-1"]); + assertEquals(redisSnapshot.touchedSessionIds, ["session-1"]); + assertEquals(redisCache.touchedGroupIds, ["group-existing"]); + assertEquals(graphitiAsync.primerCalls, ["group-existing"]); + }); + + it("records child session creation and touch activity against the canonical parent session", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.setParentId("session-1", null); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + const { handler, redisEvents, redisCache, redisSnapshot, graphitiAsync } = + createHandler(sessionManager); + + await handler({ + event: { + type: "session.created", + properties: { info: { id: "child-1", parentID: "session-1" } }, + } as never, + }); + + assertEquals(redisEvents.calls.length, 1); + assertEquals(redisEvents.calls[0].sessionId, "session-1"); + assertEquals(redisEvents.touchedSessionIds, ["session-1"]); + assertEquals(redisSnapshot.touchedSessionIds, ["session-1"]); + assertEquals(redisCache.touchedGroupIds, ["group-1"]); + assertEquals(graphitiAsync.primerCalls, []); + }); + + it("preserves assistant buffering without durably storing filtered assistant operational chatter", async () => { const sessionManager = new MockSessionManager(); sessionManager.setState( "session-1", @@ -390,13 +596,7 @@ describe("event handler", () => { } as never, }); - assertEquals(sessionManager.getState("session-1")?.pendingMessages, [ - "Assistant: Buffered answer", - ]); - assertEquals(redisEvents.calls.length >= 1, true); - assertStringIncludes(redisEvents.calls[0].summary, "Buffered answer"); - assertEquals(redisEvents.calls[0].body, undefined); - assertEquals(typeof redisEvents.calls[0].continuityText, "string"); + 
assertEquals(redisEvents.calls.length, 0); }); it("records the compaction summary as a structured event before rebuilding the snapshot", async () => { @@ -444,7 +644,7 @@ describe("event handler", () => { clearTimer: clock.clearTimer, }); const state = sessionManager.createDefaultState("group-1", "user-1"); - state.latestUserRequest = "Refresh the cache"; + state.latestRefreshQuery = "Refresh the cache"; sessionManager.setState("session-1", state); const redisEvents = new MockRedisEvents(); const redisSnapshot = new MockRedisSnapshot(); @@ -479,6 +679,52 @@ describe("event handler", () => { }]); }); + it("does not treat latestUserRequest alone as an always-on refresh trigger", async () => { + const sessionManager = new MockSessionManager({ idleRetentionMs: 100 }); + const state = sessionManager.createDefaultState("group-1", "user-1"); + state.latestUserRequest = "latest user request only"; + sessionManager.setState("session-1", state); + const redisEvents = new MockRedisEvents(); + const redisSnapshot = new MockRedisSnapshot(); + const redisCache = new MockRedisCache(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createEventHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + redisCache: redisCache as never, + redisSnapshot: redisSnapshot as never, + graphitiAsync: graphitiAsync as never, + defaultGroupId: "group-1", + defaultUserGroupId: "user-1", + sdkClient: { provider: { list: () => ({ data: [] }) } } as never, + directory: "/tmp/project", + }); + + await handler({ + event: { + type: "session.idle", + properties: { sessionID: "session-1" }, + } as never, + }); + + await handler({ + event: { + type: "session.compacted", + properties: { + sessionID: "session-1", + summary: "Compacted without refresh decision", + }, + } as never, + }); + + assertEquals(graphitiAsync.refreshCalls, []); + assertEquals( + sessionManager.getState("session-1")?.latestRefreshQuery, + undefined, + ); + }); + it("uses 
Redis-backed refresh query fallback on session.idle after restart", async () => { const sessionManager = new MockSessionManager({ idleRetentionMs: 100 }); const state = sessionManager.createDefaultState("group-1", "user-1"); @@ -488,7 +734,6 @@ describe("event handler", () => { const redisCache = new MockRedisCache(); redisCache.metaByGroupId.set("group-1", { lastQuery: "resume refresh from redis", - factUuids: [], }); const graphitiAsync = new MockGraphitiAsync(); @@ -540,6 +785,59 @@ describe("event handler", () => { assertEquals(sessionManager.deletedSessions, ["session-1"]); }); + it("does not delete canonical parent state when a child session is deleted", async () => { + const sessionManager = new MockSessionManager(); + const parentState = sessionManager.createDefaultState("group-1", "user-1"); + parentState.latestUserRequest = "keep parent state"; + sessionManager.setParentId("session-1", null); + sessionManager.setState("session-1", parentState); + sessionManager.setParentId("child-1", "session-1"); + const { handler } = createHandler(sessionManager); + + await handler({ + event: { + type: "session.deleted", + properties: { sessionID: "child-1" }, + } as never, + }); + + assertEquals(sessionManager.deletedSessions, ["child-1"]); + assertEquals( + sessionManager.getState("session-1")?.latestUserRequest, + "keep parent state", + ); + }); + + it("purges child-buffered assistant state without deleting canonical parent state", async () => { + const sessionManager = new MockSessionManager(); + const parentState = sessionManager.createDefaultState("group-1", "user-1"); + parentState.latestUserRequest = "keep parent state"; + sessionManager.setParentId("session-1", null); + sessionManager.setState("session-1", parentState); + sessionManager.setParentId("child-1", "session-1"); + sessionManager.bufferAssistantPart( + "session-1", + "m1", + "buffered child reply", + "child-1", + ); + const { handler } = createHandler(sessionManager); + + await handler({ + event: { + 
type: "session.deleted", + properties: { sessionID: "child-1" }, + } as never, + }); + + assertEquals(sessionManager.deletedSessions, ["child-1"]); + assertEquals(sessionManager.buffered.size, 0); + assertEquals( + sessionManager.getState("session-1")?.latestUserRequest, + "keep parent state", + ); + }); + it("keeps reactivated sessions from being deleted by stale idle cleanup", async () => { const clock = new FakeClock(); const sessionManager = new MockSessionManager({ @@ -638,86 +936,932 @@ describe("event handler", () => { assertEquals(clock.timers.size, 0); }); - it("uses Redis-backed refresh query fallback on session.compacted after restart", async () => { - const sessionManager = new MockSessionManager(); - const state = sessionManager.createDefaultState("group-1", "user-1"); - sessionManager.setState("session-1", state); - const redisEvents = new MockRedisEvents(); - const redisSnapshot = new MockRedisSnapshot(); - const redisCache = new MockRedisCache(); - redisCache.metaByGroupId.set("group-1", { - lastQuery: "refresh after compact restart", - factUuids: [], + it("keeps canonical session state alive when child passthrough activity resumes after idle", async () => { + const clock = new FakeClock(); + const sessionManager = new MockSessionManager({ + idleRetentionMs: 100, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, }); - const graphitiAsync = new MockGraphitiAsync(); + sessionManager.setParentId("session-1", null); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + sessionManager.setParentId("child-session", "session-1"); + const { handler, redisEvents } = createHandler(sessionManager); - const handler = createEventHandler({ - sessionManager: sessionManager as never, - redisEvents: redisEvents as never, - redisCache: redisCache as never, - redisSnapshot: redisSnapshot as never, - graphitiAsync: graphitiAsync as never, - defaultGroupId: "group-1", - defaultUserGroupId: "user-1", - 
sdkClient: { provider: { list: () => ({ data: [] }) } } as never, - directory: "/tmp/project", + await handler({ + event: { + type: "session.idle", + properties: { sessionID: "session-1" }, + } as never, }); + clock.tick(50); + await handler({ event: { - type: "session.compacted", - properties: { sessionID: "session-1", summary: "Compacted state" }, + type: "tool.called", + properties: { + sessionID: "child-session", + tool: "Read", + path: "src/handlers/event.ts", + summary: "Read file src/handlers/event.ts", + }, } as never, }); - assertEquals(graphitiAsync.refreshCalls, [{ - groupId: "group-1", - query: "refresh after compact restart", - }]); - assertEquals( - sessionManager.getState("session-1")?.latestRefreshQuery, - "refresh after compact restart", - ); + clock.tick(60); + + assertEquals(sessionManager.getState("session-1")?.groupId, "group-1"); + assertEquals(sessionManager.deletedSessions, []); + assertEquals(redisEvents.calls.length, 1); + assertEquals(redisEvents.calls[0].sessionId, "session-1"); }); - it("records supported non-special events into the hot-tier log for main sessions", async () => { + it("survives buffered child assistant text through delayed parent canonicalization", async () => { const sessionManager = new MockSessionManager(); - sessionManager.setState( - "session-1", - sessionManager.createDefaultState("group-1", "user-1"), - ); - const { handler, redisEvents, graphitiAsync } = createHandler( - sessionManager, - ); + const parentState = sessionManager.createDefaultState("group-1", "user-1"); + sessionManager.setParentId("session-1", null); + sessionManager.setState("session-1", parentState); + sessionManager.setParentId("child-session", "session-1"); + sessionManager.canonicalIds.delete("child-session"); + const { handler } = createHandler(sessionManager); await handler({ event: { - type: "task.updated", + type: "message.part.updated", properties: { - sessionID: "session-1", - task: { - id: "task-1", - path: "src/handlers/event.ts", - 
summary: "Implement event handler blocker fix", + part: { + type: "text", + sessionID: "child-session", + messageID: "m1", + text: "buffered child reply before canonical resolution", }, }, } as never, }); + assertEquals(sessionManager.getState("session-1")?.groupId, "group-1"); + const bufferedKey = "session-1:m1"; + assertEquals( + sessionManager.buffered.get(bufferedKey)?.text, + "buffered child reply before canonical resolution", + ); + assertEquals( + sessionManager.buffered.get(bufferedKey)?.sourceSessionId, + "child-session", + ); + await handler({ event: { - type: "rules.loaded", + type: "message.updated", properties: { - sessionID: "session-1", - name: "CodingGuideline", - path: "docs/CodingGuideline.md", + info: { + id: "m1", + sessionID: "child-session", + role: "assistant", + time: { created: 1, completed: 2 }, + }, }, } as never, }); - await handler({ - event: { - type: "environment.updated", + assertEquals(sessionManager.buffered.size, 0); + }); + + it("refreshes the canonical parent lifecycle for cold-cache child message updates", async () => { + const clock = new FakeClock(); + const sessionManager = new MockSessionManager({ + idleRetentionMs: 100, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + sessionManager.setParentId("session-1", null); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + sessionManager.parentIds.set("child-session", "session-1"); + sessionManager.canonicalIds.delete("child-session"); + const { handler } = createHandler(sessionManager); + + await handler({ + event: { + type: "session.idle", + properties: { sessionID: "session-1" }, + } as never, + }); + + clock.tick(50); + + const idleGeneration = sessionManager.captureIdleCleanupGeneration( + "session-1", + ); + assertEquals(typeof idleGeneration, "number"); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m1", + sessionID: "child-session", + role: "user", + time: 
{ created: 1, completed: 2 }, + }, + }, + } as never, + }); + + assertEquals(sessionManager.activeMarks.includes("session-1"), true); + clock.tick(60); + + assertEquals(sessionManager.getState("session-1")?.groupId, "group-1"); + assertEquals(sessionManager.deletedSessions, []); + }); + + it("refreshes canonical parent on child session.created when canonical mapping is cold", async () => { + const clock = new FakeClock(); + const sessionManager = new MockSessionManager({ + idleRetentionMs: 100, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + sessionManager.setParentId("session-1", null); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + const { handler } = createHandler(sessionManager); + + await handler({ + event: { + type: "session.idle", + properties: { sessionID: "session-1" }, + } as never, + }); + + clock.tick(50); + + const idleGeneration = sessionManager.captureIdleCleanupGeneration( + "session-1", + ); + assertEquals(typeof idleGeneration, "number"); + + await handler({ + event: { + type: "session.created", + properties: { info: { id: "child-session", parentID: "session-1" } }, + } as never, + }); + + assertEquals(sessionManager.activeMarks.includes("session-1"), true); + clock.tick(60); + + assertEquals(sessionManager.getState("session-1")?.groupId, "group-1"); + assertEquals(sessionManager.deletedSessions, []); + }); + + it("uses Redis-backed refresh query fallback on session.compacted after restart", async () => { + const sessionManager = new MockSessionManager(); + const state = sessionManager.createDefaultState("group-1", "user-1"); + sessionManager.setState("session-1", state); + const redisEvents = new MockRedisEvents(); + const redisSnapshot = new MockRedisSnapshot(); + const redisCache = new MockRedisCache(); + redisCache.metaByGroupId.set("group-1", { + lastQuery: "refresh after compact restart", + }); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = 
createEventHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + redisCache: redisCache as never, + redisSnapshot: redisSnapshot as never, + graphitiAsync: graphitiAsync as never, + defaultGroupId: "group-1", + defaultUserGroupId: "user-1", + sdkClient: { provider: { list: () => ({ data: [] }) } } as never, + directory: "/tmp/project", + }); + + await handler({ + event: { + type: "session.compacted", + properties: { sessionID: "session-1", summary: "Compacted state" }, + } as never, + }); + + assertEquals(graphitiAsync.refreshCalls, [{ + groupId: "group-1", + query: "refresh after compact restart", + }]); + assertEquals( + sessionManager.getState("session-1")?.latestRefreshQuery, + "refresh after compact restart", + ); + }); + + it("caches the fallback context-limit after a transient provider failure", async () => { + const sessionManager = new MockSessionManager(); + const state = sessionManager.createDefaultState("group-1", "user-1"); + sessionManager.setState("session-1", state); + let providerCalls = 0; + const sdkClient = { + provider: { + list: () => { + providerCalls += 1; + if (providerCalls === 1) { + throw new Error("transient provider failure"); + } + return Promise.resolve({ + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context: 123_456 } }], + }], + }); + }, + }, + }; + const { handler } = createHandler(sessionManager, { sdkClient }); + + try { + setLoggerSilentOverride(true); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m1", + sessionID: "session-1", + role: "assistant", + time: { created: 1, completed: 2 }, + tokens: { output: 10 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + await flushPromises(); + + assertEquals(state.contextLimit, 200_000); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m2", + sessionID: "session-1", + role: "assistant", + time: { created: 3, completed: 4 
}, + tokens: { output: 12 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + await flushPromises(); + + assertEquals(providerCalls, 1); + assertEquals(state.contextLimit, 200_000); + } finally { + setLoggerSilentOverride(false); + } + }); + + it("caches unknown provider/model misses to avoid repeated lookups", async () => { + const sessionManager = new MockSessionManager(); + const state = sessionManager.createDefaultState("group-1", "user-1"); + sessionManager.setState("session-1", state); + let providerCalls = 0; + const sdkClient = { + provider: { + list: () => { + providerCalls += 1; + return Promise.resolve({ + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context: 123_456 } }], + }], + }); + }, + }, + }; + const { handler } = createHandler(sessionManager, { sdkClient }); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m1", + sessionID: "session-1", + role: "assistant", + time: { created: 1, completed: 2 }, + tokens: { output: 10 }, + providerID: "unknown-provider", + modelID: "unknown-model", + }, + }, + } as never, + }); + await flushPromises(); + + assertEquals(state.contextLimit, 200_000); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m2", + sessionID: "session-1", + role: "assistant", + time: { created: 3, completed: 4 }, + tokens: { output: 12 }, + providerID: "unknown-provider", + modelID: "unknown-model", + }, + }, + } as never, + }); + await flushPromises(); + + assertEquals(providerCalls, 1); + assertEquals(state.contextLimit, 200_000); + }); + + it("keeps successful positive context-limits cached across repeated lookups", async () => { + const sessionManager = new MockSessionManager(); + const firstState = sessionManager.createDefaultState("group-1", "user-1"); + const secondState = sessionManager.createDefaultState("group-1", "user-1"); + sessionManager.setState("session-1", firstState); + 
sessionManager.setState("session-2", secondState); + let providerCalls = 0; + const sdkClient = { + provider: { + list: () => { + providerCalls += 1; + return Promise.resolve({ + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context: 123_456 } }], + }], + }); + }, + }, + }; + const { handler } = createHandler(sessionManager, { sdkClient }); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m1", + sessionID: "session-1", + role: "assistant", + time: { created: 1, completed: 2 }, + tokens: { output: 10 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + await flushPromises(); + + assertEquals(providerCalls, 1); + assertEquals(firstState.contextLimit, 123_456); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m2", + sessionID: "session-2", + role: "assistant", + time: { created: 3, completed: 4 }, + tokens: { output: 12 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + await flushPromises(); + + assertEquals(providerCalls, 1); + assertEquals(secondState.contextLimit, 123_456); + }); + + it("separates cached context-limits by directory while preserving directory-less cache hits", async () => { + const cache = new Map< + string, + number | { value: number; expiresAt?: number } + >(); + const calls: Array> = []; + const sdkClient = { + provider: { + list: ({ query }: { query?: { directory?: string } }) => { + calls.push(query ?? {}); + const directory = query?.directory; + const context = directory === "/tmp/project-a" + ? 111_111 + : directory === "/tmp/project-b" + ? 
222_222 + : 333_333; + return Promise.resolve({ + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context } }], + }], + }); + }, + }, + }; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + sdkClient as never, + "/tmp/project-a", + cache, + ), + 111_111, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + sdkClient as never, + "/tmp/project-b", + cache, + ), + 222_222, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + sdkClient as never, + "/tmp/project-a", + cache, + ), + 111_111, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + sdkClient as never, + "", + cache, + ), + 333_333, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + sdkClient as never, + " ", + cache, + ), + 333_333, + ); + + assertEquals(calls, [ + { directory: "/tmp/project-a" }, + { directory: "/tmp/project-b" }, + {}, + ]); + }); + + it("applies async context-limit updates to the current canonical session state", async () => { + const sessionManager = new MockSessionManager(); + const parentState = sessionManager.createDefaultState("group-1", "user-1"); + const childState = sessionManager.createDefaultState("group-1", "user-1"); + childState.contextLimit = 1; + sessionManager.setParentId("session-1", null); + sessionManager.setState("session-1", parentState); + sessionManager.setState("child-session", childState); + + let resolveProviders!: ( + value: { + data: Array< + { + id: string; + models: Array<{ id: string; limit: { context: number } }>; + } + >; + }, + ) => void; + const sdkClient = { + provider: { + list: () => + new Promise((resolve) => { + resolveProviders = resolve; + }), + }, + }; + const providerResponse = { + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context: 123_456 } }], + }], + }; + const { handler } = createHandler(sessionManager, { sdkClient }); + + const eventRun = handler({ + event: { + type: "message.updated", + properties: { + 
info: { + id: "m1", + sessionID: "child-session", + role: "assistant", + time: { created: 1, completed: 2 }, + tokens: { output: 10 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + await eventRun; + + sessionManager.setParentId("child-session", "session-1"); + resolveProviders(providerResponse); + await flushPromises(); + await flushPromises(); + + assertEquals(sessionManager.getState("child-session"), undefined); + assertEquals(sessionManager.getState("session-1")?.contextLimit, 123_456); + }); + + it("ignores stale overlapping async context-limit writes for the same session", async () => { + const sessionManager = new MockSessionManager(); + const state = sessionManager.createDefaultState("group-1", "user-1"); + sessionManager.setState("session-1", state); + + let resolveFirst!: (value: typeof firstProviderResponse) => void; + let resolveSecond!: (value: typeof secondProviderResponse) => void; + let providerCalls = 0; + const firstProviderResponse = { + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context: 111_111 } }], + }], + }; + const secondProviderResponse = { + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context: 222_222 } }], + }], + }; + const sdkClient = { + provider: { + list: () => { + providerCalls += 1; + if (providerCalls === 1) { + return new Promise((resolve) => { + resolveFirst = resolve; + }); + } + return new Promise((resolve) => { + resolveSecond = resolve; + }); + }, + }, + }; + const { handler } = createHandler(sessionManager, { sdkClient }); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m1", + sessionID: "session-1", + role: "assistant", + time: { created: 1, completed: 2 }, + tokens: { output: 10 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m2", + sessionID: "session-1", + role: "assistant", + time: { created: 3, 
completed: 4 }, + tokens: { output: 12 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + + resolveSecond(secondProviderResponse); + await flushPromises(); + await flushPromises(); + assertEquals(state.contextLimit, 222_222); + + resolveFirst(firstProviderResponse); + await flushPromises(); + await flushPromises(); + + assertEquals(providerCalls, 2); + assertEquals(state.contextLimit, 222_222); + }); + + it("ignores stale child-vs-parent async context-limit races for one canonical session", async () => { + const sessionManager = new MockSessionManager(); + const parentState = sessionManager.createDefaultState("group-1", "user-1"); + sessionManager.setParentId("session-1", null); + sessionManager.setState("session-1", parentState); + sessionManager.setParentId("child-session", "session-1"); + + let resolveChild!: (value: typeof childProviderResponse) => void; + let resolveParent!: (value: typeof parentProviderResponse) => void; + let providerCalls = 0; + const childProviderResponse = { + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context: 111_111 } }], + }], + }; + const parentProviderResponse = { + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context: 222_222 } }], + }], + }; + const sdkClient = { + provider: { + list: () => { + providerCalls += 1; + if (providerCalls === 1) { + return new Promise((resolve) => { + resolveChild = resolve; + }); + } + return new Promise((resolve) => { + resolveParent = resolve; + }); + }, + }, + }; + const { handler } = createHandler(sessionManager, { sdkClient }); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m1", + sessionID: "child-session", + role: "assistant", + time: { created: 1, completed: 2 }, + tokens: { output: 10 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m2", + sessionID: "session-1", + role: 
"assistant", + time: { created: 3, completed: 4 }, + tokens: { output: 12 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + + resolveParent(parentProviderResponse); + await flushPromises(); + await flushPromises(); + assertEquals(parentState.contextLimit, 222_222); + + resolveChild(childProviderResponse); + await flushPromises(); + await flushPromises(); + + assertEquals(providerCalls, 2); + assertEquals(parentState.contextLimit, 222_222); + }); + + it("drops late async context-limit completions after session deletion", async () => { + const sessionManager = new MockSessionManager(); + const state = sessionManager.createDefaultState("group-1", "user-1"); + sessionManager.setState("session-1", state); + + let resolveProviders!: (value: typeof providerResponse) => void; + const providerResponse = { + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context: 123_456 } }], + }], + }; + const sdkClient = { + provider: { + list: () => + new Promise((resolve) => { + resolveProviders = resolve; + }), + }, + }; + const { handler } = createHandler(sessionManager, { sdkClient }); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m1", + sessionID: "session-1", + role: "assistant", + time: { created: 1, completed: 2 }, + tokens: { output: 10 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + + await handler({ + event: { + type: "session.deleted", + properties: { sessionID: "session-1" }, + } as never, + }); + + resolveProviders(providerResponse); + await flushPromises(); + await flushPromises(); + + assertEquals(sessionManager.getState("session-1"), undefined); + assertEquals(sessionManager.deletedSessions, ["session-1"]); + }); + + it("cleans stale context-limit lookups after superseded overlap and after settled writes", async () => { + const sessionManager = new MockSessionManager(); + const state = sessionManager.createDefaultState("group-1", "user-1"); + 
sessionManager.setState("session-1", state); + + let resolveFirst!: (value: typeof firstProviderResponse) => void; + let resolveSecond!: (value: typeof secondProviderResponse) => void; + let providerCalls = 0; + const firstProviderResponse = { + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context: 111_111 } }], + }], + }; + const secondProviderResponse = { + data: [{ + id: "openai", + models: [{ id: "gpt-5", limit: { context: 222_222 } }], + }], + }; + const sdkClient = { + provider: { + list: () => { + providerCalls += 1; + if (providerCalls === 1) { + return new Promise((resolve) => { + resolveFirst = resolve; + }); + } + return new Promise((resolve) => { + resolveSecond = resolve; + }); + }, + }, + }; + const { handler } = createHandler(sessionManager, { sdkClient }); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m1", + sessionID: "session-1", + role: "assistant", + time: { created: 1, completed: 2 }, + tokens: { output: 10 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m2", + sessionID: "session-1", + role: "assistant", + time: { created: 3, completed: 4 }, + tokens: { output: 12 }, + providerID: "openai", + modelID: "gpt-5", + }, + }, + } as never, + }); + + resolveSecond(secondProviderResponse); + await flushPromises(); + await flushPromises(); + assertEquals(state.contextLimit, 222_222); + + await handler({ + event: { + type: "session.deleted", + properties: { sessionID: "session-1" }, + } as never, + }); + + resolveFirst(firstProviderResponse); + await flushPromises(); + await flushPromises(); + + assertEquals(state.contextLimit, 222_222); + assertEquals(sessionManager.getState("session-1"), undefined); + }); + + it("records supported non-special events into the hot-tier log for main sessions", async () => { + const sessionManager = new MockSessionManager(); + 
sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + const { handler, redisEvents, graphitiAsync } = createHandler( + sessionManager, + ); + + await handler({ + event: { + type: "task.updated", + properties: { + sessionID: "session-1", + task: { + id: "task-1", + path: "src/handlers/event.ts", + summary: "Implement event handler blocker fix", + }, + }, + } as never, + }); + + await handler({ + event: { + type: "rules.loaded", + properties: { + sessionID: "session-1", + name: "CodingGuideline", + path: "docs/CodingGuideline.md", + }, + } as never, + }); + + await handler({ + event: { + type: "environment.updated", properties: { sessionID: "session-1", cwd: "/tmp/project", @@ -820,35 +1964,137 @@ describe("event handler", () => { assertEquals(typeof redisEvents.calls[0].continuityText, "string"); }); - it("skips the catch-all for events without a resolvable main session", async () => { + it("routes child-session passthrough events onto the canonical parent session", async () => { const sessionManager = new MockSessionManager(); - const childState = sessionManager.createDefaultState("group-1", "user-1"); - childState.isMain = false; - sessionManager.setState("child-session", childState); + sessionManager.setParentId("session-1", null); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + sessionManager.setParentId("child-session", "session-1"); const { handler, redisEvents } = createHandler(sessionManager); await handler({ event: { type: "tool.called", properties: { - sessionID: "missing-session", + sessionID: "child-session", tool: "Read", + path: "src/handlers/event.ts", summary: "Read file src/handlers/event.ts", }, } as never, }); + assertEquals(redisEvents.calls.length, 1); + assertEquals(redisEvents.calls[0].sessionId, "session-1"); + assertEquals(redisEvents.calls[0].category, "file.read"); + assertEquals( + 
sessionManager.activeMarks.includes("child-session"), + true, + ); + assertEquals(sessionManager.activeMarks.includes("session-1"), true); + }); + + it("routes child assistant buffering and completion through the canonical parent session", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.setParentId("session-1", null); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + sessionManager.setParentId("child-session", "session-1"); + const { handler, redisEvents } = createHandler(sessionManager); + + await handler({ + event: { + type: "message.part.updated", + properties: { + part: { + type: "text", + sessionID: "child-session", + messageID: "m1", + text: "Implemented the child-session fix", + }, + }, + } as never, + }); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m1", + sessionID: "child-session", + role: "assistant", + time: { created: 1, completed: 2 }, + }, + }, + } as never, + }); + + assertEquals(redisEvents.calls.length, 1); + assertEquals(redisEvents.calls[0].sessionId, "session-1"); + }); + + it("records assistant output when completion arrives before the buffered text part", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + const { handler, redisEvents } = createHandler(sessionManager); + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "m1", + sessionID: "session-1", + role: "assistant", + time: { created: 1, completed: 2 }, + }, + }, + } as never, + }); + + await handler({ + event: { + type: "message.part.updated", + properties: { + part: { + type: "text", + sessionID: "session-1", + messageID: "m1", + text: "Discovered the delayed session fix", + }, + }, + } as never, + }); + + assertEquals(redisEvents.calls.length, 2); + assertEquals( + 
redisEvents.calls.every((call) => call.sessionId === "session-1"), + true, + ); + }); + + it("skips the catch-all only for events without a resolvable canonical session", async () => { + const sessionManager = new MockSessionManager(); + const { handler, redisEvents } = createHandler(sessionManager); + await handler({ event: { type: "tool.called", properties: { - sessionID: "child-session", + sessionID: "missing-session", tool: "Read", summary: "Read file src/handlers/event.ts", }, } as never, }); - assertEquals(redisEvents.calls.length, 0); }); }); diff --git a/src/handlers/event.ts b/src/handlers/event.ts index c735b6f..e266266 100644 --- a/src/handlers/event.ts +++ b/src/handlers/event.ts @@ -7,7 +7,7 @@ import type { RedisCacheService } from "../services/redis-cache.ts"; import type { RedisEventsService } from "../services/redis-events.ts"; import type { RedisSnapshotService } from "../services/redis-snapshot.ts"; import { logger } from "../services/logger.ts"; -import type { SessionManager } from "../session.ts"; +import type { SessionManager, SessionState } from "../session.ts"; import { isTextPart } from "../utils.ts"; type EventHook = NonNullable; @@ -74,232 +74,356 @@ const getCompactionSummary = (value: unknown): string => { return typeof summary === "string" ? 
summary : ""; }; -export function createEventHandler(deps: EventHandlerDeps) { +export function createEventHandler(deps: EventHandlerDeps): EventHook { const { sessionManager, redisEvents, redisCache, redisSnapshot, graphitiAsync, - defaultGroupId, - defaultUserGroupId, sdkClient, directory, } = deps; - const contextLimitCache = new Map(); + const contextLimitCache = new Map< + string, + number | { value: number; expiresAt?: number } + >(); + const contextLimitLookupGeneration = new Map(); + let nextContextLimitLookupGeneration = 0; + const clearContextLimitLookupGeneration = ( + sessionId: string, + generation?: number, + ): void => { + if (generation === undefined) { + contextLimitLookupGeneration.delete(sessionId); + return; + } + if (contextLimitLookupGeneration.get(sessionId) === generation) { + contextLimitLookupGeneration.delete(sessionId); + } + }; - return async ({ event }: EventInput) => { - try { - if (event.type === "session.created") { - const info = event.properties.info; - const sessionId = info.id; - const parentId = info.parentID ?? 
null; - const isMain = !parentId; - sessionManager.setParentId(sessionId, parentId); - sessionManager.markSessionActive(sessionId); - - if (isMain) { - const nextState = sessionManager.createDefaultState( - defaultGroupId, - defaultUserGroupId, - ); - sessionManager.setState( - sessionId, - nextState, - ); - for ( - const structured of extractStructuredEvents({ - eventType: event.type, - sessionId, - properties: event.properties as Record, - role: "system", - }) - ) { - await redisEvents.recordEvent( - sessionId, - defaultGroupId, - structured, - ); - } - await Promise.all([ - redisEvents.touchSessionEvents(sessionId), - redisSnapshot.touchSnapshot(sessionId), - redisCache.touch(defaultGroupId), - ]); - graphitiAsync.schedulePrimer(defaultGroupId); - } - return; - } + const rebuildSnapshotAndScheduleRefresh = async ( + sessionId: string, + state: SessionState | null, + ): Promise => { + if (!state?.isMain) return; + const events = await redisEvents.getRecentSessionEvents( + sessionId, + 40, + true, + ); + await redisSnapshot.rebuildAndSave(sessionId, events); + graphitiAsync.scheduleDrain(state.groupId); + const refreshQuery = state.latestRefreshQuery ?? + (await redisCache.getMeta(state.groupId))?.lastQuery; + if (!refreshQuery) return; + state.latestRefreshQuery = refreshQuery; + graphitiAsync.scheduleCacheRefresh(state.groupId, refreshQuery); + }; - if (event.type === "session.idle") { - const sessionId = event.properties.sessionID; - const { state, resolved } = await sessionManager.resolveSessionState( - sessionId, - ); - if (!resolved || !state?.isMain) return; - const idleGeneration = sessionManager.captureIdleCleanupGeneration( - sessionId, - ); - if (idleGeneration === null) return; + const handleSessionLifecycleEvent = async ( + event: EventInput["event"], + ): Promise => { + if (event.type === "session.created") { + const info = event.properties.info; + const sessionId = info.id; + const parentId = info.parentID ?? 
null; + sessionManager.setParentId(sessionId, parentId); + sessionManager.markSessionActive(sessionId); - const events = await redisEvents.getRecentSessionEvents( + const { state, resolved, canonicalSessionId } = await sessionManager + .resolveSessionState(sessionId); + if (!resolved || !state?.isMain || !canonicalSessionId) return true; + sessionManager.markResolvedSessionActive(sessionId, canonicalSessionId); + + for ( + const structured of extractStructuredEvents({ + eventType: event.type, sessionId, - 40, - true, + properties: event.properties as Record, + role: "system", + }) + ) { + await redisEvents.recordEvent( + canonicalSessionId, + state.groupId, + structured, ); - await redisSnapshot.rebuildAndSave(sessionId, events); - state.hotTierReady = true; - graphitiAsync.scheduleDrain(state.groupId); - const refreshQuery = state.latestUserRequest ?? - state.latestRefreshQuery ?? - (await redisCache.getMeta(state.groupId))?.lastQuery; - if (refreshQuery) { - state.latestRefreshQuery = refreshQuery; - graphitiAsync.scheduleCacheRefresh( - state.groupId, - refreshQuery, - ); - } - sessionManager.scheduleIdleSessionCleanup(sessionId, idleGeneration); - return; } + await Promise.all([ + redisEvents.touchSessionEvents(canonicalSessionId), + redisSnapshot.touchSnapshot(canonicalSessionId), + redisCache.touch(state.groupId), + ]); + if (canonicalSessionId === sessionId) { + graphitiAsync.schedulePrimer(state.groupId); + } + return true; + } + + if (event.type === "session.idle") { + const sessionId = event.properties.sessionID; + const { state, resolved, canonicalSessionId } = await sessionManager + .resolveSessionState(sessionId); + if (!resolved || !state?.isMain || !canonicalSessionId) return true; + const idleGeneration = sessionManager.captureIdleCleanupGeneration( + canonicalSessionId, + ); + if (idleGeneration === null) return true; + + await rebuildSnapshotAndScheduleRefresh(canonicalSessionId, state); + state.hotTierReady = true; + 
sessionManager.scheduleIdleSessionCleanup( + canonicalSessionId, + idleGeneration, + ); + return true; + } - if (event.type === "session.deleted") { - const sessionId = (event.properties as unknown as { sessionID: string }) - .sessionID; - sessionManager.deleteSession(sessionId); - return; + if (event.type === "session.deleted") { + const sessionId = (event.properties as unknown as { sessionID: string }) + .sessionID; + const canonicalSessionId = await sessionManager.resolveCanonicalSessionId( + sessionId, + ); + clearContextLimitLookupGeneration(sessionId); + if (canonicalSessionId) { + clearContextLimitLookupGeneration(canonicalSessionId); + } + if (canonicalSessionId && canonicalSessionId !== sessionId) { + sessionManager.purgeAssistantBufferSource(sessionId); } + sessionManager.deleteSession(sessionId); + return true; + } - if (event.type === "session.compacted") { - const sessionId = event.properties.sessionID; - const { state, resolved } = await sessionManager.resolveSessionState( - sessionId, - ); - if (!resolved || !state?.isMain) return; + if (event.type === "session.compacted") { + const sessionId = event.properties.sessionID; + const { state, resolved, canonicalSessionId } = await sessionManager + .resolveSessionState(sessionId); + if (!resolved || !state?.isMain || !canonicalSessionId) return true; - const structured = extractStructuredEvents({ - eventType: event.type, - sessionId, - properties: event.properties as Record, - messageText: getCompactionSummary(event.properties), - role: "system", - }); - for (const item of structured) { - await redisEvents.recordEvent(sessionId, state.groupId, item); - } - const events = await redisEvents.getRecentSessionEvents( - sessionId, - 40, - true, - ); - await redisSnapshot.rebuildAndSave( - sessionId, - events, - ); - graphitiAsync.scheduleDrain(state.groupId); - const refreshQuery = state.latestUserRequest ?? - state.latestRefreshQuery ?? 
- (await redisCache.getMeta(state.groupId))?.lastQuery; - if (refreshQuery) { - state.latestRefreshQuery = refreshQuery; - graphitiAsync.scheduleCacheRefresh( - state.groupId, - refreshQuery, - ); - } - return; + const structured = extractStructuredEvents({ + eventType: event.type, + sessionId, + properties: event.properties as Record, + messageText: getCompactionSummary(event.properties), + role: "system", + }); + for (const item of structured) { + await redisEvents.recordEvent(canonicalSessionId, state.groupId, item); } + await rebuildSnapshotAndScheduleRefresh(canonicalSessionId, state); + return true; + } - if (event.type === "message.updated") { - const info = event.properties.info; - const sessionId = info.sessionID; - sessionManager.markSessionActive(sessionId); - const { state, resolved } = await sessionManager.resolveSessionState( - sessionId, - ); - if (!resolved || !state?.isMain) return; + return false; + }; - if (info.role !== "assistant") { - sessionManager.deletePendingAssistant(sessionId, info.id); - return; - } + const handleMessageEvent = async ( + event: EventInput["event"], + ): Promise => { + if (event.type === "message.updated") { + const info = event.properties.info; + const sessionId = info.sessionID; + const { state, resolved, canonicalSessionId } = await sessionManager + .resolveSessionState(sessionId); + if (!resolved || !state?.isMain || !canonicalSessionId) return true; + sessionManager.markResolvedSessionActive(sessionId, canonicalSessionId); - const time = info.time as { created: number; completed?: number }; - if (!time?.completed) return; - if (sessionManager.isAssistantBuffered(sessionId, info.id)) return; + if (info.role !== "assistant") { + sessionManager.deletePendingAssistant(canonicalSessionId, info.id); + return true; + } - const assistantText = sessionManager.finalizeAssistantMessage( - state, - sessionId, - info.id, - "message.updated", - ); - if (assistantText) { - for ( - const structured of extractStructuredEvents({ - 
eventType: event.type, - sessionId, - properties: event.properties as Record, - messageText: assistantText, - role: "assistant", - }) - ) { - await redisEvents.recordEvent(sessionId, state.groupId, structured); - } + const time = info.time as { created: number; completed?: number }; + if (!time?.completed) return true; + if (sessionManager.isAssistantBuffered(canonicalSessionId, info.id)) { + return true; + } + + const assistantText = sessionManager.finalizeAssistantMessage( + state, + canonicalSessionId, + info.id, + "message.updated", + ); + if (assistantText) { + for ( + const structured of extractStructuredEvents({ + eventType: event.type, + sessionId, + properties: event.properties as Record, + messageText: assistantText, + role: "assistant", + }) + ) { + await redisEvents.recordEvent( + canonicalSessionId, + state.groupId, + structured, + ); } + } - if (info.tokens && info.providerID && info.modelID) { - const capturedState = state; - resolveContextLimit( + if (info.tokens && info.providerID && info.modelID) { + const lookupSessionId = canonicalSessionId; + const lookupGeneration = ++nextContextLimitLookupGeneration; + contextLimitLookupGeneration.set(lookupSessionId, lookupGeneration); + const cleanupSessionIds = new Set([lookupSessionId]); + void (async () => { + const limit = await resolveContextLimit( info.providerID as string, info.modelID as string, sdkClient, directory, contextLimitCache, - ).then((limit) => { - capturedState.contextLimit = limit; - }).catch((err) => - logger.debug("Failed to resolve context limit", err) ); - } - return; + if ( + contextLimitLookupGeneration.get(lookupSessionId) !== + lookupGeneration + ) { + return; + } + const currentCanonicalSessionId = await sessionManager + .resolveCanonicalSessionId(sessionId); + if (!currentCanonicalSessionId) return; + cleanupSessionIds.add(currentCanonicalSessionId); + if ( + currentCanonicalSessionId !== lookupSessionId && + (contextLimitLookupGeneration.get(currentCanonicalSessionId) ?? 
+ -1) > + lookupGeneration + ) { + return; + } + if (currentCanonicalSessionId !== lookupSessionId) { + contextLimitLookupGeneration.set( + currentCanonicalSessionId, + lookupGeneration, + ); + } + if ( + contextLimitLookupGeneration.get(currentCanonicalSessionId) !== + lookupGeneration + ) { + return; + } + const currentState = sessionManager.getState( + currentCanonicalSessionId, + ); + if (!currentState?.isMain) return; + currentState.contextLimit = limit; + })().catch((err) => + logger.debug("Failed to resolve context limit", err) + ).finally(() => { + for (const lookupSessionId of cleanupSessionIds) { + clearContextLimitLookupGeneration( + lookupSessionId, + lookupGeneration, + ); + } + }); } + return true; + } - if (event.type === "message.part.updated") { - const part = event.properties.part; - if (!isTextPart(part)) return; - sessionManager.markSessionActive(part.sessionID); - sessionManager.bufferAssistantPart( - part.sessionID, + if (event.type === "message.part.updated") { + const part = event.properties.part; + if (!isTextPart(part)) return true; + const { + state, + resolved, + canonicalSessionId: resolvedCanonicalSessionId, + } = await sessionManager.resolveSessionState(part.sessionID); + const canonicalSessionId = resolvedCanonicalSessionId ?? 
part.sessionID; + sessionManager.markResolvedSessionActive( + part.sessionID, + canonicalSessionId, + ); + sessionManager.bufferAssistantPart( + canonicalSessionId, + part.messageID, + part.text, + part.sessionID, + ); + if ( + !sessionManager.hasPendingAssistantCompletion( + canonicalSessionId, part.messageID, - part.text, - ); - return; - } - - if (!passthroughEventTypes.has(event.type)) { - return; + ) + ) { + return true; } - const sessionId = getEventSessionId(event.properties); - if (!sessionId) return; + if (!resolved || !state?.isMain) return true; - const { state, resolved } = await sessionManager.resolveSessionState( - sessionId, + const assistantText = sessionManager.finalizeAssistantMessage( + state, + canonicalSessionId, + part.messageID, + "message.part.updated", ); - if (!resolved || !state?.isMain) return; + if (!assistantText) return true; for ( const structured of extractStructuredEvents({ - eventType: event.type, - sessionId, + eventType: "message.updated", + sessionId: part.sessionID, properties: event.properties as Record, + messageText: assistantText, + role: "assistant", }) ) { - await redisEvents.recordEvent(sessionId, state.groupId, structured); + await redisEvents.recordEvent( + canonicalSessionId, + state.groupId, + structured, + ); } + return true; + } + + return false; + }; + + const handlePassthroughEvent = async ( + event: EventInput["event"], + ): Promise => { + if (!passthroughEventTypes.has(event.type)) return; + + const sessionId = getEventSessionId(event.properties); + if (!sessionId) return; + + sessionManager.markSessionActive(sessionId); + + const { state, resolved, canonicalSessionId } = await sessionManager + .resolveSessionState(sessionId); + if (!resolved || !state?.isMain || !canonicalSessionId) return; + sessionManager.markResolvedSessionActive(sessionId, canonicalSessionId); + + for ( + const structured of extractStructuredEvents({ + eventType: event.type, + sessionId, + properties: event.properties as Record, + }) + ) 
{ + await redisEvents.recordEvent( + canonicalSessionId, + state.groupId, + structured, + ); + } + }; + + return async ({ event }: EventInput) => { + try { + if (await handleSessionLifecycleEvent(event)) return; + if (await handleMessageEvent(event)) return; + await handlePassthroughEvent(event); } catch (err) { logger.error("Event handler error", { type: event.type, err }); } diff --git a/src/handlers/messages.test.ts b/src/handlers/messages.test.ts index ec2c45e..70b8676 100644 --- a/src/handlers/messages.test.ts +++ b/src/handlers/messages.test.ts @@ -1,35 +1,71 @@ import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { spy } from "jsr:@std/testing@^1.0.0/mock"; +import { logger } from "../services/logger.ts"; import { createMessagesHandler } from "./messages.ts"; +class MockSessionManager { + canonicalSessionId = "session-1"; + state = { + isMain: true, + hotTierReady: false, + pendingInjection: undefined as { + envelope: string; + nodeRefs: string[]; + refreshDecision: { + classification: string; + shouldRefresh: boolean; + similarity: number; + threshold: number; + cachedQuery: string | null; + }; + } | undefined, + }; + prepareInjectionImpl?: (sessionId: string, lastRequest?: string) => unknown; + activeCalls: Array<{ sessionId: string; canonicalSessionId?: string }> = []; + clearPendingInjection(state: typeof this.state, prepared?: unknown) { + if (state.pendingInjection === prepared) { + state.pendingInjection = undefined; + } + } + + resolveSessionState() { + return { + state: this.state, + resolved: true, + canonicalSessionId: this.canonicalSessionId, + }; + } + + prepareInjection(sessionId: string, lastRequest?: string) { + if (this.prepareInjectionImpl) { + return this.prepareInjectionImpl(sessionId, lastRequest); + } + return this.state.pendingInjection; + } + + markResolvedSessionActive(sessionId: string, canonicalSessionId?: string) { + this.activeCalls.push({ 
sessionId, canonicalSessionId }); + } +} + describe("messages handler", () => { it("injects pending session memory into the latest user message", async () => { - const state = { - isMain: true, - visibleFactUuids: [], - pendingInjection: { - envelope: - 'fresh', - factUuids: ["fact-1"], - nodeRefs: [], - refreshDecision: { - classification: "aligned", - shouldRefresh: false, - similarity: 1, - threshold: 0.5, - cachedQuery: "fresh", - }, + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'fresh', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "fresh", }, }; const handler = createMessagesHandler({ - sessionManager: { - getState() { - return state; - }, - prepareInjection() { - throw new Error("should not be called"); - }, - } as never, + sessionManager: sessionManager as never, }); const output = { @@ -41,38 +77,37 @@ describe("messages handler", () => { await handler({}, output as never); assertStringIncludes(output.messages[0].parts[0].text, " { - const state = { - isMain: true, - visibleFactUuids: [] as string[], - pendingInjection: undefined as unknown, + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = undefined; + sessionManager.prepareInjectionImpl = ( + sessionId: string, + lastRequest?: string, + ) => { + assertEquals(sessionId, "session-1"); + assertEquals(lastRequest, "fallback request"); + return { + envelope: + 'fallback request', + nodeRefs: [], + refreshDecision: { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }, + }; }; const handler = createMessagesHandler({ - sessionManager: { - getState() { - return state; - }, - prepareInjection(sessionId: string, lastRequest?: string) { - assertEquals(sessionId, "session-1"); - assertEquals(lastRequest, "fallback request"); - return { - envelope: - 'fallback 
request', - factUuids: [], - nodeRefs: [], - refreshDecision: { - classification: "miss", - shouldRefresh: true, - similarity: 0, - threshold: 0.5, - cachedQuery: null, - }, - }; - }, - } as never, + sessionManager: sessionManager as never, }); const output = { @@ -87,33 +122,28 @@ describe("messages handler", () => { }); it("falls back to latest user text when transform fallback message is non-string", async () => { - const state = { - isMain: true, - visibleFactUuids: [] as string[], - pendingInjection: undefined as unknown, + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = undefined; + sessionManager.prepareInjectionImpl = ( + sessionId: string, + lastRequest?: string, + ) => { + assertEquals(sessionId, "session-1"); + assertEquals(lastRequest, "fallback request"); + return { + envelope: '', + nodeRefs: [], + refreshDecision: { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }, + }; }; const handler = createMessagesHandler({ - sessionManager: { - getState() { - return state; - }, - prepareInjection(sessionId: string, lastRequest?: string) { - assertEquals(sessionId, "session-1"); - assertEquals(lastRequest, "fallback request"); - return { - envelope: '', - factUuids: [], - nodeRefs: [], - refreshDecision: { - classification: "miss", - shouldRefresh: true, - similarity: 0, - threshold: 0.5, - cachedQuery: null, - }, - }; - }, - } as never, + sessionManager: sessionManager as never, }); const output = { @@ -131,34 +161,29 @@ describe("messages handler", () => { }); it("falls back to the latest user text as the recall query", async () => { - const state = { - isMain: true, - visibleFactUuids: [] as string[], - pendingInjection: undefined as unknown, + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = undefined; + sessionManager.prepareInjectionImpl = ( + sessionId: string, + lastRequest?: string, + ) => { + 
assertEquals(sessionId, "session-1"); + assertEquals(lastRequest, "message body query"); + return { + envelope: + 'message body query', + nodeRefs: [], + refreshDecision: { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }, + }; }; const handler = createMessagesHandler({ - sessionManager: { - getState() { - return state; - }, - prepareInjection(sessionId: string, lastRequest?: string) { - assertEquals(sessionId, "session-1"); - assertEquals(lastRequest, "message body query"); - return { - envelope: - 'message body query', - factUuids: [], - nodeRefs: [], - refreshDecision: { - classification: "miss", - shouldRefresh: true, - similarity: 0, - threshold: 0.5, - cachedQuery: null, - }, - }; - }, - } as never, + sessionManager: sessionManager as never, }); const output = { @@ -172,32 +197,21 @@ describe("messages handler", () => { assertStringIncludes(output.messages[0].parts[0].text, " { - const state = { - isMain: true, - visibleFactUuids: [] as string[], - pendingInjection: { - envelope: '', - factUuids: [], - nodeRefs: [], - refreshDecision: { - classification: "aligned", - shouldRefresh: false, - similarity: 1, - threshold: 0.5, - cachedQuery: "next", - }, + it("does not mutate assistant history text while reinjecting the latest user prompt", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: '', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "next", }, }; const handler = createMessagesHandler({ - sessionManager: { - getState() { - return state; - }, - prepareInjection() { - return state.pendingInjection; - }, - } as never, + sessionManager: sessionManager as never, }); const output = { @@ -218,91 +232,338 @@ describe("messages handler", () => { }; await handler({}, output as never); - assertEquals(state.visibleFactUuids, ["fact-1", "fact-2"]); + 
assertStringIncludes(output.messages[1].parts[0].text, "', + ); }); - it("preserves legacy Graphiti memory data-uuids extraction semantics", async () => { - const state = { - isMain: true, - visibleFactUuids: [] as string[], - pendingInjection: undefined as unknown, + it("rewrites legacy memory at the latest user prompt into a single canonical injection", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'next', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "next", + }, }; const handler = createMessagesHandler({ - sessionManager: { - getState() { - return state; - }, - prepareInjection() { - return { - envelope: '', - factUuids: [], - nodeRefs: [], - refreshDecision: { - classification: "aligned", - shouldRefresh: false, - similarity: 1, - threshold: 0.5, - cachedQuery: "next", - }, - }; + sessionManager: sessionManager as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: '\n\nnext', + }], + }], + }; + + await handler({}, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, "', + ), + false, + ); + assertStringIncludes(output.messages[0].parts[0].text, "next"); + }); + + it("rewrites leading legacy memory blocks with empty or missing data-uuids", async () => { + const cases = [ + '\n\nnext', + "\n\nnext", + ]; + + for (const text of cases) { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'next', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "next", }, - } as never, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, 
+ parts: [{ type: "text", text }], + }], + }; + + await handler({}, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, " { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'next', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "next", + }, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, }); const output = { - messages: [ - { - info: { role: "assistant", sessionID: "session-1" }, + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: + 'next\n\nstale memory', + }], + }], + }; + + await handler({}, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, "stale memory', + ); + }); + + it("preserves literal user-authored session memory XML in the latest user message", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'inspect example', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "inspect example", + }, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: + 'Please inspect this example:\n\nexample', + }], + }], + }; + + await handler({} as never, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, "example', + ); + }); + + it("preserves leading user-authored session_memory blocks that do not match the injected shape", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'inspect example', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + 
shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "inspect example", + }, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const userAuthoredBlock = + 'user-authored example'; + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: `${userAuthoredBlock}\n\ninspect example`, + }], + }], + }; + + await handler({} as never, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, userAuthoredBlock); + }); + + it("preserves leading user-authored legacy and persistent memory blocks", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'inspect example', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "inspect example", + }, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const cases = [ + "user-authored example", + "user-authored example", + ]; + + for (const userAuthoredBlock of cases) { + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, parts: [{ type: "text", - text: '', + text: `${userAuthoredBlock}\n\ninspect example`, }], - }, - { - info: { role: "user", sessionID: "session-1" }, - parts: [{ type: "text", text: "next" }], - }, - ], + }], + }; + + await handler({} as never, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, userAuthoredBlock); + } + }); + + it("preserves leading user-authored non-empty legacy memory blocks without data-uuids", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'inspect example', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "inspect example", + 
}, }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); - await handler({}, output as never); + const userAuthoredBlock = "user-authored example"; + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: `${userAuthoredBlock}\n\ninspect example`, + }], + }], + }; - assertEquals(state.visibleFactUuids, ["fact-legacy-1", "fact-legacy-2"]); + await handler({} as never, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, userAuthoredBlock); + }); + + it("reports rewroteExistingMemory when canonical or legacy blocks were scrubbed", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'next', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "next", + }, + }; + const infoSpy = spy(logger, "info"); + try { + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: '\n\nnext', + }], + }], + }; + + await handler({}, output as never); + + const call = infoSpy.calls.find((entry) => + entry.args[0] === "Injected canonical session_memory block" + ); + assertEquals(Boolean(call), true); + assertEquals( + (call?.args[1] as { rewroteExistingMemory: boolean }) + .rewroteExistingMemory, + true, + ); + } finally { + infoSpy.restore(); + } }); - it("passes current-turn visible fact uuids into prepareInjection", async () => { - const state = { - isMain: true, - visibleFactUuids: ["stale-fact"] as string[], - pendingInjection: undefined as unknown, + it("does not scrub canonical and legacy memory blocks from earlier prompt history", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: 
+ 'continue', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "continue", + }, }; const handler = createMessagesHandler({ - sessionManager: { - getState() { - return state; - }, - prepareInjection( - sessionId: string, - lastRequest?: string, - visibleFactUuids?: string[], - ) { - assertEquals(sessionId, "session-1"); - assertEquals(lastRequest, "next"); - assertEquals(visibleFactUuids, ["fact-1", "fact-2"]); - return { - envelope: '', - factUuids: [], - nodeRefs: [], - refreshDecision: { - classification: "aligned", - shouldRefresh: false, - similarity: 1, - threshold: 0.5, - cachedQuery: "next", - }, - }; - }, - } as never, + sessionManager: sessionManager as never, }); const output = { @@ -312,56 +573,57 @@ describe("messages handler", () => { parts: [{ type: "text", text: - '', + 'before canonical alpha after canonical', }], }, { info: { role: "user", sessionID: "session-1" }, - parts: [{ type: "text", text: "next" }], + parts: [{ + type: "text", + text: + 'before legacy old memory after legacy', + }], + }, + { + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "continue" }], }, ], }; await handler({} as never, output as never); - assertEquals(state.visibleFactUuids, ["fact-1", "fact-2"]); - assertStringIncludes(output.messages[1].parts[0].text, "alpha after canonical', + ); + assertEquals( + output.messages[1].parts[0].text, + 'before legacy old memory after legacy', + ); + assertStringIncludes(output.messages[2].parts[0].text, " { - const state = { - isMain: true, - visibleFactUuids: [] as string[], - pendingInjection: undefined as unknown, + it("does not scrub standalone persistent memory blocks from earlier prompt history", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'continue', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + 
shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "continue", + }, }; const handler = createMessagesHandler({ - sessionManager: { - getState() { - return state; - }, - prepareInjection( - sessionId: string, - lastRequest?: string, - visibleFactUuids?: string[], - ) { - assertEquals(sessionId, "session-1"); - assertEquals(lastRequest, "continue"); - assertEquals(visibleFactUuids, ["fact-1", "fact-2", "fact-3"]); - return { - envelope: - 'continue', - factUuids: [], - nodeRefs: [], - refreshDecision: { - classification: "aligned", - shouldRefresh: false, - similarity: 1, - threshold: 0.5, - cachedQuery: "continue", - }, - }; - }, - } as never, + sessionManager: sessionManager as never, }); const output = { @@ -371,14 +633,7 @@ describe("messages handler", () => { parts: [{ type: "text", text: - '', - }], - }, - { - info: { role: "assistant", sessionID: "session-1" }, - parts: [{ - type: "text", - text: '', + 'before standalone stale memory after standalone', }], }, { @@ -388,17 +643,23 @@ describe("messages handler", () => { ], }; - await handler({}, output as never); + await handler({} as never, output as never); - assertEquals(state.visibleFactUuids, ["fact-1", "fact-2", "fact-3"]); - assertStringIncludes(output.messages[2].parts[0].text, "stale memory after standalone', + ); + assertStringIncludes(output.messages[1].parts[0].text, " { const newerPrepared = { envelope: 'newer', - factUuids: ["fact-2"], nodeRefs: [], refreshDecision: { classification: "aligned", @@ -408,33 +669,25 @@ describe("messages handler", () => { cachedQuery: "newer", }, }; - const state = { - isMain: true, - visibleFactUuids: [] as string[], - pendingInjection: undefined as typeof newerPrepared | undefined, + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = undefined; + sessionManager.prepareInjectionImpl = () => { + sessionManager.state.pendingInjection = newerPrepared; + return { + envelope: + 'older', + nodeRefs: [], + 
refreshDecision: { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }, + }; }; const handler = createMessagesHandler({ - sessionManager: { - getState() { - return state; - }, - prepareInjection() { - state.pendingInjection = newerPrepared; - return { - envelope: - 'older', - factUuids: ["fact-1"], - nodeRefs: [], - refreshDecision: { - classification: "miss", - shouldRefresh: true, - similarity: 0, - threshold: 0.5, - cachedQuery: null, - }, - }; - }, - } as never, + sessionManager: sessionManager as never, }); const output = { @@ -445,14 +698,121 @@ describe("messages handler", () => { }; await handler({ message: "current request" } as never, output as never); - assertEquals(state.pendingInjection, newerPrepared); + assertEquals(sessionManager.state.pendingInjection, newerPrepared); assertStringIncludes(output.messages[0].parts[0].text, "older"); }); + it("preserves existing memory blocks when prepareInjection returns null", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = undefined; + sessionManager.prepareInjectionImpl = () => null; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const assistantText = + 'before canonical alpha after canonical'; + const userText = '\n\ncontinue'; + const output = { + messages: [ + { + info: { role: "assistant", sessionID: "session-1" }, + parts: [{ type: "text", text: assistantText }], + }, + { + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: userText }], + }, + ], + }; + + await handler({ message: "continue" } as never, output as never); + + assertEquals(output.messages[0].parts[0].text, assistantText); + assertEquals(output.messages[1].parts[0].text, userText); + }); + + it("preserves whitespace-sensitive history text outside the reinjection target", async () => { + const sessionManager = new MockSessionManager(); + 
sessionManager.state.pendingInjection = { + envelope: + 'continue', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "continue", + }, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const assistantText = + 'assistant spacing\n\nalpha\n\n keep-indentation'; + const output = { + messages: [ + { + info: { role: "assistant", sessionID: "session-1" }, + parts: [{ type: "text", text: assistantText }], + }, + { + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "continue" }], + }, + ], + }; + + await handler({} as never, output as never); + + assertEquals(output.messages[0].parts[0].text, assistantText); + assertStringIncludes(output.messages[1].parts[0].text, " { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'continue', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "continue", + }, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const trailingExample = + 'keep transcript\n\nexample'; + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: + `stale\n\n${trailingExample}`, + }], + }], + }; + + await handler({} as never, output as never); + + assertEquals( + output.messages[0].parts[0].text, + `continue\n\n${trailingExample}`, + ); + }); + it("remains compatible with extended prepareInjection results", async () => { const prepared = { envelope: '', - factUuids: ["fact-1"], nodeRefs: ["node-1"], refreshDecision: { classification: "drifted", @@ -462,20 +822,10 @@ describe("messages handler", () => { cachedQuery: "prior topic", }, }; - const state = { - isMain: true, - visibleFactUuids: [] as string[], - pendingInjection: prepared, - }; + 
const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = prepared; const handler = createMessagesHandler({ - sessionManager: { - getState() { - return state; - }, - prepareInjection() { - return prepared; - }, - } as never, + sessionManager: sessionManager as never, }); const output = { @@ -487,6 +837,95 @@ describe("messages handler", () => { await handler({}, output as never); assertStringIncludes(output.messages[0].parts[0].text, " { + const sessionManager = new MockSessionManager(); + sessionManager.canonicalSessionId = "parent-session"; + sessionManager.prepareInjectionImpl = ( + sessionId: string, + lastRequest?: string, + ) => { + assertEquals(sessionId, "parent-session"); + assertEquals(lastRequest, "follow up from child"); + return { + envelope: + 'follow up from child', + nodeRefs: [], + refreshDecision: { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }, + }; + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "child-session" }, + parts: [{ type: "text", text: "follow up from child" }], + }], + }; + + await handler({}, output as never); + + assertStringIncludes(output.messages[0].parts[0].text, " { + const sessionManager = new MockSessionManager(); + sessionManager.resolveSessionState = () => { + throw new Error("Session not found"); + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "startup prompt" }], + }], + }; + + await handler({} as never, output as never); + + assertEquals(output.messages[0].parts[0].text, "startup prompt"); + }); + + it("skips transform work when the latest user entry has no text part", async () => { + const sessionManager = new MockSessionManager(); + 
sessionManager.prepareInjectionImpl = () => { + throw new Error("prepareInjection should not run"); + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "file", path: "src/index.ts" }], + }], + }; + + await handler({ message: "should be ignored" } as never, output as never); + + assertEquals(sessionManager.activeCalls, []); + assertEquals(sessionManager.state.pendingInjection, undefined); }); }); diff --git a/src/handlers/messages.ts b/src/handlers/messages.ts index 7517704..16696e5 100644 --- a/src/handlers/messages.ts +++ b/src/handlers/messages.ts @@ -1,5 +1,10 @@ import type { Hooks } from "@opencode-ai/plugin"; import { logger } from "../services/logger.ts"; +import { + sanitizeMemoryInput, + sanitizeMemoryInputPreservingMemoryBlocks, + stripInjectedMemoryBlocks, +} from "../services/render-utils.ts"; import type { SessionManager } from "../session.ts"; import { isTextPart } from "../utils.ts"; @@ -23,32 +28,31 @@ const getTransformMessage = (input: unknown): string | undefined => { return typeof message === "string" ? 
message : undefined; }; -const getLatestUserText = ( - output: MessagesTransformOutput, -): string | undefined => { - const lastUserEntry = output.messages - .findLast((message) => message.info.role === "user"); - const textPart = lastUserEntry?.parts.find(isTextPart); - return textPart?.text; -}; +const LEADING_INJECTED_SESSION_MEMORY_BLOCK = + /^]*\bsource=(['"])graphiti\1)(?=[^>]*\bversion=(['"])1\2)[^>]*>[\s\S]*?<\/session_memory>(?:\r?\n){0,2}/; +const LEADING_INJECTED_LEGACY_MEMORY_BLOCK_WITH_UUIDS = + /^]*\bdata-uuids=(["'])(?:[^"']*)\1)[^>]*>[\s\S]*?<\/memory>(?:\r?\n){0,2}/; +const LEADING_INJECTED_EMPTY_LEGACY_MEMORY_BLOCK = + /^]*\bdata-uuids=)[^>]*>\s*<\/memory>(?:\r?\n){0,2}/; +const LEADING_INJECTED_PERSISTENT_MEMORY_BLOCK = + /^]*\b(?:node_refs|fact_uuids)=(["'])[^"']*\1)[^>]*>[\s\S]*?<\/persistent_memory>(?:\r?\n){0,2}/; -const extractVisibleUuids = (text: string): string[] => { - const uuids: string[] = []; - for ( - const regex of [ - /]*\bdata-uuids="([^"]*)"[^>]*>/g, - /]*\bfact_uuids="([^"]*)"[^>]*>/g, - ] - ) { - let match: RegExpExecArray | null; - while ((match = regex.exec(text)) !== null) { - if (match[1]) uuids.push(...match[1].split(",").filter(Boolean)); - } +const scrubPromptMemoryText = (text: string): string => { + let scrubbed = text; + while (true) { + const next = scrubbed + .replace(LEADING_INJECTED_SESSION_MEMORY_BLOCK, "") + .replace(LEADING_INJECTED_LEGACY_MEMORY_BLOCK_WITH_UUIDS, "") + .replace(LEADING_INJECTED_EMPTY_LEGACY_MEMORY_BLOCK, "") + .replace(LEADING_INJECTED_PERSISTENT_MEMORY_BLOCK, ""); + if (next === scrubbed) return scrubbed; + scrubbed = next; } - return uuids; }; -export function createMessagesHandler(deps: MessagesHandlerDeps) { +export function createMessagesHandler( + deps: MessagesHandlerDeps, +): MessagesTransformHook { const { sessionManager } = deps; return async ( @@ -59,45 +63,61 @@ export function createMessagesHandler(deps: MessagesHandlerDeps) { .findLast((message) => message.info.role === "user"); 
if (!lastUserEntry) return; - const sessionID = lastUserEntry.info.sessionID; - const state = sessionManager.getState(sessionID); - if (!state?.isMain) return; + const textPart = lastUserEntry.parts.find(isTextPart); + const latestUserText = textPart?.text; + if (latestUserText === undefined) return; - const allVisibleUuids: string[] = []; - for (const entry of output.messages) { - for (const part of entry.parts) { - if (isTextPart(part)) { - allVisibleUuids.push(...extractVisibleUuids(part.text)); - } - } - } - state.visibleFactUuids = [...new Set(allVisibleUuids)]; + const sourceSessionID = lastUserEntry.info.sessionID; - const recallQuery = getTransformMessage(input) ?? getLatestUserText(output); - const prepared = state.pendingInjection ?? - await sessionManager.prepareInjection( - sessionID, - recallQuery, - state.visibleFactUuids, + try { + const { + state, + resolved, + canonicalSessionId, + } = await sessionManager.resolveSessionState(sourceSessionID); + if (!resolved || !canonicalSessionId) return; + if (!state?.isMain) return; + sessionManager.markResolvedSessionActive( + sourceSessionID, + canonicalSessionId, ); - if (!prepared) return; - const textPart = lastUserEntry.parts.find(isTextPart); - if (!textPart) return; - if (textPart.text.includes(", +) => Promise>; + +function createEntrypointHarness(connected: boolean) { + return createEntrypointHarnessWithOptions({ connected }); +} + +function createEntrypointHarnessWithOptions(options: { + connected?: boolean; + readyError?: Error; + redisConnectError?: Error; + teardownRun?: () => Promise; + teardownDispose?: () => void; +}) { + const connected = options.connected ?? 
true; + const config = { + graphiti: { + endpoint: "http://graphiti.test/mcp", + driftThreshold: 42, + groupIdPrefix: "prefix", + }, + redis: { + endpoint: "redis://redis.test:6379", + sessionTtlSeconds: 60, + cacheTtlSeconds: 90, + batchSize: 7, + batchMaxBytes: 2048, + drainRetryMax: 5, + }, + }; + const input = { + client: { id: "client" }, + directory: "/workspace/project", + }; + const hooks = { + event: { kind: "event" }, + chat: { kind: "chat" }, + compacting: { kind: "compacting" }, + messages: { kind: "messages" }, + }; + const records = { + loadConfigCalls: [] as string[], + setOpenCodeClientCalls: [] as unknown[], + graphitiWarnCalls: [] as Array<{ connected: boolean; endpoint: string }>, + redisWarnCalls: [] as Array<{ connected: boolean; endpoint: string }>, + connectionManagerOptions: [] as Array<{ endpoint: string }>, + connectionManagerInstances: [] as unknown[], + connectionStartCalls: 0, + connectionReadyCalls: 0, + connectionStopCalls: 0, + redisClientOptions: [] as Array<{ endpoint: string }>, + redisClientInstances: [] as unknown[], + redisConnectCalls: 0, + redisCloseCalls: 0, + graphitiAsyncDisposeCalls: 0, + graphitiAsyncFlushCalls: [] as string[][], + teardownTaskRuns: [] as string[], + teardownRegistrations: [] as Array< + { + tasks: Array<{ name: string; run: () => unknown }>; + registration: { run: () => Promise; dispose: () => void }; + } + >, + graphitiMcpArgs: [] as unknown[], + graphitiMcpInstances: [] as unknown[], + redisEventsArgs: [] as Array<[unknown, { sessionTtlSeconds: number }]>, + redisEventsInstances: [] as unknown[], + redisSnapshotArgs: [] as Array<[unknown, { ttlSeconds: number }]>, + redisSnapshotInstances: [] as unknown[], + redisCacheArgs: [] as Array<[ + unknown, + { ttlSeconds: number; driftThreshold: number }, + ]>, + redisCacheInstances: [] as unknown[], + batchDrainArgs: [] as Array<[ + unknown, + unknown, + { batchSize: number; batchMaxBytes: number; drainRetryMax: number }, + ]>, + batchDrainInstances: [] as 
unknown[], + graphitiAsyncArgs: [] as Array<[unknown, unknown, unknown]>, + graphitiAsyncInstances: [] as unknown[], + makeGroupIdCalls: [] as Array<[string | undefined, string]>, + makeUserGroupIdCalls: [] as Array<[string | undefined, string]>, + sessionManagerArgs: [] as Array<[ + string, + string, + unknown, + unknown, + unknown, + unknown, + { idleRetentionMs: number }, + ]>, + sessionManagerInstances: [] as unknown[], + createEventHandlerArgs: [] as Array>, + createChatHandlerArgs: [] as Array>, + createCompactingHandlerArgs: [] as Array>, + createMessagesHandlerArgs: [] as Array>, + }; + + class MockGraphitiConnectionManager { + constructor(options: { endpoint: string }) { + records.connectionManagerOptions.push(options); + records.connectionManagerInstances.push(this); + } + + start() { + records.connectionStartCalls += 1; + } + + ready() { + records.connectionReadyCalls += 1; + if (options.readyError) { + return Promise.reject(options.readyError); + } + return Promise.resolve(connected); + } + + stop() { + records.connectionStopCalls += 1; + records.teardownTaskRuns.push("graphiti"); + } + } + + class MockRedisClient { + constructor(options: { endpoint: string }) { + records.redisClientOptions.push(options); + records.redisClientInstances.push(this); + } + + connect() { + records.redisConnectCalls += 1; + if (options.redisConnectError) { + return Promise.reject(options.redisConnectError); + } + return Promise.resolve(); + } + + close() { + records.redisCloseCalls += 1; + records.teardownTaskRuns.push("redis"); + return Promise.resolve(); + } + } + + class MockGraphitiMcpClient { + constructor(connectionManager: unknown) { + records.graphitiMcpArgs.push(connectionManager); + records.graphitiMcpInstances.push(this); + } + } + + class MockRedisEventsService { + constructor(redisClient: unknown, options: { sessionTtlSeconds: number }) { + records.redisEventsArgs.push([redisClient, options]); + records.redisEventsInstances.push(this); + } + } + + class 
MockRedisSnapshotService { + constructor(redisClient: unknown, options: { ttlSeconds: number }) { + records.redisSnapshotArgs.push([redisClient, options]); + records.redisSnapshotInstances.push(this); + } + } + + class MockRedisCacheService { + constructor( + redisClient: unknown, + options: { ttlSeconds: number; driftThreshold: number }, + ) { + records.redisCacheArgs.push([redisClient, options]); + records.redisCacheInstances.push(this); + } + } + + class MockBatchDrainService { + constructor( + redisClient: unknown, + redisEvents: unknown, + options: { + batchSize: number; + batchMaxBytes: number; + drainRetryMax: number; + }, + ) { + records.batchDrainArgs.push([redisClient, redisEvents, options]); + records.batchDrainInstances.push(this); + } + } + + class MockGraphitiAsyncService { + constructor( + graphitiClient: unknown, + redisCache: unknown, + batchDrain: unknown, + ) { + records.graphitiAsyncArgs.push([graphitiClient, redisCache, batchDrain]); + records.graphitiAsyncInstances.push(this); + } + + dispose() { + records.graphitiAsyncDisposeCalls += 1; + records.teardownTaskRuns.push("graphiti-async"); + return Promise.resolve(); + } + + flushPendingGroups(groupIds: Iterable) { + records.graphitiAsyncFlushCalls.push([...groupIds]); + records.teardownTaskRuns.push("graphiti-drain-flush"); + return Promise.resolve(); + } + } + + class MockSessionManager { + getTrackedGroupIds() { + return ["group-id"]; + } + + constructor( + defaultGroupId: string, + defaultUserGroupId: string, + client: unknown, + redisEvents: unknown, + redisSnapshot: unknown, + redisCache: unknown, + options: { idleRetentionMs: number }, + ) { + records.sessionManagerArgs.push([ + defaultGroupId, + defaultUserGroupId, + client, + redisEvents, + redisSnapshot, + redisCache, + options, + ]); + records.sessionManagerInstances.push(this); + } + } + + const dependencies = { + loadConfig: (directory: string) => { + records.loadConfigCalls.push(directory); + return config; + }, + 
setOpenCodeClient: (client: unknown) => { + records.setOpenCodeClientCalls.push(client); + }, + warnOnGraphitiStartupUnavailable: (ready: boolean, endpoint: string) => { + records.graphitiWarnCalls.push({ connected: ready, endpoint }); + }, + warnOnRedisStartupUnavailable: (ready: boolean, endpoint: string) => { + records.redisWarnCalls.push({ connected: ready, endpoint }); + }, + GraphitiConnectionManager: MockGraphitiConnectionManager, + RedisClient: MockRedisClient, + registerRuntimeTeardown: ( + tasks: Array<{ name: string; run: () => unknown }>, + ) => { + const registration = { + run: options.teardownRun ?? + (async () => { + for (const task of tasks) { + await task.run(); + } + }), + dispose: options.teardownDispose ?? (() => {}), + }; + records.teardownRegistrations.push({ tasks, registration }); + return registration; + }, + GraphitiMcpClient: MockGraphitiMcpClient, + RedisEventsService: MockRedisEventsService, + RedisSnapshotService: MockRedisSnapshotService, + RedisCacheService: MockRedisCacheService, + BatchDrainService: MockBatchDrainService, + GraphitiAsyncService: MockGraphitiAsyncService, + SessionManager: MockSessionManager, + createEventHandler: (args: Record) => { + records.createEventHandlerArgs.push(args); + return hooks.event; + }, + createChatHandler: (args: Record) => { + records.createChatHandlerArgs.push(args); + return hooks.chat; + }, + createCompactingHandler: (args: Record) => { + records.createCompactingHandlerArgs.push(args); + return hooks.compacting; + }, + createMessagesHandler: (args: Record) => { + records.createMessagesHandlerArgs.push(args); + return hooks.messages; + }, + makeGroupId: (prefix: string | undefined, directory: string) => { + records.makeGroupIdCalls.push([prefix, directory]); + return "group-id"; + }, + makeUserGroupId: (prefix: string | undefined, directory: string) => { + records.makeUserGroupIdCalls.push([prefix, directory]); + return "user-group-id"; + }, + }; + + return { config, input, hooks, records, 
dependencies }; +} + describe("index", () => { afterEach(() => { setOpenCodeClient(undefined); @@ -16,62 +331,62 @@ describe("index", () => { describe("makeGroupId", () => { it("should omit undefined prefix text when prefix is missing", () => { const groupId = makeGroupId(undefined, "/home/user/my-project"); - assertEquals(groupId, "my-project__main"); + assertEquals(groupId, "MyProject__main"); }); it("should create group ID from simple directory path", () => { const groupId = makeGroupId("opencode", "/home/user/my-project"); - assertEquals(groupId, "opencode-my-project__main"); + assertEquals(groupId, "opencode_MyProject__main"); }); it("should use last directory component as project name", () => { const groupId = makeGroupId("test", "/var/www/html/app"); - assertEquals(groupId, "test-app__main"); + assertEquals(groupId, "test_App__main"); }); it("should handle single directory name", () => { const groupId = makeGroupId("prefix", "project"); - assertEquals(groupId, "prefix-project__main"); + assertEquals(groupId, "prefix_Project__main"); }); it("should return default when directory is empty", () => { const groupId = makeGroupId("prefix", ""); - assertEquals(groupId, "prefix-default__main"); + assertEquals(groupId, "prefix_Default__main"); }); it("should return default when directory is just slashes", () => { const groupId = makeGroupId("prefix", "///"); - assertEquals(groupId, "prefix-default__main"); + assertEquals(groupId, "prefix_Default__main"); }); it("should sanitize special characters to underscores", () => { const groupId = makeGroupId("opencode", "/home/user/my-project@2.0"); - assertEquals(groupId, "opencode-my-project_2_0__main"); + assertEquals(groupId, "opencode_MyProject20__main"); }); it("should sanitize multiple special characters", () => { const groupId = makeGroupId("test", "/projects/my project (v1.0)"); - assertEquals(groupId, "test-my_project__v1_0___main"); + assertEquals(groupId, "test_MyProjectV10__main"); }); - it("should preserve hyphens 
and underscores", () => { + it("should normalize hyphens and underscores into PascalCase", () => { const groupId = makeGroupId("prefix", "/dir/my_project-name"); - assertEquals(groupId, "prefix-my_project-name__main"); + assertEquals(groupId, "prefix_MyProjectName__main"); }); it("should handle directory with dots", () => { const groupId = makeGroupId("test", "/projects/app.example.com"); - assertEquals(groupId, "test-app_example_com__main"); + assertEquals(groupId, "test_AppExampleCom__main"); }); it("should handle directory with spaces", () => { const groupId = makeGroupId("test", "/home/my projects/app name"); - assertEquals(groupId, "test-app_name__main"); + assertEquals(groupId, "test_AppName__main"); }); it("should handle directory ending with slash", () => { const groupId = makeGroupId("test", "/home/user/project/"); - assertEquals(groupId, "test-project__main"); + assertEquals(groupId, "test_Project__main"); }); it("should handle complex path with multiple special chars", () => { @@ -79,26 +394,25 @@ describe("index", () => { "opencode", "/Users/name/Projects/my-app@v2.0 (beta)", ); - assertEquals(groupId, "opencode-my-app_v2_0__beta___main"); + assertEquals(groupId, "opencode_MyAppV20Beta__main"); }); it("should use different prefixes correctly", () => { const groupId1 = makeGroupId("prod", "/apps/myapp"); const groupId2 = makeGroupId("dev", "/apps/myapp"); - assertEquals(groupId1, "prod-myapp__main"); - assertEquals(groupId2, "dev-myapp__main"); + assertEquals(groupId1, "prod_Myapp__main"); + assertEquals(groupId2, "dev_Myapp__main"); }); - it("should handle unicode characters", () => { + it("should keep unicode-only basenames non-default", () => { const groupId = makeGroupId("test", "/projects/مشروع"); - assertEquals(groupId.startsWith("test-"), true); - assertEquals(groupId.endsWith("__main"), true); + assertEquals(groupId, "test_مشروع__main"); }); it("should handle very long directory names", () => { const longName = "a".repeat(200); const groupId = 
makeGroupId("test", `/projects/${longName}`); - assertEquals(groupId, `test-${longName}__main`); + assertEquals(groupId, `test_${"A"}${"a".repeat(199)}__main`); }); it("should be deterministic", () => { @@ -113,7 +427,12 @@ describe("index", () => { it("should omit undefined prefix text when prefix is missing", () => { const groupId = makeUserGroupId(undefined, "/home/user/my-project"); assertEquals(groupId.startsWith("undefined"), false); - assertEquals(groupId.startsWith("my-project__user-"), true); + assertEquals(groupId.startsWith("MyProject__user_"), true); + }); + + it("should preserve unicode-only project basenames", () => { + const groupId = makeUserGroupId("prefix", "/projects/東京"); + assertEquals(groupId.startsWith("prefix_東京__user_"), true); }); }); @@ -178,25 +497,416 @@ describe("index", () => { }); }); - describe("plugin export shape", () => { + describe("warnOnRedisStartupUnavailable", () => { + it("shows a native warning toast and structured log when Redis is unavailable", () => { + const appLogCalls: unknown[] = []; + const toastCalls: unknown[] = []; + const scheduledTasks: Array<() => void> = []; + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: (input: unknown) => { + appLogCalls.push(input); + }, + }, + tui: { + showToast: (input: unknown) => { + toastCalls.push(input); + }, + }, + }); + + warnOnRedisStartupUnavailable(true, "redis://redis.test:6379"); + + assertEquals(appLogCalls.length, 0); + assertEquals(toastCalls.length, 0); + assertEquals(scheduledTasks.length, 0); + + warnOnRedisStartupUnavailable(false, "redis://redis.test:6379"); + + assertEquals(appLogCalls.length, 0); + assertEquals(toastCalls.length, 0); + assertEquals(scheduledTasks.length, 2); + for (const task of scheduledTasks) task(); + + assertEquals(appLogCalls.length, 1); + assertEquals(toastCalls, [{ + body: { + message: + "Redis unavailable at redis://redis.test:6379; continuing without persistent 
memory.", + variant: "warning", + }, + }]); + }); + }); + + describe("graphiti entrypoint", () => { it("exports graphiti as the plugin entrypoint", () => { assertEquals(typeof graphiti, "function"); }); - }); - // NOTE: The main `graphiti()` plugin function requires a live Graphiti MCP - // server and cannot be integration-tested here without mocking the MCP - // transport layer. All testable units are covered in the files listed below: - // - // - makeGroupId / makeUserGroupId (this file) - // - logger (src/services/logger.test.ts) - // - handleCompaction / getCompactionContext - // (src/services/compaction.test.ts) - // - formatMemoryContext (src/services/context.test.ts) - // - GraphitiClient parsing (src/services/client.test.ts) - // - createChatHandler (src/handlers/chat.test.ts) - // - createEventHandler (src/handlers/event.test.ts) - // - SessionManager (src/services/session-snapshot.test.ts) - // - context utilities (src/services/context-utils.test.ts) - // - compaction utilities (src/services/compaction-utils.test.ts) + it("wires startup dependencies and returns handler hooks", async () => { + const { config, input, hooks, records, dependencies } = + createEntrypointHarness(true); + + const plugin = await invokeGraphiti(input, dependencies); + await Promise.resolve(); + + assertEquals(records.loadConfigCalls, [input.directory]); + assertEquals(records.setOpenCodeClientCalls, [input.client]); + assertEquals(records.connectionManagerOptions, [{ + endpoint: config.graphiti.endpoint, + }]); + assertEquals(records.connectionStartCalls, 1); + assertEquals(records.connectionReadyCalls, 1); + assertEquals(records.graphitiWarnCalls, []); + assertEquals(records.redisWarnCalls, []); + + assertEquals(records.redisClientOptions, [{ + endpoint: config.redis.endpoint, + }]); + assertEquals(records.redisConnectCalls, 1); + assertEquals(records.teardownRegistrations.length, 1); + assertEquals( + records.teardownRegistrations[0].tasks.map((task) => task.name), + 
["graphiti-drain-flush", "graphiti-async", "graphiti", "redis"], + ); + + records.teardownRegistrations[0].tasks[0].run(); + records.teardownRegistrations[0].tasks[1].run(); + records.teardownRegistrations[0].tasks[2].run(); + records.teardownRegistrations[0].tasks[3].run(); + assertEquals(records.graphitiAsyncFlushCalls, [["group-id"]]); + assertEquals(records.graphitiAsyncDisposeCalls, 1); + assertEquals(records.connectionStopCalls, 1); + assertEquals(records.redisCloseCalls, 1); + + assertStrictEquals( + records.graphitiMcpArgs[0], + records.connectionManagerInstances[0], + ); + assertStrictEquals( + records.redisEventsArgs[0][0], + records.redisClientInstances[0], + ); + assertEquals(records.redisEventsArgs[0][1], { + sessionTtlSeconds: config.redis.sessionTtlSeconds, + }); + assertStrictEquals( + records.redisSnapshotArgs[0][0], + records.redisClientInstances[0], + ); + assertEquals(records.redisSnapshotArgs[0][1], { + ttlSeconds: config.redis.sessionTtlSeconds * 2, + }); + assertStrictEquals( + records.redisCacheArgs[0][0], + records.redisClientInstances[0], + ); + assertEquals(records.redisCacheArgs[0][1], { + ttlSeconds: config.redis.cacheTtlSeconds, + driftThreshold: config.graphiti.driftThreshold, + }); + assertStrictEquals( + records.batchDrainArgs[0][0], + records.redisClientInstances[0], + ); + assertStrictEquals( + records.batchDrainArgs[0][1], + records.redisEventsInstances[0], + ); + assertEquals(records.batchDrainArgs[0][2], { + batchSize: config.redis.batchSize, + batchMaxBytes: config.redis.batchMaxBytes, + drainRetryMax: config.redis.drainRetryMax, + }); + assertStrictEquals( + records.graphitiAsyncArgs[0][0], + records.graphitiMcpInstances[0], + ); + assertStrictEquals( + records.graphitiAsyncArgs[0][1], + records.redisCacheInstances[0], + ); + assertStrictEquals( + records.graphitiAsyncArgs[0][2], + records.batchDrainInstances[0], + ); + assertEquals(records.makeGroupIdCalls, [[ + config.graphiti.groupIdPrefix, + input.directory, + ]]); + 
assertEquals(records.makeUserGroupIdCalls, [[ + config.graphiti.groupIdPrefix, + input.directory, + ]]); + assertEquals(records.sessionManagerArgs[0][0], "group-id"); + assertEquals(records.sessionManagerArgs[0][1], "user-group-id"); + assertStrictEquals(records.sessionManagerArgs[0][2], input.client); + assertStrictEquals( + records.sessionManagerArgs[0][3], + records.redisEventsInstances[0], + ); + assertStrictEquals( + records.sessionManagerArgs[0][4], + records.redisSnapshotInstances[0], + ); + assertStrictEquals( + records.sessionManagerArgs[0][5], + records.redisCacheInstances[0], + ); + assertEquals(records.sessionManagerArgs[0][6], { + idleRetentionMs: config.redis.sessionTtlSeconds * 1000, + }); + + assertEquals(records.createEventHandlerArgs.length, 1); + assertStrictEquals( + records.createEventHandlerArgs[0].sessionManager, + records.sessionManagerInstances[0], + ); + assertStrictEquals( + records.createEventHandlerArgs[0].redisEvents, + records.redisEventsInstances[0], + ); + assertStrictEquals( + records.createEventHandlerArgs[0].redisCache, + records.redisCacheInstances[0], + ); + assertStrictEquals( + records.createEventHandlerArgs[0].redisSnapshot, + records.redisSnapshotInstances[0], + ); + assertStrictEquals( + records.createEventHandlerArgs[0].graphitiAsync, + records.graphitiAsyncInstances[0], + ); + assertEquals( + records.createEventHandlerArgs[0].defaultGroupId, + "group-id", + ); + assertEquals( + records.createEventHandlerArgs[0].defaultUserGroupId, + "user-group-id", + ); + assertStrictEquals( + records.createEventHandlerArgs[0].sdkClient, + input.client, + ); + assertEquals( + records.createEventHandlerArgs[0].directory, + input.directory, + ); + assertEquals(records.createChatHandlerArgs.length, 1); + assertStrictEquals( + records.createChatHandlerArgs[0].sessionManager, + records.sessionManagerInstances[0], + ); + assertStrictEquals( + records.createChatHandlerArgs[0].redisEvents, + records.redisEventsInstances[0], + ); + 
assertStrictEquals( + records.createChatHandlerArgs[0].graphitiAsync, + records.graphitiAsyncInstances[0], + ); + assertEquals( + records.createChatHandlerArgs[0].drainTriggerSize, + config.redis.batchSize, + ); + assertEquals(records.createCompactingHandlerArgs.length, 1); + assertStrictEquals( + records.createCompactingHandlerArgs[0].sessionManager, + records.sessionManagerInstances[0], + ); + assertEquals(records.createMessagesHandlerArgs.length, 1); + assertStrictEquals( + records.createMessagesHandlerArgs[0].sessionManager, + records.sessionManagerInstances[0], + ); + + assertStrictEquals(plugin.event, hooks.event); + assertStrictEquals(plugin["chat.message"], hooks.chat); + assertStrictEquals( + plugin["experimental.session.compacting"], + hooks.compacting, + ); + assertStrictEquals( + plugin["experimental.chat.messages.transform"], + hooks.messages, + ); + }); + + it("warns on degraded startup without blocking plugin initialization", async () => { + const { config, input, hooks, records, dependencies } = + createEntrypointHarness(false); + + const plugin = await invokeGraphiti(input, dependencies); + await Promise.resolve(); + + assertEquals(records.graphitiWarnCalls, [{ + connected: false, + endpoint: config.graphiti.endpoint, + }]); + assertEquals(records.redisWarnCalls, []); + assertEquals(records.connectionStartCalls, 1); + assertEquals(records.redisConnectCalls, 1); + assertStrictEquals(plugin.event, hooks.event); + assertStrictEquals(plugin["chat.message"], hooks.chat); + }); + + it("degrades cleanly when Graphiti readiness rejects", async () => { + const { config, input, hooks, records, dependencies } = + createEntrypointHarnessWithOptions({ + readyError: new Error("graphiti startup failed"), + }); + + const plugin = await invokeGraphiti(input, dependencies); + await Promise.resolve(); + await Promise.resolve(); + + assertEquals(records.connectionStartCalls, 1); + assertEquals(records.connectionReadyCalls, 1); + assertEquals(records.redisConnectCalls, 
1); + assertEquals(records.graphitiWarnCalls, [{ + connected: false, + endpoint: config.graphiti.endpoint, + }]); + assertEquals(records.redisWarnCalls, []); + assertStrictEquals(plugin.event, hooks.event); + assertStrictEquals(plugin["chat.message"], hooks.chat); + }); + + it("degrades cleanly when Redis startup rejects", async () => { + const { config, input, hooks, records, dependencies } = + createEntrypointHarnessWithOptions({ + redisConnectError: new Error("redis startup failed"), + }); + + const plugin = await invokeGraphiti(input, dependencies); + await Promise.resolve(); + await Promise.resolve(); + + assertEquals(records.connectionStartCalls, 1); + assertEquals(records.connectionReadyCalls, 1); + assertEquals(records.redisConnectCalls, 1); + assertEquals(records.graphitiWarnCalls, []); + assertEquals(records.redisWarnCalls, [{ + connected: false, + endpoint: config.redis.endpoint, + }]); + assertStrictEquals(plugin.event, hooks.event); + assertStrictEquals(plugin["chat.message"], hooks.chat); + }); + + it("reports degraded startup once when both startup promises reject", async () => { + const { input, records, dependencies } = + createEntrypointHarnessWithOptions({ + readyError: new Error("graphiti startup failed"), + redisConnectError: new Error("redis startup failed"), + }); + + await invokeGraphiti(input, dependencies); + await Promise.resolve(); + await Promise.resolve(); + + assertEquals( + records.graphitiWarnCalls.length + records.redisWarnCalls.length, + 1, + ); + }); + + it("waits for previous runtime teardown before starting a new runtime", async () => { + let releasePreviousTeardown!: () => void; + const previousTeardown = new Promise((resolve) => { + releasePreviousTeardown = resolve; + }); + const firstHarness = createEntrypointHarnessWithOptions({ + teardownRun: () => previousTeardown, + }); + + await invokeGraphiti(firstHarness.input, firstHarness.dependencies); + + const secondHarness = createEntrypointHarness(true); + const 
secondPluginPromise = invokeGraphiti( + secondHarness.input, + secondHarness.dependencies, + ); + await Promise.resolve(); + + assertEquals( + secondHarness.records.loadConfigCalls, + [], + ); + assertEquals( + firstHarness.records.teardownRegistrations.length, + 1, + ); + + releasePreviousTeardown(); + await secondPluginPromise; + + assertEquals( + secondHarness.records.loadConfigCalls, + [secondHarness.input.directory], + ); + assertEquals(secondHarness.records.connectionStartCalls, 1); + }); + + it("continues startup when previous runtime teardown rejects", async () => { + const originalWarn = logger.warn; + const warnCalls: unknown[][] = []; + logger.warn = (...args: unknown[]) => { + warnCalls.push(args); + }; + + try { + const firstHarness = createEntrypointHarnessWithOptions({ + teardownRun: () => + Promise.reject(new Error("previous teardown failed")), + }); + await invokeGraphiti(firstHarness.input, firstHarness.dependencies); + + const secondHarness = createEntrypointHarness(true); + const plugin = await invokeGraphiti( + secondHarness.input, + secondHarness.dependencies, + ); + await Promise.resolve(); + + assertEquals(secondHarness.records.loadConfigCalls, [ + secondHarness.input.directory, + ]); + assertEquals(secondHarness.records.connectionStartCalls, 1); + assertEquals(warnCalls.length, 1); + assertEquals(warnCalls[0][0], "Previous runtime teardown rejected"); + assertEquals( + (warnCalls[0][1] as Error).message, + "previous teardown failed", + ); + assertStrictEquals(plugin.event, secondHarness.hooks.event); + } finally { + logger.warn = originalWarn; + } + }); + + it("tears down async work before graphiti and redis during re-initialization", async () => { + const firstHarness = createEntrypointHarness(true); + await invokeGraphiti(firstHarness.input, firstHarness.dependencies); + + const secondHarness = createEntrypointHarness(true); + await invokeGraphiti(secondHarness.input, secondHarness.dependencies); + + 
assertEquals(firstHarness.records.teardownTaskRuns, [ + "graphiti-drain-flush", + "graphiti-async", + "graphiti", + "redis", + ]); + assertEquals(firstHarness.records.graphitiAsyncDisposeCalls, 1); + assertEquals(firstHarness.records.connectionStopCalls, 1); + assertEquals(firstHarness.records.redisCloseCalls, 1); + }); + }); }); diff --git a/src/index.ts b/src/index.ts index a5e85ac..84479f9 100644 --- a/src/index.ts +++ b/src/index.ts @@ -16,10 +16,45 @@ import { import { RedisCacheService } from "./services/redis-cache.ts"; import { RedisClient } from "./services/redis-client.ts"; import { RedisEventsService } from "./services/redis-events.ts"; +import { logger } from "./services/logger.ts"; import { RedisSnapshotService } from "./services/redis-snapshot.ts"; import { registerRuntimeTeardown } from "./services/runtime-teardown.ts"; import { makeGroupId, makeUserGroupId } from "./utils.ts"; +type GraphitiDependencies = { + loadConfig: typeof loadConfig; + setOpenCodeClient: typeof setOpenCodeClient; + warnOnGraphitiStartupUnavailable: ( + connected: boolean, + endpoint: string, + ) => void; + warnOnRedisStartupUnavailable: ( + connected: boolean, + endpoint: string, + ) => void; + GraphitiConnectionManager: typeof GraphitiConnectionManager; + RedisClient: typeof RedisClient; + registerRuntimeTeardown: typeof registerRuntimeTeardown; + GraphitiMcpClient: typeof GraphitiMcpClient; + RedisEventsService: typeof RedisEventsService; + RedisSnapshotService: typeof RedisSnapshotService; + RedisCacheService: typeof RedisCacheService; + BatchDrainService: typeof BatchDrainService; + GraphitiAsyncService: typeof GraphitiAsyncService; + SessionManager: typeof SessionManager; + createEventHandler: typeof createEventHandler; + createChatHandler: typeof createChatHandler; + createCompactingHandler: typeof createCompactingHandler; + createMessagesHandler: typeof createMessagesHandler; + makeGroupId: typeof makeGroupId; + makeUserGroupId: typeof makeUserGroupId; +}; + +let 
activeRuntimeTeardown: + | ReturnType + | null = null; +let runtimeInitialization = Promise.resolve(); + export const warnOnGraphitiStartupUnavailable = ( connected: boolean, endpoint: string, @@ -31,99 +66,193 @@ export const warnOnGraphitiStartupUnavailable = ( ); }; -export const graphiti: Plugin = (input: PluginInput) => { - const config = loadConfig(input.directory); - setOpenCodeClient(input.client); +export const warnOnRedisStartupUnavailable = ( + connected: boolean, + endpoint: string, +): void => { + if (connected) return; + notifyGraphitiAvailabilityIssue( + `Redis unavailable at ${endpoint}; continuing without persistent memory.`, + { endpoint }, + ); +}; - const connectionManager = new GraphitiConnectionManager({ - endpoint: config.graphiti.endpoint, - }); - connectionManager.start(); - void connectionManager.ready().then((connected) => { - warnOnGraphitiStartupUnavailable(connected, config.graphiti.endpoint); - }); +const defaultGraphitiDependencies: GraphitiDependencies = { + loadConfig, + setOpenCodeClient, + warnOnGraphitiStartupUnavailable, + warnOnRedisStartupUnavailable, + GraphitiConnectionManager, + RedisClient, + registerRuntimeTeardown, + GraphitiMcpClient, + RedisEventsService, + RedisSnapshotService, + RedisCacheService, + BatchDrainService, + GraphitiAsyncService, + SessionManager, + createEventHandler, + createChatHandler, + createCompactingHandler, + createMessagesHandler, + makeGroupId, + makeUserGroupId, +}; - const redisClient = new RedisClient({ - endpoint: config.falkordb.redisEndpoint, - }); - void redisClient.connect(); - registerRuntimeTeardown([ - { - name: "redis", - run: () => redisClient.close(), - }, - { - name: "graphiti", - run: () => connectionManager.stop(), - }, - ]); +export const graphiti: Plugin = ( + input: PluginInput, + dependencies: GraphitiDependencies = defaultGraphitiDependencies, +) => { + const setup = runtimeInitialization.then(async () => { + const previousTeardown = activeRuntimeTeardown; + 
activeRuntimeTeardown = null; + previousTeardown?.dispose(); + if (previousTeardown) { + try { + await previousTeardown.run(); + } catch (err) { + logger.warn("Previous runtime teardown rejected", err); + } + } - const graphitiClient = new GraphitiMcpClient(connectionManager); - const redisEvents = new RedisEventsService(redisClient, { - sessionTtlSeconds: config.falkordb.sessionTtlSeconds, - }); - const redisSnapshot = new RedisSnapshotService(redisClient, { - ttlSeconds: config.falkordb.sessionTtlSeconds * 2, - }); - const redisCache = new RedisCacheService(redisClient, { - ttlSeconds: config.falkordb.cacheTtlSeconds, - driftThreshold: config.graphiti.driftThreshold, - }); - const batchDrain = new BatchDrainService(redisClient, redisEvents, { - batchSize: config.falkordb.batchSize, - batchMaxBytes: config.falkordb.batchMaxBytes, - drainRetryMax: config.falkordb.drainRetryMax, - }); - const graphitiAsync = new GraphitiAsyncService( - graphitiClient, - redisCache, - batchDrain, - ); + const config = dependencies.loadConfig(input.directory); + dependencies.setOpenCodeClient(input.client); + let startupUnavailableReported = false; + const reportStartupUnavailable = (service: "graphiti" | "redis") => { + if (startupUnavailableReported) return; + startupUnavailableReported = true; + if (service === "graphiti") { + dependencies.warnOnGraphitiStartupUnavailable( + false, + config.graphiti.endpoint, + ); + return; + } + dependencies.warnOnRedisStartupUnavailable(false, config.redis.endpoint); + }; - const defaultGroupId = makeGroupId( - config.graphiti.groupIdPrefix, - input.directory, - ); - const defaultUserGroupId = makeUserGroupId( - config.graphiti.groupIdPrefix, - input.directory, - ); - - const sessionManager = new SessionManager( - defaultGroupId, - defaultUserGroupId, - input.client, - redisEvents, - redisSnapshot, - redisCache, - { - idleRetentionMs: config.falkordb.sessionTtlSeconds * 1000, - }, - ); + const connectionManager = new 
dependencies.GraphitiConnectionManager({ + endpoint: config.graphiti.endpoint, + }); + connectionManager.start(); + void connectionManager.ready() + .then((connected) => { + if (!connected) { + reportStartupUnavailable("graphiti"); + } + }) + .catch(() => { + reportStartupUnavailable("graphiti"); + }); - return Promise.resolve({ - event: createEventHandler({ - sessionManager, + const redisClient = new dependencies.RedisClient({ + endpoint: config.redis.endpoint, + }); + void redisClient.connect() + .catch(() => { + reportStartupUnavailable("redis"); + }); + const graphitiClient = new dependencies.GraphitiMcpClient( + connectionManager, + ); + const redisEvents = new dependencies.RedisEventsService(redisClient, { + sessionTtlSeconds: config.redis.sessionTtlSeconds, + }); + const redisSnapshot = new dependencies.RedisSnapshotService(redisClient, { + ttlSeconds: config.redis.sessionTtlSeconds * 2, + }); + const redisCache = new dependencies.RedisCacheService(redisClient, { + ttlSeconds: config.redis.cacheTtlSeconds, + driftThreshold: config.graphiti.driftThreshold, + }); + const batchDrain = new dependencies.BatchDrainService( + redisClient, redisEvents, + { + batchSize: config.redis.batchSize, + batchMaxBytes: config.redis.batchMaxBytes, + drainRetryMax: config.redis.drainRetryMax, + }, + ); + const graphitiAsync = new dependencies.GraphitiAsyncService( + graphitiClient, redisCache, - redisSnapshot, - graphitiAsync, + batchDrain, + ); + + const defaultGroupId = dependencies.makeGroupId( + config.graphiti.groupIdPrefix, + input.directory, + ); + const defaultUserGroupId = dependencies.makeUserGroupId( + config.graphiti.groupIdPrefix, + input.directory, + ); + + const sessionManager = new dependencies.SessionManager( defaultGroupId, defaultUserGroupId, - sdkClient: input.client, - directory: input.directory, - }), - "chat.message": createChatHandler({ - sessionManager, + input.client, redisEvents, - graphitiAsync, - drainTriggerSize: config.falkordb.batchSize, - }), - 
"experimental.session.compacting": createCompactingHandler({ - sessionManager, - }), - "experimental.chat.messages.transform": createMessagesHandler({ - sessionManager, - }), + redisSnapshot, + redisCache, + { + idleRetentionMs: config.redis.sessionTtlSeconds * 1000, + }, + ); + + activeRuntimeTeardown = dependencies.registerRuntimeTeardown([ + { + name: "graphiti-drain-flush", + run: () => + graphitiAsync.flushPendingGroups( + sessionManager.getTrackedGroupIds(), + ), + }, + { + name: "graphiti-async", + run: () => graphitiAsync.dispose(), + }, + { + name: "graphiti", + run: () => connectionManager.stop(), + }, + { + name: "redis", + run: () => redisClient.close(), + }, + ]); + + return { + event: dependencies.createEventHandler({ + sessionManager, + redisEvents, + redisCache, + redisSnapshot, + graphitiAsync, + defaultGroupId, + defaultUserGroupId, + sdkClient: input.client, + directory: input.directory, + }), + "chat.message": dependencies.createChatHandler({ + sessionManager, + redisEvents, + graphitiAsync, + drainTriggerSize: config.redis.batchSize, + }), + "experimental.session.compacting": dependencies + .createCompactingHandler({ + sessionManager, + }), + "experimental.chat.messages.transform": dependencies + .createMessagesHandler({ + sessionManager, + }), + }; }); + + runtimeInitialization = setup.then(() => undefined, () => undefined); + return setup; }; diff --git a/src/services/batch-drain.test.ts b/src/services/batch-drain.test.ts index 6410f0f..d01d986 100644 --- a/src/services/batch-drain.test.ts +++ b/src/services/batch-drain.test.ts @@ -1,10 +1,15 @@ import { assertEquals } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { spy } from "jsr:@std/testing@^1.0.0/mock"; import { BatchDrainService } from "./batch-drain.ts"; import { createSessionEvent } from "./event-extractor.ts"; +import { logger } from "./logger.ts"; +import { setSuppressConsoleWarningsDuringTestsOverride } from 
"./opencode-warning.ts"; import { RedisClient } from "./redis-client.ts"; import { + buildDrainEpisodeBody, drainClaimActiveKey, + drainClaimCheckpointKey, drainClaimKey, drainClaimLockKey, drainDeadKey, @@ -13,24 +18,248 @@ import { RedisEventsService, } from "./redis-events.ts"; -const createDeps = () => { - const redis = new RedisClient({ endpoint: "redis://unused" }); +type RedisEvent = "close" | "end" | "error" | "ready"; + +setSuppressConsoleWarningsDuringTestsOverride(true); + +class FakeRedisRuntime { + private readonly values = new Map(); + private readonly lists = new Map(); + private readonly listeners = new Map< + RedisEvent, + Set<(...args: unknown[]) => void> + >(); + + connect(): Promise { + this.emit("ready"); + return Promise.resolve(); + } + + ping(): Promise<"PONG"> { + return Promise.resolve("PONG"); + } + + quit(): Promise<"OK"> { + return Promise.resolve("OK"); + } + + private ensureList(key: string): string[] { + if (this.values.has(key)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + const existing = this.lists.get(key); + if (existing) return existing; + const list: string[] = []; + this.lists.set(key, list); + return list; + } + + lpush(key: string, value: string): Promise { + const list = this.ensureList(key); + list.unshift(value); + return Promise.resolve(list.length); + } + + rpush(key: string, value: string): Promise { + const list = this.ensureList(key); + list.push(value); + return Promise.resolve(list.length); + } + + lmove( + source: string, + destination: string, + sourceSide: "LEFT" | "RIGHT", + destinationSide: "LEFT" | "RIGHT", + ): Promise { + const sourceList = this.lists.get(source) ?? []; + const value = sourceSide === "LEFT" ? 
sourceList.shift() : sourceList.pop(); + if (value === undefined) return Promise.resolve(null); + const destinationList = this.ensureList(destination); + if (destinationSide === "LEFT") destinationList.unshift(value); + else destinationList.push(value); + return Promise.resolve(value); + } + + lrange(key: string, start: number, stop: number): Promise { + const list = this.lists.get(key) ?? []; + const normalizedStop = stop < 0 ? list.length + stop : stop; + return Promise.resolve(list.slice(start, normalizedStop + 1)); + } + + llen(key: string): Promise { + return Promise.resolve((this.lists.get(key) ?? []).length); + } + + ltrim(key: string, start: number, stop: number): Promise { + const list = this.lists.get(key) ?? []; + const normalizedStop = stop < 0 ? list.length + stop : stop; + this.lists.set(key, list.slice(start, normalizedStop + 1)); + return Promise.resolve(); + } + + lindex(key: string, index: number): Promise { + return Promise.resolve(this.lists.get(key)?.[index] ?? null); + } + + lset(key: string, index: number, value: string): Promise { + const list = this.lists.get(key); + if (!list || index < 0 || index >= list.length) { + return Promise.reject(new Error("ERR index out of range")); + } + list[index] = value; + return Promise.resolve(); + } + + get(key: string): Promise { + return Promise.resolve(this.values.get(key) ?? null); + } + + set( + key: string, + value: string, + ...args: Array + ): Promise<"OK" | null> { + const onlyIfAbsent = args.includes("NX"); + if (onlyIfAbsent && this.values.has(key)) return Promise.resolve(null); + this.values.set(key, value); + return Promise.resolve("OK"); + } + + expire(_key: string, _ttlSeconds: number): Promise { + return Promise.resolve(1); + } + + del(key: string): Promise { + const deleted = this.values.delete(key) || this.lists.delete(key); + return Promise.resolve(deleted ? 
1 : 0); + } + + eval( + script: string, + _numKeys: number, + ...args: string[] + ): Promise { + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('EXPIRE', KEYS[1], ARGV[2])") + ) { + return Promise.resolve(this.values.get(args[0]) === args[1] ? 1 : 0); + } + + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('DEL', KEYS[1])") + ) { + if (this.values.get(args[0]) !== args[1]) return Promise.resolve(0); + this.values.delete(args[0]); + return Promise.resolve(1); + } + + return Promise.reject(new Error("unsupported eval script")); + } + + on(event: RedisEvent, listener: (...args: unknown[]) => void): void { + const set = this.listeners.get(event) ?? new Set(); + set.add(listener); + this.listeners.set(event, set); + } + + off(event: RedisEvent, listener: (...args: unknown[]) => void): void { + this.listeners.get(event)?.delete(listener); + } + + private emit(event: RedisEvent, ...args: unknown[]): void { + for (const listener of this.listeners.get(event) ?? []) { + listener(...args); + } + } +} + +const createDeps = async (options?: { + events?: { claimLockTtlSeconds?: number }; + drain?: { + batchMaxBytes?: number; + batchSize?: number; + claimHeartbeatIntervalMs?: number | null; + }; +}) => { + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new FakeRedisRuntime(), + }); + await redis.connect(); const events = new RedisEventsService(redis, { sessionTtlSeconds: 60, - claimLockTtlSeconds: 1, + claimLockTtlSeconds: options?.events?.claimLockTtlSeconds ?? 1, }); - const drain = new BatchDrainService(redis, events, { - batchSize: 2, - batchMaxBytes: 20_000, + const drainOptions = { + batchSize: options?.drain?.batchSize ?? 2, + batchMaxBytes: options?.drain?.batchMaxBytes ?? 
20_000, drainRetryMax: 2, - claimHeartbeatIntervalMs: 100, - }); + }; + const heartbeatIntervalMs = options?.drain?.claimHeartbeatIntervalMs; + const drain = new BatchDrainService( + redis, + events, + heartbeatIntervalMs === null ? drainOptions : { + ...drainOptions, + claimHeartbeatIntervalMs: heartbeatIntervalMs ?? 100, + }, + ); return { redis, events, drain }; }; describe("batch drain", () => { + it("uses a sub-TTL default heartbeat when the claim TTL is small", () => { + const drain = new BatchDrainService( + new RedisClient({ endpoint: "redis://unused" }), + {} as never, + { + batchSize: 2, + batchMaxBytes: 20_000, + drainRetryMax: 2, + }, + ); + const heartbeatIntervalMs = (drain as unknown as { + getClaimHeartbeatIntervalMs: (ttl: number) => number; + }).getClaimHeartbeatIntervalMs(1); + assertEquals(heartbeatIntervalMs, 333); + }); + + it("warns and clamps an explicit heartbeat interval that exceeds the claim TTL budget", () => { + const warnSpy = spy(logger, "warn"); + try { + const drain = new BatchDrainService( + new RedisClient({ endpoint: "redis://unused" }), + {} as never, + { + batchSize: 2, + batchMaxBytes: 20_000, + drainRetryMax: 2, + claimHeartbeatIntervalMs: 1_500, + }, + ); + + const heartbeatIntervalMs = (drain as unknown as { + getClaimHeartbeatIntervalMs: (ttl: number) => number; + }).getClaimHeartbeatIntervalMs(1); + + assertEquals(heartbeatIntervalMs, 500); + assertEquals(warnSpy.calls.length, 1); + assertEquals( + warnSpy.calls[0].args[0], + "Clamped drain heartbeat interval to stay below claim TTL", + ); + } finally { + warnSpy.restore(); + } + }); + it("claims oldest events, drains them FIFO, and leaves newer items pending", async () => { - const { redis, events, drain } = createDeps(); + const { redis, events, drain } = await createDeps(); const added: string[] = []; const recorded = []; for (const summary of ["first", "second", "third"]) { @@ -74,8 +303,124 @@ describe("batch drain", () => { ); }); + it("avoids an extra ownership 
refresh before checkpointing skipped entries", async () => { + const { events, drain } = await createDeps({ + drain: { batchSize: 2, claimHeartbeatIntervalMs: null }, + }); + const skipped = createSessionEvent("message", "assistant", { + summary: "assistant chatter", + body: "assistant chatter", + }); + const drained = createSessionEvent("message", "user", { + summary: "user message", + body: "user message", + }); + await events.recordEvent("session-1", "group-1", skipped); + await events.recordEvent("session-1", "group-1", drained); + + const refreshSpy = spy(events, "refreshClaimLease"); + const added: string[] = []; + try { + const result = await drain.drainGroup("group-1", { + addMemory(input: { name: string }) { + added.push(input.name); + }, + } as never); + + assertEquals(result, { status: "success", drained: 1 }); + assertEquals(added, [`message:${drained.id}`]); + assertEquals(refreshSpy.calls.length, 4); + } finally { + refreshSpy.restore(); + } + }); + + it("serializes claim heartbeat refreshes so they never overlap", async () => { + const { events, drain } = await createDeps({ + events: { claimLockTtlSeconds: 2 }, + drain: { batchSize: 1, claimHeartbeatIntervalMs: 250 }, + }); + const event = createSessionEvent("message", "user", { + summary: "long running", + body: "long running", + }); + await events.recordEvent("session-1", "group-1", event); + + const originalRefreshClaimLease = events.refreshClaimLease.bind(events); + let inFlight = 0; + let maxInFlight = 0; + let refreshCalls = 0; + events.refreshClaimLease = async (...args) => { + refreshCalls += 1; + inFlight += 1; + maxInFlight = Math.max(maxInFlight, inFlight); + await new Promise((resolve) => setTimeout(resolve, 300)); + inFlight -= 1; + return await originalRefreshClaimLease(...args); + }; + + const result = await drain.drainGroup("group-1", { + async addMemory() { + await new Promise((resolve) => setTimeout(resolve, 650)); + }, + } as never); + + assertEquals(result, { status: "success", 
drained: 1 }); + assertEquals(refreshCalls >= 3, true); + assertEquals(maxInFlight, 1); + }); + + it("limits batches using serialized Graphiti episode bodies", async () => { + const first = createSessionEvent("message", "user", { + summary: "first", + body: "x".repeat(8_000), + }); + const second = createSessionEvent("message", "user", { + summary: "second", + body: "y".repeat(8_000), + }); + const encoder = new TextEncoder(); + const batchMaxBytes = encoder.encode(buildDrainEpisodeBody({ + sessionId: "session-1", + groupId: "group-1", + event: first, + })).length + + encoder.encode(buildDrainEpisodeBody({ + sessionId: "session-1", + groupId: "group-1", + event: second, + })).length - 1; + const { redis, events, drain } = await createDeps({ + drain: { batchMaxBytes }, + }); + + await events.recordEvent("session-1", "group-1", first); + await events.recordEvent("session-1", "group-1", second); + + const added: string[] = []; + const firstResult = await drain.drainGroup("group-1", { + addMemory(input: { name: string }) { + added.push(input.name); + }, + } as never); + + assertEquals(firstResult, { status: "success", drained: 1 }); + assertEquals(added, [`message:${first.id}`]); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 1); + + const secondResult = await drain.drainGroup("group-1", { + addMemory(input: { name: string }) { + added.push(input.name); + }, + } as never); + + assertEquals(secondResult, { status: "success", drained: 1 }); + assertEquals(added, [`message:${first.id}`, `message:${second.id}`]); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); + }); + it("keeps FIFO order across claim interleaving and does not lose newer enqueues", async () => { - const { redis, events } = createDeps(); + const { redis, events } = await createDeps(); const first = createSessionEvent("message", "user", { summary: "first", body: "first", @@ -129,7 +474,7 @@ describe("batch drain", () => { }); it("releases claims on retry and 
dead-letters after max attempts", async () => { - const { redis, events, drain } = createDeps(); + const { redis, events, drain } = await createDeps(); const event = createSessionEvent("error", "tool", { summary: "failing batch", body: "failing batch", @@ -155,12 +500,179 @@ describe("batch drain", () => { const second = await drain.drainGroup("group-1", failingGraphiti as never); assertEquals(second.status, "dead-letter"); + assertEquals(second.drained, 0); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); + assertEquals(await redis.getListLength(drainDeadKey("group-1")), 1); + }); + + it("backs off and releases the claim when retry state is scheduled for later", async () => { + const { redis, events, drain } = await createDeps(); + const event = createSessionEvent("message", "user", { + summary: "wait before retry", + body: "wait before retry", + }); + await events.recordEvent("session-1", "group-1", event); + + const retryKey = drainRetryKey("group-1", `${event.id}:${event.id}`); + const retryState = { attempts: 1, nextAttemptAt: Date.now() + 60_000 }; + await redis.setString(retryKey, JSON.stringify(retryState), 60); + + let addMemoryCalls = 0; + const result = await drain.drainGroup("group-1", { + addMemory() { + addMemoryCalls += 1; + }, + } as never); + + assertEquals(result.status, "backoff"); + assertEquals(result.drained, 0); + if (result.retryAfterMs === undefined || result.retryAfterMs <= 0) { + throw new Error("Expected backoff result to include retryAfterMs"); + } + assertEquals(addMemoryCalls, 0); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 1); + assertEquals(await redis.getString(drainClaimActiveKey("group-1")), null); + assertEquals(await redis.getString(retryKey), JSON.stringify(retryState)); + }); + + it("clears corrupted retry state before retrying a batch", async () => { + const { redis, events, drain } = await createDeps(); + const event = createSessionEvent("message", "user", { + summary: 
"recover retry state", + body: "recover retry state", + }); + await events.recordEvent("session-1", "group-1", event); + + const retryKey = drainRetryKey("group-1", `${event.id}:${event.id}`); + await redis.setString(retryKey, "{not-json", 60); + + let calls = 0; + const result = await drain.drainGroup("group-1", { + addMemory() { + calls += 1; + }, + } as never); + + assertEquals(result, { status: "success", drained: 1 }); + assertEquals(calls, 1); + assertEquals(await redis.getString(retryKey), null); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); + }); + + it("clears parsed but invalid retry state before retrying a batch", async () => { + const invalidStates = [ + { attempts: -1, nextAttemptAt: 0 }, + { attempts: 1, nextAttemptAt: "later" }, + ]; + + for (const invalidState of invalidStates) { + const { redis, events, drain } = await createDeps(); + const event = createSessionEvent("message", "user", { + summary: "recover invalid retry state", + body: "recover invalid retry state", + }); + await events.recordEvent("session-1", "group-1", event); + + const retryKey = drainRetryKey("group-1", `${event.id}:${event.id}`); + await redis.setString(retryKey, JSON.stringify(invalidState), 60); + + let calls = 0; + const result = await drain.drainGroup("group-1", { + addMemory() { + calls += 1; + }, + } as never); + + assertEquals(result, { status: "success", drained: 1 }); + assertEquals(calls, 1); + assertEquals(await redis.getString(retryKey), null); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); + } + }); + + it("reports only successfully ingested events when a batch dead-letters mid-batch", async () => { + const { redis, events, drain } = await createDeps({ + drain: { batchSize: 2 }, + }); + const first = createSessionEvent("message", "user", { + summary: "first", + body: "first", + }); + const second = createSessionEvent("message", "user", { + summary: "second", + body: "second", + }); + await 
events.recordEvent("session-1", "group-1", first); + await events.recordEvent("session-1", "group-1", second); + await redis.setString( + drainRetryKey("group-1", `${first.id}:${second.id}`), + JSON.stringify({ attempts: 1, nextAttemptAt: 0 }), + 60, + ); + + let calls = 0; + const result = await drain.drainGroup("group-1", { + addMemory() { + calls += 1; + if (calls === 2) { + throw new Error("boom"); + } + }, + } as never); + + assertEquals(result, { status: "dead-letter", drained: 1 }); assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); assertEquals(await redis.getListLength(drainDeadKey("group-1")), 1); }); + it("does not dead-letter or mark success after claim loss at max retry", async () => { + const { redis, events, drain } = await createDeps(); + const event = createSessionEvent("error", "tool", { + summary: "failing batch", + body: "failing batch", + metadata: { resolved: false }, + }); + await events.recordEvent("session-1", "group-1", event); + await redis.setString( + drainRetryKey("group-1", `${event.id}:${event.id}`), + JSON.stringify({ attempts: 1, nextAttemptAt: 0 }), + 60, + ); + + const deadLetterSpy = spy(events, "moveBatchToDeadLetter"); + const markSuccessSpy = spy(events, "markBatchSuccess"); + try { + const result = await drain.drainGroup("group-1", { + async addMemory() { + await redis.deleteKey(drainClaimLockKey("group-1")); + await new Promise((resolve) => setTimeout(resolve, 250)); + throw new Error("boom"); + }, + } as never); + + assertEquals(result, { status: "retry", drained: 0 }); + assertEquals(deadLetterSpy.calls.length, 0); + assertEquals(markSuccessSpy.calls.length, 0); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); + assertEquals(await redis.getListLength(drainDeadKey("group-1")), 0); + assertEquals( + await redis.getString( + drainRetryKey("group-1", `${event.id}:${event.id}`), + ), + null, + ); + assertEquals( + typeof await redis.getString(drainClaimActiveKey("group-1")), + 
"string", + ); + } finally { + deadLetterSpy.restore(); + markSuccessSpy.restore(); + } + }); + it("requeues abandoned claimed batches after lock loss and drains them", async () => { - const { redis, events, drain } = createDeps(); + const { redis, events, drain } = await createDeps(); const first = createSessionEvent("message", "user", { summary: "first", body: "first", @@ -174,10 +686,15 @@ describe("batch drain", () => { await events.recordEvent("session-1", "group-1", second); const claimed = await events.getPendingBatch("group-1", 2, 20_000); - assertEquals(claimed?.entries.map((entry) => entry.event.id), [ - first.id, - second.id, - ]); + assertEquals( + claimed?.entries.map((entry: { event: { id: string } }) => + entry.event.id + ), + [ + first.id, + second.id, + ], + ); assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); await redis.deleteKey(drainClaimLockKey("group-1")); @@ -200,7 +717,7 @@ describe("batch drain", () => { }); it("can recover an abandoned claim before the next drain attempt", async () => { - const { redis, events } = createDeps(); + const { redis, events } = await createDeps(); const first = createSessionEvent("message", "user", { summary: "first", body: "first", @@ -229,13 +746,13 @@ describe("batch drain", () => { -1, ); assertEquals( - pendingRaw.map((item) => JSON.parse(item).event.id), + pendingRaw.map((item: string) => JSON.parse(item).event.id), [second.id, first.id], ); }); it("keeps an active long-running drain claim alive so recovery cannot steal it", async () => { - const { redis, events, drain } = createDeps(); + const { redis, events, drain } = await createDeps(); const first = createSessionEvent("message", "user", { summary: "first", body: "first", @@ -286,14 +803,19 @@ describe("batch drain", () => { assertEquals(await redis.getString(drainClaimActiveKey("group-1")), null); }); - it("fails and requeues when heartbeat loses ownership during a long drain", async () => { - const { redis, events, drain } = 
createDeps(); - const event = createSessionEvent("message", "user", { + it("replays only the uncheckpointed suffix after claim loss", async () => { + const { redis, events, drain } = await createDeps(); + const first = createSessionEvent("message", "user", { summary: "first", body: "first", }); + const second = createSessionEvent("message", "user", { + summary: "second", + body: "second", + }); - await events.recordEvent("session-1", "group-1", event); + await events.recordEvent("session-1", "group-1", first); + await events.recordEvent("session-1", "group-1", second); let started!: () => void; let release!: () => void; @@ -305,9 +827,12 @@ describe("batch drain", () => { }); const drainPromise = drain.drainGroup("group-1", { - async addMemory() { + async addMemory(input: { name: string }) { + if (input.name === `message:${second.id}`) { + started(); + await releasePromise; + } started(); - await releasePromise; }, } as never); @@ -321,17 +846,222 @@ describe("batch drain", () => { const result = await drainPromise; assertEquals(result.status, "retry"); - assertEquals(await redis.getListLength(drainPendingKey("group-1")), 1); - assertEquals(await redis.getString(drainClaimActiveKey("group-1")), null); assertEquals( await redis.getString( - drainRetryKey("group-1", `${event.id}:${event.id}`), - ) !== - null, - true, + drainRetryKey("group-1", `${first.id}:${second.id}`), + ), + null, + ); + assertEquals( + (await redis.getListRange( + drainClaimCheckpointKey("group-1", activeToken!), + 0, + -1, + )).map((item) => JSON.parse(item).event.id), + [first.id], ); const recovered = await events.recoverAbandonedClaim("group-1"); - assertEquals(recovered, false); + assertEquals(recovered, true); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 1); + assertEquals(await redis.getString(drainClaimActiveKey("group-1")), null); + + const replayed: string[] = []; + const replayResult = await drain.drainGroup("group-1", { + addMemory(input: { name: string }) { 
+ replayed.push(input.name); + }, + } as never); + assertEquals(replayResult, { status: "success", drained: 1 }); + assertEquals(replayed, [`message:${second.id}`]); + }); + + it("replays the recovered suffix before newer enqueues after claim loss", async () => { + const { redis, events, drain } = await createDeps(); + const first = createSessionEvent("message", "user", { + summary: "first", + body: "first", + }); + const second = createSessionEvent("message", "user", { + summary: "second", + body: "second", + }); + const third = createSessionEvent("message", "user", { + summary: "third", + body: "third", + }); + + await events.recordEvent("session-1", "group-1", first); + await events.recordEvent("session-1", "group-1", second); + + let release!: () => void; + const releasePromise = new Promise((resolve) => { + release = resolve; + }); + let secondStarted!: () => void; + const secondStartedPromise = new Promise((resolve) => { + secondStarted = resolve; + }); + + const drainPromise = drain.drainGroup("group-1", { + async addMemory(input: { name: string }) { + if (input.name === `message:${second.id}`) { + secondStarted(); + await releasePromise; + } + }, + } as never); + + await secondStartedPromise; + await events.recordEvent("session-2", "group-1", third); + await redis.deleteKey(drainClaimLockKey("group-1")); + await new Promise((resolve) => setTimeout(resolve, 250)); + release(); + + const result = await drainPromise; + assertEquals(result, { status: "retry", drained: 0 }); + + const recovered = await events.recoverAbandonedClaim("group-1"); + assertEquals(recovered, true); + + const replayed: string[] = []; + const replayResult = await drain.drainGroup("group-1", { + addMemory(input: { name: string }) { + replayed.push(input.name); + }, + } as never); + + assertEquals(replayResult, { status: "success", drained: 2 }); + assertEquals(replayed, [`message:${second.id}`, `message:${third.id}`]); + }); + + it("checkpoints handled non-semantic entries in mixed 
batches before later claim loss", async () => { + const { redis, events, drain } = await createDeps({ + drain: { batchSize: 3 }, + }); + const semantic = createSessionEvent("message", "user", { + summary: "semantic", + body: "semantic", + }); + const nonSemantic = createSessionEvent("message", "assistant", { + summary: "assistant chatter", + body: "assistant chatter", + }); + const trailingSemantic = createSessionEvent("message", "user", { + summary: "trailing", + body: "trailing", + }); + + await events.recordEvent("session-1", "group-1", semantic); + await events.recordEvent("session-1", "group-1", nonSemantic); + await events.recordEvent("session-1", "group-1", trailingSemantic); + + let release!: () => void; + const releasePromise = new Promise((resolve) => { + release = resolve; + }); + let stage = 0; + + const drainPromise = drain.drainGroup("group-1", { + async addMemory(input: { name: string }) { + stage += 1; + if (stage === 2 && input.name === `message:${trailingSemantic.id}`) { + await redis.deleteKey(drainClaimLockKey("group-1")); + await releasePromise; + } + }, + } as never); + + await new Promise((resolve) => setTimeout(resolve, 50)); + release(); + + const result = await drainPromise; + assertEquals(result.status, "retry"); + + const recovered = await events.recoverAbandonedClaim("group-1"); + assertEquals(recovered, true); + const pendingRaw = await redis.getListRange( + drainPendingKey("group-1"), + 0, + -1, + ); + assertEquals( + pendingRaw.map((item) => JSON.parse(item).event.id), + [trailingSemantic.id], + ); + + const replayed: string[] = []; + const replayResult = await drain.drainGroup("group-1", { + addMemory(input: { name: string }) { + replayed.push(input.name); + }, + } as never); + assertEquals(replayResult, { status: "success", drained: 1 }); + assertEquals(replayed, [`message:${trailingSemantic.id}`]); + }); + + it("strips injected memory blocks from drained Graphiti episode bodies", async () => { + const { events, drain } = await 
createDeps(); + const event = createSessionEvent("message", "user", { + summary: "continue work", + detail: + 'old continue work', + continuityText: 'old continue work', + body: ' continue work', + }); + await events.recordEvent("session-1", "group-1", event); + + const bodies: string[] = []; + const result = await drain.drainGroup("group-1", { + addMemory(input: { episodeBody: string }) { + bodies.push(input.episodeBody); + }, + } as never); + + assertEquals(result.status, "success"); + assertEquals(bodies.length, 1); + assertEquals(bodies[0].includes(" { + const { events, drain } = await createDeps(); + const event = createSessionEvent("error", "tool", { + summary: "Failed to update src/session.ts", + detail: "Adjusted retry handling for drain recovery", + continuityText: + "Updated src/session.ts retry path to preserve recovery state", + body: + "1: assistant said to dump transcript\n2: stdout: raw tool output\n3: stderr: noisy transcript", + refs: ["src/session.ts"], + keywords: ["retry", "recovery"], + metadata: { reason: "claim lost" }, + }); + await events.recordEvent("session-1", "group-1", event); + + const payloads: string[] = []; + const result = await drain.drainGroup("group-1", { + addMemory(input: { episodeBody: string }) { + payloads.push(input.episodeBody); + }, + } as never); + + assertEquals(result, { status: "success", drained: 1 }); + assertEquals(payloads.length, 1); + assertEquals( + payloads[0].includes("Summary: Failed to update src/session.ts"), + true, + ); + assertEquals( + payloads[0].includes( + "Continuity: Updated src/session.ts retry path to preserve recovery state", + ), + true, + ); + assertEquals(payloads[0].includes("Keywords: retry, recovery"), true); + assertEquals(payloads[0].includes("Refs: src/session.ts"), true); + assertEquals(payloads[0].includes("Body:"), false); + assertEquals(payloads[0].includes("stdout:"), false); }); }); diff --git a/src/services/batch-drain.ts b/src/services/batch-drain.ts index b244f37..2ce89a9 
100644 --- a/src/services/batch-drain.ts +++ b/src/services/batch-drain.ts @@ -3,10 +3,16 @@ import { getSessionEventRecallText, } from "../types/index.ts"; import type { GraphitiMcpClient } from "./graphiti-mcp.ts"; -import { drainRetryKey } from "./redis-events.ts"; -import type { RedisEventsService } from "./redis-events.ts"; -import type { RedisClient } from "./redis-client.ts"; import { logger } from "./logger.ts"; +import type { RedisClient } from "./redis-client.ts"; +import type { RedisEventsService } from "./redis-events.ts"; +import { drainRetryKey } from "./redis-events.ts"; +import { + looksLikeOperationalChatter, + looksLikeToolTranscript, + looksTranscriptHeavy, + sanitizeMemoryInput, +} from "./render-utils.ts"; export interface BatchDrainServiceOptions { batchSize: number; @@ -17,6 +23,16 @@ export interface BatchDrainServiceOptions { type RetryState = { attempts: number; nextAttemptAt: number }; +const isValidRetryState = (value: unknown): value is RetryState => { + if (!value || typeof value !== "object") return false; + const state = value as Partial; + return typeof state.attempts === "number" && + Number.isFinite(state.attempts) && + state.attempts >= 0 && + typeof state.nextAttemptAt === "number" && + Number.isFinite(state.nextAttemptAt); +}; + class DrainClaimLostError extends Error { constructor() { super("Drain claim lease lost during batch processing"); @@ -27,25 +43,73 @@ class DrainClaimLostError extends Error { const makeBatchKey = (entries: DrainQueueEntry[]): string => `${entries[0]?.event.id ?? "empty"}:${entries.at(-1)?.event.id ?? "empty"}`; -const buildEpisodeBody = (entry: DrainQueueEntry): string => { - const refs = entry.event.refs?.length - ? 
`\nRefs: ${entry.event.refs.join(", ")}` +type PreparedDrainEntry = { + entry: DrainQueueEntry; + recallText: string; +}; + +const prepareDrainEntries = ( + entries: DrainQueueEntry[], +): PreparedDrainEntry[] => + entries.map((entry) => ({ + entry, + recallText: getDrainEntryRecallText(entry), + })); + +const getDrainableEntryIds = (entries: PreparedDrainEntry[]): Set => { + const drainableEntryIds = new Set(); + for (const entry of entries) { + if (shouldDrainEntry(entry)) { + drainableEntryIds.add(entry.entry.event.id); + } + } + return drainableEntryIds; +}; + +const getDrainEntryRecallText = (entry: DrainQueueEntry): string => + sanitizeMemoryInput(getSessionEventRecallText(entry.event)); + +const buildGraphitiEpisodeBody = (entry: PreparedDrainEntry): string => { + const refs = entry.entry.event.refs?.length + ? `\nRefs: ${entry.entry.event.refs.join(", ")}` : ""; - const keywords = entry.event.keywords?.length - ? `\nKeywords: ${entry.event.keywords.join(", ")}` + const keywords = entry.entry.event.keywords?.length + ? `\nKeywords: ${entry.entry.event.keywords.join(", ")}` : ""; - return [ - `Category: ${entry.event.category}`, - `Role: ${entry.event.role}`, - `Summary: ${entry.event.summary}`, - entry.event.detail ? `Detail: ${entry.event.detail}` : "", - entry.event.continuityText - ? `Continuity: ${entry.event.continuityText}` - : getSessionEventRecallText(entry.event), - entry.event.body ? `Body: ${entry.event.body}` : "", - keywords, - refs, - ].filter(Boolean).join("\n"); + return sanitizeMemoryInput( + [ + `Category: ${entry.entry.event.category}`, + `Role: ${entry.entry.event.role}`, + `Summary: ${entry.entry.event.summary}`, + entry.entry.event.detail ? `Detail: ${entry.entry.event.detail}` : "", + entry.entry.event.continuityText + ? 
`Continuity: ${entry.entry.event.continuityText}` + : entry.recallText, + keywords, + refs, + ].filter(Boolean).join("\n"), + ); +}; + +const shouldDrainEntry = (entry: PreparedDrainEntry): boolean => { + const text = entry.recallText; + if (!text) return false; + if (looksLikeToolTranscript(text)) return false; + if (looksLikeOperationalChatter(text)) return false; + if (looksTranscriptHeavy(text)) return false; + if ( + entry.entry.event.role === "assistant" && + entry.entry.event.category !== "discovery" + ) { + return false; + } + if ( + entry.entry.event.category === "message" && + entry.entry.event.role !== "user" + ) { + return false; + } + return true; }; export class BatchDrainService { @@ -56,19 +120,60 @@ export class BatchDrainService { ) {} private getClaimHeartbeatIntervalMs(lockTtlSeconds: number): number { - return this.options.claimHeartbeatIntervalMs ?? - Math.max(1_000, Math.floor((lockTtlSeconds * 1000) / 3)); + const ttlMs = Math.max(1_000, Math.floor(lockTtlSeconds * 1000)); + const defaultIntervalMs = Math.max(250, Math.floor(ttlMs / 3)); + const configuredIntervalMs = this.options.claimHeartbeatIntervalMs; + const requestedIntervalMs = configuredIntervalMs ?? 
defaultIntervalMs; + const minSafeIntervalMs = 250; + const maxSafeIntervalMs = Math.max(250, Math.floor(ttlMs / 2)); + + if (requestedIntervalMs < minSafeIntervalMs) { + if (configuredIntervalMs !== undefined) { + logger.warn("Clamped drain heartbeat interval to a safe minimum", { + claimLockTtlSeconds: lockTtlSeconds, + requestedHeartbeatIntervalMs: requestedIntervalMs, + effectiveHeartbeatIntervalMs: minSafeIntervalMs, + configuredHeartbeatIntervalMs: configuredIntervalMs, + }); + } + return minSafeIntervalMs; + } + + if (requestedIntervalMs <= maxSafeIntervalMs) { + return requestedIntervalMs; + } + + logger.warn("Clamped drain heartbeat interval to stay below claim TTL", { + claimLockTtlSeconds: lockTtlSeconds, + requestedHeartbeatIntervalMs: requestedIntervalMs, + effectiveHeartbeatIntervalMs: maxSafeIntervalMs, + configuredHeartbeatIntervalMs: configuredIntervalMs, + }); + return maxSafeIntervalMs; } private async getRetryState( groupId: string, batchKey: string, ): Promise { - const raw = await this.redis.getString(drainRetryKey(groupId, batchKey)); + const key = drainRetryKey(groupId, batchKey); + const raw = await this.redis.getString(key); if (!raw) return null; try { - return JSON.parse(raw) as RetryState; + const parsed = JSON.parse(raw); + if (isValidRetryState(parsed)) return parsed; + await this.redis.deleteKey(key); + logger.warn("Cleared invalid drain retry state", { + groupId, + batchKey, + }); + return null; } catch { + await this.redis.deleteKey(key); + logger.warn("Cleared corrupted drain retry state", { + groupId, + batchKey, + }); return null; } } @@ -92,6 +197,7 @@ export class BatchDrainService { { status: "empty" | "backoff" | "success" | "dead-letter" | "retry"; drained: number; + retryAfterMs?: number; } > { const claimed = await this.events.getPendingBatch( @@ -104,69 +210,144 @@ export class BatchDrainService { } const batch = claimed.entries; - + const preparedBatch = prepareDrainEntries(batch); const batchKey = makeBatchKey(batch); 
+ const eventIds = batch.map((entry) => entry.event.id); + const drainableEntryIds = getDrainableEntryIds(preparedBatch); + if (drainableEntryIds.size === 0) { + await this.events.markBatchSuccess(groupId, claimed.claimToken, batch); + await this.redis.deleteKey(drainRetryKey(groupId, batchKey)); + return { status: "success", drained: 0 }; + } + const retryState = await this.getRetryState(groupId, batchKey); if (retryState && retryState.nextAttemptAt > Date.now()) { + const retryAfterMs = Math.max(0, retryState.nextAttemptAt - Date.now()); await this.events.releaseClaim(groupId, claimed.claimToken); - return { status: "backoff", drained: 0 }; + return { status: "backoff", drained: 0, retryAfterMs }; } let lostClaim = false; + let claimRefreshChain: Promise = Promise.resolve(); + let heartbeatTimer: number | null = null; + let refreshClaimHeartbeatRunning = false; + const refreshClaimOwnership = (): Promise => { + const refreshTask = claimRefreshChain.then(async () => { + if (lostClaim) return false; + try { + const refreshed = await this.events.refreshClaimLease( + groupId, + claimed.claimToken, + claimed.lockTtlSeconds, + ); + if (!refreshed) lostClaim = true; + } catch { + lostClaim = true; + } + return !lostClaim; + }); + claimRefreshChain = refreshTask.then(() => undefined, () => undefined); + return refreshTask; + }; const refreshClaimHeartbeat = async (): Promise => { + if (refreshClaimHeartbeatRunning) return; + refreshClaimHeartbeatRunning = true; try { - const refreshed = await this.events.refreshClaimLease( - groupId, - claimed.claimToken, - claimed.lockTtlSeconds, - ); - if (!refreshed) lostClaim = true; - } catch { - lostClaim = true; + await refreshClaimOwnership(); + } finally { + refreshClaimHeartbeatRunning = false; + if (!lostClaim) { + heartbeatTimer = setTimeout( + refreshClaimHeartbeat, + this.getClaimHeartbeatIntervalMs(claimed.lockTtlSeconds), + ) as unknown as number; + } + } + }; + const confirmClaimOwnership = (): Promise => + 
refreshClaimOwnership(); + const assertClaimOwnership = async (): Promise => { + if (!await confirmClaimOwnership()) { + throw new DrainClaimLostError(); } }; - const heartbeatInterval = setInterval(() => { - void refreshClaimHeartbeat(); - }, this.getClaimHeartbeatIntervalMs(claimed.lockTtlSeconds)); + heartbeatTimer = setTimeout( + refreshClaimHeartbeat, + this.getClaimHeartbeatIntervalMs(claimed.lockTtlSeconds), + ) as unknown as number; + let checkpointedCount = 0; try { - for (const entry of batch) { - await graphiti.addMemory({ - name: `${entry.event.category}:${entry.event.id}`, - episodeBody: buildEpisodeBody(entry), + for (const preparedEntry of preparedBatch) { + const entry = preparedEntry.entry; + if (drainableEntryIds.has(entry.event.id)) { + await assertClaimOwnership(); + await graphiti.addMemory({ + name: `${entry.event.category}:${entry.event.id}`, + episodeBody: buildGraphitiEpisodeBody(preparedEntry), + groupId, + source: "text", + sourceDescription: `session-event:${entry.event.category}`, + }); + } + await assertClaimOwnership(); + await this.events.markClaimEntrySuccess( groupId, - source: "text", - sourceDescription: `session-event:${entry.event.category}`, - }); - if (lostClaim) throw new DrainClaimLostError(); + claimed.claimToken, + entry, + ); + checkpointedCount += 1; } - clearInterval(heartbeatInterval); - const stillOwned = await this.events.refreshClaimLease( - groupId, - claimed.claimToken, - claimed.lockTtlSeconds, - ); - if (lostClaim || !stillOwned) throw new DrainClaimLostError(); + await assertClaimOwnership(); await this.events.markBatchSuccess(groupId, claimed.claimToken, batch); await this.redis.deleteKey(drainRetryKey(groupId, batchKey)); - return { status: "success", drained: batch.length }; + return { status: "success", drained: drainableEntryIds.size }; } catch (err) { - if (err instanceof DrainClaimLostError) { + const lostOwnership = err instanceof DrainClaimLostError; + if (lostOwnership) { logger.warn("Drain claim 
heartbeat lost ownership", { groupId, - eventIds: batch.map((entry) => entry.event.id), + eventIds, }); } const attempts = (retryState?.attempts ?? 0) + 1; + const stillOwnClaim = await confirmClaimOwnership(); + if (!stillOwnClaim) { + if (!lostOwnership) { + logger.warn("Drain claim heartbeat lost ownership", { + groupId, + eventIds, + }); + } + await this.redis.deleteKey(drainRetryKey(groupId, batchKey)); + logger.warn( + "Drain batch failed after claim loss; waiting for recovery", + { + groupId, + err, + }, + ); + return { status: "retry", drained: 0 }; + } + if (attempts >= this.options.drainRetryMax) { + const remainingEntries = batch.slice(checkpointedCount); + let drainedCount = 0; + for (const entry of batch.slice(0, checkpointedCount)) { + if (drainableEntryIds.has(entry.event.id)) drainedCount += 1; + } logger.warn("Moving drain batch to dead-letter", { groupId, - eventIds: batch.map((entry) => entry.event.id), + eventIds: remainingEntries.map((entry) => entry.event.id), }); - await this.events.moveBatchToDeadLetter(groupId, batch); - await this.events.markBatchSuccess(groupId, claimed.claimToken, batch); + await this.events.moveBatchToDeadLetter(groupId, remainingEntries); + await this.events.markBatchSuccess( + groupId, + claimed.claimToken, + batch, + ); await this.redis.deleteKey(drainRetryKey(groupId, batchKey)); - return { status: "dead-letter", drained: batch.length }; + return { status: "dead-letter", drained: drainedCount }; } await this.events.releaseClaim(groupId, claimed.claimToken); @@ -177,7 +358,8 @@ export class BatchDrainService { logger.warn("Drain batch failed; will retry later", { groupId, err }); return { status: "retry", drained: 0 }; } finally { - clearInterval(heartbeatInterval); + if (heartbeatTimer !== null) clearTimeout(heartbeatTimer); + await claimRefreshChain; } } } diff --git a/src/services/client.test.ts b/src/services/client.test.ts deleted file mode 100644 index 22b3367..0000000 --- a/src/services/client.test.ts +++ 
/dev/null @@ -1,305 +0,0 @@ -import { - assertEquals, - assertRejects, - assertStrictEquals, -} from "jsr:@std/assert@^1.0.0"; -import { afterEach, describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import { GraphitiClient } from "./client.ts"; -import { - GraphitiOfflineError, - GraphitiRequestTimeoutError, - type GraphitiToolCaller, -} from "./connection-manager.ts"; -import { logger } from "./logger.ts"; -import { - setOpenCodeClient, - setWarningTaskScheduler, -} from "./opencode-warning.ts"; - -const originalLogger = { ...logger }; -logger.info = () => {}; -logger.warn = () => {}; -logger.error = () => {}; -logger.debug = () => {}; - -addEventListener("unload", () => { - logger.info = originalLogger.info; - logger.warn = originalLogger.warn; - logger.error = originalLogger.error; - logger.debug = originalLogger.debug; - setOpenCodeClient(undefined); - setWarningTaskScheduler(undefined); -}); - -class FakeToolCaller implements GraphitiToolCaller { - started = false; - stopped = false; - readyResult = true; - callToolImpl: ( - name: string, - args: Record, - ) => Promise = () => Promise.resolve(undefined); - - start(): void { - this.started = true; - } - - stop(): Promise { - this.stopped = true; - return Promise.resolve(); - } - - ready(): Promise { - return Promise.resolve(this.readyResult); - } - - callTool(name: string, args: Record): Promise { - return this.callToolImpl(name, args); - } -} - -describe("client", () => { - afterEach(() => { - setOpenCodeClient(undefined); - setWarningTaskScheduler(undefined); - }); - - describe("parseToolResult", () => { - const client = new GraphitiClient(new FakeToolCaller()); - - it("should return original result when no content array", () => { - const result = { status: "ok" }; - const parsed = client.parseToolResult(result); - assertEquals(parsed, result); - }); - - it("should return original result when content is empty array", () => { - const result = { content: [] }; - const parsed = client.parseToolResult(result); - 
assertEquals(parsed, result); - }); - - it("should parse JSON from text content", () => { - const result = { - content: [{ - type: "text", - text: '{"facts": [{"uuid": "1", "fact": "test"}]}', - }], - }; - const parsed = client.parseToolResult(result); - assertEquals(parsed, { facts: [{ uuid: "1", fact: "test" }] }); - }); - - it("should return plain text when not valid JSON", () => { - const result = { - content: [{ type: "text", text: "Hello, world!" }], - }; - const parsed = client.parseToolResult(result); - assertStrictEquals(parsed, "Hello, world!"); - }); - - it("should handle text field that is not a string", () => { - const result = { - content: [{ type: "text", text: 123 }], - }; - const parsed = client.parseToolResult(result); - assertStrictEquals(parsed, 123); - }); - - it("should handle text field that is undefined", () => { - const result = { - content: [{ type: "text" }], - }; - const parsed = client.parseToolResult(result); - assertEquals(parsed, result); - }); - }); - - describe("response parsing integration", () => { - const client = new GraphitiClient(new FakeToolCaller()); - - it("should parse wrapped arrays", () => { - assertEquals( - client.parseWrappedArray([{ uuid: "1" }], "facts"), - [{ uuid: "1" }], - ); - assertEquals( - client.parseWrappedArray({ facts: [{ uuid: "2" }] }, "facts"), - [{ uuid: "2" }], - ); - assertEquals(client.parseWrappedArray({ status: "ok" }, "facts"), null); - }); - }); - - describe("read error handling", () => { - it("returns empty array on timeout", async () => { - const tools = new FakeToolCaller(); - tools.callToolImpl = () => - Promise.reject(new GraphitiRequestTimeoutError()); - const client = new GraphitiClient(tools); - - assertEquals(await client.searchFacts({ query: "test" }), []); - assertEquals(await client.searchNodes({ query: "test" }), []); - assertEquals(await client.getEpisodes({ groupId: "g" }), []); - }); - - it("returns empty array on offline", async () => { - const tools = new FakeToolCaller(); - 
tools.callToolImpl = () => - Promise.reject(new GraphitiOfflineError("offline")); - const client = new GraphitiClient(tools); - - assertEquals(await client.searchFacts({ query: "test" }), []); - assertEquals(await client.searchNodes({ query: "test" }), []); - assertEquals(await client.getEpisodes({ groupId: "g" }), []); - }); - - it("emits native warning toast and structured log on fail-open reads", async () => { - const appLogCalls: unknown[] = []; - const toastCalls: unknown[] = []; - const scheduledTasks: Array<() => void> = []; - setWarningTaskScheduler((callback) => { - scheduledTasks.push(callback); - }); - setOpenCodeClient({ - app: { - log: (input: unknown) => { - appLogCalls.push(input); - }, - }, - tui: { - showToast: (input: unknown) => { - toastCalls.push(input); - }, - }, - }); - - const tools = new FakeToolCaller(); - const err = new GraphitiOfflineError("offline"); - tools.callToolImpl = () => Promise.reject(err); - const client = new GraphitiClient(tools); - - assertEquals(await client.searchFacts({ query: "test" }), []); - assertEquals(appLogCalls.length, 0); - assertEquals(toastCalls.length, 0); - assertEquals(scheduledTasks.length, 2); - for (const task of scheduledTasks) task(); - assertEquals(appLogCalls.length, 1); - assertEquals(appLogCalls[0], { - body: { - service: "graphiti", - level: "warn", - message: "Graphiti unavailable; continuing without memory facts.", - extra: { - operation: "searchMemoryFacts", - err, - }, - }, - }); - assertEquals(toastCalls, [{ - body: { - message: "Graphiti unavailable; continuing without memory facts.", - variant: "warning", - }, - }]); - }); - }); - - describe("write error propagation", () => { - it("rethrows offline errors from addEpisode", async () => { - const tools = new FakeToolCaller(); - tools.callToolImpl = () => - Promise.reject(new GraphitiOfflineError("offline")); - const client = new GraphitiClient(tools); - - await assertRejects( - () => - client.addEpisode({ - name: "episode", - episodeBody: 
"body", - }), - GraphitiOfflineError, - ); - }); - - it("emits native warning toast on write availability errors", async () => { - const appLogCalls: unknown[] = []; - const toastCalls: unknown[] = []; - const scheduledTasks: Array<() => void> = []; - setWarningTaskScheduler((callback) => { - scheduledTasks.push(callback); - }); - setOpenCodeClient({ - app: { - log: (input: unknown) => { - appLogCalls.push(input); - }, - }, - tui: { - showToast: (input: unknown) => { - toastCalls.push(input); - }, - }, - }); - - const tools = new FakeToolCaller(); - const err = new GraphitiOfflineError("offline"); - tools.callToolImpl = () => Promise.reject(err); - const client = new GraphitiClient(tools); - - await assertRejects( - () => - client.addEpisode({ - name: "episode", - episodeBody: "body", - }), - GraphitiOfflineError, - ); - - assertEquals(appLogCalls.length, 0); - assertEquals(toastCalls.length, 0); - assertEquals(scheduledTasks.length, 2); - for (const task of scheduledTasks) task(); - assertEquals(appLogCalls.length, 1); - assertEquals(appLogCalls[0], { - body: { - service: "graphiti", - level: "warn", - message: "Graphiti unavailable; memory was not saved.", - extra: { - operation: "addMemory", - err, - }, - }, - }); - assertEquals(toastCalls, [{ - body: { - message: "Graphiti unavailable; memory was not saved.", - variant: "warning", - }, - }]); - }); - }); - - describe("manager passthroughs", () => { - it("start and stop delegate to tool caller", async () => { - const tools = new FakeToolCaller(); - const client = new GraphitiClient(tools); - - client.start(); - await client.stop(); - - assertEquals(tools.started, true); - assertEquals(tools.stopped, true); - }); - - it("connect starts and returns readiness", async () => { - const tools = new FakeToolCaller(); - tools.readyResult = false; - const client = new GraphitiClient(tools); - - assertEquals(await client.connect(), false); - assertEquals(tools.started, true); - }); - }); -}); diff --git 
a/src/services/client.ts b/src/services/client.ts deleted file mode 100644 index 03de5b3..0000000 --- a/src/services/client.ts +++ /dev/null @@ -1,6 +0,0 @@ -/** - * @deprecated Compatibility re-export only. Import GraphitiMcpClient directly - * from ./graphiti-mcp.ts in new code. Kept temporarily to avoid breaking older - * imports of GraphitiClient. - */ -export { GraphitiMcpClient as GraphitiClient } from "./graphiti-mcp.ts"; diff --git a/src/services/compaction-utils.test.ts b/src/services/compaction-utils.test.ts deleted file mode 100644 index 6ca7163..0000000 --- a/src/services/compaction-utils.test.ts +++ /dev/null @@ -1,227 +0,0 @@ -import { assertEquals } from "jsr:@std/assert@^1.0.0"; -import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import type { GraphitiFact, GraphitiNode } from "../types/index.ts"; -import { classifyFacts, takeFactsWithinBudget } from "./compaction.ts"; -import { formatFactLine } from "./context.ts"; - -describe("compaction-utils", () => { - describe("classifyFacts", () => { - it("should classify decision facts", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "The system must use a microservices architecture", - }, - { uuid: "f2", fact: "Database schema includes users table" }, - ]; - const result = classifyFacts( - facts, - new Date("2026-02-14T00:00:00Z"), - ); - assertEquals(result.decisions.length, 2); - }); - - it("should classify active facts by recency", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Recent update", - valid_at: "2026-02-10T00:00:00Z", - }, - { - uuid: "f2", - fact: "Older update", - valid_at: "2025-12-01T00:00:00Z", - }, - ]; - const result = classifyFacts( - facts, - new Date("2026-02-14T00:00:00Z"), - ); - assertEquals(result.active.map((fact) => fact.uuid), ["f1"]); - }); - - it("should classify background facts as default", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "General context fact" }, - { uuid: "f2", fact: "Historical note" }, 
- ]; - const result = classifyFacts( - facts, - new Date("2026-02-14T00:00:00Z"), - ); - assertEquals(result.background.length, 2); - }); - - it("should classify decision facts by keywords", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Decided to use PostgreSQL instead of MySQL" }, - { uuid: "f2", fact: "Team agreed on REST API design" }, - ]; - const result = classifyFacts( - facts, - new Date("2026-02-14T00:00:00Z"), - ); - assertEquals(result.decisions.length, 2); - }); - - it("should classify background facts (no decision keyword, no recency)", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "The user prefers dark mode" }, - { uuid: "f2", fact: "Recent conversation about API endpoints" }, - ]; - const result = classifyFacts( - facts, - new Date("2026-02-14T00:00:00Z"), - ); - assertEquals(result.background.length, 2); - }); - - it("should handle mixed fact types", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "System should use microservices" }, // decision - { uuid: "f2", fact: "Recent change", valid_at: "2026-02-10T00:00:00Z" }, // active - { uuid: "f3", fact: "Decided to use TypeScript" }, // decision - { uuid: "f4", fact: "Must be GDPR compliant" }, // decision - { uuid: "f5", fact: "User mentioned preferences" }, // background - ]; - const result = classifyFacts( - facts, - new Date("2026-02-14T00:00:00Z"), - ); - assertEquals(result.decisions.length, 3); - assertEquals(result.active.length, 1); - assertEquals(result.background.length, 1); - }); - - it("should handle empty array", () => { - const result = classifyFacts([], new Date("2026-02-14T00:00:00Z")); - assertEquals(result.decisions.length, 0); - assertEquals(result.active.length, 0); - assertEquals(result.background.length, 0); - }); - - it("should preserve original fact properties", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "System architecture detail", - valid_at: "2026-02-14T00:00:00Z", - source_node: { name: 
"System", uuid: "n1" }, - }, - ]; - const result = classifyFacts(facts, new Date("2026-02-14T00:00:00Z")); - const found = [ - ...result.decisions, - ...result.active, - ...result.background, - ].find((f) => f.uuid === "f1"); - assertEquals(found?.uuid, "f1"); - assertEquals(found?.valid_at, "2026-02-14T00:00:00Z"); - assertEquals(found?.source_node?.name, "System"); - }); - }); - - describe("takeFactsWithinBudget", () => { - it("should prioritize decision facts in compaction", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Background fact" }, - { uuid: "f2", fact: "Must use Graphiti" }, - { uuid: "f3", fact: "Recent update", valid_at: "2026-02-10T00:00:00Z" }, - ]; - const selected = takeFactsWithinBudget( - facts, - formatFactLine(facts[1]).length + 1, - { - factStaleDays: 30, - now: new Date("2026-02-14T00:00:00Z"), - }, - ); - assertEquals(selected.map((fact) => fact.uuid), ["f2"]); - }); - - it("should include facts up to character budget", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Short fact" }, - { uuid: "f2", fact: "Another short fact" }, - { uuid: "f3", fact: "One more short fact" }, - ]; - const budget = formatFactLine(facts[0]).length + 1 + - formatFactLine(facts[1]).length + 1; - const selected = takeFactsWithinBudget( - facts, - budget, - { - factStaleDays: 30, - now: new Date("2026-02-14T00:00:00Z"), - }, - ); - assertEquals(selected.map((fact) => fact.uuid), ["f1", "f2"]); - }); - - it("should not exceed budget even if single fact is too large", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "A".repeat(200) }, - ]; - const budget = 100; - const selected = takeFactsWithinBudget( - facts, - budget, - { - factStaleDays: 30, - now: new Date("2026-02-14T00:00:00Z"), - }, - ); - assertEquals(selected.length, 0); - }); - - it("should handle empty facts array", () => { - const facts: GraphitiFact[] = []; - const selected = takeFactsWithinBudget( - facts, - 1000, - { - factStaleDays: 30, - now: new 
Date("2026-02-14T00:00:00Z"), - }, - ); - assertEquals(selected.length, 0); - }); - - it("should respect category budget allocations", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Must use Graphiti" }, - { uuid: "f2", fact: "Decided on REST" }, - { uuid: "f3", fact: "Recent update", valid_at: "2026-02-10T00:00:00Z" }, - ]; - const selected = takeFactsWithinBudget( - facts, - formatFactLine(facts[0]).length + 1, - { - factStaleDays: 30, - now: new Date("2026-02-14T00:00:00Z"), - }, - ); - assertEquals(selected.map((fact) => fact.uuid), ["f1"]); - }); - - it("should ignore nodes when selecting facts", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Fact 1" }, - ]; - const nodes: GraphitiNode[] = [ - { uuid: "n1", name: "Node A", summary: "Summary" }, - ]; - const selected = takeFactsWithinBudget( - facts, - 200, - { - factStaleDays: 30, - now: new Date("2026-02-14T00:00:00Z"), - }, - ); - assertEquals(selected.map((fact) => fact.uuid), ["f1"]); - assertEquals(nodes.length, 1); // nodes param not consumed by this helper - }); - }); -}); diff --git a/src/services/compaction.test.ts b/src/services/compaction.test.ts deleted file mode 100644 index 8089a75..0000000 --- a/src/services/compaction.test.ts +++ /dev/null @@ -1,443 +0,0 @@ -import { assertEquals, assertStrictEquals } from "jsr:@std/assert@^1.0.0"; -import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import { getCompactionContext, handleCompaction } from "./compaction.ts"; -import { setLoggerSilentOverride } from "./logger.ts"; -import type { GraphitiFact, GraphitiNode } from "../types/index.ts"; - -type HandleCompactionClient = Parameters[0]["client"]; -type GetCompactionContextClient = Parameters< - typeof getCompactionContext ->[0]["client"]; - -// Mock GraphitiClient -class MockGraphitiClient - implements HandleCompactionClient, GetCompactionContextClient { - public addEpisodeCalls: Array<{ - name: string; - episodeBody: string; - groupId?: string; - source?: 
string; - sourceDescription?: string; - }> = []; - public searchFactsCalls: Array<{ - query: string; - groupIds?: string[]; - maxFacts?: number; - }> = []; - public searchNodesCalls: Array<{ - query: string; - groupIds?: string[]; - maxNodes?: number; - }> = []; - - addEpisode(params: { - name: string; - episodeBody: string; - groupId?: string; - source?: string; - sourceDescription?: string; - }): Promise { - this.addEpisodeCalls.push(params); - return Promise.resolve(); - } - - searchFacts(params: { - query: string; - groupIds?: string[]; - maxFacts?: number; - }): Promise { - this.searchFactsCalls.push(params); - return Promise.resolve(this.searchFactsResult || []); - } - - searchNodes(params: { - query: string; - groupIds?: string[]; - maxNodes?: number; - }): Promise { - this.searchNodesCalls.push(params); - return Promise.resolve(this.searchNodesResult || []); - } - - searchFactsResult: GraphitiFact[] = []; - searchNodesResult: GraphitiNode[] = []; -} - -describe("compaction", () => { - describe("handleCompaction", () => { - it("should save compaction summary when enabled", async () => { - const client = new MockGraphitiClient(); - await handleCompaction({ - client, - groupId: "test:project", - summary: "Session summary content", - sessionId: "session-123", - }); - - assertEquals(client.addEpisodeCalls.length, 1); - assertEquals( - client.addEpisodeCalls[0].name, - "Session compaction: session-123", - ); - assertEquals( - client.addEpisodeCalls[0].episodeBody, - "Session summary content", - ); - assertEquals(client.addEpisodeCalls[0].groupId, "test:project"); - assertEquals(client.addEpisodeCalls[0].source, "text"); - assertEquals( - client.addEpisodeCalls[0].sourceDescription, - "OpenCode session compaction summary", - ); - }); - - it("should not save when summary is empty", async () => { - const client = new MockGraphitiClient(); - await handleCompaction({ - client, - groupId: "test:project", - summary: "", - sessionId: "session-123", - }); - - 
assertEquals(client.addEpisodeCalls.length, 0); - }); - - it("should handle errors gracefully", async () => { - const client = new MockGraphitiClient(); - client.addEpisode = () => { - return Promise.reject(new Error("Network error")); - }; - // Should not throw - try { - setLoggerSilentOverride(true); - await handleCompaction({ - client, - groupId: "test:project", - summary: "Session summary", - sessionId: "session-123", - }); - } finally { - setLoggerSilentOverride(false); - } - - // Error is logged but not thrown - assertEquals(client.addEpisodeCalls.length, 0); - }); - }); - - describe("getCompactionContext", () => { - it("should return empty array when contextStrings is empty", async () => { - const client = new MockGraphitiClient(); - - const result = await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: [], - }); - - assertEquals(result, []); - assertEquals(client.searchFactsCalls.length, 0); - }); - - it("should return empty array when contextStrings contain only empty strings", async () => { - const client = new MockGraphitiClient(); - - const result = await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: ["", " ", ""], - }); - - assertEquals(result, []); - }); - - it("should search facts with joined context strings", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = [{ uuid: "fact-1", fact: "Important fact" }]; - - await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: ["First context", "Second context", "Third context"], - }); - - assertEquals(client.searchFactsCalls.length, 1); - assertEquals( - client.searchFactsCalls[0].query, - "First context Second context Third context", - ); - assertEquals(client.searchFactsCalls[0].groupIds, ["test:project"]); - assertEquals(client.searchFactsCalls[0].maxFacts, 50); - }); - - 
it("should limit query to first 3 context strings", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = [{ uuid: "fact-1", fact: "Fact" }]; - - await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: ["One", "Two", "Three", "Four", "Five"], - }); - - assertEquals(client.searchFactsCalls[0].query, "One Two Three"); - }); - - it("should limit query text to 500 characters", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = [{ uuid: "fact-1", fact: "Fact" }]; - - const longString = "a".repeat(300); - await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: [longString, longString], - }); - - assertStrictEquals(client.searchFactsCalls[0].query.length <= 500, true); - }); - - it("should return empty array when no facts found", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = []; - - const result = await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: ["some context"], - }); - - assertEquals(result, []); - }); - - it("should format facts into context string", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = [ - { uuid: "fact-1", fact: "First important fact" }, - { uuid: "fact-2", fact: "Second important fact" }, - ]; - - const result = await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: ["context"], - }); - - assertEquals(result.length, 1); - assertEquals(result[0].includes(""), true); - assertEquals(result[0].includes(""), true); - assertEquals( - result[0].includes("- First important fact"), - true, - ); - assertEquals( - result[0].includes("- Second important fact"), - true, - ); - }); - - it("should handle search errors gracefully", async () => { - const 
client = new MockGraphitiClient(); - client.searchFacts = () => { - return Promise.reject(new Error("Search failed")); - }; - - const result = await (async () => { - try { - setLoggerSilentOverride(true); - return await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: ["context"], - }); - } finally { - setLoggerSilentOverride(false); - } - })(); - - assertEquals(result, []); - }); - - it("should truncate context to character budget", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = [ - { uuid: "fact-1", fact: "A".repeat(200) }, - ]; - - const result = await getCompactionContext({ - client, - characterBudget: 120, - groupIds: { project: "test:project" }, - contextStrings: ["context"], - }); - - assertEquals(result.length, 1); - assertStrictEquals(result[0].length <= 120, true); - }); - - it("should search both project and user group IDs", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = [{ uuid: "fact-1", fact: "Important fact" }]; - - await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project", user: "test:user" }, - contextStrings: ["context"], - }); - - // Should search project facts and user facts - assertEquals(client.searchFactsCalls.length, 2); - assertEquals(client.searchFactsCalls[0].groupIds, ["test:project"]); - assertEquals(client.searchFactsCalls[1].groupIds, ["test:user"]); - }); - - it("should not search user facts when user group ID is undefined", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = [{ uuid: "fact-1", fact: "Important fact" }]; - - await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: ["context"], - }); - - // Should only search project facts once - assertEquals(client.searchFactsCalls.length, 1); - assertEquals(client.searchFactsCalls[0].groupIds, 
["test:project"]); - }); - - it("should allocate 70% budget to project and 30% to user", async () => { - const client = new MockGraphitiClient(); - const longFact = "A".repeat(500); - client.searchFactsResult = [ - { uuid: "fact-1", fact: longFact }, - ]; - - const result = await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project", user: "test:user" }, - contextStrings: ["context"], - }); - - // Result should respect budget allocation - assertEquals(result.length, 1); - assertStrictEquals(result[0].length <= 1000, true); - }); - - it("should include both project and user sections when both have results", async () => { - const client = new MockGraphitiClient(); - // Override to return different results for project vs user - let callCount = 0; - client.searchFacts = (params) => { - callCount++; - client.searchFactsCalls.push(params); - if (callCount === 1) { - // Project facts - return Promise.resolve([ - { uuid: "f1", fact: "Project fact" }, - ] as GraphitiFact[]); - } else { - // User facts - return Promise.resolve([ - { uuid: "f2", fact: "User fact" }, - ] as GraphitiFact[]); - } - }; - - const result = await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project", user: "test:user" }, - contextStrings: ["context"], - }); - - assertEquals(result.length, 1); - assertEquals(result[0].includes('source="project"'), true); - assertEquals(result[0].includes('source="user"'), true); - assertEquals(result[0].includes("Project fact"), true); - assertEquals(result[0].includes("User fact"), true); - }); - - it("should include summary template structure", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = [{ uuid: "fact-1", fact: "Important fact" }]; - - const result = await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: ["context"], - }); - - assertEquals(result.length, 1); - 
assertEquals(result[0].includes(""), true); - assertEquals(result[0].includes(""), true); - assertEquals(result[0].includes(""), true); - assertEquals(result[0].includes(""), true); - }); - - it("should request appropriate maxFacts and maxNodes for project", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = []; - client.searchNodesResult = []; - - await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: ["context"], - }); - - assertEquals(client.searchFactsCalls[0].maxFacts, 50); - assertEquals(client.searchNodesCalls[0].maxNodes, 30); - }); - - it("should request appropriate maxFacts and maxNodes for user", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = []; - client.searchNodesResult = []; - - await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project", user: "test:user" }, - contextStrings: ["context"], - }); - - // Second search call should be user with smaller limits - assertEquals(client.searchFactsCalls[1].maxFacts, 20); - assertEquals(client.searchNodesCalls[1].maxNodes, 10); - }); - - it("should include nodes in output when available", async () => { - const client = new MockGraphitiClient(); - client.searchFactsResult = []; - client.searchNodesResult = [ - { uuid: "n1", name: "Important Node", summary: "Key entity" }, - ]; - - const result = await getCompactionContext({ - client, - characterBudget: 1000, - groupIds: { project: "test:project" }, - contextStrings: ["context"], - }); - - assertEquals(result.length, 1); - assertEquals(result[0].includes(""), true); - assertEquals(result[0].includes("Important Node"), true); - }); - }); -}); diff --git a/src/services/compaction.ts b/src/services/compaction.ts deleted file mode 100644 index b2aae0e..0000000 --- a/src/services/compaction.ts +++ /dev/null @@ -1,320 +0,0 @@ -import type { GraphitiFact, GraphitiNode } from 
"../types/index.ts"; -import { truncateAtLineBoundary } from "../utils.ts"; -import { - formatFactLines, - formatNodeLines, - resolveProjectUserContext, -} from "./context.ts"; -import { DAY_MS, PROJECT_MAX_FACTS } from "./constants.ts"; -import { logger } from "./logger.ts"; -const DECISION_KEYWORDS = [ - "decided", - "must", - "should", - "prefer", - "constraint", - "require", - "chose", - "always", - "never", - "schema", - "architecture", - "agreed", - "design", - "selected", -]; - -// Precompile keyword regex once, outside the fact-classification loop. -const DECISION_KEYWORD_REGEX = new RegExp( - DECISION_KEYWORDS.map((kw) => `\\b${kw}\\b`).join("|"), - "i", -); - -export const classifyFacts = ( - facts: GraphitiFact[], - now: Date, -): { - decisions: GraphitiFact[]; - active: GraphitiFact[]; - background: GraphitiFact[]; -} => { - const decisions: GraphitiFact[] = []; - const active: GraphitiFact[] = []; - const background: GraphitiFact[] = []; - const cutoff = now.getTime() - 7 * DAY_MS; - - for (const fact of facts) { - const text = fact.fact; - // Task 7: use precompiled regex instead of building one per keyword per fact. - if (DECISION_KEYWORD_REGEX.test(text)) { - decisions.push(fact); - continue; - } - const validAt = fact.valid_at ? 
Date.parse(fact.valid_at) : NaN; - if (!Number.isNaN(validAt) && validAt >= cutoff) { - active.push(fact); - continue; - } - background.push(fact); - } - - return { decisions, active, background }; -}; - -export const takeFactsWithinBudget = ( - facts: GraphitiFact[], - budget: number, - formatOptions: { factStaleDays: number; now: Date }, -): GraphitiFact[] => { - if (budget <= 0 || facts.length === 0) return []; - - const classified = classifyFacts(facts, formatOptions.now); - const prioritized = [ - ...classified.decisions, - ...classified.active, - ...classified.background, - ]; - const lines = formatFactLines(prioritized, formatOptions); - const selected: GraphitiFact[] = []; - let remaining = budget; - for (let i = 0; i < lines.length; i++) { - const line = lines[i]; - const length = line.length + 1; - if (remaining - length < 0) continue; - selected.push(prioritized[i]); - remaining -= length; - } - return selected; -}; - -/** - * Persist a compaction summary episode when enabled. - */ -export async function handleCompaction(params: { - client: { - addEpisode: (args: { - name: string; - episodeBody: string; - groupId?: string; - source?: "text" | "json" | "message"; - sourceDescription?: string; - }) => Promise; - }; - groupId: string; - summary: string; - sessionId: string; -}): Promise { - const { client, groupId, summary, sessionId } = params; - - if (!summary) return; - - try { - await client.addEpisode({ - name: `Session compaction: ${sessionId}`, - episodeBody: summary, - groupId, - source: "text", - sourceDescription: "OpenCode session compaction summary", - }); - logger.info("Saved compaction summary to Graphiti for session", sessionId); - } catch (err) { - logger.error("Failed to save compaction summary:", err); - } -} - -/** - * Retrieve persistent fact context to include during compaction. 
- */ -export async function getCompactionContext(params: { - client: { - searchFacts: (args: { - query: string; - groupIds?: string[]; - maxFacts?: number; - }) => Promise; - searchNodes: (args: { - query: string; - groupIds?: string[]; - maxNodes?: number; - }) => Promise; - }; - characterBudget: number; - groupIds: { - project: string; - user?: string; - }; - contextStrings: string[]; - factStaleDays?: number; -}): Promise { - const { client, characterBudget, groupIds, contextStrings } = params; - const now = new Date(); - const factStaleDays = params.factStaleDays ?? 30; - - try { - const queryText = contextStrings.slice(0, 3).join(" ").slice(0, 500); - if (!queryText.trim()) return []; - - const projectFactsPromise = client.searchFacts({ - query: queryText, - groupIds: [groupIds.project], - maxFacts: PROJECT_MAX_FACTS, - }); - const projectNodesPromise = client.searchNodes({ - query: queryText, - groupIds: [groupIds.project], - maxNodes: 30, - }); - const userGroupId = groupIds.user; - const userFactsPromise = userGroupId - ? client.searchFacts({ - query: queryText, - groupIds: [userGroupId], - maxFacts: 20, - }) - : Promise.resolve([] as GraphitiFact[]); - const userNodesPromise = userGroupId - ? client.searchNodes({ - query: queryText, - groupIds: [userGroupId], - maxNodes: 10, - }) - : Promise.resolve([] as GraphitiNode[]); - - const { - projectContext, - userContext, - projectFacts, - projectNodes, - userFacts, - userNodes, - } = await resolveProjectUserContext({ - projectFacts: projectFactsPromise, - projectNodes: projectNodesPromise, - userFacts: userFactsPromise, - userNodes: userNodesPromise, - }); - - if ( - projectFacts.length === 0 && projectNodes.length === 0 && - userFacts.length === 0 && userNodes.length === 0 - ) { - return []; - } - - const formatOptions = { factStaleDays, now }; - - // Task 1: build section line-by-line; truncate at a line boundary to avoid - // cutting mid-tag or mid-line while still respecting the per-section budget. 
- const buildSection = ( - header: string, - facts: GraphitiFact[], - nodes: GraphitiNode[], - budget: number, - ): string => { - const lines: string[] = []; - lines.push(header); - lines.push( - "Background context only; do not reference in titles, summaries, or opening responses unless directly relevant.", - ); - - const classified = classifyFacts(facts, now); - const decisionBudget = Math.floor(budget * 0.4); - const activeBudget = Math.floor(budget * 0.35); - const backgroundBudget = budget - decisionBudget - activeBudget; - - const selectedDecisions = takeFactsWithinBudget( - classified.decisions, - decisionBudget, - formatOptions, - ); - const selectedActive = takeFactsWithinBudget( - classified.active, - activeBudget, - formatOptions, - ); - const selectedBackground = takeFactsWithinBudget( - classified.background, - backgroundBudget, - formatOptions, - ); - - if (selectedDecisions.length > 0) { - lines.push(""); - lines.push(...formatFactLines(selectedDecisions, formatOptions)); - lines.push(""); - } - if (selectedActive.length > 0) { - lines.push(""); - lines.push(...formatFactLines(selectedActive, formatOptions)); - lines.push(""); - } - if (selectedBackground.length > 0) { - lines.push(""); - lines.push(...formatFactLines(selectedBackground, formatOptions)); - lines.push(""); - } - if (nodes.length > 0) { - lines.push(""); - lines.push(...formatNodeLines(nodes)); - lines.push(""); - } - return truncateAtLineBoundary(lines.join("\n"), budget); - }; - - const headerLines = [ - "", - "", - "- ", - "", - "", - "", - "- ", - "", - "", - "", - "- ", - "", - "", - "", - ]; - const header = headerLines.join("\n"); - const base = `${header}\n`; - const remainingBudget = Math.max(characterBudget - base.length, 0); - const projectBudget = Math.floor(remainingBudget * 0.7); - const userBudget = remainingBudget - projectBudget; - const projectSection = buildSection( - '', - projectContext.facts, - projectContext.nodes, - projectBudget, - ); - const userSection = 
buildSection( - '', - userContext.facts, - userContext.nodes, - userBudget, - ); - - const sections: string[] = [header]; - if (projectSection.trim()) { - sections.push(projectSection); - sections.push(""); - } - if (userSection.trim()) { - sections.push(userSection); - sections.push(""); - } - sections.push(""); - sections.push(""); - - // Final overall truncation at a line boundary. - const content = truncateAtLineBoundary( - sections.join("\n"), - characterBudget, - ); - return [content]; - } catch (err) { - logger.error("Failed to get compaction context:", err); - return []; - } -} diff --git a/src/services/connection-manager.test.ts b/src/services/connection-manager.test.ts index 3bd11ea..2b2236f 100644 --- a/src/services/connection-manager.test.ts +++ b/src/services/connection-manager.test.ts @@ -1,9 +1,14 @@ -import { assertEquals, assertRejects } from "jsr:@std/assert@^1.0.0"; +import { + assertEquals, + assertRejects, + assertThrows, +} from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import { GraphitiConnectionManager, GraphitiOfflineError, GraphitiQueueTimeoutError, + GraphitiRequestTimeoutError, GraphitiSessionExpiredError, GraphitiTransportError, } from "./connection-manager.ts"; @@ -49,6 +54,8 @@ class FakeClock { nextId = 1; timers = new Map void }>(); + nowFn = (): number => this.now; + setTimer = (callback: () => void, delayMs: number): number => { const id = this.nextId++; this.timers.set(id, { at: this.now + delayMs, callback }); @@ -87,6 +94,33 @@ class FakeClock { } } +class TrackingTimers { + nextId = 1; + entries = new Map void; cleared: boolean }>(); + + setTimer = (callback: () => void): number => { + const id = this.nextId++; + this.entries.set(id, { callback, cleared: false }); + return id; + }; + + clearTimer = (id: number): void => { + const entry = this.entries.get(id); + if (entry) { + entry.cleared = true; + this.entries.delete(id); + } + }; + + fire(id: number): void { + const entry = 
this.entries.get(id); + if (!entry) { + throw new Error(`Timer ${id} not found`); + } + entry.callback(); + } +} + type FakeConnection = { connect: () => Promise; close: () => Promise; @@ -126,6 +160,7 @@ describe("connection manager", () => { close: () => Promise.resolve(), callTool: () => Promise.resolve({ ok: true }), }), + now: clock.nowFn, setTimer: clock.setTimer, clearTimer: clock.clearTimer, }); @@ -170,6 +205,7 @@ describe("connection manager", () => { close: () => Promise.resolve(), callTool: () => Promise.resolve({ ok: true }), }), + now: clock.nowFn, setTimer: clock.setTimer, clearTimer: clock.clearTimer, }); @@ -181,6 +217,79 @@ describe("connection manager", () => { await assertRejects(() => queued, GraphitiQueueTimeoutError); }); + it("times out already-connected calls at the configured deadline", async () => { + const clock = new FakeClock(); + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + requestDeadlineMs: 10, + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => Promise.resolve(), + callTool: () => new Promise(() => {}), + }), + now: clock.nowFn, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const request = manager.callTool("search", {}); + await clock.advanceBy(10); + + await assertRejects(() => request, GraphitiRequestTimeoutError); + }); + + it("times out already-connected calls at a per-request override", async () => { + const clock = new FakeClock(); + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + requestDeadlineMs: 100, + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => Promise.resolve(), + callTool: () => new Promise(() => {}), + }), + now: clock.nowFn, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const request = manager.callTool("search", 
{}, 5); + await clock.advanceBy(5); + + await assertRejects(() => request, GraphitiRequestTimeoutError); + }); + + it("clears the deadline timer when the timeout callback fires", async () => { + const timers = new TrackingTimers(); + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + requestDeadlineMs: 10, + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => Promise.resolve(), + callTool: () => new Promise(() => {}), + }), + setTimer: timers.setTimer, + clearTimer: timers.clearTimer, + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const request = manager.callTool("search", {}); + const [timerId] = [...timers.entries.keys()]; + timers.fire(timerId); + + await assertRejects(() => request, GraphitiRequestTimeoutError); + assertEquals(timers.entries.has(timerId), false); + }); + it("offline requests reject immediately", async () => { const clock = new FakeClock(); const manager = new GraphitiConnectionManager({ @@ -190,6 +299,7 @@ describe("connection manager", () => { close: () => Promise.resolve(), callTool: () => Promise.resolve({ ok: true }), }), + now: clock.nowFn, setTimer: clock.setTimer, clearTimer: clock.clearTimer, random: () => 0.5, @@ -233,6 +343,46 @@ describe("connection manager", () => { assertEquals(connectionIndex, 2); }); + it("times out transport reconnect retries within the original deadline", async () => { + const clock = new FakeClock(); + let connectionIndex = 0; + const reconnectGate = deferred(); + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + requestDeadlineMs: 10, + connectionFactory: () => { + connectionIndex += 1; + const index = connectionIndex; + return { + connect: () => + index === 1 ? 
Promise.resolve() : reconnectGate.promise, + close: () => Promise.resolve(), + callTool: () => { + if (index === 1) { + return Promise.reject(new Error("socket hang up")); + } + return Promise.resolve({ ok: true, index }); + }, + }; + }, + now: clock.nowFn, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const request = manager.callTool("search", {}); + await settleMicrotasks(); + assertEquals(manager.getState(), "connecting"); + + await clock.advanceBy(10); + + await assertRejects(() => request, GraphitiRequestTimeoutError); + assertEquals(connectionIndex, 2); + }); + it("retries once after session expiry", async () => { let connectionIndex = 0; let called = false; @@ -262,6 +412,46 @@ describe("connection manager", () => { assertEquals(connectionIndex, 2); }); + it("times out session-expiry retries within a per-request deadline", async () => { + const clock = new FakeClock(); + let connectionIndex = 0; + const reconnectGate = deferred(); + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + requestDeadlineMs: 100, + connectionFactory: () => { + connectionIndex += 1; + const index = connectionIndex; + return { + connect: () => + index === 1 ? 
Promise.resolve() : reconnectGate.promise, + close: () => Promise.resolve(), + callTool: () => { + if (index === 1) { + return Promise.reject({ code: 404, message: "session expired" }); + } + return Promise.resolve({ ok: true, index }); + }, + }; + }, + now: clock.nowFn, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const request = manager.callTool("search", {}, 5); + await settleMicrotasks(); + assertEquals(manager.getState(), "connecting"); + + await clock.advanceBy(5); + + await assertRejects(() => request, GraphitiRequestTimeoutError); + assertEquals(connectionIndex, 2); + }); + it("request during reconnect shares a single reconnect", async () => { let connectionIndex = 0; let failed = false; @@ -306,6 +496,112 @@ describe("connection manager", () => { assertEquals(connectionIndex, 2); }); + it("queued reconnect request expires at its original deadline", async () => { + const clock = new FakeClock(); + let connectionIndex = 0; + let failed = false; + const firstFailure = deferred(); + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + requestDeadlineMs: 100, + connectionFactory: () => { + connectionIndex += 1; + const index = connectionIndex; + return { + connect: () => { + if (index === 1) { + return Promise.resolve(); + } + + return new Promise((resolve) => { + clock.setTimer(resolve, 10); + }); + }, + close: () => Promise.resolve(), + callTool: async ({ name }) => { + if (index === 1 && !failed) { + await firstFailure.promise; + failed = true; + throw new Error("connection reset by peer"); + } + + return { ok: name, index }; + }, + }; + }, + now: clock.nowFn, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const firstRequest = manager.callTool("a", {}); + firstFailure.resolve(); + await settleMicrotasks(); + assertEquals(manager.getState(), 
"connecting"); + + const queued = manager.callTool("b", {}, 10); + await clock.advanceBy(10); + + assertEquals(await firstRequest, { ok: "a", index: 2 }); + await assertRejects(() => queued, GraphitiQueueTimeoutError); + }); + + it("queued reconnect request succeeds within its original deadline", async () => { + const clock = new FakeClock(); + let connectionIndex = 0; + let failed = false; + const firstFailure = deferred(); + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + requestDeadlineMs: 100, + connectionFactory: () => { + connectionIndex += 1; + const index = connectionIndex; + return { + connect: () => { + if (index === 1) { + return Promise.resolve(); + } + + return new Promise((resolve) => { + clock.setTimer(resolve, 5); + }); + }, + close: () => Promise.resolve(), + callTool: async ({ name }) => { + if (index === 1 && !failed) { + await firstFailure.promise; + failed = true; + throw new Error("connection reset by peer"); + } + + return { ok: name, index }; + }, + }; + }, + now: clock.nowFn, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const firstRequest = manager.callTool("a", {}); + firstFailure.resolve(); + await settleMicrotasks(); + assertEquals(manager.getState(), "connecting"); + + const queued = manager.callTool("b", {}, 10); + await clock.advanceBy(5); + + assertEquals(await firstRequest, { ok: "a", index: 2 }); + assertEquals(await queued, { ok: "b", index: 2 }); + }); + it("auto-reconnects from offline with backoff", async () => { const clock = new FakeClock(); let attempts = 0; @@ -322,6 +618,7 @@ describe("connection manager", () => { close: () => Promise.resolve(), callTool: () => Promise.resolve({ ok: true }), }), + now: clock.nowFn, setTimer: clock.setTimer, clearTimer: clock.clearTimer, random: () => 0.5, @@ -339,6 +636,44 @@ describe("connection manager", () => { assertEquals(attempts, 2); }); + it("keeps queued 
reconnect requests alive until their own deadline", async () => { + const clock = new FakeClock(); + let connectAttempt = 0; + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + connectionFactory: () => ({ + connect: () => { + connectAttempt += 1; + if (connectAttempt <= 2) { + return Promise.reject(new Error("connect failed")); + } + return Promise.resolve(); + }, + close: () => Promise.resolve(), + callTool: ({ name }) => Promise.resolve({ ok: name, connectAttempt }), + }), + now: clock.nowFn, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + random: () => 0.5, + reconnectInitialDelayMs: 10, + }); + + manager.start(); + const queued = manager.callTool("queued", {}, 50); + await settleMicrotasks(); + assertEquals(manager.getState(), "offline"); + + await clock.advanceBy(10); + await settleMicrotasks(); + assertEquals(manager.getState(), "offline"); + + await clock.advanceBy(20); + await settleMicrotasks(); + + assertEquals(await queued, { ok: "queued", connectAttempt: 3 }); + }); + it("queue full drops the oldest request", async () => { const connectGate = deferred(); const manager = new GraphitiConnectionManager({ @@ -372,6 +707,7 @@ describe("connection manager", () => { close: () => Promise.resolve(), callTool: () => Promise.resolve({ ok: true }), }), + now: clock.nowFn, setTimer: clock.setTimer, clearTimer: clock.clearTimer, }); @@ -380,10 +716,150 @@ describe("connection manager", () => { const queued = manager.callTool("queued", {}, 100); await manager.stop(); - await assertRejects(() => queued, GraphitiOfflineError); + const queuedError = await assertRejects( + () => queued, + GraphitiOfflineError, + ); + assertEquals(queuedError.state, "closing"); assertEquals(clock.timers.size, 0); }); + it("stop keeps reconnect from transitioning back to connected", async () => { + let connectionIndex = 0; + let failed = false; + const firstFailure = deferred(); + const reconnectGate = deferred(); + const manager = new 
GraphitiConnectionManager({ + endpoint: "http://test", + connectionFactory: () => { + connectionIndex += 1; + const index = connectionIndex; + return { + connect: () => + index === 1 ? Promise.resolve() : reconnectGate.promise, + close: () => Promise.resolve(), + callTool: async () => { + if (index === 1 && !failed) { + await firstFailure.promise; + failed = true; + return Promise.reject(new Error("connection reset by peer")); + } + return Promise.resolve({ ok: true, index }); + }, + }; + }, + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const request = manager.callTool("search", {}); + firstFailure.resolve(); + await settleMicrotasks(); + assertEquals(manager.getState(), "connecting"); + + await manager.stop(); + reconnectGate.resolve(); + await settleMicrotasks(); + + await assertRejects( + () => request, + GraphitiTransportError, + ); + + assertEquals(manager.getState(), "stopped"); + assertEquals(await manager.ready(10), false); + assertEquals(connectionIndex, 2); + }); + + it("stop is terminal and rejects restart attempts explicitly", async () => { + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => Promise.resolve(), + callTool: () => Promise.resolve({ ok: true }), + }), + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + await manager.stop(); + + assertEquals(manager.getState(), "stopped"); + assertEquals(await manager.ready(10), false); + + const callError = await assertRejects( + () => manager.callTool("search", {}), + GraphitiOfflineError, + ); + assertEquals(callError.state, "stopped"); + + const startError = assertThrows( + () => manager.start(), + GraphitiOfflineError, + ); + assertEquals(startError.state, "stopped"); + }); + + it("preserves the closing state when start is called during shutdown", async () => { + const closeGate = deferred(); + const manager = new GraphitiConnectionManager({ 
+ endpoint: "http://test", + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => closeGate.promise, + callTool: () => Promise.resolve({ ok: true }), + }), + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const stopPromise = manager.stop(); + const startError = assertThrows( + () => manager.start(), + GraphitiOfflineError, + ); + + assertEquals(startError.state, "closing"); + + closeGate.resolve(); + await stopPromise; + }); + + it("clears stopPromise after shutdown completes", async () => { + const closeGate = deferred(); + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => closeGate.promise, + callTool: () => Promise.resolve({ ok: true }), + }), + }); + const internals = manager as unknown as { + stopPromise: Promise | null; + }; + + manager.start(); + assertEquals(await manager.ready(10), true); + + const firstStop = manager.stop(); + assertEquals(internals.stopPromise === null, false); + + const pendingStop = internals.stopPromise; + const secondStop = manager.stop(); + assertEquals(internals.stopPromise, pendingStop); + + closeGate.resolve(); + await Promise.all([firstStop, secondStop]); + + assertEquals(internals.stopPromise, null); + assertEquals(manager.getState(), "stopped"); + }); + it("surfaces typed errors after failed retry", async () => { let connectionIndex = 0; const manager = new GraphitiConnectionManager({ @@ -435,4 +911,41 @@ describe("connection manager", () => { GraphitiTransportError, ); }); + + it("rejects invalid non-empty endpoints up front", () => { + assertThrows( + () => + new GraphitiConnectionManager({ + endpoint: "not a valid url", + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => Promise.resolve(), + callTool: () => Promise.resolve({ ok: true }), + }), + }), + Error, + 'Invalid Graphiti endpoint: "not a valid url"', + ); + }); + + it("moves back 
offline when connectionFactory throws synchronously", async () => { + const clock = new FakeClock(); + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + connectionFactory: () => { + throw new Error("factory boom"); + }, + now: clock.nowFn, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + random: () => 0.5, + reconnectInitialDelayMs: 10, + }); + + manager.start(); + await settleMicrotasks(); + + assertEquals(manager.getState(), "offline"); + assertEquals(clock.timers.size, 1); + }); }); diff --git a/src/services/connection-manager.ts b/src/services/connection-manager.ts index 04698ae..babcee1 100644 --- a/src/services/connection-manager.ts +++ b/src/services/connection-manager.ts @@ -7,18 +7,24 @@ export type GraphitiConnectionState = | "connecting" | "connected" | "offline" - | "closing"; + | "closing" + | "stopped"; type TimerHandle = ReturnType | number; export class GraphitiOfflineError extends Error { readonly kind = "offline"; - constructor(readonly state: "offline" | "closing", message?: string) { + constructor( + readonly state: "offline" | "closing" | "stopped", + message?: string, + ) { super( message ?? (state === "closing" ? "Graphiti connection manager is closing" + : state === "stopped" + ? 
"Graphiti connection manager is stopped" : "Graphiti connection manager is offline"), ); this.name = "GraphitiOfflineError"; @@ -101,6 +107,7 @@ export interface GraphitiToolCaller { type PendingRequest = { name: string; args: Record; + deadlineAt: number; resolve: (value: unknown) => void; reject: (reason?: unknown) => void; timer: TimerHandle | null; @@ -108,6 +115,26 @@ type PendingRequest = { type ConnectionFactory = (endpoint: string) => GraphitiConnection; +const validateEndpoint = (endpoint: string): string => { + const normalized = endpoint.trim(); + if (!normalized) { + throw new Error("Graphiti endpoint must not be empty"); + } + + try { + new URL(normalized); + } catch (cause) { + throw new Error( + `Invalid Graphiti endpoint: ${JSON.stringify(normalized)}`, + { + cause, + }, + ); + } + + return normalized; +}; + type GraphitiConnectionManagerOptions = { endpoint: string; requestDeadlineMs?: number; @@ -119,6 +146,7 @@ type GraphitiConnectionManagerOptions = { reconnectJitter?: number; connectionFactory?: ConnectionFactory; random?: () => number; + now?: () => number; setTimer?: (callback: () => void, delayMs: number) => TimerHandle; clearTimer?: (timer: TimerHandle) => void; }; @@ -202,6 +230,7 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { private readonly reconnectJitter: number; private readonly connectionFactory: ConnectionFactory; private readonly random: () => number; + private readonly now: () => number; private readonly setTimerImpl: ( callback: () => void, delayMs: number, @@ -217,9 +246,10 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { private reconnectDelayMs: number; private started = false; private flushingQueue = false; + private stopPromise: Promise | null = null; constructor(options: GraphitiConnectionManagerOptions) { - this.endpoint = options.endpoint; + this.endpoint = validateEndpoint(options.endpoint); this.requestDeadlineMs = options.requestDeadlineMs ?? 
15_000; this.queueCapacity = options.queueCapacity ?? 32; this.startupTimeoutMs = options.startupTimeoutMs ?? this.requestDeadlineMs; @@ -229,6 +259,7 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { this.reconnectJitter = options.reconnectJitter ?? 0.25; this.connectionFactory = options.connectionFactory ?? createMcpConnection; this.random = options.random ?? Math.random; + this.now = options.now ?? Date.now; this.setTimerImpl = options.setTimer ?? ((callback, delayMs) => setTimeout(callback, delayMs)); this.clearTimerImpl = options.clearTimer ?? @@ -241,39 +272,64 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { } start(): void { - if (this.started || this.state === "closing") return; + if (this.state === "closing" || this.state === "stopped") { + throw new GraphitiOfflineError( + this.state, + this.state === "closing" + ? "Graphiti connection manager is closing" + : "Graphiti connection manager has been stopped and cannot be restarted", + ); + } + if (this.started) return; this.started = true; void this.reconnect(); } async stop(): Promise { - if (this.state === "closing") return; + if (this.state === "stopped") return; + if (this.stopPromise) { + await this.stopPromise; + return; + } - this.started = false; - this.state = "closing"; - this.cancelReconnectTimer(); - this.rejectAllPending( - new GraphitiOfflineError( - "closing", - "Graphiti connection manager stopped", - ), - ); - this.resolveReadyWaiters(false); + const stopPromise = (async () => { + this.started = false; + this.state = "closing"; + this.cancelReconnectTimer(); + this.rejectAllPending( + new GraphitiOfflineError( + "closing", + "Graphiti connection manager is closing", + ), + ); + this.resolveReadyWaiters(false); + + const connection = this.connection; + this.connection = null; + if (connection) { + try { + await connection.close(); + } catch { + // Ignore close errors while shutting down. 
+ } + } - const connection = this.connection; - this.connection = null; - if (connection) { - try { - await connection.close(); - } catch { - // Ignore close errors while shutting down. + this.state = "stopped"; + })(); + this.stopPromise = stopPromise; + + try { + await stopPromise; + } finally { + if (this.stopPromise === stopPromise) { + this.stopPromise = null; } } } async ready(timeoutMs = this.startupTimeoutMs): Promise { if (this.state === "connected") return true; - if (this.state === "closing") return false; + if (this.state === "closing" || this.state === "stopped") return false; return await new Promise((resolve) => { let settled = false; @@ -306,8 +362,8 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { ), ); - if (this.state === "closing") { - throw new GraphitiOfflineError("closing"); + if (this.state === "closing" || this.state === "stopped") { + throw new GraphitiOfflineError(this.state); } if (this.state === "offline") { @@ -318,11 +374,15 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { return await this.enqueueRequest(name, sanitizedArgs, deadlineMs); } - return await this.executeConnectedCall(name, sanitizedArgs); + return await this.executeConnectedCallWithinDeadline( + name, + sanitizedArgs, + this.now() + deadlineMs, + ); } async reconnect(): Promise { - if (this.state === "closing") return false; + if (this.state === "closing" || this.state === "stopped") return false; if (this.connectPromise) return await this.connectPromise; this.cancelReconnectTimer(); @@ -339,6 +399,7 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { private async performReconnect(): Promise { const previousConnection = this.connection; this.connection = null; + let nextConnection: GraphitiConnection | null = null; if (previousConnection) { try { @@ -348,12 +409,11 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { } } - const nextConnection = 
this.connectionFactory(this.endpoint); - try { + nextConnection = this.connectionFactory(this.endpoint); await nextConnection.connect(); - if (this.state === "closing") { + if (this.state === "closing" || this.state === "stopped") { try { await nextConnection.close(); } catch { @@ -370,15 +430,16 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { void this.flushPendingQueue(); return true; } catch (err) { - try { - await nextConnection.close(); - } catch { - // Ignore close failures for failed connects. + if (nextConnection) { + try { + await nextConnection.close(); + } catch { + // Ignore close failures for failed connects. + } } - if (this.state !== "closing") { + if (this.state !== "closing" && this.state !== "stopped") { this.state = "offline"; - this.rejectAllPending(new GraphitiOfflineError("offline")); this.scheduleReconnect(); logger.warn("Failed to connect to Graphiti MCP server", err); } @@ -387,17 +448,26 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { } } - private async executeConnectedCall( + private async executeConnectedCallWithinDeadline( name: string, args: Record, + deadlineAt: number, attempt = 0, ): Promise { if (this.state !== "connected" || !this.connection) { throw new GraphitiOfflineError("offline"); } + const deadlineMs = this.getRemainingDeadlineMs(deadlineAt); + if (deadlineMs <= 0) { + throw new GraphitiRequestTimeoutError(); + } + try { - return await this.connection.callTool({ name, arguments: args }); + return await this.runWithRequestDeadline( + this.connection.callTool({ name, arguments: args }), + deadlineMs, + ); } catch (err) { if (isRequestTimeout(err)) { throw new GraphitiRequestTimeoutError( @@ -406,39 +476,109 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { } if (isSessionExpired(err)) { - const typedError = new GraphitiSessionExpiredError( - getErrorMessage(err) || undefined, + return await this.retryConnectedCallAfterRecoverableError( + new 
GraphitiSessionExpiredError( + getErrorMessage(err) || undefined, + ), + name, + args, + deadlineAt, + attempt, ); - - if (attempt >= 1) { - void this.reconnect(); - throw typedError; - } - - const connected = await this.reconnect(); - if (!connected) throw typedError; - return await this.executeConnectedCall(name, args, attempt + 1); } if (isTransportFailure(err)) { - const typedError = new GraphitiTransportError( - getErrorMessage(err) || undefined, + return await this.retryConnectedCallAfterRecoverableError( + new GraphitiTransportError( + getErrorMessage(err) || undefined, + ), + name, + args, + deadlineAt, + attempt, ); - - if (attempt >= 1) { - void this.reconnect(); - throw typedError; - } - - const connected = await this.reconnect(); - if (!connected) throw typedError; - return await this.executeConnectedCall(name, args, attempt + 1); } throw err; } } + private async retryConnectedCallAfterRecoverableError( + typedError: GraphitiSessionExpiredError | GraphitiTransportError, + name: string, + args: Record, + deadlineAt: number, + attempt: number, + ): Promise { + if (attempt >= 1) { + void this.reconnect(); + throw typedError; + } + + const connected = await this.reconnectWithinDeadline(deadlineAt); + if (!connected) { + throw typedError; + } + + return await this.executeConnectedCallWithinDeadline( + name, + args, + deadlineAt, + attempt + 1, + ); + } + + private getRemainingDeadlineMs(deadlineAt: number): number { + return deadlineAt - this.now(); + } + + private async reconnectWithinDeadline(deadlineAt: number): Promise { + const deadlineMs = this.getRemainingDeadlineMs(deadlineAt); + if (deadlineMs <= 0) { + throw new GraphitiRequestTimeoutError(); + } + + return await this.runWithRequestDeadline(this.reconnect(), deadlineMs); + } + + private runWithRequestDeadline( + task: Promise, + deadlineMs: number, + ): Promise { + return new Promise((resolve, reject) => { + let settled = false; + let timer: TimerHandle | null = null; + const clearDeadlineTimer = 
() => { + if (timer !== null) { + this.clearTimerImpl(timer); + timer = null; + } + }; + + timer = this.setTimerImpl(() => { + if (settled) return; + settled = true; + clearDeadlineTimer(); + reject(new GraphitiRequestTimeoutError()); + }, deadlineMs); + + task.then( + (value) => { + if (settled) return; + settled = true; + clearDeadlineTimer(); + resolve(value); + }, + (error) => { + if (settled) return; + settled = true; + clearDeadlineTimer(); + reject(error); + }, + ); + }); + } + private enqueueRequest( name: string, args: Record, @@ -449,9 +589,11 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { } return new Promise((resolve, reject) => { + const deadlineAt = this.now() + deadlineMs; const pending: PendingRequest = { name, args, + deadlineAt, resolve, reject, timer: null, @@ -489,8 +631,17 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { this.clearPendingTimer(next); + if (this.getRemainingDeadlineMs(next.deadlineAt) <= 0) { + next.reject(new GraphitiQueueTimeoutError()); + continue; + } + try { - const result = await this.executeConnectedCall(next.name, next.args); + const result = await this.executeConnectedCallWithinDeadline( + next.name, + next.args, + next.deadlineAt, + ); next.resolve(result); } catch (err) { next.reject(err); @@ -528,7 +679,8 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { private scheduleReconnect(): void { if ( - !this.started || this.state === "closing" || this.reconnectTimer !== null + !this.started || this.state === "closing" || this.state === "stopped" || + this.reconnectTimer !== null ) { return; } @@ -541,7 +693,7 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { this.reconnectTimer = this.setTimerImpl(() => { this.reconnectTimer = null; - if (this.state === "closing") return; + if (this.state === "closing" || this.state === "stopped") return; void this.reconnect(); }, delayMs); diff --git a/src/services/context-limit.test.ts 
b/src/services/context-limit.test.ts new file mode 100644 index 0000000..f047077 --- /dev/null +++ b/src/services/context-limit.test.ts @@ -0,0 +1,197 @@ +import { assertEquals } from "jsr:@std/assert@^1.0.0"; +import { resolveContextLimit } from "./context-limit.ts"; + +Deno.test("resolveContextLimit re-probes after fallback cache expiry", async () => { + const originalNow = Date.now; + let now = 100_000; + Date.now = () => now; + + try { + const cache = new Map< + string, + number | { value: number; expiresAt?: number } + >(); + let calls = 0; + const client = { + provider: { + list: () => { + calls += 1; + if (calls === 1) { + return Promise.reject(new Error("provider unavailable")); + } + + return Promise.resolve({ + providers: [ + { + id: "openai", + models: [{ id: "gpt-5", limit: { context: 123_456 } }], + }, + ], + }); + }, + }, + }; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + undefined, + cache, + ), + 200_000, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + undefined, + cache, + ), + 200_000, + ); + assertEquals(calls, 1); + + now += 60_001; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + undefined, + cache, + ), + 123_456, + ); + assertEquals(calls, 2); + } finally { + Date.now = originalNow; + } +}); + +Deno.test("resolveContextLimit keeps fallback caches scoped per normalized directory until expiry", async () => { + const originalNow = Date.now; + let now = 200_000; + Date.now = () => now; + + try { + const cache = new Map< + string, + number | { value: number; expiresAt?: number } + >(); + const calls: string[] = []; + const client = { + provider: { + list: ({ query }: { query?: { directory?: string } }) => { + calls.push(query?.directory ?? 
""); + return Promise.reject(new Error("provider unavailable")); + }, + }, + }; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + "/tmp/project-a", + cache, + ), + 200_000, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + "/tmp/project-a", + cache, + ), + 200_000, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + " ", + cache, + ), + 200_000, + ); + assertEquals( + await resolveContextLimit("openai", "gpt-5", client as never, "", cache), + 200_000, + ); + assertEquals(calls, ["/tmp/project-a", ""]); + + now += 60_001; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + "/tmp/project-a", + cache, + ), + 200_000, + ); + assertEquals( + await resolveContextLimit("openai", "gpt-5", client as never, "", cache), + 200_000, + ); + assertEquals(calls, ["/tmp/project-a", "", "/tmp/project-a", ""]); + } finally { + Date.now = originalNow; + } +}); + +Deno.test("resolveContextLimit keeps positive cache entries without expiry re-probes", async () => { + const cache = new Map< + string, + number | { value: number; expiresAt?: number } + >(); + let calls = 0; + const client = { + provider: { + list: () => { + calls += 1; + return Promise.resolve({ + providers: [ + { + id: "openai", + models: [{ id: "gpt-5", limit: { context: 321_000 } }], + }, + ], + }); + }, + }, + }; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + undefined, + cache, + ), + 321_000, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + undefined, + cache, + ), + 321_000, + ); + + assertEquals(calls, 1); +}); diff --git a/src/services/context-limit.ts b/src/services/context-limit.ts index 688476d..c50f3fc 100644 --- a/src/services/context-limit.ts +++ b/src/services/context-limit.ts @@ -3,20 +3,56 @@ import { DEFAULT_CONTEXT_LIMIT } from "./constants.ts"; import { 
logger } from "./logger.ts"; import { extractSdkProviders } from "./sdk-normalize.ts"; +const UNKNOWN_CONTEXT_LIMIT = -1; +const UNKNOWN_CONTEXT_LIMIT_TTL_MS = 60_000; + +type ContextLimitCacheEntry = + | number + | { + value: number; + expiresAt?: number; + }; + +const getContextLimitCacheKey = ( + providerID: string, + modelID: string, + directory?: string, +): string => { + const normalizedDirectory = directory?.trim(); + return normalizedDirectory + ? `${normalizedDirectory}\u0000${providerID}/${modelID}` + : `${providerID}/${modelID}`; +}; + export async function resolveContextLimit( providerID: string, modelID: string, client: OpencodeClient, - directory: string, - cache: Map, + directory: string | undefined, + cache: Map, ): Promise { - const modelKey = `${providerID}/${modelID}`; + const normalizedDirectory = directory?.trim(); + const modelKey = getContextLimitCacheKey( + providerID, + modelID, + normalizedDirectory, + ); const cached = cache.get(modelKey); - if (cached) return cached; + if (cached !== undefined) { + if (typeof cached === "number") { + return cached > 0 ? cached : DEFAULT_CONTEXT_LIMIT; + } + + if (cached.expiresAt === undefined || cached.expiresAt > Date.now()) { + return cached.value > 0 ? cached.value : DEFAULT_CONTEXT_LIMIT; + } + + cache.delete(modelKey); + } try { const response = await client.provider.list({ - query: { directory }, + query: normalizedDirectory ? 
{ directory: normalizedDirectory } : {}, }); const list = extractSdkProviders(response); for (const provider of list) { @@ -33,16 +69,16 @@ export async function resolveContextLimit( } } catch (err) { logger.warn("Failed to fetch provider context limit", err); + cache.set(modelKey, { + value: UNKNOWN_CONTEXT_LIMIT, + expiresAt: Date.now() + UNKNOWN_CONTEXT_LIMIT_TTL_MS, + }); + return DEFAULT_CONTEXT_LIMIT; } - cache.set(modelKey, DEFAULT_CONTEXT_LIMIT); + cache.set(modelKey, { + value: UNKNOWN_CONTEXT_LIMIT, + expiresAt: Date.now() + UNKNOWN_CONTEXT_LIMIT_TTL_MS, + }); return DEFAULT_CONTEXT_LIMIT; } - -/** - * Calculate the character budget for memory injection - * (5% of context limit * 4 chars/token). - */ -export function calculateInjectionBudget(contextLimit: number): number { - return Math.floor(contextLimit * 0.05 * 4); -} diff --git a/src/services/context-utils.test.ts b/src/services/context-utils.test.ts deleted file mode 100644 index d6e0e53..0000000 --- a/src/services/context-utils.test.ts +++ /dev/null @@ -1,418 +0,0 @@ -import { assertEquals, assertStrictEquals } from "jsr:@std/assert@^1.0.0"; -import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import type { GraphitiFact, GraphitiNode } from "../types/index.ts"; -import { - deduplicateFactsByUuid, - deduplicateNodesByUuid, - filterAndAnnotateFacts, - removeNodesReferencedByFacts, - sortFactsByRecency, -} from "./context.ts"; - -describe("context-utils", () => { - describe("filterAndAnnotateFacts", () => { - it("should keep facts without valid_at or invalid_at", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Always valid fact" }, - ]; - const now = new Date("2026-02-14T12:00:00Z"); - const result = filterAndAnnotateFacts(facts, { now }); - assertEquals(result.length, 1); - }); - - it("should keep facts within valid window", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Currently valid", - valid_at: "2026-02-01T00:00:00Z", - invalid_at: 
"2026-02-28T00:00:00Z", - }, - ]; - const now = new Date("2026-02-14T12:00:00Z"); - const result = filterAndAnnotateFacts(facts, { now }); - assertEquals(result.length, 1); - }); - - it("should filter out facts before valid_at", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Future fact", - valid_at: "2026-03-01T00:00:00Z", - }, - ]; - const now = new Date("2026-02-14T12:00:00Z"); - const result = filterAndAnnotateFacts(facts, { now }); - assertStrictEquals(result.length, 0); - }); - - it("should filter out facts after invalid_at", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Expired fact", - invalid_at: "2026-01-31T00:00:00Z", - }, - ]; - const now = new Date("2026-02-14T12:00:00Z"); - const result = filterAndAnnotateFacts(facts, { now }); - assertStrictEquals(result.length, 0); - }); - - it("should handle mixed valid and stale facts", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Valid fact 1" }, - { - uuid: "f2", - fact: "Future fact", - valid_at: "2026-03-01T00:00:00Z", - }, - { - uuid: "f3", - fact: "Valid fact 2", - valid_at: "2026-02-01T00:00:00Z", - }, - { - uuid: "f4", - fact: "Expired fact", - invalid_at: "2026-01-31T00:00:00Z", - }, - ]; - const now = new Date("2026-02-14T12:00:00Z"); - const result = filterAndAnnotateFacts(facts, { now }); - assertEquals(result.map((fact: GraphitiFact) => fact.uuid), [ - "f3", - "f1", - ]); - }); - - it("should handle invalid date strings gracefully", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Invalid date", - valid_at: "not-a-date", - }, - ]; - const now = new Date("2026-02-14T12:00:00Z"); - const result = filterAndAnnotateFacts(facts, { now }); - assertEquals(result.length, 1); - }); - }); - - describe("annotateFacts", () => { - it("should add stale annotation to facts with valid_at", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Event occurred", - valid_at: "2026-02-01T10:30:00Z", - }, - ]; - const now = 
new Date("2026-02-14T12:00:00Z"); - const result = filterAndAnnotateFacts(facts, { - now, - factStaleDays: 10, - }); - assertEquals(result[0].fact.startsWith("[stale:"), true); - }); - - it("should ignore invalid_at for stale annotation", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Temporary state", - invalid_at: "2026-02-28T00:00:00Z", - }, - ]; - const now = new Date("2026-02-14T12:00:00Z"); - const result = filterAndAnnotateFacts(facts, { - now, - factStaleDays: 10, - }); - assertEquals(result[0].fact, "Temporary state"); - }); - - it("should add stale annotation when valid_at is old", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Limited period event", - valid_at: "2026-02-01T00:00:00Z", - invalid_at: "2026-02-28T00:00:00Z", - }, - ]; - const now = new Date("2026-02-14T12:00:00Z"); - const result = filterAndAnnotateFacts(facts, { - now, - factStaleDays: 5, - }); - assertEquals(result[0].fact.startsWith("[stale:"), true); - }); - - it("should not modify facts without timestamps", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "No timestamp fact" }, - ]; - const now = new Date("2026-02-14T12:00:00Z"); - const result = filterAndAnnotateFacts(facts, { now }); - assertEquals(result[0].fact, "No timestamp fact"); - }); - - it("should preserve source and target node references", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Relationship", - valid_at: "2026-02-14T00:00:00Z", - source_node: { name: "NodeA", uuid: "n1" }, - target_node: { name: "NodeB", uuid: "n2" }, - }, - ]; - const now = new Date("2026-02-14T12:00:00Z"); - const result = filterAndAnnotateFacts(facts, { now }); - assertEquals(result[0].source_node?.name, "NodeA"); - assertEquals(result[0].target_node?.name, "NodeB"); - }); - }); - - describe("sortFactsByRecency", () => { - it("should sort facts by recency (most recent first)", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Old fact", - 
valid_at: "2026-01-01T00:00:00Z", - }, - { - uuid: "f2", - fact: "Recent fact", - valid_at: "2026-02-14T00:00:00Z", - }, - { - uuid: "f3", - fact: "Middle fact", - valid_at: "2026-02-01T00:00:00Z", - }, - ]; - const sorted = sortFactsByRecency(facts); - assertEquals(sorted.map((fact) => fact.uuid), ["f2", "f3", "f1"]); - }); - - it("should keep stable order without timestamps", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Standalone fact" }, - { - uuid: "f2", - fact: "Connected fact", - source_node: { name: "Node", uuid: "n1" }, - }, - ]; - const sorted = sortFactsByRecency(facts); - assertEquals(sorted.map((fact) => fact.uuid), ["f1", "f2"]); - }); - - it("should handle facts without valid_at consistently", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "No timestamp A" }, - { - uuid: "f2", - fact: "Has timestamp", - valid_at: "2026-02-14T00:00:00Z", - }, - { uuid: "f3", fact: "No timestamp B" }, - ]; - const sorted = sortFactsByRecency(facts); - assertEquals(sorted.map((fact) => fact.uuid), ["f2", "f1", "f3"]); - }); - - it("should handle empty array", () => { - const facts: GraphitiFact[] = []; - const sorted = sortFactsByRecency(facts); - assertEquals(sorted.length, 0); - }); - - it("should maintain stable sort for equal relevance", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "First", - valid_at: "2026-02-14T10:00:00Z", - }, - { - uuid: "f2", - fact: "Second", - valid_at: "2026-02-14T10:00:00Z", - }, - ]; - const sorted = sortFactsByRecency(facts); - assertEquals(sorted[0].uuid, "f1"); - assertEquals(sorted[1].uuid, "f2"); - }); - }); - - describe("deduplicateByUuid", () => { - it("should remove duplicate facts by UUID", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "First occurrence" }, - { uuid: "f2", fact: "Unique fact" }, - { uuid: "f1", fact: "Duplicate occurrence" }, - ]; - const deduped = deduplicateFactsByUuid(facts); - assertEquals(deduped.map((fact) => fact.uuid), ["f1", 
"f2"]); - }); - - it("should remove duplicate nodes by UUID", () => { - const nodes: GraphitiNode[] = [ - { uuid: "n1", name: "Node A" }, - { uuid: "n2", name: "Node B" }, - { uuid: "n1", name: "Node A duplicate" }, - ]; - const deduped = deduplicateNodesByUuid(nodes); - assertEquals(deduped.map((node) => node.uuid), ["n1", "n2"]); - }); - - it("should preserve first occurrence when deduplicating", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Keep this one" }, - { uuid: "f1", fact: "Discard this one" }, - ]; - const deduped = deduplicateFactsByUuid(facts); - assertEquals(deduped[0].fact, "Keep this one"); - }); - - it("should handle empty array", () => { - const facts: GraphitiFact[] = []; - const deduped = deduplicateFactsByUuid(facts); - assertEquals(deduped.length, 0); - }); - - it("should handle array with all unique items", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Fact 1" }, - { uuid: "f2", fact: "Fact 2" }, - { uuid: "f3", fact: "Fact 3" }, - ]; - const deduped = deduplicateFactsByUuid(facts); - assertEquals(deduped.length, 3); - }); - - it("should handle array with all duplicate items", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Same fact" }, - { uuid: "f1", fact: "Same fact" }, - { uuid: "f1", fact: "Same fact" }, - ]; - const deduped = deduplicateFactsByUuid(facts); - assertEquals(deduped.length, 1); - }); - }); - - describe("removeNodesReferencedByFacts", () => { - it("should remove nodes referenced by facts", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Relationship", - source_node: { name: "Node A", uuid: "n1" }, - target_node: { name: "Node B", uuid: "n2" }, - }, - ]; - const _nodes: GraphitiNode[] = [ - { uuid: "n1", name: "Node A" }, - { uuid: "n2", name: "Node B" }, - { uuid: "n3", name: "Orphan Node" }, - ]; - const filtered = removeNodesReferencedByFacts(facts, _nodes); - assertEquals(filtered.map((node) => node.uuid), ["n3"]); - }); - - it("should keep all 
nodes when no facts exist", () => { - const _facts: GraphitiFact[] = []; - const _nodes: GraphitiNode[] = [ - { uuid: "n1", name: "Node A" }, - { uuid: "n2", name: "Node B" }, - ]; - const filtered = removeNodesReferencedByFacts(_facts, _nodes); - assertEquals(filtered.map((node) => node.uuid), ["n1", "n2"]); - }); - - it("should remove all nodes when all are referenced", () => { - const _facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Fact 1", - source_node: { name: "Node A", uuid: "n1" }, - }, - { - uuid: "f2", - fact: "Fact 2", - target_node: { name: "Node B", uuid: "n2" }, - }, - ]; - const _nodes: GraphitiNode[] = [ - { uuid: "n1", name: "Node A" }, - { uuid: "n2", name: "Node B" }, - ]; - const filtered = removeNodesReferencedByFacts(_facts, _nodes); - assertEquals(filtered.length, 0); - }); - - it("should keep nodes when facts have no references", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Standalone fact" }, - ]; - const _nodes: GraphitiNode[] = [ - { uuid: "n1", name: "Node A" }, - ]; - const filtered = removeNodesReferencedByFacts(facts, _nodes); - assertEquals(filtered.map((node) => node.uuid), ["n1"]); - }); - - it("should remove nodes referenced as source only", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Source only", - source_node: { name: "Node A", uuid: "n1" }, - }, - ]; - const _nodes: GraphitiNode[] = [ - { uuid: "n1", name: "Node A" }, - { uuid: "n2", name: "Node B" }, - ]; - const filtered = removeNodesReferencedByFacts(facts, _nodes); - assertEquals(filtered.map((node) => node.uuid), ["n2"]); - }); - - it("should remove nodes referenced as target only", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Target only", - target_node: { name: "Node B", uuid: "n2" }, - }, - ]; - const _nodes: GraphitiNode[] = [ - { uuid: "n1", name: "Node A" }, - { uuid: "n2", name: "Node B" }, - ]; - const filtered = removeNodesReferencedByFacts(facts, _nodes); - 
assertEquals(filtered.map((node) => node.uuid), ["n1"]); - }); - - it("should handle empty nodes array", () => { - const _facts: GraphitiFact[] = [ - { - uuid: "f1", - fact: "Fact", - source_node: { name: "Node A", uuid: "n1" }, - }, - ]; - const _nodes: GraphitiNode[] = []; - const filtered = removeNodesReferencedByFacts(_facts, _nodes); - assertEquals(filtered.length, 0); - }); - }); -}); diff --git a/src/services/context.test.ts b/src/services/context.test.ts deleted file mode 100644 index c962139..0000000 --- a/src/services/context.test.ts +++ /dev/null @@ -1,372 +0,0 @@ -import { assertEquals, assertStrictEquals } from "jsr:@std/assert@^1.0.0"; -import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import { formatMemoryContext } from "./context.ts"; -import type { GraphitiFact, GraphitiNode } from "../types/index.ts"; - -describe("context", () => { - describe("formatMemoryContext", () => { - it("should return empty string when no facts or nodes provided", () => { - const result = formatMemoryContext([], []); - assertStrictEquals(result, ""); - }); - - it("should format facts only", () => { - const facts: GraphitiFact[] = [ - { - uuid: "fact-1", - fact: "The API endpoint is at /api/v1", - source_node: { name: "API", uuid: "node-1" }, - target_node: { name: "Endpoint", uuid: "node-2" }, - }, - ]; - const result = formatMemoryContext(facts, []); - - assertEquals(result.includes("# Persistent Memory"), true); - assertEquals(result.includes("## Known Facts"), true); - assertEquals( - result.includes( - "- The API endpoint is at /api/v1 [API -> Endpoint]", - ), - true, - ); - }); - - it("should format nodes only", () => { - const nodes: GraphitiNode[] = [ - { - uuid: "node-1", - name: "Deno", - summary: "A modern JavaScript runtime", - labels: ["runtime", "javascript"], - }, - ]; - const result = formatMemoryContext([], nodes); - - assertEquals(result.includes("# Persistent Memory"), true); - assertEquals(result.includes("## Known Entities"), true); - 
assertEquals( - result.includes( - "- **Deno** (runtime, javascript): A modern JavaScript runtime", - ), - true, - ); - }); - - it("should format both facts and nodes", () => { - const facts: GraphitiFact[] = [ - { - uuid: "fact-1", - fact: "Uses TypeScript", - source_node: { name: "Project", uuid: "node-1" }, - }, - ]; - const nodes: GraphitiNode[] = [ - { - uuid: "node-2", - name: "TypeScript", - summary: "Typed JavaScript", - labels: ["language"], - }, - ]; - const result = formatMemoryContext(facts, nodes); - - assertEquals(result.includes("## Known Facts"), true); - assertEquals(result.includes("## Known Entities"), true); - assertEquals(result.includes("Uses TypeScript"), true); - assertEquals( - result.includes("- **TypeScript** (language): Typed JavaScript"), - true, - ); - }); - - it("should handle facts without source or target nodes", () => { - const facts: GraphitiFact[] = [ - { - uuid: "fact-1", - fact: "A standalone fact without entity references", - }, - ]; - const result = formatMemoryContext(facts, []); - - assertEquals( - result.includes("A standalone fact without entity references"), - true, - ); - // Should not have entity brackets when no nodes - assertEquals(result.includes("[]"), false); - }); - - it("should handle facts with only source node", () => { - const facts: GraphitiFact[] = [ - { - uuid: "fact-1", - fact: "Has a source only", - source_node: { name: "Source", uuid: "node-1" }, - }, - ]; - const result = formatMemoryContext(facts, []); - - assertEquals(result.includes("[Source]"), true); - }); - - it("should handle facts with only target node", () => { - const facts: GraphitiFact[] = [ - { - uuid: "fact-1", - fact: "Has a target only", - target_node: { name: "Target", uuid: "node-2" }, - }, - ]; - const result = formatMemoryContext(facts, []); - - assertEquals(result.includes("[Target]"), true); - }); - - it("should handle nodes without labels", () => { - const nodes: GraphitiNode[] = [ - { - uuid: "node-1", - name: "SimpleNode", - 
summary: "Just a node", - }, - ]; - const result = formatMemoryContext([], nodes); - - assertEquals( - result.includes("- **SimpleNode**: Just a node"), - true, - ); - // Should not have empty parentheses - assertEquals(result.includes("()"), false); - }); - - it("should handle nodes without summary", () => { - const nodes: GraphitiNode[] = [ - { - uuid: "node-1", - name: "LabelOnly", - labels: ["category"], - }, - ]; - const result = formatMemoryContext([], nodes); - - assertEquals(result.includes("- **LabelOnly** (category)"), true); - // Should not have colon without summary - assertEquals(result.match(/:\s*$/m), null); - }); - - it("should handle nodes with empty labels array", () => { - const nodes: GraphitiNode[] = [ - { - uuid: "node-1", - name: "EmptyLabels", - labels: [], - summary: "Has empty labels", - }, - ]; - const result = formatMemoryContext([], nodes); - - assertEquals( - result.includes("- **EmptyLabels**: Has empty labels"), - true, - ); - // Should not have empty parentheses - assertEquals(result.includes("()"), false); - }); - - it("should handle multiple facts and nodes", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "First fact" }, - { uuid: "f2", fact: "Second fact" }, - { uuid: "f3", fact: "Third fact" }, - ]; - const nodes: GraphitiNode[] = [ - { uuid: "n1", name: "Node1" }, - { uuid: "n2", name: "Node2" }, - ]; - const result = formatMemoryContext(facts, nodes); - - assertEquals(result.includes("First fact"), true); - assertEquals(result.includes("Second fact"), true); - assertEquals(result.includes("Third fact"), true); - assertEquals(result.includes("- **Node1**"), true); - assertEquals(result.includes("- **Node2**"), true); - }); - - it("should format facts with source -> target arrows correctly", () => { - const facts: GraphitiFact[] = [ - { - uuid: "fact-1", - fact: "relates to", - source_node: { name: "A", uuid: "n1" }, - target_node: { name: "B", uuid: "n2" }, - }, - ]; - const result = formatMemoryContext(facts, 
[]); - - assertEquals(result.includes("[A -> B]"), true); - }); - - it("should include instruction block in output", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Test fact" }, - ]; - const result = formatMemoryContext(facts, []); - - assertEquals( - result.includes( - "do not mention it unless asked", - ), - true, - ); - }); - - it("should include persistent memory header", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Test fact" }, - ]; - const result = formatMemoryContext(facts, []); - - assertEquals(result.startsWith("# Persistent Memory"), true); - }); - - it("should include facts section", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "First" }, - { uuid: "f2", fact: "Second" }, - ]; - const result = formatMemoryContext(facts, []); - - assertEquals(result.includes("## Known Facts"), true); - const factsStart = result.indexOf("## Known Facts"); - const factsSection = result.slice(factsStart); - assertEquals(factsSection.includes("First"), true); - assertEquals(factsSection.includes("Second"), true); - }); - - it("should include nodes section", () => { - const nodes: GraphitiNode[] = [ - { uuid: "n1", name: "Node1" }, - { uuid: "n2", name: "Node2" }, - ]; - const result = formatMemoryContext([], nodes); - - assertEquals(result.includes("## Known Entities"), true); - const nodesStart = result.indexOf("## Known Entities"); - const nodesSection = result.slice(nodesStart); - assertEquals(nodesSection.includes("Node1"), true); - assertEquals(nodesSection.includes("Node2"), true); - }); - - it("should format multiple labels with comma separation", () => { - const nodes: GraphitiNode[] = [ - { - uuid: "n1", - name: "MultiLabel", - labels: ["type", "category", "tag"], - }, - ]; - const result = formatMemoryContext([], nodes); - - assertEquals(result.includes("(type, category, tag)"), true); - }); - - it("should handle facts with special characters", () => { - const facts: GraphitiFact[] = [ - { - uuid: "f1", - 
fact: 'Fact with "quotes" and & ampersands', - }, - ]; - const result = formatMemoryContext(facts, []); - - assertEquals( - result.includes('Fact with "quotes" and & ampersands'), - true, - ); - }); - - it("should handle node names with special characters", () => { - const nodes: GraphitiNode[] = [ - { - uuid: "n1", - name: 'Node "special" & chars', - summary: "Summary", - }, - ]; - const result = formatMemoryContext([], nodes); - - assertEquals(result.includes('Node "special" & chars'), true); - }); - - it("should format facts and nodes in correct order", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Fact content" }, - ]; - const nodes: GraphitiNode[] = [ - { uuid: "n1", name: "Node name" }, - ]; - const result = formatMemoryContext(facts, nodes); - - const memoryIndex = result.indexOf("# Persistent Memory"); - const instructionIndex = result.indexOf("do not mention it unless asked"); - const factsIndex = result.indexOf("## Known Facts"); - const nodesIndex = result.indexOf("## Known Entities"); - - // Verify order - assertEquals(memoryIndex < instructionIndex, true); - assertEquals(instructionIndex < factsIndex, true); - assertEquals(factsIndex < nodesIndex, true); - }); - - it("should handle very long fact text", () => { - const longText = "A".repeat(10000); - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: longText }, - ]; - const result = formatMemoryContext(facts, []); - - assertEquals(result.includes(longText), true); - assertEquals(result.includes("- "), true); - }); - - it("should handle facts with newlines", () => { - const facts: GraphitiFact[] = [ - { uuid: "f1", fact: "Line 1\nLine 2\nLine 3" }, - ]; - const result = formatMemoryContext(facts, []); - - assertEquals(result.includes("Line 1\nLine 2\nLine 3"), true); - }); - - it("should handle nodes with empty string summary", () => { - const nodes: GraphitiNode[] = [ - { - uuid: "n1", - name: "Node", - summary: "", - }, - ]; - const result = formatMemoryContext([], nodes); - - 
// Empty summary should not add colon - assertEquals(result.includes("- **Node**"), true); - assertEquals(result.includes("Node:"), false); - }); - - it("should handle single label correctly", () => { - const nodes: GraphitiNode[] = [ - { - uuid: "n1", - name: "SingleLabel", - labels: ["only-one"], - }, - ]; - const result = formatMemoryContext([], nodes); - - assertEquals(result.includes("(only-one)"), true); - // For a single label, the formatted string should be exactly "(only-one)" without extra commas - assertEquals(result.includes("(only-one,"), false); - }); - }); -}); diff --git a/src/services/context.ts b/src/services/context.ts deleted file mode 100644 index 06a1886..0000000 --- a/src/services/context.ts +++ /dev/null @@ -1,243 +0,0 @@ -import type { GraphitiFact, GraphitiNode } from "../types/index.ts"; -import { DAY_MS } from "./constants.ts"; - -export const parseDate = (value?: string): Date | null => { - if (!value) return null; - const parsed = Date.parse(value); - if (Number.isNaN(parsed)) return null; - return new Date(parsed); -}; - -export const isFactInvalid = (fact: GraphitiFact, now: Date): boolean => { - const invalidAt = parseDate(fact.invalid_at); - if (invalidAt && invalidAt.getTime() < now.getTime()) return true; - - const validAt = parseDate(fact.valid_at); - if (validAt && validAt.getTime() > now.getTime()) return true; - - return false; -}; - -export const annotateStaleFact = ( - fact: GraphitiFact, - now: Date, - factStaleDays: number, -): GraphitiFact => { - const validAt = parseDate(fact.valid_at); - if (!validAt) return fact; - const ageDays = Math.floor((now.getTime() - validAt.getTime()) / DAY_MS); - if (ageDays < 0) return fact; - if (ageDays < factStaleDays) return fact; - return { - ...fact, - fact: `[stale: ${ageDays} days ago] ${fact.fact}`, - }; -}; - -export const sortFactsByRecency = (facts: GraphitiFact[]): GraphitiFact[] => { - const indexed = facts.map((fact, index) => ({ - fact, - index, - time: 
parseDate(fact.valid_at)?.getTime() ?? -Infinity, - })); - indexed.sort((a, b) => { - if (a.time !== b.time) return b.time - a.time; - return a.index - b.index; - }); - return indexed.map((entry) => entry.fact); -}; - -export const filterAndAnnotateFacts = ( - facts: GraphitiFact[], - options?: { - factStaleDays?: number; - now?: Date; - }, -): GraphitiFact[] => { - const now = options?.now ?? new Date(); - const factStaleDays = options?.factStaleDays ?? 30; - const filtered = facts.filter((fact) => !isFactInvalid(fact, now)); - const sorted = sortFactsByRecency(filtered); - return sorted.map((fact) => annotateStaleFact(fact, now, factStaleDays)); -}; - -export const formatFactLine = (fact: GraphitiFact): string => { - const entities: string[] = []; - if (fact.source_node?.name) entities.push(fact.source_node.name); - if (fact.target_node?.name) entities.push(fact.target_node.name); - const entityStr = entities.length > 0 ? ` [${entities.join(" -> ")}]` : ""; - return `- ${fact.fact}${entityStr}`; -}; - -export const formatFactLines = ( - facts: GraphitiFact[], - options?: { - factStaleDays?: number; - now?: Date; - }, -): string[] => { - const annotated = filterAndAnnotateFacts(facts, options); - return annotated.map((fact) => formatFactLine(fact)); -}; - -export const formatNodeLines = (nodes: GraphitiNode[]): string[] => - nodes.map((node) => { - const labels = node.labels?.length ? ` (${node.labels.join(", ")})` : ""; - const summary = node.summary ? 
`: ${node.summary}` : ""; - return `- **${node.name}**${labels}${summary}`; - }); - -export const deduplicateFactsByUuid = ( - facts: GraphitiFact[], -): GraphitiFact[] => { - const seen = new Set(); - const deduped: GraphitiFact[] = []; - for (const fact of facts) { - if (seen.has(fact.uuid)) continue; - seen.add(fact.uuid); - deduped.push(fact); - } - return deduped; -}; - -export const deduplicateNodesByUuid = ( - nodes: GraphitiNode[], -): GraphitiNode[] => { - const seen = new Set(); - const deduped: GraphitiNode[] = []; - for (const node of nodes) { - if (seen.has(node.uuid)) continue; - seen.add(node.uuid); - deduped.push(node); - } - return deduped; -}; - -export const removeNodesReferencedByFacts = ( - facts: GraphitiFact[], - nodes: GraphitiNode[], -): GraphitiNode[] => { - const factNodeUuids = new Set(); - for (const fact of facts) { - if (fact.source_node?.uuid) factNodeUuids.add(fact.source_node.uuid); - if (fact.target_node?.uuid) factNodeUuids.add(fact.target_node.uuid); - } - return nodes.filter((node) => !factNodeUuids.has(node.uuid)); -}; - -const deduplicateContext = (params: { - facts: GraphitiFact[]; - nodes: GraphitiNode[]; -}): { facts: GraphitiFact[]; nodes: GraphitiNode[] } => { - const dedupedFacts = deduplicateFactsByUuid(params.facts); - const dedupedNodes = deduplicateNodesByUuid(params.nodes); - const filteredNodes = removeNodesReferencedByFacts( - dedupedFacts, - dedupedNodes, - ); - return { facts: dedupedFacts, nodes: filteredNodes }; -}; - -/** - * Await four parallel fact/node promises, deduplicate each side, and return - * the resolved project and user contexts. - * - * Callers construct the promises themselves — this lets chat.ts seed the - * project-facts promise from an earlier drift-check fetch without issuing a - * duplicate network request. 
- */ -export async function resolveProjectUserContext(promises: { - projectFacts: Promise; - projectNodes: Promise; - userFacts: Promise; - userNodes: Promise; -}): Promise<{ - projectContext: { facts: GraphitiFact[]; nodes: GraphitiNode[] }; - userContext: { facts: GraphitiFact[]; nodes: GraphitiNode[] }; - projectFacts: GraphitiFact[]; - projectNodes: GraphitiNode[]; - userFacts: GraphitiFact[]; - userNodes: GraphitiNode[]; -}> { - const [projectFacts, projectNodes, userFacts, userNodes] = await Promise.all([ - promises.projectFacts, - promises.projectNodes, - promises.userFacts, - promises.userNodes, - ]); - - const projectContext = deduplicateContext({ - facts: projectFacts, - nodes: projectNodes, - }); - const userContext = deduplicateContext({ - facts: userFacts, - nodes: userNodes, - }); - - return { - projectContext, - userContext, - projectFacts, - projectNodes, - userFacts, - userNodes, - }; -} - -/** - * Format Graphiti facts and nodes into a user-facing context block. - */ -export function formatMemoryContext( - facts: GraphitiFact[], - nodes: GraphitiNode[], - options?: { - factStaleDays?: number; - now?: Date; - }, -): string { - if (facts.length === 0 && nodes.length === 0) { - return ""; - } - - const sections: string[] = []; - sections.push( - "# Persistent Memory (from Graphiti Knowledge Graph)", - ); - sections.push( - "The following information was retrieved from your persistent memory.", - ); - sections.push( - "Use this context to inform your responses, but do not mention it unless asked.", - ); - - if (facts.length > 0) { - sections.push(""); - sections.push("## Known Facts"); - sections.push(...formatFactLines(facts, options)); - } - - if (nodes.length > 0) { - sections.push(""); - sections.push("## Known Entities"); - sections.push(...formatNodeLines(nodes)); - } - - return sections.join("\n"); -} - -/** - * Extract fact UUIDs from all blocks in a text string. 
- */ -export function extractVisibleUuids(text: string): string[] { - const uuids: string[] = []; - const regex = /]*\bdata-uuids="([^"]*)"[^>]*>/g; - let match; - while ((match = regex.exec(text)) !== null) { - const raw = match[1]; - if (raw) { - uuids.push(...raw.split(",").filter(Boolean)); - } - } - return uuids; -} diff --git a/src/services/event-extractor.test.ts b/src/services/event-extractor.test.ts index 5867d6d..e89d342 100644 --- a/src/services/event-extractor.test.ts +++ b/src/services/event-extractor.test.ts @@ -65,6 +65,22 @@ describe("event-extractor", () => { eventType: "tool.called", properties: { tool: "graphiti-mcp", summary: "Graphiti MCP search" }, }); + const integrationFailure = extractStructuredEvents({ + eventType: "tool.completed", + properties: { + tool: "graphiti-mcp", + summary: "Graphiti MCP search failed with error", + resolved: false, + }, + }); + const resolvedIntegrationFailure = extractStructuredEvents({ + eventType: "tool.completed", + properties: { + tool: "graphiti-mcp", + summary: "Graphiti MCP search failed with error", + resolved: true, + }, + }); const error = extractStructuredEvents({ eventType: "tool.completed", properties: { tool: "shell", summary: "command failed with error" }, @@ -73,10 +89,14 @@ describe("event-extractor", () => { assertEquals(fileEdit[0].category, "file.edit"); assertEquals(gitActivity[0].category, "git.activity"); assertEquals(integration[0].category, "integration.call"); + assertEquals(integrationFailure[0].category, "error"); + assertEquals(integrationFailure[0].metadata?.resolved, false); + assertEquals(resolvedIntegrationFailure[0].category, "integration.call"); + assertEquals(resolvedIntegrationFailure[0].metadata?.resolved, true); assertEquals(error[0].category, "error"); }); - it("stores continuity for assistant/tool events without transcript-heavy bodies by default", () => { + it("suppresses assistant operational chatter while still storing compact tool continuity", () => { const assistant 
= extractStructuredEvents({ eventType: "message.updated", role: "assistant", @@ -94,15 +114,46 @@ describe("event-extractor", () => { }, }); - assertEquals(assistant[0].category, "message"); - assertEquals(assistant[0].body, undefined); - assertEquals(typeof assistant[0].continuityText, "string"); + assertEquals(assistant, []); assertEquals(tool[0].category, "file.read"); assertEquals(tool[0].body, undefined); assertEquals(typeof tool[0].continuityText, "string"); }); - it("extracts rules, environment, subagent, discovery, and assistant error signals", () => { + it("dedupes repeated continuity fragments for user task-like messages", () => { + const [event] = extractStructuredEvents({ + eventType: "chat.message", + sessionId: "session-1", + messageCount: 2, + role: "user", + messageText: "do the cleanup on code and data, don't commit yet", + }); + + assertEquals( + event.continuityText, + "do the cleanup on code and data, don't commit yet", + ); + }); + + it("dedupes repeated detail fragments in compactParts-backed task updates", () => { + const [event] = extractStructuredEvents({ + eventType: "task.updated", + properties: { + task: { + id: "t1", + summary: + "yes, keep the review-refine loop until no more issues are found.", + }, + }, + }); + + assertEquals( + event.detail, + "Task update — yes, keep the review-refine loop until no more issues are found.", + ); + }); + + it("extracts rules, environment, and subagent signals while filtering assistant operational blocker chatter", () => { const rules = extractStructuredEvents({ eventType: "rules.loaded", properties: { @@ -148,10 +199,71 @@ describe("event-extractor", () => { ]); assertEquals(started[0].category, "subagent.start"); assertEquals(finished[0].category, "subagent.finish"); - assertEquals(assistant.map((event) => event.category), [ - "message", - "discovery", - "error", - ]); + assertEquals(assistant, []); + }); + + it("rejects transcript-heavy user and tool wrapper content from extraction", () => { + 
const user = extractStructuredEvents({ + eventType: "chat.message", + sessionId: "session-1", + messageCount: 2, + role: "user", + messageText: + '\nsrc/session.ts\n1: const x = 1', + }); + const tool = extractStructuredEvents({ + eventType: "tool.completed", + properties: { + tool: "Read", + path: "src/session.ts", + summary: "Read src/session.ts", + }, + messageText: + "src/session.ts\n1: export const huge = true;", + }); + + assertEquals(user, []); + assertEquals(tool[0].category, "file.read"); + assertEquals(tool[0].body, undefined); + assertEquals(tool[0].continuityText?.includes("content"), false); + }); + + it("preserves legitimate inline xml-like tags in normal text", () => { + const [event] = extractStructuredEvents({ + eventType: "chat.message", + sessionId: "session-1", + messageCount: 2, + role: "user", + messageText: + "Keep the literal tags docs/notes and manual in the summary.", + }); + + assertEquals( + event.summary.includes("docs/notes"), + true, + ); + assertEquals(event.summary.includes("manual"), true); + }); + + it("extracts refs from nested call payloads", () => { + const [event] = extractStructuredEvents({ + eventType: "tool.called", + properties: { + call: { + tool: { + name: "Read", + refs: ["src/services/event-extractor.ts"], + path: "src/services/render-utils.ts", + }, + }, + summary: "Read nested call payload refs", + }, + }); + + assertEquals( + event.refs?.includes("src/services/event-extractor.ts"), + true, + ); + assertEquals(event.refs?.includes("src/services/render-utils.ts"), true); }); }); diff --git a/src/services/event-extractor.ts b/src/services/event-extractor.ts index e7eb83f..abd341a 100644 --- a/src/services/event-extractor.ts +++ b/src/services/event-extractor.ts @@ -3,6 +3,13 @@ import type { SessionEvent, SessionEventSourceKind, } from "../types/index.ts"; +import { + isHighValueMemoryText, + looksLikeOperationalChatter, + looksLikeToolTranscript, + looksTranscriptHeavy, + sanitizeMemoryInput, +} from 
"./render-utils.ts"; const MAX_SUMMARY = 200; const MAX_BODY = 4096; @@ -70,6 +77,9 @@ const eventRoles = new Set(["user", "assistant", "tool", "system"]); const normalizeWhitespace = (text: string): string => text.replace(/\s+/g, " ").trim(); +const normalizeMemoryWhitespace = (text: string): string => + normalizeWhitespace(sanitizeMemoryInput(text)); + const summarize = (text: string): string => normalizeWhitespace(text).slice(0, MAX_SUMMARY); @@ -79,6 +89,13 @@ const truncateDetail = (text: string): string => text.slice(0, 600); const truncateContinuity = (text: string): string => text.slice(0, 800); +const USER_DECISION_PATTERN = + /\b(?:must|should|keep|prefer|never|always|do not|don't|avoid|require|only)\b/i; +const USER_TASK_PATTERN = + /\b(?:implement|update|fix|continue|finish|complete|add|remove|refactor|investigate|revisit|clean(?:up)?|align|strip|prevent|enforce|make|keep)\b/i; +const ASSISTANT_META_PATTERN = + /\b(?:plan per target|i(?:'m| am| will| can| should| need to)|reading|checking|inspecting|updating|running|prepared|schedule(?:d)?|inject(?:ed|ion)|hot-tier|continuity)/i; + const makeId = (): string => crypto.randomUUID?.() ?? `${Date.now()}-${Math.random().toString(16).slice(2)}`; @@ -140,6 +157,55 @@ const toText = (value: unknown): string | undefined => { return undefined; }; +const sanitizeExtractedText = (value: string | undefined): string => + value ? 
normalizeMemoryWhitespace(value) : ""; + +const sanitizeRefs = (refs: string[]): string[] => + refs.map((ref) => sanitizeMemoryInput(ref)).filter(Boolean); + +const shouldRejectUserText = (text: string): boolean => + !text || looksLikeToolTranscript(text) || looksTranscriptHeavy(text); + +const shouldPromoteUserDecision = (text: string): boolean => + USER_DECISION_PATTERN.test(text) && !looksLikeOperationalChatter(text); + +const shouldPromoteUserTask = (text: string): boolean => + USER_TASK_PATTERN.test(text) && !looksLikeOperationalChatter(text); + +const shouldPromoteAssistantSignal = (text: string): boolean => + !looksLikeOperationalChatter(text) && !ASSISTANT_META_PATTERN.test(text) && + !looksTranscriptHeavy(text) && isHighValueMemoryText(text); + +const shouldCaptureToolError = (tool: string, text: string): boolean => { + const lowerTool = tool.toLowerCase(); + const lowerText = text.toLowerCase(); + return !hasLowerKeyword( + lowerTool, + "read", + "open", + "grep", + "search", + "glob", + ) && + hasLowerKeyword( + lowerText, + "error", + "failed", + "exception", + "unable", + "exit", + ); +}; + +const sourceKindForRole = (role: EventRole): SessionEventSourceKind => + role === "assistant" + ? "assistant-response" + : role === "user" + ? "user-request" + : role === "tool" + ? "tool-activity" + : "system-state"; + const pickStrings = ( values: Array, limit = 8, @@ -196,6 +262,11 @@ const collectPathRefs = ( } const record = asRecord(value); if (!record) return [...refs]; + const nestedCall = asRecord(record.call); + if (nestedCall?.tool !== undefined) { + collectPathRefs(nestedCall, refs); + collectPathRefs(nestedCall.tool, refs); + } for (const [key, item] of Object.entries(record)) { if (/(path|paths|file|files|ref|refs|cwd|directory)/i.test(key)) { collectPathRefs(item, refs); @@ -220,10 +291,17 @@ const hasKeyword = ( const compactParts = ( ...parts: Array ): string | undefined => { - const compact = parts - .map((part) => part ? 
normalizeWhitespace(part) : "") - .filter(Boolean) - .join(" — "); + const fragments: string[] = []; + for (const part of parts) { + const value = part ? normalizeWhitespace(part) : ""; + if (!value) continue; + const normalized = value.toLowerCase(); + if (fragments.some((fragment) => fragment.toLowerCase() === normalized)) { + continue; + } + fragments.push(value); + } + const compact = fragments.join(" — "); return compact || undefined; }; @@ -273,21 +351,42 @@ const buildContinuityText = ( refs?: string[], keywords?: string[], ): string | undefined => { - const continuity = [ - summary, - detail, - refs?.join(" "), - keywords?.join(" "), - ] - .map((value) => value ? normalizeWhitespace(value) : "") - .filter(Boolean) - .join(" "); + const fragments: string[] = []; + for ( + const candidate of [summary, detail, refs?.join(" "), keywords?.join(" ")] + ) { + const value = candidate ? normalizeWhitespace(candidate) : ""; + if (!value) continue; + + const normalized = value.toLowerCase(); + let replaced = false; + for (let index = fragments.length - 1; index >= 0; index -= 1) { + const existing = fragments[index]; + const existingNormalized = existing.toLowerCase(); + if ( + existingNormalized === normalized || + existingNormalized.includes(normalized) + ) { + replaced = true; + break; + } + if (normalized.includes(existingNormalized)) { + fragments.splice(index, 1); + } + } + if (!replaced) fragments.push(value); + } + + const continuity = fragments.join(" "); return continuity ? 
truncateContinuity(continuity) : undefined; }; const compactMessageBody = (text: string): string | undefined => { const normalized = normalizeWhitespace(text); if (!normalized) return undefined; + if (looksTranscriptHeavy(normalized) || looksLikeToolTranscript(normalized)) { + return undefined; + } return truncateBody(normalized.slice(0, 480)); }; @@ -304,8 +403,12 @@ const buildToolActivityContext = ( extraMetadata?: Record; } = {}, ): EventContext => { - const normalizedText = normalizeWhitespace(text); - const refSummary = refs.slice(0, 3).join(", "); + const normalizedText = + looksTranscriptHeavy(text) || looksLikeToolTranscript(text) + ? "" + : sanitizeExtractedText(text); + const cleanRefs = sanitizeRefs(refs); + const refSummary = cleanRefs.slice(0, 3).join(", "); const statusSummary = compactParts( asString(props.status), asString(props.result), @@ -319,11 +422,13 @@ const buildToolActivityContext = ( const detail = compactParts( summarize(normalizedText), statusSummary, - refs.length > 0 ? `refs ${refs.slice(0, 4).join(", ")}` : undefined, + cleanRefs.length > 0 + ? `refs ${cleanRefs.slice(0, 4).join(", ")}` + : undefined, ); const keywords = pickKeywords([ tool, - ...refs, + ...cleanRefs, ...collectMetadataKeywords(props), ...(options.extraKeywords ?? []), ]); @@ -331,10 +436,10 @@ const buildToolActivityContext = ( summary, body: options.preserveBody ? compactMessageBody(normalizedText) : undefined, detail, - continuityText: buildContinuityText(summary, detail, refs, keywords), + continuityText: buildContinuityText(summary, detail, cleanRefs, keywords), keywords, sourceKind: options.sourceKind ?? "tool-activity", - refs, + refs: cleanRefs, metadata: compactToolMetadata(props, options.extraMetadata), }; }; @@ -343,7 +448,7 @@ const normalizeInput = ( input: ExtractedEventInput, ): NormalizedEventInput => { const props = input.properties ?? {}; - const text = input.messageText ?? toText(props) ?? 
""; + const text = sanitizeExtractedText(input.messageText ?? toText(props) ?? ""); const refs = [ ...new Set([...collectPathRefs(props), ...collectInlinePathRefs(text)]), ]; @@ -511,9 +616,13 @@ export const extractStructuredEvents = ( const { eventType, props, text, refs, role, messageCount } = normalized; if (eventType === "chat.message") { + if (shouldRejectUserText(text)) return []; const events = [extractUserMessageEvent(text, messageCount)]; const lower = text.toLowerCase(); - if (hasLowerKeyword(lower, "prefer", "please", "always", "never")) { + if ( + shouldPromoteUserDecision(text) && + hasLowerKeyword(lower, "prefer", "please", "always", "never") + ) { events.push( createEvent("preference", "user", { summary: text, @@ -525,6 +634,7 @@ export const extractStructuredEvents = ( ); } if ( + shouldPromoteUserDecision(text) && hasLowerKeyword(lower, "decide", "decision", "must", "should", "keep ") ) { events.push( @@ -559,12 +669,27 @@ export const extractStructuredEvents = ( }), ); } + if (shouldPromoteUserTask(text) && messageCount > 1) { + events.push( + createEvent(inferTaskCategory(text), "user", { + summary: text, + detail: compactParts("User task", summarize(text)), + continuityText: buildContinuityText(text, summarize(text), refs), + keywords: pickKeywords([text, ...refs, "task"]), + sourceKind: "user-request", + refs, + }), + ); + } return events; } if (eventType === "message.updated") { const resolvedRole = input.role ?? 
asEventRole(asRecord(props.info)?.role); if (resolvedRole === "assistant" && text) { + if (!shouldPromoteAssistantSignal(text)) { + return []; + } const events = [extractAssistantMessageEvent(text)]; if (hasKeyword(text, "discovered", "found", "identified", "confirmed")) { events.push( @@ -578,17 +703,6 @@ export const extractStructuredEvents = ( }), ); } - if (hasKeyword(text, "error", "failed", "blocker", "cannot", "unable")) { - events.push(createEvent("error", "assistant", { - summary: text, - detail: summarize(text), - continuityText: buildContinuityText(text, summarize(text), refs), - keywords: pickKeywords([text, ...refs, "error", "blocker"]), - sourceKind: "assistant-response", - refs, - metadata: { resolved: false, eventType }, - })); - } return events; } } @@ -713,10 +827,24 @@ export const extractStructuredEvents = ( ), ]; } - if ( + const isIntegrationActivity = hasLowerKeyword(lowerTool, "graphiti", "mcp", "redis", "http") || - asString(props.integration) + asString(props.integration); + const isUnresolvedToolFailure = props.resolved === false; + if ( + shouldCaptureToolError(tool, summaryText) && + (!isIntegrationActivity || isUnresolvedToolFailure) ) { + return [createEvent("error", "tool", { + ...buildToolActivityContext(tool, summaryText, refs, props, { + summaryPrefix: "Tool error", + preserveBody: true, + extraKeywords: ["error", "failed"], + extraMetadata: isUnresolvedToolFailure ? 
{ resolved: false } : {}, + }), + })]; + } + if (isIntegrationActivity) { return [ createEvent( "integration.call", @@ -728,16 +856,6 @@ export const extractStructuredEvents = ( ), ]; } - if (hasLowerKeyword(lowerText, "error", "failed", "exception", "unable")) { - return [createEvent("error", "tool", { - ...buildToolActivityContext(tool, summaryText, refs, props, { - summaryPrefix: "Tool error", - preserveBody: true, - extraKeywords: ["error", "failed"], - extraMetadata: { resolved: false }, - }), - })]; - } } if (eventType === "environment.updated") { @@ -812,36 +930,30 @@ export const extractStructuredEvents = ( if (text) { const lower = text.toLowerCase(); - if (hasLowerKeyword(lower, "error", "failed", "exception", "blocker")) { + if ( + role !== "assistant" && + hasLowerKeyword(lower, "error", "failed", "exception", "blocker") + ) { return [createEvent("error", role, { summary: text, detail: summarize(text), continuityText: buildContinuityText(text, summarize(text), refs), keywords: pickKeywords([text, ...refs, "error"]), - sourceKind: role === "assistant" - ? "assistant-response" - : role === "user" - ? "user-request" - : role === "tool" - ? "tool-activity" - : "system-state", + sourceKind: sourceKindForRole(role), refs, metadata: { ...props, resolved: false, eventType }, })]; } - if (hasLowerKeyword(lower, "discover", "found", "inspect", "observed")) { + if ( + role !== "assistant" && + hasLowerKeyword(lower, "discover", "found", "inspect", "observed") + ) { return [createEvent("discovery", role, { summary: text, detail: summarize(text), continuityText: buildContinuityText(text, summarize(text), refs), keywords: pickKeywords([text, ...refs, "discovery"]), - sourceKind: role === "assistant" - ? "assistant-response" - : role === "user" - ? "user-request" - : role === "tool" - ? 
"tool-activity" - : "system-state", + sourceKind: sourceKindForRole(role), refs, metadata: { ...props, eventType }, })]; @@ -852,13 +964,7 @@ export const extractStructuredEvents = ( detail: summarize(text), continuityText: buildContinuityText(text, summarize(text), refs), keywords: pickKeywords([text, ...refs]), - sourceKind: role === "assistant" - ? "assistant-response" - : role === "user" - ? "user-request" - : role === "tool" - ? "tool-activity" - : "system-state", + sourceKind: sourceKindForRole(role), refs, metadata: { ...props, eventType }, })]; diff --git a/src/services/graphiti-async.test.ts b/src/services/graphiti-async.test.ts new file mode 100644 index 0000000..336b43c --- /dev/null +++ b/src/services/graphiti-async.test.ts @@ -0,0 +1,433 @@ +import { assert, assertEquals } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { GraphitiAsyncService } from "./graphiti-async.ts"; + +function deferred() { + let resolve!: (value: T | PromiseLike) => void; + let reject!: (reason?: unknown) => void; + const promise = new Promise((res, rej) => { + resolve = res; + reject = rej; + }); + return { promise, resolve, reject }; +} + +async function flushMicrotasks(count = 6) { + for (let i = 0; i < count; i += 1) { + await Promise.resolve(); + } +} + +function createFakeTimers() { + let nextId = 0; + const scheduledTimeouts: number[] = []; + const clearedTimers: number[] = []; + const active = new Map void; delayMs: number }>(); + + return { + scheduledTimeouts, + clearedTimers, + setTimer(callback: () => void, delayMs: number) { + const id = ++nextId; + active.set(id, { callback, delayMs }); + scheduledTimeouts.push(delayMs); + return id; + }, + clearTimer(timer: number) { + if (!active.has(timer)) return; + active.delete(timer); + clearedTimers.push(timer); + }, + runNext(delayMs?: number) { + const entry = [...active.entries()].find(([_, timer]) => + delayMs === undefined || timer.delayMs === delayMs + ); + if 
(!entry) return false; + const [id, timer] = entry; + active.delete(id); + timer.callback(); + return true; + }, + }; +} + +describe("GraphitiAsyncService", () => { + it("coalesces concurrent cache refreshes and follows up with the latest query", async () => { + const realSetTimeout = globalThis.setTimeout; + const searchCalls: string[] = []; + const nodeSearchCalls: string[] = []; + const rememberCalls: string[] = []; + const cacheSets: string[] = []; + + const firstFacts = deferred< + Array<{ + fact: string; + source_node?: { name?: string }; + target_node?: { name?: string }; + }> + >(); + const firstNodes = deferred<{ + nodes: Array<{ uuid: string; name: string; summary: string }>; + degraded: boolean; + }>(); + + let refreshRuns = 0; + const graphiti = { + searchMemoryFacts({ query }: { query: string }) { + searchCalls.push(query); + refreshRuns += 1; + if (refreshRuns === 1) return firstFacts.promise; + return Promise.resolve([ + { + fact: `fact:${query}`, + source_node: { name: "Source" }, + target_node: { name: query }, + }, + ]); + }, + searchNodesWithStatus({ query }: { query: string }) { + nodeSearchCalls.push(query); + if (nodeSearchCalls.length === 1) return firstNodes.promise; + return Promise.resolve({ + nodes: [{ + uuid: `node:${query}`, + name: query, + summary: `summary:${query}`, + }], + degraded: false, + }); + }, + getEpisodes() { + return Promise.resolve([]); + }, + }; + + const meta = new Map(); + const entries = new Map(); + const cache = { + rememberRefreshQuery(groupId: string, query: string) { + rememberCalls.push(query); + meta.set(groupId, { lastQuery: query }); + return Promise.resolve(); + }, + getMeta(groupId: string) { + return Promise.resolve(meta.get(groupId) ?? null); + }, + get(groupId: string) { + return Promise.resolve(entries.get(groupId) ?? 
null); + }, + set( + groupId: string, + entry: { + query: string; + refreshedAt: number; + nodes: Array<{ uuid: string; name: string; summary: string }>; + episodeSummaries?: string[]; + nodeRefs: string[]; + }, + ) { + cacheSets.push(entry.query); + entries.set(groupId, { query: entry.query }); + return Promise.resolve(); + }, + }; + + const service = new GraphitiAsyncService( + graphiti as never, + cache as never, + { + drainGroup: () => Promise.resolve({ status: "idle" as const }), + } as never, + ); + + service.scheduleCacheRefresh("group-1", "alpha"); + service.scheduleCacheRefresh("group-1", "beta"); + + await Promise.resolve(); + assertEquals(searchCalls, ["alpha"]); + assertEquals(nodeSearchCalls, ["alpha"]); + assertEquals(rememberCalls, ["alpha", "beta"]); + + firstFacts.resolve([ + { + fact: "fact:alpha", + source_node: { name: "Source" }, + target_node: { name: "alpha" }, + }, + ]); + firstNodes.resolve({ + nodes: [{ uuid: "node:alpha", name: "alpha", summary: "summary:alpha" }], + degraded: false, + }); + + await flushMicrotasks(); + await new Promise((resolve) => realSetTimeout(resolve, 0)); + await service.dispose(); + + assertEquals(searchCalls, ["alpha", "beta"]); + assertEquals(nodeSearchCalls, ["alpha", "beta"]); + assertEquals(cacheSets, ["beta"]); + }); + + it("dispose clears timers and waits for in-flight work", async () => { + const timers = createFakeTimers(); + + const drainDeferred = deferred<{ status: "retry" }>(); + const refreshFactsDeferred = deferred< + Array<{ + fact: string; + source_node?: { name?: string }; + target_node?: { name?: string }; + }> + >(); + const refreshNodesDeferred = deferred<{ + nodes: Array<{ uuid: string; name: string; summary: string }>; + degraded: boolean; + }>(); + const primerDeferred = deferred>(); + + const graphiti = { + searchMemoryFacts() { + return refreshFactsDeferred.promise; + }, + searchNodesWithStatus() { + return refreshNodesDeferred.promise; + }, + getEpisodes() { + return 
primerDeferred.promise; + }, + }; + + const cache = { + get() { + return Promise.resolve(null); + }, + getMeta() { + return Promise.resolve({ lastQuery: "alpha" }); + }, + set() { + return Promise.resolve(); + }, + rememberRefreshQuery() { + return Promise.resolve(); + }, + }; + + const service = new GraphitiAsyncService( + graphiti as never, + cache as never, + { drainGroup: () => drainDeferred.promise } as never, + 25, + undefined, + timers, + ); + + service.scheduleDrain("group-1"); + service.scheduleCacheRefresh("group-1", "alpha"); + service.schedulePrimer("group-1"); + + await Promise.resolve(); + + let disposed = false; + const disposePromise = service.dispose().then(() => { + disposed = true; + }); + + await Promise.resolve(); + assertEquals(disposed, false); + assertEquals(timers.clearedTimers.length, 1); + + drainDeferred.resolve({ status: "retry" }); + refreshFactsDeferred.resolve([]); + refreshNodesDeferred.resolve({ nodes: [], degraded: true }); + primerDeferred.resolve([{ name: "episode", content: "content" }]); + + await disposePromise; + + assert(disposed); + assertEquals(timers.clearedTimers.length, 1); + }); + + it("preserves fact-only cache refreshes when node search degrades", async () => { + const cacheSets: Array<{ + query: string; + nodes: Array<{ uuid: string; name: string; summary: string }>; + episodeSummaries?: string[]; + nodeRefs: string[]; + }> = []; + + const service = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([ + { + fact: "fact:alpha", + source_node: { name: "Source" }, + target_node: { name: "alpha" }, + }, + ]); + }, + searchNodesWithStatus() { + return Promise.resolve({ + nodes: [{ uuid: "node:alpha", name: "alpha", summary: "unused" }], + degraded: true, + }); + }, + } as never, + { + get() { + return Promise.resolve(null); + }, + getMeta() { + return Promise.resolve({ lastQuery: "alpha" }); + }, + rememberRefreshQuery() { + return 
Promise.resolve(); + }, + set( + _groupId: string, + entry: { + query: string; + refreshedAt: number; + nodes: Array<{ uuid: string; name: string; summary: string }>; + episodeSummaries?: string[]; + nodeRefs: string[]; + }, + ) { + cacheSets.push(entry); + return Promise.resolve(); + }, + } as never, + { + drainGroup: () => Promise.resolve({ status: "idle" as const }), + } as never, + ); + + service.scheduleCacheRefresh("group-1", "alpha"); + await flushMicrotasks(); + await service.dispose(); + + assertEquals(cacheSets.length, 1); + assertEquals(cacheSets[0]?.query, "alpha"); + assertEquals(cacheSets[0]?.nodes, []); + assertEquals(cacheSets[0]?.episodeSummaries, [ + "Source → alpha: fact:alpha", + ]); + assertEquals(cacheSets[0]?.nodeRefs, []); + }); + + it("does not start a second drain while a slow drain is still in flight", async () => { + const timers = createFakeTimers(); + const drainDeferred = deferred<{ status: "idle" }>(); + let drainCalls = 0; + + const service = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus() { + return Promise.resolve({ nodes: [], degraded: true }); + }, + } as never, + { + get() { + return Promise.resolve(null); + }, + getMeta() { + return Promise.resolve(null); + }, + rememberRefreshQuery() { + return Promise.resolve(); + }, + set() { + return Promise.resolve(); + }, + } as never, + { + drainGroup() { + drainCalls += 1; + return drainDeferred.promise; + }, + } as never, + 25, + 50, + timers, + ); + + service.scheduleDrain("group-1"); + await Promise.resolve(); + service.scheduleDrain("group-1"); + + assertEquals(drainCalls, 1); + assert(timers.runNext(50)); + await Promise.resolve(); + + assertEquals(drainCalls, 1); + + drainDeferred.resolve({ status: "idle" }); + await service.dispose(); + + assertEquals(drainCalls, 1); + }); + + it("uses returned backoff timing while keeping fixed delay for retry", async () => { 
+ const timers = createFakeTimers(); + const drainResults = [ + { status: "backoff" as const, drained: 0, retryAfterMs: 250 }, + { status: "retry" as const, drained: 0 }, + ]; + + const service = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus() { + return Promise.resolve({ nodes: [], degraded: true }); + }, + } as never, + { + get() { + return Promise.resolve(null); + }, + getMeta() { + return Promise.resolve(null); + }, + rememberRefreshQuery() { + return Promise.resolve(); + }, + set() { + return Promise.resolve(); + }, + } as never, + { + drainGroup() { + const result = drainResults.shift(); + if (!result) throw new Error("Unexpected extra drainGroup call"); + return Promise.resolve(result); + }, + } as never, + 25, + 50, + timers, + ); + + service.scheduleDrain("group-1"); + await Promise.resolve(); + service.scheduleDrain("group-2"); + await Promise.resolve(); + await service.dispose(); + + assertEquals(timers.scheduledTimeouts, [50, 250, 50, 25]); + }); +}); diff --git a/src/services/graphiti-async.ts b/src/services/graphiti-async.ts index 06e3c6f..05ba0b4 100644 --- a/src/services/graphiti-async.ts +++ b/src/services/graphiti-async.ts @@ -1,94 +1,291 @@ import type { PersistentMemoryCacheEntry } from "../types/index.ts"; import type { BatchDrainService } from "./batch-drain.ts"; import type { GraphitiMcpClient } from "./graphiti-mcp.ts"; -import type { RedisCacheService } from "./redis-cache.ts"; import { logger } from "./logger.ts"; +import type { RedisCacheService } from "./redis-cache.ts"; + +type TimerHandle = ReturnType | number; + +type GraphitiAsyncServiceOptions = { + setTimer?: (callback: () => void, delayMs: number) => TimerHandle; + clearTimer?: (timer: TimerHandle) => void; +}; export class GraphitiAsyncService { + private static readonly DEFAULT_DRAIN_RECOVERY_DELAY_MS = 30_000; private readonly drainInFlight = new Map>(); + 
private readonly setTimerImpl: ( + callback: () => void, + delayMs: number, + ) => TimerHandle; + private readonly clearTimerImpl: (timer: TimerHandle) => void; + private readonly drainRetryTimers = new Map(); + private readonly drainRecoveryTimers = new Map< + string, + { + run: Promise; + timer: TimerHandle; + } + >(); private readonly refreshInFlight = new Map>(); private readonly primerInFlight = new Map>(); + private stopped = false; constructor( private readonly graphiti: GraphitiMcpClient, private readonly cache: RedisCacheService, private readonly drain: BatchDrainService, - ) {} + private readonly drainRetryDelayMs = 1_000, + private readonly drainRecoveryDelayMs = + GraphitiAsyncService.DEFAULT_DRAIN_RECOVERY_DELAY_MS, + options: GraphitiAsyncServiceOptions = {}, + ) { + this.setTimerImpl = options.setTimer ?? + ((callback, delayMs) => setTimeout(callback, delayMs)); + this.clearTimerImpl = options.clearTimer ?? + ((timer) => clearTimeout(timer)); + } + + async flushPendingGroups(groupIds: Iterable): Promise { + const pendingGroups = [ + ...new Set( + [...groupIds].map((groupId) => groupId.trim()).filter(Boolean), + ), + ]; + if (pendingGroups.length === 0) return; + + const priorStopped = this.stopped; + this.stopped = false; + try { + for (const groupId of pendingGroups) { + this.scheduleDrain(groupId); + } + const inFlight = pendingGroups.map((groupId) => + this.drainInFlight.get(groupId) + ) + .filter((run): run is Promise => Boolean(run)); + await Promise.allSettled(inFlight); + } finally { + this.stopped = priorStopped; + } + } + + async dispose(): Promise { + this.stopped = true; + for (const timer of this.drainRetryTimers.values()) { + this.clearTimerImpl(timer); + } + this.drainRetryTimers.clear(); + for (const recovery of this.drainRecoveryTimers.values()) { + this.clearTimerImpl(recovery.timer); + } + this.drainRecoveryTimers.clear(); + + const inFlight = [ + ...this.drainInFlight.values(), + ...this.refreshInFlight.values(), + 
...this.primerInFlight.values(), + ]; + this.drainInFlight.clear(); + this.refreshInFlight.clear(); + this.primerInFlight.clear(); + await Promise.allSettled(inFlight); + } + + private armDrainRetry( + groupId: string, + delayMs = this.drainRetryDelayMs, + ): void { + if (this.stopped) return; + if (this.drainRetryTimers.has(groupId)) return; + const timer = this.setTimerImpl(() => { + if (this.stopped) return; + this.drainRetryTimers.delete(groupId); + this.scheduleDrain(groupId); + }, delayMs); + this.drainRetryTimers.set(groupId, timer); + } + + private armDrainRecovery(groupId: string, run: Promise): void { + if (this.stopped) return; + const existing = this.drainRecoveryTimers.get(groupId); + if (existing?.run === run) return; + if (existing) this.clearTimerImpl(existing.timer); + + const timer = this.setTimerImpl(() => { + if (this.stopped) return; + const recovery = this.drainRecoveryTimers.get(groupId); + if (!recovery || recovery.run !== run) return; + this.drainRecoveryTimers.delete(groupId); + if (this.drainInFlight.get(groupId) !== run) return; + logger.warn( + "Graphiti drain recovery timeout exceeded; leaving in-flight drain intact", + { groupId, timeoutMs: this.drainRecoveryDelayMs }, + ); + }, this.drainRecoveryDelayMs); + + this.drainRecoveryTimers.set(groupId, { run, timer }); + } + + private clearDrainRecovery(groupId: string, run: Promise): void { + const recovery = this.drainRecoveryTimers.get(groupId); + if (!recovery || recovery.run !== run) return; + this.clearTimerImpl(recovery.timer); + this.drainRecoveryTimers.delete(groupId); + } schedulePrimer(groupId: string): void { + if (this.stopped) return; if (this.primerInFlight.has(groupId)) return; const run = (async () => { const existing = await this.cache.get(groupId); if (existing) return; const episodes = await this.graphiti.getEpisodes({ groupId, lastN: 5 }); + if (this.stopped) return; if (episodes.length === 0) return; const entry: PersistentMemoryCacheEntry = { query: "primer", 
refreshedAt: Date.now(), - facts: [], nodes: [], - factUuids: [], nodeRefs: [], episodeSummaries: episodes.map((episode) => `${episode.name}: ${episode.content}`.slice(0, 240) ), }; await this.cache.set(groupId, entry); - })().catch((err) => logger.debug("Graphiti primer failed", err)).finally( + })().catch((err) => logger.warn("Graphiti primer failed", err)).finally( () => this.primerInFlight.delete(groupId), ); this.primerInFlight.set(groupId, run); } scheduleCacheRefresh(groupId: string, query: string): void { + if (this.stopped) return; const normalized = query.trim(); if (!normalized) return; - const key = `${groupId}:${normalized.toLowerCase()}`; - if (this.refreshInFlight.has(key)) return; + const key = groupId; + if (this.refreshInFlight.has(key)) { + void this.cache.rememberRefreshQuery(groupId, normalized).catch((err) => + logger.warn("Graphiti refresh query update failed", err) + ); + return; + } const run = (async () => { await this.cache.rememberRefreshQuery(groupId, normalized); - const [facts, nodes] = await Promise.all([ + if (this.stopped) return; + const [facts, result] = await Promise.all([ this.graphiti.searchMemoryFacts({ query: normalized, groupIds: [groupId], - maxFacts: 20, + maxFacts: 8, }), - this.graphiti.searchNodes({ + this.graphiti.searchNodesWithStatus({ query: normalized, groupIds: [groupId], maxNodes: 12, }), ]); + if (this.stopped) return; + + const [meta, current] = await Promise.all([ + this.cache.getMeta(groupId), + this.cache.get(groupId), + ]); + const latestQuery = meta?.lastQuery ?? current?.query; + if ( + latestQuery && + latestQuery.trim().toLowerCase() !== normalized.toLowerCase() + ) { + return; + } + if (this.stopped) return; + + const nodes = result.degraded ? 
[] : result.nodes; await this.cache.set(groupId, { query: normalized, refreshedAt: Date.now(), - facts, nodes, - factUuids: facts.map((fact) => fact.uuid), + episodeSummaries: facts.map((fact) => { + const source = fact.source_node?.name?.trim(); + const target = fact.target_node?.name?.trim(); + const relation = [source, target].filter(Boolean).join(" → "); + return relation ? `${relation}: ${fact.fact}` : fact.fact; + }), nodeRefs: nodes.map((node) => node.uuid), }); - })().catch((err) => logger.debug("Graphiti cache refresh failed", err)) - .finally(() => this.refreshInFlight.delete(key)); + })().catch((err) => logger.warn("Graphiti cache refresh failed", err)) + .finally(async () => { + this.refreshInFlight.delete(key); + try { + if (this.stopped) return; + const latestQuery = (await this.cache.getMeta(groupId))?.lastQuery; + if ( + latestQuery && + latestQuery.trim().toLowerCase() !== normalized.toLowerCase() + ) { + this.scheduleCacheRefresh(groupId, latestQuery); + } + } catch (err) { + logger.warn("Graphiti follow-up cache refresh failed", err); + } + }); this.refreshInFlight.set(key, run); } scheduleDrain(groupId: string): void { - if (this.drainInFlight.has(groupId)) return; + if (this.stopped) return; + const inFlight = this.drainInFlight.get(groupId); + if (inFlight) { + this.armDrainRecovery(groupId, inFlight); + return; + } + const retryTimer = this.drainRetryTimers.get(groupId); + if (retryTimer) { + this.clearTimerImpl(retryTimer); + this.drainRetryTimers.delete(groupId); + } const run = (async () => { - const result = await this.drain.drainGroup(groupId, this.graphiti); - if (result.status === "success" || result.status === "dead-letter") { + let shouldRefresh = false; + while (true) { + if (this.stopped) break; + const result = await this.drain.drainGroup(groupId, this.graphiti); + if (this.stopped) break; + if (result.status === "success" || result.status === "dead-letter") { + shouldRefresh = true; + continue; + } + if (result.status === 
"backoff") { + this.armDrainRetry( + groupId, + result.retryAfterMs ?? this.drainRetryDelayMs, + ); + } + if (result.status === "retry") { + this.armDrainRetry(groupId); + } + break; + } + if (this.stopped) return; + if (shouldRefresh) { const [current, meta] = await Promise.all([ this.cache.get(groupId), this.cache.getMeta(groupId), ]); - const refreshQuery = current?.query || meta?.lastQuery; + if (this.stopped) return; + const refreshQuery = meta?.lastQuery || current?.query; if (refreshQuery) this.scheduleCacheRefresh(groupId, refreshQuery); } - })().catch((err) => logger.debug("Graphiti drain failed", err)).finally( - () => this.drainInFlight.delete(groupId), + })().catch((err) => logger.warn("Graphiti drain failed", err)).finally( + () => { + this.clearDrainRecovery(groupId, run); + if (this.drainInFlight.get(groupId) === run) { + this.drainInFlight.delete(groupId); + } + }, ); this.drainInFlight.set(groupId, run); + this.armDrainRecovery(groupId, run); } } diff --git a/src/services/graphiti-mcp.test.ts b/src/services/graphiti-mcp.test.ts new file mode 100644 index 0000000..4c65393 --- /dev/null +++ b/src/services/graphiti-mcp.test.ts @@ -0,0 +1,119 @@ +import { assertEquals, assertRejects } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { GraphitiOfflineError } from "./connection-manager.ts"; +import { GraphitiMcpClient } from "./graphiti-mcp.ts"; +import { setLoggerSilentOverride } from "./logger.ts"; +import { + setOpenCodeClient, + setSuppressConsoleWarningsDuringTestsOverride, + setWarningTaskScheduler, +} from "./opencode-warning.ts"; + +describe("GraphitiMcpClient", () => { + it("connect rejects explicitly after stop", async () => { + let stopped = false; + const client = new GraphitiMcpClient({ + start() { + if (stopped) { + throw new GraphitiOfflineError( + "stopped", + "Graphiti connection manager has been stopped and cannot be restarted", + ); + } + }, + stop() { + stopped = true; + return 
Promise.resolve(); + }, + ready() { + return Promise.resolve(!stopped); + }, + callTool() { + return Promise.resolve({}); + }, + }); + + assertEquals(await client.connect(), true); + await client.stop(); + + const error = await assertRejects( + () => client.connect(), + GraphitiOfflineError, + ); + + assertEquals(error.state, "stopped"); + }); + + it("marks unexpected search node failures as degraded", async () => { + try { + setLoggerSilentOverride(true); + const client = new GraphitiMcpClient({ + start() {}, + stop() { + return Promise.resolve(); + }, + ready() { + return Promise.resolve(true); + }, + callTool() { + return Promise.reject(new Error("boom")); + }, + }); + + assertEquals(await client.searchNodesWithStatus({ query: "test" }), { + nodes: [], + degraded: true, + }); + } finally { + setLoggerSilentOverride(false); + } + }); + + it("reports searchNodesWithStatus availability warnings with the correct operation name", async () => { + const scheduledTasks: Array<() => void> = []; + const appLogCalls: unknown[] = []; + setSuppressConsoleWarningsDuringTestsOverride(true); + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log(input: unknown) { + appLogCalls.push(input); + }, + }, + }); + + try { + const client = new GraphitiMcpClient({ + start() {}, + stop() { + return Promise.resolve(); + }, + ready() { + return Promise.resolve(true); + }, + callTool() { + return Promise.reject(new GraphitiOfflineError("offline", "offline")); + }, + }); + + assertEquals(await client.searchNodesWithStatus({ query: "test" }), { + nodes: [], + degraded: true, + }); + assertEquals(scheduledTasks.length, 1); + assertEquals(appLogCalls.length, 0); + for (const task of scheduledTasks) task(); + assertEquals( + (appLogCalls[0] as { body: { extra: { operation: string } } }).body + .extra.operation, + "searchNodesWithStatus", + ); + } finally { + setOpenCodeClient(undefined); + setWarningTaskScheduler(undefined); + 
setSuppressConsoleWarningsDuringTestsOverride(undefined); + } + }); +}); diff --git a/src/services/graphiti-mcp.ts b/src/services/graphiti-mcp.ts index 917512c..e36f590 100644 --- a/src/services/graphiti-mcp.ts +++ b/src/services/graphiti-mcp.ts @@ -1,5 +1,6 @@ import { GraphitiConnectionManager, + GraphitiOfflineError, GraphitiSessionExpiredError, type GraphitiToolCaller, GraphitiTransportError, @@ -15,6 +16,11 @@ import { logger } from "./logger.ts"; import { notifyGraphitiAvailabilityIssue } from "./opencode-warning.ts"; import { normalizeEpisode } from "./sdk-normalize.ts"; +export type GraphitiNodeSearchResult = { + nodes: GraphitiNode[]; + degraded: boolean; +}; + export class GraphitiMcpClient { private readonly toolCaller: GraphitiToolCaller; @@ -33,7 +39,18 @@ export class GraphitiMcpClient { } async connect(): Promise { - this.toolCaller.start(); + try { + this.toolCaller.start(); + } catch (err) { + if (isGraphitiOfflineError(err)) { + throw new GraphitiOfflineError( + err.state, + err.message || + "Graphiti client has been stopped and cannot be restarted", + ); + } + throw err; + } return await this.toolCaller.ready(); } @@ -168,13 +185,25 @@ export class GraphitiMcpClient { groupIds?: string[]; maxNodes?: number; }): Promise { + const result = await this.searchNodesWithStatus(params); + return result.nodes; + } + + async searchNodesWithStatus(params: { + query: string; + groupIds?: string[]; + maxNodes?: number; + }): Promise { try { const result = await this.callTool("search_nodes", { query: params.query, group_ids: params.groupIds, max_nodes: params.maxNodes ?? 10, }); - return this.parseWrappedArray(result, "nodes") ?? []; + return { + nodes: this.parseWrappedArray(result, "nodes") ?? 
[], + degraded: false, + }; } catch (err) { if ( isGraphitiTimeoutError(err) || @@ -185,14 +214,14 @@ export class GraphitiMcpClient { notifyGraphitiAvailabilityIssue( "Graphiti unavailable; continuing without memory nodes.", { - operation: "searchNodes", + operation: "searchNodesWithStatus", err, }, ); - return []; + return { nodes: [], degraded: true }; } logger.error("searchNodes error", err); - return []; + return { nodes: [], degraded: true }; } } diff --git a/src/services/hot-tier-slice.test.ts b/src/services/hot-tier-slice.test.ts index dc2972e..d4c0f2c 100644 --- a/src/services/hot-tier-slice.test.ts +++ b/src/services/hot-tier-slice.test.ts @@ -1,18 +1,383 @@ -import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; +import { + assert, + assertEquals, + assertStringIncludes, +} from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { spy } from "jsr:@std/testing@^1.0.0/mock"; import { createChatHandler } from "../handlers/chat.ts"; import { createCompactingHandler } from "../handlers/compacting.ts"; import { createMessagesHandler } from "../handlers/messages.ts"; import { SessionManager } from "../session.ts"; import { BatchDrainService } from "./batch-drain.ts"; +import { GraphitiAsyncService } from "./graphiti-async.ts"; +import { logger, setLoggerDebugOverride } from "./logger.ts"; +import { setSuppressConsoleWarningsDuringTestsOverride } from "./opencode-warning.ts"; import { RedisCacheService } from "./redis-cache.ts"; import { RedisClient } from "./redis-client.ts"; import { RedisEventsService } from "./redis-events.ts"; import { RedisSnapshotService } from "./redis-snapshot.ts"; +type RedisEvent = "close" | "end" | "error" | "ready"; + +setSuppressConsoleWarningsDuringTestsOverride(true); + +class ReconnectingRedisRuntime { + private readonly values = new Map(); + private readonly lists = new Map(); + private readonly hashes = new Map>(); + private readonly listeners = new Map< + 
RedisEvent, + Set<(...args: unknown[]) => void> + >(); + + constructor(private readonly state: { available: boolean }) {} + + connect(): Promise { + this.ensureAvailable(); + this.emit("ready"); + return Promise.resolve(); + } + + ping(): Promise<"PONG"> { + this.ensureAvailable(); + return Promise.resolve("PONG"); + } + + quit(): Promise<"OK"> { + return Promise.resolve("OK"); + } + + private ensureAvailable(): void { + if (!this.state.available) { + throw new Error("redis unavailable"); + } + } + + private ensureList(key: string): string[] { + if (this.values.has(key) || this.hashes.has(key)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + const existing = this.lists.get(key); + if (existing) return existing; + const list: string[] = []; + this.lists.set(key, list); + return list; + } + + private ensureHash(key: string): Map { + if (this.values.has(key) || this.lists.has(key)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + const existing = this.hashes.get(key); + if (existing) return existing; + const hash = new Map(); + this.hashes.set(key, hash); + return hash; + } + + lpush(key: string, value: string): Promise { + this.ensureAvailable(); + const list = this.ensureList(key); + list.unshift(value); + return Promise.resolve(list.length); + } + + rpush(key: string, value: string): Promise { + this.ensureAvailable(); + const list = this.ensureList(key); + list.push(value); + return Promise.resolve(list.length); + } + + lmove( + source: string, + destination: string, + sourceSide: "LEFT" | "RIGHT", + destinationSide: "LEFT" | "RIGHT", + ): Promise { + this.ensureAvailable(); + if (this.values.has(source) || this.hashes.has(source)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + const sourceList = this.lists.get(source) ?? []; + const value = sourceSide === "LEFT" ? 
sourceList.shift() : sourceList.pop(); + if (value === undefined) return Promise.resolve(null); + + const destinationList = this.ensureList(destination); + if (destinationSide === "LEFT") { + destinationList.unshift(value); + } else { + destinationList.push(value); + } + return Promise.resolve(value); + } + + lrange(key: string, start: number, stop: number): Promise { + this.ensureAvailable(); + if (this.values.has(key) || this.hashes.has(key)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + const list = this.lists.get(key) ?? []; + return Promise.resolve(list.slice(start, stop + 1)); + } + + llen(key: string): Promise { + this.ensureAvailable(); + if (this.values.has(key) || this.hashes.has(key)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + return Promise.resolve((this.lists.get(key) ?? []).length); + } + + ltrim(key: string, start: number, stop: number): Promise { + this.ensureAvailable(); + if (this.values.has(key) || this.hashes.has(key)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + const list = this.lists.get(key) ?? []; + this.lists.set(key, list.slice(start, stop + 1)); + return Promise.resolve(); + } + + lindex(key: string, index: number): Promise { + this.ensureAvailable(); + if (this.values.has(key) || this.hashes.has(key)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + return Promise.resolve(this.lists.get(key)?.[index] ?? 
null); + } + + lset(key: string, index: number, value: string): Promise { + this.ensureAvailable(); + if (this.values.has(key) || this.hashes.has(key)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + const list = this.lists.get(key); + if (!list || index < 0 || index >= list.length) { + return Promise.reject(new Error("ERR index out of range")); + } + list[index] = value; + return Promise.resolve(); + } + + get(key: string): Promise { + this.ensureAvailable(); + return Promise.resolve(this.values.get(key) ?? null); + } + + hset(key: string, values: Record): Promise { + this.ensureAvailable(); + const hash = this.ensureHash(key); + let added = 0; + for (const [field, value] of Object.entries(values)) { + if (!hash.has(field)) added += 1; + hash.set(field, value); + } + return Promise.resolve(added); + } + + hgetall(key: string): Promise> { + this.ensureAvailable(); + if (this.values.has(key) || this.lists.has(key)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + return Promise.resolve( + Object.fromEntries((this.hashes.get(key) ?? new Map()).entries()), + ); + } + + set( + key: string, + value: string, + ...args: Array + ): Promise<"OK" | null> { + this.ensureAvailable(); + if (this.lists.has(key) || this.hashes.has(key)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + + const onlyIfAbsent = args.includes("NX"); + if (onlyIfAbsent && this.values.has(key)) return Promise.resolve(null); + this.values.set(key, value); + return Promise.resolve("OK"); + } + + expire(_key: string, _ttlSeconds: number): Promise { + this.ensureAvailable(); + return Promise.resolve(1); + } + + del(key: string): Promise { + this.ensureAvailable(); + const deleted = this.values.delete(key) || this.lists.delete(key) || + this.hashes.delete(key); + return Promise.resolve(deleted ? 
1 : 0); + } + + eval( + script: string, + _numKeys: number, + ...args: string[] + ): Promise { + this.ensureAvailable(); + + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('EXPIRE', KEYS[1], ARGV[2])") + ) { + return Promise.resolve(this.values.get(args[0]) === args[1] ? 1 : 0); + } + + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('DEL', KEYS[1])") + ) { + if (this.values.get(args[0]) !== args[1]) return Promise.resolve(0); + this.values.delete(args[0]); + return Promise.resolve(1); + } + + return Promise.reject(new Error("unsupported eval script")); + } + + on(event: RedisEvent, listener: (...args: unknown[]) => void): void { + const set = this.listeners.get(event) ?? new Set(); + set.add(listener); + this.listeners.set(event, set); + } + + off(event: RedisEvent, listener: (...args: unknown[]) => void): void { + this.listeners.get(event)?.delete(listener); + } + + emit(event: RedisEvent, ...args: unknown[]): void { + for (const listener of this.listeners.get(event) ?? 
[]) { + listener(...args); + } + } +} + +async function waitFor( + condition: () => boolean, + timeoutMs = 250, +): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + if (condition()) return; + await new Promise((resolve) => setTimeout(resolve, 5)); + } + assert(condition(), "condition not met before timeout"); +} + +Deno.test("hot-tier reconnect recovery integrates event/cache/drain flow", async () => { + const state = { available: true }; + const runtime = new ReconnectingRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + await redis.connect(); + + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 300, + }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + const drain = new BatchDrainService(redis, redisEvents, { + batchSize: 8, + batchMaxBytes: 8_192, + drainRetryMax: 2, + }); + + try { + await redisCache.set("group-1", { + query: "recovery query", + refreshedAt: Date.now(), + nodes: [{ + uuid: "node-1", + name: "RECOVERY-NODE-1", + summary: "Recovered persistent memory after reconnect", + }], + nodeRefs: ["node-1"], + }); + await redisEvents.recordEvent("session-1", "group-1", { + id: "event-1", + ts: Date.now(), + category: "decision", + priority: 0, + role: "user", + summary: "Use reconnect-safe recovery flow", + continuityText: + "RECOVERY-TOKEN keeps event recall and drain recovery aligned after reconnect", + }); + + state.available = false; + runtime.emit("close"); + assertEquals(redis.isConnected(), false); + + state.available = true; + await waitFor(() => redis.isConnected()); + + const recoveredCache = await redisCache.get("group-1"); + assertEquals(recoveredCache?.query, "recovery query"); + assertEquals(recoveredCache?.nodeRefs, ["node-1"]); + + const recalled = await redisEvents.recallSessionEvents( + 
"session-1", + "RECOVERY-TOKEN", + ); + assertEquals(recalled.map((event) => event.id), ["event-1"]); + + const calls: Array<{ name: string; episodeBody: string }> = []; + const result = await drain.drainGroup("group-1", { + addMemory(input: { name: string; episodeBody: string }) { + calls.push(input); + return Promise.resolve(); + }, + } as never); + + assertEquals(result, { status: "success", drained: 1 }); + assertEquals(await redisEvents.getPendingCount("group-1"), 0); + assertEquals(calls.length, 1); + assertStringIncludes(calls[0].name, "decision:event-1"); + assertStringIncludes(calls[0].episodeBody, "RECOVERY-TOKEN"); + } finally { + await redis.close(); + } +}); + describe("hot-tier vertical slice", () => { it("records local state, prepares injection, transforms messages, and serves compaction context without live MCP", async () => { - const redis = new RedisClient({ endpoint: "redis://unused" }); + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ReconnectingRedisRuntime({ available: true }), + }); + await redis.connect(); const redisEvents = new RedisEventsService(redis, { sessionTtlSeconds: 300, }); @@ -24,9 +389,7 @@ describe("hot-tier vertical slice", () => { await redisCache.set("group-1", { query: "Continue the overhaul", refreshedAt: Date.now(), - facts: [{ uuid: "fact-1", fact: "Graphiti remains async" }], nodes: [{ uuid: "node-1", name: "ContextOverhaul" }], - factUuids: ["fact-1"], nodeRefs: ["node-1"], }); @@ -37,6 +400,7 @@ describe("hot-tier vertical slice", () => { redisEvents, redisSnapshot, redisCache, + {} as never, ); manager.setParentId("session-1", null); manager.setState( @@ -86,9 +450,9 @@ describe("hot-tier vertical slice", () => { transformOutput.messages[0].parts[0].text, " { assertStringIncludes(compactOutput.context[0], " { + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ReconnectingRedisRuntime({ available: true }), + }); + await 
redis.connect(); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 300, + }); + const redisSnapshot = new RedisSnapshotService(redis, { ttlSeconds: 600 }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + await redisCache.set("group-1", { + query: "cache-only recall", + refreshedAt: Date.now(), + nodes: [{ + uuid: "node-1", + name: "ArchitectureDecision", + summary: + "Cached cross-session recall about keeping Graphiti off hook-time injection", + }], + episodeSummaries: [ + "ArchitectureDecision → HotPath: Cached fact summary about Redis-backed injection", + ], + nodeRefs: ["node-1"], + }); + + const manager = new SessionManager( + "group-1", + "user-1", + { session: { get: () => ({ parentID: null }) } } as never, + redisEvents, + redisSnapshot, + redisCache, + {} as never, + ); + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + + const graphitiAsync = { + scheduleCacheRefresh() {}, + scheduleDrain() {}, + }; + + const chat = createChatHandler({ + sessionManager: manager, + redisEvents, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, + }); + const transform = createMessagesHandler({ sessionManager: manager }); + const compacting = createCompactingHandler({ sessionManager: manager }); + + await chat( + { sessionID: "session-1" } as never, + { + parts: [{ + type: "text", + text: "cache-only recall", + }], + } as never, + ); + + const transformOutput = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: "cache-only recall", + }], + }], + }; + await transform( + { message: "cache-only recall" } as never, + transformOutput as never, + ); + + const compactOutput = { context: [] as string[] }; + await compacting( + { sessionID: "session-1" } as never, + compactOutput as never, + ); + + assertStringIncludes( + 
transformOutput.messages[0].parts[0].text, + " { - const redis = new RedisClient({ endpoint: "redis://unused" }); + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ReconnectingRedisRuntime({ available: true }), + }); + await redis.connect(); const redisEvents = new RedisEventsService(redis, { sessionTtlSeconds: 300, }); @@ -126,6 +594,7 @@ describe("hot-tier vertical slice", () => { redisEvents, redisSnapshot, redisCache, + {} as never, ); manager.setParentId("session-1", null); manager.setState( @@ -209,7 +678,11 @@ describe("hot-tier vertical slice", () => { }); it("recalls continuity-rich events without relying on transcript bodies", async () => { - const redis = new RedisClient({ endpoint: "redis://unused" }); + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ReconnectingRedisRuntime({ available: true }), + }); + await redis.connect(); const redisEvents = new RedisEventsService(redis, { sessionTtlSeconds: 300, }); @@ -236,7 +709,11 @@ describe("hot-tier vertical slice", () => { }); it("drains structured semantic payloads to Graphiti asynchronously", async () => { - const redis = new RedisClient({ endpoint: "redis://unused" }); + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ReconnectingRedisRuntime({ available: true }), + }); + await redis.connect(); const redisEvents = new RedisEventsService(redis, { sessionTtlSeconds: 300, }); @@ -283,7 +760,11 @@ describe("hot-tier vertical slice", () => { }); it("updates only the refresh query field without clobbering cache metadata", async () => { - const redis = new RedisClient({ endpoint: "redis://unused" }); + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ReconnectingRedisRuntime({ available: true }), + }); + await redis.connect(); const redisCache = new RedisCacheService(redis, { ttlSeconds: 300, driftThreshold: 0.5, @@ -292,7 +773,7 @@ 
describe("hot-tier vertical slice", () => { await redis.setHashFields("memory-cache:group-1:meta", { lastQuery: "previous query", lastRefresh: 123, - factUuids: "fact-1,fact-2", + retainedField: "fact-1,fact-2", }, 300); await redisCache.rememberRefreshQuery("group-1", "next query"); @@ -300,87 +781,1033 @@ describe("hot-tier vertical slice", () => { assertEquals(await redis.getHashAll("memory-cache:group-1:meta"), { lastQuery: "next query", lastRefresh: "123", - factUuids: "fact-1,fact-2", + retainedField: "fact-1,fact-2", }); }); - it("classifies drift deterministically at the configured threshold boundary", () => { + it("serializes same-group refreshes and follows up with the newest queued query", async () => { const redis = new RedisClient({ endpoint: "redis://unused" }); const redisCache = new RedisCacheService(redis, { ttlSeconds: 300, driftThreshold: 0.5, }); - const aligned = redisCache.classifyRefresh({ - query: "alpha beta", - refreshedAt: Date.now(), - facts: [], - nodes: [], - factUuids: [], - nodeRefs: [], - }, "alpha beta gamma delta"); - const drifted = redisCache.classifyRefresh({ - query: "alpha beta", - refreshedAt: Date.now(), - facts: [], - nodes: [], - factUuids: [], - nodeRefs: [], - }, "alpha delta epsilon"); + let resolveAlpha!: ( + value: { + nodes: Array<{ uuid: string; name: string }>; + degraded: boolean; + }, + ) => void; + let resolveBeta!: ( + value: { + nodes: Array<{ uuid: string; name: string }>; + degraded: boolean; + }, + ) => void; + const alphaStarted = new Promise((resolve) => { + resolveAlpha = (value) => { + resolve(); + alphaResult.resolve(value); + }; + }); + const betaStarted = new Promise((resolve) => { + resolveBeta = (value) => { + resolve(); + betaResult.resolve(value); + }; + }); - assertEquals(aligned.classification, "aligned"); - assertEquals(aligned.shouldRefresh, false); - assertEquals(aligned.similarity, 0.5); - assertEquals(drifted.classification, "drifted"); - assertEquals(drifted.shouldRefresh, true); + const 
alphaResult = Promise.withResolvers<{ + nodes: Array<{ uuid: string; name: string }>; + degraded: boolean; + }>(); + const betaResult = Promise.withResolvers<{ + nodes: Array<{ uuid: string; name: string }>; + degraded: boolean; + }>(); + + const searchCalls: string[] = []; + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus(input: { query: string }) { + searchCalls.push(input.query); + if (input.query === "Alpha query") return alphaResult.promise; + if (input.query === "Beta query") return betaResult.promise; + return Promise.reject(new Error(`unexpected query: ${input.query}`)); + }, + } as never, + redisCache, + { + drainGroup() { + return Promise.resolve({ status: "empty" as const, drained: 0 }); + }, + } as never, + ); + + graphitiAsync.scheduleCacheRefresh("group-1", "Alpha query"); + await waitFor(() => searchCalls.includes("Alpha query")); + graphitiAsync.scheduleCacheRefresh("group-1", "Beta query"); + await new Promise((resolve) => setTimeout(resolve, 0)); + resolveAlpha({ + nodes: [{ uuid: "alpha-node", name: "AlphaNode" }], + degraded: false, + }); + await alphaStarted; + await waitFor(() => + searchCalls.filter((query) => query === "Beta query").length === 1 + ); + resolveBeta({ + nodes: [{ uuid: "beta-node", name: "BetaNode" }], + degraded: false, + }); + await betaStarted; + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + + const cached = await redisCache.get("group-1"); + const meta = await redisCache.getMeta("group-1"); + + assertEquals(searchCalls, ["Alpha query", "Beta query"]); + assertEquals(cached?.query, "Beta query"); + assertEquals(cached?.nodeRefs, ["beta-node"]); + assertEquals(cached?.nodes, [{ uuid: "beta-node", name: "BetaNode" }]); + assertEquals(meta?.lastQuery, "Beta query"); }); - it("detects primer-only and stale cache states 
while preserving injection", async () => { + it("coalesces duplicate follow-up refresh requests while one refresh is in flight", async () => { const redis = new RedisClient({ endpoint: "redis://unused" }); - const redisEvents = new RedisEventsService(redis, { - sessionTtlSeconds: 300, - }); - const redisSnapshot = new RedisSnapshotService(redis, { ttlSeconds: 600 }); const redisCache = new RedisCacheService(redis, { ttlSeconds: 300, driftThreshold: 0.5, }); - const manager = new SessionManager( - "group-1", - "user-1", - { session: { get: () => ({ parentID: null }) } } as never, - redisEvents, - redisSnapshot, + + const alphaResult = Promise.withResolvers<{ + nodes: Array<{ uuid: string; name: string }>; + degraded: boolean; + }>(); + const betaResult = Promise.withResolvers<{ + nodes: Array<{ uuid: string; name: string }>; + degraded: boolean; + }>(); + + const searchCalls: string[] = []; + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus(input: { query: string }) { + searchCalls.push(input.query); + if (input.query === "Alpha query") return alphaResult.promise; + if (input.query === "Beta query") return betaResult.promise; + return Promise.reject(new Error(`unexpected query: ${input.query}`)); + }, + } as never, redisCache, - ); - manager.setParentId("session-1", null); - manager.setState( - "session-1", - manager.createDefaultState("group-1", "user-1"), + { + drainGroup() { + return Promise.resolve({ status: "empty" as const, drained: 0 }); + }, + } as never, ); - await redisCache.set("group-1", { - query: "primer", - refreshedAt: Date.now(), - facts: [], - nodes: [], - factUuids: [], - nodeRefs: [], - episodeSummaries: ["Primer episode"], + graphitiAsync.scheduleCacheRefresh("group-1", "Alpha query"); + await waitFor(() => searchCalls.includes("Alpha query")); + + graphitiAsync.scheduleCacheRefresh("group-1", "Beta query"); 
+ graphitiAsync.scheduleCacheRefresh("group-1", "Beta query"); + graphitiAsync.scheduleCacheRefresh("group-1", " Beta query "); + + alphaResult.resolve({ + nodes: [{ uuid: "alpha-node", name: "AlphaNode" }], + degraded: false, }); - const primerPrepared = await manager.prepareInjection( - "session-1", - "real query", + await waitFor(() => + searchCalls.filter((query) => query === "Beta query").length === 1 ); - assertEquals(primerPrepared?.refreshDecision.classification, "primer-only"); - assertStringIncludes(primerPrepared?.envelope ?? "", " setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + + const cached = await redisCache.get("group-1"); + const meta = await redisCache.getMeta("group-1"); + + assertEquals(searchCalls, ["Alpha query", "Beta query"]); + assertEquals(cached?.query, "Beta query"); + assertEquals(cached?.nodeRefs, ["beta-node"]); + assertEquals(meta?.lastQuery, "Beta query"); + }); + + it("stores fact-derived summaries alongside refreshed nodes", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([{ + uuid: "fact-1", + fact: "Keep Graphiti off the hot path", + source_node: { uuid: "source-1", name: "ArchitectureDecision" }, + target_node: { uuid: "target-1", name: "HotPath" }, + }]); + }, + searchNodesWithStatus() { + return Promise.resolve({ + nodes: [{ uuid: "node-1", name: "HotPath" }], + degraded: false, + }); + }, + } as never, + redisCache, + { + drainGroup() { + return Promise.resolve({ status: "empty" as const, drained: 0 }); + }, + } as never, + ); + + graphitiAsync.scheduleCacheRefresh("group-1", "hot path recall"); + let committed = false; + for (let attempt = 0; attempt < 50; attempt += 1) { + committed = (await 
redisCache.get("group-1"))?.query === + "hot path recall"; + if (committed) break; + await new Promise((resolve) => setTimeout(resolve, 5)); + } + assert(committed, "fact-backed refresh did not commit before timeout"); + + const cached = await redisCache.get("group-1"); + assertEquals(cached?.nodeRefs, ["node-1"]); + assertEquals(cached?.episodeSummaries, [ + "ArchitectureDecision → HotPath: Keep Graphiti off the hot path", + ]); + }); + + it("dedupes same-query case and whitespace variants on the canonical key", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + const searchCalls: string[] = []; + const searchResult = Promise.withResolvers<{ + nodes: Array<{ uuid: string; name: string }>; + degraded: boolean; + }>(); + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus(input: { query: string }) { + searchCalls.push(input.query); + return searchResult.promise; + }, + } as never, + redisCache, + { + drainGroup() { + return Promise.resolve({ status: "empty" as const, drained: 0 }); + }, + } as never, + ); + + graphitiAsync.scheduleCacheRefresh("group-1", " Alpha Query "); + await waitFor(() => searchCalls.length === 1); + graphitiAsync.scheduleCacheRefresh("group-1", "alpha query"); + await new Promise((resolve) => setTimeout(resolve, 0)); + + searchResult.resolve({ + nodes: [{ uuid: "alpha-node", name: "AlphaNode" }], + degraded: false, + }); + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + + const cached = await redisCache.get("group-1"); + const meta = await redisCache.getMeta("group-1"); + + assertEquals(searchCalls, ["Alpha Query"]); + assertEquals(cached?.query, "Alpha Query"); + assertEquals(cached?.nodeRefs, ["alpha-node"]); + 
assertEquals(meta?.lastQuery, "Alpha Query"); + }); + + it("prefers remembered metadata query over stale cached query after drain success", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redisCache.set("group-1", { + query: "older cached query", + refreshedAt: Date.now() - 60_000, + nodes: [], + nodeRefs: [], + }); + await redisCache.rememberRefreshQuery("group-1", "newer remembered query"); + + const refreshCalls: Array<{ groupId: string; query: string }> = []; + let drainCalls = 0; + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus(input: { query: string; groupIds: string[] }) { + refreshCalls.push({ groupId: input.groupIds[0], query: input.query }); + return Promise.resolve({ nodes: [], degraded: false }); + }, + } as never, + redisCache, + { + drainGroup() { + drainCalls += 1; + return Promise.resolve( + drainCalls === 1 + ? 
{ status: "success" as const, drained: 1 } + : { status: "empty" as const, drained: 0 }, + ); + }, + } as never, + ); + + graphitiAsync.scheduleDrain("group-1"); + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + + assertEquals(refreshCalls, [{ + groupId: "group-1", + query: "newer remembered query", + }]); + }); + + it("drains multiple claimable batches from one scheduled trigger before refreshing", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redisCache.set("group-1", { + query: "cached query", + refreshedAt: Date.now(), + nodes: [], + nodeRefs: [], + }); + await redisCache.rememberRefreshQuery("group-1", "remembered query"); + + const drainStatuses = [ + { status: "success" as const, drained: 2 }, + { status: "success" as const, drained: 1 }, + { status: "empty" as const, drained: 0 }, + ]; + const drainCalls: string[] = []; + const refreshCalls: Array<{ groupId: string; query: string }> = []; + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus(input: { query: string; groupIds: string[] }) { + refreshCalls.push({ groupId: input.groupIds[0], query: input.query }); + return Promise.resolve({ nodes: [], degraded: false }); + }, + } as never, + redisCache, + { + drainGroup(groupId: string) { + drainCalls.push(groupId); + return Promise.resolve( + drainStatuses.shift() ?? 
{ + status: "empty" as const, + drained: 0, + }, + ); + }, + } as never, + ); + + graphitiAsync.scheduleDrain("group-1"); + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + + assertEquals(drainCalls, ["group-1", "group-1", "group-1"]); + assertEquals(refreshCalls, [{ + groupId: "group-1", + query: "remembered query", + }]); + }); + + it("preserves an armed retry when a duplicate schedule arrives during in-flight cleanup", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redisCache.set("group-1", { + query: "cached query", + refreshedAt: Date.now(), + nodes: [], + nodeRefs: [], + }); + + const refreshCalls: Array<{ groupId: string; query: string }> = []; + let releaseDrain!: () => void; + const firstDrainStarted = new Promise((resolve) => { + releaseDrain = resolve; + }); + const drainCalls: string[] = []; + let callCount = 0; + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus(input: { query: string; groupIds: string[] }) { + refreshCalls.push({ groupId: input.groupIds[0], query: input.query }); + return Promise.resolve({ nodes: [], degraded: false }); + }, + } as never, + redisCache, + { + async drainGroup(groupId: string) { + drainCalls.push(groupId); + callCount += 1; + if (callCount === 1) { + await firstDrainStarted; + return { status: "retry" as const, drained: 0 }; + } + if (callCount === 2) { + return { status: "success" as const, drained: 1 }; + } + return { status: "empty" as const, drained: 0 }; + }, + } as never, + 1, + ); + + const drainRetryTimers = ( + graphitiAsync as unknown as { + 
drainRetryTimers: Map>; + } + ).drainRetryTimers; + const originalSet = drainRetryTimers.set.bind(drainRetryTimers); + drainRetryTimers.set = ((groupId, timer) => { + const result = originalSet(groupId, timer); + graphitiAsync.scheduleDrain(groupId); + return result; + }) as typeof drainRetryTimers.set; + + graphitiAsync.scheduleDrain("group-1"); + await new Promise((resolve) => setTimeout(resolve, 0)); + releaseDrain(); + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + await waitFor(() => drainCalls.length === 3); + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + + assertEquals(drainCalls, ["group-1", "group-1", "group-1"]); + assertEquals(refreshCalls, [{ + groupId: "group-1", + query: "cached query", + }]); + }); + + it("re-arms a delayed drain after backoff without stacking duplicate timers", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redisCache.set("group-1", { + query: "cached query", + refreshedAt: Date.now(), + nodes: [], + nodeRefs: [], + }); + + const drainStatuses = [ + { status: "backoff" as const, drained: 0 }, + { status: "success" as const, drained: 1 }, + { status: "empty" as const, drained: 0 }, + ]; + const drainCalls: string[] = []; + const refreshCalls: Array<{ groupId: string; query: string }> = []; + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus(input: { query: string; groupIds: string[] }) { + refreshCalls.push({ groupId: input.groupIds[0], query: input.query }); + return Promise.resolve({ nodes: [], degraded: false }); + }, + } as never, + redisCache, + { + drainGroup(groupId: string) { + drainCalls.push(groupId); + return 
Promise.resolve( + drainStatuses.shift() ?? { + status: "empty" as const, + drained: 0, + }, + ); + }, + } as never, + 1, + ); + + graphitiAsync.scheduleDrain("group-1"); + graphitiAsync.scheduleDrain("group-1"); + await waitFor(() => drainCalls.length === 3); + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + + assertEquals(drainCalls, ["group-1", "group-1", "group-1"]); + assertEquals(refreshCalls, [{ + groupId: "group-1", + query: "cached query", + }]); + }); + + it("keeps one bounded recovery timer for a stuck same-group drain", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redisCache.set("group-1", { + query: "cached query", + refreshedAt: Date.now(), + nodes: [], + nodeRefs: [], + }); + + const neverSettles = new Promise(() => {}); + const drainCalls: string[] = []; + const warnSpy = spy(logger, "warn"); + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus() { + return Promise.resolve({ nodes: [], degraded: false }); + }, + } as never, + redisCache, + { + drainGroup(groupId: string) { + drainCalls.push(groupId); + return neverSettles; + }, + } as never, + 1, + 1, + ); + + try { + const drainRecoveryTimers = ( + graphitiAsync as unknown as { + drainRecoveryTimers: Map< + string, + { run: Promise; timer: ReturnType } + >; + } + ).drainRecoveryTimers; + + graphitiAsync.scheduleDrain("group-1"); + await new Promise((resolve) => setTimeout(resolve, 0)); + graphitiAsync.scheduleDrain("group-1"); + graphitiAsync.scheduleDrain("group-1"); + + assertEquals(drainRecoveryTimers.size, 1); + + await waitFor(() => warnSpy.calls.length === 1); + await new Promise((resolve) => setTimeout(resolve, 0)); + + assertEquals(drainCalls, 
["group-1"]); + assertEquals( + warnSpy.calls[0].args[0], + "Graphiti drain recovery timeout exceeded; leaving in-flight drain intact", + ); + assertEquals(warnSpy.calls[0].args[1], { + groupId: "group-1", + timeoutMs: 1, + }); + assertEquals(drainRecoveryTimers.size, 0); + } finally { + warnSpy.restore(); + } + }); + + it("warns on a stuck drain even without a duplicate schedule signal", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redisCache.set("group-1", { + query: "cached query", + refreshedAt: Date.now(), + nodes: [], + nodeRefs: [], + }); + + const neverSettles = new Promise(() => {}); + const drainCalls: string[] = []; + const warnSpy = spy(logger, "warn"); + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus() { + return Promise.resolve({ nodes: [], degraded: false }); + }, + } as never, + redisCache, + { + drainGroup(groupId: string) { + drainCalls.push(groupId); + return neverSettles; + }, + } as never, + 1, + 1, + ); + + try { + graphitiAsync.scheduleDrain("group-1"); + + await waitFor(() => warnSpy.calls.length === 1); + await new Promise((resolve) => setTimeout(resolve, 0)); + + assertEquals(drainCalls, ["group-1"]); + assertEquals( + warnSpy.calls[0].args[0], + "Graphiti drain recovery timeout exceeded; leaving in-flight drain intact", + ); + assertEquals(warnSpy.calls[0].args[1], { + groupId: "group-1", + timeoutMs: 1, + }); + } finally { + warnSpy.restore(); + } + }); + + it("stores fact-only refreshes with empty nodes when node search degrades", async () => { + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ReconnectingRedisRuntime({ available: true }), + }); + await redis.connect(); + const redisCache = new 
RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redisCache.set("group-1", { + query: "warm query", + refreshedAt: 111, + nodes: [{ + uuid: "warm-node", + name: "WarmNode", + summary: "Existing warm cache entry", + }], + nodeRefs: ["warm-node"], + }); + + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([ + { + fact: "fact:outage", + source_node: { name: "WarmNode" }, + target_node: { name: "OutageTopic" }, + }, + ]); + }, + searchNodesWithStatus() { + return Promise.resolve({ nodes: [], degraded: true }); + }, + } as never, + redisCache, + { + drainGroup() { + return Promise.resolve({ status: "success" as const }); + }, + } as never, + ); + + graphitiAsync.scheduleCacheRefresh("group-1", "outage query"); + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + + const cached = await redisCache.get("group-1"); + const meta = await redisCache.getMeta("group-1"); + + assertEquals(cached?.query, "outage query"); + assertEquals(cached?.nodes, []); + assertEquals(cached?.nodeRefs, []); + assertEquals(cached?.episodeSummaries, [ + "WarmNode → OutageTopic: fact:outage", + ]); + assertEquals(meta?.lastQuery, "outage query"); + }); + + it("writes successful empty refresh results into cache", async () => { + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ReconnectingRedisRuntime({ available: true }), + }); + await redis.connect(); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redisCache.set("group-1", { + query: "warm query", + refreshedAt: 111, + nodes: [{ + uuid: "warm-node", + name: "WarmNode", + }], + nodeRefs: ["warm-node"], + }); + + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + 
return Promise.resolve([]); + }, + searchNodesWithStatus() { + return Promise.resolve({ nodes: [], degraded: false }); + }, + } as never, + redisCache, + { + drainGroup() { + return Promise.resolve({ status: "success" as const }); + }, + } as never, + ); + + const startedAt = Date.now(); + graphitiAsync.scheduleCacheRefresh("group-1", "empty query"); + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + + const cached = await redisCache.get("group-1"); + const meta = await redisCache.getMeta("group-1"); + + assert(cached); + assertEquals(cached.query, "empty query"); + assertEquals(cached.nodes, []); + assertEquals(cached.nodeRefs, []); + assert(cached.refreshedAt >= startedAt); + assertEquals(meta?.lastQuery, "empty query"); + }); + + it("surfaces unexpected async background failures at warn level when debug is disabled", async () => { + setLoggerDebugOverride(false); + const warnSpy = spy(logger, "warn"); + const debugSpy = spy(logger, "debug"); + let graphitiAsync: GraphitiAsyncService | undefined; + + try { + graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.reject(new Error("primer failed")); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus() { + return Promise.reject(new Error("refresh failed")); + }, + } as never, + { + get() { + return Promise.resolve(null); + }, + set() { + return Promise.resolve(); + }, + rememberRefreshQuery() { + return Promise.resolve(); + }, + getMeta() { + return Promise.resolve(null); + }, + } as never, + { + drainGroup() { + return Promise.reject(new Error("drain failed")); + }, + } as never, + ); + + graphitiAsync.schedulePrimer("group-1"); + graphitiAsync.scheduleCacheRefresh("group-1", "refresh me"); + graphitiAsync.scheduleDrain("group-1"); + + await new Promise((resolve) => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); + + assertEquals(warnSpy.calls.length, 
3); + assertEquals( + new Set(warnSpy.calls.map((call) => call.args[0])), + new Set([ + "Graphiti primer failed", + "Graphiti cache refresh failed", + "Graphiti drain failed", + ]), + ); + assertEquals( + new Set(warnSpy.calls.map((call) => (call.args[1] as Error).message)), + new Set(["primer failed", "refresh failed", "drain failed"]), + ); + assertEquals(debugSpy.calls.length, 0); + } finally { + graphitiAsync?.dispose(); + warnSpy.restore(); + debugSpy.restore(); + setLoggerDebugOverride(undefined); + } + }); + + it("clears pending retry and recovery timers when disposed", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + const neverSettles = new Promise(() => {}); + const drainCalls: string[] = []; + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus() { + return Promise.resolve({ nodes: [], degraded: false }); + }, + } as never, + redisCache, + { + drainGroup(groupId: string) { + drainCalls.push(groupId); + return neverSettles; + }, + } as never, + 50, + ); + + graphitiAsync.scheduleDrain("group-1"); + await new Promise((resolve) => setTimeout(resolve, 0)); + graphitiAsync.scheduleDrain("group-1"); + + const internals = graphitiAsync as unknown as { + drainRecoveryTimers: Map< + string, + { run: Promise; timer: ReturnType } + >; + drainRetryTimers: Map>; + drainInFlight: Map>; + }; + assertEquals(internals.drainRecoveryTimers.size, 1); + assertEquals(internals.drainInFlight.size, 1); + + graphitiAsync.dispose(); + await new Promise((resolve) => setTimeout(resolve, 75)); + + assertEquals(drainCalls, ["group-1"]); + assertEquals(internals.drainRecoveryTimers.size, 0); + assertEquals(internals.drainRetryTimers.size, 0); + assertEquals(internals.drainInFlight.size, 0); + }); + + it("flushes 
undersized pending groups before dispose completes", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + const drainCalls: string[] = []; + const graphitiAsync = new GraphitiAsyncService( + { + getEpisodes() { + return Promise.resolve([]); + }, + searchMemoryFacts() { + return Promise.resolve([]); + }, + searchNodesWithStatus() { + return Promise.resolve({ nodes: [], degraded: false }); + }, + } as never, + redisCache, + { + drainGroup(groupId: string) { + drainCalls.push(groupId); + return Promise.resolve( + drainCalls.length === 1 + ? { status: "success" as const, drained: 1 } + : { status: "empty" as const, drained: 0 }, + ); + }, + } as never, + ); + + await graphitiAsync.flushPendingGroups(["group-1"]); + await graphitiAsync.dispose(); + + assertEquals(drainCalls, ["group-1", "group-1"]); + }); + + it("classifies drift deterministically at the configured threshold boundary", () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + const aligned = redisCache.classifyRefresh({ + query: "alpha beta", + refreshedAt: Date.now(), + nodes: [], + nodeRefs: [], + }, "alpha beta gamma delta"); + const drifted = redisCache.classifyRefresh({ + query: "alpha beta", + refreshedAt: Date.now(), + nodes: [], + nodeRefs: [], + }, "alpha delta epsilon"); + + assertEquals(aligned.classification, "aligned"); + assertEquals(aligned.shouldRefresh, false); + assertEquals(aligned.similarity, 0.5); + assertEquals(drifted.classification, "drifted"); + assertEquals(drifted.shouldRefresh, true); + }); + + it("detects primer-only and stale cache states while keeping injection available", async () => { + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ReconnectingRedisRuntime({ available: true }), + }); 
+ await redis.connect(); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 300, + }); + const redisSnapshot = new RedisSnapshotService(redis, { ttlSeconds: 600 }); + const redisCache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + const manager = new SessionManager( + "group-1", + "user-1", + { session: { get: () => ({ parentID: null }) } } as never, + redisEvents, + redisSnapshot, + redisCache, + {} as never, + ); + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + + await redisCache.set("group-1", { + query: "primer", + refreshedAt: Date.now(), + nodes: [], + nodeRefs: [], + episodeSummaries: ["Primer episode"], + }); + const primerPrepared = await manager.prepareInjection( + "session-1", + "real query", + ); + assertEquals(primerPrepared?.refreshDecision.classification, "primer-only"); + assertStringIncludes(primerPrepared?.envelope ?? "", " { "older query", ); assertEquals(stalePrepared?.refreshDecision.classification, "stale"); - assertStringIncludes(stalePrepared?.envelope ?? "", "Stale fact"); + assertStringIncludes(stalePrepared?.envelope ?? 
"", " { @@ -405,14 +1833,11 @@ describe("hot-tier vertical slice", () => { await redisCache.set("group-1", { query: "architecture token", refreshedAt: Date.now(), - facts: [{ - uuid: "fact-1", - fact: - "Exact token ALPHA-RECALL-42 identifies the architecture decision", + nodes: [{ + uuid: "node-1", + name: "ALPHA-RECALL-42 architecture decision", }], - nodes: [], - factUuids: ["fact-1"], - nodeRefs: [], + nodeRefs: ["node-1"], }); const sameGroupManager = new SessionManager( @@ -422,6 +1847,7 @@ describe("hot-tier vertical slice", () => { redisEvents, redisSnapshot, redisCache, + {} as never, ); sameGroupManager.setParentId("session-b", null); sameGroupManager.setState( @@ -436,6 +1862,7 @@ describe("hot-tier vertical slice", () => { redisEvents, redisSnapshot, redisCache, + {} as never, ); otherGroupManager.setParentId("session-c", null); otherGroupManager.setState( @@ -457,11 +1884,14 @@ describe("hot-tier vertical slice", () => { (otherGroupPrepared?.envelope ?? "").includes("ALPHA-RECALL-42"), false, ); - assertEquals(otherGroupPrepared?.factUuids ?? 
[], []); }); - it("injects stale cached memory immediately while scheduling async refresh", async () => { - const redis = new RedisClient({ endpoint: "redis://unused" }); + it("schedules async refresh for stale cache while suppressing low-value stale persistent memory", async () => { + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ReconnectingRedisRuntime({ available: true }), + }); + await redis.connect(); const redisEvents = new RedisEventsService(redis, { sessionTtlSeconds: 300, }); @@ -474,9 +1904,7 @@ describe("hot-tier vertical slice", () => { await redisCache.set("group-1", { query: "old recall topic", refreshedAt: Date.now() - 301_000, - facts: [{ uuid: "fact-1", fact: "Stale but still useful recall fact" }], nodes: [], - factUuids: ["fact-1"], nodeRefs: [], }); @@ -487,6 +1915,7 @@ describe("hot-tier vertical slice", () => { redisEvents, redisSnapshot, redisCache, + {} as never, ); manager.setParentId("session-1", null); manager.setState( @@ -540,7 +1969,13 @@ describe("hot-tier vertical slice", () => { }]); assertStringIncludes( transformOutput.messages[0].parts[0].text, - "Stale but still useful recall fact", + " { let consoleDebugSpy: any; beforeEach(() => { - consoleLogSpy = spy(console, "log"); - consoleWarnSpy = spy(console, "warn"); - consoleErrorSpy = spy(console, "error"); - consoleDebugSpy = spy(console, "debug"); + consoleLogSpy = stub(console, "log", () => {}); + consoleWarnSpy = stub(console, "warn", () => {}); + consoleErrorSpy = stub(console, "error", () => {}); + consoleDebugSpy = stub(console, "debug", () => {}); }); afterEach(() => { @@ -36,6 +37,7 @@ describe("logger", () => { consoleDebugSpy.restore(); setLoggerDebugOverride(undefined); setLoggerSilentOverride(false); + setSuppressConsoleWarningsDuringTestsOverride(undefined); setOpenCodeClient(undefined); setWarningTaskScheduler(undefined); }); @@ -53,6 +55,7 @@ describe("logger", () => { }); it("should log warn messages with [graphiti] 
prefix", async () => { + setSuppressConsoleWarningsDuringTestsOverride(false); const { logger } = await import("./logger.ts"); logger.warn("warning message"); assertEquals(consoleWarnSpy.calls.length, 1); @@ -95,6 +98,7 @@ describe("logger", () => { }); it("should forward multiple arguments to warn", async () => { + setSuppressConsoleWarningsDuringTestsOverride(false); const { logger } = await import("./logger.ts"); logger.warn("warning", { code: 42 }, ["array"]); assertEquals(consoleWarnSpy.calls.length, 1); @@ -139,6 +143,55 @@ describe("logger", () => { }]); }); + it("falls back to console.warn when structured warn logging rejects later", async () => { + setSuppressConsoleWarningsDuringTestsOverride(false); + const scheduledTasks: Array<() => void> = []; + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: () => Promise.reject(new Error("structured warn failed")), + }, + }); + + const { logger } = await import("./logger.ts"); + logger.warn("warning", { code: 42 }); + + assertEquals(consoleWarnSpy.calls.length, 0); + assertEquals(scheduledTasks.length, 1); + for (const task of scheduledTasks) task(); + await Promise.resolve(); + assertEquals(consoleWarnSpy.calls.length, 1); + assertEquals(consoleWarnSpy.calls[0].args[0], "[graphiti]"); + assertEquals(consoleWarnSpy.calls[0].args[1], "warning"); + assertEquals(consoleWarnSpy.calls[0].args[2], { + data: [{ code: 42 }], + }); + }); + + it("falls back to console.warn when structured warn scheduling throws", async () => { + setSuppressConsoleWarningsDuringTestsOverride(false); + setWarningTaskScheduler(() => { + throw new Error("schedule failed"); + }); + setOpenCodeClient({ + app: { + log: () => Promise.resolve(), + }, + }); + + const { logger } = await import("./logger.ts"); + logger.warn("warning", { code: 42 }); + + assertEquals(consoleWarnSpy.calls.length, 1); + assertEquals(consoleWarnSpy.calls[0].args, [ + "[graphiti]", + "warning", + { code: 42 
}, + ]); + }); + it("should forward multiple arguments to error", async () => { const { logger } = await import("./logger.ts"); const error = new Error("test"); @@ -177,6 +230,7 @@ describe("logger", () => { }); it("warn always emits regardless of GRAPHITI_DEBUG", async () => { + setSuppressConsoleWarningsDuringTestsOverride(false); const { logger } = await import("./logger.ts"); logger.warn("warning message"); assertEquals(consoleWarnSpy.calls.length, 1); @@ -187,11 +241,25 @@ describe("logger", () => { }); it("warn falls back to console when no client is available", async () => { + setSuppressConsoleWarningsDuringTestsOverride(false); const { logger } = await import("./logger.ts"); logger.warn("warning message"); assertEquals(consoleWarnSpy.calls.length, 1); }); + it("warn still emits error payloads when debug is disabled", async () => { + setSuppressConsoleWarningsDuringTestsOverride(false); + const { logger } = await import("./logger.ts"); + const err = new Error("background failure"); + logger.warn("warning message", err); + assertEquals(consoleWarnSpy.calls.length, 1); + assertEquals(consoleWarnSpy.calls[0].args, [ + "[graphiti]", + "warning message", + err, + ]); + }); + it("error always emits regardless of GRAPHITI_DEBUG", async () => { const { logger } = await import("./logger.ts"); logger.error("error message"); @@ -209,6 +277,7 @@ describe("logger", () => { }); it("info and debug suppressed; warn and error always emit", async () => { + setSuppressConsoleWarningsDuringTestsOverride(false); const { logger } = await import("./logger.ts"); const err = new Error("test"); logger.info("message", 123, { key: "value" }); @@ -248,6 +317,7 @@ describe("logger", () => { }); it("warn still emits when GRAPHITI_DEBUG is empty string", async () => { + setSuppressConsoleWarningsDuringTestsOverride(false); const { logger } = await import("./logger.ts"); logger.warn("alert"); assertEquals(consoleWarnSpy.calls.length, 1); diff --git a/src/services/logger.ts 
b/src/services/logger.ts index d1f0417..a9ece95 100644 --- a/src/services/logger.ts +++ b/src/services/logger.ts @@ -1,4 +1,7 @@ -import { logStructuredWarning } from "./opencode-warning.ts"; +import { + logStructuredWarning, + shouldSuppressConsoleWarningsDuringTests, +} from "./opencode-warning.ts"; const console = globalThis.console as { log: (...args: unknown[]) => void; @@ -85,7 +88,12 @@ export const logger = { warn: (...args: unknown[]) => { if (silentOverride) return; const payload = toWarningPayload(args); - if (logStructuredWarning(payload.message, payload.extra)) return; + try { + if (logStructuredWarning(payload.message, payload.extra)) return; + } catch { + // Fall back to console below when structured warning scheduling fails. + } + if (shouldSuppressConsoleWarningsDuringTests()) return; console.warn(PREFIX, ...args); }, error: (...args: unknown[]) => { diff --git a/src/services/opencode-warning.test.ts b/src/services/opencode-warning.test.ts new file mode 100644 index 0000000..fde6075 --- /dev/null +++ b/src/services/opencode-warning.test.ts @@ -0,0 +1,217 @@ +import { + assertEquals, + assertRejects, + assertStrictEquals, +} from "jsr:@std/assert@^1.0.0"; +import { + afterEach, + beforeEach, + describe, + it, +} from "jsr:@std/testing@^1.0.0/bdd"; +import { stub } from "jsr:@std/testing@^1.0.0/mock"; +import { + notifyGraphitiAvailabilityIssue, + setOpenCodeClient, + setSuppressConsoleWarningsDuringTestsOverride, + setWarningTaskScheduler, + showWarningToast, +} from "./opencode-warning.ts"; + +describe("opencode warning delivery", () => { + let consoleWarnSpy: { + restore(): void; + calls: Array<{ args: unknown[] }>; + }; + + beforeEach(() => { + consoleWarnSpy = stub(console, "warn", () => {}); + }); + + afterEach(() => { + consoleWarnSpy.restore(); + setOpenCodeClient(undefined); + setSuppressConsoleWarningsDuringTestsOverride(undefined); + setWarningTaskScheduler(undefined); + }); + + it("suppresses fallback console warnings while running inside 
tests", () => { + setSuppressConsoleWarningsDuringTestsOverride(undefined); + + notifyGraphitiAvailabilityIssue("warning message", { + endpoint: "http://graphiti.test/mcp", + }); + + assertEquals(consoleWarnSpy.calls.length, 0); + }); + + it("can re-enable fallback console warnings explicitly", () => { + setSuppressConsoleWarningsDuringTestsOverride(false); + + notifyGraphitiAvailabilityIssue("warning message", { + endpoint: "http://graphiti.test/mcp", + }); + + assertEquals(consoleWarnSpy.calls.length, 1); + assertEquals(consoleWarnSpy.calls[0].args[0], "[graphiti]"); + assertEquals(consoleWarnSpy.calls[0].args[1], "warning message"); + }); + + it("reports scheduled async toast dispatch immediately", async () => { + const toastCalls: unknown[] = []; + const scheduledTasks: Array<() => void> = []; + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + tui: { + showToast: (input: unknown) => { + toastCalls.push(input); + return Promise.resolve(); + }, + }, + }); + + const delivered = showWarningToast("warning message"); + + assertEquals(delivered, true); + assertEquals(toastCalls.length, 0); + assertEquals(scheduledTasks.length, 1); + + scheduledTasks[0](); + await Promise.resolve(); + + assertEquals(toastCalls, [{ + body: { + message: "warning message", + variant: "warning", + }, + }]); + assertEquals(consoleWarnSpy.calls.length, 0); + }); + + it("falls back to console.warn when toast dispatch rejects", async () => { + setSuppressConsoleWarningsDuringTestsOverride(false); + const scheduledTasks: Array<() => void> = []; + const error = new Error("toast rejected"); + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: () => undefined, + }, + tui: { + showToast: () => Promise.reject(error), + }, + }); + + notifyGraphitiAvailabilityIssue("warning message", { + endpoint: "http://graphiti.test/mcp", + }); + + assertEquals(consoleWarnSpy.calls.length, 0); + 
assertEquals(scheduledTasks.length, 2); + + for (const task of scheduledTasks) task(); + await Promise.resolve(); + await Promise.resolve(); + + assertEquals(consoleWarnSpy.calls.length, 1); + assertEquals(consoleWarnSpy.calls[0].args[0], "[graphiti]"); + assertEquals(consoleWarnSpy.calls[0].args[1], "warning message"); + assertEquals(consoleWarnSpy.calls[0].args[2], { + endpoint: "http://graphiti.test/mcp", + }); + assertStrictEquals(consoleWarnSpy.calls[0].args[3], error); + }); + + it("falls back to console.warn when toast dispatch throws", () => { + setSuppressConsoleWarningsDuringTestsOverride(false); + const scheduledTasks: Array<() => void> = []; + const error = new Error("toast threw"); + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: () => undefined, + }, + tui: { + showToast: () => { + throw error; + }, + }, + }); + + notifyGraphitiAvailabilityIssue("warning message", { + endpoint: "http://graphiti.test/mcp", + }); + + assertEquals(consoleWarnSpy.calls.length, 0); + assertEquals(scheduledTasks.length, 2); + + for (const task of scheduledTasks) task(); + + assertEquals(consoleWarnSpy.calls.length, 1); + assertEquals(consoleWarnSpy.calls[0].args[0], "[graphiti]"); + assertEquals(consoleWarnSpy.calls[0].args[1], "warning message"); + assertEquals(consoleWarnSpy.calls[0].args[2], { + endpoint: "http://graphiti.test/mcp", + }); + assertStrictEquals(consoleWarnSpy.calls[0].args[3], error); + }); + + it("contains synchronous scheduler throws and falls back to console.warn", () => { + setSuppressConsoleWarningsDuringTestsOverride(false); + setWarningTaskScheduler(() => { + throw new Error("schedule failed"); + }); + setOpenCodeClient({ + app: { + log: () => undefined, + }, + tui: { + showToast: () => undefined, + }, + }); + + notifyGraphitiAvailabilityIssue("warning message", { + endpoint: "http://graphiti.test/mcp", + }); + + assertEquals(consoleWarnSpy.calls.length, 1); + 
assertEquals(consoleWarnSpy.calls[0].args, [ + "[graphiti]", + "warning message", + { + endpoint: "http://graphiti.test/mcp", + }, + ]); + }); + + it("does not let synchronous scheduler throws mask original caller failures", async () => { + setWarningTaskScheduler(() => { + throw new Error("schedule failed"); + }); + setOpenCodeClient({ + app: { + log: () => undefined, + }, + }); + + const originalError = new Error("graphiti failed"); + + const thrown = await assertRejects(() => { + return Promise.reject(originalError).catch((err) => { + notifyGraphitiAvailabilityIssue("warning message", { + operation: "addMemory", + err, + }); + throw err; + }); + }); + + assertStrictEquals(thrown, originalError); + }); +}); diff --git a/src/services/opencode-warning.ts b/src/services/opencode-warning.ts index be4c30e..7a9ed12 100644 --- a/src/services/opencode-warning.ts +++ b/src/services/opencode-warning.ts @@ -32,6 +32,37 @@ let openCodeClient: unknown; let scheduleTask: (callback: () => void) => void = (callback) => { setTimeout(callback, 0); }; +let suppressConsoleWarningsDuringTestsOverride: boolean | undefined; + +export const shouldSuppressConsoleWarningsDuringTests = (): boolean => { + if (suppressConsoleWarningsDuringTestsOverride !== undefined) { + return suppressConsoleWarningsDuringTestsOverride; + } + const stack = new Error().stack; + return typeof stack === "string" && stack.includes("ext:cli/40_test.js"); +}; + +const warnToConsole = ( + message: string, + extra?: unknown, + error?: unknown, +): void => { + if (shouldSuppressConsoleWarningsDuringTests()) return; + if (extra === undefined) { + if (error === undefined) { + console.warn(PREFIX, message); + return; + } + console.warn(PREFIX, message, error); + return; + } + + if (error === undefined) { + console.warn(PREFIX, message, extra); + return; + } + console.warn(PREFIX, message, extra, error); +}; const asRecord = (value: unknown): Record => { if (value && typeof value === "object" && !Array.isArray(value)) { 
@@ -47,14 +78,59 @@ const getClient = (): OpenCodeClientLike | undefined => { const runSafely = ( task: () => Promise | unknown, onError?: (err: unknown) => void, -): void => { - scheduleTask(() => { - try { - void Promise.resolve(task()).catch((err) => onError?.(err)); - } catch (err) { - onError?.(err); - } - }); +): boolean => { + try { + scheduleTask(() => { + try { + void Promise.resolve(task()).catch((err) => onError?.(err)); + } catch (err) { + onError?.(err); + } + }); + return true; + } catch { + return false; + } +}; + +const scheduleStructuredWarning = ( + message: string, + extra?: unknown, +): boolean => { + const client = getClient(); + if (!client?.app?.log) return false; + + return runSafely( + () => + client.app!.log({ + body: { + service: "graphiti", + level: "warn", + message, + ...(extra === undefined ? {} : { extra: asRecord(extra) }), + }, + }), + (error) => warnToConsole(message, extra, error), + ); +}; + +const scheduleWarningToast = ( + message: string, + extra?: unknown, +): boolean => { + const client = getClient(); + if (!client?.tui?.showToast) return false; + + return runSafely( + () => + client.tui!.showToast({ + body: { + message, + variant: "warning", + }, + }), + (error) => warnToConsole(message, extra, error), + ); }; export const setOpenCodeClient = ( @@ -71,52 +147,30 @@ export const setWarningTaskScheduler = ( }); }; +export const setSuppressConsoleWarningsDuringTestsOverride = ( + value: boolean | undefined, +): void => { + suppressConsoleWarningsDuringTestsOverride = value; +}; + export const logStructuredWarning = ( message: string, extra?: unknown, ): boolean => { - const client = getClient(); - if (!client?.app?.log) return false; - - runSafely(() => - client.app!.log({ - body: { - service: "graphiti", - level: "warn", - message, - ...(extra === undefined ? 
{} : { extra: asRecord(extra) }), - }, - }) - ); - return true; + return scheduleStructuredWarning(message, extra); }; -export const showWarningToast = (message: string): boolean => { - const client = getClient(); - if (!client?.tui?.showToast) return false; - - runSafely(() => - client.tui!.showToast({ - body: { - message, - variant: "warning", - }, - }) - ); - return true; +export const showWarningToast = (message: string, extra?: unknown): boolean => { + return scheduleWarningToast(message, extra); }; export const notifyGraphitiAvailabilityIssue = ( message: string, extra?: unknown, ): void => { - const logged = logStructuredWarning(message, extra); - const toasted = showWarningToast(message); + const logged = scheduleStructuredWarning(message, extra); + const toasted = scheduleWarningToast(message, extra); if (!logged && !toasted) { - if (extra === undefined) { - console.warn(PREFIX, message); - return; - } - console.warn(PREFIX, message, extra); + warnToConsole(message, extra); } }; diff --git a/src/services/redis-cache.test.ts b/src/services/redis-cache.test.ts index 53488f1..055e736 100644 --- a/src/services/redis-cache.test.ts +++ b/src/services/redis-cache.test.ts @@ -1,11 +1,153 @@ import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { setLoggerSilentOverride } from "./logger.ts"; import { RedisCacheService } from "./redis-cache.ts"; import { RedisClient } from "./redis-client.ts"; +import { memoryCacheMetaKey } from "./redis-events.ts"; + +type RedisEvent = "close" | "end" | "error" | "ready"; + +class HashRedisRuntime { + private readonly values = new Map(); + private readonly hashes = new Map>(); + private readonly listeners = new Map< + RedisEvent, + Set<(...args: unknown[]) => void> + >(); + + constructor(private readonly state: { available: boolean }) {} + + connect(): Promise { + this.ensureAvailable(); + this.emit("ready"); + return Promise.resolve(); + } + 
+ ping(): Promise<"PONG"> { + this.ensureAvailable(); + return Promise.resolve("PONG"); + } + + quit(): Promise<"OK"> { + return Promise.resolve("OK"); + } + + private ensureAvailable(): void { + if (!this.state.available) throw new Error("redis unavailable"); + } + + get(key: string): Promise { + this.ensureAvailable(); + return Promise.resolve(this.values.get(key) ?? null); + } + + set( + key: string, + value: string, + ..._args: Array + ): Promise<"OK"> { + this.ensureAvailable(); + this.values.set(key, value); + return Promise.resolve("OK"); + } + + hset(key: string, values: Record): Promise { + this.ensureAvailable(); + const hash = this.hashes.get(key) ?? new Map(); + let added = 0; + for (const [field, value] of Object.entries(values)) { + if (!hash.has(field)) added += 1; + hash.set(field, value); + } + this.hashes.set(key, hash); + return Promise.resolve(added); + } + + hgetall(key: string): Promise> { + this.ensureAvailable(); + return Promise.resolve( + Object.fromEntries((this.hashes.get(key) ?? new Map()).entries()), + ); + } + + expire(_key: string, _ttlSeconds: number): Promise { + this.ensureAvailable(); + return Promise.resolve(1); + } + + del(key: string): Promise { + this.ensureAvailable(); + const deleted = this.values.delete(key) || this.hashes.delete(key); + return Promise.resolve(deleted ? 
1 : 0); + } + + lpush(_key: string, _value: string): Promise { + throw new Error("not implemented"); + } + + rpush(_key: string, _value: string): Promise { + throw new Error("not implemented"); + } + + lmove( + _source: string, + _destination: string, + _sourceSide: "LEFT" | "RIGHT", + _destinationSide: "LEFT" | "RIGHT", + ): Promise { + throw new Error("not implemented"); + } + + lrange(_key: string, _start: number, _stop: number): Promise { + throw new Error("not implemented"); + } + + llen(_key: string): Promise { + throw new Error("not implemented"); + } + + ltrim(_key: string, _start: number, _stop: number): Promise { + throw new Error("not implemented"); + } + + lindex(_key: string, _index: number): Promise { + throw new Error("not implemented"); + } + + lset(_key: string, _index: number, _value: string): Promise { + throw new Error("not implemented"); + } + + eval(_script: string, _numKeys: number, ..._args: string[]): Promise { + throw new Error("not implemented"); + } + + on(event: RedisEvent, listener: (...args: unknown[]) => void): void { + const set = this.listeners.get(event) ?? new Set(); + set.add(listener); + this.listeners.set(event, set); + } + + off(event: RedisEvent, listener: (...args: unknown[]) => void): void { + this.listeners.get(event)?.delete(listener); + } + + emit(event: RedisEvent, ...args: unknown[]): void { + for (const listener of this.listeners.get(event) ?? 
[]) { + listener(...args); + } + } +} + +const createRedis = (state = { available: true }) => + new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new HashRedisRuntime(state), + }); describe("redis cache", () => { it("stores cache entries per group without leaking across groups", async () => { - const redis = new RedisClient({ endpoint: "redis://unused" }); + const redis = createRedis(); const cache = new RedisCacheService(redis, { ttlSeconds: 300, driftThreshold: 0.5, @@ -14,27 +156,23 @@ describe("redis cache", () => { await cache.set("group-1", { query: "project alpha policy", refreshedAt: Date.now(), - facts: [{ uuid: "fact-1", fact: "Alpha uses kebab-case config names" }], nodes: [], - factUuids: ["fact-1"], nodeRefs: [], }); await cache.set("group-2", { query: "project beta policy", refreshedAt: Date.now(), - facts: [{ uuid: "fact-2", fact: "Beta uses snake_case env names" }], nodes: [], - factUuids: ["fact-2"], nodeRefs: [], }); - assertEquals((await cache.get("group-1"))?.factUuids, ["fact-1"]); - assertEquals((await cache.get("group-2"))?.factUuids, ["fact-2"]); + assertEquals((await cache.get("group-1"))?.query, "project alpha policy"); + assertEquals((await cache.get("group-2"))?.query, "project beta policy"); assertEquals(await cache.get("group-3"), null); }); - it("filters already visible facts and returns little or no persistent memory for noise-only remainder", () => { - const redis = new RedisClient({ endpoint: "redis://unused" }); + it("returns little or no persistent memory for noise-only remainder", () => { + const redis = createRedis(); const cache = new RedisCacheService(redis, { ttlSeconds: 300, driftThreshold: 0.5, @@ -43,19 +181,78 @@ describe("redis cache", () => { const rendered = cache.renderPersistentMemory({ query: "naming policy", refreshedAt: Date.now(), - facts: [{ uuid: "fact-1", fact: "Use kebab-case route names" }], nodes: [], - factUuids: ["fact-1"], nodeRefs: [], - }, ["fact-1"]); + }); 
assertEquals(rendered.body, ""); - assertEquals(rendered.factUuids, []); assertEquals(rendered.nodeRefs, []); }); - it("renders bounded persistent memory with deduped visible facts and truncated long content", () => { - const redis = new RedisClient({ endpoint: "redis://unused" }); + it("renders cached node and fact summaries without leaking node refs into the body", () => { + const redis = createRedis(); + const cache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + const rendered = cache.renderPersistentMemory({ + query: "naming policy", + refreshedAt: Date.now(), + nodes: [{ + uuid: "node-1", + name: "Policy Guidelines", + summary: "Enforce kebab-case naming decision for all routes", + }], + episodeSummaries: [ + "Policy Guidelines → Routing: Prefer kebab-case route names", + ], + nodeRefs: ["node-1"], + }); + + assertEquals(rendered.body.includes("node-1"), false); + assertEquals(rendered.nodeRefs.includes("node-1"), true); + assertStringIncludes(rendered.body, "Prefer kebab-case route names"); + }); + + it("dedupes equivalent rendered nodes and episodes while keeping the first node ref", () => { + const redis = createRedis(); + const cache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + const rendered = cache.renderPersistentMemory({ + query: "redis hot path", + refreshedAt: Date.now(), + nodes: [ + { + uuid: "node-1", + name: "Redis policy", + summary: "Keep hot path deduped for persistent memory", + }, + { + uuid: "node-2", + name: " Redis policy ", + summary: "Keep hot path deduped for persistent memory", + }, + ], + episodeSummaries: [ + "Redis policy decision for persistent memory", + 'old Redis policy decision for persistent memory', + ], + nodeRefs: ["node-1", "node-2"], + }); + + assertEquals( + rendered.body, + "Redis policy: Keep hot path deduped for persistent memoryRedis policy decision for persistent memory", + ); + assertEquals(rendered.nodeRefs, ["node-1"]); + }); + + 
it("suppresses persistent memory when the remaining facts and nodes are transcript-heavy noise", () => { + const redis = createRedis(); const cache = new RedisCacheService(redis, { ttlSeconds: 300, driftThreshold: 0.5, @@ -65,10 +262,6 @@ describe("redis cache", () => { const rendered = cache.renderPersistentMemory({ query: "context overhaul policy", refreshedAt: Date.now(), - facts: Array.from({ length: 10 }, (_, index) => ({ - uuid: `fact-${index + 1}`, - fact: `Fact ${index + 1} ${huge}`, - })), nodes: Array.from({ length: 8 }, (_, index) => ({ uuid: `node-${index + 1}`, name: `Node ${index + 1}`, @@ -78,17 +271,116 @@ describe("redis cache", () => { { length: 6 }, (_, index) => `Episode ${index + 1} ${huge}`, ), - factUuids: Array.from({ length: 10 }, (_, index) => `fact-${index + 1}`), nodeRefs: Array.from({ length: 8 }, (_, index) => `node-${index + 1}`), - }, ["fact-1", "fact-2", "fact-3"]); - - assertEquals(rendered.factUuids.includes("fact-1"), false); - assertEquals(rendered.factUuids.includes("fact-2"), false); - assertEquals(rendered.factUuids.includes("fact-3"), false); - assertEquals(rendered.factUuids.length <= 7, true); - assertEquals(rendered.nodeRefs.length <= 6, true); - assertEquals(rendered.body.length <= 1800, true); - assertStringIncludes(rendered.body, "Fact 4"); - assertEquals(rendered.body.includes(huge), false); + }); + + assertEquals(rendered.nodeRefs, []); + assertEquals(rendered.body, ""); + }); + + it("sanitizes injected memory blocks before storing cache entries", async () => { + const redis = createRedis(); + const cache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await cache.set("group-1", { + query: + 'old next query', + refreshedAt: Date.now(), + nodes: [{ + uuid: "node-1", + name: "Context Overhaul", + summary: + ' Hot path uses Redis', + }], + episodeSummaries: [ + 'old Durable project note', + ], + nodeRefs: ["node-1"], + }); + + const stored = await cache.get("group-1"); + 
assertEquals(stored?.query, "next query"); + assertEquals(stored?.nodes[0].summary, "Hot path uses Redis"); + assertEquals(stored?.episodeSummaries, ["Durable project note"]); + }); + + it("persists query metadata without fact uuid state", async () => { + const redis = createRedis(); + const cache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await cache.set("group-1", { + query: "query", + refreshedAt: Date.now(), + nodes: [], + nodeRefs: [], + }); + + assertEquals(await cache.getMeta("group-1"), { + lastQuery: "query", + lastRefresh: (await cache.get("group-1"))?.refreshedAt, + }); + }); + + it("preserves a string lastRefresh value of 0 in metadata", async () => { + const redis = createRedis(); + const cache = new RedisCacheService(redis, { + ttlSeconds: 300, + driftThreshold: 0.5, + }); + + await redis.setHashFields(memoryCacheMetaKey("group-1"), { + lastQuery: "query", + lastRefresh: "0", + }, 300); + + assertEquals(await cache.getMeta("group-1"), { + lastQuery: "query", + lastRefresh: 0, + }); + }); + + it("keeps cache entry and metadata alive through fallback after a live touch", async () => { + const state = { available: true }; + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => new HashRedisRuntime(state), + }); + const cache = new RedisCacheService(redis, { + ttlSeconds: 1.2, + driftThreshold: 0.5, + }); + try { + setLoggerSilentOverride(true); + await redis.connect(); + await cache.set("group-1", { + query: "query", + refreshedAt: Date.now(), + nodes: [], + nodeRefs: [], + }); + + await new Promise((resolve) => setTimeout(resolve, 750)); + await cache.touch("group-1"); + + state.available = false; + (redis as unknown as { redis: HashRedisRuntime | null }).redis?.emit( + "close", + ); + await new Promise((resolve) => setTimeout(resolve, 500)); + + assertEquals((await cache.get("group-1"))?.query, "query"); + assertEquals((await 
cache.getMeta("group-1"))?.lastQuery, "query"); + } finally { + setLoggerSilentOverride(false); + await redis.close(); + } }); }); diff --git a/src/services/redis-cache.ts b/src/services/redis-cache.ts index feb52db..af5f9da 100644 --- a/src/services/redis-cache.ts +++ b/src/services/redis-cache.ts @@ -1,23 +1,32 @@ import type { CacheRefreshDecision, - GraphitiFact, GraphitiNode, PersistentMemoryCacheEntry, PersistentMemoryCacheMeta, } from "../types/index.ts"; -import { escapeXml } from "./render-utils.ts"; import type { RedisClient } from "./redis-client.ts"; import { memoryCacheKey, memoryCacheMetaKey } from "./redis-events.ts"; - -const formatFact = (fact: GraphitiFact): string => { - const refs = [fact.source_node?.name, fact.target_node?.name] - .filter(Boolean) - .join(" → "); - return refs ? `${fact.fact} (${refs})` : fact.fact; -}; +import { + escapeXml, + isHighValueMemoryText, + looksLikeOperationalChatter, + looksLikeToolTranscript, + looksTranscriptHeavy, + sanitizeMemoryInput, + stripInjectedMemoryBlocks, +} from "./render-utils.ts"; const formatNode = (node: GraphitiNode): string => - node.summary ? `${node.name}: ${node.summary}` : node.name; + sanitizeMemoryInput( + node.summary ? 
`${node.name}: ${node.summary}` : node.name, + ); + +const normalizeRenderedPersistentText = (value: string): string => + value.toLowerCase() + .replace(/&(?:amp|lt|gt|quot|apos);/g, " ") + .replace(/[^a-z0-9./_-]+/g, " ") + .replace(/\s+/g, " ") + .trim(); export interface RedisCacheServiceOptions { ttlSeconds: number; @@ -25,10 +34,33 @@ export interface RedisCacheServiceOptions { } const TOKEN_PATTERN = /[a-z0-9._/-]{2,}/g; -const FACT_RENDER_LIMIT = 220; const NODE_RENDER_LIMIT = 180; const EPISODE_RENDER_LIMIT = 180; -const PERSISTENT_MEMORY_BODY_BUDGET = 1_800; +export const PERSISTENT_MEMORY_BODY_BUDGET = 1_800; + +const isLowValuePersistentText = (value: string): boolean => { + const sanitized = sanitizeMemoryInput(value); + if (!sanitized) return true; + if (looksLikeToolTranscript(sanitized)) return true; + if (looksLikeOperationalChatter(sanitized)) return true; + if (looksTranscriptHeavy(sanitized)) return true; + return !isHighValueMemoryText(sanitized); +}; + +const distinctByNormalized = ( + values: T[], + getNormalizedText: (value: T) => string, +): T[] => { + const seen = new Set(); + const result: T[] = []; + for (const value of values) { + const normalized = getNormalizedText(value); + if (!normalized || seen.has(normalized)) continue; + seen.add(normalized); + result.push(value); + } + return result; +}; const normalizeQuery = (query: string): string => query.trim().toLowerCase(); @@ -48,7 +80,7 @@ const jaccardSimilarity = (left: string, right: string): number => { for (const token of leftTokens) { if (rightTokens.has(token)) intersection += 1; } - const union = new Set([...leftTokens, ...rightTokens]).size; + const union = leftTokens.size + rightTokens.size - intersection; return union === 0 ? 
0 : intersection / union; }; @@ -72,19 +104,19 @@ export class RedisCacheService { const raw = await this.redis.getHashAll(memoryCacheMetaKey(groupId)); if (Object.keys(raw).length === 0) return null; + const hasLastRefresh = Object.hasOwn(raw, "lastRefresh"); + const parsedLastRefresh = hasLastRefresh ? Number(raw.lastRefresh) : NaN; + return { lastQuery: raw.lastQuery?.trim() || undefined, - lastRefresh: raw.lastRefresh && Number.isFinite(Number(raw.lastRefresh)) - ? Number(raw.lastRefresh) + lastRefresh: Number.isFinite(parsedLastRefresh) + ? parsedLastRefresh : undefined, - factUuids: raw.factUuids - ? raw.factUuids.split(",").map((value) => value.trim()).filter(Boolean) - : [], }; } async rememberRefreshQuery(groupId: string, query: string): Promise { - const normalized = query.trim(); + const normalized = sanitizeMemoryInput(stripInjectedMemoryBlocks(query)); if (!normalized) return; await this.redis.setHashFields( @@ -116,17 +148,31 @@ export class RedisCacheService { groupId: string, entry: PersistentMemoryCacheEntry, ): Promise { + const sanitizedEntry: PersistentMemoryCacheEntry = { + query: sanitizeMemoryInput(stripInjectedMemoryBlocks(entry.query)), + refreshedAt: entry.refreshedAt, + nodes: entry.nodes.map((node) => ({ + ...node, + name: sanitizeMemoryInput(stripInjectedMemoryBlocks(node.name)), + summary: node.summary + ? 
sanitizeMemoryInput(stripInjectedMemoryBlocks(node.summary)) + : undefined, + })).filter((node) => node.name), + episodeSummaries: entry.episodeSummaries?.map((episode) => + sanitizeMemoryInput(stripInjectedMemoryBlocks(episode)) + ).filter(Boolean), + nodeRefs: [...entry.nodeRefs], + }; await this.redis.setString( memoryCacheKey(groupId), - JSON.stringify(entry), + JSON.stringify(sanitizedEntry), this.options.ttlSeconds, ); await this.redis.setHashFields( memoryCacheMetaKey(groupId), { - lastQuery: entry.query, - lastRefresh: entry.refreshedAt, - factUuids: entry.factUuids.join(","), + lastQuery: sanitizedEntry.query, + lastRefresh: sanitizedEntry.refreshedAt, }, this.options.ttlSeconds, ); @@ -163,12 +209,12 @@ export class RedisCacheService { const normalizedQuery = normalizeQuery(query); const normalizedCachedQuery = normalizeQuery(entry.query); const hasPrimerEpisodes = (entry.episodeSummaries?.length ?? 0) > 0; - const hasFactsOrNodes = entry.facts.length > 0 || entry.nodes.length > 0; + const hasNodes = entry.nodes.length > 0; if ( normalizedCachedQuery === "primer" && normalizedQuery && hasPrimerEpisodes && - !hasFactsOrNodes + !hasNodes ) { return { classification: "primer-only", @@ -199,31 +245,38 @@ export class RedisCacheService { renderPersistentMemory( entry: PersistentMemoryCacheEntry | null, - visibleFactUuids: string[] = [], - ): { body: string; factUuids: string[]; nodeRefs: string[] } { - if (!entry) return { body: "", factUuids: [], nodeRefs: [] }; - const visible = new Set(visibleFactUuids); - const facts = entry.facts.filter((fact) => !visible.has(fact.uuid)); + budget = PERSISTENT_MEMORY_BODY_BUDGET, + ): { body: string; nodeRefs: string[] } { + if (!entry) return { body: "", nodeRefs: [] }; + + const renderedNodes = distinctByNormalized( + entry.nodes.flatMap((node) => { + const rendered = formatNode(node); + const normalized = normalizeRenderedPersistentText(rendered); + if (!normalized || isLowValuePersistentText(rendered)) return []; + 
return [{ uuid: node.uuid, rendered, normalized }]; + }), + (node) => node.normalized, + ); + const renderedEpisodes = distinctByNormalized( + (entry.episodeSummaries ?? []).flatMap((episode) => { + const rendered = sanitizeMemoryInput(episode); + const normalized = normalizeRenderedPersistentText(rendered); + if (!normalized || isLowValuePersistentText(rendered)) return []; + return [{ rendered, normalized }]; + }), + (episode) => episode.normalized, + ); const sections: string[] = []; - const factUuids: string[] = []; const nodeRefs: string[] = []; - let remaining = PERSISTENT_MEMORY_BODY_BUDGET; - for (const fact of facts.slice(0, 8)) { - const section = `${ - escapeXml( - formatFact(fact).slice(0, FACT_RENDER_LIMIT), - ) - }`; - if (section.length > remaining) break; - sections.push(section); - factUuids.push(fact.uuid); - remaining -= section.length; - } - for (const node of entry.nodes.slice(0, 6)) { + let remaining = Math.max(0, budget); + for (const node of renderedNodes.slice(0, 3)) { + const renderedNode = node.rendered.slice(0, NODE_RENDER_LIMIT); + if (!renderedNode) continue; const section = `${ escapeXml( - formatNode(node).slice(0, NODE_RENDER_LIMIT), + renderedNode, ) }`; if (section.length > remaining) break; @@ -231,10 +284,15 @@ export class RedisCacheService { nodeRefs.push(node.uuid); remaining -= section.length; } - for (const episode of entry.episodeSummaries?.slice(0, 4) ?? 
[]) { + for (const episode of renderedEpisodes.slice(0, 2)) { + const sanitizedEpisode = episode.rendered.slice( + 0, + EPISODE_RENDER_LIMIT, + ); + if (!sanitizedEpisode) continue; const section = `${ escapeXml( - episode.slice(0, EPISODE_RENDER_LIMIT), + sanitizedEpisode, ) }`; if (section.length > remaining) break; @@ -242,6 +300,6 @@ export class RedisCacheService { remaining -= section.length; } - return { body: sections.join(""), factUuids, nodeRefs }; + return { body: sections.join(""), nodeRefs }; } } diff --git a/src/services/redis-client.test.ts b/src/services/redis-client.test.ts index 0c80b4b..f7a654b 100644 --- a/src/services/redis-client.test.ts +++ b/src/services/redis-client.test.ts @@ -1,17 +1,22 @@ import { assert, assertEquals, assertRejects } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { setSuppressConsoleWarningsDuringTestsOverride } from "./opencode-warning.ts"; import { RedisClient } from "./redis-client.ts"; +import { RedisSnapshotService } from "./redis-snapshot.ts"; type RedisEvent = "close" | "end" | "error" | "ready"; +setSuppressConsoleWarningsDuringTestsOverride(true); + class FakeRedisRuntime { private readonly values = new Map(); + private readonly lists = new Map(); private readonly listeners = new Map< RedisEvent, Set<(...args: unknown[]) => void> >(); - constructor(private readonly state: { available: boolean }) {} + constructor(protected readonly state: { available: boolean }) {} connect(): Promise { if (!this.state.available) { @@ -32,29 +37,97 @@ class FakeRedisRuntime { return Promise.resolve("OK"); } - lpush(_key: string, _value: string): Promise { - throw new Error("not implemented"); + protected ensureAvailable(): void { + if (!this.state.available) { + throw new Error("redis unavailable"); + } } - rpush(_key: string, _value: string): Promise { - throw new Error("not implemented"); + private ensureList(key: string): string[] { + if (this.values.has(key)) { + throw new 
Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + const existing = this.lists.get(key); + if (existing) return existing; + const list: string[] = []; + this.lists.set(key, list); + return list; + } + + lpush(key: string, value: string): Promise { + this.ensureAvailable(); + const list = this.ensureList(key); + list.unshift(value); + return Promise.resolve(list.length); + } + + rpush(key: string, value: string): Promise { + this.ensureAvailable(); + const list = this.ensureList(key); + list.push(value); + return Promise.resolve(list.length); } lmove( - _source: string, - _destination: string, - _sourceSide: "LEFT" | "RIGHT", - _destinationSide: "LEFT" | "RIGHT", + source: string, + destination: string, + sourceSide: "LEFT" | "RIGHT", + destinationSide: "LEFT" | "RIGHT", ): Promise { - throw new Error("not implemented"); + this.ensureAvailable(); + if (this.values.has(source) || this.values.has(destination)) { + return Promise.reject( + new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ), + ); + } + + const sourceList = this.lists.get(source); + if (!sourceList || sourceList.length === 0) return Promise.resolve(null); + + const value = sourceSide === "LEFT" ? 
sourceList.shift() : sourceList.pop(); + if (value === undefined) return Promise.resolve(null); + + if (sourceList.length === 0) { + this.lists.delete(source); + } + + const destinationList = this.ensureList(destination); + if (destinationSide === "LEFT") { + destinationList.unshift(value); + } else { + destinationList.push(value); + } + + return Promise.resolve(value); } - lrange(_key: string, _start: number, _stop: number): Promise { - throw new Error("not implemented"); + lrange(key: string, start: number, stop: number): Promise { + this.ensureAvailable(); + if (this.values.has(key)) { + return Promise.reject( + new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ), + ); + } + const list = this.lists.get(key) ?? []; + return Promise.resolve(list.slice(start, stop + 1)); } - llen(_key: string): Promise { - throw new Error("not implemented"); + llen(key: string): Promise { + this.ensureAvailable(); + if (this.values.has(key)) { + return Promise.reject( + new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ), + ); + } + return Promise.resolve((this.lists.get(key) ?? []).length); } ltrim(_key: string, _start: number, _stop: number): Promise { @@ -81,9 +154,14 @@ class FakeRedisRuntime { value: string, ..._args: Array ): Promise<"OK"> { - if (!this.state.available) { - return Promise.reject(new Error("redis unavailable")); + if (this.lists.has(key)) { + return Promise.reject( + new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ), + ); } + this.ensureAvailable(); this.values.set(key, value); return Promise.resolve("OK"); } @@ -96,10 +174,10 @@ class FakeRedisRuntime { } del(key: string): Promise { - if (!this.state.available) { - return Promise.reject(new Error("redis unavailable")); - } - return Promise.resolve(this.values.delete(key) ? 
1 : 0); + this.ensureAvailable(); + const deletedValue = this.values.delete(key); + const deletedList = this.lists.delete(key); + return Promise.resolve(deletedValue || deletedList ? 1 : 0); } eval(_script: string, _numKeys: number, ..._args: string[]): Promise { @@ -121,6 +199,51 @@ class FakeRedisRuntime { listener(...args); } } + + getStringValue(key: string): string | null { + return this.values.get(key) ?? null; + } + + getListValues(key: string): string[] { + return [...(this.lists.get(key) ?? [])]; + } +} + +class HashReadOnlyRedisRuntime extends FakeRedisRuntime { + protected readonly hashes = new Map>(); + + hgetall(key: string): Promise> { + this.ensureAvailable(); + return Promise.resolve( + Object.fromEntries((this.hashes.get(key) ?? new Map()).entries()), + ); + } + + override del(key: string): Promise { + this.ensureAvailable(); + const deletedHash = this.hashes.delete(key); + return super.del(key).then(( + deleted, + ) => (deletedHash || deleted === 1 ? 1 : 0)); + } + + seedHash(key: string, values: Record): void { + this.hashes.set(key, new Map(Object.entries(values))); + } +} + +class HashRedisRuntime extends HashReadOnlyRedisRuntime { + hset(key: string, values: Record): Promise { + this.ensureAvailable(); + const hash = this.hashes.get(key) ?? 
new Map(); + let added = 0; + for (const [field, value] of Object.entries(values)) { + if (!hash.has(field)) added += 1; + hash.set(field, value); + } + this.hashes.set(key, hash); + return Promise.resolve(added); + } } class DeferredConnectRedisRuntime extends FakeRedisRuntime { @@ -209,16 +332,91 @@ describe("redis client", () => { await redis.setHashFields("memory-cache:group-1:meta", { lastQuery: "Continue overhaul", lastRefresh: 123, - factUuids: "fact-1,fact-2", + retainedField: "fact-1,fact-2", }, 60); assertEquals(await redis.getHashAll("memory-cache:group-1:meta"), { lastQuery: "Continue overhaul", lastRefresh: "123", - factUuids: "fact-1,fact-2", + retainedField: "fact-1,fact-2", }); }); + it("enforces TTL on in-memory hash fallbacks when the runtime lacks hash support", async () => { + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new FakeRedisRuntime({ available: true }), + }); + + await redis.connect(); + await redis.setHashFields("memory-cache:group-1:meta", { + lastQuery: "Continue overhaul", + }, 0.001); + assertEquals(await redis.getHashAll("memory-cache:group-1:meta"), { + lastQuery: "Continue overhaul", + }); + + await new Promise((resolve) => setTimeout(resolve, 10)); + + assertEquals(await redis.getHashAll("memory-cache:group-1:meta"), {}); + await redis.close(); + }); + + it("merges fallback hash fields with live hgetall reads when hset is unavailable", async () => { + const runtime = new HashReadOnlyRedisRuntime({ available: true }); + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => runtime, + }); + + await redis.connect(); + await redis.setHashFields("memory-cache:group-1:meta", { + fallbackOnly: "local", + shared: "fallback", + }); + runtime.seedHash("memory-cache:group-1:meta", { + liveOnly: "remote", + shared: "live", + }); + + assertEquals(await redis.getHashAll("memory-cache:group-1:meta"), { + liveOnly: "remote", + shared: "fallback", + fallbackOnly: "local", 
+ }); + + await redis.close(); + }); + + it("keeps fallback hash reads consistent after reconnecting to a runtime without hset", async () => { + const state = { available: false }; + const runtime = new HashReadOnlyRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + + await redis.connect(); + await redis.setHashFields("memory-cache:group-1:meta", { + fallbackOnly: "local", + }); + + state.available = true; + runtime.seedHash("memory-cache:group-1:meta", { + liveOnly: "remote", + }); + await waitFor(() => redis.isConnected()); + + assertEquals(await redis.getHashAll("memory-cache:group-1:meta"), { + liveOnly: "remote", + fallbackOnly: "local", + }); + + await redis.close(); + }); + it("reconnects after startup failure and swaps back to live redis", async () => { const state = { available: false }; const runtime = new FakeRedisRuntime(state); @@ -270,13 +468,419 @@ describe("redis client", () => { state.available = true; await waitFor(() => redis.isConnected()); - assertEquals(await redis.getString("key"), "before-disconnect"); + assertEquals(await redis.getString("key"), "memory-during-outage"); + assertEquals(runtime.getStringValue("key"), "memory-during-outage"); await redis.setString("key", "after-reconnect"); assertEquals(await redis.getString("key"), "after-reconnect"); await redis.close(); }); + it("does not resurrect a stale fallback string after a live update and later reconnect", async () => { + const state = { available: true }; + const runtime = new FakeRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + + await redis.connect(); + assertEquals(redis.isConnected(), true); + + state.available = false; + runtime.emit("close"); + assertEquals(redis.isConnected(), false); + + await redis.setString("key", 
"fallback-value"); + assertEquals(await redis.getString("key"), "fallback-value"); + + state.available = true; + await waitFor(() => redis.isConnected()); + await waitFor(() => runtime.getStringValue("key") === "fallback-value"); + + await redis.setString("key", "live-after-reconnect"); + assertEquals(await redis.getString("key"), "live-after-reconnect"); + assertEquals(runtime.getStringValue("key"), "live-after-reconnect"); + + state.available = false; + runtime.emit("close"); + assertEquals(redis.isConnected(), false); + + state.available = true; + await waitFor(() => redis.isConnected()); + + assertEquals(await redis.getString("key"), "live-after-reconnect"); + assertEquals(runtime.getStringValue("key"), "live-after-reconnect"); + + await redis.close(); + }); + + it("does not resurrect a stale fallback string after a live delete and later reconnect", async () => { + const state = { available: true }; + const runtime = new FakeRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + + await redis.connect(); + assertEquals(redis.isConnected(), true); + + state.available = false; + runtime.emit("close"); + assertEquals(redis.isConnected(), false); + + await redis.setString("key", "fallback-value"); + assertEquals(await redis.getString("key"), "fallback-value"); + + state.available = true; + await waitFor(() => redis.isConnected()); + await waitFor(() => runtime.getStringValue("key") === "fallback-value"); + + await redis.deleteKey("key"); + assertEquals(await redis.getString("key"), null); + assertEquals(runtime.getStringValue("key"), null); + + state.available = false; + runtime.emit("close"); + assertEquals(redis.isConnected(), false); + + state.available = true; + await waitFor(() => redis.isConnected()); + + assertEquals(await redis.getString("key"), null); + assertEquals(runtime.getStringValue("key"), null); + + await redis.close(); + }); 
+ + it("replays startup-race fallback writes before initial live runtime use", async () => { + const state = { available: true }; + const runtime = new DeferredConnectRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => runtime, + }); + + const connectPromise = redis.connect(); + await redis.setString("key", "written-during-connect"); + assertEquals(await redis.getString("key"), "written-during-connect"); + + runtime.resumeConnect(); + await connectPromise; + + assertEquals(redis.isConnected(), true); + assertEquals(await redis.getString("key"), "written-during-connect"); + assertEquals(runtime.getStringValue("key"), "written-during-connect"); + + await redis.close(); + }); + + it("replays pending fallback mutations only once across repeated ready paths", async () => { + const state = { available: true }; + const runtime = new DeferredConnectRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => runtime, + }); + + const connectPromise = redis.connect(); + await redis.appendToList("queue", "entry-1"); + await redis.appendToList("queue", "entry-2"); + + runtime.resumeConnect(); + await connectPromise; + runtime.emit("ready"); + runtime.emit("ready"); + + assertEquals(await redis.getRecentList("queue", 10), [ + "entry-1", + "entry-2", + ]); + assertEquals(runtime.getListValues("queue"), ["entry-1", "entry-2"]); + + await redis.close(); + }); + + it("does not replay the same pending fallback mutation set more than once across reconnect cycles", async () => { + const state = { available: true }; + const runtime = new FakeRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + + await redis.connect(); + await redis.appendToList("queue", "live-entry"); + + state.available = false; + runtime.emit("close"); + await redis.appendToList("queue", 
"fallback-entry"); + + state.available = true; + await waitFor(() => redis.isConnected()); + assertEquals(await redis.getRecentList("queue", 10), [ + "live-entry", + "fallback-entry", + ]); + assertEquals(runtime.getListValues("queue"), [ + "live-entry", + "fallback-entry", + ]); + + state.available = false; + runtime.emit("close"); + state.available = true; + await waitFor(() => redis.isConnected()); + + assertEquals(await redis.getRecentList("queue", 10), [ + "live-entry", + "fallback-entry", + ]); + assertEquals(runtime.getListValues("queue"), [ + "live-entry", + "fallback-entry", + ]); + + await redis.close(); + }); + + it("coalesces repeated fallback list replays for the same key while disconnected", async () => { + const state = { available: true }; + const runtime = new FakeRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + + await redis.connect(); + state.available = false; + runtime.emit("close"); + + await redis.appendToList("queue", "first"); + await redis.appendToList("queue", "second"); + + const pendingFallbackReplays = (redis as unknown as { + pendingFallbackReplays: Map; + }).pendingFallbackReplays; + assertEquals(pendingFallbackReplays.size, 1); + + state.available = true; + await waitFor(() => redis.isConnected()); + + assertEquals(await redis.getRecentList("queue", 10), ["first", "second"]); + assertEquals(runtime.getListValues("queue"), ["first", "second"]); + + await redis.close(); + }); + + it("keeps the non-durable side of a live mixed move synchronized for outage fallback", async () => { + const state = { available: true }; + const runtime = new FakeRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + + await redis.connect(); + await redis.appendToList("cache:queue:group-1", 
"live-before-move"); + assertEquals( + await redis.moveListItem( + "cache:queue:group-1", + "drain:pending:group-1", + "LEFT", + "RIGHT", + ), + "live-before-move", + ); + + assertEquals(runtime.getListValues("cache:queue:group-1"), []); + assertEquals(runtime.getListValues("drain:pending:group-1"), [ + "live-before-move", + ]); + + state.available = false; + runtime.emit("close"); + + assertEquals(await redis.getRecentList("cache:queue:group-1", 10), []); + assertEquals(runtime.getListValues("cache:queue:group-1"), []); + assertEquals(await redis.getRecentList("drain:pending:group-1", 10), []); + + state.available = true; + await waitFor(() => redis.isConnected()); + + assertEquals(runtime.getListValues("cache:queue:group-1"), []); + assertEquals(runtime.getListValues("drain:pending:group-1"), [ + "live-before-move", + ]); + + await redis.close(); + }); + + it("keeps live hash metadata available through outage fallback after reconnect", async () => { + const state = { available: true }; + const runtime = new HashRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + + await redis.connect(); + await redis.setHashFields("memory-cache:group-1:meta", { + lastQuery: "before outage", + }); + + state.available = false; + runtime.emit("close"); + await redis.setHashFields("memory-cache:group-1:meta", { + fallbackOnly: "during outage", + }); + + state.available = true; + await waitFor(() => redis.isConnected()); + await redis.setHashFields("memory-cache:group-1:meta", { + lastRefresh: 456, + }, 60); + assertEquals(await redis.getHashAll("memory-cache:group-1:meta"), { + lastQuery: "before outage", + fallbackOnly: "during outage", + lastRefresh: "456", + }); + + state.available = false; + runtime.emit("close"); + + assertEquals(await redis.getHashAll("memory-cache:group-1:meta"), { + lastQuery: "before outage", + fallbackOnly: "during outage", + 
lastRefresh: "456", + }); + + await redis.close(); + }); + + it("mirrors live snapshot touch TTL into fallback memory", async () => { + const state = { available: true }; + const runtime = new FakeRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + const snapshots = new RedisSnapshotService(redis, { ttlSeconds: 1.2 }); + + await redis.connect(); + await snapshots.saveSnapshot("session-1", "snapshot-value"); + await new Promise((resolve) => setTimeout(resolve, 750)); + await snapshots.touchSnapshot("session-1"); + + state.available = false; + runtime.emit("close"); + await new Promise((resolve) => setTimeout(resolve, 500)); + + assertEquals(await snapshots.getSnapshot("session-1"), "snapshot-value"); + + await redis.close(); + }); + + it("fails closed for durable drain lock writes during an outage", async () => { + const state = { available: true }; + const runtime = new FakeRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + + await redis.connect(); + assertEquals(redis.isConnected(), true); + + assertEquals( + await redis.setStringIfAbsent( + "drain:claim-lock:group-1", + "live-token", + 30, + ), + true, + ); + assertEquals( + await redis.getString("drain:claim-lock:group-1"), + "live-token", + ); + + state.available = false; + runtime.emit("close"); + assertEquals(redis.isConnected(), false); + + await assertRejects( + () => + redis.setStringIfAbsent("drain:claim-lock:group-1", "outage-token", 30), + Error, + "Redis hot tier unavailable for durable drain-state mutation", + ); + assertEquals(await redis.getString("drain:claim-lock:group-1"), null); + + state.available = true; + await waitFor(() => redis.isConnected()); + + assertEquals( + await redis.getString("drain:claim-lock:group-1"), + "live-token", + ); + 
+ await redis.close(); + }); + + it("fails closed for durable drain queue writes during an outage", async () => { + const state = { available: true }; + const runtime = new FakeRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime, + }); + + await redis.connect(); + assertEquals(redis.isConnected(), true); + + state.available = false; + runtime.emit("close"); + assertEquals(redis.isConnected(), false); + + await assertRejects( + () => redis.appendToList("drain:pending:group-1", '{"id":"entry-1"}', 30), + Error, + "Redis hot tier unavailable for durable drain-state mutation", + ); + assertEquals(await redis.getListLength("drain:pending:group-1"), 0); + + state.available = true; + await waitFor(() => redis.isConnected()); + + assertEquals( + await redis.appendToList("drain:pending:group-1", '{"id":"entry-2"}', 30), + 1, + ); + assertEquals(await redis.getListLength("drain:pending:group-1"), 1); + + await redis.close(); + }); + it("ignores stale runtime ready events during reconnect", async () => { const firstState = { available: true }; const secondState = { available: true }; diff --git a/src/services/redis-client.ts b/src/services/redis-client.ts index 8bbae16..d7b79d5 100644 --- a/src/services/redis-client.ts +++ b/src/services/redis-client.ts @@ -331,6 +331,34 @@ class InMemoryRedisStore implements RedisRuntime { existing.expiresAt = Date.now() + ttlSeconds * 1000; return Promise.resolve(true); } + + snapshot(key: string): + | { kind: "missing" } + | { kind: "string"; value: string; ttlSeconds?: number } + | { kind: "list"; values: string[]; ttlSeconds?: number } + | { kind: "hash"; values: Record; ttlSeconds?: number } { + this.cleanup(key); + const existing = this.values.get(key); + if (!existing) return { kind: "missing" }; + + const ttlSeconds = existing.expiresAt + ? 
Math.max(Math.ceil((existing.expiresAt - Date.now()) / 1000), 1) + : undefined; + + if (typeof existing.value === "string") { + return { kind: "string", value: existing.value, ttlSeconds }; + } + + if (Array.isArray(existing.value)) { + return { kind: "list", values: [...existing.value], ttlSeconds }; + } + + return { + kind: "hash", + values: Object.fromEntries(existing.value.entries()), + ttlSeconds, + }; + } } export interface RedisClientOptions { @@ -342,6 +370,11 @@ export interface RedisClientOptions { export class RedisClient { private readonly memory = new InMemoryRedisStore(); + private readonly hashFallbackKeys = new Set(); + private readonly pendingFallbackReplays = new Map< + string, + (runtime: RedisRuntime) => Promise + >(); private readonly runtimeListeners = new WeakMap< RedisRuntime, RuntimeListeners @@ -349,6 +382,7 @@ export class RedisClient { private redis: RedisRuntime | null = null; private connected = false; private closed = false; + private finalizingRuntime = false; private reconnectTimer: TimerHandle | null = null; private reconnectAttempts = 0; private connectAttempt: Promise | null = null; @@ -439,7 +473,7 @@ export class RedisClient { this.handleDisconnect(runtime, error); }, ready: () => { - if (runtime !== this.redis) return; + if (runtime !== this.redis || this.finalizingRuntime) return; this.connected = true; this.reconnectAttempts = 0; this.clearReconnectTimer(); @@ -476,9 +510,21 @@ export class RedisClient { if (previous === runtime) return; this.redis = runtime; - this.connected = true; - this.reconnectAttempts = 0; - this.clearReconnectTimer(); + this.connected = false; + this.finalizingRuntime = true; + + try { + await this.replayPendingFallbackMutations(runtime); + this.connected = true; + this.reconnectAttempts = 0; + this.clearReconnectTimer(); + } catch (error) { + this.finalizingRuntime = false; + this.handleDisconnect(runtime, error); + return; + } + + this.finalizingRuntime = false; if (!previous) return; @@ -562,6 
+608,7 @@ export class RedisClient { private async useRuntime( operation: (runtime: RedisRuntime) => Promise, + options?: { allowMemoryFallback?: boolean }, ): Promise { const runtime = this.redis; if (this.connected && runtime) { @@ -569,21 +616,157 @@ export class RedisClient { return await operation(runtime); } catch (error) { this.handleDisconnect(runtime, error); + if (options?.allowMemoryFallback === false) throw error; } } + if (options?.allowMemoryFallback === false) { + throw new Error( + "Redis hot tier unavailable for durable drain-state mutation", + ); + } + return await operation(this.memory); } + private queuePendingFallbackReplay( + replayKey: string, + replay: (runtime: RedisRuntime) => Promise, + ): void { + this.pendingFallbackReplays.set(replayKey, replay); + } + + private async replayPendingFallbackMutations( + runtime: RedisRuntime, + ): Promise { + while (this.pendingFallbackReplays.size > 0) { + const nextReplay = this.pendingFallbackReplays.entries().next().value; + if (!nextReplay) return; + const [replayKey, replay] = nextReplay; + await replay(runtime); + this.pendingFallbackReplays.delete(replayKey); + } + } + + private queuePendingStringSnapshotReplay(key: string): void { + this.queuePendingFallbackReplay(`string:${key}`, async (runtime) => { + this.hashFallbackKeys.delete(key); + const snapshot = this.memory.snapshot(key); + await runtime.del(key); + + if (snapshot.kind === "missing") return; + if (snapshot.kind !== "string") return; + + if (snapshot.ttlSeconds) { + await runtime.set(key, snapshot.value, "EX", snapshot.ttlSeconds); + return; + } + + await runtime.set(key, snapshot.value); + }); + } + + private queuePendingHashSnapshotReplay(key: string): void { + this.queuePendingFallbackReplay(`hash:${key}`, async (runtime) => { + if (!runtime.hset) return; + const snapshot = this.memory.snapshot(key); + if (snapshot.kind !== "hash") return; + + await runtime.del(key); + this.hashFallbackKeys.delete(key); + await runtime.hset(key, 
snapshot.values); + if (snapshot.ttlSeconds) await runtime.expire(key, snapshot.ttlSeconds); + }); + } + + private queuePendingListSnapshotReplay(key: string): void { + this.queuePendingFallbackReplay(`list:${key}`, async (runtime) => { + const snapshot = this.memory.snapshot(key); + await runtime.del(key); + if (snapshot.kind !== "list") return; + + for (const value of snapshot.values) { + await runtime.rpush(key, value); + } + if (snapshot.ttlSeconds) { + await runtime.expire(key, snapshot.ttlSeconds); + } + }); + } + + private isDurableDrainKey(key: string): boolean { + return key.startsWith("drain:"); + } + + private async replaceMemoryList( + key: string, + values: string[], + ttlSeconds?: number, + ): Promise { + await this.memory.del(key); + for (const value of values) { + await this.memory.rpush(key, value); + } + if (ttlSeconds && values.length > 0) { + await this.memory.expire(key, ttlSeconds); + } + } + + private async syncNonDurableSourceListAfterLiveMove( + key: string, + side: "LEFT" | "RIGHT", + ): Promise { + const snapshot = this.memory.snapshot(key); + if (snapshot.kind !== "list") return; + const values = side === "LEFT" + ? 
snapshot.values.slice(1) + : snapshot.values.slice(0, -1); + await this.replaceMemoryList(key, values, snapshot.ttlSeconds); + } + + private async syncNonDurableDestinationListAfterLiveMove( + key: string, + side: "LEFT" | "RIGHT", + value: string, + ): Promise { + if (side === "LEFT") { + await this.memory.lpush(key, value); + return; + } + await this.memory.rpush(key, value); + } + + private async useMutationRuntime( + keys: string[], + operation: (runtime: RedisRuntime) => Promise, + onFallbackSuccess?: (result: T) => void | Promise, + ): Promise { + return await this.useRuntime(async (runtime) => { + const result = await operation(runtime); + if (runtime === this.memory) { + await onFallbackSuccess?.(result); + } + return result; + }, { + allowMemoryFallback: !keys.some((key) => this.isDurableDrainKey(key)), + }); + } + async prependToList( key: string, value: string, ttlSeconds?: number, ): Promise { - return await this.useRuntime(async (runtime) => { + return await this.useMutationRuntime([key], async (runtime) => { const length = await runtime.lpush(key, value); if (ttlSeconds) await runtime.expire(key, ttlSeconds); + if (runtime !== this.memory && !this.isDurableDrainKey(key)) { + await this.memory.lpush(key, value); + if (ttlSeconds) await this.memory.expire(key, ttlSeconds); + } return length; + }, () => { + this.queuePendingListSnapshotReplay(key); }); } @@ -592,10 +775,16 @@ export class RedisClient { value: string, ttlSeconds?: number, ): Promise { - return await this.useRuntime(async (runtime) => { + return await this.useMutationRuntime([key], async (runtime) => { const length = await runtime.rpush(key, value); if (ttlSeconds) await runtime.expire(key, ttlSeconds); + if (runtime !== this.memory && !this.isDurableDrainKey(key)) { + await this.memory.rpush(key, value); + if (ttlSeconds) await this.memory.expire(key, ttlSeconds); + } return length; + }, () => { + this.queuePendingListSnapshotReplay(key); }); } @@ -627,7 +816,18 @@ export class 
RedisClient { } async setListItem(key: string, index: number, value: string): Promise { - await this.useRuntime((runtime) => runtime.lset(key, index, value)); + await this.useMutationRuntime( + [key], + async (runtime) => { + await runtime.lset(key, index, value); + if (runtime !== this.memory && !this.isDurableDrainKey(key)) { + await this.memory.lset(key, index, value); + } + }, + () => { + this.queuePendingListSnapshotReplay(key); + }, + ); } async getListLength(key: string): Promise { @@ -640,20 +840,66 @@ export class RedisClient { sourceSide: "LEFT" | "RIGHT", destinationSide: "LEFT" | "RIGHT", ): Promise { - return await this.useRuntime((runtime) => - runtime.lmove(source, destination, sourceSide, destinationSide) + return await this.useMutationRuntime( + [source, destination], + async (runtime) => { + const sourceDurable = this.isDurableDrainKey(source); + const destinationDurable = this.isDurableDrainKey(destination); + const result = await runtime.lmove( + source, + destination, + sourceSide, + destinationSide, + ); + if (result !== null && runtime !== this.memory) { + if (!sourceDurable) { + await this.syncNonDurableSourceListAfterLiveMove( + source, + sourceSide, + ); + } + if (!destinationDurable) { + await this.syncNonDurableDestinationListAfterLiveMove( + destination, + destinationSide, + result, + ); + } + } + return result; + }, + (result) => { + if (result === null) return; + this.queuePendingListSnapshotReplay(source); + this.queuePendingListSnapshotReplay(destination); + }, ); } async trimOldest(key: string, count: number): Promise { if (count <= 0) return; - await this.useRuntime(async (runtime) => { + await this.useMutationRuntime([key], async (runtime) => { const length = await runtime.llen(key); if (length <= count) { await runtime.del(key); - return; + if (runtime !== this.memory && !this.isDurableDrainKey(key)) { + await this.memory.del(key); + } + return length > 0; } await runtime.ltrim(key, 0, length - count - 1); + if (runtime !== 
this.memory && !this.isDurableDrainKey(key)) { + const memoryLength = await this.memory.llen(key); + if (memoryLength <= count) { + await this.memory.del(key); + } else { + await this.memory.ltrim(key, 0, memoryLength - count - 1); + } + } + return true; + }, (changed) => { + if (!changed) return; + this.queuePendingListSnapshotReplay(key); }); } @@ -666,12 +912,22 @@ export class RedisClient { value: string, ttlSeconds?: number, ): Promise { - await this.useRuntime(async (runtime) => { + await this.useMutationRuntime([key], async (runtime) => { if (ttlSeconds) { await runtime.set(key, value, "EX", ttlSeconds); + if (runtime !== this.memory && !this.isDurableDrainKey(key)) { + this.hashFallbackKeys.delete(key); + await this.memory.set(key, value, "EX", ttlSeconds); + } return; } await runtime.set(key, value); + if (runtime !== this.memory && !this.isDurableDrainKey(key)) { + this.hashFallbackKeys.delete(key); + await this.memory.set(key, value); + } + }, () => { + this.queuePendingStringSnapshotReplay(key); }); } @@ -680,7 +936,7 @@ export class RedisClient { value: string, ttlSeconds?: number, ): Promise { - return await this.useRuntime(async (runtime) => { + return await this.useMutationRuntime([key], async (runtime) => { if (runtime === this.memory) { return await this.memory.setIfAbsent(key, value, ttlSeconds); } @@ -688,12 +944,42 @@ export class RedisClient { const result = ttlSeconds ? 
await runtime.set(key, value, "NX", "EX", ttlSeconds) : await runtime.set(key, value, "NX"); + if (result === "OK" && !this.isDurableDrainKey(key)) { + this.hashFallbackKeys.delete(key); + if (ttlSeconds) { + await this.memory.set(key, value, "EX", ttlSeconds); + } else { + await this.memory.set(key, value); + } + } return result === "OK"; + }, (acquired) => { + if (!acquired) return; + this.queuePendingStringSnapshotReplay(key); }); } async touch(key: string, ttlSeconds: number): Promise { - await this.useRuntime((runtime) => runtime.expire(key, ttlSeconds)); + await this.useMutationRuntime( + [key], + async (runtime) => { + const changed = await runtime.expire(key, ttlSeconds); + if ( + changed !== 0 && runtime !== this.memory && + !this.isDurableDrainKey(key) + ) { + await this.memory.expire(key, ttlSeconds); + } + return changed; + }, + (changed) => { + if (changed === 0) return; + this.queuePendingFallbackReplay( + `expire:${key}`, + (runtime) => runtime.expire(key, ttlSeconds).then(() => undefined), + ); + }, + ); } async getHashAll(key: string): Promise> { @@ -701,6 +987,18 @@ export class RedisClient { if (runtime === this.memory) { return await this.memory.hgetall(key); } + if (this.hashFallbackKeys.has(key)) { + const fallbackValues = await this.memory.hgetall(key); + if (!runtime.hgetall) { + return fallbackValues; + } + + const liveValues = await runtime.hgetall(key); + return { + ...liveValues, + ...fallbackValues, + }; + } return await runtime.hgetall?.(key) ?? 
{}; }); } @@ -717,11 +1015,19 @@ export class RedisClient { ); if (Object.keys(serialized).length === 0) return; - await this.useRuntime(async (runtime) => { + await this.useMutationRuntime([key], async (runtime) => { + let ttlTarget: RedisRuntime = runtime; if (runtime === this.memory) { + this.hashFallbackKeys.add(key); await this.memory.hset(key, serialized); + ttlTarget = this.memory; } else if (runtime.hset) { + this.hashFallbackKeys.delete(key); await runtime.hset(key, serialized); + if (!this.isDurableDrainKey(key)) { + await this.memory.hset(key, serialized); + if (ttlSeconds) await this.memory.expire(key, ttlSeconds); + } } else { const existing = await runtime.get(key); if (existing !== null) { @@ -729,10 +1035,14 @@ export class RedisClient { "WRONGTYPE Operation against a key holding the wrong kind of value", ); } + this.hashFallbackKeys.add(key); await this.memory.hset(key, serialized); + ttlTarget = this.memory; } - if (ttlSeconds) await runtime.expire(key, ttlSeconds); + if (ttlSeconds) await ttlTarget.expire(key, ttlSeconds); + }, () => { + this.queuePendingHashSnapshotReplay(key); }); } @@ -741,7 +1051,7 @@ export class RedisClient { expectedValue: string, ttlSeconds: number, ): Promise { - return await this.useRuntime(async (runtime) => { + return await this.useMutationRuntime([key], async (runtime) => { if (runtime === this.memory) { return await this.memory.compareAndExpire( key, @@ -758,15 +1068,49 @@ export class RedisClient { String(ttlSeconds), ) ?? 
0; return extended === 1; + }, (extended) => { + if (!extended) return; + this.queuePendingFallbackReplay( + `compareAndTouch:${key}`, + async (runtime) => { + await runtime.eval?.( + "if redis.call('GET', KEYS[1]) == ARGV[1] then return redis.call('EXPIRE', KEYS[1], ARGV[2]) else return 0 end", + 1, + key, + expectedValue, + String(ttlSeconds), + ); + }, + ); }); } async deleteKey(key: string): Promise { - await this.useRuntime((runtime) => runtime.del(key)); + await this.useMutationRuntime( + [key], + async (runtime) => { + const deleted = await runtime.del(key); + if ( + deleted !== 0 && runtime !== this.memory && + !this.isDurableDrainKey(key) + ) { + this.hashFallbackKeys.delete(key); + await this.memory.del(key); + } + return deleted; + }, + (deleted) => { + if (deleted === 0) return; + this.queuePendingFallbackReplay(`del:${key}`, async (runtime) => { + this.hashFallbackKeys.delete(key); + await runtime.del(key); + }); + }, + ); } async deleteKeyIfValue(key: string, expectedValue: string): Promise { - return await this.useRuntime(async (runtime) => { + return await this.useMutationRuntime([key], async (runtime) => { if (runtime === this.memory) { return await this.memory.deleteIfValue(key, expectedValue); } @@ -777,7 +1121,17 @@ export class RedisClient { key, expectedValue, ) ?? 
0; + if (deleted === 1 && !this.isDurableDrainKey(key)) { + this.hashFallbackKeys.delete(key); + await this.memory.del(key); + } return deleted === 1; + }, (deleted) => { + if (!deleted) return; + this.queuePendingFallbackReplay(`delIfValue:${key}`, async (runtime) => { + this.hashFallbackKeys.delete(key); + await runtime.del(key); + }); }); } } diff --git a/src/services/redis-events.test.ts b/src/services/redis-events.test.ts new file mode 100644 index 0000000..bc4d9d6 --- /dev/null +++ b/src/services/redis-events.test.ts @@ -0,0 +1,1020 @@ +import { assertEquals } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { spy } from "jsr:@std/testing@^1.0.0/mock"; +import type { SessionEvent } from "../types/index.ts"; +import { logger } from "./logger.ts"; +import { setSuppressConsoleWarningsDuringTestsOverride } from "./opencode-warning.ts"; +import { RedisClient } from "./redis-client.ts"; +import { + drainClaimActiveKey, + drainClaimCheckpointKey, + drainClaimKey, + drainClaimLockKey, + drainDeadKey, + drainPendingKey, + RedisEventsService, +} from "./redis-events.ts"; + +type RedisEvent = "close" | "end" | "error" | "ready"; + +setSuppressConsoleWarningsDuringTestsOverride(true); + +class ToggleRedisRuntime { + protected readonly values = new Map(); + protected readonly lists = new Map(); + protected readonly listeners = new Map< + RedisEvent, + Set<(...args: unknown[]) => void> + >(); + + constructor(private readonly state: { available: boolean }) {} + + connect(): Promise { + if (!this.state.available) { + return Promise.reject(new Error("redis unavailable")); + } + this.emit("ready"); + return Promise.resolve(); + } + + ping(): Promise<"PONG"> { + if (!this.state.available) { + return Promise.reject(new Error("redis unavailable")); + } + return Promise.resolve("PONG"); + } + + quit(): Promise<"OK"> { + return Promise.resolve("OK"); + } + + protected ensureAvailable(): void { + if (!this.state.available) { + throw 
new Error("redis unavailable"); + } + } + + protected ensureList(key: string): string[] { + if (this.values.has(key)) { + throw new Error( + "WRONGTYPE Operation against a key holding the wrong kind of value", + ); + } + + const existing = this.lists.get(key); + if (existing) return existing; + + const list: string[] = []; + this.lists.set(key, list); + return list; + } + + lpush(key: string, value: string): Promise { + this.ensureAvailable(); + const list = this.ensureList(key); + list.unshift(value); + return Promise.resolve(list.length); + } + + rpush(key: string, value: string): Promise { + this.ensureAvailable(); + const list = this.ensureList(key); + list.push(value); + return Promise.resolve(list.length); + } + + lmove( + _source: string, + _destination: string, + _sourceSide: "LEFT" | "RIGHT", + _destinationSide: "LEFT" | "RIGHT", + ): Promise { + throw new Error("not implemented"); + } + + lrange(key: string, start: number, stop: number): Promise { + this.ensureAvailable(); + const list = this.lists.get(key) ?? []; + const normalizedStop = stop < 0 ? list.length + stop : stop; + return Promise.resolve(list.slice(start, normalizedStop + 1)); + } + + llen(key: string): Promise { + this.ensureAvailable(); + return Promise.resolve((this.lists.get(key) ?? []).length); + } + + ltrim(_key: string, _start: number, _stop: number): Promise { + throw new Error("not implemented"); + } + + lindex(_key: string, _index: number): Promise { + throw new Error("not implemented"); + } + + lset(_key: string, _index: number, _value: string): Promise { + throw new Error("not implemented"); + } + + get(key: string): Promise { + this.ensureAvailable(); + return Promise.resolve(this.values.get(key) ?? 
null); + } + + set( + key: string, + value: string, + ..._args: Array + ): Promise<"OK"> { + this.ensureAvailable(); + this.values.set(key, value); + return Promise.resolve("OK"); + } + + expire(_key: string, _ttlSeconds: number): Promise { + this.ensureAvailable(); + return Promise.resolve(1); + } + + del(key: string): Promise { + this.ensureAvailable(); + const deletedValue = this.values.delete(key); + const deletedList = this.lists.delete(key); + return Promise.resolve(deletedValue || deletedList ? 1 : 0); + } + + eval(_script: string, _numKeys: number, ..._args: string[]): Promise { + throw new Error("not implemented"); + } + + on(event: RedisEvent, listener: (...args: unknown[]) => void): void { + const set = this.listeners.get(event) ?? new Set(); + set.add(listener); + this.listeners.set(event, set); + } + + off(event: RedisEvent, listener: (...args: unknown[]) => void): void { + this.listeners.get(event)?.delete(listener); + } + + emit(event: RedisEvent, ...args: unknown[]): void { + for (const listener of this.listeners.get(event) ?? []) { + listener(...args); + } + } +} + +class ClaimRuntime extends ToggleRedisRuntime { + override lmove( + source: string, + destination: string, + sourceSide: "LEFT" | "RIGHT", + destinationSide: "LEFT" | "RIGHT", + ): Promise { + this.ensureAvailable(); + const sourceList = this.lists.get(source) ?? []; + const value = sourceSide === "LEFT" ? 
sourceList.shift() : sourceList.pop(); + if (value === undefined) return Promise.resolve(null); + const destinationList = this.ensureList(destination); + if (destinationSide === "LEFT") destinationList.unshift(value); + else destinationList.push(value); + return Promise.resolve(value); + } + + override eval( + script: string, + _numKeys: number, + ...args: string[] + ): Promise { + this.ensureAvailable(); + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('DEL', KEYS[1])") + ) { + if (this.values.get(args[0]) !== args[1]) return Promise.resolve(0); + this.values.delete(args[0]); + return Promise.resolve(1); + } + + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('EXPIRE', KEYS[1], ARGV[2])") + ) { + return Promise.resolve(this.values.get(args[0]) === args[1] ? 1 : 0); + } + + throw new Error("unsupported eval script"); + } + + getListSnapshot(key: string): string[] { + return [...(this.lists.get(key) ?? [])]; + } + + getValueSnapshot(key: string): string | null { + return this.values.get(key) ?? 
null; + } + + deleteStoredKey(key: string): void { + this.values.delete(key); + this.lists.delete(key); + } + + seedList(key: string, values: string[]): void { + this.lists.set(key, [...values]); + } +} + +describe("redis events", () => { + it("degrades durable queue writes to a warning during a redis outage", async () => { + const state = { available: true }; + const runtime = new ToggleRedisRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime as never, + }); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 60, + }); + const warnSpy = spy(logger, "warn"); + + const event: SessionEvent = { + id: "event-1", + ts: Date.now(), + category: "decision", + priority: 0, + role: "system", + summary: "Handled startup while redis was unavailable", + }; + + try { + await redis.connect(); + state.available = false; + runtime.emit("close"); + + assertEquals( + await redisEvents.recordEvent("session-1", "group-1", event), + 0, + ); + assertEquals( + (await redisEvents.getRecentSessionEvents("session-1")).map((item) => + item.id + ), + ["event-1"], + ); + assertEquals(await redisEvents.getPendingCount("group-1"), 0); + assertEquals(warnSpy.calls.length, 1); + assertEquals( + warnSpy.calls[0].args[0], + "Durable drain queue unavailable; skipping enqueue", + ); + assertEquals(warnSpy.calls[0].args[1], { + groupId: "group-1", + sessionId: "session-1", + eventId: "event-1", + category: "decision", + }); + } finally { + warnSpy.restore(); + await redis.close(); + } + }); + + it("dead-letters malformed claimed payloads and keeps valid entries claimable FIFO", async () => { + class ClaimRuntime extends ToggleRedisRuntime { + override lmove( + source: string, + destination: string, + sourceSide: "LEFT" | "RIGHT", + destinationSide: "LEFT" | "RIGHT", + ): Promise { + this.ensureAvailable(); + const sourceList = this.lists.get(source) ?? 
[]; + const value = sourceSide === "LEFT" + ? sourceList.shift() + : sourceList.pop(); + if (value === undefined) return Promise.resolve(null); + const destinationList = this.ensureList(destination); + if (destinationSide === "LEFT") destinationList.unshift(value); + else destinationList.push(value); + return Promise.resolve(value); + } + + override eval( + script: string, + _numKeys: number, + ...args: string[] + ): Promise { + this.ensureAvailable(); + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('DEL', KEYS[1])") + ) { + if (this.values.get(args[0]) !== args[1]) return Promise.resolve(0); + this.values.delete(args[0]); + return Promise.resolve(1); + } + + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('EXPIRE', KEYS[1], ARGV[2])") + ) { + return Promise.resolve(this.values.get(args[0]) === args[1] ? 1 : 0); + } + + throw new Error("unsupported eval script"); + } + } + + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ClaimRuntime({ available: true }) as never, + }); + await redis.connect(); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 60, + claimLockTtlSeconds: 5, + }); + const warnSpy = spy(logger, "warn"); + const validFirst = { + sessionId: "session-1", + groupId: "group-1", + event: { + id: "event-1", + ts: Date.now(), + category: "message", + priority: 0, + role: "user", + summary: "first", + body: "first", + }, + }; + const validSecond = { + sessionId: "session-1", + groupId: "group-1", + event: { + id: "event-2", + ts: Date.now() + 1, + category: "message", + priority: 0, + role: "user", + summary: "second", + body: "second", + }, + }; + + try { + await redis.prependToList( + drainPendingKey("group-1"), + JSON.stringify(validSecond), + 60, + ); + await redis.prependToList( + drainPendingKey("group-1"), + "not-json", + 60, + ); + await redis.prependToList( + drainPendingKey("group-1"), 
+ JSON.stringify(validFirst), + 60, + ); + + const claimed = await redisEvents.getPendingBatch("group-1", 3, 20_000); + + assertEquals(claimed?.entries.map((entry) => entry.event.id), [ + "event-2", + "event-1", + ]); + assertEquals(await redis.getListRange(drainDeadKey("group-1"), 0, -1), [ + "not-json", + ]); + assertEquals(warnSpy.calls.length, 1); + assertEquals( + warnSpy.calls[0].args[0], + "Dead-lettered malformed claimed drain payload", + ); + } finally { + warnSpy.restore(); + await redis.close(); + } + }); + + it("cleans up empty claims when every claimed payload is malformed", async () => { + class ClaimRuntime extends ToggleRedisRuntime { + override lmove( + source: string, + destination: string, + sourceSide: "LEFT" | "RIGHT", + destinationSide: "LEFT" | "RIGHT", + ): Promise { + this.ensureAvailable(); + const sourceList = this.lists.get(source) ?? []; + const value = sourceSide === "LEFT" + ? sourceList.shift() + : sourceList.pop(); + if (value === undefined) return Promise.resolve(null); + const destinationList = this.ensureList(destination); + if (destinationSide === "LEFT") destinationList.unshift(value); + else destinationList.push(value); + return Promise.resolve(value); + } + + override eval( + script: string, + _numKeys: number, + ...args: string[] + ): Promise { + this.ensureAvailable(); + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('DEL', KEYS[1])") + ) { + if (this.values.get(args[0]) !== args[1]) return Promise.resolve(0); + this.values.delete(args[0]); + return Promise.resolve(1); + } + + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('EXPIRE', KEYS[1], ARGV[2])") + ) { + return Promise.resolve(this.values.get(args[0]) === args[1] ? 
1 : 0); + } + + throw new Error("unsupported eval script"); + } + } + + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ClaimRuntime({ available: true }) as never, + }); + await redis.connect(); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 60, + claimLockTtlSeconds: 5, + }); + + try { + await redis.prependToList(drainPendingKey("group-1"), "bad-1", 60); + await redis.prependToList(drainPendingKey("group-1"), "bad-2", 60); + + const claimed = await redisEvents.getPendingBatch("group-1", 2, 20_000); + + assertEquals(claimed, null); + assertEquals(await redis.getString(drainClaimActiveKey("group-1")), null); + assertEquals(await redis.getListRange(drainDeadKey("group-1"), 0, -1), [ + "bad-1", + "bad-2", + ]); + } finally { + await redis.close(); + } + }); + + it("dead-letters an oversized oldest claimed entry, warns, and continues to later eligible entries", async () => { + class ClaimRuntime extends ToggleRedisRuntime { + override lmove( + source: string, + destination: string, + sourceSide: "LEFT" | "RIGHT", + destinationSide: "LEFT" | "RIGHT", + ): Promise { + this.ensureAvailable(); + const sourceList = this.lists.get(source) ?? []; + const value = sourceSide === "LEFT" + ? 
sourceList.shift() + : sourceList.pop(); + if (value === undefined) return Promise.resolve(null); + const destinationList = this.ensureList(destination); + if (destinationSide === "LEFT") destinationList.unshift(value); + else destinationList.push(value); + return Promise.resolve(value); + } + + override eval( + script: string, + _numKeys: number, + ...args: string[] + ): Promise { + this.ensureAvailable(); + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('DEL', KEYS[1])") + ) { + if (this.values.get(args[0]) !== args[1]) return Promise.resolve(0); + this.values.delete(args[0]); + return Promise.resolve(1); + } + + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('EXPIRE', KEYS[1], ARGV[2])") + ) { + return Promise.resolve(this.values.get(args[0]) === args[1] ? 1 : 0); + } + + throw new Error("unsupported eval script"); + } + } + + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ClaimRuntime({ available: true }) as never, + }); + await redis.connect(); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 60, + claimLockTtlSeconds: 5, + }); + const warnSpy = spy(logger, "warn"); + const oversizedFirst = { + sessionId: "session-1", + groupId: "group-1", + event: { + id: "event-1", + ts: Date.now(), + category: "message", + priority: 0, + role: "user", + summary: "oversized", + body: "x".repeat(8_000), + }, + }; + const eligibleSecond = { + sessionId: "session-1", + groupId: "group-1", + event: { + id: "event-2", + ts: Date.now() + 1, + category: "message", + priority: 0, + role: "user", + summary: "fits", + body: "fits", + }, + }; + const maxBytes = 1_000; + + try { + await redis.prependToList( + drainPendingKey("group-1"), + JSON.stringify(oversizedFirst), + 60, + ); + await redis.prependToList( + drainPendingKey("group-1"), + JSON.stringify(eligibleSecond), + 60, + ); + + const claimed = await 
redisEvents.getPendingBatch("group-1", 2, maxBytes); + + assertEquals(claimed?.entries.map((entry) => entry.event.id), [ + "event-2", + ]); + assertEquals( + (await redis.getListRange(drainDeadKey("group-1"), 0, -1)).map((item) => + JSON.parse(item).event.id + ), + ["event-1"], + ); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); + assertEquals(warnSpy.calls.length, 1); + const warning = warnSpy.calls[0].args[1] as { + groupId: string; + claimToken: string; + eventId: string; + eventBytes: unknown; + batchMaxBytes: number; + }; + assertEquals( + warnSpy.calls[0].args[0], + "Dead-lettered oversized claimed drain payload", + ); + assertEquals(warning, { + groupId: "group-1", + claimToken: claimed!.claimToken, + eventId: "event-1", + eventBytes: warning.eventBytes, + batchMaxBytes: maxBytes, + }); + assertEquals(typeof warning.eventBytes, "number"); + } finally { + warnSpy.restore(); + await redis.close(); + } + }); + + it("recovers only the uncheckpointed suffix ahead of newer pending entries", async () => { + class ClaimRuntime extends ToggleRedisRuntime { + override lmove( + source: string, + destination: string, + sourceSide: "LEFT" | "RIGHT", + destinationSide: "LEFT" | "RIGHT", + ): Promise { + this.ensureAvailable(); + const sourceList = this.lists.get(source) ?? []; + const value = sourceSide === "LEFT" + ? 
sourceList.shift() + : sourceList.pop(); + if (value === undefined) return Promise.resolve(null); + const destinationList = this.ensureList(destination); + if (destinationSide === "LEFT") destinationList.unshift(value); + else destinationList.push(value); + return Promise.resolve(value); + } + + override eval( + script: string, + _numKeys: number, + ...args: string[] + ): Promise { + this.ensureAvailable(); + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('DEL', KEYS[1])") + ) { + if (this.values.get(args[0]) !== args[1]) return Promise.resolve(0); + this.values.delete(args[0]); + return Promise.resolve(1); + } + + if ( + script.includes("redis.call('GET', KEYS[1]) == ARGV[1]") && + script.includes("redis.call('EXPIRE', KEYS[1], ARGV[2])") + ) { + return Promise.resolve(this.values.get(args[0]) === args[1] ? 1 : 0); + } + + throw new Error("unsupported eval script"); + } + } + + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new ClaimRuntime({ available: true }) as never, + }); + await redis.connect(); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 60, + claimLockTtlSeconds: 5, + }); + const first: { sessionId: string; groupId: string; event: SessionEvent } = { + sessionId: "session-1", + groupId: "group-1", + event: { + id: "event-1", + ts: Date.now(), + category: "message", + priority: 0, + role: "user", + summary: "first", + body: "first", + }, + }; + const second: { sessionId: string; groupId: string; event: SessionEvent } = + { + sessionId: "session-1", + groupId: "group-1", + event: { + id: "event-2", + ts: Date.now() + 1, + category: "message", + priority: 0, + role: "user", + summary: "second", + body: "second", + }, + }; + const third: { sessionId: string; groupId: string; event: SessionEvent } = { + sessionId: "session-2", + groupId: "group-1", + event: { + id: "event-3", + ts: Date.now() + 2, + category: "message", + priority: 0, + role: 
"user", + summary: "third", + body: "third", + }, + }; + + try { + await redis.prependToList( + drainPendingKey("group-1"), + JSON.stringify(first), + 60, + ); + await redis.prependToList( + drainPendingKey("group-1"), + JSON.stringify(second), + 60, + ); + + const claimed = await redisEvents.getPendingBatch("group-1", 2, 20_000); + assertEquals(claimed?.entries.map((entry) => entry.event.id), [ + "event-1", + "event-2", + ]); + + await redisEvents.markClaimEntrySuccess("group-1", claimed!.claimToken, { + sessionId: first.sessionId, + groupId: first.groupId, + event: first.event, + }); + await redis.prependToList( + drainPendingKey("group-1"), + JSON.stringify(third), + 60, + ); + await redis.deleteKey(drainClaimLockKey("group-1")); + + const recovered = await redisEvents.recoverAbandonedClaim("group-1"); + + assertEquals(recovered, true); + assertEquals( + (await redis.getListRange(drainPendingKey("group-1"), 0, -1)).map( + (item) => JSON.parse(item).event.id, + ), + ["event-3", "event-2"], + ); + + const replayed = await redisEvents.getPendingBatch("group-1", 2, 20_000); + assertEquals(replayed?.entries.map((entry) => entry.event.id), [ + "event-2", + "event-3", + ]); + assertEquals( + await redis.getString(drainClaimActiveKey("group-1")), + replayed?.claimToken ?? 
null, + ); + } finally { + await redis.close(); + } + }); + + it("fails closed for abandoned-claim recovery while redis is disconnected", async () => { + const state = { available: true }; + const runtime = new ClaimRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime as never, + }); + await redis.connect(); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 60, + claimLockTtlSeconds: 5, + }); + const first = { + sessionId: "session-1", + groupId: "group-1", + event: { + id: "event-1", + ts: Date.now(), + category: "message", + priority: 0, + role: "user", + summary: "first", + body: "first", + } satisfies SessionEvent, + }; + const second = { + sessionId: "session-1", + groupId: "group-1", + event: { + id: "event-2", + ts: Date.now() + 1, + category: "message", + priority: 0, + role: "user", + summary: "second", + body: "second", + } satisfies SessionEvent, + }; + + try { + await redis.prependToList( + drainPendingKey("group-1"), + JSON.stringify(first), + 60, + ); + await redis.prependToList( + drainPendingKey("group-1"), + JSON.stringify(second), + 60, + ); + + const claimed = await redisEvents.getPendingBatch("group-1", 2, 20_000); + await redis.deleteKey(drainClaimLockKey("group-1")); + + state.available = false; + runtime.emit("close"); + + const recovered = await redisEvents.recoverAbandonedClaim("group-1"); + + assertEquals(recovered, false); + assertEquals(redis.isConnected(), false); + assertEquals( + runtime.getValueSnapshot(drainClaimActiveKey("group-1")), + claimed?.claimToken ?? 
null, + ); + assertEquals(runtime.getListSnapshot(drainPendingKey("group-1")), []); + assertEquals( + runtime.getListSnapshot(drainClaimKey("group-1", claimed!.claimToken)), + claimed!.entries.map((entry) => + JSON.stringify({ + sessionId: entry.sessionId, + groupId: entry.groupId, + event: entry.event, + }) + ), + ); + } finally { + await redis.close(); + } + }); + + it("fails closed for claim lease refresh when redis disconnects", async () => { + const state = { available: true }; + const runtime = new ClaimRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime as never, + }); + await redis.connect(); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 60, + claimLockTtlSeconds: 5, + }); + + try { + await redis.prependToList( + drainPendingKey("group-1"), + JSON.stringify({ + sessionId: "session-1", + groupId: "group-1", + event: { + id: "event-1", + ts: Date.now(), + category: "message", + priority: 0, + role: "user", + summary: "first", + body: "first", + } satisfies SessionEvent, + }), + 60, + ); + + const claimed = await redisEvents.getPendingBatch("group-1", 1, 20_000); + state.available = false; + runtime.emit("close"); + + const refreshed = await redisEvents.refreshClaimLease( + "group-1", + claimed!.claimToken, + 5, + ); + + assertEquals(refreshed, false); + assertEquals(redis.isConnected(), false); + assertEquals( + runtime.getValueSnapshot(drainClaimActiveKey("group-1")), + claimed!.claimToken, + ); + } finally { + await redis.close(); + } + }); + + it("cleans up only the same-token stale claim residue after reconnect", async () => { + const state = { available: true }; + const runtime = new ClaimRuntime(state); + const redis = new RedisClient({ + endpoint: "redis://unused", + reconnectBaseDelayMs: 10, + reconnectMaxDelayMs: 10, + runtimeFactory: () => runtime as never, + }); + await redis.connect(); + const redisEvents 
= new RedisEventsService(redis, { + sessionTtlSeconds: 60, + claimLockTtlSeconds: 5, + }); + const first = { + sessionId: "session-1", + groupId: "group-1", + event: { + id: "event-1", + ts: Date.now(), + category: "message", + priority: 0, + role: "user", + summary: "first", + body: "first", + } satisfies SessionEvent, + }; + const second = { + sessionId: "session-1", + groupId: "group-1", + event: { + id: "event-2", + ts: Date.now() + 1, + category: "message", + priority: 0, + role: "user", + summary: "second", + body: "second", + } satisfies SessionEvent, + }; + const unrelated = { + sessionId: "session-9", + groupId: "group-1", + event: { + id: "event-9", + ts: Date.now() + 9, + category: "message", + priority: 0, + role: "user", + summary: "unrelated", + body: "unrelated", + } satisfies SessionEvent, + }; + const unrelatedToken = "other-token"; + + try { + await redis.prependToList( + drainPendingKey("group-1"), + JSON.stringify(first), + 60, + ); + await redis.prependToList( + drainPendingKey("group-1"), + JSON.stringify(second), + 60, + ); + + const claimed = await redisEvents.getPendingBatch("group-1", 2, 20_000); + runtime.seedList(drainClaimKey("group-1", unrelatedToken), [ + JSON.stringify(unrelated), + ]); + runtime.seedList(drainClaimCheckpointKey("group-1", unrelatedToken), []); + + state.available = false; + runtime.emit("error", new Error("redis unavailable")); + runtime.deleteStoredKey(drainClaimLockKey("group-1")); + + state.available = true; + await new Promise((resolve) => setTimeout(resolve, 30)); + + const refreshed = await redisEvents.refreshClaimLease( + "group-1", + claimed!.claimToken, + 5, + ); + + assertEquals(redis.isConnected(), true); + assertEquals(refreshed, false); + assertEquals( + await redis.getString(drainClaimActiveKey("group-1")), + claimed!.claimToken, + ); + assertEquals( + await redis.getListRange( + drainClaimKey("group-1", claimed!.claimToken), + 0, + -1, + ), + [JSON.stringify(first), JSON.stringify(second)], + ); + 
const recovered = await redisEvents.recoverAbandonedClaim("group-1"); + assertEquals(recovered, true); + assertEquals(await redis.getString(drainClaimActiveKey("group-1")), null); + assertEquals( + (await redis.getListRange(drainPendingKey("group-1"), 0, -1)).map(( + raw, + ) => JSON.parse(raw).event.id), + ["event-2", "event-1"], + ); + assertEquals( + await redis.getListRange( + drainClaimKey("group-1", unrelatedToken), + 0, + -1, + ), + [JSON.stringify(unrelated)], + ); + } finally { + await redis.close(); + } + }); +}); diff --git a/src/services/redis-events.ts b/src/services/redis-events.ts index abc597e..84238b5 100644 --- a/src/services/redis-events.ts +++ b/src/services/redis-events.ts @@ -1,10 +1,16 @@ import type { ClaimedDrainBatch, DrainQueueEntry, + PreparedDrainQueueEntry, SessionEvent, } from "../types/index.ts"; import { getSessionEventRecallText } from "../types/index.ts"; +import { logger } from "./logger.ts"; import type { RedisClient } from "./redis-client.ts"; +import { + sanitizeMemoryInput, + stripInjectedMemoryBlocks, +} from "./render-utils.ts"; const SESSION_EVENT_LIMIT = 40; const SESSION_RECALL_SCAN_LIMIT = 120; @@ -77,12 +83,19 @@ export const drainRetryKey = (groupId: string, batchKey: string): string => `drain:retry:${groupId}:${batchKey}`; export const drainClaimKey = (groupId: string, claimToken: string): string => `drain:claim:${groupId}:${claimToken}`; +export const drainClaimCheckpointKey = ( + groupId: string, + claimToken: string, +): string => `drain:claim-checkpoint:${groupId}:${claimToken}`; export const drainClaimActiveKey = (groupId: string): string => `drain:claim-active:${groupId}`; export const drainClaimLockKey = (groupId: string): string => `drain:claim-lock:${groupId}`; const makeClaimToken = (): string => crypto.randomUUID(); +const textEncoder = new TextEncoder(); +const DURABLE_DRAIN_MUTATION_UNAVAILABLE = + "Redis hot tier unavailable for durable drain-state mutation"; const parseEntry = (raw: string): 
DrainQueueEntry | null => { try { @@ -100,6 +113,94 @@ const parseSessionEvent = (raw: string): SessionEvent | null => { } }; +export const buildDrainEpisodeBody = (entry: DrainQueueEntry): string => { + const refs = entry.event.refs?.length + ? `\nRefs: ${entry.event.refs.join(", ")}` + : ""; + const keywords = entry.event.keywords?.length + ? `\nKeywords: ${entry.event.keywords.join(", ")}` + : ""; + return sanitizeMemoryInput(stripInjectedMemoryBlocks( + [ + `Category: ${entry.event.category}`, + `Role: ${entry.event.role}`, + `Summary: ${entry.event.summary}`, + entry.event.detail ? `Detail: ${entry.event.detail}` : "", + entry.event.continuityText + ? `Continuity: ${entry.event.continuityText}` + : getSessionEventRecallText(entry.event), + entry.event.body ? `Body: ${entry.event.body}` : "", + keywords, + refs, + ].filter(Boolean).join("\n"), + )); +}; + +export const prepareDrainQueueEntry = ( + entry: DrainQueueEntry, +): PreparedDrainQueueEntry => { + const episodeBody = buildDrainEpisodeBody(entry); + return { + ...entry, + episodeBody, + episodeBodyBytes: textEncoder.encode(episodeBody).length, + }; +}; + +export const getDrainEpisodeBodyBytes = (entry: DrainQueueEntry): number => + prepareDrainQueueEntry(entry).episodeBodyBytes; + +const sanitizeStoredValue = (value: unknown): unknown => { + if (typeof value === "string") { + const sanitized = sanitizeMemoryInput(value); + return sanitized || undefined; + } + if (Array.isArray(value)) { + return value.map((item) => sanitizeStoredValue(item)).filter((item) => + item !== undefined + ); + } + if (value && typeof value === "object") { + return Object.fromEntries( + Object.entries(value).flatMap(([key, entry]) => { + const sanitized = sanitizeStoredValue(entry); + return sanitized === undefined ? 
[] : [[key, sanitized]]; + }), + ); + } + return value; +}; + +const sanitizedStoredString = (value: unknown): string | undefined => { + const sanitized = sanitizeStoredValue(value); + return typeof sanitized === "string" ? sanitized : undefined; +}; + +const sanitizedStoredStringArray = (value: unknown): string[] | undefined => { + const sanitized = sanitizeStoredValue(value); + return Array.isArray(sanitized) ? sanitized as string[] : undefined; +}; + +const sanitizedStoredMetadata = ( + value: unknown, +): Record | undefined => { + const sanitized = sanitizeStoredValue(value); + return sanitized && typeof sanitized === "object" && !Array.isArray(sanitized) + ? sanitized as Record + : undefined; +}; + +const sanitizeStoredEvent = (event: SessionEvent): SessionEvent => ({ + ...event, + summary: sanitizeMemoryInput(event.summary), + body: sanitizedStoredString(event.body), + detail: sanitizedStoredString(event.detail), + continuityText: sanitizedStoredString(event.continuityText), + refs: sanitizedStoredStringArray(event.refs), + keywords: sanitizedStoredStringArray(event.keywords), + metadata: sanitizedStoredMetadata(event.metadata), +}); + const tokenizeRecallQuery = (query: string): string[] => { const matches = query.toLowerCase().match(/[a-z0-9._/-]{3,}/g) ?? []; return [...new Set(matches.filter((token) => !RECALL_STOP_WORDS.has(token)))]; @@ -143,13 +244,37 @@ export interface RedisEventsServiceOptions { } export class RedisEventsService { + private warnedInvalidClaimLockTtl = false; + constructor( private readonly redis: RedisClient, private readonly options: RedisEventsServiceOptions, ) {} getClaimLockTtlSeconds(): number { - return this.options.claimLockTtlSeconds ?? 
CLAIM_LOCK_TTL_SECONDS; + const configured = this.options.claimLockTtlSeconds; + if (configured === undefined) return CLAIM_LOCK_TTL_SECONDS; + + if (!Number.isFinite(configured) || configured <= 0) { + if (!this.warnedInvalidClaimLockTtl) { + logger.warn("Invalid drain claim TTL; falling back to default", { + configuredClaimLockTtlSeconds: configured, + effectiveClaimLockTtlSeconds: CLAIM_LOCK_TTL_SECONDS, + }); + this.warnedInvalidClaimLockTtl = true; + } + return CLAIM_LOCK_TTL_SECONDS; + } + + const normalized = Math.max(1, Math.ceil(configured)); + if (normalized !== configured && !this.warnedInvalidClaimLockTtl) { + logger.warn("Raised drain claim TTL to a sane minimum", { + configuredClaimLockTtlSeconds: configured, + effectiveClaimLockTtlSeconds: normalized, + }); + this.warnedInvalidClaimLockTtl = true; + } + return normalized; } async recordEvent( @@ -157,17 +282,47 @@ export class RedisEventsService { groupId: string, event: SessionEvent, ): Promise { - const queueEntry: DrainQueueEntry = { sessionId, groupId, event }; + const sanitizedEvent = sanitizeStoredEvent(event); + const queueEntry: DrainQueueEntry = { + sessionId, + groupId, + event: sanitizedEvent, + }; await this.redis.prependToList( sessionEventsKey(sessionId), - JSON.stringify(event), + JSON.stringify(sanitizedEvent), this.options.sessionTtlSeconds, ); - return await this.redis.prependToList( - drainPendingKey(groupId), - JSON.stringify(queueEntry), - DRAIN_TTL_SECONDS, - ); + try { + return await this.redis.prependToList( + drainPendingKey(groupId), + JSON.stringify(queueEntry), + DRAIN_TTL_SECONDS, + ); + } catch (error) { + if (!this.isDurableDrainMutationUnavailable(error)) { + throw error; + } + + logger.warn("Durable drain queue unavailable; skipping enqueue", { + groupId, + sessionId, + eventId: sanitizedEvent.id, + category: sanitizedEvent.category, + }); + return 0; + } + } + + private isDurableDrainMutationUnavailable(error: unknown): boolean { + return error instanceof Error && + 
error.message === DURABLE_DRAIN_MUTATION_UNAVAILABLE; + } + + private isRedisUnavailable(error: unknown): boolean { + return error instanceof Error && + (error.message === DURABLE_DRAIN_MUTATION_UNAVAILABLE || + error.message.includes("redis unavailable")); } async getRecentSessionEvents( @@ -252,6 +407,7 @@ export class RedisEventsService { const claimToken = makeClaimToken(); const claimKey = drainClaimKey(groupId, claimToken); + const checkpointKey = drainClaimCheckpointKey(groupId, claimToken); const lockAcquired = await this.redis.setStringIfAbsent( drainClaimLockKey(groupId), claimToken, @@ -259,16 +415,16 @@ export class RedisEventsService { ); if (!lockAcquired) return null; - await this.redis.setString( - drainClaimActiveKey(groupId), - claimToken, - DRAIN_TTL_SECONDS, - ); - - const selected: DrainQueueEntry[] = []; + const selected: PreparedDrainQueueEntry[] = []; let totalBytes = 0; try { + await this.redis.setString( + drainClaimActiveKey(groupId), + claimToken, + DRAIN_TTL_SECONDS, + ); + while (selected.length < maxItems) { const raw = await this.redis.moveListItem( pendingKey, @@ -280,22 +436,60 @@ export class RedisEventsService { await this.redis.touch(claimKey, DRAIN_TTL_SECONDS); const entry = parseEntry(raw); - if (!entry) continue; + if (!entry) { + await this.redis.moveListItem( + claimKey, + drainDeadKey(groupId), + "RIGHT", + "RIGHT", + ); + await this.redis.touch( + drainDeadKey(groupId), + DEAD_LETTER_TTL_SECONDS, + ); + logger.warn("Dead-lettered malformed claimed drain payload", { + groupId, + claimToken, + raw, + }); + continue; + } + + const preparedEntry = prepareDrainQueueEntry(entry); + const bytes = preparedEntry.episodeBodyBytes; + if (bytes > maxBytes) { + await this.redis.moveListItem( + claimKey, + drainDeadKey(groupId), + "RIGHT", + "RIGHT", + ); + await this.redis.touch( + drainDeadKey(groupId), + DEAD_LETTER_TTL_SECONDS, + ); + logger.warn("Dead-lettered oversized claimed drain payload", { + groupId, + claimToken, + 
eventId: entry.event.id, + eventBytes: bytes, + batchMaxBytes: maxBytes, + }); + continue; + } - const bytes = new TextEncoder().encode( - getSessionEventRecallText(entry.event), - ).length; if (selected.length > 0 && totalBytes + bytes > maxBytes) { await this.redis.moveListItem(claimKey, pendingKey, "RIGHT", "RIGHT"); break; } - selected.push(entry); + selected.push(preparedEntry); totalBytes += bytes; } if (selected.length === 0) { await this.redis.deleteKey(claimKey); + await this.redis.deleteKey(checkpointKey); await this.redis.deleteKeyIfValue( drainClaimActiveKey(groupId), claimToken, @@ -322,27 +516,87 @@ export class RedisEventsService { async refreshClaimLease( groupId: string, claimToken: string, - ttlSeconds = this.getClaimLockTtlSeconds(), + ttlSeconds: number = this.getClaimLockTtlSeconds(), ): Promise { - const lockRefreshed = await this.redis.compareAndTouch( - drainClaimLockKey(groupId), - claimToken, - ttlSeconds, - ); - if (!lockRefreshed) return false; + try { + const lockRefreshed = await this.redis.compareAndTouch( + drainClaimLockKey(groupId), + claimToken, + ttlSeconds, + ); + if (!lockRefreshed) { + return false; + } - const activeRefreshed = await this.redis.compareAndTouch( - drainClaimActiveKey(groupId), - claimToken, - DRAIN_TTL_SECONDS, - ); - if (!activeRefreshed) return false; + const activeRefreshed = await this.redis.compareAndTouch( + drainClaimActiveKey(groupId), + claimToken, + DRAIN_TTL_SECONDS, + ); + if (!activeRefreshed) { + return false; + } - await this.redis.touch( - drainClaimKey(groupId, claimToken), - DRAIN_TTL_SECONDS, - ); - return true; + await this.redis.touch( + drainClaimKey(groupId, claimToken), + DRAIN_TTL_SECONDS, + ); + return true; + } catch (error) { + if (!this.isRedisUnavailable(error)) throw error; + return false; + } + } + + private async cleanupStaleClaimIfConnected( + groupId: string, + claimToken: string, + ): Promise { + if (!this.redis.isConnected()) return false; + + const activeKey = 
drainClaimActiveKey(groupId); + const lockKey = drainClaimLockKey(groupId); + const claimKey = drainClaimKey(groupId, claimToken); + const checkpointKey = drainClaimCheckpointKey(groupId, claimToken); + let activeToken: string | null; + let lockToken: string | null; + let claimLength: number; + let checkpointLength: number; + try { + [activeToken, lockToken, claimLength, checkpointLength] = await Promise + .all([ + this.redis.getString(activeKey), + this.redis.getString(lockKey), + this.redis.getListLength(claimKey), + this.redis.getListLength(checkpointKey), + ]); + } catch (error) { + if (!this.isRedisUnavailable(error)) throw error; + return false; + } + + if (claimLength === 0 && checkpointLength === 0) return false; + + const missingLockForSameActive = activeToken === claimToken && + lockToken === null; + const missingActiveForSameLock = lockToken === claimToken && + activeToken !== claimToken; + const orphanedPointers = activeToken === null && lockToken === null; + + if ( + !missingLockForSameActive && + !missingActiveForSameLock && + !orphanedPointers + ) { + return false; + } + try { + await this.releaseClaim(groupId, claimToken); + return true; + } catch (error) { + if (!this.isRedisUnavailable(error)) throw error; + return false; + } } async markBatchSuccess( @@ -353,6 +607,7 @@ export class RedisEventsService { if (entries.length === 0) return; await this.redis.deleteKey(drainClaimKey(groupId, claimToken)); + await this.redis.deleteKey(drainClaimCheckpointKey(groupId, claimToken)); await this.redis.deleteKeyIfValue(drainClaimActiveKey(groupId), claimToken); await this.redis.deleteKeyIfValue(drainClaimLockKey(groupId), claimToken); await this.redis.setString( @@ -381,6 +636,7 @@ export class RedisEventsService { ): Promise { const pendingKey = drainPendingKey(groupId); const claimKey = drainClaimKey(groupId, claimToken); + const checkpointKey = drainClaimCheckpointKey(groupId, claimToken); while (true) { const raw = await this.redis.moveListItem( @@ 
-393,18 +649,79 @@ export class RedisEventsService { } await this.redis.deleteKey(claimKey); + await this.redis.deleteKey(checkpointKey); await this.redis.deleteKeyIfValue(drainClaimActiveKey(groupId), claimToken); await this.redis.deleteKeyIfValue(drainClaimLockKey(groupId), claimToken); } + async markClaimEntrySuccess( + groupId: string, + claimToken: string, + entry: DrainQueueEntry, + ): Promise { + const checkpointKey = drainClaimCheckpointKey(groupId, claimToken); + const checkpointCount = await this.redis.getListLength(checkpointKey); + if (checkpointCount > 0) { + const latestCheckpoint = await this.redis.getListRange( + checkpointKey, + checkpointCount - 1, + checkpointCount - 1, + ); + if (parseEntry(latestCheckpoint[0] ?? "")?.event.id === entry.event.id) { + return; + } + } + + const raw = await this.redis.moveListItem( + drainClaimKey(groupId, claimToken), + checkpointKey, + "LEFT", + "RIGHT", + ); + if (!raw) return; + + const claimedEntry = parseEntry(raw); + if (claimedEntry?.event.id !== entry.event.id) { + throw new Error( + `Drain claim checkpoint order mismatch for event ${entry.event.id}`, + ); + } + + await this.redis.touch(checkpointKey, DRAIN_TTL_SECONDS); + await this.redis.setString( + drainCursorKey(groupId), + entry.event.id, + DRAIN_TTL_SECONDS, + ); + } + async recoverAbandonedClaim(groupId: string): Promise { - const claimToken = await this.redis.getString(drainClaimActiveKey(groupId)); - if (!claimToken) return false; + if (!this.redis.isConnected()) return false; - const lockToken = await this.redis.getString(drainClaimLockKey(groupId)); - if (lockToken) return false; + let activeToken: string | null; + let lockToken: string | null; + try { + [activeToken, lockToken] = await Promise.all([ + this.redis.getString(drainClaimActiveKey(groupId)), + this.redis.getString(drainClaimLockKey(groupId)), + ]); + } catch (error) { + if (!this.isRedisUnavailable(error)) throw error; + return false; + } + const claimTokens = [activeToken, 
lockToken].filter( + (token): token is string => { + return typeof token === "string" && token.length > 0; + }, + ); + if (claimTokens.length === 0) return false; + + for (const claimToken of new Set(claimTokens)) { + if (await this.cleanupStaleClaimIfConnected(groupId, claimToken)) { + return true; + } + } - await this.releaseClaim(groupId, claimToken); - return true; + return false; } } diff --git a/src/services/redis-snapshot.ts b/src/services/redis-snapshot.ts index 775cd5d..5c991fa 100644 --- a/src/services/redis-snapshot.ts +++ b/src/services/redis-snapshot.ts @@ -2,14 +2,16 @@ import { getSessionEventPrimaryText, type SessionEvent, } from "../types/index.ts"; +import type { RedisClient } from "./redis-client.ts"; +import { sessionSnapshotKey } from "./redis-events.ts"; import { escapeXml, + normalizeMemoryText, renderXmlListSection, renderXmlSingleSection, - uniqueValues, + sanitizeMemoryInput, + uniqueNormalizedValues, } from "./render-utils.ts"; -import type { RedisClient } from "./redis-client.ts"; -import { sessionSnapshotKey } from "./redis-events.ts"; const SNAPSHOT_BUDGET = 3_000; const BLOCKER_PATTERN = /\b(blocker|blocked|blocking)\b/i; @@ -19,8 +21,9 @@ const selectRecent = ( predicate: (event: SessionEvent) => boolean, map: (event: SessionEvent) => string | string[] | undefined, limit: number, + excludedNormalized = new Set(), ): string[] => - uniqueValues( + uniqueNormalizedValues( events.flatMap((event) => { if (!predicate(event)) return []; const value = map(event); @@ -28,6 +31,7 @@ const selectRecent = ( return Array.isArray(value) ? 
value : [value]; }).reverse(), limit, + excludedNormalized, ); export const buildSessionSnapshotXml = ( @@ -37,15 +41,22 @@ export const buildSessionSnapshotXml = ( const decisions = selectRecent( events, (event) => ["decision", "preference"].includes(event.category), - (event) => getSessionEventPrimaryText(event), + (event) => sanitizeMemoryInput(getSessionEventPrimaryText(event)), 5, ); + const occupiedNormalized = new Set( + decisions.map((value) => normalizeMemoryText(value)).filter(Boolean), + ); const constraints = selectRecent( events, (event) => event.category === "rule.load", - (event) => getSessionEventPrimaryText(event), + (event) => sanitizeMemoryInput(getSessionEventPrimaryText(event)), 5, + occupiedNormalized, ); + for (const value of constraints) { + occupiedNormalized.add(normalizeMemoryText(value)); + } const latestUserRequest = getSessionEventPrimaryText( events.findLast((event) => event.role === "user") ?? { id: "", @@ -56,42 +67,79 @@ export const buildSessionSnapshotXml = ( summary: "", }, ) || undefined; - const activeTask = - events.findLast((event) => - ["task.create", "task.update", "intent"].includes(event.category) - )?.summary ?? latestUserRequest; + const sanitizedLatestUserRequest = latestUserRequest + ? sanitizeMemoryInput(latestUserRequest) + : undefined; + const normalizedLatestUserRequest = sanitizedLatestUserRequest + ? normalizeMemoryText(sanitizedLatestUserRequest) + : ""; + if (normalizedLatestUserRequest) { + occupiedNormalized.add(normalizedLatestUserRequest); + } + const activeTask = events.findLast((event) => + ["task.create", "task.update", "task.complete"].includes(event.category) + )?.summary; + const sanitizedActiveTask = sanitizeMemoryInput(activeTask ?? ""); + const activeTaskValue = sanitizedActiveTask && + normalizeMemoryText(sanitizedActiveTask) !== normalizedLatestUserRequest + ? 
sanitizedActiveTask + : undefined; + if (activeTaskValue) { + occupiedNormalized.add(normalizeMemoryText(activeTaskValue)); + } const activeFiles = selectRecent( events, (event) => event.category.startsWith("file."), (event) => event.refs ?? [], 6, + occupiedNormalized, ); + for (const value of activeFiles) { + occupiedNormalized.add(normalizeMemoryText(value)); + } const recentEdits = selectRecent( events, (event) => event.category === "file.write" || event.category === "file.edit", - (event) => getSessionEventPrimaryText(event), + (event) => sanitizeMemoryInput(getSessionEventPrimaryText(event)), 5, + occupiedNormalized, ); + for (const value of recentEdits) { + occupiedNormalized.add(normalizeMemoryText(value)); + } const subagentsOpen = selectRecent( events, (event) => event.category === "subagent.start", - (event) => getSessionEventPrimaryText(event), + (event) => sanitizeMemoryInput(getSessionEventPrimaryText(event)), 4, + occupiedNormalized, ); + for (const value of subagentsOpen) { + occupiedNormalized.add(normalizeMemoryText(value)); + } const unresolvedErrors = events.filter((event) => - event.category === "error" && event.metadata?.resolved !== true + event.category === "error" && event.metadata?.resolved !== true && + event.role !== "assistant" ); - const errors = uniqueValues( - unresolvedErrors.map((event) => getSessionEventPrimaryText(event)) + const errors = uniqueNormalizedValues( + unresolvedErrors.map((event) => + sanitizeMemoryInput(getSessionEventPrimaryText(event)) + ) .reverse(), 4, + occupiedNormalized, ); - const blockers = uniqueValues( + for (const value of errors) { + occupiedNormalized.add(normalizeMemoryText(value)); + } + const blockers = uniqueNormalizedValues( unresolvedErrors.flatMap((event) => { - const blockerText = event.detail?.trim() || - event.continuityText?.trim() || - event.body?.trim(); + const blockerText = sanitizeMemoryInput( + event.detail?.trim() || + event.continuityText?.trim() || + event.body?.trim() || "", + ); 
if (!blockerText || blockerText === event.summary) return []; if ( event.metadata?.blocking === true || @@ -103,50 +151,42 @@ export const buildSessionSnapshotXml = ( return []; }).reverse(), 3, + occupiedNormalized, ); + for (const value of blockers) { + occupiedNormalized.add(normalizeMemoryText(value)); + } const environment = selectRecent( events, (event) => event.category === "cwd.change" || event.category === "env.change", - (event) => getSessionEventPrimaryText(event), + (event) => sanitizeMemoryInput(getSessionEventPrimaryText(event)), 4, + occupiedNormalized, ); + for (const value of environment) { + occupiedNormalized.add(normalizeMemoryText(value)); + } const gitState = selectRecent( events, (event) => event.category === "git.activity", - (event) => getSessionEventPrimaryText(event), + (event) => sanitizeMemoryInput(getSessionEventPrimaryText(event)), 4, + occupiedNormalized, ); + for (const value of gitState) { + occupiedNormalized.add(normalizeMemoryText(value)); + } const subagentsDone = selectRecent( events, (event) => event.category === "subagent.finish", - (event) => getSessionEventPrimaryText(event), + (event) => sanitizeMemoryInput(getSessionEventPrimaryText(event)), 4, + occupiedNormalized, ); - const openQuestions = selectRecent( - events, - (event) => event.category === "task.update", - (event) => getSessionEventPrimaryText(event), - 4, - ); - const discoveries = selectRecent( - events, - (event) => event.category === "discovery", - (event) => getSessionEventPrimaryText(event), - 4, - ); - const references = selectRecent( - events, - (event) => event.category === "data.import", - (event) => getSessionEventPrimaryText(event), - 4, - ); - const residualMessages = selectRecent( - events, - (event) => event.category === "message", - (event) => getSessionEventPrimaryText(event), - 3, - ); + for (const value of subagentsDone) { + occupiedNormalized.add(normalizeMemoryText(value)); + } const open = ` - renderXmlListSection("open_questions", "q", 
openQuestions, { - itemCharLimit: 220, - remaining, - }), - () => - renderXmlListSection("discoveries", "d", discoveries, { - itemCharLimit: 240, - remaining, - }), - () => - renderXmlListSection("references", "r", references, { - itemCharLimit: 220, - remaining, - }), - () => - renderXmlListSection("residual_messages", "m", residualMessages, { - itemCharLimit: 180, - remaining, - }), ]; for (const buildSection of sectionBuilders) { diff --git a/src/services/render-utils.test.ts b/src/services/render-utils.test.ts new file mode 100644 index 0000000..0f4dbb2 --- /dev/null +++ b/src/services/render-utils.test.ts @@ -0,0 +1,41 @@ +import { assertEquals } from "jsr:@std/assert@^1.0.0"; + +import { + isHighValueMemoryText, + renderXmlListSection, + sanitizeMemoryInput, + sanitizeMemoryInputPreservingMemoryBlocks, +} from "./render-utils.ts"; + +Deno.test("isHighValueMemoryText keeps concise architectural memories that mention transcript terms", () => { + const memory = + "Architecture decision: prefer session memory summaries over transcript bodies when updating src/session.ts."; + + assertEquals(isHighValueMemoryText(memory), true); +}); + +Deno.test("isHighValueMemoryText still rejects transcript-heavy tool-like content", () => { + const memory = [ + "tool output:", + "1: Architecture decision: prefer session memory summaries over transcript bodies", + "2: Update src/session.ts to keep Graphiti off the hot path", + "3: stdout captured from transcript review", + ].join("\n"); + + assertEquals(isHighValueMemoryText(memory), false); +}); + +Deno.test("sanitizeMemoryInputPreservingMemoryBlocks keeps literal memory XML while shared sanitize strips injected blocks", () => { + const input = + 'Example\n\nsample'; + + assertEquals( + sanitizeMemoryInputPreservingMemoryBlocks(input), + input, + ); + assertEquals(sanitizeMemoryInput(input), "Example"); +}); + +Deno.test("renderXmlListSection omits empty sections automatically", () => { + 
assertEquals(renderXmlListSection("active_tasks", "task", []), ""); +}); diff --git a/src/services/render-utils.ts b/src/services/render-utils.ts index 2925e71..fe5d9fc 100644 --- a/src/services/render-utils.ts +++ b/src/services/render-utils.ts @@ -5,19 +5,121 @@ export const escapeXml = (value: string): string => .replaceAll('"', """) .replaceAll("'", "'"); -export const uniqueValues = (values: string[], limit: number): string[] => { +const SESSION_MEMORY_BLOCK_PATTERN = + /]*>[\s\S]*?<\/session_memory>/gi; +const LEGACY_MEMORY_BLOCK_PATTERN = /]*>[\s\S]*?<\/memory>/gi; +const PERSISTENT_MEMORY_BLOCK_PATTERN = + /]*>[\s\S]*?<\/persistent_memory>/gi; +const TOOL_WRAPPER_DOCUMENT_PATTERN = + /^\s*(?:<(?:path|content|type)\b[^>]*>[\s\S]*?<\/(?:path|content|type)>\s*)+$/i; +const TOOL_WRAPPER_BLOCK_PATTERN = + /<(?:path|content|type)\b[^>]*>[\s\S]*?<\/(?:path|content|type)>/gi; +const TOOL_WRAPPER_LINE_PATTERN = + /^\s*<\/?(?:path|content|type)\b[^>]*>\s*$/gim; +const TOOL_WRAPPER_PREFIX_PATTERN = + /^\s*(?:|||]*>|]*>|]*>|<\/path>|<\/content>|<\/type>|.*<\/path>|.*<\/type>|\d+:\s*<(?:path|content|type)\b[^>]*>)\s*$/gim; +const TOOL_TRANSCRIPT_PATTERN = + /\b(?:tool(?:_use)?s?|orchestration|delegat(?:e|ed|ion)|subagent|wrapper|transcript|read output|read wrapper|session_memory|persistent_memory)\b/i; +const OPERATIONAL_CHATTER_PATTERN = + /^(?:plan per target:|i(?:'m| am| will| can| should| need to)\b|now\b.*\b(?:checking|reading|inspecting|updating|running)|next\b.*\b(?:checking|reading|updating|running))/i; +const LOW_VALUE_MEMORY_PATTERN = + /\b(?:assistant|meta chatter|planning chatter|phrasing suggestion|tool routing|orchestration|delegate|subagent|wrapper)\b/i; +const HIGH_VALUE_MEMORY_PATTERN = + /\b(?:architecture|decision|constraint|prefer|preference|must|should|rule|policy|hot path|async|graphiti|redis|falkordb|session memory|persistent memory|milestone|file|src\/|plans\/|docs\/|fix|implement|update)\b/i; +const TRANSCRIPT_HEAVY_PATTERN = + 
/```|(?:^|\n)\d+:\s|(?:^|\n)\$\s|\b(?:stdout|stderr|exit code|tool output|read output|file contents|transcript)\b/i; +const STRUCTURED_TRANSCRIPT_HEAVY_PATTERN = + /```|(?:^|\n)\d+:\s|(?:^|\n)\$\s|\b(?:stdout|stderr|exit code|tool output|read output|file contents)\b/i; + +export const stripInjectedMemoryBlocks = (value: string): string => + value.replace(SESSION_MEMORY_BLOCK_PATTERN, " ") + .replace(LEGACY_MEMORY_BLOCK_PATTERN, " ") + .replace(PERSISTENT_MEMORY_BLOCK_PATTERN, " "); + +export const stripToolTranscriptWrappers = (value: string): string => + TOOL_WRAPPER_DOCUMENT_PATTERN.test(value) + ? value.replace(TOOL_WRAPPER_BLOCK_PATTERN, " ") + .replace(TOOL_WRAPPER_LINE_PATTERN, " ") + .replace(TOOL_WRAPPER_PREFIX_PATTERN, " ") + : value; + +const normalizeSanitizedText = (value: string): string => + value.replace(/\r\n/g, "\n") + .replace(/\n{3,}/g, "\n\n") + .replace(/[\t ]+/g, " ") + .replace(/ ?\n ?/g, "\n") + .trim(); + +export const sanitizeMemoryInput = (value: string): string => + normalizeSanitizedText( + stripToolTranscriptWrappers(stripInjectedMemoryBlocks(value)), + ); + +export const sanitizeMemoryInputPreservingMemoryBlocks = ( + value: string, +): string => normalizeSanitizedText(stripToolTranscriptWrappers(value)); + +export const normalizeMemoryText = (value: string): string => + sanitizeMemoryInput(value) + .toLowerCase() + .replace(/&(?:amp|lt|gt|quot|apos);/g, " ") + .replace(/[^a-z0-9./_-]+/g, " ") + .replace(/\s+/g, " ") + .trim(); + +export const uniqueNormalizedValues = ( + values: string[], + limit: number, + excludedNormalized = new Set(), +): string[] => { const seen = new Set(); const result: string[] = []; for (const value of values) { - const cleaned = value.trim(); - if (!cleaned || seen.has(cleaned)) continue; - seen.add(cleaned); + const cleaned = sanitizeMemoryInput(value); + const normalized = normalizeMemoryText(cleaned); + if ( + !cleaned || !normalized || excludedNormalized.has(normalized) || + seen.has(normalized) + ) { + 
continue; + } + seen.add(normalized); result.push(cleaned); if (result.length >= limit) break; } return result; }; +export const looksLikeOperationalChatter = (value: string): boolean => + OPERATIONAL_CHATTER_PATTERN.test(sanitizeMemoryInput(value)); + +export const looksLikeToolTranscript = (value: string): boolean => + TOOL_WRAPPER_DOCUMENT_PATTERN.test(value) || + TOOL_TRANSCRIPT_PATTERN.test(value); + +export const looksTranscriptHeavy = (value: string): boolean => { + const sanitized = sanitizeMemoryInput(value); + if (!sanitized) return false; + return sanitized.length > 600 || sanitized.split("\n").length > 12 || + TRANSCRIPT_HEAVY_PATTERN.test(sanitized); +}; + +export const isHighValueMemoryText = (value: string): boolean => { + const sanitized = sanitizeMemoryInput(value); + if (!sanitized) return false; + const looksHighValue = HIGH_VALUE_MEMORY_PATTERN.test(sanitized); + if (!looksHighValue) return false; + const hasStructuredTranscriptEvidence = + TOOL_WRAPPER_DOCUMENT_PATTERN.test(value) || sanitized.length > 600 || + sanitized.split("\n").length > 12 || + STRUCTURED_TRANSCRIPT_HEAVY_PATTERN.test(sanitized); + if (looksLikeToolTranscript(sanitized) && hasStructuredTranscriptEvidence) { + return false; + } + if (LOW_VALUE_MEMORY_PATTERN.test(sanitized)) return false; + return true; +}; + const fitEscapedText = (value: string, maxEscapedLength: number): string => { const source = value.trim(); if (!source || maxEscapedLength <= 0) return ""; @@ -40,7 +142,6 @@ const fitEscapedText = (value: string, maxEscapedLength: number): string => { export interface RenderXmlListSectionOptions { itemCharLimit?: number; remaining?: number; - includeEmpty?: boolean; } export const renderXmlListSection = ( @@ -49,7 +150,7 @@ export const renderXmlListSection = ( values: string[], options: RenderXmlListSectionOptions = {}, ): string => { - const { itemCharLimit, remaining, includeEmpty = false } = options; + const { itemCharLimit, remaining } = options; const open = 
`<${tag}>`; const close = ``; @@ -77,7 +178,7 @@ export const renderXmlListSection = ( body += `${itemOpen}${escapeXml(content)}`; } - if (!body) return includeEmpty ? `${open}${close}` : ""; + if (!body) return ""; return `${open}${body}${close}`; }; diff --git a/src/services/runtime-teardown.test.ts b/src/services/runtime-teardown.test.ts index 88dbaf6..f92046d 100644 --- a/src/services/runtime-teardown.test.ts +++ b/src/services/runtime-teardown.test.ts @@ -1,4 +1,4 @@ -import { assertEquals } from "jsr:@std/assert@^1.0.0"; +import { assertEquals, assertRejects } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import { logger } from "./logger.ts"; import { registerRuntimeTeardown } from "./runtime-teardown.ts"; @@ -23,6 +23,8 @@ describe("runtime teardown", () => { it("registers best-effort unload and signal handlers that share the same idempotent path", async () => { const eventHandlers = new Map void>(); const signalHandlers = new Map<"SIGINT" | "SIGTERM", () => void>(); + const removedEventHandlers: string[] = []; + const removedSignalHandlers: Array<"SIGINT" | "SIGTERM"> = []; const calls: string[] = []; const registration = registerRuntimeTeardown([ { @@ -35,10 +37,18 @@ describe("runtime teardown", () => { addEventListener(type, listener) { eventHandlers.set(type, listener as () => void); }, + removeEventListener(type) { + removedEventHandlers.push(type); + eventHandlers.delete(type); + }, Deno: { addSignalListener(signal, handler) { signalHandlers.set(signal, handler); }, + removeSignalListener(signal) { + removedSignalHandlers.push(signal); + signalHandlers.delete(signal); + }, }, }); @@ -50,6 +60,226 @@ describe("runtime teardown", () => { await registration.run(); assertEquals(calls, ["runtime"]); + assertEquals(removedEventHandlers.sort(), ["beforeunload", "unload"]); + assertEquals(removedSignalHandlers.sort(), ["SIGINT", "SIGTERM"]); + assertEquals(eventHandlers.size, 0); + assertEquals(signalHandlers.size, 
0); + }); + + it("removes signal listeners as soon as graceful shutdown starts from a signal", async () => { + const signalHandlers = new Map<"SIGINT" | "SIGTERM", () => void>(); + const removedSignalHandlers: Array<"SIGINT" | "SIGTERM"> = []; + let releaseTask!: () => void; + const taskFinished = new Promise((resolve) => { + releaseTask = resolve; + }); + let exitReject!: (reason?: unknown) => void; + const exitPromise = new Promise((_, reject) => { + exitReject = reject; + }); + + registerRuntimeTeardown([ + { + name: "flush", + run: () => taskFinished, + }, + ], { + Deno: { + addSignalListener(signal, handler) { + signalHandlers.set(signal, handler); + }, + removeSignalListener(signal) { + removedSignalHandlers.push(signal); + signalHandlers.delete(signal); + }, + exit(code) { + exitReject(new Error(`exit:${code ?? 0}`)); + return undefined as never; + }, + }, + }); + + signalHandlers.get("SIGINT")?.(); + + assertEquals(signalHandlers.size, 0); + assertEquals(removedSignalHandlers.sort(), ["SIGINT", "SIGTERM"]); + + releaseTask(); + await assertRejects( + async () => { + await exitPromise; + }, + Error, + "exit:130", + ); + }); + + it("removes signal listeners when graceful shutdown starts from unload", async () => { + const eventHandlers = new Map void>(); + const signalHandlers = new Map<"SIGINT" | "SIGTERM", () => void>(); + const removedSignalHandlers: Array<"SIGINT" | "SIGTERM"> = []; + let releaseTask!: () => void; + const taskFinished = new Promise((resolve) => { + releaseTask = resolve; + }); + + const registration = registerRuntimeTeardown([ + { + name: "flush", + run: () => taskFinished, + }, + ], { + addEventListener(type, listener) { + eventHandlers.set(type, listener as () => void); + }, + removeEventListener(type) { + eventHandlers.delete(type); + }, + Deno: { + addSignalListener(signal, handler) { + signalHandlers.set(signal, handler); + }, + removeSignalListener(signal) { + removedSignalHandlers.push(signal); + signalHandlers.delete(signal); + }, 
+ }, + }); + + eventHandlers.get("unload")?.(); + + assertEquals(signalHandlers.size, 0); + assertEquals(removedSignalHandlers.sort(), ["SIGINT", "SIGTERM"]); + + releaseTask(); + await registration.run(); + }); + + it("registers one listener set per registration and keeps them independent", () => { + const eventHandlers = new Map void>>(); + const signalHandlers = new Map<"SIGINT" | "SIGTERM", Set<() => void>>(); + + const runtime = { + addEventListener(type: string, listener: (event?: Event) => void) { + const handlers = eventHandlers.get(type) ?? new Set<() => void>(); + handlers.add(listener as () => void); + eventHandlers.set(type, handlers); + }, + removeEventListener(type: string, listener: (event?: Event) => void) { + eventHandlers.get(type)?.delete(listener as () => void); + }, + Deno: { + addSignalListener(signal: "SIGINT" | "SIGTERM", handler: () => void) { + const handlers = signalHandlers.get(signal) ?? new Set<() => void>(); + handlers.add(handler); + signalHandlers.set(signal, handlers); + }, + removeSignalListener( + signal: "SIGINT" | "SIGTERM", + handler: () => void, + ) { + signalHandlers.get(signal)?.delete(handler); + }, + }, + }; + + const firstRegistration = registerRuntimeTeardown([], runtime); + const secondRegistration = registerRuntimeTeardown([], runtime); + + assertEquals(firstRegistration === secondRegistration, false); + assertEquals( + [...eventHandlers.values()].map((handlers) => handlers.size), + [2, 2], + ); + assertEquals( + [...signalHandlers.values()].map((handlers) => handlers.size), + [2, 2], + ); + + firstRegistration.dispose(); + + assertEquals( + [...eventHandlers.values()].map((handlers) => handlers.size), + [1, 1], + ); + assertEquals( + [...signalHandlers.values()].map((handlers) => handlers.size), + [1, 1], + ); + + secondRegistration.dispose(); + + assertEquals( + [...eventHandlers.values()].map((handlers) => handlers.size), + [0, 0], + ); + assertEquals( + [...signalHandlers.values()].map((handlers) => 
handlers.size), + [0, 0], + ); + }); + + it("keeps multiple runtime registrations active until each is disposed", () => { + const eventHandlers = new Map void>>(); + const signalHandlers = new Map<"SIGINT" | "SIGTERM", Set<() => void>>(); + + const runtime = { + addEventListener(type: string, listener: (event?: Event) => void) { + const handlers = eventHandlers.get(type) ?? new Set<() => void>(); + handlers.add(listener as () => void); + eventHandlers.set(type, handlers); + }, + removeEventListener(type: string, listener: (event?: Event) => void) { + eventHandlers.get(type)?.delete(listener as () => void); + }, + Deno: { + addSignalListener(signal: "SIGINT" | "SIGTERM", handler: () => void) { + const handlers = signalHandlers.get(signal) ?? new Set<() => void>(); + handlers.add(handler); + signalHandlers.set(signal, handlers); + }, + removeSignalListener( + signal: "SIGINT" | "SIGTERM", + handler: () => void, + ) { + signalHandlers.get(signal)?.delete(handler); + }, + }, + }; + + const firstRegistration = registerRuntimeTeardown([], runtime); + const secondRegistration = registerRuntimeTeardown([], runtime); + + assertEquals( + [...eventHandlers.values()].map((handlers) => handlers.size), + [2, 2], + ); + assertEquals( + [...signalHandlers.values()].map((handlers) => handlers.size), + [2, 2], + ); + + firstRegistration.dispose(); + + assertEquals( + [...eventHandlers.values()].map((handlers) => handlers.size), + [1, 1], + ); + assertEquals( + [...signalHandlers.values()].map((handlers) => handlers.size), + [1, 1], + ); + + secondRegistration.dispose(); + + assertEquals( + [...eventHandlers.values()].map((handlers) => handlers.size), + [0, 0], + ); + assertEquals( + [...signalHandlers.values()].map((handlers) => handlers.size), + [0, 0], + ); }); it("continues teardown after a task failure", async () => { @@ -85,4 +315,127 @@ describe("runtime teardown", () => { logger.warn = originalWarn; } }); + + it("keeps signal listeners active during graceful SIGINT teardown 
so a second Ctrl+C can force exit", async () => { + const signalHandlers = new Map<"SIGINT" | "SIGTERM", () => void>(); + const removedSignalHandlers: Array<"SIGINT" | "SIGTERM"> = []; + const warnings: unknown[][] = []; + const exitCalls: number[] = []; + let resolveTask!: () => void; + const taskStarted = new Promise((resolve) => { + resolveTask = resolve; + }); + let releaseTask!: () => void; + const taskFinished = new Promise((resolve) => { + releaseTask = resolve; + }); + let exitReject!: (reason?: unknown) => void; + const exitPromise = new Promise((_, reject) => { + exitReject = reject; + }); + const originalWarn = logger.warn; + logger.warn = (...args: unknown[]) => { + warnings.push(args); + }; + + try { + registerRuntimeTeardown([ + { + name: "graphiti-drain", + run: async () => { + resolveTask(); + await taskFinished; + }, + }, + ], { + Deno: { + addSignalListener(signal, handler) { + signalHandlers.set(signal, handler); + }, + removeSignalListener(signal) { + removedSignalHandlers.push(signal); + signalHandlers.delete(signal); + }, + exit(code) { + exitCalls.push(code ?? 0); + exitReject(new Error(`exit:${code ?? 0}`)); + return undefined as never; + }, + }, + }); + + signalHandlers.get("SIGINT")?.(); + await taskStarted; + + assertEquals([...signalHandlers.keys()].sort(), []); + assertEquals(warnings.length, 1); + assertEquals( + warnings[0][0], + "Graceful shutdown in progress; waiting for pending memory flush. 
Press Ctrl+C again to exit immediately and drop pending memories.", + ); + + releaseTask(); + await assertRejects(async () => await exitPromise, Error, "exit:130"); + + assertEquals(exitCalls, [130]); + assertEquals(removedSignalHandlers.sort(), ["SIGINT", "SIGTERM"]); + } finally { + logger.warn = originalWarn; + } + }); + + it("exits after graceful teardown completes on first SIGINT", async () => { + const signalHandlers = new Map<"SIGINT" | "SIGTERM", () => void>(); + const removedSignalHandlers: Array<"SIGINT" | "SIGTERM"> = []; + const exitCalls: number[] = []; + const warnings: unknown[][] = []; + let exitReject!: (reason?: unknown) => void; + const exitPromise = new Promise((_, reject) => { + exitReject = reject; + }); + const originalWarn = logger.warn; + logger.warn = (...args: unknown[]) => { + warnings.push(args); + }; + + try { + registerRuntimeTeardown([ + { + name: "redis", + run: () => Promise.resolve(), + }, + ], { + Deno: { + addSignalListener(signal, handler) { + signalHandlers.set(signal, handler); + }, + removeSignalListener(signal) { + removedSignalHandlers.push(signal); + signalHandlers.delete(signal); + }, + exit(code) { + exitCalls.push(code ?? 0); + exitReject(new Error(`exit:${code ?? 
0}`)); + return undefined as never; + }, + }, + }); + + await assertRejects( + async () => { + signalHandlers.get("SIGINT")?.(); + await exitPromise; + }, + Error, + "exit:130", + ); + + assertEquals(exitCalls, [130]); + assertEquals(warnings.length, 1); + assertEquals(removedSignalHandlers.sort(), ["SIGINT", "SIGTERM"]); + assertEquals(signalHandlers.size, 0); + } finally { + logger.warn = originalWarn; + } + }); }); diff --git a/src/services/runtime-teardown.ts b/src/services/runtime-teardown.ts index 97779c5..6292a85 100644 --- a/src/services/runtime-teardown.ts +++ b/src/services/runtime-teardown.ts @@ -7,61 +7,197 @@ export type RuntimeTeardownTask = { export interface RuntimeTeardownRegistration { run(): Promise; + dispose(): void; } +type ShutdownTrigger = + | { kind: "event"; type: (typeof SHUTDOWN_EVENTS)[number] } + | { kind: "signal"; signal: (typeof SHUTDOWN_SIGNALS)[number] }; + type ShutdownRegistrationAdapter = { addEventListener?: ( type: string, listener: (event?: Event) => void, options?: boolean | { once?: boolean; capture?: boolean }, ) => void; + removeEventListener?: ( + type: string, + listener: (event?: Event) => void, + options?: boolean | EventListenerOptions, + ) => void; Deno?: { addSignalListener?: ( signal: "SIGINT" | "SIGTERM", handler: () => void, ) => void; + removeSignalListener?: ( + signal: "SIGINT" | "SIGTERM", + handler: () => void, + ) => void; + exit?: (code?: number) => never; }; }; const SHUTDOWN_EVENTS = ["unload", "beforeunload"] as const; const SHUTDOWN_SIGNALS = ["SIGINT", "SIGTERM"] as const; +const SHUTDOWN_EXIT_CODE: Record<(typeof SHUTDOWN_SIGNALS)[number], number> = { + SIGINT: 130, + SIGTERM: 143, +}; +const activeRegistrations = new WeakMap void>>(); + +const getShutdownNotice = ( + signal: (typeof SHUTDOWN_SIGNALS)[number], +): string => + signal === "SIGINT" + ? "Graceful shutdown in progress; waiting for pending memory flush. Press Ctrl+C again to exit immediately and drop pending memories." 
+ : "Graceful shutdown in progress; waiting for pending memory flush. Send the signal again to exit immediately and drop pending memories."; + +const getForcedShutdownNotice = ( + signal: (typeof SHUTDOWN_SIGNALS)[number], +): string => + signal === "SIGINT" + ? "Forced shutdown requested; exiting immediately and dropping pending memories." + : "Forced shutdown requested; exiting immediately after repeated shutdown signal and dropping pending memories."; export function registerRuntimeTeardown( tasks: RuntimeTeardownTask[], runtime: ShutdownRegistrationAdapter = globalThis, ): RuntimeTeardownRegistration { + const runtimeKey = runtime as object; let teardownPromise: Promise | null = null; + let eventListenersDisposed = false; + let signalListenersDisposed = false; + let registrationReleased = false; + let shutdownSignal: (typeof SHUTDOWN_SIGNALS)[number] | null = null; + let exitRequested = false; + let gracefulShutdownStarted = false; + const eventListeners: Array<{ + type: (typeof SHUTDOWN_EVENTS)[number]; + listener: () => void; + }> = []; + const signalListeners: Array<{ + signal: (typeof SHUTDOWN_SIGNALS)[number]; + handler: () => void; + }> = []; + + const disposeEventListeners = (): void => { + if (eventListenersDisposed) return; + eventListenersDisposed = true; + for (const { type, listener } of eventListeners) { + runtime.removeEventListener?.(type, listener, false); + } + }; + + const disposeSignalListeners = (): void => { + if (signalListenersDisposed) return; + signalListenersDisposed = true; + for (const { signal, handler } of signalListeners) { + runtime.Deno?.removeSignalListener?.(signal, handler); + } + }; + + const releaseRegistration = (): void => { + if (registrationReleased) return; + registrationReleased = true; + const registrations = activeRegistrations.get(runtimeKey); + if (!registrations) return; + registrations.delete(dispose); + if (registrations.size === 0) { + activeRegistrations.delete(runtimeKey); + } + }; + + const dispose = (): 
void => { + disposeEventListeners(); + disposeSignalListeners(); + releaseRegistration(); + }; + + const requestExit = (signal: (typeof SHUTDOWN_SIGNALS)[number]): void => { + if (exitRequested) return; + exitRequested = true; + dispose(); + runtime.Deno?.exit?.(SHUTDOWN_EXIT_CODE[signal]); + }; const run = (): Promise => { if (teardownPromise) return teardownPromise; teardownPromise = (async () => { - for (const task of tasks) { - try { - await task.run(); - } catch (err) { - logger.warn("Runtime teardown failed", { - resource: task.name, - err, - }); + disposeEventListeners(); + disposeSignalListeners(); + releaseRegistration(); + + try { + for (const task of tasks) { + try { + await task.run(); + } catch (err) { + logger.warn("Runtime teardown failed", { + resource: task.name, + err, + }); + } } + } finally { + disposeSignalListeners(); } })(); return teardownPromise; }; + const beginGracefulShutdown = (trigger: ShutdownTrigger): void => { + if (gracefulShutdownStarted) return; + gracefulShutdownStarted = true; + disposeEventListeners(); + disposeSignalListeners(); + + if (trigger.kind === "signal") { + shutdownSignal = trigger.signal; + logger.warn(getShutdownNotice(trigger.signal), { + signal: trigger.signal, + }); + void run().finally(() => { + requestExit(trigger.signal); + }); + return; + } + + void run(); + }; + for (const eventType of SHUTDOWN_EVENTS) { - runtime.addEventListener?.(eventType, () => { - void run(); - }, { once: true }); + const listener = () => { + beginGracefulShutdown({ kind: "event", type: eventType }); + }; + + runtime.addEventListener?.(eventType, listener, { once: true }); + eventListeners.push({ type: eventType, listener }); } for (const signal of SHUTDOWN_SIGNALS) { - runtime.Deno?.addSignalListener?.(signal, () => { - void run(); - }); + const handler = () => { + if (gracefulShutdownStarted) { + logger.warn(getForcedShutdownNotice(signal), { + signal, + initialSignal: shutdownSignal ?? 
signal, + }); + requestExit(signal); + return; + } + + beginGracefulShutdown({ kind: "signal", signal }); + }; + + runtime.Deno?.addSignalListener?.(signal, handler); + signalListeners.push({ signal, handler }); } - return { run }; + const registrations = activeRegistrations.get(runtimeKey) ?? new Set(); + registrations.add(dispose); + activeRegistrations.set(runtimeKey, registrations); + + return { run, dispose }; } diff --git a/src/services/session-snapshot.test.ts b/src/services/session-snapshot.test.ts index 4b9f194..8f7ce81 100644 --- a/src/services/session-snapshot.test.ts +++ b/src/services/session-snapshot.test.ts @@ -1,9 +1,34 @@ import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import { SessionManager } from "../session.ts"; -import type { PersistentMemoryCacheEntry } from "../types/index.ts"; +import type { SessionEvent } from "../types/index.ts"; import { buildSessionSnapshotXml } from "./redis-snapshot.ts"; +const emptyCache = { + get() { + return null; + }, + getMeta() { + return null; + }, + renderPersistentMemory() { + return { body: "", nodeRefs: [] }; + }, + classifyRefresh() { + return { + classification: "miss", + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }; + }, +}; + +const createExplicitSessionNotFoundError = ( + details: Record = { status: 404 }, +): Error => Object.assign(new Error("Session not found"), details); + class FakeClock { now = 0; nextId = 1; @@ -53,6 +78,459 @@ describe("SessionManager", () => { assertEquals(state.pendingInjectionGeneration, 0); }); + it("treats missing startup sessions as temporary roots during canonical resolution", async () => { + const manager = new SessionManager( + "group-1", + "user-1", + { + session: { + get() { + throw createExplicitSessionNotFoundError(); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + const canonicalSessionId = await 
manager.resolveCanonicalSessionId( + "session-1", + ); + const resolved = await manager.resolveSessionState("session-1"); + + assertEquals(canonicalSessionId, "session-1"); + assertEquals(resolved.resolved, true); + assertEquals(resolved.canonicalSessionId, "session-1"); + assertEquals(resolved.state?.isMain, true); + }); + + it("treats structured nested session-not-found codes as temporary roots", async () => { + const manager = new SessionManager( + "group-1", + "user-1", + { + session: { + get() { + throw { + response: { + data: { + code: "session_not_found", + }, + }, + }; + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + const canonicalSessionId = await manager.resolveCanonicalSessionId( + "session-1", + ); + + assertEquals(canonicalSessionId, "session-1"); + assertEquals( + (await manager.resolveSessionState("session-1")).resolved, + true, + ); + }); + + it("treats message-only session-not-found strings as temporary roots", async () => { + const manager = new SessionManager( + "group-1", + "user-1", + { + session: { + get() { + throw new Error("Session not found"); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + const canonicalSessionId = await manager.resolveCanonicalSessionId( + "session-1", + ); + const resolved = await manager.resolveSessionState("session-1"); + + assertEquals(canonicalSessionId, "session-1"); + assertEquals(resolved.resolved, true); + assertEquals(resolved.canonicalSessionId, "session-1"); + assertEquals(resolved.state?.isMain, true); + }); + + it("migrates temporary-root session state into the canonical parent on attachment", async () => { + const manager = new SessionManager( + "group-1", + "user-1", + { + session: { + get() { + throw createExplicitSessionNotFoundError(); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + manager.setParentId("parent-session", null); + const parentState = manager.createDefaultState("group-1", "user-1"); + 
parentState.messageCount = 1; + parentState.latestUserRequest = "parent request"; + parentState.pendingInjectionGeneration = 2; + manager.setState("parent-session", parentState); + + const childCanonicalSessionId = await manager.resolveCanonicalSessionId( + "child-session", + ); + const childResolved = await manager.resolveSessionState("child-session"); + const childState = childResolved.state; + + assertEquals(childCanonicalSessionId, "child-session"); + assertEquals(childResolved.canonicalSessionId, "child-session"); + + childState!.messageCount = 2; + childState!.hotTierReady = true; + childState!.latestUserRequest = "child request"; + childState!.latestRefreshQuery = "child refresh"; + childState!.pendingInjectionGeneration = 5; + + manager.setParentId("child-session", "parent-session"); + + const canonicalResolved = await manager.resolveSessionState( + "child-session", + ); + + assertEquals(manager.getState("child-session"), undefined); + assertEquals(canonicalResolved.canonicalSessionId, "parent-session"); + assertEquals(parentState.messageCount, 3); + assertEquals(parentState.hotTierReady, true); + assertEquals(parentState.latestUserRequest, "child request"); + assertEquals(parentState.latestRefreshQuery, "child refresh"); + assertEquals(parentState.pendingInjectionGeneration, 5); + }); + + it("keeps the newer canonical pending injection when a provisional child attaches later", () => { + const manager = new SessionManager( + "group-1", + "user-1", + { + session: { + get() { + throw createExplicitSessionNotFoundError(); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + manager.setParentId("parent-session", null); + + const parentState = manager.createDefaultState("group-1", "user-1"); + const newerPrepared = { + envelope: "newer", + nodeRefs: ["node-parent"], + refreshDecision: { + classification: "aligned" as const, + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "newer query", + }, + }; + 
parentState.pendingInjection = newerPrepared; + parentState.pendingInjectionGeneration = 7; + manager.setState("parent-session", parentState); + + const childState = manager.createDefaultState("group-1", "user-1"); + const olderPrepared = { + envelope: "older", + nodeRefs: ["node-child"], + refreshDecision: { + classification: "miss" as const, + shouldRefresh: true, + similarity: 0, + threshold: 0.5, + cachedQuery: null, + }, + }; + childState.pendingInjection = olderPrepared; + childState.pendingInjectionGeneration = 3; + manager.setState("child-session", childState); + + manager.setParentId("child-session", "parent-session"); + + const mergedParentState = manager.getState("parent-session"); + assertEquals(mergedParentState?.pendingInjection, newerPrepared); + assertEquals(mergedParentState?.pendingInjectionGeneration, 7); + assertEquals(manager.getState("child-session"), undefined); + }); + + it("re-resolves a provisional temporary root onto its discovered canonical parent later", async () => { + let childLookupCount = 0; + const manager = new SessionManager( + "group-1", + "user-1", + { + session: { + get({ path }: { path: { id: string } }) { + if (path.id === "child-session") { + childLookupCount += 1; + if (childLookupCount === 1) { + throw createExplicitSessionNotFoundError(); + } + return { data: { parentID: "parent-session" } }; + } + if (path.id === "parent-session") { + return { data: { parentID: null } }; + } + throw new Error(`Unexpected session lookup: ${path.id}`); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + const firstCanonicalSessionId = await manager.resolveCanonicalSessionId( + "child-session", + ); + const provisionalState = manager.createDefaultState("group-1", "user-1"); + provisionalState.messageCount = 2; + provisionalState.latestUserRequest = "child request"; + manager.setState("child-session", provisionalState); + + const laterResolved = await manager.resolveSessionState("child-session"); + + 
assertEquals(firstCanonicalSessionId, "child-session"); + assertEquals(childLookupCount, 2); + assertEquals(laterResolved.canonicalSessionId, "parent-session"); + assertEquals(manager.getState("child-session"), undefined); + assertEquals(manager.getState("parent-session")?.messageCount, 2); + assertEquals( + manager.getState("parent-session")?.latestUserRequest, + "child request", + ); + }); + + it("migrates existing child session state into the canonical parent on attachment", () => { + const manager = new SessionManager( + "group-1", + "user-1", + { + session: { + get() { + throw createExplicitSessionNotFoundError(); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + manager.setParentId("parent-session", null); + const parentState = manager.createDefaultState("group-1", "user-1"); + parentState.messageCount = 1; + manager.setState("parent-session", parentState); + + const childState = manager.createDefaultState("group-1", "user-1"); + childState.messageCount = 2; + childState.contextLimit = 123_456; + childState.hotTierReady = true; + childState.latestUserRequest = "child request"; + childState.latestRefreshQuery = "child refresh"; + childState.pendingInjectionGeneration = 5; + manager.setState("child-session", childState); + + manager.setParentId("child-session", "parent-session"); + + assertEquals(manager.getState("child-session"), undefined); + assertEquals(manager.getState("parent-session")?.messageCount, 3); + assertEquals(manager.getState("parent-session")?.contextLimit, 200_000); + assertEquals(manager.getState("parent-session")?.hotTierReady, true); + assertEquals( + manager.getState("parent-session")?.latestUserRequest, + "child request", + ); + assertEquals( + manager.getState("parent-session")?.latestRefreshQuery, + "child refresh", + ); + assertEquals( + manager.getState("parent-session")?.pendingInjectionGeneration, + 5, + ); + }); + + it("rekeys assistant pending and finalized buffers onto canonical session ids after 
attachment", async () => { + const manager = new SessionManager( + "group-1", + "user-1", + { + session: { + get() { + throw createExplicitSessionNotFoundError(); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + manager.setParentId("parent-session", null); + manager.setState( + "parent-session", + manager.createDefaultState("group-1", "user-1"), + ); + + const childResolved = await manager.resolveSessionState("child-session"); + const childState = childResolved.state!; + + manager.bufferAssistantPart( + "child-session", + "pending-message", + "pending text", + ); + manager.bufferAssistantPart("child-session", "done-message", "done text"); + assertEquals( + manager.finalizeAssistantMessage( + childState, + "child-session", + "done-message", + "test", + ), + "done text", + ); + assertEquals( + manager.isAssistantBuffered("child-session", "done-message"), + true, + ); + + manager.setParentId("child-session", "parent-session"); + + const parentState = manager.getState("parent-session")!; + assertEquals( + manager.finalizeAssistantMessage( + parentState, + "parent-session", + "pending-message", + "test", + ), + "pending text", + ); + assertEquals( + manager.isAssistantBuffered("parent-session", "pending-message"), + true, + ); + assertEquals( + manager.isAssistantBuffered("child-session", "pending-message"), + false, + ); + assertEquals( + manager.isAssistantBuffered("parent-session", "done-message"), + true, + ); + assertEquals( + manager.isAssistantBuffered("child-session", "done-message"), + false, + ); + assertEquals( + manager.finalizeAssistantMessage( + parentState, + "parent-session", + "done-message", + "test", + ), + null, + ); + + manager.purgeAssistantBufferSource("child-session"); + assertEquals( + manager.isAssistantBuffered("parent-session", "pending-message"), + false, + ); + assertEquals( + manager.isAssistantBuffered("parent-session", "done-message"), + false, + ); + }); + + it("migrates idle lifecycle state so parent 
cleanup semantics continue after attachment", async () => { + const clock = new FakeClock(); + const manager = new SessionManager( + "group-1", + "user-1", + { + session: { + get() { + throw createExplicitSessionNotFoundError(); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + { + idleRetentionMs: 100, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }, + ); + + manager.setParentId("parent-session", null); + manager.setState( + "parent-session", + manager.createDefaultState("group-1", "user-1"), + ); + manager.markSessionActive("parent-session"); + const staleParentGeneration = manager.captureIdleCleanupGeneration( + "parent-session", + ); + manager.scheduleIdleSessionCleanup("parent-session"); + + await manager.resolveSessionState("child-session"); + manager.markSessionActive("child-session"); + manager.markSessionActive("child-session"); + manager.scheduleIdleSessionCleanup("child-session"); + + manager.setParentId("child-session", "parent-session"); + + assertEquals(manager.captureIdleCleanupGeneration("parent-session"), 2); + + clock.tick(150); + assertEquals(manager.getState("parent-session")?.groupId, "group-1"); + + manager.scheduleIdleSessionCleanup( + "parent-session", + staleParentGeneration ?? undefined, + ); + clock.tick(150); + assertEquals(manager.getState("parent-session")?.groupId, "group-1"); + + const currentGeneration = manager.captureIdleCleanupGeneration( + "parent-session", + ); + manager.scheduleIdleSessionCleanup( + "parent-session", + currentGeneration ?? 
undefined, + ); + clock.tick(100); + assertEquals(manager.getState("parent-session"), undefined); + }); + it("prepareInjection builds canonical session_memory with optional persistent_memory", async () => { const manager = new SessionManager( "group-1", @@ -89,40 +567,115 @@ describe("SessionManager", () => { } as never, { getSnapshot() { - return ''; + return ''; + }, + } as never, + { + get() { + return { + query: "Continue the overhaul", + refreshedAt: Date.now(), + nodes: [{ uuid: "node-1", name: "Context Overhaul" }], + nodeRefs: ["node-1"], + }; + }, + getMeta() { + return null; + }, + renderPersistentMemory() { + return { + body: "Context Overhaul: cached cross-session recall", + nodeRefs: ["node-1"], + }; + }, + classifyRefresh() { + return { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "Continue the overhaul", + }; + }, + } as never, + ); + + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + const prepared = await manager.prepareInjection( + "session-1", + "Continue the overhaul", + ); + + assertStringIncludes( + prepared?.envelope ?? "", + '', + ); + assertStringIncludes( + prepared?.envelope ?? "", + "Keep Graphiti off the hot path", + ); + assertStringIncludes(prepared?.envelope ?? 
"", " { + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + { + recallSessionEvents() { + return []; + }, + getRecentSessionEvents() { + return [{ + id: "1", + ts: Date.now(), + category: "intent", + priority: 0, + role: "user", + summary: "Use cached memory only", + }]; + }, + } as never, + { + getSnapshot() { + return null; }, } as never, { get() { return { - query: "Continue the overhaul", + query: "Use cached memory only", refreshedAt: Date.now(), - facts: [{ - uuid: "fact-1", - fact: "The user prefers local injection", - }], - nodes: [{ uuid: "node-1", name: "Context Overhaul" }], - factUuids: ["fact-1"], + nodes: [{ uuid: "node-1", name: "Cached recall" }], + episodeSummaries: [ + "ArchitectureDecision → HotPath: Keep Graphiti off synchronous injection", + ], nodeRefs: ["node-1"], }; }, + getMeta() { + return null; + }, renderPersistentMemory() { return { - body: "The user prefers local injection", - factUuids: ["fact-1"], + body: + "Cached recallArchitectureDecision → HotPath: Keep Graphiti off synchronous injection", nodeRefs: ["node-1"], }; }, - getMeta() { - return null; - }, classifyRefresh() { return { classification: "aligned", shouldRefresh: false, similarity: 1, threshold: 0.5, - cachedQuery: "Continue the overhaul", + cachedQuery: "Use cached memory only", }; }, } as never, @@ -135,17 +688,15 @@ describe("SessionManager", () => { ); const prepared = await manager.prepareInjection( "session-1", - "Continue the overhaul", + "Use cached memory only", ); - assertStringIncludes(prepared?.envelope ?? "", " { @@ -209,26 +760,206 @@ describe("SessionManager", () => { return snapshot; }, } as never, + emptyCache as never, + ); + + manager.setParentId("session-1", null); + manager.setState( + "session-1", + manager.createDefaultState("group-1", "user-1"), + ); + const prepared = await manager.prepareInjection("session-1", "continue"); + + assertStringIncludes(prepared?.envelope ?? 
"", decisionText); + assertEquals(prepared?.envelope.includes(""), true); + }); + + it("includes child-derived canonical memory in later snapshot and session_memory output", async () => { + const childDecision = + "Child session decided to reuse the canonical parent memory flow"; + const childTask = + "Child session continued the parent implementation after handoff"; + const canonicalEvents: SessionEvent[] = [{ + id: "1", + ts: Date.now() - 1, + category: "decision", + priority: 0, + role: "user", + summary: childDecision, + continuityText: childDecision, + }, { + id: "2", + ts: Date.now(), + category: "task.update", + priority: 0, + role: "user", + summary: childTask, + continuityText: childTask, + }]; + const snapshot = buildSessionSnapshotXml("parent-session", canonicalEvents); + + assertStringIncludes(snapshot, childDecision); + assertStringIncludes(snapshot, '"); + assertStringIncludes(prepared?.envelope ?? "", ""); + }); + + it("prepareInjection reconciles provisional child history onto the real root once discovered", async () => { + const childDecision = + "Temporary root captured the delegated child decision"; + const childTask = + "Temporary root tracked the delegated task before parent discovery"; + let childLookupCount = 0; + + const manager = new SessionManager( + "group-1", + "user-1", + { + session: { + get({ path }: { path: { id: string } }) { + if (path.id === "child-session") { + childLookupCount += 1; + if (childLookupCount === 1) { + throw createExplicitSessionNotFoundError(); + } + return { data: { parentID: "parent-session" } }; + } + if (path.id === "parent-session") { + return { data: { parentID: null } }; + } + throw new Error(`Unexpected session lookup: ${path.id}`); + }, + }, + } as never, + { + recallSessionEvents(sessionId: string) { + return sessionId === "parent-session" ? 
[] : []; + }, + getRecentSessionEvents(sessionId: string) { + if (sessionId === "parent-session") { + return [{ + id: "1", + ts: Date.now() - 1, + category: "decision", + priority: 0, + role: "user", + summary: childDecision, + continuityText: childDecision, + }, { + id: "2", + ts: Date.now(), + category: "task.update", + priority: 0, + role: "user", + summary: childTask, + continuityText: childTask, + }]; + } + throw new Error(`Unexpected recent event lookup: ${sessionId}`); + }, + } as never, + { + getSnapshot(sessionId: string) { + if (sessionId === "parent-session") { + return buildSessionSnapshotXml("parent-session", [{ + id: "1", + ts: Date.now() - 1, + category: "decision", + priority: 0, + role: "user", + summary: childDecision, + continuityText: childDecision, + }]); + } + throw new Error(`Unexpected snapshot lookup: ${sessionId}`); + }, + } as never, + emptyCache as never, + ); + + const firstCanonicalSessionId = await manager.resolveCanonicalSessionId( + "child-session", + ); + assertEquals(firstCanonicalSessionId, "child-session"); + + const provisional = manager.createDefaultState("group-1", "user-1"); + provisional.latestUserRequest = childTask; + manager.setState("child-session", provisional); + + const resolved = await manager.resolveSessionState("child-session"); + assertEquals(resolved.canonicalSessionId, "parent-session"); + assertEquals(manager.getState("child-session"), undefined); + + const prepared = await manager.prepareInjection( + resolved.canonicalSessionId!, + "continue after root arrives", + ); + + assertStringIncludes(prepared?.envelope ?? "", childDecision); + assertStringIncludes(prepared?.envelope ?? "", childTask); + assertStringIncludes(prepared?.envelope ?? 
"", ""); + }); + + it("prepareInjection omits empty continuity sections automatically", async () => { + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + { + recallSessionEvents() { + return []; + }, + getRecentSessionEvents() { + return [{ + id: "1", + ts: Date.now(), + category: "intent", + priority: 0, + role: "user", + summary: "continue", + }]; + }, + } as never, + { + getSnapshot() { + return null; }, } as never, + emptyCache as never, ); manager.setParentId("session-1", null); @@ -238,11 +969,17 @@ describe("SessionManager", () => { ); const prepared = await manager.prepareInjection("session-1", "continue"); - assertStringIncludes(prepared?.envelope ?? "", decisionText); - assertEquals(prepared?.envelope.includes(""), true); + assertStringIncludes( + prepared?.envelope ?? "", + "continue", + ); + assertEquals((prepared?.envelope ?? "").includes(""), false); + assertEquals((prepared?.envelope ?? "").includes(""), false); + assertEquals((prepared?.envelope ?? "").includes(""), false); + assertEquals((prepared?.envelope ?? 
"").includes(""), false); }); - it("prepareInjection prefers the freshest user event over stale fallback", async () => { + it("prepareInjection keeps state.latestUserRequest as the canonical source over history fallback", async () => { const manager = new SessionManager( "group-1", "user-1", @@ -268,26 +1005,7 @@ describe("SessionManager", () => { return null; }, } as never, - { - get() { - return null; - }, - getMeta() { - return null; - }, - renderPersistentMemory() { - return { body: "", factUuids: [], nodeRefs: [] }; - }, - classifyRefresh() { - return { - classification: "miss", - shouldRefresh: true, - similarity: 0, - threshold: 0.5, - cachedQuery: null, - }; - }, - } as never, + emptyCache as never, ); manager.setParentId("session-1", null); @@ -295,6 +1013,9 @@ describe("SessionManager", () => { "session-1", manager.createDefaultState("group-1", "user-1"), ); + const state = manager.createDefaultState("group-1", "user-1"); + state.latestUserRequest = "canonical request"; + manager.setState("session-1", state); const prepared = await manager.prepareInjection( "session-1", "stale fallback", @@ -302,7 +1023,7 @@ describe("SessionManager", () => { assertStringIncludes( prepared?.envelope ?? 
"", - "fresh request", + "canonical request", ); assertEquals(prepared?.refreshDecision.classification, "miss"); }); @@ -350,33 +1071,13 @@ describe("SessionManager", () => { return null; }, } as never, - { - get() { - return null; - }, - getMeta() { - return null; - }, - renderPersistentMemory() { - return { body: "", factUuids: [], nodeRefs: [] }; - }, - classifyRefresh() { - return { - classification: "miss", - shouldRefresh: true, - similarity: 0, - threshold: 0.5, - cachedQuery: null, - }; - }, - } as never, + emptyCache as never, ); manager.setParentId("session-1", null); - manager.setState( - "session-1", - manager.createDefaultState("group-1", "user-1"), - ); + const state = manager.createDefaultState("group-1", "user-1"); + state.latestUserRequest = "fresh request"; + manager.setState("session-1", state); const prepared = await manager.prepareInjection( "session-1", "Investigate recall behavior", @@ -387,8 +1088,8 @@ describe("SessionManager", () => { "Prefer recalled decisions for injection", ); assertEquals( - prepared?.envelope.match(/Investigate recall behavior/g)?.length, - 2, + prepared?.envelope.includes("Investigate recall behavior"), + false, ); }); @@ -439,26 +1140,7 @@ describe("SessionManager", () => { return null; }, } as never, - { - get() { - return null; - }, - getMeta() { - return null; - }, - renderPersistentMemory() { - return { body: "", factUuids: [], nodeRefs: [] }; - }, - classifyRefresh() { - return { - classification: "miss", - shouldRefresh: true, - similarity: 0, - threshold: 0.5, - cachedQuery: null, - }; - }, - } as never, + emptyCache as never, ); manager.setParentId("session-1", null); @@ -573,26 +1255,7 @@ describe("SessionManager", () => { return snapshot; }, } as never, - { - get() { - return null; - }, - getMeta() { - return null; - }, - renderPersistentMemory() { - return { body: "", factUuids: [], nodeRefs: [] }; - }, - classifyRefresh() { - return { - classification: "miss", - shouldRefresh: true, - similarity: 0, - 
threshold: 0.5, - cachedQuery: null, - }; - }, - } as never, + emptyCache as never, ); manager.setParentId("session-1", null); @@ -669,27 +1332,19 @@ describe("SessionManager", () => { return { query: "compact session memory", refreshedAt: Date.now(), - facts: [{ uuid: "fact-1", fact: hugeTranscript }], nodes: [{ uuid: "node-1", name: "Context Overhaul", summary: hugeTranscript, }], - factUuids: ["fact-1"], nodeRefs: ["node-1"], }; }, getMeta() { return null; }, - renderPersistentMemory(cache: PersistentMemoryCacheEntry | null) { - return { - body: cache - ? `${cache.facts[0].fact.slice(0, 220)}` - : "", - factUuids: cache ? ["fact-1"] : [], - nodeRefs: cache ? ["node-1"] : [], - }; + renderPersistentMemory() { + return { body: "", nodeRefs: [] }; }, classifyRefresh() { return { @@ -867,10 +1522,10 @@ describe("SessionManager", () => { assertEquals(snapshot.length <= 3000, true); assertStringIncludes(snapshot, ""); - assertStringIncludes(snapshot, ""); + assertEquals(snapshot.includes(""), false); }); - it("snapshot keeps an active_task section by falling back to the latest user request", () => { + it("snapshot omits active_task when it would duplicate the latest user request", () => { const long = "plan ".repeat(120); const snapshot = buildSessionSnapshotXml("session-1", [ ...Array.from({ length: 5 }, (_, index) => ({ @@ -900,11 +1555,63 @@ describe("SessionManager", () => { }, ]); - assertStringIncludes(snapshot, ""); + assertEquals(snapshot.includes(""), false); assertEquals(snapshot.length <= 3000, true); }); - it("snapshot keeps blockers distinct from summary-only errors", () => { + it("prepareInjection sanitizes history fallback and does not override canonical state.latestUserRequest", async () => { + const manager = new SessionManager( + "group-1", + "user-1", + { session: {} } as never, + { + recallSessionEvents() { + return []; + }, + getRecentSessionEvents() { + return [{ + id: "1", + ts: Date.now(), + category: "message", + priority: 4, + role: "user", + 
summary: + 'old polluted history', + body: + 'legacy polluted history', + }]; + }, + } as never, + { + getSnapshot() { + return null; + }, + } as never, + { + ...emptyCache, + getMeta() { + return { lastQuery: "history query" }; + }, + } as never, + ); + + manager.setParentId("session-1", null); + const state = manager.createDefaultState("group-1", "user-1"); + state.latestUserRequest = "canonical request"; + manager.setState("session-1", state); + const prepared = await manager.prepareInjection("session-1"); + + assertStringIncludes( + prepared?.envelope ?? "", + "canonical request", + ); + assertEquals( + (prepared?.envelope ?? "").includes("polluted history"), + false, + ); + }); + + it("snapshot keeps summary-only errors and avoids duplicating blocker text across sections", () => { const snapshot = buildSessionSnapshotXml("session-1", [ { id: "1", @@ -934,14 +1641,11 @@ describe("SessionManager", () => { snapshot, "Refresh blocked while waiting on Redis lock", ); - assertStringIncludes( - snapshot, - "Refresh blocked while waiting on Redis lock", - ); + assertEquals(snapshot.includes(""), false); assertEquals(snapshot.includes("Command failed"), false); }); - it("snapshot renders the expanded context sections when those events exist", () => { + it("snapshot keeps only the high-value conservative sections when those events exist", () => { const snapshot = buildSessionSnapshotXml("session-1", [ { id: "1", @@ -1013,9 +1717,9 @@ describe("SessionManager", () => { assertStringIncludes(snapshot, ""); assertStringIncludes(snapshot, ""); assertStringIncludes(snapshot, ""); - assertStringIncludes(snapshot, ""); - assertStringIncludes(snapshot, ""); - assertStringIncludes(snapshot, ""); - assertStringIncludes(snapshot, ""); + assertEquals(snapshot.includes(""), false); + assertEquals(snapshot.includes(""), false); + assertEquals(snapshot.includes(""), false); + assertEquals(snapshot.includes(""), false); }); }); diff --git a/src/session.ts b/src/session.ts index 
dd18f98..b9ef1f5 100644 --- a/src/session.ts +++ b/src/session.ts @@ -1,39 +1,113 @@ import type { OpencodeClient } from "@opencode-ai/sdk"; import { DEFAULT_CONTEXT_LIMIT } from "./services/constants.ts"; import { logger } from "./services/logger.ts"; -import type { RedisCacheService } from "./services/redis-cache.ts"; +import { + PERSISTENT_MEMORY_BODY_BUDGET, + type RedisCacheService, +} from "./services/redis-cache.ts"; import type { RedisEventsService } from "./services/redis-events.ts"; +import type { RedisSnapshotService } from "./services/redis-snapshot.ts"; import { escapeXml, + normalizeMemoryText, renderXmlListSection, - uniqueValues, + sanitizeMemoryInput, + uniqueNormalizedValues, } from "./services/render-utils.ts"; -import type { RedisSnapshotService } from "./services/redis-snapshot.ts"; import { getSessionEventPrimaryText, + type PersistentMemoryCacheEntry, + type PersistentMemoryCacheMeta, type PreparedSessionMemory, type SessionEvent, } from "./types/index.ts"; const findLatestUserRequest = ( events: SessionEvent[], - fallback?: string, ): string => { - const lastUser = events.findLast((event) => event.role === "user"); - return lastUser - ? getSessionEventPrimaryText(lastUser, fallback) - : fallback ?? ""; + for (let index = events.length - 1; index >= 0; index -= 1) { + const event = events[index]; + if (event.role !== "user") continue; + const candidate = sanitizeMemoryInput(getSessionEventPrimaryText(event)); + if (candidate) return candidate; + } + return ""; }; const RECENT_BASELINE_LIMIT = 20; const RECALL_RESULT_LIMIT = 12; +const EXPLICIT_NOT_FOUND_CODES = new Set([ + "not_found", + "session_not_found", +]); + +const asRecord = (value: unknown): Record | null => + typeof value === "object" && value !== null + ? value as Record + : null; + +const normalizeErrorToken = (value: unknown): string | null => { + if (typeof value !== "string") return null; + const normalized = value.trim().toLowerCase(); + return normalized.length > 0 ? 
normalized : null; +}; + +const isExplicitNotFoundCode = (value: unknown): boolean => { + const normalized = normalizeErrorToken(value); + return normalized !== null && EXPLICIT_NOT_FOUND_CODES.has(normalized); +}; + +const isExplicitSessionNotFoundMessage = (value: unknown): boolean => { + if (typeof value !== "string") return false; + return /\bsession not found\b/i.test(value); +}; + +const isExplicitSessionNotFoundError = (error: unknown): boolean => { + const queue: unknown[] = [error]; + const visited = new Set(); + + while (queue.length > 0) { + const current = queue.shift(); + const record = asRecord(current); + if (!record) continue; + if (visited.has(record)) continue; + visited.add(record); + + const status = record.status; + const statusCode = record.statusCode; + if (status === 404 || statusCode === 404) return true; + + if ( + isExplicitNotFoundCode(record.code) || + isExplicitNotFoundCode(record.errorCode) || + isExplicitNotFoundCode(record.type) || + isExplicitSessionNotFoundMessage(record.message) + ) { + return true; + } + + queue.push( + record.cause, + record.data, + record.body, + record.error, + record.response, + ); + } + + return false; +}; + const mergeSessionEvents = ( recentEvents: SessionEvent[], recalledEvents: SessionEvent[], ): SessionEvent[] => { const merged = new Map(); - for (const event of [...recentEvents, ...recalledEvents]) { + for (const event of recentEvents) { + if (!merged.has(event.id)) merged.set(event.id, event); + } + for (const event of recalledEvents) { if (!merged.has(event.id)) merged.set(event.id, event); } return [...merged.values()].sort((left, right) => { @@ -46,24 +120,75 @@ const collectRecentUniqueValues = ( events: SessionEvent[], collect: (event: SessionEvent) => string | string[] | null | undefined, limit: number, + excludedNormalized = new Set(), ): string[] => - uniqueValues( + uniqueNormalizedValues( events.flatMap((event) => { const value = collect(event); if (value === null || value === undefined) 
return []; return Array.isArray(value) ? value : [value]; }).reverse(), limit, + excludedNormalized, + ); + +const addNormalizedValues = (target: Set, values: string[]): void => { + for (const value of values) { + const normalized = normalizeMemoryText(value); + if (normalized) target.add(normalized); + } +}; + +const filterDuplicateSnapshotLeaves = ( + snapshot: string | null, + excludedNormalized: Set, +): string => { + if (!snapshot) return ""; + let filtered = snapshot.replace( + /<([a-z_]+)>([^<>]*)<\/\1>/gi, + (match, tag: string, text: string) => { + if (tag.toLowerCase() === "snapshot") return match; + const normalized = normalizeMemoryText(text); + return normalized && excludedNormalized.has(normalized) ? "" : match; + }, + ); + filtered = filtered.replace(/<(?!snapshot\b)([a-z_]+)>\s*<\/\1>/gi, ""); + return filtered; +}; + +const collectSectionValues = ( + events: SessionEvent[], + predicate: (event: SessionEvent) => boolean, + limit: number, + excludedNormalized = new Set(), +): string[] => + collectRecentUniqueValues( + events, + (event) => + predicate(event) + ? sanitizeMemoryInput(getSessionEventPrimaryText(event)) + : null, + limit, + excludedNormalized, + ); + +const collectPathValues = ( + events: SessionEvent[], + limit: number, + excludedNormalized = new Set(), +): string[] => + collectRecentUniqueValues( + events, + (event) => event.category.startsWith("file.") ? event.refs ?? 
[] : [], + limit, + excludedNormalized, ); export type SessionState = { groupId: string; userGroupId: string; injectedMemories: boolean; - lastInjectionFactUuids: string[]; - visibleFactUuids: string[]; messageCount: number; - pendingMessages: string[]; contextLimit: number; isMain: boolean; hotTierReady: boolean; @@ -86,15 +211,385 @@ type SessionLifecycle = { idleCleanupTimer: TimerHandle | null; }; +type PreparedInjectionData = { + cache: PersistentMemoryCacheEntry | null; + cacheMeta: PersistentMemoryCacheMeta | null; + events: SessionEvent[]; + latestRequest: string; + snapshot: string | null; +}; + +class AssistantMessageBuffer { + private pendingMessages = new Map< + string, + { sessionId: string; text: string; sourceSessionId: string } + >(); + private pendingCompletions = new Set(); + private bufferedMessageIds = new Map(); + + bufferPart( + sessionId: string, + messageId: string, + text: string, + sourceSessionId = sessionId, + ): void { + this.pendingMessages.set(`${sessionId}:${messageId}`, { + sessionId, + text, + sourceSessionId, + }); + } + + isBuffered(sessionId: string, messageId: string): boolean { + return this.bufferedMessageIds.has(`${sessionId}:${messageId}`); + } + + hasPendingCompletion(sessionId: string, messageId: string): boolean { + return this.pendingCompletions.has(`${sessionId}:${messageId}`); + } + + finalize( + sessionId: string, + messageId: string, + source: string, + ): string | null { + const key = `${sessionId}:${messageId}`; + if (this.bufferedMessageIds.has(key)) return null; + + const buffered = this.pendingMessages.get(key); + const messageText = buffered?.text?.trim() ?? ""; + if (!messageText) { + this.pendingCompletions.add(key); + return null; + } + + this.pendingCompletions.delete(key); + this.pendingMessages.delete(key); + this.bufferedMessageIds.set(key, buffered?.sourceSessionId ?? 
sessionId); + logger.info("Assistant message completed", { + hook: source, + sessionId, + messageID: messageId, + messageLength: messageText.length, + }); + return messageText; + } + + deletePending(sessionId: string, messageId: string): void { + const key = `${sessionId}:${messageId}`; + this.pendingMessages.delete(key); + this.pendingCompletions.delete(key); + } + + purgeSource(sourceSessionId: string): void { + for (const [key, buffered] of [...this.pendingMessages.entries()]) { + if (buffered.sourceSessionId === sourceSessionId) { + this.pendingMessages.delete(key); + this.pendingCompletions.delete(key); + } + } + for ( + const [key, bufferedSourceSessionId] of [ + ...this.bufferedMessageIds.entries(), + ] + ) { + if (bufferedSourceSessionId === sourceSessionId) { + this.bufferedMessageIds.delete(key); + } + } + } + + migrateSession(sessionId: string, canonicalSessionId: string): void { + const sessionPrefix = `${sessionId}:`; + for (const [key, buffered] of [...this.pendingMessages.entries()]) { + if (!key.startsWith(sessionPrefix)) continue; + const messageId = key.slice(sessionPrefix.length); + const canonicalKey = `${canonicalSessionId}:${messageId}`; + if (!this.pendingMessages.has(canonicalKey)) { + this.pendingMessages.set(canonicalKey, { + ...buffered, + sessionId: canonicalSessionId, + }); + } + this.pendingMessages.delete(key); + } + + for ( + const [key, sourceSessionId] of [...this.bufferedMessageIds.entries()] + ) { + if (!key.startsWith(sessionPrefix)) continue; + const messageId = key.slice(sessionPrefix.length); + const canonicalKey = `${canonicalSessionId}:${messageId}`; + if (!this.bufferedMessageIds.has(canonicalKey)) { + this.bufferedMessageIds.set(canonicalKey, sourceSessionId); + } + this.bufferedMessageIds.delete(key); + } + + for (const key of [...this.pendingCompletions]) { + if (!key.startsWith(sessionPrefix)) continue; + const messageId = key.slice(sessionPrefix.length); + 
this.pendingCompletions.add(`${canonicalSessionId}:${messageId}`); + this.pendingCompletions.delete(key); + } + } + + deleteSession(sessionId: string): void { + const prefix = `${sessionId}:`; + for (const key of [...this.pendingMessages.keys()]) { + if (key.startsWith(prefix)) { + this.pendingMessages.delete(key); + this.pendingCompletions.delete(key); + } + } + for (const [key] of [...this.bufferedMessageIds.entries()]) { + if (key.startsWith(prefix)) this.bufferedMessageIds.delete(key); + } + for (const key of [...this.pendingCompletions]) { + if (key.startsWith(prefix)) this.pendingCompletions.delete(key); + } + } +} + +class SessionLifecycleRegistry { + private lifecycles = new Map(); + + constructor( + private readonly idleRetentionMs: number, + private readonly setTimerImpl: ( + callback: () => void, + delayMs: number, + ) => TimerHandle, + private readonly clearTimerImpl: (timer: TimerHandle) => void, + ) {} + + markActive(sessionId: string): void { + const lifecycle = this.get(sessionId); + lifecycle.activityGeneration += 1; + if (lifecycle.idleCleanupTimer !== null) { + this.clearTimerImpl(lifecycle.idleCleanupTimer); + lifecycle.idleCleanupTimer = null; + } + } + + captureGeneration(sessionId: string, isMain: boolean): number | null { + if (!isMain) return null; + return this.get(sessionId).activityGeneration; + } + + scheduleCleanup( + sessionId: string, + isMain: boolean, + deleteSession: () => void, + expectedActivityGeneration?: number, + ): void { + if (!isMain) { + deleteSession(); + return; + } + + const lifecycle = this.get(sessionId); + if ( + expectedActivityGeneration !== undefined && + lifecycle.activityGeneration !== expectedActivityGeneration + ) { + return; + } + + if (this.idleRetentionMs <= 0) { + deleteSession(); + return; + } + + if (lifecycle.idleCleanupTimer !== null) { + this.clearTimerImpl(lifecycle.idleCleanupTimer); + lifecycle.idleCleanupTimer = null; + } + + const activityGeneration = expectedActivityGeneration ?? 
+ lifecycle.activityGeneration; + const timerHandle = this.setTimerImpl(() => { + const currentLifecycle = this.lifecycles.get(sessionId); + if (!currentLifecycle) return; + if (currentLifecycle.idleCleanupTimer !== timerHandle) return; + if (currentLifecycle.activityGeneration !== activityGeneration) return; + deleteSession(); + }, this.idleRetentionMs); + + lifecycle.idleCleanupTimer = timerHandle; + } + + migrate(sessionId: string, canonicalSessionId: string): void { + const sourceLifecycle = this.lifecycles.get(sessionId); + const targetLifecycle = this.lifecycles.get(canonicalSessionId); + if (!sourceLifecycle) return; + + const targetIdleCleanupTimer = targetLifecycle?.idleCleanupTimer ?? null; + if (sourceLifecycle.idleCleanupTimer !== null) { + this.clearTimerImpl(sourceLifecycle.idleCleanupTimer); + } + if (targetIdleCleanupTimer !== null) { + this.clearTimerImpl(targetIdleCleanupTimer); + } + this.lifecycles.set(canonicalSessionId, { + activityGeneration: Math.max( + targetLifecycle?.activityGeneration ?? 
0, + sourceLifecycle.activityGeneration, + ), + idleCleanupTimer: null, + }); + this.lifecycles.delete(sessionId); + } + + delete(sessionId: string): void { + const lifecycle = this.lifecycles.get(sessionId); + if (lifecycle?.idleCleanupTimer != null) { + this.clearTimerImpl(lifecycle.idleCleanupTimer); + } + this.lifecycles.delete(sessionId); + } + + private get(sessionId: string): SessionLifecycle { + let lifecycle = this.lifecycles.get(sessionId); + if (!lifecycle) { + lifecycle = { activityGeneration: 0, idleCleanupTimer: null }; + this.lifecycles.set(sessionId, lifecycle); + } + return lifecycle; + } +} + +const buildPreparedInjectionEnvelope = ( + events: SessionEvent[], + snapshot: string | null, + latestRequest: string, + persistent: { body: string; nodeRefs: string[] }, +): string => { + const occupiedNormalized = new Set(); + const normalizedLatestRequest = normalizeMemoryText(latestRequest); + if (normalizedLatestRequest) { + occupiedNormalized.add(normalizedLatestRequest); + } + + const activeTasks = collectSectionValues( + events, + (event) => + ["task.create", "task.update", "task.complete"].includes( + event.category, + ), + 4, + occupiedNormalized, + ); + addNormalizedValues(occupiedNormalized, activeTasks); + + const decisions = collectSectionValues( + events, + (event) => ["decision", "preference"].includes(event.category), + 5, + occupiedNormalized, + ); + addNormalizedValues(occupiedNormalized, decisions); + + const files = collectPathValues(events, 6, occupiedNormalized); + addNormalizedValues(occupiedNormalized, files); + + const rules = collectSectionValues( + events, + (event) => event.category === "rule.load", + 6, + occupiedNormalized, + ); + addNormalizedValues(occupiedNormalized, rules); + + const unresolvedErrors = collectRecentUniqueValues( + events, + (event) => + event.category === "error" && event.metadata?.resolved !== true && + event.role !== "assistant" + ? 
sanitizeMemoryInput(getSessionEventPrimaryText(event)) + : null, + 4, + occupiedNormalized, + ); + addNormalizedValues(occupiedNormalized, unresolvedErrors); + + const gitState = collectSectionValues( + events, + (event) => event.category === "git.activity", + 4, + occupiedNormalized, + ); + addNormalizedValues(occupiedNormalized, gitState); + + const subagentWork = collectSectionValues( + events, + (event) => + event.category === "subagent.start" || + event.category === "subagent.finish", + 4, + occupiedNormalized, + ); + addNormalizedValues(occupiedNormalized, subagentWork); + + const filteredSnapshot = filterDuplicateSnapshotLeaves( + snapshot, + occupiedNormalized, + ); + + const sections = [ + `${escapeXml(latestRequest)}`, + renderXmlListSection( + "active_tasks", + "task", + activeTasks, + { itemCharLimit: 280 }, + ), + renderXmlListSection("key_decisions", "decision", decisions, { + itemCharLimit: 280, + }), + renderXmlListSection("files_in_play", "file", files, { + itemCharLimit: 280, + }), + renderXmlListSection("project_rules", "rule", rules, { + itemCharLimit: 280, + }), + unresolvedErrors.length > 0 + ? renderXmlListSection("unresolved_errors", "error", unresolvedErrors, { + itemCharLimit: 280, + }) + : "", + gitState.length > 0 + ? renderXmlListSection("git_state", "item", gitState, { + itemCharLimit: 280, + }) + : "", + subagentWork.length > 0 + ? renderXmlListSection("subagent_work", "item", subagentWork, { + itemCharLimit: 280, + }) + : "", + filteredSnapshot + ? `${filteredSnapshot}` + : "", + persistent.body + ? 
`${persistent.body}` + : "", + ].filter(Boolean); + + return `${ + sections.join("") + }`; +}; + export class SessionManager { private sessions = new Map(); private parentIdCache = new Map(); - private pendingAssistantMessages = new Map< - string, - { sessionId: string; text: string } - >(); - private bufferedAssistantMessageIds = new Set(); - private sessionLifecycles = new Map(); + private canonicalSessionIdCache = new Map(); + private temporaryRootSessionIds = new Set(); + private readonly assistantBuffer = new AssistantMessageBuffer(); + private readonly lifecycleRegistry: SessionLifecycleRegistry; private readonly idleRetentionMs: number; private readonly setTimerImpl: ( callback: () => void, @@ -116,6 +611,11 @@ export class SessionManager { ((callback, delayMs) => setTimeout(callback, delayMs)); this.clearTimerImpl = options.clearTimer ?? ((timer) => clearTimeout(timer)); + this.lifecycleRegistry = new SessionLifecycleRegistry( + this.idleRetentionMs, + this.setTimerImpl, + this.clearTimerImpl, + ); } createDefaultState(groupId: string, userGroupId: string): SessionState { @@ -123,10 +623,7 @@ export class SessionManager { groupId, userGroupId, injectedMemories: false, - lastInjectionFactUuids: [], - visibleFactUuids: [], messageCount: 0, - pendingMessages: [], contextLimit: DEFAULT_CONTEXT_LIMIT, isMain: true, hotTierReady: false, @@ -141,23 +638,49 @@ export class SessionManager { return this.sessions.get(sessionId); } + getTrackedGroupIds(): string[] { + return [ + ...new Set( + [...this.sessions.values()] + .filter((state) => state.isMain) + .map((state) => state.groupId) + .filter(Boolean), + ), + ]; + } + setState(sessionId: string, state: SessionState): void { this.sessions.set(sessionId, state); } markSessionActive(sessionId: string): void { - const lifecycle = this.getLifecycle(sessionId); - lifecycle.activityGeneration += 1; - if (lifecycle.idleCleanupTimer !== null) { - this.clearTimerImpl(lifecycle.idleCleanupTimer); - lifecycle.idleCleanupTimer 
= null; + this.markLifecycleActive(sessionId); + const canonicalSessionId = this.canonicalSessionIdCache.get(sessionId); + if (canonicalSessionId && canonicalSessionId !== sessionId) { + this.markLifecycleActive(canonicalSessionId); + } + } + + markResolvedSessionActive( + sessionId: string, + canonicalSessionId?: string, + ): void { + this.markLifecycleActive(sessionId); + if (canonicalSessionId && canonicalSessionId !== sessionId) { + this.markLifecycleActive(canonicalSessionId); } } + private markLifecycleActive(sessionId: string): void { + this.lifecycleRegistry.markActive(sessionId); + } + captureIdleCleanupGeneration(sessionId: string): number | null { const state = this.sessions.get(sessionId); - if (!state?.isMain) return null; - return this.getLifecycle(sessionId).activityGeneration; + return this.lifecycleRegistry.captureGeneration( + sessionId, + state?.isMain === true, + ); } scheduleIdleSessionCleanup( @@ -165,50 +688,110 @@ export class SessionManager { expectedActivityGeneration?: number, ): void { const state = this.sessions.get(sessionId); - if (!state?.isMain) { - this.deleteSession(sessionId); - return; - } + this.lifecycleRegistry.scheduleCleanup( + sessionId, + state?.isMain === true, + () => this.deleteSession(sessionId), + expectedActivityGeneration, + ); + } - const lifecycle = this.getLifecycle(sessionId); - if ( - expectedActivityGeneration !== undefined && - lifecycle.activityGeneration !== expectedActivityGeneration - ) { + setParentId(sessionId: string, parentId: string | null): void { + const wasTemporaryRoot = this.temporaryRootSessionIds.has(sessionId); + this.parentIdCache.set(sessionId, parentId); + if (!parentId) { + this.temporaryRootSessionIds.delete(sessionId); + this.canonicalSessionIdCache.set(sessionId, sessionId); return; } - if (this.idleRetentionMs <= 0) { - this.deleteSession(sessionId); + const parentCanonical = this.canonicalSessionIdCache.get(parentId); + if (parentCanonical) { + 
this.canonicalSessionIdCache.set(sessionId, parentCanonical); + if (parentCanonical !== sessionId) { + this.migrateTemporaryRootRuntimeState(sessionId, parentCanonical); + } + if (wasTemporaryRoot) { + this.temporaryRootSessionIds.delete(sessionId); + } return; } - if (lifecycle.idleCleanupTimer !== null) { - this.clearTimerImpl(lifecycle.idleCleanupTimer); - lifecycle.idleCleanupTimer = null; + this.canonicalSessionIdCache.delete(sessionId); + } + + private mergeSessionState( + target: SessionState, + source: SessionState, + ): void { + target.injectedMemories ||= source.injectedMemories; + target.messageCount += source.messageCount; + target.contextLimit = Math.max(target.contextLimit, source.contextLimit); + target.isMain ||= source.isMain; + target.hotTierReady ||= source.hotTierReady; + if (source.latestUserRequest) { + target.latestUserRequest = source.latestUserRequest; + } + if (source.latestRefreshQuery) { + target.latestRefreshQuery = source.latestRefreshQuery; } + if ( + source.pendingInjection !== undefined && + ( + source.pendingInjectionGeneration > target.pendingInjectionGeneration || + ( + source.pendingInjectionGeneration === + target.pendingInjectionGeneration && + target.pendingInjection === undefined + ) + ) + ) { + target.pendingInjection = source.pendingInjection; + } + target.pendingInjectionGeneration = Math.max( + target.pendingInjectionGeneration, + source.pendingInjectionGeneration, + ); + } - const activityGeneration = expectedActivityGeneration ?? 
- lifecycle.activityGeneration; - const timerHandle = this.setTimerImpl(() => { - const currentLifecycle = this.sessionLifecycles.get(sessionId); - if (!currentLifecycle) return; - if (currentLifecycle.idleCleanupTimer !== timerHandle) return; - if (currentLifecycle.activityGeneration !== activityGeneration) return; - this.deleteSession(sessionId); - }, this.idleRetentionMs); + private migrateTemporaryRootRuntimeState( + sessionId: string, + canonicalSessionId: string, + ): void { + if (sessionId === canonicalSessionId) return; - lifecycle.idleCleanupTimer = timerHandle; - } + const sourceState = this.sessions.get(sessionId); + const targetState = this.sessions.get(canonicalSessionId); + if (sourceState) { + if (targetState) { + this.mergeSessionState(targetState, sourceState); + } else { + this.sessions.set(canonicalSessionId, sourceState); + } + this.sessions.delete(sessionId); + } - setParentId(sessionId: string, parentId: string | null): void { - this.parentIdCache.set(sessionId, parentId); + this.lifecycleRegistry.migrate(sessionId, canonicalSessionId); + this.assistantBuffer.migrateSession(sessionId, canonicalSessionId); + + for ( + const [cachedSessionId, cachedCanonicalSessionId] of [ + ...this.canonicalSessionIdCache.entries(), + ] + ) { + if (cachedCanonicalSessionId === sessionId) { + this.canonicalSessionIdCache.set(cachedSessionId, canonicalSessionId); + } + } } async resolveParentId( sessionId: string, ): Promise { - if (this.parentIdCache.has(sessionId)) { + if ( + this.parentIdCache.has(sessionId) && + !this.temporaryRootSessionIds.has(sessionId) + ) { return this.parentIdCache.get(sessionId) ?? null; } try { @@ -222,86 +805,175 @@ export class SessionManager { if (!sessionInfo) return undefined; const parentId = sessionInfo.parentID ?? 
null; this.parentIdCache.set(sessionId, parentId); + this.temporaryRootSessionIds.delete(sessionId); + if (parentId === null) { + this.canonicalSessionIdCache.set(sessionId, sessionId); + } else { + this.canonicalSessionIdCache.delete(sessionId); + } return parentId; } catch (err) { + if (isExplicitSessionNotFoundError(err)) { + this.parentIdCache.set(sessionId, null); + this.canonicalSessionIdCache.set(sessionId, sessionId); + this.temporaryRootSessionIds.add(sessionId); + logger.debug( + "Session not found during parent resolution; treating as temporary root", + { sessionId }, + ); + return null; + } logger.debug("Failed to resolve session parentID", { sessionId, err }); return undefined; } } - async resolveSessionState( + async resolveCanonicalSessionId( sessionId: string, - ): Promise<{ state: SessionState | null; resolved: boolean }> { + visited: Set = new Set(), + ): Promise { + const cached = this.canonicalSessionIdCache.get(sessionId); + const hasProvisionalTemporaryRoot = + this.temporaryRootSessionIds.has(sessionId) && cached === sessionId; + if (cached && !hasProvisionalTemporaryRoot) return cached; + if (visited.has(sessionId)) { + logger.debug("Detected cycle while resolving canonical session", { + sessionId, + visited: [...visited], + }); + return undefined; + } + + visited.add(sessionId); const parentId = await this.resolveParentId(sessionId); - if (parentId === undefined) return { state: null, resolved: false }; - if (parentId) { - this.deleteSession(sessionId); - return { state: null, resolved: true }; + if (parentId === undefined) { + return hasProvisionalTemporaryRoot ? 
cached : undefined; + } + if (!parentId) { + this.canonicalSessionIdCache.set(sessionId, sessionId); + return sessionId; + } + + const canonicalSessionId = await this.resolveCanonicalSessionId( + parentId, + visited, + ); + if (!canonicalSessionId) return undefined; + if (canonicalSessionId !== sessionId) { + this.migrateTemporaryRootRuntimeState(sessionId, canonicalSessionId); + this.temporaryRootSessionIds.delete(sessionId); + } + this.canonicalSessionIdCache.set(sessionId, canonicalSessionId); + return canonicalSessionId; + } + + async resolveSessionState( + sessionId: string, + ): Promise<{ + state: SessionState | null; + resolved: boolean; + canonicalSessionId?: string; + }> { + const canonicalSessionId = await this.resolveCanonicalSessionId(sessionId); + if (!canonicalSessionId) { + return { state: null, resolved: false, canonicalSessionId: undefined }; } - let state = this.sessions.get(sessionId); + let state = this.sessions.get(canonicalSessionId); if (!state) { state = this.createDefaultState( this.defaultGroupId, this.defaultUserGroupId, ); - this.sessions.set(sessionId, state); + this.sessions.set(canonicalSessionId, state); } - return { state, resolved: true }; + return { state, resolved: true, canonicalSessionId }; } bufferAssistantPart( sessionId: string, messageId: string, text: string, + sourceSessionId = sessionId, ): void { - const key = `${sessionId}:${messageId}`; - this.pendingAssistantMessages.set(key, { sessionId, text }); + this.assistantBuffer.bufferPart( + sessionId, + messageId, + text, + sourceSessionId, + ); } isAssistantBuffered(sessionId: string, messageId: string): boolean { - return this.bufferedAssistantMessageIds.has(`${sessionId}:${messageId}`); + return this.assistantBuffer.isBuffered(sessionId, messageId); + } + + hasPendingAssistantCompletion(sessionId: string, messageId: string): boolean { + return this.assistantBuffer.hasPendingCompletion(sessionId, messageId); } finalizeAssistantMessage( - state: SessionState, + _state: 
SessionState, sessionId: string, messageId: string, source: string, ): string | null { - const key = `${sessionId}:${messageId}`; - if (this.bufferedAssistantMessageIds.has(key)) return null; + return this.assistantBuffer.finalize(sessionId, messageId, source); + } - const buffered = this.pendingAssistantMessages.get(key); - this.pendingAssistantMessages.delete(key); - this.bufferedAssistantMessageIds.add(key); + deletePendingAssistant(sessionId: string, messageId: string): void { + this.assistantBuffer.deletePending(sessionId, messageId); + } - const messageText = buffered?.text?.trim() ?? ""; - if (!messageText) return null; - state.pendingMessages.push(`Assistant: ${messageText}`); - logger.info("Assistant message completed", { - hook: source, - sessionId, - messageID: messageId, - messageLength: messageText.length, - }); - return messageText; + clearPendingInjection( + state: SessionState, + prepared?: PreparedSessionMemory | null, + ): void { + if (!prepared) return; + if (state.pendingInjection === prepared) { + state.pendingInjection = undefined; + } } - deletePendingAssistant(sessionId: string, messageId: string): void { - this.pendingAssistantMessages.delete(`${sessionId}:${messageId}`); + purgeAssistantBufferSource(sourceSessionId: string): void { + this.assistantBuffer.purgeSource(sourceSessionId); } async prepareInjection( sessionId: string, lastRequest?: string, - visibleFactUuids?: string[], ): Promise { const state = this.sessions.get(sessionId); if (!state?.isMain) return null; const generation = state.pendingInjectionGeneration + 1; state.pendingInjectionGeneration = generation; + const data = await this.collectPreparedInjectionData( + sessionId, + state, + lastRequest, + ); + const prepared = this.buildPreparedInjection(state, data); + if (!prepared) return null; + + const currentState = this.sessions.get(sessionId); + if (currentState !== state || !currentState.isMain) return null; + if (state.pendingInjectionGeneration !== generation) return 
null; + + this.applyPreparedInjection( + state, + prepared, + data.cacheMeta, + data.latestRequest, + ); + return prepared; + } + + private async collectPreparedInjectionData( + sessionId: string, + state: SessionState, + lastRequest?: string, + ): Promise { const [recentEvents, snapshot, cache, cacheMeta] = await Promise.all([ this.redisEvents.getRecentSessionEvents( sessionId, @@ -313,177 +985,86 @@ export class SessionManager { this.redisCache.getMeta(state.groupId), ]); - const latestRequest = findLatestUserRequest( - recentEvents, - lastRequest ?? state.latestUserRequest ?? state.latestRefreshQuery ?? - cacheMeta?.lastQuery, + const canonicalLatestRequest = sanitizeMemoryInput( + state.latestUserRequest ?? "", ); + const directFallbackRequest = sanitizeMemoryInput(lastRequest ?? ""); + const cachedFallbackRequest = sanitizeMemoryInput( + state.latestRefreshQuery ?? cacheMeta?.lastQuery ?? "", + ); + const historyFallbackRequest = findLatestUserRequest(recentEvents); + const latestRequest = canonicalLatestRequest || directFallbackRequest || + cachedFallbackRequest || historyFallbackRequest; const recalledEvents = latestRequest ? await this.redisEvents.recallSessionEvents(sessionId, latestRequest, { resultLimit: RECALL_RESULT_LIMIT, }) : []; - const events = mergeSessionEvents(recentEvents, recalledEvents); - const activeTasks = collectRecentUniqueValues( - events, - (event) => - ["task.create", "task.update", "intent"].includes(event.category) - ? getSessionEventPrimaryText(event) - : null, - 4, - ); - const decisions = collectRecentUniqueValues( - events, - (event) => - ["decision", "preference"].includes(event.category) - ? getSessionEventPrimaryText(event) - : null, - 5, - ); - const files = collectRecentUniqueValues( - events, - (event) => event.category.startsWith("file.") ? event.refs ?? [] : [], - 6, - ); - const rules = collectRecentUniqueValues( - events, - (event) => - event.category === "rule.load" - ? 
getSessionEventPrimaryText(event) - : null, - 6, - ); - const unresolvedErrors = collectRecentUniqueValues( - events, - (event) => - event.category === "error" && event.metadata?.resolved !== true - ? getSessionEventPrimaryText(event) - : null, - 4, - ); - const gitState = collectRecentUniqueValues( - events, - (event) => - event.category === "git.activity" - ? getSessionEventPrimaryText(event) - : null, - 4, - ); - const subagentWork = collectRecentUniqueValues( - events, - (event) => - event.category === "subagent.start" || - event.category === "subagent.finish" - ? getSessionEventPrimaryText(event) - : null, - 4, - ); - const persistent = this.redisCache.renderPersistentMemory( + return { cache, - visibleFactUuids ?? state.visibleFactUuids, + cacheMeta, + events: mergeSessionEvents(recentEvents, recalledEvents), + latestRequest, + snapshot, + }; + } + + private buildPreparedInjection( + _state: SessionState, + data: PreparedInjectionData, + ): PreparedSessionMemory { + const persistent = this.redisCache.renderPersistentMemory( + data.cache, + PERSISTENT_MEMORY_BODY_BUDGET, ); const refreshDecision = this.redisCache.classifyRefresh( - cache, - latestRequest, + data.cache, + data.latestRequest, ); - const sections = [ - `${escapeXml(latestRequest)}`, - renderXmlListSection( - "active_tasks", - "task", - activeTasks.length > 0 - ? activeTasks - : latestRequest - ? [latestRequest] - : [], - { itemCharLimit: 280, includeEmpty: true }, + return { + envelope: buildPreparedInjectionEnvelope( + data.events, + data.snapshot, + data.latestRequest, + persistent, ), - renderXmlListSection("key_decisions", "decision", decisions, { - itemCharLimit: 280, - includeEmpty: true, - }), - renderXmlListSection("files_in_play", "file", files, { - itemCharLimit: 280, - includeEmpty: true, - }), - renderXmlListSection("project_rules", "rule", rules, { - itemCharLimit: 280, - includeEmpty: true, - }), - unresolvedErrors.length > 0 - ? 
renderXmlListSection("unresolved_errors", "error", unresolvedErrors, { - itemCharLimit: 280, - }) - : "", - gitState.length > 0 - ? renderXmlListSection("git_state", "item", gitState, { - itemCharLimit: 280, - }) - : "", - subagentWork.length > 0 - ? renderXmlListSection("subagent_work", "item", subagentWork, { - itemCharLimit: 280, - }) - : "", - snapshot ? `${snapshot}` : "", - persistent.body - ? `${persistent.body}` - : "", - ].filter(Boolean); - - const envelope = - `${ - sections.join("") - }`; - const prepared = { - envelope, - factUuids: persistent.factUuids, nodeRefs: persistent.nodeRefs, refreshDecision, }; + } - const currentState = this.sessions.get(sessionId); - if (currentState !== state || !currentState.isMain) return null; - if (state.pendingInjectionGeneration !== generation) return null; - + private applyPreparedInjection( + state: SessionState, + prepared: PreparedSessionMemory, + cacheMeta: PersistentMemoryCacheMeta | null, + latestRequest: string, + ): void { state.pendingInjection = prepared; - state.lastInjectionFactUuids = persistent.factUuids; state.hotTierReady = true; state.latestRefreshQuery = latestRequest || cacheMeta?.lastQuery; - return prepared; } deleteSession(sessionId: string): void { - const lifecycle = this.sessionLifecycles.get(sessionId); - if (lifecycle?.idleCleanupTimer != null) { - this.clearTimerImpl(lifecycle.idleCleanupTimer); - } - this.sessionLifecycles.delete(sessionId); + this.lifecycleRegistry.delete(sessionId); this.sessions.delete(sessionId); this.parentIdCache.delete(sessionId); - const prefix = `${sessionId}:`; - for (const key of [...this.pendingAssistantMessages.keys()]) { - if (key.startsWith(prefix)) this.pendingAssistantMessages.delete(key); - } - for (const key of [...this.bufferedAssistantMessageIds]) { - if (key.startsWith(prefix)) this.bufferedAssistantMessageIds.delete(key); + this.canonicalSessionIdCache.delete(sessionId); + this.temporaryRootSessionIds.delete(sessionId); + for ( + const 
[childSessionId, parentId] of [...this.parentIdCache.entries()] + ) { + if (parentId === sessionId) this.parentIdCache.delete(childSessionId); } - } - - private getLifecycle(sessionId: string): SessionLifecycle { - let lifecycle = this.sessionLifecycles.get(sessionId); - if (!lifecycle) { - lifecycle = { - activityGeneration: 0, - idleCleanupTimer: null, - }; - this.sessionLifecycles.set(sessionId, lifecycle); + for ( + const [childSessionId, canonicalSessionId] of [ + ...this.canonicalSessionIdCache.entries(), + ] + ) { + if (canonicalSessionId === sessionId) { + this.canonicalSessionIdCache.delete(childSessionId); + } } - return lifecycle; + this.assistantBuffer.deleteSession(sessionId); } } diff --git a/src/types/index.ts b/src/types/index.ts index bb5a8e9..1b8b75e 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -1,6 +1,6 @@ -/** FalkorDB/Redis hot-tier configuration. */ -export interface FalkorDbConfig { - redisEndpoint: string; +/** Redis hot-tier configuration. */ +export interface RedisConfig { + endpoint: string; batchSize: number; batchMaxBytes: number; sessionTtlSeconds: number; @@ -13,25 +13,25 @@ export interface GraphitiServiceConfig { endpoint: string; groupIdPrefix: string; driftThreshold: number; - factStaleDays: number; +} + +export interface RawGraphitiConfig { + redis?: Partial; + graphiti?: Partial; + endpoint?: string; + groupIdPrefix?: string; + driftThreshold?: number; } /** Plugin configuration for hot-tier + Graphiti async integration. */ export interface GraphitiConfig { - falkordb: FalkorDbConfig; + redis: RedisConfig; graphiti: GraphitiServiceConfig; - // Legacy top-level keys retained for compatibility. + // Legacy top-level Graphiti keys retained for compatibility. 
endpoint?: string; groupIdPrefix?: string; driftThreshold?: number; - factStaleDays?: number; - redisEndpoint?: string; - batchSize?: number; - batchMaxBytes?: number; - sessionTtlSeconds?: number; - cacheTtlSeconds?: number; - drainRetryMax?: number; } /** A fact retrieved from the Graphiti knowledge graph. */ @@ -153,17 +153,14 @@ export const getSessionEventRecallText = (event: SessionEvent): string => export interface PersistentMemoryCacheEntry { query: string; refreshedAt: number; - facts: GraphitiFact[]; nodes: GraphitiNode[]; episodeSummaries?: string[]; - factUuids: string[]; nodeRefs: string[]; } export interface PersistentMemoryCacheMeta { lastQuery?: string; lastRefresh?: number; - factUuids: string[]; } export type CacheRefreshClassification = @@ -187,16 +184,20 @@ export interface DrainQueueEntry { event: SessionEvent; } +export interface PreparedDrainQueueEntry extends DrainQueueEntry { + episodeBody: string; + episodeBodyBytes: number; +} + export interface ClaimedDrainBatch { claimToken: string; claimKey: string; lockTtlSeconds: number; - entries: DrainQueueEntry[]; + entries: PreparedDrainQueueEntry[]; } export interface PreparedSessionMemory { envelope: string; - factUuids: string[]; nodeRefs: string[]; refreshDecision: CacheRefreshDecision; } diff --git a/src/utils.test.ts b/src/utils.test.ts index c952059..d653334 100644 --- a/src/utils.test.ts +++ b/src/utils.test.ts @@ -1,7 +1,14 @@ import { assertEquals } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { stub } from "jsr:@std/testing@^1.0.0/mock"; import type { Part, TextPart } from "@opencode-ai/sdk"; -import { extractTextFromParts, isTextPart } from "./utils.ts"; +import os from "node:os"; +import { + extractTextFromParts, + isTextPart, + makeGroupId, + makeUserGroupId, +} from "./utils.ts"; const makeTextPart = ( text: string, @@ -97,4 +104,36 @@ describe("utils", () => { assertEquals(extractTextFromParts(parts), ""); }); }); + + 
describe("group id helpers", () => { + it("normalizes Windows-style project paths", () => { + assertEquals( + makeGroupId("graphiti", "C:\\Users\\tester\\My Project"), + "graphiti_MyProject__main", + ); + }); + + it("normalizes Windows-style home directories for user group ids", () => { + using _homedir = stub(os, "homedir", () => "C:\\Users\\tester"); + assertEquals( + makeUserGroupId("graphiti", "C:\\Users\\tester\\My Project"), + "graphiti_MyProject__user_tester", + ); + }); + + it("keeps a stable non-default segment for unicode-only project names", () => { + assertEquals( + makeGroupId("graphiti", "/projects/مشروع"), + "graphiti_مشروع__main", + ); + }); + + it("keeps unicode-only project names in user group ids", () => { + using _homedir = stub(os, "homedir", () => "/home/tester"); + assertEquals( + makeUserGroupId("graphiti", "/projects/東京"), + "graphiti_東京__user_tester", + ); + }); + }); }); diff --git a/src/utils.ts b/src/utils.ts index 7db1a1c..3527bf0 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,9 +1,33 @@ import type { Part } from "@opencode-ai/sdk"; import os from "node:os"; +import path from "node:path"; import process from "node:process"; +const getPathBasename = (value: string): string => { + const trimmed = value.trim(); + if (!trimmed) return ""; + const normalized = trimmed.replaceAll("\\", "/"); + return path.posix.basename(normalized); +}; + const getProjectName = (directory: string) => - directory.split("/").filter(Boolean).at(-1)?.trim() || "default"; + getPathBasename(directory) || "default"; + +const toPascalCaseSegment = (value: string): string => { + const words = value.match(/[\p{L}\p{N}]+/gu) ?? 
[]; + const pascal = words.map((word) => { + if (!word) return ""; + const [first = "", ...rest] = Array.from(word); + return first.toLocaleUpperCase() + rest.join("").toLocaleLowerCase(); + }).join(""); + return pascal || "Default"; +}; + +const sanitizeGroupSegment = (value: string): string => + value.replace(/[^A-Za-z0-9_]/g, "_"); + +const sanitizeProjectSegment = (value: string): string => + value.replace(/[^\p{L}\p{N}_]/gu, "_"); const getHomeDirectory = (): string | undefined => { try { @@ -14,19 +38,20 @@ const getHomeDirectory = (): string | undefined => { }; const getUserName = () => - getHomeDirectory()?.split("/").filter(Boolean).at(-1)?.trim() || undefined; + getPathBasename(getHomeDirectory() ?? "") || undefined; /** * Build a sanitized Graphiti group ID from a prefix and project directory. */ export const makeGroupId = ( prefix?: string, - directory = process.cwd(), + directory: string = process.cwd(), ): string => { - const projectName = getProjectName(directory); - const prefixPart = prefix ? `${prefix}-` : ""; - const rawGroupId = `${prefixPart}${projectName}__main`; - return rawGroupId.replace(/[^A-Za-z0-9_-]/g, "_"); + const projectName = sanitizeProjectSegment( + toPascalCaseSegment(getProjectName(directory)), + ); + const prefixPart = prefix ? `${sanitizeGroupSegment(prefix)}_` : ""; + return `${prefixPart}${projectName}__main`; }; /** @@ -34,13 +59,15 @@ export const makeGroupId = ( */ export const makeUserGroupId = ( prefix?: string, - directory = process.cwd(), + directory: string = process.cwd(), ): string => { - const projectName = getProjectName(directory); + const projectName = sanitizeProjectSegment( + toPascalCaseSegment(getProjectName(directory)), + ); const userName = getUserName() ?? "unknown"; - const prefixPart = prefix ? `${prefix}-` : ""; - const rawGroupId = `${prefixPart}${projectName}__user-${userName}`; - return rawGroupId.replace(/[^A-Za-z0-9_-]/g, "_"); + const prefixPart = prefix ? 
`${sanitizeGroupSegment(prefix)}_` : ""; + const userSegment = sanitizeGroupSegment(userName); + return `${prefixPart}${projectName}__user_${userSegment}`; }; /** From 64a0bc878b452402c33f847b8f85040a357ae383 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Mon, 23 Mar 2026 00:27:12 +0800 Subject: [PATCH 03/38] feat: add local session MCP runtime and tool routing Keep session memory and session tool continuity on the local hot path while preserving canonical child-session attachment and build portability. --- README.md | 70 +- {plans => docs}/ConnectionManager.md | 0 docs/ContextOverhaul.md | 1006 +++++++++ {plans => docs}/ContextOverhaulTests.md | 24 +- ...0-context-mode-mcp-first-implementation.md | 934 +++++++++ .../2026-03-20-context-mode-mcp-first.md | 1019 +++++++++ .../plans/2026-03-22-task-2-final-fixes.md | 176 ++ plans/ContextOverhaul.md | 1078 ---------- src/handlers/chat.test.ts | 43 + src/handlers/compacting.test.ts | 32 + src/handlers/event.test.ts | 147 +- src/handlers/event.ts | 111 +- src/handlers/messages.test.ts | 37 + src/handlers/tool-after.test.ts | 126 ++ src/handlers/tool-after.ts | 33 + src/handlers/tool-before.test.ts | 223 ++ src/handlers/tool-before.ts | 91 + src/index.test.ts | 140 +- src/index.ts | 48 +- src/services/connection-manager.ts | 7 +- src/services/redis-client.ts | 55 +- src/services/session-corpus.test.ts | 863 ++++++++ src/services/session-corpus.ts | 1841 +++++++++++++++++ src/services/session-mcp-runtime.test.ts | 609 ++++++ src/services/session-mcp-runtime.ts | 591 ++++++ src/services/session-mcp-types.ts | 170 ++ src/services/tool-guidance-cache.test.ts | 64 + src/services/tool-guidance-cache.ts | 23 + src/services/tool-guidance.ts | 60 + .../tool-routing-outcome-cache.test.ts | 55 + src/services/tool-routing-outcome-cache.ts | 26 + src/services/tool-routing.test.ts | 259 +++ src/services/tool-routing.ts | 204 ++ src/session.test.ts | 176 ++ src/session.ts | 124 +- src/types/index.ts | 8 + 36 files changed, 9318 
insertions(+), 1155 deletions(-) rename {plans => docs}/ConnectionManager.md (100%) create mode 100644 docs/ContextOverhaul.md rename {plans => docs}/ContextOverhaulTests.md (97%) create mode 100644 docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md create mode 100644 docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md create mode 100644 docs/superpowers/plans/2026-03-22-task-2-final-fixes.md delete mode 100644 plans/ContextOverhaul.md create mode 100644 src/handlers/tool-after.test.ts create mode 100644 src/handlers/tool-after.ts create mode 100644 src/handlers/tool-before.test.ts create mode 100644 src/handlers/tool-before.ts create mode 100644 src/services/session-corpus.test.ts create mode 100644 src/services/session-corpus.ts create mode 100644 src/services/session-mcp-runtime.test.ts create mode 100644 src/services/session-mcp-runtime.ts create mode 100644 src/services/session-mcp-types.ts create mode 100644 src/services/tool-guidance-cache.test.ts create mode 100644 src/services/tool-guidance-cache.ts create mode 100644 src/services/tool-guidance.ts create mode 100644 src/services/tool-routing-outcome-cache.test.ts create mode 100644 src/services/tool-routing-outcome-cache.ts create mode 100644 src/services/tool-routing.test.ts create mode 100644 src/services/tool-routing.ts create mode 100644 src/session.test.ts diff --git a/README.md b/README.md index 28a8c75..cf48b89 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,32 @@ Graphiti stays off the steady-state hook path entirely: hook-time injection uses only Redis/local cached recall, while fresh Graphiti data arrives through the existing background refresh path on later turns. +### MCP-First Execution Surface + +The plugin exposes a set of `session_*` MCP tools as the **primary execution +surface** for data-heavy work. These tools run in-process alongside the plugin +hooks and share the same canonical root-session identity and Redis/FalkorDB hot +tier. 
+ +- **Bounded execution** (`session_execute`, `session_execute_file`, + `session_batch_execute`) — run commands or process files locally, store full + output in the local corpus, and return only a bounded summary to the model. +- **Local indexing and search** (`session_index`, `session_search`, + `session_fetch_and_index`) — index content into a per-session local corpus in + Redis/FalkorDB and search it with bounded result sets. +- **Diagnostics** (`session_stats`, `session_doctor`) — inspect session state + and corpus health. + +The plugin hooks enforce this preference: when the model falls back to risky +native tools (e.g. unbounded `WebFetch` or raw `curl`), the hook layer may +redirect or deny the call and suggest the corresponding `session_*` tool. Hooks +remain secondary — they handle enforcement, continuity capture, snapshot +assembly, and `` injection, but are not the primary execution +path. + +For the full MCP-first architecture, see +`docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md`. + ## Prerequisites Start the @@ -209,17 +235,20 @@ Removed top-level Redis aliases are no longer supported. ### Injection Format -The plugin currently injects a `` XML envelope into the last -user message. This envelope is assembled from short-term memory in Redis and can -contain structured sections such as ``, ``, -``, ``, ``, and an optional -``. - -When long-term memory is available, a nested `` section is -included with a `node_refs` attribute naming the emitted cached entities. On a -cold first turn or when Graphiti is unreachable, `` is simply -absent — the rest of the session memory is always available from short-term -storage in FalkorDB/Redis. +The plugin injects a **local-first** `` XML envelope into the +last user message. Every section except `` is assembled +entirely from Redis/FalkorDB state — no external service is on the synchronous +path. + +- **Local continuity sections** (``, ``, + ``, ``, ``, etc.) 
are derived + from structured session events stored in Redis/FalkorDB. +- **``** is produced by the local snapshot service, which + continuously rebuilds a priority-tiered summary from those events. +- **``** is an **optional, cache-only** augmentation. When + Graphiti-sourced facts are cached locally, they are included; on a cold first + turn or when Graphiti is unreachable, this section is simply absent. It never + blocks the current turn. ```xml @@ -237,17 +266,18 @@ storage in FalkorDB/Redis. ### Session Memory Preparation (`chat.message`) -On each user message the plugin assembles the current session memory from three -sources: +On each user message the plugin assembles the current session memory from +local-only sources: -- Recent structured session events -- The continuously rebuilt priority-tiered snapshot -- Cached long-term facts from Graphiti +- **Session events** stored in Redis/FalkorDB +- **Priority-tiered snapshot** rebuilt by the local snapshot service +- **Cached Graphiti facts** (optional; read from the local Redis cache, never + from a synchronous Graphiti call) These are composed into a `` envelope and staged for the -transform hook. The hook-time reads are local/cache-backed only; any fresh -Graphiti lookup remains on the existing background refresh path and benefits the -next turn instead of blocking the current one. +transform hook. All reads are local/cache-backed; Graphiti is never called +synchronously. Any fresh Graphiti lookup remains on the existing background +refresh path and benefits the next turn instead of blocking the current one. ### User Message Injection (`experimental.chat.messages.transform`) @@ -308,7 +338,7 @@ regardless of how aggressively the conversation was summarized. > work as summarized tool events. This plugin promotes child sessions to > first-class participants in the root session's state so that decisions, file > edits, and errors from delegated work are fully visible to the parent session. 
-> See `plans/ContextOverhaul.md` §10.1 for the design rationale. +> See `docs/ContextOverhaul.md` §11.1 for the design rationale. When OpenCode spawns a child session (e.g. a subagent or delegated task), the plugin resolves the child's `sessionID` to the root/parent session by walking diff --git a/plans/ConnectionManager.md b/docs/ConnectionManager.md similarity index 100% rename from plans/ConnectionManager.md rename to docs/ConnectionManager.md diff --git a/docs/ContextOverhaul.md b/docs/ContextOverhaul.md new file mode 100644 index 0000000..2266eb9 --- /dev/null +++ b/docs/ContextOverhaul.md @@ -0,0 +1,1006 @@ +# Context Overhaul — Context-Mode-Aligned Hot Path on FalkorDB + +**Status:** Superseded — retained as historical context only\ +**Superseded by:** + +- `docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md` (architecture) +- `docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md` + (implementation) + +**Date:** 2026-03-20\ +**Canonical refs:** `AGENTS.md`, `README.md`, `docs/ContextOverhaulTests.md` + +--- + +## 1 Problem + +The current plugin is only partially aligned with the hot-path behavior that +makes `context-mode` effective. + +Today, this repository does well at: + +- keeping Graphiti off the steady-state chat path +- extracting compact continuity events instead of replaying full transcripts +- rebuilding a deterministic `<session_memory>` envelope from local state + +But it still falls short in the most important real-time token-saving area: + +- native heavy tool calls are usually allowed to run first +- large tool outputs can still enter the live OpenCode transcript +- the plugin mainly compresses what it remembers and re-injects later +- it does not yet consistently prevent high-volume context from being created at + the source + +In contrast, `context-mode` achieves most of its context savings by intercepting +tool calls before execution and routing them toward lighter, bounded behavior. 
+For this plugin to follow that design closely enough, the hot path must shift +from "compact after the fact" to "prevent or bound transcript growth before it +happens." + +This plan updates the architecture to target at least **80% behavioral +alignment** with `context-mode` on the hot path while preserving this repo's two +intentional differences: + +1. **Storage layer:** short-term state remains in FalkorDB via the Redis + protocol and existing `redis.*` config keys. +2. **Session lineage model:** child sessions remain first-class participants in + the root session's continuity state rather than being reduced to summarized + agent-tool output only. + +--- + +## 2 Goals + +1. **Adopt source-side token reduction.** Heavy native tool calls must be + intercepted before execution and denied, bounded, or rewritten so raw + payloads do not enter the live transcript unnecessarily. +2. **Reach >=80% context-mode hot-path alignment.** Match `context-mode` on + pre-tool routing, deterministic enforcement, compact event extraction, and + conservative session snapshotting. +3. **Keep Graphiti off the hot path.** No synchronous Graphiti call may block + `tool.execute.before`, `chat.message`, `messages.transform`, + `session.compacting`, or any per-message event hook. +4. **Keep short-term state in FalkorDB.** The hot tier continues to use the + Redis-compatible FalkorDB endpoint configured through canonical `redis.*` + settings only. +5. **Preserve session continuity.** The plugin must still inject deterministic + `<session_memory>` derived from local typed events, snapshots, and optional + cached Graphiti recall. +6. **Preserve intentional divergence for child sessions.** Child/subagent work + must continue to accumulate into the canonical root session instead of being + flattened to opaque tool summaries. + +--- + +## 3 Alignment Target + +### 3.1 What "80% aligned" means + +This repo does **not** need to become a clone of `context-mode`. 
It does need to +match its core hot-path mechanics closely enough that the same practical +benefits appear in OpenCode sessions. + +The required alignment surface is: + +- **Pre-tool interception** for heavy tools +- **Deterministic routing policy** implemented in code, not by a separate LLM +- **Allow / modify / deny** style decisions at tool-call time +- **Compact post-tool continuity extraction** from metadata and short summaries +- **Priority-tiered session snapshot building** from typed events +- **Stable reinjection** of compact continuity state before LLM calls + +#### 3.1.1 Concrete alignment checklist + +The 80% target is met when **all** of the following are true: + +| # | Criterion | Measurement | +| -- | ----------------------------------- | -------------------------------------------------------------------------------------------------------------- | +| A1 | Pre-tool interception exists | `tool.execute.before` hook is registered and exercised for every tool in the minimum set (§6.2). | +| A2 | Deterministic routing decisions | Each tool in the minimum set has a coded policy that returns allow / modify / deny without calling an LLM. | +| A3 | Source-side token prevention | At least one heavy-tool class (`Read`, `Bash`, `WebFetch`) is demonstrably bounded or denied before execution. | +| A4 | Compact event extraction | No `SessionEvent.body` exceeds 4 KB; no raw tool output stored as a hot-tier event. | +| A5 | Priority-tiered snapshot | Snapshot respects P0–P3 tiers and stays within `SNAPSHOT_BODY_BUDGET`. | +| A6 | Stable reinjection | `` is injected on every `messages.transform` and `session.compacting` call. | +| A7 | No Graphiti on hot path | Zero synchronous MCP calls during any hook return (existing invariant, must not regress). | +| A8 | Context-mode-style routing guidance | Read/Grep/Bash guidance is injected once per session; WebFetch is blocked; Task prompt routing is rewritten. | + +Criteria A1–A3 are the **new** requirements from this plan. 
Criteria A4–A7 are +**existing** invariants that must not regress. Criterion A8 captures the +session-scoped guidance and prompt-rewrite mechanics that make `context-mode`'s +OpenCode routing practical without replacing native tools. + +The allowed divergence surface is: + +- FalkorDB/Redis instead of SQLite for local state +- root-session promotion for child/subagent continuity +- Graphiti-backed async long-term memory and cache refresh +- this repo's existing `` envelope instead of `context-mode`'s + `` format + +### 3.2 Non-goals + +This plan does **not** include: + +- replacing FalkorDB with SQLite +- moving Graphiti back onto the hot path +- removing the existing Graphiti async drain/cache architecture +- reverting child-session aggregation to summarized-only agent events +- introducing a second LLM summarization pass for the hot tier + +--- + +## 4 Architecture + +```text +opencode-graphiti plugin (TypeScript / Deno) + | + |- Hot path — OpenCode hooks + FalkorDB over Redis protocol + | |- tool.execute.before + | | - inspect native tool call + | | - allow / modify / deny based on deterministic routing rules + | | - prevent oversized raw outputs from entering transcript + | | + | |- event / chat.message / messages.transform / session.compacting + | | - extract typed continuity events + | | - rebuild compact snapshot from FalkorDB state + | | - inject canonical + | | + | '- FalkorDB storage via Redis commands + | - session events + | - snapshots + | - memory cache + | - pending async drain batches + | + '- Async tier — Graphiti MCP + - drain semantic episodes + - refresh cached long-term recall + - prime cold sessions opportunistically + - never block hook returns +``` + +### 4.1 Architectural shift + +The old hot-path posture was: + +- let native tools run +- observe their output afterward +- store only a compact continuity representation + +The revised hot-path posture becomes: + +- intercept the tool call first +- prevent or rewrite the expensive form when 
appropriate +- only then observe the resulting bounded tool activity +- store compact continuity from the bounded result + +This is the single biggest design change in the plan. + +--- + +## 5 Hook Model + +### 5.1 Required hooks + +| Hook | Purpose | +| -------------------------------------- | -------------------------------------------------------------- | +| `tool.execute.before` | Pre-tool routing, deny/modify/allow decisions | +| `event` | Session lifecycle + typed event capture | +| `chat.message` | Prepare local continuity state for the current turn | +| `experimental.chat.messages.transform` | Inject canonical `` into the last user message | +| `experimental.session.compacting` | Inject the same continuity envelope into compaction | + +### 5.2 Hook API contract (from `@opencode-ai/plugin@1.2.26`) + +The OpenCode plugin SDK exposes these tool-lifecycle hooks: + +```ts +// tool.execute.before — fires before tool execution +"tool.execute.before"?: ( + input: { tool: string; sessionID: string; callID: string }, + output: { args: any }, +) => Promise<void>; + +// tool.execute.after — fires after tool execution +"tool.execute.after"?: ( + input: { tool: string; sessionID: string; callID: string; args: any }, + output: { title: string; output: string; metadata: any }, +) => Promise<void>; +``` + +**Key constraint:** `tool.execute.before` can only mutate `output.args`. There +is no first-class `deny` return value in the SDK. + +`context-mode`'s published OpenCode plugin resolves this by **throwing an +Error** from `tool.execute.before` for `deny` and `ask` decisions, and by using +in-place arg mutation for `modify` decisions. This plan adopts the same +mechanism. + +Therefore: + +1. **Hard deny** = throw an error from `tool.execute.before`. +2. **Modify** = mutate args in place before native tool execution. +3. 
**Context guidance** = no-op at the SDK layer; routing guidance should reach + the model through `AGENTS.md`, injected subagent prompt blocks, or bounded + tool-arg rewrites. + +### 5.3 `tool.execute.after` + +Unlike the speculative earlier draft, `context-mode`'s OpenCode plugin does not +use `tool.execute.after` to rewrite or truncate visible tool output. It uses the +after-hook for continuity capture only. + +This plan follows that design: + +- `tool.execute.after` remains available for event extraction and metadata + capture +- it is **not** part of the primary routing/token-reduction mechanism +- source-side prevention must happen in `tool.execute.before` + +### 5.4 New hot-path invariant + +`tool.execute.before` becomes part of the core hot-path contract. + +No heavy native tool class should be considered fully supported unless it has: + +1. an explicit routing decision policy +2. tests for allow / modify / deny behavior +3. a documented bounded-output rationale + +--- + +## 6 Pre-Tool Routing Design + +### 6.1 Decision model + +The plugin should adopt a `context-mode`-style routing engine that returns one +of these decisions: + +- `allow` — safe to run unchanged +- `modify` — safe only after input is rewritten or bounded +- `deny` — unsafe/raw-output-heavy; reject with actionable guidance + +Unlike `context-mode`, this repo does not need to reproduce every external +sandbox tool. But it must reproduce the same **mechanical behavior**: + +- decisions are deterministic and local +- decisions happen before execution +- decisions are based on tool name, arguments, and risk heuristics +- denial/modification prevents transcript blow-up at the source + +#### 6.1.1 Routing principles + +1. **Deterministic, not heuristic-heavy.** Each tool's policy is a short + decision tree based on argument inspection (file extension, path pattern, + presence/absence of `limit`, command prefix). No LLM calls, no embedding + lookups. +2. 
**Follow context-mode's guidance-first posture.** Prefer a once-per-session + routing nudge for broad native tools (`Read`, `Grep`, general `Bash`) and + reserve hard blocks for tools/patterns that are known context sinks (for + example `WebFetch`, raw `curl`, `wget`, and certain build-tool invocations). +3. **Composable policies.** Each tool's policy is a pure function + `(toolName, args) => RoutingDecision`. The routing engine dispatches by tool + name and delegates to the per-tool policy. New tools are added by registering + a new policy function. +4. **No cross-tool state.** Routing decisions are stateless per call. The engine + does not track how many times a tool has been called or accumulate context + across calls. +5. **Fail-open for unknown tools.** Tools not in the minimum set (§6.2) are + allowed unchanged. This mirrors `context-mode`'s OpenCode posture more + closely than an aggressive deny-by-default design. + +#### 6.1.2 `RoutingDecision` type + +```ts +type RoutingDecision = + | { action: "allow" } + | { action: "modify"; args: Record<string, unknown>; reason: string } + | { action: "deny"; guidance: string } + | { action: "context"; guidance: string }; +``` + +The routing engine applies the decision: + +- `allow` → no mutation to `output.args` +- `modify` → replace `output.args` with the rewritten args +- `deny` → throw an error from `tool.execute.before` +- `context` → deliver once-per-session guidance through the routing layer + +#### 6.1.3 Session-scoped guidance throttling + +`context-mode` only emits its advisory guidance once per session so the model is +nudged without flooding the transcript with repeated routing instructions. This +plan should do the same. + +The guidance throttle should be: + +- keyed by the **canonical root session ID**, not the raw child session ID +- keyed by guidance type (`read`, `grep`, `bash`, etc.) 
+- held in local process state only; no FalkorDB round-trip is required +- shared across parent and child sessions in the same lineage because child work + contributes to the same continuity stream + +This preserves the intentional child-session model while still matching +`context-mode`'s once-per-session guidance behavior closely. + +### 6.2 Tool classes in scope + +Initial routing coverage must include at least: + +- `Read` +- `WebFetch` +- `Bash` +- `Grep` +- `Glob` +- `Task` + +Additional coverage may later expand to tools such as browser snapshots or other +large-payload integrations, but these six are the minimum alignment set. + +### 6.3 Routing policy matrix + +The following matrix defines the concrete routing policy for each tool in the +minimum set. Each row describes the argument conditions that trigger each +decision. + +#### Summary matrix + +| Tool | Allow when | Modify when | Deny when | +| ---------- | ------------------------------------------------------ | -------------------------------------------- | ------------------------------------------------------------------ | +| `Read` | Usually allow | Never rewrite args by default | Never hard deny by default; emit once-per-session guidance | +| `WebFetch` | — | — | Always hard deny and redirect to the safer context-mode-style path | +| `Bash` | Allow by default | Rewrite known bad patterns to safe guidance | Hard deny only for explicit security/policy matches | +| `Grep` | Usually allow | Never rewrite args by default | Never hard deny by default; emit once-per-session guidance | +| `Glob` | Allow | Optionally scope `path` only if clearly safe | Avoid speculative rewrites; do not invent unsupported excludes | +| `Task` | Allow, but rewrite delegated prompt with routing block | Rewrite prompt field to append routing block | — | + +#### `Read` — detailed policy + +```text +if tool is Read: + → emit a once-per-session routing guidance block that nudges the agent toward + the safer 
bounded/file-processing path + → otherwise allow the native tool call to proceed unchanged +``` + +This follows `context-mode`'s OpenCode behavior more closely than silently +rewriting read limits. The goal is to change agent behavior at the source while +preserving the native tool contract unless a stricter block is truly necessary. + +#### `WebFetch` — detailed policy + +```text +if tool is WebFetch: + → hard deny by throwing an error + → denial guidance must redirect to the safer fetch/index/search flow rather + than allowing raw page content into transcript +``` + +This is the clearest source-side prevention mechanism in `context-mode`'s +OpenCode plugin and should be copied directly. + +#### `Bash` — detailed policy + +```text +if command hits explicit security-policy deny pattern: + → hard deny (same as context-mode security layer) + +if command contains raw network patterns (`curl`, `wget`, inline HTTP clients): + → modify command into a short guidance command that redirects to the safer + fetch/index or sandbox-execute path + +if command invokes high-volume build tools (`gradle`, `mvn`, wrappers): + → modify command into a short guidance command that redirects to a safer + sandboxed execution path + +otherwise: + → allow, but emit once-per-session routing guidance for Bash +``` + +**Design note:** `Bash` should follow `context-mode`'s actual OpenCode strategy: +pattern-based rewrites for the worst offenders, not an oversized allowlist plus +post-hoc truncation design. + +**Ordering note:** the Bash policy should run in this order: + +1. repo security-policy deny/ask checks +2. raw network rewrite checks (`curl`, `wget`, inline HTTP) +3. high-volume build-tool rewrite checks +4. once-per-session Bash guidance fallback +5. otherwise passthrough + +This keeps security authoritative while preserving the same routing shape as +`context-mode`. 
+ +#### `Grep` — detailed policy + +```text +if tool is Grep: + → emit a once-per-session routing guidance block that nudges the agent toward + safer bounded execution/search behavior + → otherwise allow the native tool call to proceed unchanged +``` + +#### `Glob` — detailed policy + +```text +if `path` is omitted: + → allow (native tool already defaults to cwd) + +if `pattern` is pathologically broad: + → prefer guidance in docs/tests rather than speculative arg mutation + +→ allow unless a future verified-safe rewrite exists +``` + +**Implementation note:** the native OpenCode `Glob` tool only accepts `pattern` +and optional `path`. It has no exclusion parameter, so this plan should not rely +on synthetic exclude rewrites. + +#### `Task` — detailed policy + +```text +detect prompt field (`prompt`, `request`, `objective`, `question`, `query`, or +`task`) +append a routing block to the delegated prompt +preserve `subagent_type` unless a validated future change is explicitly chosen +→ modify +``` + +**Rationale:** this follows `context-mode`'s actual delegated-prompt rewrite +mechanic while preserving this repo's child-session-first continuity model. + +### 6.4 Guardrails against over-copying `context-mode` + +This section documents where this repo **intentionally does not** follow +`context-mode`, even when the behavior looks similar: + +1. **No SQLite local store.** `context-mode` uses SQLite for local state. This + repo uses FalkorDB via Redis protocol. The routing engine must not assume + SQLite-style queries or schema. +2. **No second LLM summarization pass.** `context-mode` may use an LLM to + summarize tool output. This repo's hot tier is deterministic and + programmatic. Summaries come from structured event extraction, not LLM calls. +3. **No flattened subagent events.** `context-mode` records subagent work as + summarized tool events. This repo promotes child sessions to first-class + participants in the root session (§11). +4. 
**No `` envelope.** This repo uses `` with + its own section taxonomy (§9). The envelope shape is not a copy target. +5. **No external sandbox tools in Phase 1.** `context-mode` routes users toward + its own custom tooling. This repo copies the pre-tool mechanics first without + requiring the full tool ecosystem in the initial phase. +6. **Hard deny is supported by thrown errors.** This repo should follow + `context-mode`'s OpenCode implementation and treat thrown errors in + `tool.execute.before` as the authoritative deny mechanism. + +### 6.5 User-facing denial behavior + +When a tool call is denied, the plugin should return a concise actionable error +that explains the safer bounded path. + +The goal is not just to block. It is to steer the agent toward the same safer +workflow that `context-mode` would have chosen. + +Denial messages must: + +- be ≤ 200 characters +- name the denied tool and the problematic argument +- suggest a concrete alternative (e.g. "Use Read with limit=200 instead") +- not include raw argument values that could themselves be large + +Guidance messages should follow the same philosophy: + +- concise enough to fit comfortably in a single tool result or prompt suffix +- specific about the safer path to take next +- stable across repeated runs so tests can assert against them + +--- + +## 7 Short-Term Storage Layer + +### 7.1 Storage decision + +Short-term state remains in FalkorDB, accessed over the Redis protocol using the +existing `RedisClient` and canonical `redis.*` config. + +There is **no new `falkordb.*` config section** in the revised plan. 
+ +`redis.*` remains canonical because: + +- the transport is Redis-compatible +- the runtime already uses Redis-oriented primitives +- FalkorDB is the deployment choice behind that endpoint + +### 7.2 Key layout + +| Key | Type | Purpose | +| ----------------------------- | ------ | ------------------------------------------ | +| `session:{id}:events` | List | typed hot-tier continuity events | +| `session:{id}:snapshot` | String | compact snapshot XML | +| `memory-cache:{groupId}` | String | cached Graphiti-derived recall | +| `memory-cache:{groupId}:meta` | Hash | cache query / refresh metadata | +| `drain:pending:{groupId}` | List | queued semantic drain entries for Graphiti | +| `drain:dead:{groupId}` | List | dead-lettered drain entries | + +### 7.3 Invariant + +FalkorDB is the hot-path system of record for: + +- session continuity +- compact restore snapshots +- cached long-term memory projections +- pending async Graphiti consolidation work + +Graphiti is never required for the current turn to proceed. + +--- + +## 8 Revised Hot-Tier Data Contract + +### 8.1 Event contract + +The hot tier should continue using compact typed events rather than raw copied +transcripts, but the contract becomes stricter: + +1. **pre-tool routing first** +2. **sanitize before extraction** +3. **extract compact typed events only** +4. **build conservative snapshot** +5. **inject stable canonical memory envelope** +6. 
**drain semantic episodes asynchronously** + +### 8.2 Event policy + +Keep: + +- file paths +- search queries +- tool names +- exit/error signals +- explicit task/decision state +- terse subagent summaries +- concrete environment/git state + +Reject as durable hot-tier memory: + +- raw file contents from `Read` +- large shell/web transcripts +- wrapper tags like `` / `` when they come from replayed output +- assistant operational narration +- previously injected memory blocks +- verbose delegated reports + +### 8.3 Snapshot policy + +The snapshot should move even closer to `context-mode`'s priority-tiered style: + +- P0/P1: last request, active tasks, user decisions, files in play, rules +- P2: unresolved blockers, environment, git state +- P3: subagent summaries, low-volume integration markers +- drop low-value residue aggressively under budget pressure + +The point is resumability, not archival completeness. + +--- + +## 9 Injection Strategy + +The canonical injected shape remains: + +```xml + + ... + ... + ... + ... + ... + ... + ... + ... + ... + ... + +``` + +This is intentionally different from `context-mode`'s resume envelope, but it +must be generated from the same style of compact typed state. + +### 9.1 Important distinction + +This plugin's injection layer is **not** the primary token-saving mechanism. + +Under the revised plan, token savings come from two layers together: + +1. **source-side prevention** via `tool.execute.before` +2. **compact continuity reinjection** via `` + +Without the first layer, alignment remains incomplete. + +--- + +## 10 Async Tier + +The async tier remains structurally the same: + +- Graphiti MCP drains semantic episodes in the background +- cache refreshes happen asynchronously on drift or after new facts land +- primers remain best-effort +- no Graphiti request may block a hot-path hook return + +This is an intentional divergence from `context-mode`, not an alignment gap. 
+ +--- + +## 10A Hook Interaction Model + +This section documents how the new `tool.execute.before` and +`tool.execute.after` hooks interact with the existing hook pipeline. + +### 10A.1 Hook execution order (per user turn) + +```text +1. chat.message + → Prepare session state from FalkorDB. + → Stage for injection. + +2. experimental.chat.messages.transform + → Inject into last user message. + → LLM generates response (may include tool calls). + +3. [For each tool call in the LLM response:] + a. tool.execute.before ← NEW: routing decision + → allow / modify / deny the tool call args, with optional once-per-session guidance. + b. [Native tool executes with (possibly modified) args.] + c. tool.execute.after ← continuity capture / metadata only + → Observe resulting bounded tool activity. + d. event (tool.called / tool.completed) + → Extract compact SessionEvent from tool activity. + → Store in FalkorDB via RedisEventsService. + +4. event (message.updated) + → Finalize assistant message as SessionEvent. + +5. [If idle:] event (session.idle) + → Drain pending events to Graphiti (async). + → Rebuild snapshot. + +6. [If compacting:] experimental.session.compacting + → Inject into compaction context. + → event (session.compacted) → async drain + snapshot rebuild. 
+``` + +### 10A.2 Data flow between hooks + +| Producer hook | Data produced | Consumer hook | +| ------------------------ | ----------------------------------------------- | -------------------------------------------------------------- | +| `chat.message` | Staged `` envelope | `messages.transform` | +| `tool.execute.before` | Modified args / thrown deny / one-time guidance | Native tool execution, routed failure, or prompt/tool guidance | +| `tool.execute.after` | Tool metadata for continuity capture | `event` extraction / hot-tier state | +| `event` (tool.completed) | Compact `SessionEvent` | FalkorDB → snapshot → next `chat.message` | +| `session.compacting` | Injected compaction context | OpenCode compaction summarizer | + +### 10A.3 Invariants across hooks + +1. **No hook reads Graphiti synchronously.** This applies to the new hooks too. +2. **`tool.execute.before` must not call FalkorDB.** Routing decisions are pure + functions of tool name and args. No Redis round-trip. +3. **No hook-level output rewriting is required for alignment.** + `tool.execute.after` may remain metadata/event focused; token prevention + should be achieved in `tool.execute.before`. +4. **Event extraction happens after tool execution or routed denial handling**, + not during routing policy evaluation. `tool.execute.before` may cache compact + routing metadata, but routed `SessionEvent`s are only emitted later through + `tool.execute.after` and the existing event extraction pipeline. + +--- + +## 11 Session Lifecycle and Child Sessions + +### 11.1 Kept divergence + +This repo continues to resolve child/subagent sessions to a canonical root +session and stores their work as first-class continuity events in the root +session state. + +This diverges from `context-mode`, which summarizes subagent work more narrowly, +but the divergence remains intentional and in-scope. 
+ +### 11.2 Constraint on new routing logic + +The new pre-tool routing layer must work correctly for both parent and child +sessions. + +Specifically: + +- routed decisions should be evaluated per live tool call regardless of lineage +- post-tool compact event extraction should still aggregate into the root + session +- child session teardown must never delete canonical root continuity state + +--- + +## 12 Configuration + +Canonical config shape remains: + +```jsonc +{ + "redis": { + "endpoint": "redis://localhost:6379", + "batchSize": 20, + "batchMaxBytes": 51200, + "sessionTtlSeconds": 86400, + "cacheTtlSeconds": 600, + "drainRetryMax": 3 + }, + "graphiti": { + "endpoint": "http://localhost:8000/mcp", + "groupIdPrefix": "opencode", + "driftThreshold": 0.5 + } +} +``` + +### 12.1 Config decision + +- `redis.*` stays canonical for the FalkorDB-backed hot tier +- `graphiti.*` stays canonical for async Graphiti integration +- legacy compatibility may remain temporarily in implementation if needed, but + the plan no longer treats `falkordb.*` as a target configuration shape + +--- + +## 13 File Changes + +### 13.1 New files + +```text +src/handlers/tool-before.ts — OpenCode tool.execute.before hook handler +src/services/tool-routing.ts — deterministic routing engine + per-tool policy functions +src/services/tool-guidance.ts — shared once-per-session guidance blocks / routing text +src/services/tool-guidance-cache.ts — in-memory per-session guidance throttle keyed by canonical session +``` + +### 13.2 Modified files + +```text +src/index.ts — register tool.execute.before; wire routing deps +src/handlers/event.ts — extract compact events from routed tool activity (deny/modify/context signals) +src/handlers/chat.ts — no structural change; continues local prep from FalkorDB state +src/handlers/messages.ts — no structural change; continues canonical injection from local state +src/handlers/compacting.ts — no structural change; continues local-only compaction injection 
+src/services/event-extractor.ts — add extraction rules for routing denial/modification events +src/services/redis-snapshot.ts — classify routing events as P2; tighten budget enforcement +src/session.ts — ensure routing hooks resolve canonical session ID for child sessions and guidance throttling +README.md — document source-side routing and updated hot-path mechanics +AGENTS.md — add tool.execute.before to hot-path section +docs/ContextOverhaulTests.md — add Suite N (pre-tool routing) test cases +``` + +--- + +## 14 Implementation Phases + +### Phase 1: Routing contract + +**Scope:** `src/services/tool-routing.ts`, `src/services/tool-guidance.ts` + +**Tasks:** + +1. Implement the `tool.execute.before` deny path by throwing an error, matching + `context-mode`'s OpenCode plugin (§5.2). +2. Define the `RoutingDecision` type (§6.1.2). +3. Implement the routing engine: dispatch by tool name, delegate to per-tool + policy functions. +4. Implement once-per-session guidance for `Read`, `Grep`, and general `Bash`. +5. Implement hard deny for `WebFetch`. +6. Implement delegated prompt rewriting for `Task`. +7. Implement the guidance throttle keyed by canonical root session ID. +8. Write unit tests for the engine dispatch and the `RoutingDecision` type. + +**Acceptance criteria:** + +- [ ] `RoutingDecision` type exists and is exported. +- [ ] Routing engine accepts `(toolName: string, args: unknown)` and returns + `RoutingDecision`. +- [ ] Policies exist for `Read`, `WebFetch`, `Bash`, `Grep`, `Glob`, `Task`. +- [ ] Hard deny uses thrown errors from `tool.execute.before`. +- [ ] Guidance is emitted at most once per canonical session lineage per type. +- [ ] `deno test` passes; `deno task check` passes. + +### Phase 2: Pre-tool hook wiring + +**Scope:** `src/handlers/tool-before.ts`, `src/index.ts` + +**Tasks:** + +1. Create `tool-before.ts` handler that calls the routing engine and applies the + decision to `output.args` or throws for deny. +2. 
Wire the hook in `src/index.ts` alongside the existing hooks. +3. Ensure the hook resolves the canonical session ID via `SessionManager` so + child sessions are handled correctly. +4. Thread canonical session identity into the guidance throttle so parent and + child sessions share the same once-per-session routing nudges. + +**Acceptance criteria:** + +- [ ] `tool.execute.before` hook is registered in the plugin return value. +- [ ] The hook fires for parent and child sessions. +- [ ] `tool.execute.before` does not call FalkorDB or Graphiti. +- [ ] Parent and child sessions share one guidance throttle namespace. +- [ ] `deno test` passes; `deno task check` passes. + +### Phase 3: Heavy-tool policies + +**Scope:** `src/services/tool-routing.ts`, `src/services/tool-guidance.ts` + +**Tasks:** + +1. Implement the `Read` guidance policy per §6.3. +2. Implement the `WebFetch` policy per §6.3. +3. Implement the `Bash` policy per §6.3 with command-pattern rewrites and + once-per-session guidance. +4. Implement the `Grep` and `Glob` policies per §6.3. +5. Implement the `Task` prompt-rewrite policy. +6. Write unit tests for each policy covering allow, modify, and deny cases. + +**Acceptance criteria:** + +- [ ] Each tool in the minimum set has ≥ 3 test cases (allow, modify, deny). +- [ ] `Read` emits guidance once per session and otherwise preserves native + args. +- [ ] `WebFetch` is denied with actionable redirect guidance. +- [ ] `Bash` rewrites `curl`/`wget`/inline HTTP/build-tool patterns. +- [ ] Bash routing preserves the documented evaluation order from §6.3. +- [ ] `Grep` emits guidance once per session and otherwise preserves native + args. +- [ ] `Glob` does not rely on unsupported exclusion args. +- [ ] `Task` rewrites delegated prompt text with routing instructions. +- [ ] `deno test` passes; `deno task check` passes. + +### Phase 4: Extraction tightening + +**Scope:** `src/handlers/event.ts`, `src/services/event-extractor.ts` + +**Tasks:** + +1. 
Ensure `tool.called` and `tool.completed` events from routed tool calls + extract only compact metadata (tool name, file path, exit code, summary). +2. Verify that `SessionEvent.body` never contains raw tool output. +3. Add extraction rules for the new `tool.execute.before` deny/modify/context + signals so they appear as lightweight events. + +**Acceptance criteria:** + +- [ ] No `SessionEvent.body` exceeds 4 KB after routing is active. +- [ ] Denied tool calls produce a compact event with the denial reason. +- [ ] Modified/context-guided tool calls produce a compact event noting the + routing action. +- [ ] `deno test` passes; `deno task check` passes. + +### Phase 5: Snapshot tightening + +**Scope:** `src/services/redis-snapshot.ts` + +**Tasks:** + +1. Review snapshot builder against the P0–P3 tier definitions in §8.3. +2. Ensure routing-related events (denials, modifications, guidance nudges) are + classified as P2 or P3 and dropped first under budget pressure. +3. Verify snapshot stays within `SNAPSHOT_BODY_BUDGET` with the new event types. + +**Acceptance criteria:** + +- [ ] Snapshot with 50+ events (including routing events) stays within budget. +- [ ] P0/P1 content (last request, active tasks, decisions) is never dropped. +- [ ] Routing denial events are classified as P2. +- [ ] `deno test` passes; `deno task check` passes. + +### Phase 6: Integration validation + documentation + +**Scope:** tests, `README.md`, `docs/ContextOverhaulTests.md`, `AGENTS.md` + +**Tasks:** + +1. Add Suite N (pre-tool routing) to `docs/ContextOverhaulTests.md`. +2. Run the full test suite including new routing tests. +3. Update `README.md` to document source-side routing. +4. Update `AGENTS.md` hot-path section to include `tool.execute.before`. +5. Verify all alignment checklist items from §3.1.1. + +**Acceptance criteria:** + +- [ ] All §3.1.1 alignment criteria (A1–A8) are met. +- [ ] `deno test` passes; `deno task check` passes; `deno lint` passes; + `deno fmt --check` passes. 
+- [ ] `README.md` documents the pre-tool routing behavior. +- [ ] `AGENTS.md` lists `tool.execute.before` in the hot-path section. +- [ ] `docs/ContextOverhaulTests.md` includes Suite N with ≥ 10 test cases. + +--- + +## 15 Validation Requirements + +### 15.1 Required tests — Suite N (Pre-Tool Routing) + +Add to `docs/ContextOverhaulTests.md` as Suite N: + +| ID | Test case | Tier | +| ---- | -------------------------------------------------------------------- | ----------- | +| N-1 | `Read` with ordinary args passes through after guidance handling | Unit | +| N-2 | `Read` emits guidance once, then falls through | Unit | +| N-3 | `WebFetch` throws hard deny with actionable guidance | Unit | +| N-4 | `Bash` with `curl` rewrites to guidance command | Unit | +| N-5 | `Bash` with inline HTTP rewrites to guidance command | Unit | +| N-6 | `Bash` with build tool command rewrites to guidance command | Unit | +| N-7 | `Bash` with ordinary command emits guidance once, then falls through | Unit | +| N-8 | `Grep` emits guidance once, then falls through | Unit | +| N-9 | `Glob` with ordinary args passes through unchanged | Unit | +| N-10 | `Task` appends routing block to delegated prompt | Unit | +| N-11 | guidance throttle emits once per canonical root session | Unit | +| N-12 | child-session tool calls share the same guidance throttle | Integration | +| N-13 | `Task` preserves child-session-first continuity model | Integration | +| N-14 | `tool.execute.before` does not call FalkorDB | Unit | +| N-15 | `tool.execute.before` fires for child session tool calls | Integration | +| N-16 | Unknown tool name → allow (fail-open) | Unit | + +### 15.2 Required full-suite checks + +Before merging any part of this plan: + +- `deno test` +- `deno task check` +- `deno lint` +- `deno fmt --check` + +### 15.3 Behavioral success criteria + +The implementation is only considered successful when all of these are true: + +1. 
large native tool outputs are materially reduced because the expensive call + is prevented or bounded before execution +2. hot-tier memory no longer depends on observing large transcript dumps first +3. `` remains compact and deterministic +4. Graphiti remains fully async +5. FalkorDB remains the hot-tier storage backend through `redis.*` +6. child-session aggregation still works as designed +7. all §3.1.1 alignment criteria (A1–A8) are met +8. Suite N tests all pass + +--- + +## 16 Tradeoffs + +| Tradeoff | Impact | Mitigation | +| ------------------------------- | ------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | +| More pre-tool blocking | Some previously tolerated raw tool usage will now be rejected | Return clear actionable denial messages and safe bounded defaults | +| More policy complexity | Routing adds maintenance cost | Centralize all heuristics in `tool-routing.ts` and `tool-guidance.ts` | +| Not a full context-mode clone | Some behavior still differs | Alignment target is explicit: hot-path mechanics, not storage or session-lineage identity | +| Bounded results may omit detail | Some calls will return less raw data | Agent can make additional focused bounded calls when needed | + +--- + +## 17 Confirmed Decisions + +- The repo should move to **>=80% context-mode alignment on the hot path**. +- The key missing mechanic to copy is **pre-tool routing and source-side token + prevention**. +- The storage layer remains **FalkorDB over the Redis protocol**. +- Canonical config remains **`redis.*` + `graphiti.*`** only. +- Graphiti remains **async-only**. +- Child sessions remain **first-class entries in root continuity state**. +- The hot tier remains **deterministic and programmatic**, not LLM-summarized. + +--- + +## 18 Immediate Next Step + +Implement Phase 1 first: + +1. 
**Implement thrown-error deny** in `tool.execute.before`, matching + `context-mode`'s OpenCode plugin. +2. **Define `RoutingDecision`** and the routing engine dispatch. +3. **Implement actual context-mode-aligned baseline policies** for `Read`, + `WebFetch`, `Bash`, `Grep`, `Glob`, and `Task`. +4. **Write unit tests** for the engine and these baseline policies. +5. **Cleanly document any repo-specific divergence** only where required by the + child-session model or FalkorDB storage boundary. diff --git a/plans/ContextOverhaulTests.md b/docs/ContextOverhaulTests.md similarity index 97% rename from plans/ContextOverhaulTests.md rename to docs/ContextOverhaulTests.md index be1dc9e..0f4a94a 100644 --- a/plans/ContextOverhaulTests.md +++ b/docs/ContextOverhaulTests.md @@ -1,12 +1,16 @@ # Context Overhaul — Test Plan -**Status:** Draft (planned automation not yet implemented) **Date:** 2026-03-14 -**Canonical design:** [`plans/ContextOverhaul.md`](plans/ContextOverhaul.md) - -> **Note:** This document outlines the _intended_ test strategy. The test -> infrastructure (Docker Compose fixtures, baseline files, deno task runner) is -> not yet in the repo. Current runnable tasks: -> `deno task build|deploy|dev|check|lint|fmt`. Full automation is aspirational. +**Status:** Superseded — retained as historical context only\ +**Date:** 2026-03-14\ +**Original design:** [`docs/ContextOverhaul.md`](ContextOverhaul.md) (also +superseded)\ +**Active acceptance matrix:** +`docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md` + +> **Note:** This test plan was written for the native-hook-first hot-path +> design. The active architecture is now MCP-first; see the implementation plan +> linked above for the current acceptance criteria. The suites below remain as +> historical reference for the original hot-path invariants. --- @@ -205,7 +209,7 @@ and within budget. 
- [ ] B-3: Total injected payload (session + persistent) does not exceed 5% of a 128k-token model context (≈ 25 600 chars). - [ ] B-4: Snapshot XML conforms to the priority-tiered schema from - `ContextOverhaul.md` §4.3. + `ContextOverhaul.md` §8.3. - [ ] B-5: Snapshot respects the 3 KB budget — lower-priority sections are truncated first. - [ ] B-6: Each `session_memory` always contains `last_request`; list sections @@ -537,11 +541,11 @@ parent. **Tier:** Unit + Integration -**Canonical design reference:** `plans/ContextOverhaul.md` §10.1 +**Canonical design reference:** `docs/ContextOverhaul.md` §11.1 **Divergence note:** This behavior intentionally differs from official `mksglu/context-mode`, which treats subagent work as summarized tool events -rather than first-class session participants. See §10.1 of the design doc for +rather than first-class session participants. See §11.1 of the design doc for the rationale and alignment guidance. #### Checklist diff --git a/docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md b/docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md new file mode 100644 index 0000000..f8a4663 --- /dev/null +++ b/docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md @@ -0,0 +1,934 @@ +# Context-Mode-Aligned MCP-First Replacement — Implementation Task Plan + +**Status:** Planned\ +**Date:** 2026-03-20\ +**Primary architecture:** +`docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md`\ +**This plan supersedes:** +`docs/superpowers/plans/2026-03-20-context-overhaul-hot-path.md` and any +in-progress native-hook-first implementation work derived from it\ +**Grounding sources used:** `AGENTS.md`, `README.md`, `src/index.ts`, +`src/session.ts`, `docs/ContextOverhaul.md`, `docs/ContextOverhaulTests.md`, +`docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md`, `deno.json` + +--- + +## 1. 
Purpose + +This document is the execution plan for the MCP-first replacement architecture. +It is for implementation work only. It is not a design exploration document. + +Every task below is ordered, concrete, and intended for subagent-by-subagent +execution without reinterpretation. + +The implementation must keep these facts true throughout: + +1. `session_*` MCP tools are the primary product surface for bounded execution, + fetch, file processing, indexing, and search. +2. OpenCode hooks remain secondary: enforcement, attribution, continuity + capture, and `<session_memory>` injection only. +3. Redis/FalkorDB remains the hot-tier system of record. +4. Graphiti stays asynchronous and off the hot path. +5. Parent and child sessions share one canonical root-session-local corpus. + +--- + +## 2. Locked Defaults For This Milestone + +These defaults are mandatory for the implementation. Do not reopen them during +execution. + +### 2.1 Runtime and transport + +- Use an **in-process** `session_*` runtime owned by the same plugin runtime as + `src/index.ts`. +- Do **not** introduce an out-of-process MCP server as the default path. +- Do **not** invent undocumented OpenCode capabilities. The implementation must + use only exported APIs from the installed `@opencode-ai/plugin` and + `@modelcontextprotocol/sdk` packages already in `deno.json`. +- If the installed OpenCode plugin package does not expose a documented tool + registration surface compatible with this plan, stop implementation and update + the controlling architecture docs instead of inventing a private integration. + +### 2.2 Storage and scope + +- Local corpora are scoped to the canonical root session only. +- Namespace all local corpus keys under the concrete shape + `session:{groupId}:{root}:...`; bare `session:{root}:...` keys are not + acceptable final implementation output for this milestone. +- Use Redis-compatible primitives only: strings, hashes, sets, lists, TTLs, and + pipelined/multi operations. 
+- Do **not** assume RediSearch, SQLite, FTS5, BM25, or undocumented FalkorDB + full-text features. + +### 2.3 Search and chunking + +- Markdown / normalized HTML: heading-aware chunks with intact fenced code + blocks preserved under the nearest heading. +- HTML fetches must be normalized into a markdown-oriented or + markdown-equivalent representation before chunking; flat tag-stripped blobs do + not satisfy the target parity level. +- HTML normalization must preserve at minimum: headings, paragraph/section + boundaries, ordered/unordered list boundaries, and pre/code blocks. +- Plain text / logs: 1200-character chunks with 200-character overlap. +- JSON: pretty-print before chunking; do not index minified JSON directly. +- Search ranking order is fixed: + 1. query normalization + porter-equivalent stemming + 2. token/stem candidate collection + 3. conditional trigram candidate expansion when token/stem recall is sparse or + query form is partial-string oriented + 4. BM25-style scoring for token/stem candidates + 5. trigram scoring for substring candidates + 6. Reciprocal Rank Fusion (RRF) across ranked token/stem and trigram lists + 7. multi-term proximity reranking + 8. light recency boost + 9. light shorter-chunk boost +- Token and trigram evidence must remain distinguishable in the scorer so the + implementation resembles the practical retrieval behavior of `context-mode` + rather than degenerating into one undifferentiated bag-of-hits ranking pass. +- Retrieval phases are also locked: collect token candidates first, and add + trigram candidates only when token recall is sparse or when the query form is + explicitly partial-string oriented. +- Fuzzy correction is required for typo-tolerant retrieval before the search + path gives up on local recall. +- Candidate scoring is bounded to the top 200 candidate chunks before final + ranking. +- `session_search` returns at most 5 results, each with one bounded snippet of + at most 320 characters. 
+ +### 2.4 Bounded result contracts + +- `session_execute`, `session_execute_file`, and `session_batch_execute` return + a bounded human-readable summary plus references, never an unbounded raw + payload. +- Tool response body budget: 8 KB maximum serialized response payload per + `session_*` call. +- Large execution/fetch/file artifacts are stored locally and referenced by + artifact or corpus ID. +- `session_batch_execute` is sequential only in v1. No hidden parallelism. +- Default command timeout for `session_execute`: 30 seconds. +- Maximum allowed command timeout in this milestone: 120 seconds. +- Default fetch timeout for `session_fetch_and_index`: 15 seconds. +- Maximum indexed source body per single fetch/file/command artifact in this + milestone: 512 KB after normalization. Larger bodies are truncated before + indexing and the truncation must be surfaced in metadata. + +### 2.5 TTL behavior + +- Session events: `redis.sessionTtlSeconds`. +- Snapshots: `2 * redis.sessionTtlSeconds`. +- Local corpora, chunks, postings, artifacts, and stats: + `redis.sessionTtlSeconds`. +- Graphiti cache: `redis.cacheTtlSeconds`. +- Pending drain + dead-letter state: `3 * redis.sessionTtlSeconds`. +- Successful writes and successful local search hits refresh TTL for the full + affected local corpus family. +- Expired local corpus lookups must return a structured not-found / expired + result, not throw an unhandled error. + +### 2.6 Execution environment + +- `session_execute*` uses the local plugin process and the current project + directory. Do not add container orchestration or remote execution in this + milestone. +- `session_fetch_and_index` uses standard HTTP fetch from the plugin runtime. +- `session_execute_file` reads local files directly through Deno APIs and must + never depend on native `Read` as its implementation path. + +--- + +## 3. Required File Structure + +Create or modify these files unless a task below says otherwise. 
+ +### 3.1 New files to create + +- `src/services/session-mcp-types.ts` +- `src/services/session-mcp-runtime.ts` +- `src/services/session-mcp-runtime.test.ts` +- `src/services/session-corpus.ts` +- `src/services/session-corpus.test.ts` +- `src/services/session-executor.ts` +- `src/services/session-executor.test.ts` +- `src/session.test.ts` + +### 3.2 Existing files to modify + +- `src/index.ts` +- `src/index.test.ts` +- `src/session.ts` +- `src/types/index.ts` +- `src/handlers/tool-before.ts` +- `src/handlers/tool-before.test.ts` +- `src/handlers/tool-after.ts` +- `src/handlers/tool-after.test.ts` +- `src/handlers/event.ts` +- `src/handlers/event.test.ts` +- `src/handlers/chat.ts` +- `src/handlers/chat.test.ts` +- `src/handlers/messages.ts` +- `src/handlers/messages.test.ts` +- `src/handlers/compacting.ts` +- `src/handlers/compacting.test.ts` +- `src/services/tool-routing.ts` +- `src/services/tool-routing.test.ts` +- `src/services/redis-client.test.ts` +- `README.md` +- `docs/ContextOverhaul.md` +- `docs/ContextOverhaulTests.md` + +### 3.3 Files to delete or explicitly retire + +- `docs/superpowers/plans/2026-03-20-context-overhaul-hot-path.md` + - delete it if it exists in the working tree or branch under implementation + - if it is already absent, keep it absent and remove any references to it +- Do **not** delete `docs/ContextOverhaul.md`; keep it as a superseded + historical document with corrected references. + +--- + +## 4. Ordered Top-Level Tasks + +Execute tasks in this exact order. Do not reorder them. + +1. Define the `session_*` MCP server surface and bounded result contracts. +2. Implement local corpus storage/index/search on Redis/FalkorDB. +3. Thread canonical root-session identity into all `session_*` calls. +4. Integrate `session_*` results into continuity capture and stats. +5. Rewrite `tool.execute.before` / `tool.execute.after` around enforcement and + attribution. +6. Extend temporary-root migration and teardown coverage to new MCP local state. 
+7. Validate compaction continuity and async Graphiti augmentation remain intact. + +No native-hook-first task may start ahead of Task 1 or Task 2. + +--- + +## 5. Task 1 — Define `session_*` MCP surface and bounded result contracts + +### 5.1 Goal + +Create the in-process `session_*` runtime, schemas, and registration layer +first. This task establishes the primary product surface before any +enforcement-hook rewrite. + +### 5.2 Files + +**Create** + +- `src/services/session-mcp-types.ts` +- `src/services/session-mcp-runtime.ts` +- `src/services/session-mcp-runtime.test.ts` + +**Modify** + +- `src/index.ts` +- `src/index.test.ts` +- `src/types/index.ts` + +### 5.3 Implementation requirements + +1. Define zod-backed request/response schemas in + `src/services/session-mcp-types.ts` for exactly these tools: + - `session_execute` + - `session_execute_file` + - `session_batch_execute` + - `session_index` + - `session_search` + - `session_fetch_and_index` + - `session_stats` + - `session_doctor` +2. Every request schema must require `root_session_id`. +3. Every response schema must include `status` and enough metadata to attribute + results later in hooks. +4. Add a runtime module in `src/services/session-mcp-runtime.ts` that: + - owns tool registration + - dispatches to typed handlers + - exposes `dispose()` for teardown + - does not perform Graphiti I/O + - returns minimal **valid** schema-conforming responses from any initial stub + handler implementation; schema-only placeholders that return `undefined`, + partial payloads, or shape-invalid objects are forbidden +5. 
Lock response contracts now: + - `session_execute`: + `{ status, summary, artifact_ref?, exit_code, + timed_out, truncated, bytes_captured }` + - `session_batch_execute`: `{ status, summary, results[], truncated }` + - `session_execute_file`: + `{ status, summary, artifact_ref?, corpus_ref?, + file_count, truncated }` + - `session_index`: `{ status, corpus_ref, chunk_count, query_hints[] }` + - `session_search`: `{ status, results[], corpus_refs[], truncated }` + - `session_fetch_and_index`: + `{ status, corpus_ref, summary, query_hints[], + fetched_url, content_type, truncated }` + - `session_stats`: + `{ status, counters, corpus_count, artifact_count, + bytes_saved_estimate }` + - `session_doctor`: `{ status, checks, redis, graphiti_cache, runtime }` +6. `src/index.ts` must instantiate the new runtime inside the existing runtime + initialization path and register its `dispose()` inside the same teardown + chain as Redis and Graphiti. +7. Do not make `tool.execute.before` the owner of any `session_*` execution + semantics. 
+ +### 5.4 TDD steps + +Write failing tests first in `src/services/session-mcp-runtime.test.ts` and +`src/index.test.ts` covering: + +- runtime registers exactly the 8 `session_*` tools +- each tool schema rejects calls without `root_session_id` +- initial stub handlers return minimal valid responses for all 8 registered + tools +- response payloads are capped to the exact 8 KB response budget +- at least one large-output case crossing the 8 KB boundary falls back to local + artifact storage/reference instead of returning an oversized inline payload +- `session_batch_execute` executes sequentially in request order +- `src/index.ts` wires runtime initialization and teardown in-process + +### 5.5 Verification commands + +```bash +deno test src/services/session-mcp-runtime.test.ts src/index.test.ts +deno task check +``` + +### 5.6 Completion gate + +Task 1 is done only when the repo has a real in-process `session_*` runtime with +typed contracts and teardown coverage, even if the handlers still return stubbed +results internally. + +--- + +## 6. Task 2 — Implement local corpus storage/index/search on Redis/FalkorDB + +### 6.1 Goal + +Build the local session-scoped corpus/index/search layer before any hook +rewrite. + +This task must aim for close feature resemblance to `context-mode`'s practical +corpus behavior, not merely any local index that passes a tiny baseline test. + +### 6.2 Files + +**Create** + +- `src/services/session-corpus.ts` +- `src/services/session-corpus.test.ts` + +**Modify** + +- `src/services/redis-client.test.ts` +- `src/services/session-mcp-runtime.ts` +- `src/services/session-mcp-runtime.test.ts` + +### 6.3 Implementation requirements + +1. `src/services/session-corpus.ts` must own: + - corpus metadata writes + - chunk storage + - term postings + - trigram postings + - artifact metadata for oversized execution/fetch/file outputs + - corpus-family TTL refresh + - search ranking +2. 
Use these key families exactly: + - `session:{groupId}:{root}:corpora` + - `session:{groupId}:{root}:corpus:{corpusId}:meta` + - `session:{groupId}:{root}:corpus:{corpusId}:chunks` + - `session:{groupId}:{root}:chunk:{chunkId}` + - `session:{groupId}:{root}:term:{token}` + - `session:{groupId}:{root}:tri:{trigram}` + - `session:{groupId}:{root}:artifact:{artifactId}:meta` + - `session:{groupId}:{root}:artifact:{artifactId}:body` + - `session:{groupId}:{root}:stats` +3. `session_index` must write normalized content into the above structures. +4. `session_fetch_and_index` must: + - fetch content with local HTTP fetch + - normalize HTML into a markdown-oriented or markdown-equivalent text form + that preserves heading/section structure closely enough to resemble + `context-mode` retrieval behavior + - preserve pre/code blocks as fenced-code-style units in the normalized + representation + - preserve list and paragraph boundaries in the normalized representation + - normalize Markdown/text/JSON + - index through the same corpus service + - never touch Graphiti +5. `session_execute` and `session_execute_file` must write searchable artifact + text through the same corpus service when output is large enough to exceed + the bounded response surface. +6. `session_search` must read only local corpus structures and rank in process. +7. TTL refresh must apply to the whole related corpus family, not just the hit + chunk. +8. Expired data must yield structured empty/not-found results. +9. Markdown/HTML chunking must preserve fenced code blocks with their nearest + heading rather than splitting them arbitrarily. +10. Artifact storage must avoid keeping duplicate full-body copies when one + canonical artifact representation plus derived searchable index text is + sufficient. +11. 
Production runtime wiring must be completed as part of this task: the + in-process `session_*` runtime in `src/index.ts` must receive the live + Redis-backed corpus dependencies so local indexing/search is not test-only. +12. `src/index.ts` must explicitly pass the live `redisClient`, + `config.redis.sessionTtlSeconds`, and the resolved project `groupId` into + `createSessionMcpRuntime(...)`; leaving the runtime in stub-only mode is a + Task 2 failure. +13. Token and trigram retrieval must remain distinct in both retrieval order and + scoring: token candidate collection happens first; trigram candidate + expansion happens only for sparse token recall or partial-string queries. +14. The chunking algorithm must explicitly treat fenced code blocks as atomic + units that cannot be split by the plain-text windowing pass. +15. The retrieval implementation must include all of these `context-mode`-style + behaviors in application code over Redis/FalkorDB postings: + - porter-equivalent stemming + - BM25-style scoring for token/stem matches + - trigram substring retrieval + - Reciprocal Rank Fusion (RRF) + - fuzzy correction for misspelled queries + - proximity reranking for multi-term queries + +### 6.4 TDD steps + +Write failing tests first in `src/services/session-corpus.test.ts` for: + +- `session_fetch_and_index` +- `session_search` +- TTL expiry graceful behavior +- heading-preserving HTML normalization that produces query-visible section + structure rather than a flat stripped blob +- HTML normalization preserving pre/code blocks and list/paragraph boundaries in + the intermediate normalized representation +- fenced code blocks remaining intact under their nearest heading after + chunking/indexing +- the small-corpus relevance baseline: + - doc A `Redis Session TTLs` + - doc B `Graphiti Async Drain` + - doc C `Child Session Canonicalization` + - query `session ttl` + - doc A must rank first +- partial-string retrieval behavior where trigram-style matching can surface 
the + intended chunk when an exact token form is absent +- stemming behavior where an inflected query still finds the intended indexed + document family +- BM25-style ranking behavior where repeated/title-weighted query terms outrank + weaker candidates +- RRF behavior where token/stem and trigram result lists are fused rather than + one simply replacing the other +- fuzzy-correction behavior where a misspelled query still retrieves the + intended result +- proximity-reranking behavior where near-adjacent multi-term matches outrank + distant matches for the same terms +- artifact storage + bounded summary behavior for large outputs +- no duplicate canonical full-body storage for one oversized artifact + +Write failing tests first in `src/index.test.ts` covering: + +- `src/index.ts` passes the live Redis-backed corpus dependencies into + `createSessionMcpRuntime(...)` +- the runtime produced by `src/index.ts` is not left in corpus/search stub mode + when Redis is available + +Extend `src/services/redis-client.test.ts` so the fake runtime can support any +additional Redis primitives needed by `session-corpus.ts` tests. + +Extend `src/services/session-mcp-runtime.test.ts` so production-style runtime +construction with a real `RedisClient` exercises the local corpus path rather +than only test-only injected behavior. + +### 6.5 Verification commands + +```bash +deno test src/services/session-corpus.test.ts src/services/redis-client.test.ts src/services/session-mcp-runtime.test.ts src/index.test.ts +deno task check +``` + +### 6.6 Completion gate + +Task 2 is done only when local indexing and search work fully without Graphiti +and the small-corpus relevance baseline passes exactly. + +Task 2 is NOT done if corpus/search parity exists only in tests while +`src/index.ts` still constructs a stub-only runtime with no live corpus wiring. + +--- + +## 7. 
Task 3 — Thread canonical root-session identity into all `session_*` calls + +### 7.1 Goal + +Make canonical root-session identity mandatory for all `session_*` activity and +shared across parent/child sessions. + +### 7.2 Files + +**Create** + +- `src/session.test.ts` + +**Modify** + +- `src/session.ts` +- `src/handlers/tool-before.ts` +- `src/handlers/tool-before.test.ts` +- `src/services/session-mcp-runtime.ts` +- `src/services/session-mcp-runtime.test.ts` +- `src/services/session-corpus.ts` +- `src/services/session-corpus.test.ts` + +### 7.3 Implementation requirements + +1. Reuse `SessionManager` as the only canonical lineage authority. +2. `tool.execute.before` must inject `root_session_id` into every `session_*` + call using canonical resolution from `src/session.ts`. +3. The `session_*` runtime must reject mismatched or missing `root_session_id` + after schema validation; it must not invent a second lineage model. +4. All corpus/artifact/stats writes must use `root_session_id`, never the raw + child session ID. +5. Parent and child sessions must read from the same root corpus namespace. +6. Temporary-root sessions must remain supported until later migration work in + Task 6. 
+ +### 7.4 TDD steps + +Write failing tests first in `src/session.test.ts`, +`src/handlers/tool-before.test.ts`, and `src/services/session-corpus.test.ts` +covering: + +- parent and child `session_*` calls share one root corpus namespace +- `tool.execute.before` injects `root_session_id` on `session_*` calls +- native tool calls do not receive `root_session_id` +- the runtime rejects `session_*` calls when `root_session_id` is absent or + mismatched + +### 7.5 Verification commands + +```bash +deno test src/session.test.ts src/handlers/tool-before.test.ts src/services/session-corpus.test.ts src/services/session-mcp-runtime.test.ts +deno task check +``` + +### 7.6 Completion gate + +Task 3 is done only when parent and child sessions demonstrably share a single +root-session-local corpus and all `session_*` calls are rooted through +`SessionManager`. + +--- + +## 8. Task 4 — Integrate `session_*` results into continuity capture and stats + +### 8.1 Goal + +Capture bounded MCP-first tool activity into local continuity and local stats +without polluting events or `<session_memory>` with raw payloads. + +### 8.2 Files + +**Modify** + +- `src/handlers/event.ts` +- `src/handlers/event.test.ts` +- `src/handlers/chat.ts` +- `src/handlers/chat.test.ts` +- `src/types/index.ts` +- `src/services/session-corpus.ts` +- `src/services/session-corpus.test.ts` +- `src/services/session-mcp-runtime.ts` +- `src/services/session-executor.ts` +- `src/services/session-executor.test.ts` + +### 8.3 Implementation requirements + +1. Add typed event metadata for `session_*` tool activity: + - tool name + - root session ID + - corpus refs + - artifact refs + - bytes captured + - bytes omitted from transcript + - truncation flag +2. Keep event bodies compact. No stored event body may exceed existing hot-tier + event limits. +3. 
Add root-session-local stats counters in `session:{groupId}:{root}:stats` for at least: + - `session_execute_calls` + - `session_execute_file_calls` + - `session_batch_execute_calls` + - `session_index_calls` + - `session_search_calls` + - `session_fetch_and_index_calls` + - `artifact_count` + - `corpus_count` + - `bytes_indexed_total` + - `bytes_returned_total` + - `bytes_saved_estimate` +4. `session_stats` must read those counters directly from local state. +5. `chat.message` preparation must remain local-first and deterministic. + `persistent_memory` stays optional and cache-backed only. +6. Do not inject full `session_*` artifacts into `<session_memory>`. + +### 8.4 TDD steps + +Write failing tests first in: + +- `src/handlers/event.test.ts` +- `src/handlers/chat.test.ts` +- `src/services/session-executor.test.ts` +- `src/services/session-corpus.test.ts` + +Required coverage: + +- `session_execute` stores bounded continuity metadata and stats +- `session_batch_execute` aggregates per-item results without raw concatenation +- `session_execute_file` captures file-analysis continuity without raw file dump +- local-first `<session_memory>` still renders with optional cached + `<persistent_memory>` + +### 8.5 Verification commands + +```bash +deno test src/handlers/event.test.ts src/handlers/chat.test.ts src/services/session-executor.test.ts src/services/session-corpus.test.ts +deno task check +``` + +### 8.6 Completion gate + +Task 4 is done only when `session_*` activity contributes compact continuity and +measurable local stats without hot-tier raw dumps. + +--- + +## 9. Task 5 — Rewrite `tool.execute.before` / `tool.execute.after` around enforcement + attribution + +### 9.1 Goal + +Reduce native-tool routing to a secondary enforcement layer that pushes the +model toward `session_*` tools and attributes outcomes cleanly. 
+ +### 9.2 Files + +**Modify** + +- `src/handlers/tool-before.ts` +- `src/handlers/tool-before.test.ts` +- `src/handlers/tool-after.ts` +- `src/handlers/tool-after.test.ts` +- `src/services/tool-routing.ts` +- `src/services/tool-routing.test.ts` +- `src/handlers/event.ts` +- `src/handlers/event.test.ts` + +### 9.3 Implementation requirements + +1. Keep `session_*` calls simple in `tool.execute.before`: + - inject canonical `root_session_id` + - allow the call to proceed +2. Rewrite native-tool policy so it is explicitly secondary: + - `WebFetch` -> deny with direct guidance to `session_fetch_and_index` + - data-heavy `Bash` patterns -> deny or bounded rewrite toward + `session_execute` / `session_batch_execute` + - large-analysis `Read` patterns -> guidance toward `session_execute_file` + - `Grep` / `Glob` remain lightweight helpers, not primary retrieval + - `Task` guidance must tell delegated agents to prefer `session_*` for + data-heavy operations +3. `tool.execute.after` must only attach routing/attribution metadata; it must + not become a second output-rewriting engine. +4. 
Rework or trim existing native-routing-only logic in + `src/services/tool-routing.ts` so the success condition is no longer + “intercept more native tools.” + +### 9.4 TDD steps + +Write failing tests first in the existing hook/routing test files covering: + +- `session_*` calls are allowed with injected `root_session_id` +- `WebFetch` is denied toward `session_fetch_and_index` +- data-heavy `Bash` is routed toward `session_execute` +- `Task` prompt rewriting adds MCP-first routing guidance +- `tool.execute.after` records attribution only + +### 9.5 Verification commands + +```bash +deno test src/handlers/tool-before.test.ts src/handlers/tool-after.test.ts src/services/tool-routing.test.ts src/handlers/event.test.ts +deno task check +``` + +### 9.6 Completion gate + +Task 5 is done only when hooks clearly serve MCP-first enforcement and +attribution rather than acting as the main product surface. + +--- + +## 10. Task 6 — Extend temporary-root migration and teardown coverage to new MCP local state + +### 10.1 Goal + +Make temporary-root resolution and runtime re-initialization safe for local +corpora, artifacts, postings, stats, and new MCP runtime resources. + +### 10.2 Files + +**Modify** + +- `src/session.ts` +- `src/session.test.ts` +- `src/index.ts` +- `src/index.test.ts` +- `src/services/session-corpus.ts` +- `src/services/session-corpus.test.ts` +- `src/services/session-mcp-runtime.ts` +- `src/services/session-mcp-runtime.test.ts` + +### 10.3 Implementation requirements + +1. Extend temporary-root migration in `src/session.ts` so it covers: + - corpus manifests + - corpus metadata + - chunk lists + - chunk payloads + - term postings + - trigram postings + - artifact metadata and bodies + - local stats +2. Use a single atomic or pipeline-disciplined migration strategy. 
Lock it now: + - use a Redis `MULTI/EXEC` pipeline where key enumeration happens first and + every rename/copy/delete step for one provisional root is committed as one + migration unit + - preserve remaining TTL for each moved key by reading TTL before migration + and reapplying it after the move when the primitive used does not retain + expiry automatically +3. After successful migration, remove obsolete provisional-root keys. +4. If migration fails, surface failure and do not silently continue with split + ownership. +5. Extend `src/index.ts` teardown registration so it disposes: + - `session-mcp-runtime` + - any executor worker state + - any corpus caches + - any new timers introduced for TTL refresh/cleanup + +### 10.4 TDD steps + +Write failing tests first in: + +- `src/session.test.ts` +- `src/services/session-corpus.test.ts` +- `src/index.test.ts` +- `src/services/session-mcp-runtime.test.ts` + +Required coverage: + +- temporary-root migration of corpora/stat keys +- parent/child shared root-session corpora after migration +- runtime re-initialization disposes all new MCP-first resources exactly once +- deletion of a child session does not delete root-owned corpora or stats + +### 10.5 Verification commands + +```bash +deno test src/session.test.ts src/services/session-corpus.test.ts src/index.test.ts src/services/session-mcp-runtime.test.ts +deno task check +``` + +### 10.6 Completion gate + +Task 6 is done only when temporary-root migration covers all new local MCP state +and runtime restart/teardown leaves no duplicate workers, timers, or orphaned +root-local corpus state. + +--- + +## 11. Task 7 — Validate compaction continuity and async Graphiti augmentation remain intact + +### 11.1 Goal + +Prove that the MCP-first replacement did not break the existing local continuity +and async Graphiti invariants. 
+ +### 11.2 Files + +**Modify** + +- `src/handlers/chat.ts` +- `src/handlers/chat.test.ts` +- `src/handlers/messages.ts` +- `src/handlers/messages.test.ts` +- `src/handlers/compacting.ts` +- `src/handlers/compacting.test.ts` +- `src/handlers/event.ts` +- `src/handlers/event.test.ts` +- `README.md` +- `docs/ContextOverhaul.md` +- `docs/ContextOverhaulTests.md` + +### 11.3 Implementation requirements + +1. Keep `` local-first: + - local continuity sections from Redis/FalkorDB + - `` from the local snapshot service + - optional `` from cache only +2. Do not add any synchronous Graphiti dependency to: + - `chat.message` + - `experimental.chat.messages.transform` + - `experimental.session.compacting` + - `tool.execute.before` + - `tool.execute.after` + - synchronous `event` handling +3. Ensure `session_*` activity survives compaction through the same event and + snapshot model as other continuity events. +4. Update docs: + - `README.md`: add MCP-first `session_*` overview, local corpus behavior, and + local-first `` wording + - `docs/ContextOverhaul.md`: keep historical doc but mark it superseded by + the replacement architecture and this implementation plan; fix stale + `plans/...` references to actual `docs/...` paths and normalize any stale + internal cross-references that still point at pre-move locations + - `docs/ContextOverhaulTests.md`: mark prior hot-path test plan superseded; + fix stale path references, normalize any stale internal cross-references, + and point readers to this implementation plan for the active acceptance + matrix + - delete or keep absent + `docs/superpowers/plans/2026-03-20-context-overhaul-hot-path.md` + +### 11.4 TDD steps + +Write failing tests first in: + +- `src/handlers/chat.test.ts` +- `src/handlers/messages.test.ts` +- `src/handlers/compacting.test.ts` +- `src/handlers/event.test.ts` + +Required coverage: + +- local-first `` with optional cached `` +- compaction continuity still includes session-derived MCP-first events +- Graphiti 
remains off the hot path for all synchronous hooks + +### 11.5 Verification commands + +```bash +deno test src/handlers/chat.test.ts src/handlers/messages.test.ts src/handlers/compacting.test.ts src/handlers/event.test.ts +deno task check +``` + +### 11.6 Completion gate + +Task 7 is done only when compaction continuity and async Graphiti augmentation +still behave as before, with `session_*` activity folded into the same local +continuity model. + +--- + +## 12. Migration / Removal Work For Superseded Native-Routing Plan + +This cleanup is mandatory and not optional follow-up polish. + +### 12.1 Delete or retire + +1. Delete `docs/superpowers/plans/2026-03-20-context-overhaul-hot-path.md` if it + exists anywhere in the active branch. +2. Remove any stale references to that file from docs, tasks, or review notes. + +### 12.2 Rewrite and retain + +1. Retain `src/services/tool-routing.ts`, but rewrite it as a secondary + enforcement layer only. +2. Retain `src/handlers/tool-before.ts` and `src/handlers/tool-after.ts`, but + narrow them to: + - `root_session_id` injection for `session_*` + - native fallback enforcement + - routing attribution metadata +3. Retain `src/session.ts` as lineage authority and extend it for corpus/state + migration. +4. Retain existing Redis events, snapshots, and Graphiti async services. + +### 12.3 Remove old success language + +Delete or rewrite any comments, tests, or docs that define success mainly as: + +- “native hot-path alignment” +- “80% native routing parity” +- “intercept Read/Bash/WebFetch first, then call it context-mode aligned” + +Replace them with MCP-first success language centered on `session_*`. + +--- + +## 13. Required Acceptance Test Matrix + +All of the following must exist by the end of implementation. 
+ +| Requirement | Required test location | +| ------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | +| `session_fetch_and_index` | `src/services/session-corpus.test.ts`, `src/services/session-mcp-runtime.test.ts` | +| `session_index` | `src/services/session-corpus.test.ts`, `src/services/session-mcp-runtime.test.ts` | +| `session_execute` / `session_batch_execute` | `src/services/session-executor.test.ts`, `src/services/session-mcp-runtime.test.ts` | +| `session_execute_file` | `src/services/session-executor.test.ts`, `src/services/session-mcp-runtime.test.ts` | +| `session_search` | `src/services/session-corpus.test.ts` | +| parent/child shared root-session corpora | `src/session.test.ts`, `src/services/session-corpus.test.ts` | +| temporary-root migration of corpora/stat keys | `src/session.test.ts`, `src/services/session-corpus.test.ts` | +| TTL expiry graceful behavior | `src/services/session-corpus.test.ts`, `src/services/redis-client.test.ts` | +| local-first `` with optional cached `` | `src/handlers/chat.test.ts`, `src/handlers/messages.test.ts`, `src/handlers/compacting.test.ts` | +| small-corpus relevance baseline | `src/services/session-corpus.test.ts` | + +In addition to the top-level rows above, named coverage for `session_stats` and +`session_doctor` is mandatory in `src/services/session-mcp-runtime.test.ts` and +must verify valid bounded responses backed by local state/health checks rather +than placeholder payloads. + +--- + +## 14. Final Verification Sequence + +Run this exact sequence after Task 7. 
+ +```bash +deno test src/services/session-mcp-runtime.test.ts src/services/session-corpus.test.ts src/services/session-executor.test.ts src/session.test.ts +deno test src/handlers/tool-before.test.ts src/handlers/tool-after.test.ts src/handlers/chat.test.ts src/handlers/messages.test.ts src/handlers/compacting.test.ts src/handlers/event.test.ts +deno test src/index.test.ts src/services/tool-routing.test.ts src/services/redis-client.test.ts +deno test +deno task check +deno task lint +deno fmt --check +``` + +Do not mark the milestone complete if any command above fails. + +--- + +## 15. Out of Scope For This Milestone + +The implementation must not expand into any of the following: + +- `session_upgrade` +- SQLite / FTS5 / BM25 adoption +- Graphiti on the hot path +- project-wide or cross-session local corpora beyond the canonical root session +- child-session-only corpus namespaces +- remote execution backends, containers, or Docker orchestration for + `session_execute*` +- semantic/vector search for local corpora +- undocumented OpenCode APIs or private plugin internals +- UI work, telemetry pipelines, or non-test benchmarking infrastructure + +--- + +## 16. Definition of Done + +This milestone is done only when all seven ordered tasks are complete and all of +the following are true: + +1. `session_*` tools are the primary bounded execution and retrieval surface. +2. Local Redis/FalkorDB corpora and search work without Graphiti. +3. Parent and child sessions share one canonical root-session corpus. +4. Temporary-root migration covers corpus/artifact/stat state. +5. Hook logic is clearly secondary enforcement + attribution. +6. `` remains local-first with optional cached + ``. +7. Async Graphiti augmentation remains intact and off the hot path. +8. The superseded hot-path implementation plan file is deleted or verified + absent, and stale references are removed. 
diff --git a/docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md b/docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md new file mode 100644 index 0000000..99d0425 --- /dev/null +++ b/docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md @@ -0,0 +1,1019 @@ +# Context-Mode-Aligned MCP-First Replacement Plan + +**Status:** Superseding plan\ +**Date:** 2026-03-20\ +**Supersedes:** `plans/ContextOverhaul.md` and any in-progress Task 1 / Task 2 +work derived from that native-hook-first plan\ +**Grounding sources:** `AGENTS.md`, `README.md`, `src/index.ts`, +`src/session.ts`, `plans/ContextOverhaul.md`, `plans/ContextOverhaulTests.md`, +and the public `mksglu/context-mode` README already established for this session + +--- + +## 1. Executive Decision + +This document **replaces**, not extends, the current +`tool.execute.before`-centric native-routing plan. + +The prior plan drifted from the actual target in one decisive way: it treated +**native tool interception** as the product architecture, while `context-mode` +is fundamentally an **MCP-first system** where dedicated tools are the primary +execution surface and hooks exist to enforce that preference, capture +continuity, and preserve state across compaction. + +That drift produced the wrong center of gravity: + +- it optimized how native OpenCode tools should be blocked or rewritten +- it did **not** define a first-class `session_*` MCP tool surface analogous to + `context-mode` +- it risked making this repository a smarter native-tool filter instead of a + local session-runtime with its own bounded execution and local retrieval path + +This plan corrects that. The architecture target is now: + +1. **MCP-first execution** through `session_*` tools +2. **Redis/FalkorDB hot-path persistence** instead of SQLite/FTS5 +3. **Canonical root-session participation** for parent and child sessions alike +4. 
**Async Graphiti consolidation** that augments `` with cached + `` and never blocks the hot path + +--- + +## 2. Replacement Scope + +### 2.1 In scope + +- define the MCP-first architecture for this repository +- define the `session_*` tool suite and each tool's contract-level role +- define the Redis/FalkorDB local indexing and search design +- define the plugin/hook role as enforcement + continuity only +- define child-session behavior for both continuity and MCP-tool activity +- define TTL rules for all non-Graphiti Redis/FalkorDB state +- define Graphiti's async role after the local hot path +- define migration/replacement rules for current native-routing work +- define measurable validation criteria tied to MCP-first behavior + +### 2.2 Explicitly not in scope + +- copying `context-mode`'s SQLite storage, FTS5 schema, or `ctx_*` naming +- moving Graphiti onto the hot path +- flattening child sessions into summarized tool events +- turning hooks into the primary execution system +- introducing undocumented OpenCode capabilities beyond the documented plugin + hooks already used in this repo +- implementing an auto-upgrade workflow in this phase (`session_upgrade` is out + of scope) +- storing non-Graphiti session data indefinitely in Redis/FalkorDB + +--- + +## 3. 
Architecture Decision Summary + +### 3.1 Primary architectural split + +| Layer | Owns | Must not own | +| -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------- | +| **`session_*` MCP server** | bounded execution, file processing, fetch+index, local indexing, local search, utility diagnostics/stats | compaction injection, session lineage resolution via OpenCode parent chain, Graphiti hot-path reads | +| **OpenCode plugin hooks** | root-session canonicalization, native-tool enforcement, continuity event capture, snapshot assembly, `` injection, async Graphiti scheduling | primary execution semantics, large-result processing, long-lived search/index ranking state | +| **Async Graphiti tier** | background semantic consolidation and cached `` refresh | any synchronous hook-time dependency | + +### 3.2 Required execution model + +1. The model should prefer `session_*` tools for data-heavy work. +2. The MCP server should keep raw data local and return bounded results. +3. The plugin should enforce the preference when the model falls back to risky + native tools. +4. The plugin should continue to build and inject deterministic + `` from local Redis/FalkorDB state. +5. Graphiti should remain a later, asynchronous enhancer. + +This is the canonical target. Any implementation choice that recenters the +system on native tool routing is out of compliance with this plan. 
+ +### 3.3 MCP server lifecycle and transport default + +The default for OpenCode is locked: + +- the `session_*` MCP server/runtime must run **in process**, owned by the same + plugin runtime that owns the hooks +- MCP tool handlers and hooks must therefore share the same canonical + root-session identity source, teardown discipline, and process-local caches +- a separate out-of-process MCP transport is **not** the default for this repo + and must not be assumed in the first implementation plan + +Rationale: + +- `src/index.ts` already centralizes runtime initialization and teardown +- `src/session.ts` already centralizes canonical root-session identity +- the approved hardening goal is to avoid split-brain lifecycle handling between + tool runtime and hook runtime + +If a later plan proposes a different transport boundary, it must justify how it +preserves shared root identity, shared teardown, and non-divergent cache/state +behavior. That justification is out of scope here. + +--- + +## 4. 
MCP-First Runtime Model + +```text +OpenCode session + | + |- Model chooses tools + | |- preferred path: session_* MCP tools + | '- fallback path: native tools (plugin may allow, redirect, or deny) + | + |- session_* MCP server + | |- session_execute / session_execute_file / session_batch_execute + | |- session_index / session_search / session_fetch_and_index + | '- session_stats / session_doctor + | + |- Redis/FalkorDB hot tier + | |- session events + | |- snapshots + | |- local indexed corpora + chunk postings + | |- execution/search stats + | '- pending Graphiti drain state + | + |- OpenCode plugin hooks + | |- canonical root-session identity + | |- continuity extraction + injection + | '- native-tool enforcement toward session_* + | + '- Graphiti async tier + |- consolidate selected local events in background + '- refresh cached persistent memory for later injection +``` + +### 4.1 Default behavioral rule + +When a task would otherwise dump large raw output into the transcript, the +correct path is: + +1. use a `session_*` MCP tool +2. store or index the full local artifact in Redis/FalkorDB +3. return only a bounded summary/snippet/handle to the model + +The plugin exists to keep the system on that path; it is not the path itself. + +### 4.2 Runtime resources that must join teardown discipline + +Any new MCP-first runtime component must join the same teardown/cleanup +discipline already visible in `src/index.ts`. + +The follow-on implementation must register cleanup for all of these resources if +they exist: + +1. in-process `session_*` MCP server/runtime registration +2. local indexing workers or task queues +3. fetch/normalize/index pipelines for `session_fetch_and_index` +4. chunking or artifact-processing pipelines for `session_execute_file` +5. bounded execution worker pools or subprocess supervisors for + `session_execute` / `session_batch_execute` +6. any in-memory corpus/query caches used by the MCP runtime +7. 
any per-session search candidate caches or snippet caches +8. any local timers/background loops for corpus cleanup, TTL refresh, or + deferred indexing +9. existing Graphiti async flush/dispose resources +10. Redis client / connection resources already owned by the runtime + +No new background worker, queue, cache, or timer may be introduced without an +explicit teardown path. + +### 4.3 How models discover and prefer `session_*` tools in OpenCode + +The default discovery/preference stack is also locked: + +1. `session_*` tools are registered as MCP tools and are visible to the model as + first-class tool choices +2. project `AGENTS.md` guidance must teach the model to prefer `session_*` tools + for data-heavy work +3. plugin-side guidance/enforcement in `tool.execute.before` remains active as a + fallback when the model attempts risky native tools instead + +OpenCode preference therefore comes from **all three** layers together: + +- MCP registration makes the tools available +- `AGENTS.md` teaches the preference early +- hook enforcement keeps the session on the bounded path when the model drifts + +The implementation must not assume MCP registration alone is sufficient, and it +must not rely on hook enforcement alone as the primary discovery mechanism. + +--- + +## 5. `session_*` Tool Suite + +All new MCP tools must use the `session_*` prefix. `ctx_*` naming is forbidden +in this repository. 
+ +### 5.1 Tool suite and exact role + +| Tool | Role | Primary inputs | Primary outputs | Notes | +| ------------------------- | ----------------------------------------------------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------------ | ----------------------------------------------------- | +| `session_execute` | Run one bounded sandbox execution task | command/script, runtime, intent, timeout, `root_session_id` | bounded result, summary, optional artifact/index handle | primary replacement for raw data-heavy Bash workflows | +| `session_execute_file` | Run one bounded sandbox file-processing task | path(s), processing intent, runtime/handler, `root_session_id` | findings, summary, optional artifact/index handle | primary replacement for raw file-dump analysis | +| `session_batch_execute` | Combine multiple execute/search sub-operations into one call | list of execute/search/file subrequests, `root_session_id` | bounded multi-result response + handles | sequential in v1; no hidden parallelism | +| `session_index` | Normalize and locally index supplied content into the hot-tier corpus | content or pre-normalized text, source metadata, `root_session_id` | corpus id, chunk count, query hints | local-only indexing; no Graphiti involvement | +| `session_search` | Query the local indexed corpus for the canonical root session | query or query list, optional corpus filters, `root_session_id` | ranked bounded snippets + corpus/chunk refs | searches only local session-scoped indexed data | +| `session_fetch_and_index` | Fetch a URL in sandbox, normalize it, then index it locally | url, fetch options, content-type hint, `root_session_id` | corpus id, summary, query hints | primary replacement for native `WebFetch` | +| `session_stats` | Show local context-savings and tool/index activity for the root session | optional scope, `root_session_id` | counters, byte ratios, corpus 
counts, queue depth | in scope | +| `session_doctor` | Diagnose MCP/plugin/hot-tier health | optional checks, `root_session_id` | health report for Redis, hooks, cache, Graphiti connectivity | in scope | + +### 5.2 Scope decision for `session_upgrade` + +`session_upgrade` is **out of scope** for this replacement plan. + +Reason: + +- the replacement goal is architectural correctness, not self-update mechanics +- this repository's current documented scope is memory continuity + async + Graphiti integration, not installer/update orchestration +- adding upgrade behavior now would broaden scope before the MCP-first runtime + is stable + +The implementation may reserve the name, but it must not be part of the first +replacement milestone, the validation bar, or the migration work. + +### 5.3 Tool behavior defaults + +The following defaults are mandatory unless later superseded by a narrower +implementation plan: + +1. Every `session_*` tool must accept `root_session_id`. +2. In OpenCode, the plugin must populate `root_session_id` in + `tool.execute.before` for every `session_*` call using canonical root-session + resolution from `src/session.ts`. +3. `session_*` tools are session-scoped by default; they do not create + indefinite project-wide local corpora. +4. If a full result exceeds the bounded response budget, the tool must + store/index the full artifact locally and return only: + - a concise summary + - a handle/corpus reference + - suggested follow-up queries or next actions +5. `session_search` must return snippets and references, not full stored + documents. + +### 5.4 Default semantics for `session_batch_execute` + +`session_batch_execute` must behave deterministically in v1. + +Locked defaults: + +1. sub-operations execute **sequentially** in request order +2. there is **no hidden parallelism** in v1 +3. each sub-operation returns its own status (`ok`, `error`, or `skipped`) +4. 
later sub-operations may continue after an earlier error unless the request + explicitly asks for fail-fast behavior in a future version; fail-fast is not + the default in this plan +5. the tool returns a bounded **combined** response plus per-item references, + not full raw outputs concatenated together +6. if any sub-operation produces a large artifact, the artifact is + stored/indexed locally and represented in the combined response by a summary + and reference + +This default is chosen to keep execution understandable, auditable, and easy to +test while the MCP-first runtime is being established. + +--- + +## 6. Local Indexing and Search on Redis/FalkorDB + +### 6.1 Storage decision + +This repository must **not** reproduce `context-mode`'s SQLite/FTS5 layer. + +Instead, local indexing/search must run on the already-documented Redis/FalkorDB +hot tier used by this repo's short-term memory system. The implementation must +rely on Redis-compatible primitives that are already consistent with the +existing repository architecture. It must **not** assume RediSearch, SQLite +FTS5, or undocumented FalkorDB full-text features. + +### 6.2 Responsibility split + +| Concern | Responsibility | +| --------------------------------------- | ----------------------------------------------------------- | +| text normalization | MCP server | +| chunking | MCP server | +| postings/materialized search structures | Redis/FalkorDB hot tier | +| ranking | MCP server process using Redis/FalkorDB candidate retrieval | +| continuity injection | plugin, not the MCP server | +| long-term semantic memory | Graphiti async tier, not local search | + +### 6.3 Corpus scope + +Local indexed corpora are scoped to the **canonical root session** and the +project `groupId` already used by the plugin. 
+ +Default namespace: + +`groupId + root_session_id` + +Concrete default key prefix for implementation: + +`session:{groupId}:{root}:...` + +Any later shorthand that omits `{groupId}` is documentation shorthand only and +must not be implemented as a bare root-only namespace. + +That scope is mandatory because: + +- the repo already centers continuity on canonical root-session identity +- child sessions are intentionally first-class participants in the same + workstream +- Graphiti, not Redis/FalkorDB, is responsible for cross-session persistence +- TTL-based cleanup is required to avoid hoarding prior sessions + +### 6.4 Required index structures + +The local index must store, at minimum: + +| Key family | Purpose | +| ----------------------------------------------------- | ------------------------------------------------------------------------- | +| `session:{groupId}:{root}:corpora` | corpus manifest set/list for the root session | +| `session:{groupId}:{root}:corpus:{corpusId}:meta` | corpus metadata: source, title, created/updated time, chunk count, format | +| `session:{groupId}:{root}:corpus:{corpusId}:chunks` | ordered chunk references | +| `session:{groupId}:{root}:chunk:{chunkId}` | chunk payload + heading/title/order metadata | +| `session:{groupId}:{root}:term:{token}` | token-retrieval posting set of chunk ids containing a normalized token | +| `session:{groupId}:{root}:tri:{trigram}` | trigram-retrieval posting set for substring matching | +| `session:{groupId}:{root}:artifact:{artifactId}:meta` | canonical artifact metadata | +| `session:{groupId}:{root}:artifact:{artifactId}:body` | canonical artifact body | +| `session:{groupId}:{root}:stats` | local execution/index/search counters and byte totals | + +This plan intentionally chooses Redis-compatible sets/hashes/lists and +application-side ranking, because those are compatible with the repository's +current documented storage model. 
+ +### 6.5 Chunking rules + +To stay close to `context-mode` without copying SQLite mechanics, chunking must +follow these defaults: + +1. **Markdown / HTML-normalized content**: heading-aware chunks; preserve code + blocks with their nearest heading. +2. **HTML fetches**: normalize into a markdown-oriented or markdown-equivalent + text form before chunking; simple tag stripping is not sufficient for the + target architecture because it loses heading/section structure that + `context-mode` relies on for navigational retrieval. +3. **Plain text / logs**: fixed-size windows with overlap. +4. **JSON**: normalized pretty text or selected-path projections before + chunking; do not index raw minified blobs unchanged. +5. **Execution outputs**: store the full artifact locally, index either the + normalized full text or a derived searchable text representation, and return + only the bounded surface response. + +Required implementation discipline: + +- fenced code blocks must survive chunking as intact units associated with the + nearest heading/section context +- heading/title structure must remain query-visible after normalization +- fetched HTML must not degrade into one flat whitespace-collapsed blob before + indexing +- HTML normalization must preserve at minimum headings, paragraph/section + boundaries, ordered/unordered list boundaries, and pre/code blocks in the + markdown-oriented intermediate form +- if a fenced code block would cross a chunk boundary, it becomes its own atomic + chunk tied to the nearest heading rather than being split by the plain-text + windowing pass + +### 6.6 Search algorithm defaults + +The local search path must be deterministic and implemented in process. + +Required ranking flow: + +1. normalize query text +2. apply stemming to normalized query tokens with a porter-equivalent stemming + pass so inflected token forms can retrieve the same indexed concept family +3. 
if exact/stem retrieval would otherwise miss the intended target, apply fuzzy + correction before or alongside a retry/expansion pass for typo-tolerant + recovery +4. collect token/stem candidates from `term:*` postings +5. if token/stem recall is sparse, add trigram candidates from `tri:*` +6. rank each retrieval strategy independently in process: + - token/stem strategy uses an in-process BM25-style score over local postings + - trigram strategy uses substring-match scoring with lower base weight than + token/stem results +7. fuse the ranked lists with Reciprocal Rank Fusion (RRF) +8. apply proximity reranking for multi-term queries so chunks where terms appear + close together outrank chunks where the same terms are widely separated +9. return bounded snippets around the matched region plus refs + +To stay closer to `context-mode` feature behavior, the implementation must also: + +- keep token matching and trigram matching as two distinct retrieval strategies + whose evidence is combined during ranking rather than treating trigram hits as + an undifferentiated append-only fallback +- keep the retrieval phases distinct as well: collect token candidates first, + and add trigram candidates only when token recall is sparse or when a + partial-string query explicitly needs substring recovery +- include a porter-equivalent stemming stage in both indexing and query + normalization so singular/plural/inflected forms are not treated as unrelated + terms +- implement BM25-style scoring in process over Redis/FalkorDB postings rather + than dropping BM25 parity entirely; the divergence from `context-mode` is the + storage engine, not the retrieval feature target +- use Reciprocal Rank Fusion (RRF) to merge token/stem and trigram ranked lists +- include fuzzy correction for misspelled queries before failing closed on local + retrieval +- apply proximity reranking for multi-term queries after the base ranked lists + are fused +- expose deterministic query behavior for 
partial-string lookups that mimics the + practical role of `context-mode`'s porter+trigram pairing, even though BM25 + and porter stemming are not available as built-in Redis/FalkorDB database + features +- bias snippet extraction around matched query regions, not first-chunk text + +### 6.7 Known limitations versus `context-mode` + +This design deliberately accepts these differences from `context-mode`: + +| Area | `context-mode` | This plan | +| -------------------- | ----------------------------------------------------------- | ---------------------------------------------------------- | +| local DB | SQLite | Redis/FalkorDB via Redis protocol | +| full-text engine | FTS5 with BM25 and tokenizer support | application-side BM25-style scoring over Redis postings | +| stemming | documented Porter stemming | application-side porter-equivalent stemming | +| ranking fusion | FTS5/trigram/RRF stack | application-side token/stem + trigram + RRF stack | +| fuzzy correction | Levenshtein-based retry/correction | application-side fuzzy correction before/alongside retry | +| ranking depth | DB-native full-text scoring + proximity reranking | bounded candidate scoring in process + proximity reranking | +| persistence lifetime | local DB survives until session lifecycle policy deletes it | TTL-governed Redis/FalkorDB state | + +The intended feature target is therefore closer to `context-mode` than the +earlier draft: the storage engine diverges, but the retrieval feature set should +still include BM25-style ranking, porter-equivalent stemming, trigram matching, +RRF fusion, fuzzy correction, and proximity reranking. These features must be +documented and tested, not hidden. + +### 6.8 Pitfalls specific to Redis/FalkorDB indexing + +Implementers must treat these as design constraints, not optional warnings: + +1. **No FTS5 assumptions** — no SQL ranking clauses, tokenizer extensions, or + BM25 dependency may leak into the design. +2. 
**Key explosion risk** — token/trigram postings can multiply quickly; chunk + sizes and TTLs must be conservative. +3. **TTL coherence risk** — corpus manifests, chunk payloads, and postings must + expire together or be refreshed together. +4. **Large-artifact duplication risk** — store one canonical artifact + representation and derive index text from it; do not keep multiple full + copies. +5. **Ranking drift risk** — bounded in-process BM25-style ranking will be simpler + than FTS5's DB-native BM25; tests must measure useful retrieval behavior explicitly. +6. **HTML-structure loss risk** — if normalization destroys heading/section + structure, retrieval quality will drift too far from `context-mode`'s actual + navigational behavior. +7. **Test-only parity illusion** — corpus/search behavior must be wired into the + real in-process runtime, not only exercised through test-only dependency + injection, or the implementation will falsely appear context-mode-aligned + while production remains stubbed. +8. **Feature-parity erosion risk** — if BM25-style scoring, porter-equivalent + stemming, trigram fusion, fuzzy correction, or proximity reranking are + quietly dropped, the system will only superficially resemble `context-mode`. + +Required default for artifact indexing: + +- store exactly one canonical full artifact body under the artifact key family +- derive searchable text/chunks from that canonical body during indexing, but do + not persist a second redundant full-body copy as the chunk payload set + +### 6.9 Minimum search relevance baseline + +The implementation plan must include at least one small-corpus relevance test +for `session_search` with a fully known expected ordering. + +Required baseline: + +1. 
index a three-document corpus under one canonical root session: + - doc A titled `Redis Session TTLs` containing repeated terms about session + TTLs and expiration + - doc B titled `Graphiti Async Drain` containing Graphiti drain/retry text + - doc C titled `Child Session Canonicalization` containing parent/root/child + lineage text +2. query `session ttl` +3. expected result: doc A must rank first +4. expected result: returned snippet for doc A must include both `session` and + `TTL`/`ttls` in the snippet window +5. expected result: doc B and doc C may appear after doc A, but neither may rank + above doc A + +This baseline is intentionally small and mechanical. It does not claim semantic +search quality; it verifies that exact-term and title-weighted retrieval behave +as designed. + +Additional required retrieval-parity checks for the follow-on implementation +plan: + +1. stemming behavior: a query using an inflected form still retrieves the same + intended document family +2. partial-string behavior: a substring query can retrieve the intended chunk + through trigram matching +3. typo behavior: a misspelled query is corrected or retried through fuzzy + matching and still surfaces the intended result +4. multi-term proximity behavior: a chunk where terms appear close together + ranks above one where the same terms are far apart +5. rank-fusion behavior: token/stem and trigram result lists are merged through + RRF rather than one list blindly overwriting the other + +--- + +## 7. 
OpenCode Hook Model in the MCP-First Architecture + +The hooks remain important, but their role changes from “primary routing +architecture” to “enforcement + continuity around the MCP-first runtime.” + +### 7.1 Hook responsibilities + +| Hook | Required role in the new model | +| -------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `tool.execute.before` | populate canonical `root_session_id` on `session_*` calls; enforce fallback from risky native tools toward `session_*`; never become the main execution engine | +| `tool.execute.after` | capture bounded tool events, context-savings stats, artifact refs, and routing outcomes; never rewrite large raw output after the fact as the primary mechanism | +| `chat.message` | assemble local `` from events, snapshot, and cached persistent memory; schedule async refresh decisions only | +| `experimental.chat.messages.transform` | prepend the prepared `` envelope to the last user message | +| `experimental.session.compacting` | inject the same prepared local continuity envelope into compaction | +| `event` | capture user/assistant/session lifecycle events, maintain canonical root-session lineage state, schedule snapshot rebuilds and async Graphiti drain | + +### 7.2 Hook interaction sequence + +```text +1. chat.message + -> load canonical root-session local state + -> prepare + +2. experimental.chat.messages.transform + -> inject into the user message + +3. tool call selected by the model + a. tool.execute.before + - if tool is session_*: inject canonical root_session_id and allow + - if tool is risky native fallback: redirect/deny toward session_* + - if tool is safe bounded native fallback: allow + b. tool runs + c. tool.execute.after + - record bounded event and stats only + d. event hook(s) + - persist compact continuity event under canonical root session + +4. 
session.idle / session.compacted events + -> rebuild snapshot locally + -> flush eligible Graphiti drain work asynchronously +``` + +### 7.3 Enforcement defaults for native OpenCode tools + +To stay close to `context-mode` while remaining MCP-first, the defaults are: + +| Native tool | Enforcement default | +| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | +| `WebFetch` | deny and direct to `session_fetch_and_index` | +| `Bash` | allow ordinary bounded shell use; deny or rewrite data-heavy/network/raw-dump patterns toward `session_execute` or `session_batch_execute` | +| `Read` | allow bounded file inspection; direct large/analysis-oriented use toward `session_execute_file` | +| `Grep` / `Glob` | allow as lightweight native helpers; do not make them the primary retrieval path when `session_index` / `session_search` is the better fit | +| `Task` | preserve child-session behavior, but append routing guidance so delegated agents prefer `session_*` tools | + +This is a **secondary enforcement layer**. The success criteria for the system +are no longer “how many native tools were intercepted,” but “whether data-heavy +work primarily flows through `session_*` tools.” + +### 7.4 OpenCode-specific root identity rule + +`src/session.ts` already makes canonical root-session identity the core +continuity concept. The new architecture must reuse that logic. + +Rule: + +- the plugin is authoritative for canonical root-session identity in OpenCode +- `tool.execute.before` must add `root_session_id` to all `session_*` tool calls +- `tool.execute.after` and `event` must attribute all resulting continuity + events, stats, corpora, and artifacts to that same canonical root session + +The MCP server must not invent a competing lineage model. 
+ +### 7.5 Existing in-memory routing caches under the new architecture + +The current in-memory `ToolGuidanceCache` and `ToolRoutingOutcomeCache` remain +process-local enforcement-layer caches. + +Locked behavior: + +1. they stay **in memory only** in v1 +2. they are not promoted to Redis/FalkorDB durable state +3. they continue to be keyed by canonical root-session lineage where applicable +4. they must be cleared naturally on plugin runtime re-initialization/teardown +5. they are advisory/enforcement helpers only; no continuity-critical behavior + may depend on them surviving restart + +Their role narrows under this architecture: + +- `ToolGuidanceCache` throttles repeated native-tool fallback guidance +- `ToolRoutingOutcomeCache` tracks transient routing outcomes for tool-lifecycle + handling + +Neither cache is allowed to become a second durable session-state system. + +--- + +## 8. Child Sessions as First-Class Participants + +This repository keeps its intentional divergence from `context-mode`: child +sessions are not reduced to opaque summarized tool invocations. They are +first-class contributors to the canonical root session. + +### 8.1 Mandatory behavior + +1. Child and parent sessions share one canonical root session identity. +2. Child-created `session_*` corpora, execution artifacts, and stats are stored + under the root session namespace. +3. Child-origin events continue to appear in the same event log and snapshot + stream used by the parent. +4. Future parent or child `<session_memory>` injections reflect the combined + lineage state. +5. Deleting a child session must not delete root-owned corpora, events, + snapshots, or cached local index state. + +### 8.2 Temporary-root handling + +`src/session.ts` already contains temporary-root behavior for sessions whose +lineage is not yet resolved.
The replacement architecture must preserve one +rule: + +- if a child session temporarily behaves like a root and later resolves to an + actual parent, all runtime state and local MCP artifacts created during the + temporary-root phase must migrate to the canonical root session namespace + exactly once + +This includes: + +- in-memory session state +- assistant buffers +- guidance/routing state +- local corpus manifests +- chunk keys/postings +- per-session stats + +If this migration is not exact, the implementation will leak or orphan indexed +artifacts and break root-session continuity. + +Required Redis/FalkorDB migration behavior for temporary-root resolution: + +1. the implementation must migrate the full local key family from provisional + root namespace to canonical root namespace, including: + - `session:{root}:corpora` + - `session:{root}:corpus:{corpusId}:meta` + - `session:{root}:corpus:{corpusId}:chunks` + - `session:{root}:chunk:{chunkId}` + - `session:{root}:term:{token}` + - `session:{root}:tri:{trigram}` + - `session:{root}:stats` + - any future artifact-manifest keys created for bounded execution outputs +2. migration must preserve existing TTL semantics for the moved data; it must + not silently reset indefinite lifetimes or strip expiry from migrated keys +3. the follow-on implementation must use **atomic or pipeline-based migration** + so partial moves cannot leave postings, chunks, or manifests split across old + and new roots +4. after successful migration, the obsolete provisional-root key family must be + removed +5. if migration fails partway, the implementation must fail in a way that avoids + partial ownership ambiguity; it must not continue as though migration fully + succeeded + +The implementation plan must name the exact migration strategy it chooses +(`MULTI/EXEC`, Lua/scripted move, or an equivalent pipeline discipline) and must +justify how TTL preservation is guaranteed. 
+ +### 8.3 Delegation rule for `Task` + +When the agent delegates work through `Task`, the plugin must append guidance +that child work remains inside the same canonical continuity model and should +prefer `session_*` tools for data-heavy operations. The implementation must not +create a second “child-local” MCP corpus model. + +--- + +## 9. TTL Strategy for All Non-Graphiti Redis/FalkorDB State + +All non-Graphiti data stored in Redis/FalkorDB must have TTLs. This is +mandatory. + +### 9.1 TTL categories and defaults + +| State category | Default TTL | Rationale | +| ------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------- | +| session events (`session:{id}:events`) | `redis.sessionTtlSeconds` (default 24h) | current-session continuity should survive compaction and short idle periods, not become indefinite history | +| root snapshots | `2 * redis.sessionTtlSeconds` (default 48h) | matches existing snapshot-retention posture and gives compaction recovery more headroom than event lists | +| local MCP corpora manifests/chunks/postings/artifacts | `redis.sessionTtlSeconds` (default 24h) refreshed on write and on successful search/access | session-scoped local knowledge should expire with the workstream | +| lineage maps / canonical session bookkeeping mirrored in Redis, if added | `2 * redis.sessionTtlSeconds` (default 48h) | root resolution must outlive brief child churn and compaction windows | +| local stats and counters | `redis.sessionTtlSeconds` (default 24h) | useful during active work only | +| Graphiti cache (`memory-cache:*`) | `redis.cacheTtlSeconds` (default 10m) | already documented as cached persistent-memory projection | +| Graphiti cache metadata | `redis.cacheTtlSeconds` (default 10m) | must expire with the cache body | +| pending drain 
batches | `3 * redis.sessionTtlSeconds` (default 72h) | retries and delayed Graphiti recovery need more time than active session memory | +| dead-letter drain entries | `3 * redis.sessionTtlSeconds` (default 72h) | enough time for inspection without indefinite retention | + +### 9.2 TTL invariants + +1. Related local-index keys must be expired together. +2. A search hit on a local corpus may refresh that corpus family TTL, but only + within the root-session namespace. +3. No non-Graphiti key family may be created without an explicit TTL assignment. +4. If TTL expiry removes session-local corpora, the system must degrade + gracefully by returning “not found / expired” rather than an error cascade. + +--- + +## 10. Async Graphiti Integration After the Local Hot Path + +Graphiti remains an enhancer, not a dependency. + +### 10.1 Fixed role + +Graphiti continues to do exactly these jobs: + +1. receive selected semantic episodes from the local event stream in the + background +2. refresh cached persistent-memory projections in Redis +3. provide later-turn `` augmentation inside + `` + +Graphiti must not do any of these jobs: + +- answer current-turn local search requests +- block `session_*` tool execution +- block `chat.message`, `messages.transform`, `session.compacting`, `event`, or + tool hooks +- become the local index for fetched pages, file processing, or command outputs + +### 10.2 Required integration sequence + +```text +session_* or native tool activity + -> compact continuity event written locally + -> eligible events queued for async Graphiti drain + -> snapshot rebuilt locally on idle/compaction + -> Graphiti drain runs later + -> Graphiti cache refresh updates Redis cache + -> next turn may include refreshed +``` + +### 10.3 `` contract remains local-first + +The injected envelope remains local-first and deterministic: + +```xml + + ...local continuity sections... + ... + ... + +``` + +Rules: + +1. 
local continuity sections and snapshot come from Redis/FalkorDB hot-tier + state +2. `<persistent_memory>` is optional and cache-backed only +3. absence or staleness of Graphiti data must never prevent injection of the + rest of `<session_memory>` + +--- + +## 11. Migration and Replacement Strategy for Current Native-Routing Work + +This section is normative. It tells implementers what to keep, rewrite, and +remove from the current workstream. + +### 11.1 Keep and reuse + +These existing capabilities remain aligned and should be retained: + +- `SessionManager` root-session canonicalization and parent-chain handling in + `src/session.ts` +- temporary-root migration mechanics, expanded to include MCP local-index state +- current `<session_memory>` assembly model and transform/compaction injection + flow +- Redis-backed events, snapshots, and cached persistent-memory services +- async Graphiti drain/cache architecture and the “Graphiti off the hot path” + invariant +- runtime teardown orchestration in `src/index.ts` + +### 11.2 Rewrite + +These parts must be rewritten around the MCP-first design: + +- `tool.execute.before` logic: it must become `session_*` argument injection + + native fallback enforcement, not the primary product architecture +- `tool.execute.after` logic: it must focus on bounded event capture, stats, and + artifact refs for both native and `session_*` calls +- any routing policy or documentation that defines success mainly in terms of + intercepting native `Read`/`Bash`/`WebFetch` +- any in-progress task text that frames the target as “80% native hot-path + alignment” rather than “context-mode-style MCP-first bounded execution plus + continuity” + +### 11.3 Remove + +The following target assumptions from the prior plan must be removed outright: + +1. that the main implementation milestone is a deterministic native-tool routing + engine +2. that the architecture can be considered context-mode-aligned without a + first-class `session_*` MCP surface +3.
that local search/indexing can be deferred while still claiming close + mechanism parity with `context-mode` +4. that hook-time blocking alone is enough to replace `context-mode`'s sandbox + tool model + +### 11.4 Task 1 / Task 2 replacement rules + +| Current workstream item | Action under this plan | +| ------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Task 1 native-routing contract/policy work | cancel as the main deliverable; salvage only generic utilities that remain useful for enforcement, canonical session lookup, and concise guidance | +| Task 2 pre-tool hook wiring | rewrite so hook wiring serves `session_*` root-session injection and native fallback enforcement; do not continue expanding native-tool policy as the center of the system | + +### 11.5 File-level migration guidance + +| File / area | Migration directive | +| --------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | +| `src/index.ts` | keep runtime/service wiring pattern; add MCP-first services and ensure teardown also covers new local-index/runtime resources | +| `src/session.ts` | keep canonical lineage ownership; extend migration and cleanup rules to MCP corpora/artifacts/stats | +| `src/handlers/chat.ts`, `src/handlers/messages.ts`, `src/handlers/compacting.ts`, `src/handlers/event.ts` | preserve continuity role; add any new local-index-derived metadata only if it remains compact and deterministic | +| `src/handlers/tool-before.ts` | rewrite around `session_*` argument injection and native fallback enforcement | +| `src/handlers/tool-after.ts` | rewrite around bounded event capture, stats, and artifact refs | +| any new native-tool policy module 
created for the prior plan | either delete or reduce to the minimal enforcement layer required to push work toward `session_*` | + +### 11.6 Documentation supersession requirements + +The old plan must be explicitly marked superseded in repository documentation. + +Minimum requirement: + +1. `plans/ContextOverhaul.md` must carry a factual superseded status/header that + points to this replacement plan +2. future implementation planning/docs must refer to this replacement plan as + the controlling architecture document for MCP-first work +3. no new task list or milestone text may describe `plans/ContextOverhaul.md` as + the active target architecture + +--- + +## 12. Validation and Acceptance Criteria + +Success must now be measured against MCP-first behavior, not against +native-routing sophistication. + +### 12.1 Acceptance criteria + +| ID | Criterion | Pass condition | +| --- | -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| A1 | `session_*` tool surface exists | `session_batch_execute`, `session_execute`, `session_execute_file`, `session_index`, `session_search`, `session_fetch_and_index`, `session_stats`, and `session_doctor` are implemented and registered | +| A2 | MCP-first path is primary | representative data-heavy tasks use `session_*` tools without raw payloads entering the transcript | +| A3 | Native enforcement is secondary | hook logic exists, but core success does not depend on native-tool-only workflows | +| A4 | Root-session identity is unified | parent and child `session_*` tool calls store corpora, events, and stats under the same canonical root session | +| A5 | Local indexing/search works without Graphiti | indexed fetch/file/command outputs are retrievable via `session_search` while Graphiti is offline | +| A6 | TTL discipline is complete | 
every non-Graphiti Redis/FalkorDB key family created by the new architecture has an explicit TTL | +| A7 | Hot path remains local | no Graphiti call is required during `tool.execute.before`, `tool.execute.after`, `chat.message`, `messages.transform`, `session.compacting`, or synchronous `event` handling | +| A8 | `<session_memory>` remains deterministic | compaction and chat injection still work when only Redis/FalkorDB local state is available | +| A9 | Migration is clean | prior Task 1 / Task 2 work is either repurposed or removed; no remaining milestone text describes native routing as the primary architecture | +| A10 | Out-of-scope boundaries are honored | no `session_upgrade`, no SQLite dependency, no child-session flattening, no Graphiti hot-path dependence | + +### 12.2 Required test additions and rewrites + +The follow-on implementation plan must replace native-routing-only success cases +with tests that prove MCP-first behavior. Required measurable coverage: + +1. `session_fetch_and_index` replaces native `WebFetch` for at least one + end-to-end fetch/search flow. +2. `session_execute` or `session_batch_execute` handles a data-heavy command and + returns only bounded output plus a searchable artifact handle. +3. `session_execute_file` handles a large-file analysis case without injecting + raw file contents into the transcript. +4. `session_search` retrieves relevant snippets from a locally indexed corpus + while Graphiti is unavailable. +5. parent and child sessions share the same root-session-local corpus namespace. +6. temporary-root migration moves local corpora/stat keys to the resolved + canonical root. +7. TTL expiry of local corpora causes graceful expiration behavior, not + corruption. +8. `chat.message`, `messages.transform`, and `session.compacting` still inject + valid local-first `<session_memory>` with optional cached + `<persistent_memory>`. +9. the minimum small-corpus relevance baseline from §6.9 passes exactly as + specified.
+ +### 12.3 Regression thresholds that must remain true + +The following existing invariants from the repository remain mandatory: + +- Graphiti stays off the hot path +- compaction survival continues to work +- Redis/FalkorDB remains the local system of record for the hot path +- child-session writes do not corrupt root-session continuity + +--- + +## 13. Failure Modes That Would Cause Goal Drift + +The implementation/tasks must explicitly prevent these drift modes. + +### 13.1 Architecture drift modes + +1. **Native-routing recentering**\ + Symptom: most design effort remains in `tool.execute.before` heuristics while + `session_*` tools are delayed or thin wrappers.\ + Prevention: implementation order must start with the `session_*` surface and + local index/search, then add enforcement hooks. + +2. **Graphiti creep back onto the hot path**\ + Symptom: current-turn search, fetch, or injection waits on Graphiti.\ + Prevention: all `session_*` tool functionality must be satisfiable from local + sandbox + Redis/FalkorDB only. + +3. **Child-session split brain**\ + Symptom: child `session_*` calls create separate corpora or stats outside the + root session.\ + Prevention: plugin-injected `root_session_id` is mandatory for all + `session_*` calls; no alternative local-session namespace is allowed. + +4. **Temporary-root orphaning**\ + Symptom: artifacts indexed before lineage resolution remain under obsolete + keys.\ + Prevention: canonicalization migration must include local corpus, chunk, + posting, and stat families in addition to existing in-memory session state. + +5. **Runtime re-initialization leakage**\ + Symptom: plugin re-init leaves duplicate timers, stale local-index workers, + or orphaned drain tasks.\ + Prevention: any new MCP-first runtime components must join the existing + teardown discipline visible in `src/index.ts`. + +6. 
**TTL inconsistency**\ + Symptom: chunk payloads expire but postings remain, or manifests remain + without chunks.\ + Prevention: index keys must be managed as explicit families with synchronized + TTL refresh/cleanup. + +7. **Search parity overclaim**\ + Symptom: docs claim BM25/FTS5-equivalent behavior without those mechanisms.\ + Prevention: plan and implementation must document the exact local ranking + method and its limitations. + +8. **Scope creep into upgrade/install workflows**\ + Symptom: `session_upgrade` or installer automation consumes the milestone.\ + Prevention: keep utilities to `session_stats` and `session_doctor` only in + the first replacement milestone. + +--- + +## 14. Ordered Implementation Priorities for the Follow-On Plan + +This document is not the implementation plan, but it locks the order that the +implementation plan must follow. + +1. **Define the `session_*` MCP server surface** and bounded result contracts. +2. **Implement local corpus storage/index/search on Redis/FalkorDB**. +3. **Thread canonical root-session identity into all `session_*` calls**. +4. **Integrate `session_*` results into continuity capture and stats**. +5. **Rewrite `tool.execute.before` / `tool.execute.after` around enforcement + + attribution**. +6. **Extend temporary-root migration and teardown coverage to new MCP local + state**. +7. **Validate compaction continuity and async Graphiti augmentation remain + intact**. + +Any implementation plan that starts with native-tool policy expansion instead of +the `session_*` tool surface is out of compliance with this document. + +--- + +## 15. Locked Defaults and Remaining Uncertainty + +This plan leaves little room for interpretation. The only meaningful +uncertainties are implementation details, and they are resolved here with +defaults. 
+ +| Uncertainty | Locked default | +| ---------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------- | +| Should local search use Redis-only primitives or assume RediSearch/FalkorDB full-text support? | Redis-compatible primitive key families + application-side ranking only | +| Should local corpora be project-wide or session-scoped? | session-scoped to canonical root session | +| Should child sessions have their own MCP corpus namespace? | no; child work joins the canonical root session | +| Should Graphiti answer current-turn local fetch/search queries? | no | +| Does `session_upgrade` belong in the first replacement scope? | no | +| Which layer owns root-session identity for OpenCode? | the plugin, using `src/session.ts` lineage logic | + +No further ambiguity is allowed on those points in the follow-on implementation +plan. + +--- + +## 16. Final Replacement Statement + +The repository target is now a **context-mode-style MCP-first local session +runtime** with: + +- `session_*` tools as the primary bounded execution and retrieval surface +- Redis/FalkorDB hot-tier local persistence instead of SQLite/FTS5 +- canonical root-session participation for parent and child work alike +- existing `<session_memory>` continuity and compaction preservation retained +- Graphiti kept async-only as a persistent-memory consolidator + +The previous native-hook-first plan is superseded because it optimized the wrong +center of gravity. From this point forward, implementation work must be judged +against the MCP-first architecture defined in this document.
diff --git a/docs/superpowers/plans/2026-03-22-task-2-final-fixes.md b/docs/superpowers/plans/2026-03-22-task-2-final-fixes.md new file mode 100644 index 0000000..df9d8fd --- /dev/null +++ b/docs/superpowers/plans/2026-03-22-task-2-final-fixes.md @@ -0,0 +1,176 @@ +# Task 2 Final Fixes Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Bring Task 2 to ready state by reproducing and fixing any remaining +`session_search` candidate-cap compliance gap, and by adding structured non-OK +handling for `session_fetch_and_index`. + +**Architecture:** Keep the existing Redis-backed local corpus design and +retrieval stages intact. First verify whether the current search implementation +still has a real 200-candidate-cap compliance gap; only patch `session_search` +if a new failing regression test proves it. Separately, make the fetch path +reject non-success HTTP responses before indexing while still returning a +schema-valid `session_fetch_and_index` response. + +**Tech Stack:** Deno, TypeScript, `jsr:@std/testing`, Redis-backed in-memory +test client, existing `session-corpus` service. + +--- + +### Task 1: Reproduce the remaining search-cap audit claim before changing + +search logic + +**Files:** + +- Modify: `src/services/session-corpus.test.ts` +- Modify: `src/services/session-corpus.ts` +- Test: `src/services/session-corpus.test.ts` + +- [ ] **Step 1: Run the existing regression first** + +Run: +`deno test src/services/session-corpus.test.ts --filter "applies the 200-candidate cap"` +Expected: Determine whether the existing regression already covers the reported +blocker. 
+ +- [ ] **Step 2: Write a sharper failing test only if the existing regression + passes** + +```ts +it("keeps RRF- and proximity-relevant chunks eligible until the final 200-candidate cap", async () => { + // Construct a corpus where a chunk is only promoted by the full compliant + // retrieval pipeline, not by the intermediate preliminary sum alone. +}); +``` + +- [ ] **Step 3: Run the new test to verify it fails** + +Run: +`deno test src/services/session-corpus.test.ts --filter "RRF- and proximity-relevant"` +Expected: FAIL only if the current implementation still drops a chunk that +should remain eligible until final ranking. + +- [ ] **Step 4: Write minimal implementation only if the new regression fails** + +```ts +// Keep candidate collection and ranking phases intact, but make the bounded +// 200-candidate selection derive from the same compliant evidence used by the +// final scorer so no chunk needed by the final ranking is dropped early. +``` + +- [ ] **Step 5: Run the relevant search tests to verify they pass** + +Run: +`deno test src/services/session-corpus.test.ts --filter "candidate cap|RRF|trigram|proximity"` +Expected: PASS + +- [ ] **Step 6: If no failing repro is found, stop changing `session_search` and + record that the blocker could not be reproduced from the current tree** + +```text +Do not refactor search heuristics without a red test. +If the existing and sharper regressions both pass, leave +`src/services/session-corpus.ts` unchanged for search. 
+``` + +### Task 2: Lock non-OK fetch handling with tests + +**Files:** + +- Modify: `src/services/session-corpus.test.ts` +- Modify: `src/services/session-mcp-runtime.test.ts` +- Modify: `src/services/session-corpus.ts` +- Test: `src/services/session-corpus.test.ts` +- Test: `src/services/session-mcp-runtime.test.ts` + +- [ ] **Step 1: Write the failing test** + +```ts +it("returns a structured error when fetch responds with non-ok status", async () => { + // Stub fetch to return new Response("missing", { status: 404 }) + // and assert status=error, non-empty corpusRef, URL echo, and HTTP status in + // summary. +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `deno test src/services/session-corpus.test.ts --filter "non-ok status"` +Expected: FAIL because the current code treats the response as a successful +indexed document. + +- [ ] **Step 3: Write minimal implementation** + +```ts +const contentType = response.headers.get("content-type")?.split(";")[0] ?? + "text/plain"; +if (!response.ok) { + return { + status: "error", + corpusRef: corpusRefFor( + input.rootSessionId, + `error-http-${response.status}`, + ), + summary: `Fetch failed for ${input.url} with HTTP ${response.status}.`, + queryHints: [], + fetchedUrl: input.url, + contentType, + truncated: false, + }; +} +const content = await response.text(); +``` + +This replaces the existing `contentType`/`response.text()` sequence in +`src/services/session-corpus.ts`; do not duplicate the declaration. + +- [ ] **Step 4: Run test to verify it passes** + +Run: `deno test src/services/session-corpus.test.ts --filter "non-ok status"` +Expected: PASS + +- [ ] **Step 5: Add and run the runtime-layer regression** + +```ts +it("serializes a schema-valid error response for non-ok fetches", async () => { + // Execute session_fetch_and_index through the runtime boundary and assert the + // parsed response survives schema validation with a non-empty corpus_ref. 
+}); +``` + +Run: +`deno test src/services/session-mcp-runtime.test.ts --filter "schema-valid error response"` +Expected: PASS + +### Task 3: Verify the full affected surface + +**Files:** + +- Modify: `src/services/session-corpus.ts` (only if cleanup is needed after + tests pass) +- Test: `src/services/session-corpus.test.ts` +- Test: `src/services/session-mcp-runtime.test.ts` + +- [ ] **Step 1: Run focused corpus and runtime tests** + +Run: +`deno test src/services/session-corpus.test.ts src/services/session-mcp-runtime.test.ts` +Expected: PASS + +- [ ] **Step 2: Run repository verification** + +Run: `deno task check && deno lint && deno fmt --check` Expected: PASS + +- [ ] **Step 3: Confirm Task 2 exit criteria** + +```text +- search cap applied through the compliant ranking path +- non-ok fetch responses return structured errors +- targeted tests pass +- broader Deno verification passes +``` diff --git a/plans/ContextOverhaul.md b/plans/ContextOverhaul.md deleted file mode 100644 index 16f9ddb..0000000 --- a/plans/ContextOverhaul.md +++ /dev/null @@ -1,1078 +0,0 @@ -# Context Overhaul — FalkorDB Hot Path + Async Graphiti Consolidation - -**Status:** In Implementation **Date:** 2026-03-13 (revised) | README overhaul -completed 2026-03-15 | Child-session routing documented 2026-03-15 - ---- - -## 1 Problem - -The current plugin routes every write and every query through Graphiti (via -MCP). Each `addEpisode` triggers LLM-backed entity extraction (~200–400 ms). -Each `searchFacts`/`searchNodes` issues an embedding + vector search on the hot -path of `chat.message`, adding 100–300 ms of synchronous latency per user -message. Compaction context augmentation also calls Graphiti synchronously with -no timeout. If Graphiti or its backing LLM is slow or down, the session degrades -silently with no local fallback. - -The current design also copies raw message strings rather than extracting -structured session events, and has no local searchable session history. 
- ---- - -## 2 Goals - -1. **Zero Graphiti on the hot path.** No synchronous Graphiti call may block - `chat.message`, `messages.transform`, `session.compacting`, or any - per-message event hook. All Graphiti interaction is asynchronous. -2. **Session continuity from local state.** FalkorDB/Redis owns verbatim event - history, structured snapshots, and cached memory. Compaction survives - Graphiti outages. -3. **Preserved long-term memory.** Graphiti's vector search, entity extraction, - and cross-session graph remain available — populated asynchronously and - cached in Redis for chat-time injection. -4. **Minimal async backend.** Graphiti MCP is the sole consolidation backend. It - is private infrastructure — hidden behind the async worker, never exposed to - users, and never called on the hot path. -5. **Structured event extraction.** Context-mode-style categorised events with - priority-tiered snapshot generation, not raw message copying. - ---- - -## 3 Architecture - -``` -opencode-graphiti plugin (TypeScript / Deno) - │ - ├── Hot path — ioredis → FalkorDB :6379 (Redis protocol) - │ WRITES (every event, sub-ms): - │ LPUSH session:{id}:events - │ SET session:{id}:snapshot - │ LPUSH drain:pending:{groupId} - │ READS (chat.message / compacting, sub-ms): - │ LRANGE session:{id}:events (recent session context) - │ GET session:{id}:snapshot (post-compaction restore) - │ GET memory-cache:{groupId} (cached Graphiti outputs) - │ - └── Async tier — Graphiti MCP (configured via `graphiti.endpoint`) - REQUIRED tool capabilities: - - add_memory - - search_memory_facts - - search_nodes - - get_episodes - - get_status (health check; used to verify MCP reachability) - All calls are async and never block hook returns. 
-``` - -### 3.1 Connectivity - -| Target | Protocol | Default Port | Connection | -| -------- | --------------- | ------------ | ----------------------------------------------------- | -| Redis | Redis (ioredis) | 6379 | Direct TCP; configured via `redis.endpoint` | -| Graphiti | MCP over HTTP | 8000 | Direct MCP client; configured via `graphiti.endpoint` | - -**Integration decision (final):** Graphiti MCP is the async consolidation -backend. Direct Graphiti HTTP is not used; all Graphiti interaction goes through -the configured MCP endpoint. - -**Deployment note:** both FalkorDB and Graphiti MCP are operator-provisioned -services. The plugin connects to whatever addresses are supplied in config. - -**Hot-path rule:** hot-path hooks never talk to MCP or Graphiti synchronously. -All MCP communication is queued, async, and hidden behind the plugin's local hot -path. - -**User-facing invariant:** MCP is private infrastructure. Users see only the -plugin's existing memory features and the new context-mode-style resumability — -no extra workflow, no manual sync, no awareness that MCP exists. - ---- - -## 4 Data Model - -### 4.1 Structured Event Schema - -Events are extracted from hooks, not copied verbatim. The taxonomy is designed -to preserve the useful parts of context-mode: active file state, task state, -decisions, blockers, environment changes, and searchable local history. 
- -```typescript -interface SessionEvent { - id: string; // UUID - ts: number; // epoch ms - category: EventCategory; - priority: 0 | 1 | 2 | 3 | 4; - role: "user" | "assistant" | "tool" | "system"; - summary: string; // <= 200 chars, human-readable - body?: string; // full content, truncated to 4 KB - refs?: string[]; // file paths, task IDs, session IDs, UUIDs - metadata?: Record<string, unknown>; // tool name, exit code, cwd, env deltas -} - -type EventCategory = - | "task.create" - | "task.update" - | "task.complete" - | "decision" - | "preference" - | "rule.load" - | "file.read" - | "file.write" - | "file.edit" - | "file.search" - | "cwd.change" - | "env.change" - | "git.activity" - | "error" - | "subagent.start" - | "subagent.finish" - | "integration.call" - | "intent" - | "data.import" - | "discovery" - | "message" - | "session.meta"; -``` - -### 4.1.1 Extraction Targets - -| Context-mode benefit to preserve | SessionEvent categories | Notes | -| --------------------------------- | ----------------------------------------------------- | -------------------------------------------------------------------- | -| Active files and code touchpoints | `file.read`, `file.write`, `file.edit`, `file.search` | Track most recent touched files, not just raw tool output. | -| Task state and progress | `task.create`, `task.update`, `task.complete` | Preserve current goal, checkpoints, and completion markers. | -| Decisions and user corrections | `decision`, `preference` | Highest-priority resumability signal. | -| Rules / operating constraints | `rule.load` | Capture AGENTS/plugin rules loaded into the session. | -| Errors and unresolved blockers | `error` | Include failing command/tool, status, and whether resolved. | -| Environment / cwd state | `cwd.change`, `env.change` | Preserve working directory and setup changes. | -| Git milestones | `git.activity` | Branch, commit, merge, push, stash, rebase milestones when present. 
| -| Subagent orchestration | `subagent.start`, `subagent.finish` | Track dispatched work and returned outcomes. | -| Remote/tool-service usage | `integration.call` | Track Graphiti MCP calls and other remote tool/service interactions. | -| Large pasted/reference data | `data.import`, `discovery` | Store pointers/summaries instead of re-injecting full payloads. | -| Session framing | `intent`, `session.meta`, `message` | Preserve intent, compaction markers, and low-value chat residue. | - -### 4.2 Redis Key Layout - -| Key | Type | Content | TTL | -| ----------------------------- | ------ | ------------------------------------------------------ | ------ | -| `session:{id}:events` | List | JSON `SessionEvent` objects | 24 h | -| `session:{id}:snapshot` | String | Priority-tiered XML snapshot (≤ 3 KB) | 48 h | -| `memory-cache:{groupId}` | String | Serialized Graphiti search results | 10 min | -| `memory-cache:{groupId}:meta` | Hash | `lastQuery`, `lastRefresh` (+ optional extra metadata) | 10 min | -| `drain:pending:{groupId}` | List | Serialized drain-batch entries awaiting Graphiti | 7 d | -| `drain:cursor:{groupId}` | String | Last successfully drained event ID | 7 d | - -### 4.3 Priority-Tiered Snapshot Format - -Generated at `session.idle` and `session.compacted` from structured Redis -events. Sections are filled in priority order; lower-priority sections are -truncated first when the snapshot budget (3 KB) is exceeded. 
- -| Priority | Sections | Source categories | -| -------- | ------------------------------------------------ | ------------------------------------------------ | -| P0 | `decisions`, `constraints`, `active_task` | `decision`, `preference`, `rule.load`, `task.*` | -| P1 | `active_files`, `recent_edits`, `subagents_open` | `file.*`, `subagent.start` | -| P2 | `errors`, `blockers`, `environment` | `error`, `cwd.change`, `env.change` | -| P3 | `git_state`, `subagents_done`, `open_questions` | `git.activity`, `subagent.finish`, `task.update` | -| P4 | `discoveries`, `references`, `residual_messages` | `discovery`, `data.import`, `message` | - -```xml - - - Plugin hot path must talk directly to FalkorDB; Graphiti remains async behind MCP. - - - - Graphiti stays off the hot path; Redis owns compaction survival. - - - - Redesign context pipeline around FalkorDB hot path. - Planning revised; Graphiti MCP endpoint confirmed reachable. - - - - plans/ContextOverhaul.md - - - - No open errors at snapshot time. - - - - /workspace/project - - - - Graphiti bulk ingestion is documented, but docs warn it skips edge invalidation. - - -``` - -### 4.4 Cold Tier (Graphiti — unchanged schema) - -No changes to Graphiti's internal entity/fact/node model. The plugin sends the -same semantic payloads through MCP tool calls (`add_memory`, -`search_memory_facts`, `search_nodes`, `get_episodes`). - ---- - -## 5 Hook Mapping - -### 5.1 Hot Path (synchronous, sub-ms) - -All hooks resolve the incoming `sessionID` to the canonical (root) session ID -before accessing state, events, or snapshots. Child/subagent sessions are routed -to the parent session's state transparently (see §10.1). 
- -| Hook | Action | -| -------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| `event: session.created` | Cache parent/child linkage; resolve canonical ID; `EXPIRE` reset; bootstrap best-effort async warmup / cross-session primer | -| `event: message.part.updated` | Buffer assistant part under canonical session ID | -| `event: message.updated` (completed) | Extract `SessionEvent` → `LPUSH session:{canonicalId}:events` | -| `chat.message` | Extract user `SessionEvent` → `LPUSH`; read `memory-cache:{groupId}` + recent session state from Redis; prepare transform input | -| `event: session.idle` | Build priority-tiered snapshot → `SET session:{canonicalId}:snapshot`; trigger async cache refresh + drain | -| `event: session.compacted` | Build snapshot from events → `SET session:{canonicalId}:snapshot`; enqueue drain batch | -| `event: session.deleted` | Delete only the reported session's local bookkeeping; canonical/root session state is preserved (see §10.1) | -| `experimental.session.compacting` | Compose the same canonical `<session_memory>` envelope for compaction from Redis snapshot + cached memory | -| `experimental.chat.messages.transform` | Actual chat-time injection point: compose canonical `<session_memory>` with optional `<persistent_memory>` from Redis-backed state | - -### 5.2 Async Tier (fire-and-forget, non-blocking) - -| Trigger | Action | -| ------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | -| `session.idle` / `session.compacted` / buffer threshold | Drain pending events through Graphiti MCP `add_memory` | -| `session.idle` / first `chat.message` | Refresh `memory-cache:{groupId}` via MCP `search_memory_facts` + `search_nodes` | -| `session.created` | Best-effort async cross-session primer via MCP `get_episodes`; prewarm reusable cache if timing allows | - -**No Graphiti 
call ever blocks a hook return.** - -### 5.3 Backend Rule - -| Consolidation backend | When used | Constraint | -| --------------------- | --------- | ------------------------------------------------------------------------ | -| Graphiti MCP | Always | Used only behind the async consolidation worker; never in hot-path hooks | - ---- - -## 6 Cached Memory Strategy - -### 6.1 Problem - -The current design calls `searchFacts` + `searchNodes` synchronously on every -`chat.message` (or on drift detection). This puts Graphiti + embedding latency -on the critical path. - -### 6.2 Solution: Redis-Resident Memory Cache - -``` -Session starts (`event: session.created`) - ├── [sync] Initialize empty session state; restore reusable cache keys if present - ├── [async] Fire-and-forget: best-effort warm `memory-cache:{groupId}` via MCP `get_episodes` - └── [future option, non-final] Schedule proactive `search_memory_facts`/`search_nodes` refresh for broader project scope - -First user message arrives (`chat.message`) - ├── [sync] Read memory-cache:{groupId} from Redis (sub-ms) - │ If cache hit + not stale → make cached facts/nodes available to the transform hook - │ If cache miss or stale → use last cached value (or empty) - ├── [sync] Read session:{id}:events from Redis for session context - ├── [sync] Prepare Redis-backed inputs for `experimental.chat.messages.transform` - └── [async] Fire-and-forget: refresh cache from Graphiti MCP using this prompt - search_memory_facts + search_nodes → parse results → SET memory-cache:{groupId} -``` - -### 6.3 Cache Lifecycle - -| Event | Cache Action | -| --------------------- | --------------------------------------------------------------------------------------- | -| Plugin startup | Restore Redis clients only; no synchronous Graphiti warmup | -| `session.created` | Best-effort async prewarm of reusable cache and cross-session primer | -| first `chat.message` | Read cache (sync); inject if available via transform; schedule 
prompt-specific refresh | -| later `chat.message` | Read cache (sync); schedule refresh if stale or drifted (async) | -| `session.idle` | Refresh cache (async) — incorporates recently drained facts | -| Drain completes | Refresh cache (async) — new facts now searchable | -| Cache miss / cold run | Omit `persistent_memory`; first injection still includes Redis-sourced `session_memory` | - -### 6.4 New-Session First-Turn Behavior - -Because OpenCode does not expose `SessionStart`, the plan relies on the -combination of `event: session.created`, `chat.message`, and -`experimental.chat.messages.transform`: - -- `event: session.created` cannot inject memories directly. It only bootstraps - async warmup and restores reusable cached state. -- The first actual injection point in a brand-new session is the first - `experimental.chat.messages.transform` after the user's opening message. -- `persistent_memory` on that first reply is **best-effort**, not guaranteed. -- If `memory-cache:{groupId}` was already warm from prior work, or if the - `session.created` bootstrap finishes before the first transform runs, relevant - `persistent_memory` may appear on the first reply. -- If the cache is cold, the first reply still receives `session_memory` from - FalkorDB, while `persistent_memory` may be absent until the async MCP refresh - completes. -- In practice this means long-term memory is often cold-first-turn / warmer on a - later turn, while session continuity remains available immediately. - -### 6.5 Drift Detection (Revised) - -Drift detection currently calls `searchFacts` synchronously. Under the new -design: - -- On each `chat.message`, compare the user's message against the query that - produced the current cache. -- If the topic has drifted (Jaccard on current query text vs cached query text < - threshold), schedule an async cache refresh with the new query. 
The _current_ - cached context is still injected immediately; the refreshed cache is available - for the next message. -- This trades one message of staleness for eliminating synchronous Graphiti - latency entirely. - ---- - -## 7 Injection Strategy - -Injected continuity context uses one canonical `` envelope with -an optional nested `` section. The Session Guide is assembled -from Redis hot-tier state and optional Graphiti cache data. - -Historically, the plugin's Graphiti-derived memory was injected as a standalone -`...` block. This plan keeps the caller's -current naming (`session_memory` + `persistent_memory`) and treats the older -UUID-bearing shapes as legacy compatibility details, not as a separate top-level -layer. - -```xml - - Continue the current task without asking for recap. - - - Redesign plugin around FalkorDB hot path. - - - - Keep Graphiti off the hot path; use MCP only in async consolidation. - - - - plans/ContextOverhaul.md - - - - Preserve context-mode-style resumability behavior. - - - - - - - - - - -``` - -### 7.1 Session Guide Sections - -The injected sections intentionally mirror context-mode's continuity model and -should be rendered in this order: - -| Section | Source | Required | Notes | -| ------------------- | -------------------------------- | ---------- | ------------------------------------------------------------------------------------------------------ | -| `last_request` | latest user prompt / task intent | Yes | Primary resume anchor. | -| `active_tasks` | structured task events | If present | Omitted when empty. Checkbox/task-state style when rendered. | -| `key_decisions` | decision + preference events | If present | Omitted when empty. Preserve user corrections and constraints. | -| `files_in_play` | recent file events | If present | Omitted when empty. Mirrors context-mode active-files continuity. | -| `project_rules` | loaded AGENTS/rules | If present | Omitted when empty. Must survive compaction. 
| -| `unresolved_errors` | open error events | If present | Show only unresolved blockers. | -| `git_state` | git activity events | If present | Include only meaningful milestones. | -| `subagent_work` | subagent events | If present | Summaries only, not raw logs. | -| `session_snapshot` | priority-tiered snapshot | If present | Compact state restore layer. | -| `persistent_memory` | Graphiti cache | Optional | Current emitted shape carries `node_refs`; legacy UUID-bearing blocks remain parse-only compatibility. | - -### 7.2 Budget Allocation - -| Section group | Budget | Source | Latency | -| ------------------------------------------------------------------- | ------------------------------ | -------------------------- | ------- | -| Session Guide core (`last_request`, tasks, decisions, files, rules) | up to 1 600 chars | Redis events + snapshot | < 1 ms | -| Session snapshot detail | up to 800 chars | Redis `GET` | < 1 ms | -| Persistent memory | remainder of 5% context budget | Redis `GET memory-cache:*` | < 1 ms | - -`persistent_memory` is omitted (not an error) if cache has not been warmed yet, -the session is on its first cold turn, or Graphiti is unreachable. The rest of -the Session Guide is always available because it is sourced from FalkorDB/Redis. - -### 7.3 Compatibility Note - -- **Current plan:** emit one canonical `` envelope with optional - ``. -- **Historical implementation:** Graphiti-derived memory previously appeared as - ``. -- **Migration stance:** preserve UUID/fact metadata semantics, but do not - describe or reintroduce the old shape as a separate "layer" in new plan text. 
- ---- - -## 8 Async Batch Drain - -### 8.1 Drain Policy - -Events are batched in a Redis list (`drain:pending:{groupId}`) and drained to -Graphiti asynchronously: - -| Parameter | Value | Rationale | -| --------------- | ------------------------------------------------------------------ | ---------------------------------------- | -| Max batch size | 20 events | Keeps Graphiti LLM call duration bounded | -| Max batch bytes | 50 KB combined body | Avoids oversized episode payloads | -| Drain triggers | `session.idle`, `session.compacted`, buffer threshold | Natural pause points | -| Retry policy | Exponential backoff, 3 attempts, then dead-letter | Bounded retry cost | -| Idempotency | Each event has a UUID; Graphiti deduplicates by episode name+group | At-least-once safe | - -**Important Graphiti constraint:** the drain path uses standard `add_memory` -sequentially per `groupId` to ensure normal entity invalidation semantics on an -active agent graph. Bulk ingestion (`add_episode_bulk`, if available) is -documented by Graphiti as skipping edge invalidation and is reserved for -bootstrap/backfill scenarios only; it is not part of the current plan. - -### 8.2 Ordering Guarantees - -- Events within a session are appended to the Redis list in order. -- Drain reads the oldest entries first (FIFO); because entries are `LPUSH`ed at the list head, the oldest entries sit at the list tail. A cursor (`drain:cursor:{groupId}`) - tracks the last successfully drained event ID. -- If a batch partially fails, the cursor is not advanced; the entire batch is - retried. -- Cross-session ordering is best-effort (sessions drain independently). - -### 8.3 Crash Recovery - -- On plugin restart, the drain scheduler reads `drain:pending:{groupId}` and - `drain:cursor:{groupId}` from Redis. -- Events after the cursor are re-drained. Because drain is idempotent - (UUID-keyed), duplicates are harmless. -- If Redis itself is lost, pending events in memory are lost. 
This is acceptable - because they are session-local and Graphiti is the durable store — the lost - events simply won't be consolidated into the knowledge graph. - -### 8.4 Dead-Letter Handling - -After 3 failed drain attempts for a batch: - -- Log a warning with the batch event IDs. -- Move the batch to `drain:dead:{groupId}` (Redis list, 30-day TTL). -- Advance the cursor past the failed batch. -- A manual retry command (or scheduled job) can re-enqueue dead-letter batches. - ---- - -## 9 Compaction Flow (Revised) - -``` -session.compacting hook fires - ├── [sync] GET session:{id}:snapshot from Redis - ├── [sync] GET memory-cache:{groupId} from Redis (cached Graphiti facts) - ├── [sync] Compose the same `<session_memory>` envelope used for chat injection - ├── [sync] Inject into output.context - └── [done] ← no Graphiti call, no timeout risk - -session.compacted event fires (after LLM compaction completes) - ├── [sync] LRANGE session:{id}:events → rebuild snapshot → SET - ├── [async] Enqueue compaction summary to drain batch - └── [async] Refresh memory cache (new context post-compaction) -``` - -No timeout/fallback needed for compaction augmentation because all reads are -from Redis (sub-ms). The Graphiti-sourced context comes from the pre-warmed -Redis cache, not a live query. - ---- - -## 10 Session Lifecycle - -### 10.1 Session ID Semantics and Child-Session Routing - -> **Intentional divergence from `mksglu/context-mode`.** Official context-mode -> treats subagent activity as summarized `Agent` tool events within the parent -> session's continuity log — the child session is not a first-class participant -> in session state; only its summary is recorded. -> -> This plugin intentionally promotes child sessions to first-class participants -> in the root session's state. 
The rationale is that OpenCode's multi-agent -> orchestration (swarm, delegated tasks) produces work that is semantically part -> of the parent session — decisions made, files edited, and errors encountered -> by a child agent are as important to session continuity as parent-originated -> events. Treating them as opaque summaries would lose actionable detail that -> the parent (or a sibling child) needs on the next turn. -> -> **Do not "fix" this by reverting to summarized-only subagent tracking unless -> the design decision is explicitly revisited.** Future alignment sessions with -> upstream context-mode should treat this section as the canonical record of the -> divergence. - -Every hook-provided `sessionID` is resolved to a **canonical (root) session ID** -before any state access. When a session has a `parentID`, the plugin walks the -parent chain to find the root session and uses that root ID for all Redis keys, -event storage, snapshot builds, `<session_memory>` injection, and compaction -context. This means child/subagent sessions are first-class participants in the -parent session's memory: - -- **Event log**: child prompts, responses, tool calls, and structured events are - recorded under the root session's `session:{canonicalId}:events` key. -- **Snapshot**: child activity is included when the priority-tiered snapshot is - rebuilt at `session.idle` or `session.compacted`. -- **`<session_memory>` injection**: the same prepared envelope is used - regardless of whether the triggering hook fires from a parent or child - session. `chat.message`, `experimental.chat.messages.transform`, and - `experimental.session.compacting` all resolve to the canonical session before - reading or writing state. -- **Compaction**: child-derived events survive compaction because they live in - the same event list and snapshot as the parent. 
-- **Future `<session_memory>` injections**: because child events are stored - alongside parent events, they are included in later snapshot rebuilds and - appear in subsequent `<session_memory>` injections for any session in the same - lineage. - -Parent/child linkage is established at `session.created` time via -`setParentId()` and cached for the process lifetime. The canonical ID is -resolved lazily (with an SDK lookup fallback) and cached once resolved. Cycle -detection prevents infinite loops in malformed parent chains. - -#### Child-Session Deletion Semantics - -When a `session.deleted` event fires for a child session, **only that child's -local bookkeeping is removed** (parent-ID cache entry, canonical-ID cache entry, -buffered assistant messages scoped to the child). The canonical/root session's -state, event log, snapshot, and lifecycle are **not** deleted. This prevents a -child session teardown from accidentally wiping the parent's accumulated memory. - -- Session state is local to the plugin process; Redis keys provide persistence - across plugin restarts within TTL windows. - -### 10.2 Startup / Bootstrap - -1. Plugin initializes `ioredis` connection to FalkorDB Redis port. -2. If Redis is unreachable: log error, disable hot tier, fall back to in-memory - event buffer (degraded but functional — same as current behavior without - Redis). Retry connection with exponential backoff. -3. Plugin initializes the Graphiti MCP client. Graphiti availability is checked - lazily on first drain attempt. -4. Async: if reusable cache context is identifiable, start best-effort warmup of - `memory-cache:{groupId}`. -5. If Graphiti is unreachable at startup: log warning, continue. Memory cache - remains empty until Graphiti comes online and a drain/refresh succeeds. 
- -### 10.3 Failure Modes - -| Component Down | Impact | Recovery | -| ---------------- | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | -| Redis (FalkorDB) | No session events, no snapshot, no cache. In-memory fallback for current session; no cross-restart persistence. | Auto-reconnect (ioredis built-in). State rebuilds on reconnect. | -| Graphiti | No drain, no cache refresh. Cached memory stales out (10 min TTL). Session continuity unaffected. | Drain retries on next trigger. Cache refreshes when Graphiti returns. | -| Both | Plugin operates with in-memory session buffer only. Equivalent to current plugin without Graphiti, minus cross-session memory. | Both auto-recover independently. | -| Plugin crash | In-memory state lost. Redis state survives within TTL. | On restart, read `drain:pending` + `drain:cursor` from Redis; resume drain. Session snapshot available for next session. | - ---- - -## 11 Searchable Session History - -### 11.1 Local Session Recall (reuse existing stack only) - -Do not introduce a separate SQLite store. Local session recall stays within the -existing FalkorDB/Graphiti stack: - -- **Primary local source:** Redis/FalkorDB hot-tier event log + snapshot keys. -- **Optional secondary index:** if the FalkorDB deployment includes RediSearch, - use it to index `SessionEvent.summary` and selected `body` fields. -- **Fallback:** if RediSearch is unavailable, use bounded linear scan over the - hot-tier event list for recent-session diagnostics and compaction recovery. - -### 11.2 Cross-Session Search (Graphiti) - -Cross-session search goes through Graphiti's vector/graph search, but only via -the async cache layer — never as a synchronous hot-path call. 
- ---- - -## 12 Tradeoffs - -| Tradeoff | Impact | Mitigation | -| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **One-message staleness on topic drift** | After a topic shift, the first message uses the old cached memory; the refresh arrives for the next message. | Acceptable for most conversations. Cache refresh latency is ~200 ms; user won't notice the one-turn delay. | -| **Cold-start empty persistent memory** | First reply in a new or cold session may have no Graphiti-derived `persistent_memory`. | Redis-sourced `session_memory` still provides immediate continuity. Warmup is best-effort and improves later turns when it wins the race. | -| **Redis as SPOF for hot tier** | If Redis is down, session events and snapshots are unavailable. | In-memory fallback provides degraded session continuity. ioredis auto-reconnects. | -| **Eventual consistency of knowledge graph** | Graphiti facts lag behind conversation by drain interval (seconds to minutes). | Acceptable — knowledge graph is for cross-session recall, not intra-session continuity. | -| **Lost events on plugin crash before drain** | Events buffered in-memory but not yet in Redis `drain:pending` are lost. | Use Redis `drain:pending` as the durable queue (write-ahead). Events are written to `drain:pending` at the same time as `session:{id}:events`. | -| **10-min cache TTL may serve stale facts** | Facts invalidated in Graphiti may still appear in cache for up to 10 minutes. | Current design has the same staleness issue (search results are point-in-time). Configurable TTL. | -| **No snapshot for very short sessions** | Sessions that end before `session.idle` fires produce no snapshot. 
| Acceptable — short sessions have minimal context to preserve. | -| **MCP tool-call abstraction** | MCP adds protocol overhead vs direct HTTP and limits control over request shaping. | Overhead is irrelevant on the async path. Direct HTTP remains a future option only if the API surface is later confirmed; it is not part of the current plan. | - ---- - -## 13 Config Changes - -`GraphitiConfig` keeps only the original top-level Graphiti keys for backward -compatibility, while using explicit nested sections for Redis and Graphiti. -Canonical nested values take precedence whenever both forms are supplied. - -```typescript -interface GraphitiConfig { - // Preferred nested config - redis?: { - endpoint?: string; // Redis URL for the plugin hot tier (default: "redis://localhost:6379") - batchSize?: number; // max events per drain batch (default: 20) - batchMaxBytes?: number; // max combined body bytes per batch (default: 51200) - sessionTtlSeconds?: number; // session:{id}:events TTL (default: 86400) - cacheTtlSeconds?: number; // memory-cache TTL (default: 600) - drainRetryMax?: number; // max drain retry attempts (default: 3) - }; - - graphiti?: { - endpoint?: string; // Graphiti MCP URL (e.g. "http://localhost:8000/mcp") - groupIdPrefix?: string; - driftThreshold?: number; - }; - - // Legacy top-level keys still accepted during migration (Graphiti settings) - endpoint?: string; - groupIdPrefix?: string; - driftThreshold?: number; - - // Legacy nested compatibility during migration - falkordb?: { - redisEndpoint?: string; - batchSize?: number; - batchMaxBytes?: number; - sessionTtlSeconds?: number; - cacheTtlSeconds?: number; - drainRetryMax?: number; - }; -} -``` - -Resolution rules for the implementation: - -1. Read Redis settings from `redis.*` first; fall back to legacy nested - `falkordb.*` only when the higher-precedence value is absent. -2. 
Read Graphiti settings from `graphiti.*` first; fall back to legacy top-level - Graphiti keys only when the nested value is absent. -3. New docs, examples, validation, and runtime lookups should use the nested - shape as canonical; only Graphiti top-level keys remain for compatibility. - ---- - -## 14 File Changes - -### New Files - -``` -src/services/redis-client.ts — ioredis wrapper, connection management, fallback -src/services/redis-events.ts — SessionEvent extraction, LPUSH/LRANGE helpers -src/services/redis-snapshot.ts — priority-tiered snapshot builder -src/services/redis-cache.ts — memory-cache read/write/refresh logic -src/services/graphiti-mcp.ts — Graphiti MCP client wrapper -src/services/graphiti-async.ts — async consolidation worker backed by Graphiti MCP -src/services/batch-drain.ts — drain scheduler, cursor management, dead-letter -src/services/event-extractor.ts — structured event extraction from hook payloads -``` - -### Modified Files - -``` -src/config.ts — add canonical `redis`/`graphiti` sections, retain nested `falkordb` compatibility and top-level Graphiti compatibility, and resolve precedence -src/types/index.ts — add SessionEvent, EventCategory types -src/session.ts — SessionState gains hotTierReady; wire Redis client and async Graphiti consolidation worker; remove direct GraphitiClient dependency; add canonical session ID resolution, parent/child linkage cache, and child-safe deletion -src/services/connection-manager.ts — adapt existing MCP transport lifecycle for the new graphiti-mcp.ts wrapper (reconnect backoff, request queuing already implemented) -src/handlers/event.ts — hot tier writes on all event types, async drain triggers; all hooks resolve to canonical session ID; child deletion preserves parent state -src/handlers/chat.ts — read from Redis cache instead of sync Graphiti calls; resolves to canonical session ID for child sessions -src/handlers/compacting.ts — read snapshot + cache from Redis, no Graphiti calls; resolves to 
canonical session ID for child sessions -src/handlers/messages.ts — compose canonical `session_memory` envelope from Redis-sourced data; resolves to canonical session ID for child sessions -src/index.ts — wire Redis client + async Graphiti MCP worker -``` - -### Removed/Deprecated Files - -``` -src/services/client.ts — replaced by graphiti-mcp.ts -``` - ---- - -## 15 Implementation Order - -| Phase | Files | Depends On | Acceptance Criteria | -| -------------------------------- | ----------------------------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| 0. Normalize MCP contract | — | — | Confirm tool payload/response handling against a reachable Graphiti MCP endpoint. | -| 1. Consolidation backend | `graphiti-mcp.ts`, `graphiti-async.ts` | Phase 0 | Async worker can drain, refresh cache, and load primers through Graphiti MCP with no hot-path blocking. | -| 2. Redis primitives | `redis-client.ts`, `redis-events.ts` | — | LPUSH/LRANGE/GET/SET work against FalkorDB. Connection retry works. | -| 3. Event extractor | `event-extractor.ts`, `types/index.ts` | — | Hook payloads produce context-mode-equivalent `SessionEvent` categories. Unit tests. | -| 4. Snapshot builder | `redis-snapshot.ts` | Phase 3 | Priority-tiered XML snapshot generated from event list. Budget enforcement. Unit tests. | -| 5. Local search strategy | — | Phases 2, 4 | Redis/FalkorDB-only session recall path works; optional RediSearch path documented if available. | -| 6. Memory cache | `redis-cache.ts` | Phases 1, 2 | Async Graphiti search results written to and read from Redis. TTL expiry. Stale-read behavior. | -| 7. Batch drain | `batch-drain.ts` | Phases 1, 2, 3 | Events drain to Graphiti async with sequential ingest semantics by `groupId`. Cursor tracking. Crash recovery. | -| 8. 
Wire handlers | `event.ts`, `chat.ts`, `compacting.ts`, `messages.ts` | Phases 2–7 | All hooks use Redis hot path. No synchronous Graphiti calls remain. Existing test assertions hold. | -| 9. Config & bootstrap | `config.ts`, `index.ts`, `session.ts` | Phase 8 | Nested `redis`/`graphiti` config is validated, legacy nested `falkordb` compatibility remains, top-level Graphiti fallback works, and canonical nested values take precedence. | -| 10. Docs alignment ✓ (completed) | `README.md` | Phase 9 | ✓ README incorporates all adopted context-mode feature descriptions and credits the original author/project by name. | -| 11. Integration tests | — | All | End-to-end: message -> Redis event -> snapshot -> async drain -> Graphiti -> cache refresh -> injection. | - ---- - -## 16 Confirmed Decisions, Remaining Validation, and Future Options - -### 16.1 Confirmed decisions for this plan - -- **Hot path:** FalkorDB/Redis (configured canonically via `redis.endpoint`, - with legacy fallback to nested `falkordb.*`) is the hot path for writes, - snapshots, and cached reads. -- **Cold/async backend:** Graphiti stays off the hot path. The consolidation - backend is Graphiti MCP (configured via `graphiti.endpoint`, with legacy - fallback to `endpoint`). -- **Hook model:** because OpenCode lacks `SessionStart`, first-turn memory must - rely on `event: session.created` bootstrap + `chat.message` + - `experimental.chat.messages.transform`. -- **Naming:** the canonical injected structure remains `session_memory` with - optional `persistent_memory`. -- **Storage scope:** do not add new independent storage such as SQLite. -- **Docs alignment:** README has been updated to reflect the two-layer - architecture design and includes acknowledgement of the context-mode - inspiration with proper attribution. -- **Child-session routing diverges from context-mode (intentional):** official - context-mode records subagent work as summarized `Agent` tool events. 
This - plugin instead resolves every child/subagent session to the canonical root - session and treats child events as first-class entries in the shared event - log, snapshot, and `<session_memory>` injection. See §10.1 for the full - rationale. This is a deliberate design choice, not an alignment gap. - -### 16.2 Remaining implementation validation - -- [ ] **MCP payload/response normalization**: the endpoint is already verified - as reachable; implementation still needs to lock down exact - request/response handling for `add_memory`, `search_memory_facts`, - `search_nodes`, and `get_episodes`. -- [ ] **Graphiti bulk semantics**: official docs warn `add_episode_bulk` skips - edge invalidation. Confirm whether any bootstrap/backfill path here can - safely use bulk, or whether all non-empty-graph traffic must remain - sequential `add_memory`. -- [ ] **RediSearch in FalkorDB**: if the image includes RediSearch, decide - whether to use it for optional local session search over structured - events. -- [ ] **Cache key namespacing**: if multiple plugin instances share the same - FalkorDB, cache keys need instance-level namespacing to avoid collisions. - Current `groupId` prefix may suffice. -- [ ] **Drift detection heuristic**: the cached Jaccard approach compares query - UUID sets rather than issuing a live search. Validate that this is good - enough in practice. -- [ ] **Connection manager reuse**: the existing - `src/services/connection-manager.ts` (from `plans/ConnectionManager.md`) - already implements MCP transport lifecycle, reconnect backoff, and request - queuing. Decide whether `graphiti-mcp.ts` wraps it as-is, adapts it, or - replaces it. 
- -### 16.3 Pending: Memory Hygiene and Legacy Injection Cleanup - -**Status:** Implemented and verified in repo tests (live-session -cleanup/validation still pending) - -The current implementation still has a serious memory-quality problem even -though the hot-path architecture itself has been migrated to FalkorDB/Redis + -async Graphiti MCP. In live sessions, the canonical `` envelope -is being polluted by duplicated user text, assistant operational chatter, -tool-call scaffolding, and transcript-heavy residue that should never be treated -as durable continuity state. The same user instruction is often copied into -multiple sections such as `last_request`, `active_tasks`, and `key_decisions`, -which wastes prompt budget and weakens the signal that these sections are -supposed to carry. Assistant-authored analysis and planning text is also being -promoted into `unresolved_errors`, `discoveries`, and `residual_messages`, -causing the plugin to remember its own commentary rather than the user's actual -goals, decisions, blockers, and file work. - -The problem is broader than simple duplication. Raw tool transcript content is -still entering the memory pipeline: `Read` output dumps, wrapper tags such as -`` and ``, agent/tool orchestration text, and previously injected -memory blocks are being re-consumed as fresh session evidence. This creates a -feedback loop where memory injection becomes self-referential: old injected -memory is parsed again, assistant summaries are stored as facts, and the next -turn receives an even noisier envelope. The result is a prompt that is larger, -less stable, and less representative of the true session state than the -context-mode-style continuity model this overhaul is trying to preserve. - -Persistent memory quality is also compromised by stale or low-value Graphiti -facts. 
Instead of surfacing durable project knowledge, the current -`persistent_memory` block can include meta-facts about planning files, assistant -actions, prior phrasing suggestions, and historical implementation chatter that -is no longer relevant to the active turn. At the same time, the legacy top-level -`...` format is still appearing alongside the -canonical `` path in some live runs, which indicates that -compatibility handling is still leaking into effective prompt output. Until -these hygiene issues are fixed, the architecture change is only partially -successful: Graphiti is off the hot path, but the injected continuity state is -still too noisy, too repetitive, and too contaminated by assistant/tool -artifacts to deliver the intended resumability benefits. - -#### 16.3.1 Alignment target - -This cleanup should intentionally move the hot path closer to context-mode's -session-continuity behavior. The design goal is not simply "less verbose" -memory; it is a narrower contract for what counts as durable working state. -Context-mode's implementation works because it primarily stores compact, -category-specific events and reconstructs a small resume snapshot from those -events rather than replaying transcripts. The same principle should govern this -plugin's hot tier. 
- -The target behavior is: - -- event storage is compact, typed, and continuity-oriented rather than - transcript-oriented -- tool outputs are used to infer structure, not replayed as durable memory text -- assistant operational prose is not treated as project memory -- injected memory is stable, small, and semantically partitioned -- Graphiti acts as an optional background knowledge source, not a second - transcript channel - -In practice, that means the hot path should remember things like the user's last -request, active tasks, files in play, key decisions, and concrete blockers, but -not the raw `Read` result, not the assistant's planning narration, and not the -XML/text wrappers of previously injected memory. - -#### 16.3.2 Revised hot-tier data contract - -The hot-path pipeline should enforce a stricter contract at each stage: - -1. **Sanitize before extraction**: remove injected memory blocks and obvious - wrapper text before any new event extraction occurs. -2. **Extract compact events**: store concise, typed continuity events with hard - length limits and category-specific schemas. -3. **Build a conservative snapshot**: synthesize only high-value continuity - sections; treat everything else as discardable. -4. **Render a stable envelope**: produce a deterministic `<session_memory>` - block whose sections do not duplicate each other. -5. **Drain only semantic episodes**: send Graphiti compact facts about work - state, not conversational residue. - -Each stage should be allowed to throw away information aggressively. The point -of the hot tier is resumability, not archival completeness. - -#### 16.3.3 Input sanitization and reinjection prevention - -The first concrete change should be to prevent the pipeline from re-consuming -its own output. - -Planned implementation details: - -- In `src/handlers/chat.ts` and any extraction entrypoint, strip leading - canonical `<session_memory>...</session_memory>` blocks before deriving - `last_request` or user events. 
-- In `src/handlers/messages.ts`, continue parsing visible UUID metadata from - legacy `` blocks for compatibility, but strip legacy block - text from the effective user content before it can be re-extracted. -- Add a shared sanitizer utility that removes: - - canonical injected memory blocks - - legacy injected memory blocks - - wrapper lines such as ``, ``, and similar tool-output tags - when they are part of replayed tool transcript rather than true user input -- Ensure this sanitizer runs before both hot-tier event extraction and async - Graphiti drain preparation. - -This stage is required to break the self-referential loop visible in live -sessions, where injected memory and tool transcript wrappers become fresh memory -material on the next turn. - -#### 16.3.4 Extraction redesign around context-mode-like compact events - -`src/services/event-extractor.ts` should be narrowed so it behaves more like -context-mode's compact event extraction model. - -Planned extraction policy by source: - -- **User message events** - - Keep: explicit request/intent, user decisions, preferences, task updates, - user-pasted data references when genuinely user-originated. - - Reject: repeated injected memory text, quoted assistant prose, copied tool - output, and orchestration chatter. -- **Read/search tool events** - - Keep: file path, query, maybe a tiny summary derived from metadata. - - Reject: full returned content, wrapper blocks, and long bodies. -- **Edit/write tool events** - - Keep: touched file path plus a short semantic summary if one is reliably - derivable. -- **Error events** - - Keep: concrete failing command/tool name, status, concise failure text. - - Reject: assistant hypotheses, debugging commentary, and narrative status - updates. -- **Subagent events** - - Keep: launch intent and terse completion result. - - Reject: full delegated report bodies. -- **Integration/MCP events** - - Keep: service call occurred, optional tool name, success/failure signal. 
- - Reject: request/response payload bodies. - -This redesign should also reduce the default payload size of each stored event. -By default, event bodies should be one sentence or one path-like datum, not an -open-ended transcript field. - -#### 16.3.5 Section-specific rendering rules and dedupe - -The canonical `<session_memory>` envelope should follow a more rigid section -contract so the same sentence cannot be repeated across multiple sections. - -Planned section semantics: - -- `last_request` - - exactly one normalized user request from the latest turn - - never duplicated verbatim in any other section -- `active_tasks` - - only explicit task-state items or inferred work items with task-like shape - - should not restate `last_request` if no real task structure exists -- `key_decisions` - - only user decisions/preferences/corrections that materially changed the - direction of work -- `files_in_play` - - paths only -- `project_rules` - - rule paths or compact rule summaries only -- `unresolved_errors` - - concrete unresolved blockers only -- `session_snapshot` - - compact secondary restore layer only; never a replay of upper sections - -Implementation should normalize candidate strings and use explicit precedence -when deduping: - -- `last_request` outranks `active_tasks` -- `active_tasks` outrank `key_decisions` when text is effectively the same work - item -- explicit user decisions outrank generic discoveries -- `session_snapshot` must not restate text already emitted in top-level fields - -This is the direct fix for the failure mode where one user sentence currently -lands in `last_request`, `active_tasks`, and `key_decisions` simultaneously. - -#### 16.3.6 Snapshot simplification - -`src/services/redis-snapshot.ts` should become more conservative and closer to -context-mode's priority-tiered snapshot builder. 
- -Planned changes: - -- preserve a small number of high-value sections only: - - decisions / constraints - - active task state - - active files / recent edits - - concrete blockers / unresolved errors - - environment / git state -- heavily cap or omit low-value sections such as: - - `discoveries` - - `references` - - `residual_messages` -- make omission the default for weak sections rather than filling them with - low-quality text -- enforce deterministic ordering and small fixed limits so the same session - state renders similarly across turns - -The snapshot should be boring and durable. If a section cannot be represented in -compact, high-signal form, it should not be injected. - -#### 16.3.7 Graphiti drain and cache filtering - -The async Graphiti tier should inherit the same compact-memory discipline; -otherwise `persistent_memory` will remain polluted even if the hot-tier snapshot -improves. - -Planned changes: - -- Drain only semantic episodes built from structured events, not raw transcript - fragments. -- Reject drain entries dominated by: - - tool scaffolding - - injected memory text - - assistant operational narration - - agent-control syntax - - file-content dumps -- During cache refresh, prefer durable facts about: - - architecture decisions - - constraints - - explicit user preferences - - major work milestones - - meaningful project entities -- Filter out stale or low-value facts about: - - prior phrasing suggestions - - assistant planning chatter - - tool routing advice - - historical meta-discussion unrelated to active work -- Prefer rendering facts over nodes, and render nodes only when they add unique - value. - -This should make `persistent_memory` act like sparse background knowledge, -closer to context-mode's retrieval posture, rather than an echo chamber of old -agent conversation. 
- -#### 16.3.8 Rollout and cleanup - -Because existing Redis and Graphiti data are already polluted, the rollout must -include a cleanup step after the code-level hygiene fixes land. - -Planned rollout steps: - -- land sanitization, extraction, snapshot, and drain filtering changes first -- validate behavior in unit tests and targeted integration tests -- reset or namespace polluted Redis hot-tier keys for the affected project -- reset or namespace Graphiti group data so stale low-value facts stop - repopulating cache -- verify fresh-session behavior after cleanup, not just behavior in an already - poisoned namespace - -Without this cleanup, old low-value facts may continue to dominate recall and -hide whether the new extraction rules are actually working. - -#### 16.3.9 Required verification - -This work should only be considered complete when both code-level and live-run -verification show that the hot path now behaves more like context-mode's compact -continuity model. - -Required verification targets: - -- sanitizer tests proving injected memory cannot be re-consumed as new input -- extraction tests proving `Read`/search outputs store refs rather than bodies -- section-dedupe tests proving the same normalized text cannot occupy - `last_request`, `active_tasks`, and `key_decisions` together -- transform tests proving canonical and legacy memory blocks cannot coexist in - final injection -- Graphiti drain/cache tests proving assistant chatter and transcript wrappers - are rejected -- live-session validation proving assistant planning text no longer appears in - `unresolved_errors`, `discoveries`, or `persistent_memory` -- live-session validation proving the injected envelope is smaller, more stable, - and more continuity-focused across turns - -- [x] **Strip injected memory before extraction**: before processing a new user - turn, remove leading legacy `...` and canonical - `...` blocks so injected context is - not re-learned as fresh content. 
-- [x] **Harden memory hygiene filters**: never persist raw tool payloads, `Read` - output dumps, XML-like wrappers, assistant operational chatter, or agent - orchestration text into hot-tier summaries or Graphiti drain batches. -- [x] **Make extraction allowlist-based**: only promote durable continuity - signals such as user intent, explicit decisions, active tasks, file - edits/writes, meaningful git milestones, and real unresolved errors. -- [x] **Stop storing transcript-heavy tool bodies**: keep refs and compact - summaries for file reads/searches, but do not retain full returned file - contents in session memory or Graphiti episodes. -- [x] **Gate async Graphiti writes more aggressively**: skip semantic drain - entries whose content is primarily tool-call scaffolding, injected memory, - assistant self-narration, or agent-control text. -- [x] **Shrink the injected envelope**: favor `last_request`, `active_tasks`, - `key_decisions`, and `files_in_play`; heavily cap or suppress noisy - `discoveries`, `residual_messages`, and assistant-originated - `unresolved_errors`. -- [x] **Add regression coverage**: verify that legacy `` does not leak - into new injections, duplicated text does not land across multiple - sections, assistant chatter is not stored as errors, and noisy persistent - memory facts are filtered out. -- [ ] **Plan one-time cleanup of poisoned state**: after code fixes land, reset - or namespace polluted Redis hot-tier keys and Graphiti group data so stale - low-value memories stop resurfacing. - -### 16.4 Future options (non-final) - -- [ ] **More proactive cache prewarm**: broaden warmup beyond `get_episodes` - into project-scope `search_memory_facts`/`search_nodes` if the extra async - work is worth the cache-hit improvement. -- [ ] **Alternative Graphiti transport**: direct Graphiti HTTP could be - revisited later only if its API surface is confirmed and there is a - concrete reason to move away from MCP. It is not part of the current plan. 
diff --git a/src/handlers/chat.test.ts b/src/handlers/chat.test.ts index b45ce06..50940a4 100644 --- a/src/handlers/chat.test.ts +++ b/src/handlers/chat.test.ts @@ -330,6 +330,49 @@ describe("chat handler", () => { assertEquals(graphitiAsync.refreshCalls, []); }); + it("prepares local-first session memory even when cached persistent memory is absent", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.prepareInjectionResult = { + envelope: + 'Continue locally', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "Continue locally", + }, + }; + const redisEvents = new MockRedisEvents(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createChatHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, + }); + + await handler( + { sessionID: "session-1" }, + { parts: [{ type: "text", text: "Continue locally" }] } as never, + ); + + assertStringIncludes( + sessionManager.state.pendingInjection?.envelope ?? 
"", + "", + ); + assertEquals( + sessionManager.state.pendingInjection?.envelope.includes( + " { for ( const decision of [ diff --git a/src/handlers/compacting.test.ts b/src/handlers/compacting.test.ts index cceb797..cf4eaf6 100644 --- a/src/handlers/compacting.test.ts +++ b/src/handlers/compacting.test.ts @@ -73,6 +73,38 @@ describe("compacting handler", () => { }]); }); + it("preserves local-first session memory shape during compaction with cached persistent memory optional", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.prepareInjection = ((sessionId: string) => { + sessionManager.prepareInjectionCalls.push(sessionId); + const prepared = { + envelope: + 'continuecached recall', + nodeRefs: ["node-1"], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "continue", + }, + }; + sessionManager.state.pendingInjection = prepared; + return prepared; + }) as typeof sessionManager.prepareInjection; + const handler = createCompactingHandler({ + sessionManager: sessionManager as never, + }); + + const output = { context: [] as string[] }; + await handler({ sessionID: "session-1" }, output as never); + + assertEquals(output.context.length, 1); + assertStringIncludes(output.context[0], ""); + assertStringIncludes(output.context[0], " { const sessionManager = new MockSessionManager(); sessionManager.canonicalSessionId = "parent-session"; diff --git a/src/handlers/event.test.ts b/src/handlers/event.test.ts index 576f544..4ffe0b0 100644 --- a/src/handlers/event.test.ts +++ b/src/handlers/event.test.ts @@ -1,9 +1,10 @@ -import { assertEquals } from "jsr:@std/assert@^1.0.0"; +import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import { createEventHandler } from "./event.ts"; import { resolveContextLimit } from "../services/context-limit.ts"; import { setLoggerSilentOverride } from 
"../services/logger.ts"; import type { SessionState } from "../session.ts"; +import type { SessionEvent } from "../types/index.ts"; class FakeClock { now = 0; @@ -323,36 +324,29 @@ class MockRedisEvents { body?: string; continuityText?: string; }> = []; + events: SessionEvent[] = []; touchedSessionIds: string[] = []; recordEvent( sessionId: string, groupId: string, - event: { summary: string; category?: string }, + event: SessionEvent, ) { this.calls.push({ sessionId, groupId, summary: event.summary, category: event.category, - body: (event as { body?: string }).body, - continuityText: (event as { continuityText?: string }).continuityText, + body: event.body, + continuityText: event.continuityText, }); + this.events.push(event); return 1; } - async getRecentSessionEvents() { + async getRecentSessionEvents(_sessionId: string, limit = 40) { await Promise.resolve(); - return [ - { - id: "1", - ts: Date.now(), - category: "intent", - priority: 0, - role: "user", - summary: "Finish the overhaul", - }, - ]; + return this.events.slice(-limit); } async touchSessionEvents(sessionId: string) { @@ -364,11 +358,11 @@ class MockRedisEvents { class DeferredRedisEvents extends MockRedisEvents { resume!: () => void; - override async getRecentSessionEvents() { + override async getRecentSessionEvents(sessionId: string, limit = 40) { await new Promise((resolve) => { this.resume = resolve; }); - return super.getRecentSessionEvents(); + return super.getRecentSessionEvents(sessionId, limit); } } @@ -376,8 +370,13 @@ class MockRedisSnapshot { saved: Array<{ sessionId: string; snapshot: string }> = []; touchedSessionIds: string[] = []; - rebuildAndSave(sessionId: string) { - const snapshot = ``; + rebuildAndSave(sessionId: string, events: SessionEvent[]) { + const refs = [...new Set(events.flatMap((event) => event.refs ?? []))].join( + ",", + ); + const snapshot = refs.length > 0 + ? 
`${refs}` + : ``; this.saved.push({ sessionId, snapshot }); return snapshot; } @@ -600,7 +599,7 @@ describe("event handler", () => { }); it("records the compaction summary as a structured event before rebuilding the snapshot", async () => { - const sessionManager = new MockSessionManager(); + const sessionManager = new MockSessionManager({ idleRetentionMs: 100 }); const state = sessionManager.createDefaultState("group-1", "user-1"); sessionManager.setState("session-1", state); const redisEvents = new MockRedisEvents(); @@ -1964,6 +1963,114 @@ describe("event handler", () => { assertEquals(typeof redisEvents.calls[0].continuityText, "string"); }); + it("records compact continuity metadata for session_* tool results without requiring Graphiti on the hot path", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.setState( + "session-1", + sessionManager.createDefaultState("group-1", "user-1"), + ); + const { handler, redisEvents, graphitiAsync } = createHandler( + sessionManager, + ); + + await handler({ + event: { + type: "tool.completed", + properties: { + sessionID: "session-1", + tool: "session_execute_file", + args: { + root_session_id: "session-1", + paths: ["src/session.ts"], + }, + output: JSON.stringify({ + status: "ok", + summary: "Indexed src/session.ts for continuity checks", + artifact_ref: "local://session_execute_file/1", + corpus_ref: "local://session/root/corpus/1", + file_count: 1, + truncated: true, + }), + }, + } as never, + }); + + assertEquals(redisEvents.calls.length, 1); + assertEquals(redisEvents.calls[0].category, "file.read"); + assertStringIncludes( + redisEvents.calls[0].continuityText ?? 
"", + "src/session.ts", + ); + assertEquals(graphitiAsync.primerCalls, []); + assertEquals(graphitiAsync.drainCalls, []); + assertEquals(graphitiAsync.refreshCalls, []); + }); + + it("keeps session_* continuity in the local snapshot model across compaction and idle rebuilds", async () => { + const sessionManager = new MockSessionManager(); + const state = sessionManager.createDefaultState("group-1", "user-1"); + sessionManager.setState("session-1", state); + const redisEvents = new MockRedisEvents(); + const redisSnapshot = new MockRedisSnapshot(); + const redisCache = new MockRedisCache(); + const graphitiAsync = new MockGraphitiAsync(); + + const handler = createEventHandler({ + sessionManager: sessionManager as never, + redisEvents: redisEvents as never, + redisCache: redisCache as never, + redisSnapshot: redisSnapshot as never, + graphitiAsync: graphitiAsync as never, + defaultGroupId: "group-1", + defaultUserGroupId: "user-1", + sdkClient: { provider: { list: () => ({ data: [] }) } } as never, + directory: "/tmp/project", + }); + + await handler({ + event: { + type: "tool.completed", + properties: { + sessionID: "session-1", + tool: "session_execute_file", + args: { + root_session_id: "session-1", + paths: ["src/session.ts"], + }, + output: JSON.stringify({ + status: "ok", + summary: "Indexed src/session.ts for continuity checks", + artifact_ref: "local://session_execute_file/1", + corpus_ref: "local://session/root/corpus/1", + file_count: 1, + truncated: false, + }), + }, + } as never, + }); + + await handler({ + event: { + type: "session.idle", + properties: { sessionID: "session-1" }, + } as never, + }); + + await handler({ + event: { + type: "session.compacted", + properties: { + sessionID: "session-1", + summary: "Compacted continuity after session_execute_file", + }, + } as never, + }); + + assertEquals(redisSnapshot.saved.length, 1); + assertStringIncludes(redisSnapshot.saved[0].snapshot, "src/session.ts"); + assertEquals(graphitiAsync.drainCalls.length 
/**
 * Coerces a tool payload into a record.
 *
 * The value is first offered to `asRecord` directly; if that yields nothing
 * and the value is a string, it is parsed as JSON and the parsed result is
 * offered to `asRecord` instead.
 *
 * NOTE(review): `asRecord` is defined elsewhere in this module — presumably it
 * returns plain objects and `undefined` for everything else; confirm.
 *
 * @param value Raw payload (object, JSON string, or anything else).
 * @returns The record, or `undefined` when the value is neither record-like
 *   nor a JSON string that parses to something `asRecord` accepts.
 */
const parseJsonRecord = (
  value: unknown,
): Record<string, unknown> | undefined => {
  const direct = asRecord(value);
  if (direct) return direct;
  if (typeof value !== "string") return undefined;

  try {
    return asRecord(JSON.parse(value));
  } catch {
    // Malformed JSON is treated the same as "no structured output".
    return undefined;
  }
};

/**
 * Normalizes the properties of a `session_*` tool event and derives a short,
 * human-readable activity line for it.
 *
 * Events whose tool name (`props.tool`, falling back to `props.name`) does not
 * start with `session_` are returned untouched and without `messageText`.
 *
 * For `session_*` tools the returned `properties` are built in three layers:
 * the original props, then the fields of the parsed `output`, then the
 * `paths`/`path`/`query`/`url`/`command`/`commands` fields taken from `args`
 * (falling back to the same key on `props`). Because those six keys are
 * assigned last, they win over same-named keys in `output`.
 *
 * @param props Raw event properties.
 * @returns The (possibly merged) properties plus an optional activity message.
 */
const buildSessionToolActivity = (
  props: Record<string, unknown>,
): {
  properties: Record<string, unknown>;
  messageText?: string;
} => {
  const tool = asString(props.tool) ?? asString(props.name);
  if (!tool?.startsWith("session_")) {
    return { properties: props };
  }

  const args = asRecord(props.args) ?? {};
  const output = parseJsonRecord(props.output) ?? {};
  const merged = {
    ...props,
    ...output,
    paths: args.paths ?? props.paths,
    path: args.path ?? props.path,
    query: args.query ?? props.query,
    url: args.url ?? props.url,
    command: args.command ?? props.command,
    commands: args.commands ?? props.commands,
  };
  // Prefer the tool's own summary; fall back to the tool name so the message
  // is never empty.
  const summary = asString(output.summary) ?? asString(props.summary) ?? tool;

  if (tool === "session_execute_file") {
    const paths = Array.isArray(args.paths)
      ? args.paths.filter((value): value is string => typeof value === "string")
      : [];
    // Only the first two paths are named to keep the message compact.
    const target = paths.slice(0, 2).join(", ");
    return {
      properties: merged,
      messageText: target
        ? `Read file ${target} — ${summary}`
        : `Read file via ${tool} — ${summary}`,
    };
  }

  if (tool === "session_search") {
    const query = asString(args.query);
    return {
      properties: merged,
      messageText: query
        ? `Searched local corpus for ${query}`
        : "Searched local corpus",
    };
  }

  if (tool === "session_fetch_and_index") {
    const url = asString(args.url) ?? asString(output.fetched_url);
    return {
      properties: merged,
      messageText: url
        ? `Fetched and indexed ${url} — ${summary}`
        : `Fetched and indexed content — ${summary}`,
    };
  }

  if (tool === "session_index") {
    return {
      properties: merged,
      messageText: `Indexed local session content — ${summary}`,
    };
  }

  if (tool === "session_execute") {
    const command = asString(args.command);
    return {
      properties: merged,
      messageText: command ? `${summary} — ${command}` : summary,
    };
  }

  if (tool === "session_batch_execute") {
    // Each batch entry is expected to be a record carrying a `command` string;
    // entries of any other shape are ignored.
    const commands = Array.isArray(args.commands)
      ? args.commands
        .map((value) => asRecord(value)?.command)
        .filter((value): value is string => typeof value === "string")
      : [];
    return {
      properties: merged,
      messageText: commands.length > 0
        ? `${summary} — ${commands.slice(0, 2).join("; ")}`
        : summary,
    };
  }

  // Unknown session_* tool: keep the merged properties and use the summary.
  return { properties: merged, messageText: summary };
};
describe("messages handler", () => { }]); }); + it("injects local-first session memory with optional cached persistent memory unchanged", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'freshcached recall', + nodeRefs: ["node-1"], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "fresh", + }, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "Continue work" }], + }], + }; + await handler({}, output as never); + + assertStringIncludes( + output.messages[0].parts[0].text, + "", + ); + assertStringIncludes( + output.messages[0].parts[0].text, + " { const sessionManager = new MockSessionManager(); sessionManager.state.pendingInjection = undefined; diff --git a/src/handlers/tool-after.test.ts b/src/handlers/tool-after.test.ts new file mode 100644 index 0000000..f57cf8f --- /dev/null +++ b/src/handlers/tool-after.test.ts @@ -0,0 +1,126 @@ +import { assertEquals } from "jsr:@std/assert@^1.0.0"; +import { afterEach, describe, it } from "jsr:@std/testing@^1.0.0/bdd"; + +import { createToolAfterHandler } from "./tool-after.ts"; +import { ToolRoutingOutcomeCache } from "../services/tool-routing-outcome-cache.ts"; + +describe("tool execute after handler", () => { + const routingOutcomes = new ToolRoutingOutcomeCache(); + + afterEach(() => { + routingOutcomes.clearAll(); + }); + + it("makes routed rewrite outcomes available to continuity capture after tool execution", async () => { + routingOutcomes.set("call-1", { + source: "tool-routing", + action: "modify", + reason: "bash-network-rewrite", + }); + const handler = createToolAfterHandler({ routingOutcomes }); + const output: { + title: string; + output: string; + metadata: Record; + } = { + title: "Bash", + output: 
"tool output", + metadata: { existing: true }, + }; + + await handler( + { + tool: "Bash", + sessionID: "root-session", + callID: "call-1", + args: { command: "curl https://example.com" }, + } as never, + output as never, + ); + + assertEquals(output.metadata, { + existing: true, + toolRouting: { + source: "tool-routing", + action: "modify", + reason: "bash-network-rewrite", + }, + }); + }); + + it("surfaces denied outcomes as compact continuity metadata without requiring raw tool payloads", async () => { + routingOutcomes.set("call-2", { + source: "tool-routing", + action: "deny", + reason: "webfetch-denied", + }); + const handler = createToolAfterHandler({ routingOutcomes }); + const output: { + title: string; + output: string; + metadata?: Record; + } = { + title: "WebFetch", + output: "", + metadata: undefined, + }; + + await handler( + { + tool: "WebFetch", + sessionID: "root-session", + callID: "call-2", + args: { url: "https://example.com" }, + } as never, + output as never, + ); + + assertEquals(output.metadata, { + toolRouting: { + source: "tool-routing", + action: "deny", + reason: "webfetch-denied", + }, + }); + }); + + it("remains continuity-focused and does not rewrite visible tool output", async () => { + routingOutcomes.set("call-3", { + source: "tool-routing", + action: "context", + guidanceType: "read", + reason: "read-guidance", + }); + const handler = createToolAfterHandler({ routingOutcomes }); + const output: { + title: string; + output: string; + metadata: Record; + } = { + title: "Read", + output: "visible tool output", + metadata: {}, + }; + + await handler( + { + tool: "Read", + sessionID: "root-session", + callID: "call-3", + args: { filePath: "/tmp/example.ts" }, + } as never, + output as never, + ); + + assertEquals(output.title, "Read"); + assertEquals(output.output, "visible tool output"); + assertEquals(output.metadata, { + toolRouting: { + source: "tool-routing", + action: "context", + guidanceType: "read", + reason: "read-guidance", 
+ }, + }); + }); +}); diff --git a/src/handlers/tool-after.ts b/src/handlers/tool-after.ts new file mode 100644 index 0000000..d9cf94d --- /dev/null +++ b/src/handlers/tool-after.ts @@ -0,0 +1,33 @@ +import type { Hooks } from "@opencode-ai/plugin"; +import type { ToolRoutingOutcomeCache } from "../services/tool-routing-outcome-cache.ts"; + +type ToolAfterHook = NonNullable; +type ToolAfterInput = Parameters[0]; +type ToolAfterOutput = Parameters[1]; + +/** Dependencies injected into createToolAfterHandler. */ export interface ToolAfterHandlerDeps { + /** Routing outcome cache; the handler reads it with take(callID). */ routingOutcomes: ToolRoutingOutcomeCache; +} + +/** Returns a shallow copy of value when it is a non-null, non-array object; any other input yields a new empty object. */ const asMetadataRecord = (value: unknown): Record => + value && typeof value === "object" && !Array.isArray(value) + ? { ...(value as Record) } + : {}; + +/** Builds the tool.execute.after hook. The hook takes the routing outcome stored for the call ID; with no outcome it leaves output untouched, otherwise it sets output.metadata to the existing metadata keys plus a toolRouting entry holding that outcome. It never assigns output.title or output.output. */ export function createToolAfterHandler( + deps: ToolAfterHandlerDeps, +): ToolAfterHook { + return ( + { callID: callId }: ToolAfterInput, + output: ToolAfterOutput, + ) => { + const outcome = deps.routingOutcomes.take(callId); + /* no recorded outcome for this call: nothing to attach */ if (!outcome) return Promise.resolve(); + + output.metadata = { + ...asMetadataRecord(output.metadata), + toolRouting: outcome, + }; + return Promise.resolve(); + }; +} diff --git a/src/handlers/tool-before.test.ts b/src/handlers/tool-before.test.ts new file mode 100644 index 0000000..58f1a53 --- /dev/null +++ b/src/handlers/tool-before.test.ts @@ -0,0 +1,223 @@ +import { + assertEquals, + assertRejects, + assertStringIncludes, +} from "jsr:@std/assert@^1.0.0"; +import { afterEach, describe, it } from "jsr:@std/testing@^1.0.0/bdd"; + +import { createToolBeforeHandler } from "./tool-before.ts"; +import { ToolGuidanceCache } from "../services/tool-guidance-cache.ts"; +import { ToolRoutingOutcomeCache } from "../services/tool-routing-outcome-cache.ts"; +import { routeToolCall } from "../services/tool-routing.ts"; + +/** Test double for the session canonicalizer: answers lookups from in-memory maps and records each requested session ID. */ class MockSessionCanonicalizer { + cached = new Map(); + resolved = new Map(); + cachedCalls: string[] = []; + resolveCalls: string[] = []; + + getCachedCanonicalSessionId(sessionId: string): string | undefined { + 
this.cachedCalls.push(sessionId); + return this.cached.get(sessionId); + } + + resolveCanonicalSessionId(sessionId: string): Promise { + this.resolveCalls.push(sessionId); + return Promise.resolve(this.resolved.get(sessionId)); + } +} + +describe("tool execute before handler", () => { + const routingOutcomes = new ToolRoutingOutcomeCache(); + + afterEach(() => { + routingOutcomes.clearAll(); + }); + + it("throws on denied WebFetch calls", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + canonicalizer.cached.set("root-session", "root-session"); + const handler = createToolBeforeHandler({ + sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall, + }); + + await assertRejects( + () => + handler( + { + tool: "WebFetch", + sessionID: "root-session", + callID: "call-1", + } as never, + { args: { url: "https://example.com" } } as never, + ), + Error, + "WebFetch", + ); + + assertEquals(routingOutcomes.take("call-1"), { + source: "tool-routing", + action: "deny", + reason: "webfetch-denied", + }); + }); + + it("throws on denied WebFetch calls from a child session after first-call canonical lookup", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + canonicalizer.resolved.set("child-session", "root-session"); + const handler = createToolBeforeHandler({ + sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall, + }); + + await assertRejects( + () => + handler( + { + tool: "WebFetch", + sessionID: "child-session", + callID: "call-2", + } as never, + { args: { url: "https://example.com" } } as never, + ), + Error, + "WebFetch", + ); + + assertEquals(canonicalizer.cachedCalls, ["child-session"]); + assertEquals(canonicalizer.resolveCalls, ["child-session"]); + assertEquals(routingOutcomes.take("call-2"), { + source: "tool-routing", + action: "deny", + reason: "webfetch-denied", + }); + }); + + 
it("mutates args for Bash rewrite cases", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + canonicalizer.cached.set("root-session", "root-session"); + const handler = createToolBeforeHandler({ + sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall, + }); + const output = { args: { command: "curl https://example.com/api" } }; + + await handler( + { + tool: "Bash", + sessionID: "root-session", + callID: "call-3", + } as never, + output as never, + ); + + assertStringIncludes(String(output.args.command), "Routing note"); + assertEquals(routingOutcomes.take("call-3"), { + source: "tool-routing", + action: "modify", + reason: "bash-network-rewrite", + }); + }); + + it("emits guidance only once per canonical root session across parent and child sessions", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + canonicalizer.cached.set("root-session", "root-session"); + canonicalizer.cached.set("child-session", "root-session"); + const handler = createToolBeforeHandler({ + sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall, + }); + + await handler( + { + tool: "Read", + sessionID: "root-session", + callID: "call-4", + } as never, + { args: { filePath: "/tmp/a.ts" } } as never, + ); + await handler( + { + tool: "Read", + sessionID: "child-session", + callID: "call-5", + } as never, + { args: { filePath: "/tmp/b.ts" } } as never, + ); + + assertEquals(routingOutcomes.take("call-4"), { + source: "tool-routing", + action: "context", + guidanceType: "read", + reason: "read-guidance", + }); + assertEquals(routingOutcomes.take("call-5"), undefined); + }); + + it("keeps allow decisions as true no-op passthrough", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + canonicalizer.cached.set("root-session", "root-session"); + const handler = createToolBeforeHandler({ + 
sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall, + }); + const args = { pattern: "src/**/*.ts", path: "/workspace/project" }; + const output = { args }; + + await handler( + { + tool: "Glob", + sessionID: "root-session", + callID: "call-6", + } as never, + output as never, + ); + + assertEquals(output.args, args); + assertEquals(routingOutcomes.take("call-6"), undefined); + }); + + it("does not perform Redis or Graphiti access on the before-hook path", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + canonicalizer.cached.set("root-session", "root-session"); + const unexpectedCalls: string[] = []; + const handler = createToolBeforeHandler({ + sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall, + redisEvents: { + recordEvent: () => { + unexpectedCalls.push("redisEvents.recordEvent"); + }, + }, + graphitiAsync: { + scheduleDrain: () => { + unexpectedCalls.push("graphitiAsync.scheduleDrain"); + }, + }, + } as never); + + await handler( + { + tool: "Read", + sessionID: "root-session", + callID: "call-7", + } as never, + { args: { filePath: "/tmp/a.ts" } } as never, + ); + + assertEquals(unexpectedCalls, []); + }); +}); diff --git a/src/handlers/tool-before.ts b/src/handlers/tool-before.ts new file mode 100644 index 0000000..0a8dea5 --- /dev/null +++ b/src/handlers/tool-before.ts @@ -0,0 +1,91 @@ +import type { Hooks } from "@opencode-ai/plugin"; +import type { ToolGuidanceCache } from "../services/tool-guidance-cache.ts"; +import { + routeToolCall as defaultRouteToolCall, + type RouteToolCallInput, + type RoutingDecision, +} from "../services/tool-routing.ts"; +import type { ToolRoutingOutcomeCache } from "../services/tool-routing-outcome-cache.ts"; +import type { ToolRoutingSessionCanonicalizer } from "../session.ts"; + +type ToolBeforeHook = NonNullable; +type ToolBeforeInput = Parameters[0]; 
+type ToolBeforeOutput = Parameters[1]; + +/** Dependencies for createToolBeforeHandler. */ export interface ToolBeforeHandlerDeps { + sessionCanonicalizer: ToolRoutingSessionCanonicalizer; + guidanceThrottle: ToolGuidanceCache; + routingOutcomes: ToolRoutingOutcomeCache; + /** Optional routing override; the handler falls back to the imported routeToolCall when this is omitted. */ routeToolCall?: (input: RouteToolCallInput) => RoutingDecision; +} + +/** Returns value itself (not a copy) when it is a non-null, non-array object; otherwise a new empty object. */ const toRecord = (value: unknown): Record => + value && typeof value === "object" && !Array.isArray(value) + ? value as Record + : {}; + +/** Resolves the canonical session ID: a truthy cached value wins, then the async resolver result, and finally the raw sessionId when the resolver yields null or undefined. A rejection from the resolver is not caught here. */ const resolveCanonicalSessionId = async ( + sessionCanonicalizer: ToolRoutingSessionCanonicalizer, + sessionId: string, +): Promise => { + const cached = sessionCanonicalizer.getCachedCanonicalSessionId(sessionId); + if (cached) return cached; + + // Task 2 explicitly chooses the async first-call canonicalization path: + // if a child lineage is not cached yet, resolve through the SDK-backed + // session manager path once, then fall back to the raw session ID only when + // canonical lineage cannot be resolved. + return await sessionCanonicalizer.resolveCanonicalSessionId(sessionId) ?? + sessionId; +}; + +/** Builds the tool.execute.before hook. It resolves the canonical session ID, routes the call, and then: allow returns without changes; modify replaces output.args and records the outcome; context records the outcome with its guidance type; deny records the outcome and throws an Error built from decision.guidance. */ export function createToolBeforeHandler( + deps: ToolBeforeHandlerDeps, +): ToolBeforeHook { + const route = deps.routeToolCall ?? 
defaultRouteToolCall; + + return async ( + { tool, sessionID, callID }: ToolBeforeInput, + output: ToolBeforeOutput, + ) => { + const canonicalSessionId = await resolveCanonicalSessionId( + deps.sessionCanonicalizer, + sessionID, + ); + const args = toRecord(output.args); + const decision = route({ + canonicalSessionId, + toolName: tool, + args, + guidanceThrottle: deps.guidanceThrottle, + }); + + switch (decision.action) { + case "allow": + return; + case "modify": + output.args = decision.args; + deps.routingOutcomes.set(callID, { + source: "tool-routing", + action: "modify", + reason: decision.reason, + }); + return; + case "context": + deps.routingOutcomes.set(callID, { + source: "tool-routing", + action: "context", + guidanceType: decision.guidanceType, + reason: decision.reason, + }); + return; + case "deny": + deps.routingOutcomes.set(callID, { + source: "tool-routing", + action: "deny", + reason: decision.reason, + }); + throw new Error(decision.guidance); + } + }; +} diff --git a/src/index.test.ts b/src/index.test.ts index 874c79c..8233660 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ -53,6 +53,11 @@ function createEntrypointHarnessWithOptions(options: { chat: { kind: "chat" }, compacting: { kind: "compacting" }, messages: { kind: "messages" }, + tool: { + session_execute: { kind: "session_execute" }, + }, + toolBefore: { kind: "tool-before" }, + toolAfter: { kind: "tool-after" }, }; const records = { loadConfigCalls: [] as string[], @@ -70,6 +75,9 @@ function createEntrypointHarnessWithOptions(options: { redisCloseCalls: 0, graphitiAsyncDisposeCalls: 0, graphitiAsyncFlushCalls: [] as string[][], + sessionMcpRuntimeArgs: [] as Array | undefined>, + sessionMcpRuntimeDisposeCalls: 0, + sessionMcpRuntimeInstances: [] as unknown[], teardownTaskRuns: [] as string[], teardownRegistrations: [] as Array< { @@ -105,13 +113,17 @@ function createEntrypointHarnessWithOptions(options: { unknown, unknown, unknown, - { idleRetentionMs: number }, + { 
idleRetentionMs: number; runtimeStateMigrator: unknown }, ]>, sessionManagerInstances: [] as unknown[], createEventHandlerArgs: [] as Array>, createChatHandlerArgs: [] as Array>, createCompactingHandlerArgs: [] as Array>, createMessagesHandlerArgs: [] as Array>, + createToolBeforeHandlerArgs: [] as Array>, + createToolAfterHandlerArgs: [] as Array>, + toolGuidanceCacheInstances: [] as unknown[], + toolRoutingOutcomeCacheInstances: [] as unknown[], }; class MockGraphitiConnectionManager { @@ -229,6 +241,14 @@ function createEntrypointHarnessWithOptions(options: { } class MockSessionManager { + getCachedCanonicalSessionId(sessionId: string) { + return sessionId; + } + + resolveCanonicalSessionId(sessionId: string) { + return Promise.resolve(sessionId); + } + getTrackedGroupIds() { return ["group-id"]; } @@ -240,7 +260,7 @@ function createEntrypointHarnessWithOptions(options: { redisEvents: unknown, redisSnapshot: unknown, redisCache: unknown, - options: { idleRetentionMs: number }, + options: { idleRetentionMs: number; runtimeStateMigrator: unknown }, ) { records.sessionManagerArgs.push([ defaultGroupId, @@ -255,6 +275,33 @@ function createEntrypointHarnessWithOptions(options: { } } + class MockToolGuidanceCache { + constructor() { + records.toolGuidanceCacheInstances.push(this); + } + } + + class MockToolRoutingOutcomeCache { + constructor() { + records.toolRoutingOutcomeCacheInstances.push(this); + } + } + + class MockSessionMcpRuntime { + tools = hooks.tool; + + constructor(args?: Record) { + records.sessionMcpRuntimeArgs.push(args); + records.sessionMcpRuntimeInstances.push(this); + } + + dispose() { + records.sessionMcpRuntimeDisposeCalls += 1; + records.teardownTaskRuns.push("session-mcp-runtime"); + return Promise.resolve(); + } + } + const dependencies = { loadConfig: (directory: string) => { records.loadConfigCalls.push(directory); @@ -292,6 +339,8 @@ function createEntrypointHarnessWithOptions(options: { RedisCacheService: MockRedisCacheService, 
BatchDrainService: MockBatchDrainService, GraphitiAsyncService: MockGraphitiAsyncService, + createSessionMcpRuntime: (args?: Record) => + new MockSessionMcpRuntime(args), SessionManager: MockSessionManager, createEventHandler: (args: Record) => { records.createEventHandlerArgs.push(args); @@ -309,6 +358,16 @@ function createEntrypointHarnessWithOptions(options: { records.createMessagesHandlerArgs.push(args); return hooks.messages; }, + createToolBeforeHandler: (args: Record) => { + records.createToolBeforeHandlerArgs.push(args); + return hooks.toolBefore; + }, + createToolAfterHandler: (args: Record) => { + records.createToolAfterHandlerArgs.push(args); + return hooks.toolAfter; + }, + ToolGuidanceCache: MockToolGuidanceCache, + ToolRoutingOutcomeCache: MockToolRoutingOutcomeCache, makeGroupId: (prefix: string | undefined, directory: string) => { records.makeGroupIdCalls.push([prefix, directory]); return "group-id"; @@ -571,17 +630,30 @@ describe("index", () => { assertEquals(records.teardownRegistrations.length, 1); assertEquals( records.teardownRegistrations[0].tasks.map((task) => task.name), - ["graphiti-drain-flush", "graphiti-async", "graphiti", "redis"], + [ + "graphiti-drain-flush", + "graphiti-async", + "session-mcp-runtime", + "graphiti", + "redis", + ], ); records.teardownRegistrations[0].tasks[0].run(); records.teardownRegistrations[0].tasks[1].run(); records.teardownRegistrations[0].tasks[2].run(); records.teardownRegistrations[0].tasks[3].run(); + records.teardownRegistrations[0].tasks[4].run(); assertEquals(records.graphitiAsyncFlushCalls, [["group-id"]]); assertEquals(records.graphitiAsyncDisposeCalls, 1); + assertEquals(records.sessionMcpRuntimeDisposeCalls, 1); assertEquals(records.connectionStopCalls, 1); assertEquals(records.redisCloseCalls, 1); + assertEquals(records.sessionMcpRuntimeArgs, [{ + redisClient: records.redisClientInstances[0], + sessionTtlSeconds: config.redis.sessionTtlSeconds, + groupId: "group-id", + }]); assertStrictEquals( 
records.graphitiMcpArgs[0], @@ -659,6 +731,7 @@ describe("index", () => { ); assertEquals(records.sessionManagerArgs[0][6], { idleRetentionMs: config.redis.sessionTtlSeconds * 1000, + runtimeStateMigrator: records.sessionMcpRuntimeInstances[0], }); assertEquals(records.createEventHandlerArgs.length, 1); @@ -725,6 +798,26 @@ describe("index", () => { records.createMessagesHandlerArgs[0].sessionManager, records.sessionManagerInstances[0], ); + assertEquals(records.toolGuidanceCacheInstances.length, 1); + assertEquals(records.toolRoutingOutcomeCacheInstances.length, 1); + assertEquals(records.createToolBeforeHandlerArgs.length, 1); + assertStrictEquals( + records.createToolBeforeHandlerArgs[0].sessionCanonicalizer, + records.sessionManagerInstances[0], + ); + assertStrictEquals( + records.createToolBeforeHandlerArgs[0].guidanceThrottle, + records.toolGuidanceCacheInstances[0], + ); + assertStrictEquals( + records.createToolBeforeHandlerArgs[0].routingOutcomes, + records.toolRoutingOutcomeCacheInstances[0], + ); + assertEquals(records.createToolAfterHandlerArgs.length, 1); + assertStrictEquals( + records.createToolAfterHandlerArgs[0].routingOutcomes, + records.toolRoutingOutcomeCacheInstances[0], + ); assertStrictEquals(plugin.event, hooks.event); assertStrictEquals(plugin["chat.message"], hooks.chat); @@ -736,6 +829,9 @@ describe("index", () => { plugin["experimental.chat.messages.transform"], hooks.messages, ); + assertStrictEquals(plugin.tool, hooks.tool); + assertStrictEquals(plugin["tool.execute.before"], hooks.toolBefore); + assertStrictEquals(plugin["tool.execute.after"], hooks.toolAfter); }); it("warns on degraded startup without blocking plugin initialization", async () => { @@ -800,6 +896,42 @@ describe("index", () => { assertStrictEquals(plugin["chat.message"], hooks.chat); }); + it("passes live redis client, ttl, and groupId into session MCP runtime", async () => { + const { config, input, records, dependencies } = createEntrypointHarness( + true, + ); + + 
await invokeGraphiti(input, dependencies); + + assertEquals(records.sessionMcpRuntimeArgs, [{ + redisClient: records.redisClientInstances[0], + sessionTtlSeconds: config.redis.sessionTtlSeconds, + groupId: "group-id", + }]); + }); + + it("passes the session MCP runtime as the root-state migrator", async () => { + const { input, records, dependencies } = createEntrypointHarness(true); + + await invokeGraphiti(input, dependencies); + + assertStrictEquals( + records.sessionManagerArgs[0][6].runtimeStateMigrator, + records.sessionMcpRuntimeInstances[0], + ); + }); + + it("does not leave runtime in stub corpus mode when redis is available", async () => { + const { input, records, dependencies } = createEntrypointHarness(true); + + await invokeGraphiti(input, dependencies); + + const args = records.sessionMcpRuntimeArgs[0] ?? {}; + assertStrictEquals(args.redisClient, records.redisClientInstances[0]); + assertEquals(args.sessionTtlSeconds, 60); + assertEquals(args.groupId, "group-id"); + }); + it("reports degraded startup once when both startup promises reject", async () => { const { input, records, dependencies } = createEntrypointHarnessWithOptions({ @@ -901,10 +1033,12 @@ describe("index", () => { assertEquals(firstHarness.records.teardownTaskRuns, [ "graphiti-drain-flush", "graphiti-async", + "session-mcp-runtime", "graphiti", "redis", ]); assertEquals(firstHarness.records.graphitiAsyncDisposeCalls, 1); + assertEquals(firstHarness.records.sessionMcpRuntimeDisposeCalls, 1); assertEquals(firstHarness.records.connectionStopCalls, 1); assertEquals(firstHarness.records.redisCloseCalls, 1); }); diff --git a/src/index.ts b/src/index.ts index 84479f9..9fd4650 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,6 +4,8 @@ import { createChatHandler } from "./handlers/chat.ts"; import { createCompactingHandler } from "./handlers/compacting.ts"; import { createEventHandler } from "./handlers/event.ts"; import { createMessagesHandler } from "./handlers/messages.ts"; +import { 
createToolAfterHandler } from "./handlers/tool-after.ts"; +import { createToolBeforeHandler } from "./handlers/tool-before.ts"; import { SessionManager } from "./session.ts"; import { BatchDrainService } from "./services/batch-drain.ts"; import { GraphitiConnectionManager } from "./services/connection-manager.ts"; @@ -19,6 +21,9 @@ import { RedisEventsService } from "./services/redis-events.ts"; import { logger } from "./services/logger.ts"; import { RedisSnapshotService } from "./services/redis-snapshot.ts"; import { registerRuntimeTeardown } from "./services/runtime-teardown.ts"; +import { createSessionMcpRuntime } from "./services/session-mcp-runtime.ts"; +import { ToolGuidanceCache } from "./services/tool-guidance-cache.ts"; +import { ToolRoutingOutcomeCache } from "./services/tool-routing-outcome-cache.ts"; import { makeGroupId, makeUserGroupId } from "./utils.ts"; type GraphitiDependencies = { @@ -41,11 +46,16 @@ type GraphitiDependencies = { RedisCacheService: typeof RedisCacheService; BatchDrainService: typeof BatchDrainService; GraphitiAsyncService: typeof GraphitiAsyncService; + createSessionMcpRuntime: typeof createSessionMcpRuntime; SessionManager: typeof SessionManager; createEventHandler: typeof createEventHandler; createChatHandler: typeof createChatHandler; createCompactingHandler: typeof createCompactingHandler; createMessagesHandler: typeof createMessagesHandler; + createToolBeforeHandler: typeof createToolBeforeHandler; + createToolAfterHandler: typeof createToolAfterHandler; + ToolGuidanceCache: typeof ToolGuidanceCache; + ToolRoutingOutcomeCache: typeof ToolRoutingOutcomeCache; makeGroupId: typeof makeGroupId; makeUserGroupId: typeof makeUserGroupId; }; @@ -91,11 +101,16 @@ const defaultGraphitiDependencies: GraphitiDependencies = { RedisCacheService, BatchDrainService, GraphitiAsyncService, + createSessionMcpRuntime, SessionManager, createEventHandler, createChatHandler, createCompactingHandler, createMessagesHandler, + 
createToolBeforeHandler, + createToolAfterHandler, + ToolGuidanceCache, + ToolRoutingOutcomeCache, makeGroupId, makeUserGroupId, }; @@ -175,12 +190,6 @@ export const graphiti: Plugin = ( drainRetryMax: config.redis.drainRetryMax, }, ); - const graphitiAsync = new dependencies.GraphitiAsyncService( - graphitiClient, - redisCache, - batchDrain, - ); - const defaultGroupId = dependencies.makeGroupId( config.graphiti.groupIdPrefix, input.directory, @@ -190,6 +199,17 @@ export const graphiti: Plugin = ( input.directory, ); + const graphitiAsync = new dependencies.GraphitiAsyncService( + graphitiClient, + redisCache, + batchDrain, + ); + const sessionMcpRuntime = dependencies.createSessionMcpRuntime({ + redisClient, + sessionTtlSeconds: config.redis.sessionTtlSeconds, + groupId: defaultGroupId, + }); + const sessionManager = new dependencies.SessionManager( defaultGroupId, defaultUserGroupId, @@ -199,8 +219,11 @@ export const graphiti: Plugin = ( redisCache, { idleRetentionMs: config.redis.sessionTtlSeconds * 1000, + runtimeStateMigrator: sessionMcpRuntime, }, ); + const toolGuidanceCache = new dependencies.ToolGuidanceCache(); + const toolRoutingOutcomes = new dependencies.ToolRoutingOutcomeCache(); activeRuntimeTeardown = dependencies.registerRuntimeTeardown([ { @@ -214,6 +237,10 @@ export const graphiti: Plugin = ( name: "graphiti-async", run: () => graphitiAsync.dispose(), }, + { + name: "session-mcp-runtime", + run: () => sessionMcpRuntime.dispose(), + }, { name: "graphiti", run: () => connectionManager.stop(), @@ -250,6 +277,15 @@ export const graphiti: Plugin = ( .createMessagesHandler({ sessionManager, }), + tool: sessionMcpRuntime.tools, + "tool.execute.before": dependencies.createToolBeforeHandler({ + sessionCanonicalizer: sessionManager, + guidanceThrottle: toolGuidanceCache, + routingOutcomes: toolRoutingOutcomes, + }), + "tool.execute.after": dependencies.createToolAfterHandler({ + routingOutcomes: toolRoutingOutcomes, + }), }; }); diff --git 
a/src/services/connection-manager.ts b/src/services/connection-manager.ts index babcee1..a9498a9 100644 --- a/src/services/connection-manager.ts +++ b/src/services/connection-manager.ts @@ -124,12 +124,11 @@ const validateEndpoint = (endpoint: string): string => { try { new URL(normalized); } catch (cause) { - throw new Error( + const error = new Error( `Invalid Graphiti endpoint: ${JSON.stringify(normalized)}`, - { - cause, - }, ); + (error as Error & { cause?: unknown }).cause = cause; + throw error; } return normalized; diff --git a/src/services/redis-client.ts b/src/services/redis-client.ts index d7b79d5..c3c9998 100644 --- a/src/services/redis-client.ts +++ b/src/services/redis-client.ts @@ -54,6 +54,12 @@ type StoredValue = { expiresAt?: number; }; +/** Snapshot of a single key: missing, or a string, list, or hash payload with an optional ttlSeconds. */ export type RedisKeySnapshot = + | { kind: "missing" } + | { kind: "string"; value: string; ttlSeconds?: number } + | { kind: "list"; values: string[]; ttlSeconds?: number } + | { kind: "hash"; values: Record; ttlSeconds?: number }; + class InMemoryRedisStore implements RedisRuntime { private readonly values = new Map(); @@ -332,11 +338,16 @@ class InMemoryRedisStore implements RedisRuntime { return Promise.resolve(true); } - snapshot(key: string): - | { kind: "missing" } - | { kind: "string"; value: string; ttlSeconds?: number } - | { kind: "list"; values: string[]; ttlSeconds?: number } - | { kind: "hash"; values: Record; ttlSeconds?: number } { + /** Returns the sorted keys that start with prefix and are still present after cleanup runs on each stored key. Iterates over a copy of the key set. */ keys(prefix = ""): string[] { + const results: string[] = []; + for (const key of [...this.values.keys()]) { + this.cleanup(key); + if (this.values.has(key) && key.startsWith(prefix)) results.push(key); + } + return results.sort(); + } + + snapshot(key: string): RedisKeySnapshot { this.cleanup(key); const existing = this.values.get(key); if (!existing) return { kind: "missing" }; @@ -1109,6 +1120,40 @@ export class RedisClient { ); } + /** Snapshots key from this.memory and wraps the result in a resolved promise. NOTE(review): only the in-memory store is consulted here; confirm that is intended when another runtime is active. */ snapshot(key: string): Promise { + return Promise.resolve(this.memory.snapshot(key)); + } + + /** Lists keys with the given prefix from this.memory, wrapped in a resolved promise. */ keysByPrefix(prefix: string): Promise { + 
return Promise.resolve(this.memory.keys(prefix)); + } + + /** Rewrites key to match snapshot. missing deletes the key; string overwrites it with the snapshot value and ttlSeconds; hash and list delete the key first, stop there when the snapshot holds no entries, and otherwise rebuild it (list values are appended one at a time in order). Each step is a separate awaited call. */ async restoreSnapshot( + key: string, + snapshot: RedisKeySnapshot, + ): Promise { + switch (snapshot.kind) { + case "missing": + await this.deleteKey(key); + return; + case "string": + await this.setString(key, snapshot.value, snapshot.ttlSeconds); + return; + case "hash": + await this.deleteKey(key); + if (Object.keys(snapshot.values).length === 0) return; + await this.setHashFields(key, snapshot.values, snapshot.ttlSeconds); + return; + case "list": + await this.deleteKey(key); + if (snapshot.values.length === 0) return; + for (const value of snapshot.values) { + await this.appendToList(key, value, snapshot.ttlSeconds); + } + return; + } + } + async deleteKeyIfValue(key: string, expectedValue: string): Promise { return await this.useMutationRuntime([key], async (runtime) => { if (runtime === this.memory) { diff --git a/src/services/session-corpus.test.ts b/src/services/session-corpus.test.ts new file mode 100644 index 0000000..da8f837 --- /dev/null +++ b/src/services/session-corpus.test.ts @@ -0,0 +1,863 @@ +import { + assert, + assertEquals, + assertMatch, + assertStringIncludes, +} from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { RedisClient } from "./redis-client.ts"; +import { createSessionCorpusService } from "./session-corpus.ts"; + +/** Returns a promise that resolves after ms milliseconds (setTimeout wrapper). */ const wait = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + +describe("session-corpus", () => { + it("fetches local HTTP content, normalizes it, and indexes it", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const fetchCalls: string[] = []; + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-fetch", + fetchImpl: (input) => { + fetchCalls.push(String(input)); + return Promise.resolve( + new Response( + "# Redis Session TTLs\n\nSession TTL protects local corpus state.", + { + headers: { "content-type": "text/markdown; 
charset=utf-8" }, + }, + ), + ); + }, + }); + + const indexed = await corpus.fetchAndIndex({ + rootSessionId: "root-fetch", + url: "http://127.0.0.1/local-doc", + timeoutSeconds: 5, + }); + const search = await corpus.search({ + rootSessionId: "root-fetch", + query: "session ttl", + }); + + assertEquals(fetchCalls, ["http://127.0.0.1/local-doc"]); + assertEquals(indexed.status, "ok"); + assertEquals(indexed.contentType, "text/markdown"); + assertEquals(indexed.corpusRef, search.results[0]?.corpus_ref); + assert(search.results[0]?.snippet.includes("Session TTL")); + }); + + it("ranks the session ttl document first in the small-corpus baseline", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-rank", + }); + + const docA = await corpus.index({ + rootSessionId: "root-rank", + content: + "# Redis Session TTLs\n\nSession TTL refresh keeps the local session corpus alive.", + }); + await corpus.index({ + rootSessionId: "root-rank", + content: + "# Graphiti Async Drain\n\nDrain retries happen asynchronously after compaction.", + }); + await corpus.index({ + rootSessionId: "root-rank", + content: + "# Child Session Canonicalization\n\nChild sessions resolve to a canonical root session.", + }); + + const search = await corpus.search({ + rootSessionId: "root-rank", + query: "session ttl", + }); + + assertEquals(search.status, "ok"); + assertEquals(search.results[0]?.corpus_ref, docA.corpusRef); + }); + + it("returns structured empty results after TTL expiry instead of throwing", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 0.001, + groupId: "group-expiry", + }); + + await corpus.index({ + rootSessionId: "root-expiry", + content: "# Redis Session TTLs\n\nTTL expires quickly.", + }); + await wait(20); + + const search = await corpus.search({ + rootSessionId: 
"root-expiry", + query: "ttl", + }); + + assertEquals(search.status, "ok"); + assertEquals(search.results, []); + assertEquals(search.corpusRefs, []); + }); + + it("stores oversized artifact text with a bounded summary and makes it searchable", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-artifact", + }); + + const artifact = await corpus.storeArtifact({ + rootSessionId: "root-artifact", + toolName: "session_execute", + body: "SESSION TTL REPORT\n" + + "session ttl keeps search warm\n".repeat(500), + }); + const search = await corpus.search({ + rootSessionId: "root-artifact", + query: "session ttl", + }); + + assertMatch(artifact.artifactRef, /^local:\/\/session_execute\//); + assert(artifact.summary.length <= 320); + assertEquals(search.results[0]?.corpus_ref, artifact.corpusRef); + }); + + it("namespaces corpus keys with groupId and root_session_id", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-alpha", + }); + + const indexed = await corpus.index({ + rootSessionId: "root-scoped", + content: "# Scoped Corpus\n\nRedis-backed local corpus.", + }); + + assertEquals( + indexed.corpusRef, + "session:group-alpha:root-scoped:corpus:corpus-1:meta", + ); + const meta = await redis.getHashAll(indexed.corpusRef); + assertEquals(meta.root_session_id, "root-scoped"); + }); + + it("does not persist extra stem or vocab key families outside the locked namespace", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-namespace", + }); + + await corpus.index({ + rootSessionId: "root-namespace", + content: "# Index Maintenance\n\nIndex updates keep retrieval healthy.", + }); + + const vocab = await redis.getHashAll( + 
"session:group-namespace:root-namespace:vocab", + ); + const stemHits = await redis.getListRange( + "session:group-namespace:root-namespace:stem:index", + 0, + 10, + ); + + assertEquals(vocab, {}); + assertEquals(stemHits, []); + }); + + it("continues corpus ids across runtime reinitialization with the same redis state", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const first = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-ids", + }); + + const firstIndexed = await first.index({ + rootSessionId: "root-ids", + content: "# First\n\nSession TTL baseline.", + }); + + const second = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-ids", + }); + const secondIndexed = await second.index({ + rootSessionId: "root-ids", + content: "# Second\n\nGraphiti async drain notes.", + }); + + assertEquals( + firstIndexed.corpusRef, + "session:group-ids:root-ids:corpus:corpus-1:meta", + ); + assertEquals( + secondIndexed.corpusRef, + "session:group-ids:root-ids:corpus:corpus-2:meta", + ); + assertEquals( + await redis.getListRange("session:group-ids:root-ids:corpora", 0, 10), + ["corpus-1", "corpus-2"], + ); + }); + + it("keeps concurrent corpus writes from reusing the same corpus id", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + let waitingResolvers: Array<() => void> = []; + let blockedWrites = 0; + const originalSetHashFields = redis.setHashFields.bind(redis); + redis.setHashFields = async (key, values, ttlSeconds) => { + if ( + key === "session:group-race:root-race:stats" && + values.next_corpus_id !== undefined + ) { + blockedWrites += 1; + await new Promise((resolve) => { + waitingResolvers.push(resolve); + if (blockedWrites === 2) { + for (const resume of waitingResolvers) resume(); + waitingResolvers = []; + } + }); + } + return await originalSetHashFields(key, values, ttlSeconds); + }; + + const corpus = createSessionCorpusService({ + redis, + 
ttlSeconds: 60, + groupId: "group-race", + }); + + const [first, second] = await Promise.all([ + corpus.index({ + rootSessionId: "root-race", + content: "# First\n\nFirst concurrent write.", + }), + corpus.index({ + rootSessionId: "root-race", + content: "# Second\n\nSecond concurrent write.", + }), + ]); + + assertEquals(first.corpusRef === second.corpusRef, false); + assertEquals( + await redis.getListRange("session:group-race:root-race:corpora", 0, 10), + ["corpus-1", "corpus-2"], + ); + }); + + it("stores each chunk id exactly once in the corpus chunk list", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-chunk-list", + }); + + const indexed = await corpus.index({ + rootSessionId: "root-chunk-list", + content: [ + "# Alpha", + "", + "First paragraph.", + "", + "Second paragraph.", + ].join("\n"), + }); + + const corpusId = indexed.corpusRef.split(":").at(-2) ?? ""; + const chunkIds = await redis.getListRange( + `session:group-chunk-list:root-chunk-list:corpus:${corpusId}:chunks`, + 0, + 20, + ); + + assertEquals(chunkIds.length, indexed.chunkCount); + assertEquals(new Set(chunkIds).size, chunkIds.length); + }); + + it("normalizes HTML into markdown-visible headings, lists, and fenced code", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-html", + }); + + await corpus.index({ + rootSessionId: "root-html", + contentType: "text/html", + content: [ + "
", + "

Install Guide

", + "

Use the local Redis runtime.

", + "
  • Install Redis
  • Verify TTL refresh
", + "
redis-cli PING\nTTL session:key
", + "
", + ].join(""), + }); + + const listSearch = await corpus.search({ + rootSessionId: "root-html", + query: "verify ttl refresh", + }); + const codeSearch = await corpus.search({ + rootSessionId: "root-html", + query: "redis-cli ping", + }); + + assertStringIncludes( + listSearch.results[0]?.snippet ?? "", + "- Verify TTL refresh", + ); + assertStringIncludes(codeSearch.results[0]?.snippet ?? "", "```"); + assertStringIncludes( + codeSearch.results[0]?.snippet ?? "", + "redis-cli PING", + ); + }); + + it("keeps fenced code blocks atomic under the nearest heading during chunking", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-code", + }); + + const indexed = await corpus.index({ + rootSessionId: "root-code", + content: [ + "# Setup", + "", + "Prelude text ".repeat(120), + "", + "## Runtime", + "", + "```ts", + "const runtime = createSessionMcpRuntime({ redisClient });", + 'await runtime.tools.session_search.execute({ query: "ttl" }, ctx);', + "```", + "", + "Trailing text ".repeat(120), + ].join("\n"), + }); + + const corpusId = indexed.corpusRef.split(":").at(-2) ?? ""; + const chunkIds = await redis.getListRange( + `session:group-code:root-code:corpus:${corpusId}:chunks`, + 0, + 20, + ); + const codeChunk = await Promise.any( + chunkIds.map((chunkId) => + redis.getHashAll(`session:group-code:root-code:chunk:${chunkId}`).then( + (chunk) => { + if ((chunk.text ?? "").includes("createSessionMcpRuntime")) { + return chunk; + } + throw new Error("not code chunk"); + }, + ) + ), + ); + + assertEquals(codeChunk.title, "Runtime"); + assertStringIncludes(codeChunk.text ?? "", "```ts"); + assertStringIncludes(codeChunk.text ?? 
"", "```\n"); + }); + + it("finds inflected queries through stemming", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-stem", + }); + + const indexed = await corpus.index({ + rootSessionId: "root-stem", + content: + "# Index Maintenance\n\nThis corpus tracks index updates and index health.", + }); + + const search = await corpus.search({ + rootSessionId: "root-stem", + query: "indices update", + }); + + assertEquals(search.results[0]?.corpus_ref, indexed.corpusRef); + }); + + it("matches porter-equivalent word families beyond simple plural stripping", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-porter", + }); + + const indexed = await corpus.index({ + rootSessionId: "root-porter", + content: + "# Organization Notes\n\nOrganization planning stays searchable across sessions.", + }); + + const search = await corpus.search({ + rootSessionId: "root-porter", + query: "organize planning", + }); + + assertEquals(search.results[0]?.corpus_ref, indexed.corpusRef); + }); + + it("anchors snippets near stemmed matches instead of always falling back to the document start", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-snippet", + }); + + const indexed = await corpus.index({ + rootSessionId: "root-snippet", + content: "# Long Index Notes\n\n" + + "preamble words ".repeat(80) + + "\n\nIndex maintenance happens near the end of this corpus.", + }); + + const search = await corpus.search({ + rootSessionId: "root-snippet", + query: "indices", + }); + + assertEquals(search.results[0]?.corpus_ref, indexed.corpusRef); + assertStringIncludes( + search.results[0]?.snippet ?? 
"", + "Index maintenance happens near the end", + ); + }); + + it("uses BM25-style ranking so repeated and title-weighted terms outrank weak matches", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-bm25", + }); + + const strong = await corpus.index({ + rootSessionId: "root-bm25", + content: + "# Session TTL Guide\n\nSession TTL session TTL refresh session TTL keeps search warm.", + }); + await corpus.index({ + rootSessionId: "root-bm25", + content: "# Session Notes\n\nTTL appears once.", + }); + + const search = await corpus.search({ + rootSessionId: "root-bm25", + query: "session ttl", + }); + + assertEquals(search.results[0]?.corpus_ref, strong.corpusRef); + }); + + it("applies the 200-candidate cap after ranking, so later stronger postings can still surface", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-cap-order", + }); + + for (let index = 1; index <= 205; index += 1) { + await corpus.index({ + rootSessionId: "root-cap-order", + content: index === 205 + ? "# Session Session Session\n\nSession session session session dominates this chunk." + : `# Weak ${index}\n\nSession appears once in weak chunk ${index}.`, + }); + } + + const search = await corpus.search({ + rootSessionId: "root-cap-order", + query: "session", + }); + + assertStringIncludes( + search.results[0]?.snippet ?? 
"", + "dominates this chunk", + ); + }); + + it("keeps RRF- and proximity-relevant chunks eligible until the final 200-candidate cap", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-cap-rrf", + }); + + for (let index = 1; index <= 204; index += 1) { + await corpus.index({ + rootSessionId: "root-cap-rrf", + content: `# Weak ${index}\n\nRedis ${ + "padding ".repeat(20) + } TTL appears separately in weak chunk ${index}.`, + }); + } + const strong = await corpus.index({ + rootSessionId: "root-cap-rrf", + content: + "# Redis TTL Refresh\n\nRedis TTL refresh happens together in this late strong chunk.", + }); + + const search = await corpus.search({ + rootSessionId: "root-cap-rrf", + query: "redis ttl refresh", + }); + + assertEquals(search.results[0]?.corpus_ref, strong.corpusRef); + assertStringIncludes(search.results[0]?.snippet ?? "", "late strong chunk"); + }); + + it("returns a structured error when fetch responds with non-ok status", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-fetch-error", + fetchImpl: () => + Promise.resolve( + new Response("missing", { + status: 404, + headers: { "content-type": "text/html; charset=utf-8" }, + }), + ), + }); + + const result = await corpus.fetchAndIndex({ + rootSessionId: "root-fetch-error", + url: "https://example.com/missing", + timeoutSeconds: 5, + }); + + assertEquals(result.status, "error"); + assertMatch( + result.corpusRef, + /^session:group-fetch-error:root-fetch-error:corpus:[^:]+:meta$/, + ); + assertStringIncludes(result.summary, "HTTP 404"); + assertEquals(result.queryHints, []); + assertEquals(result.fetchedUrl, "https://example.com/missing"); + assertEquals(result.contentType, "text/html"); + assertEquals(result.truncated, false); + }); + + it("uses trigram expansion 
only when exact or stem recall is sparse", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-trigram", + }); + + const exact = await corpus.index({ + rootSessionId: "root-trigram", + content: "# Session TTL\n\nSession TTL preserves local corpus context.", + }); + const partial = await corpus.index({ + rootSessionId: "root-trigram", + content: + "# Sessile Tiling\n\nA distractor with overlapping trigrams only.", + }); + + const exactRecall = await corpus.search({ + rootSessionId: "root-trigram", + query: "session ttl", + }); + const partialRecall = await corpus.search({ + rootSessionId: "root-trigram", + query: "sess tt", + }); + + assertEquals(exactRecall.corpusRefs, [exact.corpusRef]); + assertEquals(partialRecall.corpusRefs.includes(partial.corpusRef), true); + }); + + it("corrects fuzzy misspellings before retrieval", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-fuzzy", + }); + + const indexed = await corpus.index({ + rootSessionId: "root-fuzzy", + content: "# Session TTL\n\nSession TTL keeps the corpus searchable.", + }); + + const search = await corpus.search({ + rootSessionId: "root-fuzzy", + query: "sesion tll", + }); + + assertEquals(search.results[0]?.corpus_ref, indexed.corpusRef); + }); + + it("reranks multi-term matches by proximity", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-proximity", + }); + + const close = await corpus.index({ + rootSessionId: "root-proximity", + content: + "# Redis Session TTL\n\nRedis session TTL refresh happens together in this paragraph.", + }); + await corpus.index({ + rootSessionId: "root-proximity", + content: "# Redis Drift\n\nRedis signals drift.\n\n" + + 
"padding words ".repeat(80) + "\nTTL appears much later.", + }); + + const search = await corpus.search({ + rootSessionId: "root-proximity", + query: "redis ttl", + }); + + assertEquals(search.results[0]?.corpus_ref, close.corpusRef); + }); + + it("stores one canonical full artifact body without duplicating it in chunk payloads", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-artifact-storage", + }); + + const body = "SESSION TTL REPORT\n" + + "session ttl keeps retrieval warm\n".repeat(200); + const artifact = await corpus.storeArtifact({ + rootSessionId: "root-artifact-storage", + toolName: "session_execute", + body, + }); + + const artifactId = artifact.artifactRef.split("/").at(-1) ?? ""; + const corpusId = artifact.corpusRef.split(":").at(-2) ?? ""; + const chunkIds = await redis.getListRange( + `session:group-artifact-storage:root-artifact-storage:corpus:${corpusId}:chunks`, + 0, + 20, + ); + const bodySnapshot = await redis.getString( + `session:group-artifact-storage:root-artifact-storage:artifact:${artifactId}:body`, + ); + const chunkPayloads = await Promise.all( + chunkIds.map((chunkId) => + redis.getHashAll( + `session:group-artifact-storage:root-artifact-storage:chunk:${chunkId}`, + ) + ), + ); + + assertEquals(bodySnapshot, body); + assertEquals( + chunkPayloads.some((chunk) => (chunk.text ?? 
"") === body), + false, + ); + }); + + it("refreshes every affected corpus family on a successful search, not just the top results", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 0.1, + groupId: "group-refresh", + }); + + for (let index = 1; index <= 5; index += 1) { + await corpus.index({ + rootSessionId: "root-refresh", + content: + `# Strong ${index}\n\nSession TTL session TTL refresh session TTL doc ${index}.`, + }); + } + const weak = await corpus.index({ + rootSessionId: "root-refresh", + content: + "# Weak Match\n\nSession TTL appears once. Unique survivor marker remains searchable.", + }); + + await wait(50); + const broad = await corpus.search({ + rootSessionId: "root-refresh", + query: "session ttl", + }); + assertEquals(broad.results.length, 5); + await wait(80); + + const survivor = await corpus.search({ + rootSessionId: "root-refresh", + query: "survivor marker", + }); + + assertEquals(survivor.results[0]?.corpus_ref, weak.corpusRef); + }); + + it("keeps search on postings instead of scanning the full corpora list", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const originalGetListRange = redis.getListRange.bind(redis); + let searchMode = false; + redis.getListRange = async (key, start, stop) => { + if ( + searchMode && key === "session:group-postings:root-postings:corpora" + ) { + throw new Error("search scanned corpora list"); + } + return await originalGetListRange(key, start, stop); + }; + + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-postings", + }); + + await corpus.index({ + rootSessionId: "root-postings", + content: "# Session TTL\n\nSession TTL keeps search local.", + }); + searchMode = true; + + const search = await corpus.search({ + rootSessionId: "root-postings", + query: "session ttl", + }); + + assertEquals(search.results.length > 0, true); + }); + + 
it("migrates provisional-root corpus, posting, artifact, and stats keys onto the canonical root with TTLs", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 120, + groupId: "group-migrate", + }); + + await corpus.index({ + rootSessionId: "parent-root", + content: [ + "# Parent Corpus", + "", + "Canonical parent content remains searchable.", + ].join("\n"), + }); + const migrated = await corpus.storeArtifact({ + rootSessionId: "child-root", + toolName: "session_execute", + body: "temporary root artifact body with redis ttl migration markers", + }); + const sourceMetaBefore = await redis.snapshot(migrated.corpusRef); + const sourceStatsBefore = await redis.snapshot( + "session:group-migrate:child-root:stats", + ); + + await corpus.migrateRootSessionState("child-root", "parent-root"); + + const parentSearch = await corpus.search({ + rootSessionId: "parent-root", + query: "migration markers canonical parent", + }); + const parentStats = await corpus.getStats("parent-root"); + const childSearch = await corpus.search({ + rootSessionId: "child-root", + query: "migration markers", + }); + const sourceMetaAfter = await redis.snapshot(migrated.corpusRef); + const parentCorpora = await redis.getListRange( + "session:group-migrate:parent-root:corpora", + 0, + 10, + ); + + assertEquals(parentSearch.results.length > 0, true); + assertEquals(parentStats.artifactCount, 1); + assertEquals(parentStats.corpusCount, 2); + assertEquals(childSearch.results, []); + assertEquals(sourceMetaAfter.kind, "missing"); + assertEquals(parentCorpora, ["corpus-1", "corpus-2"]); + assertEquals(sourceMetaBefore.kind === "hash", true); + assertEquals(sourceStatsBefore.kind === "hash", true); + if (sourceMetaBefore.kind === "hash") { + const migratedMeta = await redis.snapshot( + "session:group-migrate:parent-root:corpus:corpus-2:meta", + ); + assertEquals(migratedMeta.kind, "hash"); + if 
(migratedMeta.kind === "hash") { + assertEquals( + Math.abs( + (migratedMeta.ttlSeconds ?? 0) - (sourceMetaBefore.ttlSeconds ?? 0), + ) <= 1, + true, + ); + } + } + }); + + it("does not migrate sibling root keys that only share the same prefix", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 120, + groupId: "group-migrate-prefix", + }); + + await corpus.index({ + rootSessionId: "child-root", + content: "# Child Root\n\nOnly this root should migrate.", + }); + const sibling = await corpus.index({ + rootSessionId: "child-root-2", + content: "# Child Root 2\n\nSibling root must stay untouched.", + }); + + await corpus.migrateRootSessionState("child-root", "parent-root"); + + const parentSearch = await corpus.search({ + rootSessionId: "parent-root", + query: "only this root should migrate", + }); + const siblingSearch = await corpus.search({ + rootSessionId: "child-root-2", + query: "sibling root untouched", + }); + const siblingMeta = await redis.snapshot(sibling.corpusRef); + + assertEquals(parentSearch.results.length > 0, true); + assertEquals(siblingSearch.results.length > 0, true); + assertEquals(siblingMeta.kind, "hash"); + }); +}); diff --git a/src/services/session-corpus.ts b/src/services/session-corpus.ts new file mode 100644 index 0000000..a543e0a --- /dev/null +++ b/src/services/session-corpus.ts @@ -0,0 +1,1841 @@ +import type { RedisClient, RedisKeySnapshot } from "./redis-client.ts"; + +const MAX_INDEXED_BODY_BYTES = 512 * 1024; +const SEARCH_RESULT_LIMIT = 5; +const SEARCH_CANDIDATE_LIMIT = 200; +const SEARCH_POSTINGS_FETCH_LIMIT = 1000; +const SEARCH_SNIPPET_LIMIT = 320; +const TEXT_CHUNK_SIZE = 1200; +const TEXT_CHUNK_OVERLAP = 200; +const RRF_K = 60; +const SEARCH_SCAN_LIMIT = 10_000; +const VOCAB_TOKEN = "__vocab__"; +const STEM_TOKEN_PREFIX = "__stem__:"; + +type SessionCorpusOptions = { + redis: RedisClient; + ttlSeconds: number; + groupId: string; 
+ fetchImpl?: typeof fetch; + now?: () => number; +}; + +type IndexInput = { + rootSessionId: string; + content: string; + contentType?: string; + title?: string; + sourceUrl?: string; + artifactId?: string; +}; + +type SearchInput = { + rootSessionId: string; + query: string; +}; + +type FetchAndIndexInput = { + rootSessionId: string; + url: string; + timeoutSeconds?: number; +}; + +type StoreArtifactInput = { + rootSessionId: string; + toolName: string; + body: string; +}; + +type SearchResult = { + corpus_ref: string; + snippet: string; + score: number; +}; + +type CorpusMeta = { + title: string; + contentType: string; + createdAt: number; + sourceUrl?: string; + truncated: boolean; + artifactId?: string; +}; + +type ChunkRecord = { + id: string; + corpusId: string; + title: string; + text: string; + terms: string[]; + stems: string[]; + trigrams: string[]; + termFreqs: Record; + stemFreqs: Record; + stemPositions: Record; + length: number; + createdAt: number; +}; + +type ChunkSource = { + title: string; + text: string; +}; + +type TokenWithPosition = { + token: string; + position: number; +}; + +const encoder = new TextEncoder(); + +const normalizeWhitespace = (value: string): string => + value.replace(/\r\n/g, "\n").replace(/[ \t]+/g, " ").trim(); + +const decodeHtmlEntities = (value: string): string => + value + .replace(/ /gi, " ") + .replace(/&/gi, "&") + .replace(/</gi, "<") + .replace(/>/gi, ">") + .replace(/"/gi, '"') + .replace(/'/gi, "'"); + +const unique = (values: Iterable): T[] => [...new Set(values)]; + +const pluralStemExceptions: Record = { + indices: "index", + index: "index", +}; + +const stemToken = (token: string): string => { + const lower = token.toLowerCase(); + if (pluralStemExceptions[lower]) return pluralStemExceptions[lower]; + if (lower.length <= 3) return lower; + + const isConsonant = (value: string, index: number): boolean => { + const char = value[index]; + if (!char) return false; + if ("aeiou".includes(char)) return false; + if 
(char === "y") { + return index === 0 ? true : !isConsonant(value, index - 1); + } + return true; + }; + + const measure = (value: string): number => { + let count = 0; + let inVowelRun = false; + for (let index = 0; index < value.length; index += 1) { + const vowel = !isConsonant(value, index); + if (vowel) { + inVowelRun = true; + } else if (inVowelRun) { + count += 1; + inVowelRun = false; + } + } + return count; + }; + + const containsVowel = (value: string): boolean => + [...value].some((_char, index) => !isConsonant(value, index)); + + const endsWithDoubleConsonant = (value: string): boolean => + value.length >= 2 && + value.at(-1) === value.at(-2) && + isConsonant(value, value.length - 1); + + const cvc = (value: string): boolean => { + if (value.length < 3) return false; + const a = value.length - 3; + const b = value.length - 2; + const c = value.length - 1; + return isConsonant(value, a) && !isConsonant(value, b) && + isConsonant(value, c) && !"wxy".includes(value[c]); + }; + + const replaceSuffix = ( + value: string, + suffix: string, + replacement: string, + predicate: (stem: string) => boolean = () => true, + ): string | null => { + if (!value.endsWith(suffix)) return null; + const stem = value.slice(0, -suffix.length); + if (!predicate(stem)) return null; + return `${stem}${replacement}`; + }; + + let stem = lower; + + if (stem.endsWith("sses")) stem = stem.slice(0, -2); + else if (stem.endsWith("ies")) stem = stem.slice(0, -2); + else if (stem.endsWith("ss")) { + // Keep. + } else if (stem.endsWith("s")) stem = stem.slice(0, -1); + + const step1b = + replaceSuffix(stem, "eed", "ee", (base) => measure(base) > 0) ?? + replaceSuffix(stem, "eedly", "ee", (base) => measure(base) > 0); + if (step1b) { + stem = step1b; + } else { + const removed = replaceSuffix(stem, "ingly", "", containsVowel) ?? + replaceSuffix(stem, "edly", "", containsVowel) ?? + replaceSuffix(stem, "ing", "", containsVowel) ?? 
+ replaceSuffix(stem, "ed", "", containsVowel); + if (removed) { + stem = removed; + if (/(at|bl|iz)$/.test(stem)) stem = `${stem}e`; + else if (endsWithDoubleConsonant(stem) && !/[lsz]$/.test(stem)) { + stem = stem.slice(0, -1); + } else if (measure(stem) === 1 && cvc(stem)) { + stem = `${stem}e`; + } + } + } + + if (stem.endsWith("y") && containsVowel(stem.slice(0, -1))) { + stem = `${stem.slice(0, -1)}i`; + } + + const step2Rules: Array<[string, string]> = [ + ["ational", "ate"], + ["tional", "tion"], + ["enci", "ence"], + ["anci", "ance"], + ["izer", "ize"], + ["abli", "able"], + ["alli", "al"], + ["entli", "ent"], + ["eli", "e"], + ["ousli", "ous"], + ["ization", "ize"], + ["ation", "ate"], + ["ator", "ate"], + ["alism", "al"], + ["iveness", "ive"], + ["fulness", "ful"], + ["ousness", "ous"], + ["aliti", "al"], + ["iviti", "ive"], + ["biliti", "ble"], + ["logi", "log"], + ]; + for (const [suffix, replacement] of step2Rules) { + const replaced = replaceSuffix( + stem, + suffix, + replacement, + (base) => measure(base) > 0, + ); + if (replaced) { + stem = replaced; + break; + } + } + + const step3Rules: Array<[string, string]> = [ + ["icate", "ic"], + ["ative", ""], + ["alize", "al"], + ["iciti", "ic"], + ["ical", "ic"], + ["ful", ""], + ["ness", ""], + ]; + for (const [suffix, replacement] of step3Rules) { + const replaced = replaceSuffix( + stem, + suffix, + replacement, + (base) => measure(base) > 0, + ); + if (replaced) { + stem = replaced; + break; + } + } + + const step4Suffixes = [ + "ement", + "ance", + "ence", + "able", + "ible", + "ment", + "ant", + "ent", + "ism", + "ate", + "iti", + "ous", + "ive", + "ize", + "al", + "er", + "ic", + "ou", + ]; + for (const suffix of step4Suffixes) { + const replaced = replaceSuffix( + stem, + suffix, + "", + (base) => measure(base) > 1, + ); + if (replaced) { + stem = replaced; + break; + } + } + const ionReplaced = replaceSuffix( + stem, + "ion", + "", + (base) => measure(base) > 1 && /[st]$/.test(base), + ); + if 
(ionReplaced) stem = ionReplaced; + + const withoutTrailingE = replaceSuffix( + stem, + "e", + "", + (base) => measure(base) > 1 || (measure(base) === 1 && !cvc(base)), + ); + if (withoutTrailingE) stem = withoutTrailingE; + if ( + measure(stem) > 1 && endsWithDoubleConsonant(stem) && stem.endsWith("l") + ) { + stem = stem.slice(0, -1); + } + + return stem || lower; +}; + +const tokenizeWithPositions = (value: string): TokenWithPosition[] => { + const matches = value.toLowerCase().matchAll(/[a-z0-9]+/g); + let position = 0; + const tokens: TokenWithPosition[] = []; + for (const match of matches) { + const token = match[0]; + if (token.length < 2) continue; + tokens.push({ token, position: position++ }); + } + return tokens; +}; + +const tokenize = (value: string): string[] => + tokenizeWithPositions(value).map(({ token }) => token); + +const frequencyMap = (values: string[]): Record => { + const counts: Record = {}; + for (const value of values) counts[value] = (counts[value] ?? 0) + 1; + return counts; +}; + +const buildStemPositions = ( + values: TokenWithPosition[], +): Record => { + const positions: Record = {}; + for (const value of values) { + const stem = stemToken(value.token); + positions[stem] ??= []; + positions[stem].push(value.position); + } + return positions; +}; + +const makeTrigrams = (value: string): string[] => { + const normalized = normalizeWhitespace(value).toLowerCase(); + if (normalized.length < 3) return normalized ? 
[normalized] : []; + const trigrams = new Set(); + for (let index = 0; index <= normalized.length - 3; index += 1) { + trigrams.add(normalized.slice(index, index + 3)); + } + return [...trigrams]; +}; + +const htmlToMarkdown = (html: string): string => { + const codePlaceholders = new Map(); + let codeCounter = 0; + let working = html + .replace(//gi, "\n") + .replace(//gi, "\n"); + + working = working.replace( + /]*>\s*(?:]*)>)?([\s\S]*?)(?:<\/code>)?\s*<\/pre>/gi, + (_match, codeAttrs, codeBody) => { + const language = /language-([a-z0-9_-]+)/i.exec(codeAttrs ?? "")?.[1] ?? + ""; + const body = decodeHtmlEntities( + String(codeBody).replace(/<[^>]+>/g, ""), + ).trimEnd(); + const placeholder = `CODEBLOCKPLACEHOLDER${++codeCounter}`; + codePlaceholders.set( + placeholder, + `\n\n\`\`\`${language}\n${body}\n\`\`\`\n\n`, + ); + return `\n\n${placeholder}\n\n`; + }, + ); + + working = working.replace( + /]*>([\s\S]*?)<\/h\1>/gi, + (_m, level, text) => { + const heading = decodeHtmlEntities(String(text).replace(/<[^>]+>/g, " ")) + .replace(/\s+/g, " ").trim(); + return `\n\n${"#".repeat(Number(level))} ${heading}\n\n`; + }, + ); + + working = working.replace(/]*>([\s\S]*?)<\/ol>/gi, (_m, listBody) => { + const items = [...String(listBody).matchAll(/]*>([\s\S]*?)<\/li>/gi)] + .map(([, item], index) => + `${index + 1}. 
${ + decodeHtmlEntities(String(item).replace(/<[^>]+>/g, " ")).replace( + /\s+/g, + " ", + ).trim() + }` + ) + .filter(Boolean); + return `\n\n${items.join("\n")}\n\n`; + }); + + working = working.replace(/]*>([\s\S]*?)<\/ul>/gi, (_m, listBody) => { + const items = [...String(listBody).matchAll(/]*>([\s\S]*?)<\/li>/gi)] + .map(([, item]) => + `- ${ + decodeHtmlEntities(String(item).replace(/<[^>]+>/g, " ")).replace( + /\s+/g, + " ", + ).trim() + }` + ) + .filter(Boolean); + return `\n\n${items.join("\n")}\n\n`; + }); + + working = working + .replace(/<(article|section|div|p)[^>]*>/gi, "\n\n") + .replace(/<\/(article|section|div|p)>/gi, "\n\n") + .replace(//gi, "\n") + .replace(/<[^>]+>/g, " "); + + working = decodeHtmlEntities(working) + .replace(/[ \t]+\n/g, "\n") + .replace(/\n{3,}/g, "\n\n") + .trim(); + + for (const [placeholder, codeBlock] of codePlaceholders) { + working = working.replaceAll(placeholder, codeBlock.trim()); + } + + return working.replace(/```([a-z0-9_-]*)\n\n+/gi, "```$1\n"); +}; + +const inferContentType = (content: string, contentType?: string): string => { + const normalized = (contentType ?? 
"").toLowerCase(); + if (normalized.includes("html")) return "text/html"; + if (normalized.includes("markdown")) return "text/markdown"; + if (normalized.includes("json")) return "application/json"; + const trimmed = content.trim(); + if (trimmed.startsWith("<") && trimmed.includes(">")) return "text/html"; + if ( + (trimmed.startsWith("{") && trimmed.endsWith("}")) || + (trimmed.startsWith("[") && trimmed.endsWith("]")) + ) { + return "application/json"; + } + if (/^#{1,6}\s+/m.test(content)) return "text/markdown"; + return "text/plain"; +}; + +const normalizeContent = ( + content: string, + contentType?: string, +): { body: string; contentType: string; title: string; truncated: boolean } => { + const resolvedContentType = inferContentType(content, contentType); + let normalized = content; + + if (resolvedContentType === "text/html") { + normalized = htmlToMarkdown(content); + } else if (resolvedContentType === "application/json") { + try { + normalized = JSON.stringify(JSON.parse(content), null, 2); + } catch { + normalized = content; + } + } + + let truncated = false; + while (encoder.encode(normalized).byteLength > MAX_INDEXED_BODY_BYTES) { + normalized = normalized.slice( + 0, + Math.max(Math.floor(normalized.length * 0.8), 1), + ); + truncated = true; + } + + const titleLine = normalized + .split("\n") + .map((line) => line.trim()) + .find((line) => line.length > 0) ?? 
"Untitled corpus"; + const title = titleLine.replace(/^#{1,6}\s+/, "").trim(); + + return { + body: normalized.trim(), + contentType: resolvedContentType, + title, + truncated, + }; +}; + +const splitTextChunk = (text: string): string[] => { + const trimmed = text.trim(); + if (!trimmed) return []; + if (trimmed.length <= TEXT_CHUNK_SIZE) return [trimmed]; + + const paragraphs = trimmed.split(/\n{2,}/).map((paragraph) => + paragraph.trim() + ).filter(Boolean); + const chunks: string[] = []; + let current = ""; + + const pushCurrent = () => { + const value = current.trim(); + if (value) chunks.push(value); + current = ""; + }; + + for (const paragraph of paragraphs) { + if (!current) { + current = paragraph; + continue; + } + if (`${current}\n\n${paragraph}`.length <= TEXT_CHUNK_SIZE) { + current = `${current}\n\n${paragraph}`; + continue; + } + pushCurrent(); + if (paragraph.length <= TEXT_CHUNK_SIZE) { + current = paragraph; + continue; + } + + let offset = 0; + while (offset < paragraph.length) { + const end = Math.min(offset + TEXT_CHUNK_SIZE, paragraph.length); + chunks.push(paragraph.slice(offset, end).trim()); + if (end >= paragraph.length) break; + offset += TEXT_CHUNK_SIZE - TEXT_CHUNK_OVERLAP; + } + } + + pushCurrent(); + return chunks; +}; + +const chunkMarkdown = (text: string, fallbackTitle: string): ChunkSource[] => { + const lines = text.split("\n"); + const chunks: ChunkSource[] = []; + let currentTitle = fallbackTitle; + let textBuffer: string[] = []; + let codeBuffer: string[] = []; + let inCodeBlock = false; + + const flushText = () => { + const joined = textBuffer.join("\n").trim(); + textBuffer = []; + for (const piece of splitTextChunk(joined)) { + chunks.push({ title: currentTitle, text: piece }); + } + }; + + const flushCode = () => { + const joined = codeBuffer.join("\n").trim(); + codeBuffer = []; + if (joined) chunks.push({ title: currentTitle, text: `${joined}\n` }); + }; + + for (const line of lines) { + const trimmed = line.trim(); + if 
(!inCodeBlock && /^#{1,6}\s+/.test(trimmed)) { + flushText(); + currentTitle = trimmed.replace(/^#{1,6}\s+/, "").trim() || fallbackTitle; + continue; + } + + if (trimmed.startsWith("```")) { + if (inCodeBlock) { + codeBuffer.push(line); + flushCode(); + inCodeBlock = false; + } else { + flushText(); + inCodeBlock = true; + codeBuffer.push(line); + } + continue; + } + + if (inCodeBlock) codeBuffer.push(line); + else textBuffer.push(line); + } + + if (inCodeBlock) flushCode(); + flushText(); + return chunks.filter((chunk) => chunk.text.trim().length > 0); +}; + +const extractSnippet = ( + text: string, + anchors: { + tokens: string[]; + stems: string[]; + trigrams: string[]; + }, +): string => { + const normalized = text.trim(); + if (normalized.length <= SEARCH_SNIPPET_LIMIT) return normalized; + const lower = normalized.toLowerCase(); + const tokenMatches = anchors.tokens + .map((term) => lower.indexOf(term.toLowerCase())) + .filter((index) => index >= 0); + + const stemMatches = anchors.stems.flatMap((stem) => { + const matches = lower.matchAll(/[a-z0-9]+/g); + const indexes: number[] = []; + for (const match of matches) { + const token = match[0]; + if (stemToken(token) === stem) indexes.push(match.index ?? -1); + } + return indexes.filter((index) => index >= 0); + }); + + const trigramMatches = anchors.trigrams + .map((trigram) => lower.indexOf(trigram.toLowerCase())) + .filter((index) => index >= 0); + + const firstMatch = [...tokenMatches, ...stemMatches, ...trigramMatches] + .sort((a, b) => a - b)[0] ?? 
/**
 * Levenshtein edit distance (insertions, deletions, substitutions) between two
 * strings, compared by UTF-16 code unit.
 *
 * @returns The minimum number of single-unit edits turning `left` into `right`.
 */
const levenshtein = (left: string, right: string): number => {
  if (left === right) return 0;
  if (!left.length) return right.length;
  if (!right.length) return left.length;
  // Rolling-row dynamic programme: `previous[j]` is the distance between the
  // processed prefix of `left` and the first `j` units of `right`.
  let previous = Array.from({ length: right.length + 1 }, (_, index) => index);
  for (let i = 0; i < left.length; i += 1) {
    const current = [i + 1];
    for (let j = 0; j < right.length; j += 1) {
      const cost = left[i] === right[j] ? 0 : 1;
      current[j + 1] = Math.min(
        current[j] + 1, // insertion
        previous[j + 1] + 1, // deletion
        previous[j] + cost, // substitution / match
      );
    }
    previous = current;
  }
  return previous[right.length];
};

/**
 * BM25 contribution of a single term to a single document (k1 = 1.2, b = 0.75).
 *
 * @param tf Term frequency inside the document.
 * @param df Number of documents containing the term.
 * @param docLength Length of the scored document, in tokens.
 * @param avgDocLength Average document length; values below 1 are treated as 1.
 * @param docCount Number of documents in the scored collection.
 * @returns A non-negative score; 0 when `tf`, `df` or `docCount` is not positive.
 */
const bm25Score = (
  tf: number,
  df: number,
  docLength: number,
  avgDocLength: number,
  docCount: number,
): number => {
  if (tf <= 0 || df <= 0 || docCount <= 0) return 0;
  const k1 = 1.2;
  const b = 0.75;
  // `log(1 + (N - n + 0.5) / (n + 0.5))` is only non-negative while n <= N.
  // Callers may pass a document frequency measured over a larger population
  // than `docCount`; clamp it so a frequent term cannot produce a negative
  // score.
  const boundedDf = Math.min(df, docCount);
  const idf = Math.log(1 + ((docCount - boundedDf + 0.5) / (boundedDf + 0.5)));
  const numerator = tf * (k1 + 1);
  const denominator = tf +
    k1 * (1 - b + b * (docLength / Math.max(avgDocLength, 1)));
  return idf * (numerator / denominator);
};
/**
 * Heuristic: does the query look like a partial string (identifier fragment,
 * path, symbol) rather than natural-language words?
 *
 * True when the query contains any character other than ASCII letters, digits
 * and whitespace, or when some token is both absent from `vocabulary` and
 * short: at most 5 characters for a query without a space, at most 4 otherwise.
 */
const partialStringOriented = (
  query: string,
  tokens: string[],
  vocabulary: ReadonlySet<string>,
): boolean => {
  const hasSymbol = /[^a-z0-9\s]/i.test(query);
  if (hasSymbol) return true;

  const maxLength = query.includes(" ") ? 4 : 5;
  for (const token of tokens) {
    if (token.length <= maxLength && !vocabulary.has(token)) return true;
  }
  return false;
};

/** Builds the `local://<tool>/<artifact>` reference string for a stored artifact. */
const artifactRefFor = (toolName: string, artifactId: string): string => {
  return `local://${toolName}/${artifactId}`;
};
/**
 * Largest defined TTL among the arguments, or `undefined` when none is defined.
 */
const maxTtl = (...values: Array<number | undefined>): number | undefined =>
  values.reduce<number | undefined>((best, value) => {
    if (value === undefined) return best;
    return best === undefined ? value : Math.max(best, value);
  }, undefined);

/** True for plain decimal strings such as `"12"`, `"-3"` or `"0.5"`. */
const isNumericString = (value: string | undefined): boolean => {
  if (value === undefined) return false;
  return /^-?\d+(?:\.\d+)?$/.test(value);
};

/**
 * Merges `incoming` hash fields over a copy of `existing`.
 *
 * In `"replace"` mode incoming values win. In `"sum-numeric"` mode a field is
 * added numerically when both sides hold a decimal string; any other field is
 * replaced. Neither input object is mutated.
 */
const mergeHashValues = (
  existing: Record<string, string>,
  incoming: Record<string, string>,
  mode: "replace" | "sum-numeric",
): Record<string, string> => {
  const result: Record<string, string> = { ...existing };
  const summing = mode === "sum-numeric";
  Object.keys(incoming).forEach((field) => {
    const next = incoming[field];
    const prior = result[field];
    result[field] = summing && isNumericString(prior) && isNumericString(next)
      ? String(Number(prior) + Number(next))
      : next;
  });
  return result;
};
""; + const targetCorpusId = corpusIdMap.get(sourceCorpusId); + return targetCorpusId + ? corpusRefFor(targetRootSessionId, targetCorpusId) + : corpusRef; + }; + + const reserveCorpusId = async (rootSessionId: string): Promise => { + const listKey = corporaKey(rootSessionId); + const index = await options.redis.appendToList( + listKey, + "__pending__", + options.ttlSeconds, + ); + const corpusId = `corpus-${index}`; + await options.redis.setListItem(listKey, index - 1, corpusId); + return corpusId; + }; + + const reserveChunkId = async ( + rootSessionId: string, + corpusId: string, + ): Promise<{ chunkId: string; chunkIndex: number }> => { + const listKey = corpusChunksKey(rootSessionId, corpusId); + const index = await options.redis.appendToList( + listKey, + "__pending__", + options.ttlSeconds, + ); + const chunkId = `chunk-${corpusId}-${index}`; + await options.redis.setListItem(listKey, index - 1, chunkId); + return { chunkId, chunkIndex: index - 1 }; + }; + + const reserveArtifactId = (): string => `artifact-${crypto.randomUUID()}`; + + const touchIfPresent = async (key: string) => { + await options.redis.touch(key, options.ttlSeconds).catch(() => undefined); + }; + + const refreshCorpusFamily = async ( + rootSessionId: string, + corpusId: string, + ) => { + await touchIfPresent(corporaKey(rootSessionId)); + await touchIfPresent(statsKey(rootSessionId)); + await touchIfPresent(corpusMetaKey(rootSessionId, corpusId)); + await touchIfPresent(corpusChunksKey(rootSessionId, corpusId)); + await touchIfPresent(vocabKey(rootSessionId)); + + const chunkIds = await options.redis.getListRange( + corpusChunksKey(rootSessionId, corpusId), + 0, + SEARCH_SCAN_LIMIT, + ); + for (const chunkId of chunkIds) { + const chunk = await options.redis.getHashAll( + chunkKey(rootSessionId, chunkId), + ); + if (Object.keys(chunk).length === 0) continue; + await touchIfPresent(chunkKey(rootSessionId, chunkId)); + for (const token of JSON.parse(chunk.terms ?? 
"[]") as string[]) { + await touchIfPresent(termKey(rootSessionId, token)); + } + for (const stem of JSON.parse(chunk.stems ?? "[]") as string[]) { + await touchIfPresent(stemPostingKey(rootSessionId, stem)); + } + for (const trigram of JSON.parse(chunk.trigrams ?? "[]") as string[]) { + await touchIfPresent(trigramKey(rootSessionId, trigram)); + } + } + + const meta = await options.redis.getHashAll( + corpusMetaKey(rootSessionId, corpusId), + ); + if (meta.artifact_id) { + await touchIfPresent(artifactMetaKey(rootSessionId, meta.artifact_id)); + await touchIfPresent(artifactBodyKey(rootSessionId, meta.artifact_id)); + } + }; + + const chunkContent = ( + text: string, + contentType: string, + fallbackTitle: string, + ): ChunkSource[] => { + if (contentType === "text/markdown" || contentType === "text/html") { + return chunkMarkdown(text, fallbackTitle); + } + return splitTextChunk(text).map((piece) => ({ + title: fallbackTitle, + text: piece, + })); + }; + + const writeCorpus = async ( + input: IndexInput, + sourceType: string, + ): Promise<{ + corpusRef: string; + chunkCount: number; + queryHints: string[]; + truncated: boolean; + contentType: string; + }> => { + const normalized = normalizeContent(input.content, input.contentType); + const createdAt = now(); + const meta: CorpusMeta = { + title: input.title ?? normalized.title, + contentType: normalized.contentType, + createdAt, + sourceUrl: input.sourceUrl, + truncated: normalized.truncated, + artifactId: input.artifactId, + }; + const chunks = chunkContent( + normalized.body, + normalized.contentType, + meta.title, + ); + const corpusId = await reserveCorpusId(input.rootSessionId); + await options.redis.setHashFields( + corpusMetaKey(input.rootSessionId, corpusId), + { + title: meta.title, + content_type: meta.contentType, + source_type: sourceType, + source_url: meta.sourceUrl, + created_at: meta.createdAt, + truncated: meta.truncated ? 
"1" : "0", + artifact_id: meta.artifactId, + chunk_count: chunks.length, + root_session_id: input.rootSessionId, + group_id: options.groupId, + }, + options.ttlSeconds, + ); + + const vocabUpdates: Record = {}; + for (const chunk of chunks) { + const { chunkId, chunkIndex } = await reserveChunkId( + input.rootSessionId, + corpusId, + ); + const combined = `${chunk.title}\n${chunk.text}`; + const termPositions = tokenizeWithPositions(combined); + const terms = termPositions.map(({ token }) => token); + const stems = terms.map((token) => stemToken(token)); + const record: ChunkRecord = { + id: chunkId, + corpusId, + title: chunk.title, + text: chunk.text, + terms: unique(terms), + stems: unique(stems), + trigrams: makeTrigrams(combined), + termFreqs: frequencyMap(terms), + stemFreqs: frequencyMap(stems), + stemPositions: buildStemPositions(termPositions), + length: Math.max(terms.length, 1), + createdAt, + }; + + await options.redis.setHashFields( + chunkKey(input.rootSessionId, chunkId), + { + corpus_id: corpusId, + chunk_index: chunkIndex, + title: record.title, + text: record.text, + terms: JSON.stringify(record.terms), + stems: JSON.stringify(record.stems), + trigrams: JSON.stringify(record.trigrams), + term_freqs: JSON.stringify(record.termFreqs), + stem_freqs: JSON.stringify(record.stemFreqs), + stem_positions: JSON.stringify(record.stemPositions), + length: record.length, + created_at: record.createdAt, + }, + options.ttlSeconds, + ); + + for (const term of record.terms) vocabUpdates[term] = stemToken(term); + for (const term of record.terms) { + await options.redis.appendToList( + termKey(input.rootSessionId, term), + chunkId, + options.ttlSeconds, + ); + } + for (const stem of record.stems) { + await options.redis.appendToList( + stemPostingKey(input.rootSessionId, stem), + chunkId, + options.ttlSeconds, + ); + } + for (const trigram of record.trigrams) { + await options.redis.appendToList( + trigramKey(input.rootSessionId, trigram), + chunkId, + 
options.ttlSeconds, + ); + } + } + + if (Object.keys(vocabUpdates).length > 0) { + await options.redis.setHashFields( + vocabKey(input.rootSessionId), + vocabUpdates, + options.ttlSeconds, + ); + } + + const currentStats = await options.redis.getHashAll( + statsKey(input.rootSessionId), + ); + await options.redis.setHashFields( + statsKey(input.rootSessionId), + { + corpus_count: Number(currentStats.corpus_count ?? 0) + 1, + chunk_count: Number(currentStats.chunk_count ?? 0) + chunks.length, + bytes_indexed_total: Number(currentStats.bytes_indexed_total ?? 0) + + encoder.encode(normalized.body).byteLength, + }, + options.ttlSeconds, + ); + + await refreshCorpusFamily(input.rootSessionId, corpusId); + return { + corpusRef: corpusRefFor(input.rootSessionId, corpusId), + chunkCount: chunks.length, + queryHints: unique(tokenize(meta.title)).slice(0, 5), + truncated: meta.truncated, + contentType: meta.contentType, + }; + }; + + const loadChunk = async ( + rootSessionId: string, + chunkId: string, + ): Promise => { + const chunk = await options.redis.getHashAll( + chunkKey(rootSessionId, chunkId), + ); + if (Object.keys(chunk).length === 0) return null; + return { + id: chunkId, + corpusId: chunk.corpus_id ?? "", + title: chunk.title ?? "", + text: chunk.text ?? "", + terms: JSON.parse(chunk.terms ?? "[]"), + stems: JSON.parse(chunk.stems ?? "[]"), + trigrams: JSON.parse(chunk.trigrams ?? "[]"), + termFreqs: JSON.parse(chunk.term_freqs ?? "{}"), + stemFreqs: JSON.parse(chunk.stem_freqs ?? "{}"), + stemPositions: JSON.parse(chunk.stem_positions ?? "{}"), + length: Number(chunk.length ?? 1), + createdAt: Number(chunk.created_at ?? 
0), + }; + }; + + let disposed = false; + + const dispose = (): Promise => { + if (disposed) return Promise.resolve(); + disposed = true; + return Promise.resolve(); + }; + + const migrateRootSessionState = async ( + sourceRootSessionId: string, + targetRootSessionId: string, + ): Promise => { + if (sourceRootSessionId === targetRootSessionId) return; + + const sourcePrefix = sessionPrefix(sourceRootSessionId); + const targetPrefix = sessionPrefix(targetRootSessionId); + const sourceKeys = await options.redis.keysByPrefix(`${sourcePrefix}:`); + if (sourceKeys.length === 0) return; + + const sourceSnapshots = new Map( + await Promise.all(sourceKeys.map(async (key) => { + const snapshot = await options.redis.snapshot(key); + return [key, snapshot] as const; + })), + ); + const originalTargetSnapshots = new Map(); + const workingTargetSnapshots = new Map(); + const handledSourceKeys = new Set(); + + const getWorkingTargetSnapshot = async ( + key: string, + ): Promise => { + const existing = workingTargetSnapshots.get(key); + if (existing) return existing; + const snapshot = await options.redis.snapshot(key); + originalTargetSnapshots.set(key, snapshot); + workingTargetSnapshots.set(key, snapshot); + return snapshot; + }; + + const setWorkingTargetSnapshot = ( + key: string, + snapshot: RedisKeySnapshot, + ): void => { + if (!originalTargetSnapshots.has(key)) { + originalTargetSnapshots.set(key, { kind: "missing" }); + } + workingTargetSnapshots.set(key, snapshot); + }; + + const targetCorporaKey = corporaKey(targetRootSessionId); + const sourceCorporaKey = corporaKey(sourceRootSessionId); + const sourceCorporaSnapshot = sourceSnapshots.get(sourceCorporaKey); + const sourceCorpusIds = sourceCorporaSnapshot?.kind === "list" + ? 
sourceCorporaSnapshot.values + : []; + if (sourceCorporaSnapshot) handledSourceKeys.add(sourceCorporaKey); + + const targetCorporaSnapshot = await getWorkingTargetSnapshot( + targetCorporaKey, + ); + if ( + targetCorporaSnapshot.kind !== "missing" && + targetCorporaSnapshot.kind !== "list" + ) { + throw new Error(`Expected list snapshot for ${targetCorporaKey}`); + } + const targetCorpusIds = targetCorporaSnapshot.kind === "list" + ? targetCorporaSnapshot.values + : []; + const corpusIdMap = new Map(); + sourceCorpusIds.forEach((corpusId, index) => { + corpusIdMap.set(corpusId, `corpus-${targetCorpusIds.length + index + 1}`); + }); + setWorkingTargetSnapshot(targetCorporaKey, { + kind: "list", + values: [ + ...targetCorpusIds, + ...sourceCorpusIds.map((corpusId) => + corpusIdMap.get(corpusId) ?? corpusId + ), + ], + ttlSeconds: maxTtl( + targetCorporaSnapshot.kind === "list" + ? targetCorporaSnapshot.ttlSeconds + : undefined, + sourceCorporaSnapshot?.kind === "list" + ? sourceCorporaSnapshot.ttlSeconds + : undefined, + ), + }); + + const chunkIdMap = new Map(); + for (const sourceCorpusId of sourceCorpusIds) { + const targetCorpusId = corpusIdMap.get(sourceCorpusId); + if (!targetCorpusId) continue; + + const sourceCorpusMetaKey = corpusMetaKey( + sourceRootSessionId, + sourceCorpusId, + ); + const sourceCorpusMetaSnapshot = requireSnapshotKind( + sourceCorpusMetaKey, + sourceSnapshots.get(sourceCorpusMetaKey), + "hash", + ); + handledSourceKeys.add(sourceCorpusMetaKey); + setWorkingTargetSnapshot( + corpusMetaKey(targetRootSessionId, targetCorpusId), + { + kind: "hash", + values: { + ...sourceCorpusMetaSnapshot.values, + root_session_id: targetRootSessionId, + group_id: options.groupId, + }, + ttlSeconds: sourceCorpusMetaSnapshot.ttlSeconds, + }, + ); + + const sourceChunkListKey = corpusChunksKey( + sourceRootSessionId, + sourceCorpusId, + ); + const sourceChunkListSnapshot = requireSnapshotKind( + sourceChunkListKey, + sourceSnapshots.get(sourceChunkListKey), + 
"list", + ); + handledSourceKeys.add(sourceChunkListKey); + const mappedChunkIds = sourceChunkListSnapshot.values.map(( + _chunkId, + index, + ) => `chunk-${targetCorpusId}-${index + 1}`); + sourceChunkListSnapshot.values.forEach((chunkId, index) => { + chunkIdMap.set(chunkId, mappedChunkIds[index]); + }); + setWorkingTargetSnapshot( + corpusChunksKey(targetRootSessionId, targetCorpusId), + { + kind: "list", + values: mappedChunkIds, + ttlSeconds: sourceChunkListSnapshot.ttlSeconds, + }, + ); + + for ( + const [index, sourceChunkId] of sourceChunkListSnapshot.values.entries() + ) { + const sourceChunkKey = chunkKey(sourceRootSessionId, sourceChunkId); + const sourceChunkSnapshot = requireSnapshotKind( + sourceChunkKey, + sourceSnapshots.get(sourceChunkKey), + "hash", + ); + handledSourceKeys.add(sourceChunkKey); + setWorkingTargetSnapshot( + chunkKey(targetRootSessionId, mappedChunkIds[index]), + { + kind: "hash", + values: { + ...sourceChunkSnapshot.values, + corpus_id: targetCorpusId, + chunk_index: String(index), + }, + ttlSeconds: sourceChunkSnapshot.ttlSeconds, + }, + ); + } + } + + const sourceStatsKey = statsKey(sourceRootSessionId); + const sourceStatsSnapshot = sourceSnapshots.get(sourceStatsKey); + if (sourceStatsSnapshot) { + const sourceStats = requireSnapshotKind( + sourceStatsKey, + sourceStatsSnapshot, + "hash", + ); + handledSourceKeys.add(sourceStatsKey); + const targetStatsKey = statsKey(targetRootSessionId); + const targetStatsSnapshot = await getWorkingTargetSnapshot( + targetStatsKey, + ); + const targetStats = targetStatsSnapshot.kind === "hash" + ? targetStatsSnapshot.values + : targetStatsSnapshot.kind === "missing" + ? {} + : (() => { + throw new Error(`Expected hash snapshot for ${targetStatsKey}`); + })(); + setWorkingTargetSnapshot(targetStatsKey, { + kind: "hash", + values: mergeHashValues(targetStats, sourceStats.values, "sum-numeric"), + ttlSeconds: maxTtl( + targetStatsSnapshot.kind === "hash" + ? 
targetStatsSnapshot.ttlSeconds + : undefined, + sourceStats.ttlSeconds, + ), + }); + } + + for (const sourceKey of sourceKeys) { + if (handledSourceKeys.has(sourceKey)) continue; + const sourceSnapshot = sourceSnapshots.get(sourceKey); + if (!sourceSnapshot || sourceSnapshot.kind === "missing") continue; + + if (sourceKey === vocabKey(sourceRootSessionId)) { + const sourceVocab = requireSnapshotKind( + sourceKey, + sourceSnapshot, + "hash", + ); + const targetKey = vocabKey(targetRootSessionId); + const targetSnapshot = await getWorkingTargetSnapshot(targetKey); + const targetValues = targetSnapshot.kind === "hash" + ? targetSnapshot.values + : targetSnapshot.kind === "missing" + ? {} + : (() => { + throw new Error(`Expected hash snapshot for ${targetKey}`); + })(); + setWorkingTargetSnapshot(targetKey, { + kind: "hash", + values: mergeHashValues(targetValues, sourceVocab.values, "replace"), + ttlSeconds: maxTtl( + targetSnapshot.kind === "hash" + ? targetSnapshot.ttlSeconds + : undefined, + sourceVocab.ttlSeconds, + ), + }); + handledSourceKeys.add(sourceKey); + continue; + } + + if ( + sourceKey.startsWith(`${sourcePrefix}:term:`) || + sourceKey.startsWith(`${sourcePrefix}:tri:`) + ) { + const sourcePosting = requireSnapshotKind( + sourceKey, + sourceSnapshot, + "list", + ); + const targetKey = `${targetPrefix}${ + sourceKey.slice(sourcePrefix.length) + }`; + const targetSnapshot = await getWorkingTargetSnapshot(targetKey); + const targetValues = targetSnapshot.kind === "list" + ? targetSnapshot.values + : targetSnapshot.kind === "missing" + ? [] + : (() => { + throw new Error(`Expected list snapshot for ${targetKey}`); + })(); + setWorkingTargetSnapshot(targetKey, { + kind: "list", + values: [ + ...targetValues, + ...sourcePosting.values.map((chunkId) => + chunkIdMap.get(chunkId) ?? chunkId + ), + ], + ttlSeconds: maxTtl( + targetSnapshot.kind === "list" + ? 
targetSnapshot.ttlSeconds + : undefined, + sourcePosting.ttlSeconds, + ), + }); + handledSourceKeys.add(sourceKey); + continue; + } + + if (sourceKey.startsWith(`${sourcePrefix}:artifact:`)) { + const targetKey = `${targetPrefix}${ + sourceKey.slice(sourcePrefix.length) + }`; + const targetSnapshot = await getWorkingTargetSnapshot(targetKey); + if (targetSnapshot.kind !== "missing") { + throw new Error( + `Refusing to overwrite existing artifact key ${targetKey}`, + ); + } + if (sourceKey.endsWith(":meta")) { + const sourceMeta = requireSnapshotKind( + sourceKey, + sourceSnapshot, + "hash", + ); + setWorkingTargetSnapshot(targetKey, { + kind: "hash", + values: { + ...sourceMeta.values, + corpus_ref: mapCorpusRef( + sourceMeta.values.corpus_ref, + sourceRootSessionId, + targetRootSessionId, + corpusIdMap, + ) ?? sourceMeta.values.corpus_ref ?? "", + }, + ttlSeconds: sourceMeta.ttlSeconds, + }); + } else if (sourceKey.endsWith(":body")) { + const sourceBody = requireSnapshotKind( + sourceKey, + sourceSnapshot, + "string", + ); + setWorkingTargetSnapshot(targetKey, sourceBody); + } else { + throw new Error(`Unhandled artifact key ${sourceKey}`); + } + handledSourceKeys.add(sourceKey); + continue; + } + + throw new Error(`Unhandled session corpus key family ${sourceKey}`); + } + + const unhandledSourceKeys = sourceKeys.filter((key) => + !handledSourceKeys.has(key) + ); + if (unhandledSourceKeys.length > 0) { + throw new Error( + `Unhandled session corpus key family ${unhandledSourceKeys.join(", ")}`, + ); + } + + const targetEntries = [...workingTargetSnapshots.entries()].sort(( + left, + right, + ) => left[0].localeCompare(right[0])); + const sourceEntries = [...sourceSnapshots.entries()].sort((left, right) => + right[0].localeCompare(left[0]) + ); + + try { + for (const [key, snapshot] of targetEntries) { + await options.redis.restoreSnapshot(key, snapshot); + } + for (const key of sourceKeys) { + await options.redis.deleteKey(key); + } + } catch (error) { + for 
/**
 * Fetches `input.url` and indexes the response body as a new corpus.
 *
 * The request is aborted after `input.timeoutSeconds` (default 15). A non-2xx
 * response is reported as a `status: "error"` result rather than thrown;
 * network failures and aborts still reject.
 */
async fetchAndIndex(input: FetchAndIndexInput) {
  const controller = new AbortController();
  const timeout = setTimeout(
    () => controller.abort(),
    (input.timeoutSeconds ?? 15) * 1000,
  );
  try {
    const response = await fetchImpl(input.url, {
      signal: controller.signal,
    });
    // Media types are case-insensitive and may carry whitespace before the
    // parameter list; normalise so exact comparisons further down
    // (e.g. against "text/html") behave as intended.
    const contentType =
      response.headers.get("content-type")?.split(";")[0]?.trim()
        .toLowerCase() || "text/plain";
    if (!response.ok) {
      // The body is never read on this path; cancel it (best effort) so the
      // underlying stream/connection is released instead of leaked.
      await response.body?.cancel().catch(() => undefined);
      return {
        status: "error" as const,
        corpusRef: corpusRefFor(
          input.rootSessionId,
          `error-http-${response.status}`,
        ),
        summary:
          `Fetch failed for ${input.url} with HTTP ${response.status}.`,
        queryHints: [],
        fetchedUrl: input.url,
        contentType,
        truncated: false,
      };
    }
    const content = await response.text();
    const indexed = await writeCorpus(
      {
        rootSessionId: input.rootSessionId,
        content,
        contentType,
        sourceUrl: input.url,
      },
      "fetch",
    );
    return {
      status: "ok" as const,
      corpusRef: indexed.corpusRef,
      summary: `Fetched and indexed ${input.url}`,
      queryHints: indexed.queryHints,
      fetchedUrl: input.url,
      contentType: indexed.contentType,
      truncated: indexed.truncated,
    };
  } finally {
    clearTimeout(timeout);
  }
},
writeCorpus( + { + rootSessionId: input.rootSessionId, + content: input.body, + contentType: "text/plain", + title: `${input.toolName} artifact`, + artifactId, + }, + "artifact", + ); + + await options.redis.setHashFields( + artifactMetaKey(input.rootSessionId, artifactId), + { + tool_name: input.toolName, + artifact_ref: artifactRef, + corpus_ref: corpus.corpusRef, + bytes: encoder.encode(input.body).byteLength, + created_at: now(), + }, + options.ttlSeconds, + ); + + const currentStats = await options.redis.getHashAll( + statsKey(input.rootSessionId), + ); + await options.redis.setHashFields( + statsKey(input.rootSessionId), + { + artifact_count: Number(currentStats.artifact_count ?? 0) + 1, + bytes_saved_estimate: Number(currentStats.bytes_saved_estimate ?? 0) + + encoder.encode(input.body).byteLength, + }, + options.ttlSeconds, + ); + + await refreshCorpusFamily( + input.rootSessionId, + corpus.corpusRef.split(":").at(-2) ?? "", + ); + + return { + status: "ok" as const, + artifactRef, + corpusRef: corpus.corpusRef, + summary: normalizeWhitespace(input.body).slice(0, SEARCH_SNIPPET_LIMIT), + }; + }, + + async search(input: SearchInput): Promise<{ + status: "ok"; + results: SearchResult[]; + corpusRefs: string[]; + truncated: boolean; + }> { + const queryTokens = unique(tokenize(input.query)); + const vocabulary = await options.redis.getHashAll( + vocabKey(input.rootSessionId), + ); + const vocabularyTerms = new Set(Object.keys(vocabulary)); + + const correctedTokens = await Promise.all( + queryTokens.map(async (token) => { + const exact = await options.redis.getListRange( + termKey(input.rootSessionId, token), + 0, + 0, + ); + const stem = await options.redis.getListRange( + stemPostingKey(input.rootSessionId, stemToken(token)), + 0, + 0, + ); + if ( + exact.length > 0 || stem.length > 0 || vocabularyTerms.has(token) + ) { + return token; + } + + let best = token; + let bestDistance = Number.POSITIVE_INFINITY; + for (const candidate of vocabularyTerms) { + 
const distance = levenshtein(token, candidate); + if (distance < bestDistance) { + best = candidate; + bestDistance = distance; + } + } + return bestDistance <= Math.max(1, Math.floor(token.length / 3)) + ? best + : token; + }), + ); + + const queryStems = correctedTokens.map((token) => stemToken(token)); + const queryTrigrams = makeTrigrams(correctedTokens.join(" ")); + + const exactCandidateIds = new Set(); + const stemCandidateIds = new Set(); + + const tokenHitCounts = new Map(); + const stemHitCounts = new Map(); + + for (const token of correctedTokens) { + const chunkIds = await options.redis.getListRange( + termKey(input.rootSessionId, token), + 0, + SEARCH_POSTINGS_FETCH_LIMIT - 1, + ); + tokenHitCounts.set(token, chunkIds.length); + for (const chunkId of chunkIds) exactCandidateIds.add(chunkId); + } + for (const stem of queryStems) { + const chunkIds = await options.redis.getListRange( + stemPostingKey(input.rootSessionId, stem), + 0, + SEARCH_POSTINGS_FETCH_LIMIT - 1, + ); + stemHitCounts.set(stem, chunkIds.length); + for (const chunkId of chunkIds) stemCandidateIds.add(chunkId); + } + + const sparseRecall = correctedTokens.some((token, index) => { + const stem = queryStems[index]; + return (tokenHitCounts.get(token) ?? 0) === 0 && + (stemHitCounts.get(stem) ?? 
0) === 0; + }) || unique([...exactCandidateIds, ...stemCandidateIds]).length === 0; + const useTrigrams = queryTrigrams.length > 0 && + (sparseRecall || + partialStringOriented(input.query, queryTokens, vocabularyTerms)); + + const trigramCandidateIds = new Set(); + if (useTrigrams) { + for (const trigram of queryTrigrams) { + const chunkIds = await options.redis.getListRange( + trigramKey(input.rootSessionId, trigram), + 0, + SEARCH_POSTINGS_FETCH_LIMIT - 1, + ); + for (const chunkId of chunkIds) trigramCandidateIds.add(chunkId); + } + } + + const candidateIds = unique([ + ...exactCandidateIds, + ...stemCandidateIds, + ...trigramCandidateIds, + ]); + const candidateRecords = (await Promise.all( + candidateIds.map((chunkId) => loadChunk(input.rootSessionId, chunkId)), + )).filter((value): value is ChunkRecord => value !== null); + const docCount = Math.max(candidateRecords.length, 1); + const avgDocLength = + candidateRecords.reduce((sum, record) => sum + record.length, 0) / + docCount; + const termDocFreqs = Object.fromEntries( + await Promise.all(correctedTokens.map(async (token) => [ + token, + await options.redis.getListLength( + termKey(input.rootSessionId, token), + ), + ])), + ) as Record; + const stemDocFreqs = Object.fromEntries( + await Promise.all(queryStems.map(async (stem) => [ + stem, + await options.redis.getListLength( + stemPostingKey(input.rootSessionId, stem), + ), + ])), + ) as Record; + + const exactRanking = [...candidateRecords] + .map((record) => ({ + chunkId: record.id, + score: correctedTokens.reduce((sum, token) => { + const titleTokens = tokenize(record.title); + const titleFreqs = frequencyMap(titleTokens); + return sum + bm25Score( + (record.termFreqs[token] ?? 0) + ((titleFreqs[token] ?? 0) * 2), + termDocFreqs[token] ?? 
0, + record.length, + avgDocLength, + docCount, + ); + }, 0), + })) + .filter((item) => item.score > 0) + .sort((left, right) => right.score - left.score); + + const stemRanking = [...candidateRecords] + .map((record) => ({ + chunkId: record.id, + score: queryStems.reduce((sum, stem) => { + const titleTokens = tokenize(record.title); + const titleStems = titleTokens.map((token) => stemToken(token)); + const titleStemFreqs = frequencyMap(titleStems); + return sum + (bm25Score( + (record.stemFreqs[stem] ?? 0) + (titleStemFreqs[stem] ?? 0), + stemDocFreqs[stem] ?? 0, + record.length, + avgDocLength, + docCount, + ) * 0.6); + }, 0), + })) + .filter((item) => item.score > 0) + .sort((left, right) => right.score - left.score); + + const trigramRanking = useTrigrams + ? [...candidateRecords] + .map((record) => ({ + chunkId: record.id, + score: queryTrigrams.length === 0 + ? 0 + : queryTrigrams.filter((trigram) => + record.trigrams.includes(trigram) + ) + .length / queryTrigrams.length, + })) + .filter((item) => item.score > 0) + .sort((left, right) => right.score - left.score) + : []; + + const preliminaryScores = new Map(); + for (const ranking of [exactRanking, stemRanking, trigramRanking]) { + for (const item of ranking) { + preliminaryScores.set( + item.chunkId, + (preliminaryScores.get(item.chunkId) ?? 
0) + item.score, + ); + } + } + const boundedCandidateIds = [...preliminaryScores.entries()] + .sort((left, right) => right[1] - left[1]) + .slice(0, SEARCH_CANDIDATE_LIMIT) + .map(([chunkId]) => chunkId); + const candidateIdSet = new Set(boundedCandidateIds); + const boundedCandidateRecords = candidateRecords.filter((record) => + candidateIdSet.has(record.id) + ); + + const rankMaps = [exactRanking, stemRanking, trigramRanking].map(( + ranking, + ) => new Map(ranking.map((item, index) => [item.chunkId, index + 1]))); + + const scored = boundedCandidateRecords.map((record) => { + const titleTokens = tokenize(record.title); + const titleFreqs = frequencyMap(titleTokens); + const titleStems = titleTokens.map((token) => stemToken(token)); + const titleStemFreqs = frequencyMap(titleStems); + + let lexical = 0; + for (const token of correctedTokens) { + lexical += bm25Score( + (record.termFreqs[token] ?? 0) + ((titleFreqs[token] ?? 0) * 2), + termDocFreqs[token] ?? 0, + record.length, + avgDocLength, + docCount, + ); + } + for (const stem of queryStems) { + lexical += bm25Score( + (record.stemFreqs[stem] ?? 0) + (titleStemFreqs[stem] ?? 0), + stemDocFreqs[stem] ?? 0, + record.length, + avgDocLength, + docCount, + ) * 0.6; + } + + let rrf = 0; + for (const rankMap of rankMaps) { + const rank = rankMap.get(record.id); + if (rank) rrf += 1 / (RRF_K + rank); + } + + const trigramScore = useTrigrams + ? 
queryTrigrams.filter((trigram) => record.trigrams.includes(trigram)) + .length / Math.max(queryTrigrams.length, 1) * 0.25 + : 0; + const proximity = proximityBoost(queryStems, record.stemPositions); + const recencyBoost = Math.min( + 0.1, + Math.max(0, (record.createdAt - (now() - 86_400_000)) / 86_400_000) * + 0.1, + ); + const shorterChunkBoost = Math.max( + 0, + 0.08 - Math.min(record.length, 1_600) / 20_000, + ); + + const score = lexical + rrf + trigramScore + proximity + recencyBoost + + shorterChunkBoost; + return { + corpusId: record.corpusId, + corpus_ref: corpusRefFor(input.rootSessionId, record.corpusId), + snippet: extractSnippet( + `${record.title}\n${record.text}`, + { + tokens: correctedTokens, + stems: queryStems, + trigrams: useTrigrams ? queryTrigrams : [], + }, + ), + score, + }; + }).filter((item) => item.score > 0); + + scored.sort((left, right) => { + if (right.score !== left.score) return right.score - left.score; + if (left.corpus_ref !== right.corpus_ref) { + return left.corpus_ref.localeCompare(right.corpus_ref); + } + return left.snippet.localeCompare(right.snippet); + }); + + const results = scored.slice(0, SEARCH_RESULT_LIMIT).map(( + { corpusId: _corpusId, ...result }, + ) => result); + const corpusRefs = unique(results.map((result) => result.corpus_ref)); + const matchedCorpusIds = unique(scored.map((result) => result.corpusId)); + for (const corpusId of matchedCorpusIds) { + if (corpusId) await refreshCorpusFamily(input.rootSessionId, corpusId); + } + + return { + status: "ok", + results, + corpusRefs, + truncated: scored.length > SEARCH_RESULT_LIMIT, + }; + }, + + async getStats(rootSessionId: string) { + const counters = await options.redis.getHashAll(statsKey(rootSessionId)); + return { + counters: Object.fromEntries( + Object.entries(counters).map(([key, value]) => [key, Number(value)]), + ), + corpusCount: Number(counters.corpus_count ?? 0), + artifactCount: Number(counters.artifact_count ?? 
0), + bytesSavedEstimate: Number(counters.bytes_saved_estimate ?? 0), + }; + }, + + migrateRootSessionState, + dispose, + }; +}; diff --git a/src/services/session-mcp-runtime.test.ts b/src/services/session-mcp-runtime.test.ts new file mode 100644 index 0000000..3a28715 --- /dev/null +++ b/src/services/session-mcp-runtime.test.ts @@ -0,0 +1,609 @@ +import { + assert, + assertEquals, + assertExists, + assertStringIncludes, +} from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { + createSessionMcpRuntime, + SESSION_MCP_RESPONSE_BUDGET_BYTES, +} from "./session-mcp-runtime.ts"; +import { + SESSION_MCP_TOOL_NAMES, + sessionMcpRequestSchemas, + sessionMcpResponseSchemas, + type SessionMcpToolName, +} from "./session-mcp-types.ts"; +import { RedisClient } from "./redis-client.ts"; + +const textEncoder = new TextEncoder(); + +const toolContext = { + sessionID: "session-123", + messageID: "message-123", + agent: "agent-123", + directory: "/workspace/project", + worktree: "/workspace/project", + abort: AbortSignal.timeout(1_000), + metadata: () => {}, + ask: async () => {}, +}; + +const validRequests: Record> = { + session_execute: { + root_session_id: "root-123", + command: "pwd", + }, + session_execute_file: { + root_session_id: "root-123", + paths: ["README.md"], + }, + session_batch_execute: { + root_session_id: "root-123", + commands: [{ command: "first" }, { command: "second" }], + }, + session_index: { + root_session_id: "root-123", + content: "hello world", + }, + session_search: { + root_session_id: "root-123", + query: "hello", + }, + session_fetch_and_index: { + root_session_id: "root-123", + url: "https://example.com", + }, + session_stats: { + root_session_id: "root-123", + }, + session_doctor: { + root_session_id: "root-123", + }, +}; + +describe("session-mcp-runtime", () => { + it("registers exactly the 8 session tools", () => { + const runtime = createSessionMcpRuntime(); + + try { + 
assertEquals(Object.keys(runtime.tools), [...SESSION_MCP_TOOL_NAMES]); + } finally { + void runtime.dispose(); + } + }); + + it("rejects requests without root_session_id for every tool schema", () => { + for (const toolName of SESSION_MCP_TOOL_NAMES) { + const request = { ...validRequests[toolName] }; + delete request.root_session_id; + + const parsed = sessionMcpRequestSchemas[toolName].safeParse(request); + assertEquals(parsed.success, false, toolName); + } + }); + + it("returns minimal valid stub responses for all registered tools", async () => { + const runtime = createSessionMcpRuntime(); + + try { + for (const toolName of SESSION_MCP_TOOL_NAMES) { + const serialized = await runtime.tools[toolName].execute( + validRequests[toolName], + toolContext, + ); + const parsed = JSON.parse(serialized); + + assertEquals( + sessionMcpResponseSchemas[toolName].safeParse(parsed).success, + true, + toolName, + ); + } + } finally { + await runtime.dispose(); + } + }); + + it("caps serialized responses to the exact 8 KB budget", async () => { + const runtime = createSessionMcpRuntime(); + + try { + for (const toolName of SESSION_MCP_TOOL_NAMES) { + const serialized = await runtime.tools[toolName].execute( + validRequests[toolName], + toolContext, + ); + + assert( + textEncoder.encode(serialized).byteLength <= + SESSION_MCP_RESPONSE_BUDGET_BYTES, + `${toolName} exceeded response budget`, + ); + } + } finally { + await runtime.dispose(); + } + }); + + it("falls back to a local artifact reference when inline output crosses 8 KB", async () => { + const runtime = createSessionMcpRuntime({ + handlers: { + session_execute: () => + Promise.resolve({ + status: "ok", + summary: "x".repeat(SESSION_MCP_RESPONSE_BUDGET_BYTES + 1_024), + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: SESSION_MCP_RESPONSE_BUDGET_BYTES + 1_024, + }), + }, + }); + + try { + const serialized = await runtime.tools.session_execute.execute( + validRequests.session_execute, + toolContext, 
+ ); + const parsed = JSON.parse(serialized); + + assert( + textEncoder.encode(serialized).byteLength <= + SESSION_MCP_RESPONSE_BUDGET_BYTES, + ); + assertExists(parsed.artifact_ref); + assertEquals( + parsed.artifact_ref.startsWith("local://session_execute/"), + true, + ); + assert(parsed.summary.length < SESSION_MCP_RESPONSE_BUDGET_BYTES); + } finally { + await runtime.dispose(); + } + }); + + it("executes session_batch_execute sequentially in request order", async () => { + const executionOrder: string[] = []; + const runtime = createSessionMcpRuntime({ + handlers: { + session_execute: (request: { command: string }) => { + executionOrder.push(request.command); + return Promise.resolve({ + status: "ok", + summary: `executed ${request.command}`, + exit_code: executionOrder.length - 1, + timed_out: false, + truncated: false, + bytes_captured: request.command.length, + }); + }, + }, + }); + + try { + const serialized = await runtime.tools.session_batch_execute.execute( + { + root_session_id: "root-123", + commands: [ + { command: "first" }, + { command: "second" }, + { command: "third" }, + ], + }, + toolContext, + ); + const parsed = JSON.parse(serialized); + + assertEquals(executionOrder, ["first", "second", "third"]); + assertEquals( + parsed.results.map((item: { summary: string }) => item.summary), + [ + "executed first", + "executed second", + "executed third", + ], + ); + } finally { + await runtime.dispose(); + } + }); + + it("indexes local content and serves session_search from the local corpus", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + } as never); + + try { + await runtime.tools.session_index.execute( + { + root_session_id: "root-123", + content: + "# Redis Session TTLs\n\nSession TTL refreshes the local session corpus.", + }, + toolContext, + ); + const serialized = await runtime.tools.session_search.execute( + { + 
root_session_id: "root-123", + query: "session ttl", + }, + toolContext, + ); + const parsed = JSON.parse(serialized); + + assertEquals(parsed.status, "ok"); + assertEquals(parsed.results.length > 0, true); + assertEquals(parsed.results[0].snippet.includes("Session TTL"), true); + } finally { + await runtime.dispose(); + } + }); + + it("stores oversized session_execute output in the local corpus so it becomes searchable", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + handlers: { + session_execute: () => + Promise.resolve({ + status: "ok", + summary: "SESSION TTL REPORT\n" + + "session ttl keeps local corpus search warm\n".repeat(400), + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: SESSION_MCP_RESPONSE_BUDGET_BYTES + 4_096, + }), + }, + } as never); + + try { + const executeSerialized = await runtime.tools.session_execute.execute( + validRequests.session_execute, + toolContext, + ); + const searchSerialized = await runtime.tools.session_search.execute( + { + root_session_id: "root-123", + query: "session ttl", + }, + toolContext, + ); + const executed = JSON.parse(executeSerialized); + const search = JSON.parse(searchSerialized); + + assertExists(executed.artifact_ref); + assertEquals(search.results.length > 0, true); + assertEquals(search.results[0].snippet.includes("session ttl"), true); + } finally { + await runtime.dispose(); + } + }); + + it("stores the full hidden payload for oversized session_execute overflow, not only the visible summary", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const hiddenPayload = "FULL SESSION PAYLOAD\n" + + "full payload marker\n".repeat(400); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + groupId: "group-full-artifact", + handlers: { + session_execute: () => + Promise.resolve({ + status: "ok", + 
summary: "Visible bounded summary only.", + artifact_ref: `inline://payload/${ + encodeURIComponent(hiddenPayload) + }`, + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: SESSION_MCP_RESPONSE_BUDGET_BYTES + 4_096, + }), + }, + } as never); + + try { + const serialized = await runtime.tools.session_execute.execute( + validRequests.session_execute, + toolContext, + ); + const parsed = JSON.parse(serialized); + const artifactId = String(parsed.artifact_ref).split("/").at(-1) ?? ""; + const storedBody = await redis.getString( + `session:group-full-artifact:root-123:artifact:${artifactId}:body`, + ); + + assertEquals(parsed.summary, "Visible bounded summary only."); + assertEquals(storedBody, hiddenPayload); + } finally { + await runtime.dispose(); + } + }); + + it("persists hidden large session_execute output even when the visible response is already bounded", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const hiddenPayload = "HIDDEN LARGE PAYLOAD\n" + + "searchable hidden marker\n".repeat(300); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + groupId: "group-hidden-artifact", + handlers: { + session_execute: () => + Promise.resolve({ + status: "ok", + summary: "Visible summary stays within budget.", + artifact_ref: `inline://payload/${ + encodeURIComponent(hiddenPayload) + }`, + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: hiddenPayload.length, + }), + }, + } as never); + + try { + const executeSerialized = await runtime.tools.session_execute.execute( + validRequests.session_execute, + toolContext, + ); + const execute = JSON.parse(executeSerialized); + const searchSerialized = await runtime.tools.session_search.execute( + { + root_session_id: "root-123", + query: "searchable hidden marker", + }, + toolContext, + ); + const search = JSON.parse(searchSerialized); + + assertEquals(execute.summary, "Visible summary stays within budget."); + 
assertExists(execute.artifact_ref); + assertEquals(search.results.length > 0, true); + assertStringIncludes( + search.results[0].snippet, + "searchable hidden marker", + ); + } finally { + await runtime.dispose(); + } + }); + + it("uses the production-style redis runtime path for session_index and session_search", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 90, + groupId: "group-runtime", + } as never); + + try { + const indexedSerialized = await runtime.tools.session_index.execute( + { + root_session_id: "root-runtime", + content: + "# Runtime Search\n\nSession TTL remains available through the live corpus.", + }, + toolContext, + ); + const searchSerialized = await runtime.tools.session_search.execute( + { + root_session_id: "root-runtime", + query: "session ttl", + }, + toolContext, + ); + + const indexed = JSON.parse(indexedSerialized); + const search = JSON.parse(searchSerialized); + + assertEquals( + indexed.corpus_ref, + "session:group-runtime:root-runtime:corpus:corpus-1:meta", + ); + assertEquals(search.corpus_refs, [indexed.corpus_ref]); + assertEquals(search.results.length > 0, true); + } finally { + await runtime.dispose(); + } + }); + + it("stores oversized session_batch_execute output behind bounded artifact refs instead of overflowing the response budget", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + groupId: "group-batch", + handlers: { + session_execute: (request: { command: string }) => + Promise.resolve({ + status: "ok", + summary: `${request.command}: ` + "x".repeat(6_000), + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: 6_010, + }), + }, + } as never); + + try { + const serialized = await runtime.tools.session_batch_execute.execute( + { + root_session_id: "root-batch", + 
commands: [ + { command: "first" }, + { command: "second" }, + ], + }, + toolContext, + ); + const parsed = JSON.parse(serialized); + + assert( + textEncoder.encode(serialized).byteLength <= + SESSION_MCP_RESPONSE_BUDGET_BYTES, + ); + assertEquals(parsed.truncated, true); + assertEquals(parsed.results.length, 2); + assertExists(parsed.results[0].artifact_ref); + assertExists(parsed.results[1].artifact_ref); + assertEquals( + parsed.results[0].artifact_ref.startsWith("local://session_execute/"), + true, + ); + } finally { + await runtime.dispose(); + } + }); + + it("uses group-scoped stub refs when redis-backed corpus storage is unavailable", async () => { + const runtime = createSessionMcpRuntime({ + groupId: "group-stub", + }); + + try { + const indexedSerialized = await runtime.tools.session_index.execute( + { + root_session_id: "root-stub", + content: "stub body", + }, + toolContext, + ); + const fetchSerialized = await runtime.tools.session_fetch_and_index + .execute( + { + root_session_id: "root-stub", + url: "https://example.com", + }, + toolContext, + ); + + const indexed = JSON.parse(indexedSerialized); + const fetched = JSON.parse(fetchSerialized); + + assertEquals( + indexed.corpus_ref, + "session:group-stub:root-stub:corpus:stub-index:meta", + ); + assertEquals( + fetched.corpus_ref, + "session:group-stub:root-stub:corpus:stub-fetch:meta", + ); + } finally { + await runtime.dispose(); + } + }); + + it("serializes a schema-valid error response for non-ok fetches", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const originalFetch = globalThis.fetch; + globalThis.fetch = () => + Promise.resolve( + new Response("missing", { + status: 404, + headers: { "content-type": "text/plain; charset=utf-8" }, + }), + ); + + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + groupId: "group-runtime-fetch-error", + } as never); + + try { + const serialized = await 
runtime.tools.session_fetch_and_index.execute( + { + root_session_id: "root-runtime-fetch-error", + url: "https://example.com/missing", + }, + toolContext, + ); + const parsed = JSON.parse(serialized); + + assertEquals( + sessionMcpResponseSchemas.session_fetch_and_index.safeParse(parsed) + .success, + true, + ); + assertEquals(parsed.status, "error"); + assertEquals(parsed.corpus_ref.length > 0, true); + assertStringIncludes(parsed.summary, "HTTP 404"); + assertEquals(parsed.query_hints, []); + assertEquals(parsed.fetched_url, "https://example.com/missing"); + assertEquals(parsed.content_type, "text/plain"); + assertEquals(parsed.truncated, false); + } finally { + globalThis.fetch = originalFetch; + await runtime.dispose(); + } + }); + + it("disposes redis-backed corpus resources exactly once during runtime teardown", async () => { + let disposeCalls = 0; + const runtime = createSessionMcpRuntime({ + redisClient: new RedisClient({ endpoint: "redis://unused" }), + sessionTtlSeconds: 60, + createSessionCorpusService: () => ({ + index: () => + Promise.resolve({ + status: "ok", + corpusRef: "ref", + chunkCount: 0, + queryHints: [], + }), + search: () => + Promise.resolve({ + status: "ok", + results: [], + corpusRefs: [], + truncated: false, + }), + fetchAndIndex: () => + Promise.resolve({ + status: "ok", + corpusRef: "ref", + summary: "ok", + queryHints: [], + fetchedUrl: "url", + contentType: "text/plain", + truncated: false, + }), + getStats: () => + Promise.resolve({ + counters: {}, + corpusCount: 0, + artifactCount: 0, + bytesSavedEstimate: 0, + }), + storeArtifact: () => + Promise.resolve({ + status: "ok", + artifactRef: "local://session_execute/1", + corpusRef: "ref", + summary: "ok", + }), + migrateRootSessionState: () => Promise.resolve(), + dispose: () => { + disposeCalls += 1; + return Promise.resolve(); + }, + }), + } as never); + + await runtime.dispose(); + await runtime.dispose(); + + assertEquals(disposeCalls, 1); + }); +}); diff --git 
a/src/services/session-mcp-runtime.ts b/src/services/session-mcp-runtime.ts new file mode 100644 index 0000000..bcedb9d --- /dev/null +++ b/src/services/session-mcp-runtime.ts @@ -0,0 +1,591 @@ +import { + tool, + type ToolContext, + type ToolDefinition, +} from "@opencode-ai/plugin"; +import type { RedisClient } from "./redis-client.ts"; +import { + createSessionCorpusService, + type SessionCorpusService, +} from "./session-corpus.ts"; +import { + SESSION_MCP_TOOL_NAMES, + type SessionMcpRequestMap, + sessionMcpRequestSchemas, + type SessionMcpResponseMap, + sessionMcpResponseSchemas, + type SessionMcpToolName, +} from "./session-mcp-types.ts"; + +export const SESSION_MCP_RESPONSE_BUDGET_BYTES = 8 * 1024; + +type PluginToolArgs = Parameters[0]["args"]; + +const pluginSchema = tool.schema; + +const pluginRootSessionIdArgs: PluginToolArgs = { + root_session_id: pluginSchema.string().min(1), +}; + +const pluginSessionExecuteStepSchema = pluginSchema.object({ + command: pluginSchema.string().min(1), + timeout_seconds: pluginSchema.number().int().positive().max(120).optional(), +}); + +const sessionMcpToolArgs: Record = { + session_execute: { + ...pluginRootSessionIdArgs, + command: pluginSchema.string().min(1), + timeout_seconds: pluginSchema.number().int().positive().max(120).optional(), + }, + session_execute_file: { + ...pluginRootSessionIdArgs, + paths: pluginSchema.array(pluginSchema.string().min(1)).min(1), + }, + session_batch_execute: { + ...pluginRootSessionIdArgs, + commands: pluginSchema.array(pluginSessionExecuteStepSchema).min(1), + }, + session_index: { + ...pluginRootSessionIdArgs, + content: pluginSchema.string(), + }, + session_search: { + ...pluginRootSessionIdArgs, + query: pluginSchema.string().min(1), + }, + session_fetch_and_index: { + ...pluginRootSessionIdArgs, + url: pluginSchema.string().url(), + timeout_seconds: pluginSchema.number().int().positive().max(120).optional(), + }, + session_stats: { + ...pluginRootSessionIdArgs, + }, + 
session_doctor: { + ...pluginRootSessionIdArgs, + }, +}; + +type SessionMcpHandler = ( + request: SessionMcpRequestMap[TToolName], + context: ToolContext, +) => Promise; + +type SessionMcpHandlerMap = { + [K in SessionMcpToolName]: SessionMcpHandler; +}; + +type SessionMcpRuntimeOptions = { + handlers?: Partial; + redisClient?: RedisClient; + sessionTtlSeconds?: number; + groupId?: string; + createSessionCorpusService?: typeof createSessionCorpusService; +}; + +export type SessionMcpRuntime = { + tools: Record; + dispose: () => Promise; + migrateRootSessionState: ( + sourceRootSessionId: string, + targetRootSessionId: string, + ) => Promise; +}; + +const parseRequest = ( + toolName: TToolName, + rawRequest: unknown, +): SessionMcpRequestMap[TToolName] => + sessionMcpRequestSchemas[toolName].parse( + rawRequest, + ) as SessionMcpRequestMap[TToolName]; + +const parseResponse = ( + toolName: TToolName, + rawResponse: unknown, +): SessionMcpResponseMap[TToolName] => + sessionMcpResponseSchemas[toolName].parse( + rawResponse, + ) as SessionMcpResponseMap[TToolName]; + +const textEncoder = new TextEncoder(); + +const serialize = (value: unknown): string => JSON.stringify(value); + +const extractInlineArtifactPayload = ( + artifactRef: string | undefined, +): string | null => { + if (!artifactRef?.startsWith("inline://payload/")) return null; + try { + return decodeURIComponent(artifactRef.slice("inline://payload/".length)); + } catch { + return null; + } +}; + +const byteLength = (value: string): number => + textEncoder.encode(value).byteLength; + +const isWithinBudget = (value: string): boolean => + byteLength(value) <= SESSION_MCP_RESPONSE_BUDGET_BYTES; + +const makeCorpusRef = ( + groupId: string, + rootSessionId: string, + corpusId: string, +): string => `session:${groupId}:${rootSessionId}:corpus:${corpusId}:meta`; + +export const createSessionMcpRuntime = ( + options: SessionMcpRuntimeOptions = {}, +): SessionMcpRuntime => { + const groupId = options.groupId ?? 
"local"; + const createCorpus = options.createSessionCorpusService ?? + createSessionCorpusService; + const corpus: SessionCorpusService | null = options.redisClient + ? createCorpus({ + redis: options.redisClient, + ttlSeconds: options.sessionTtlSeconds ?? 60, + groupId, + }) + : null; + let artifactCounter = 0; + const artifactStore = new Map(); + + const writeArtifact = ( + toolName: SessionMcpToolName, + body: string, + ): Promise => { + const artifactRef = `local://${toolName}/${++artifactCounter}`; + artifactStore.set(artifactRef, body); + return Promise.resolve(artifactRef); + }; + + const defaultHandlers: SessionMcpHandlerMap = { + session_execute: (request) => + Promise.resolve({ + status: "ok", + summary: + `Stub session_execute accepted command for ${request.root_session_id}.`, + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: 0, + }), + session_execute_file: (request) => + Promise.resolve({ + status: "ok", + summary: + `Stub session_execute_file accepted ${request.paths.length} file(s).`, + file_count: request.paths.length, + truncated: false, + }), + session_batch_execute: async (request, context) => { + const results: SessionMcpResponseMap["session_execute"][] = []; + for (const command of request.commands) { + results.push( + await handlerMap.session_execute({ + root_session_id: request.root_session_id, + command: command.command, + timeout_seconds: command.timeout_seconds, + }, context), + ); + } + return { + status: "ok", + summary: + `Stub session_batch_execute completed ${results.length} command(s).`, + results, + truncated: false, + }; + }, + session_index: async (request) => { + if (!corpus) { + return { + status: "ok", + corpus_ref: makeCorpusRef( + groupId, + request.root_session_id, + "stub-index", + ), + chunk_count: 0, + query_hints: [], + }; + } + const result = await corpus.index({ + rootSessionId: request.root_session_id, + content: request.content, + }); + return { + status: result.status, + corpus_ref: 
result.corpusRef, + chunk_count: result.chunkCount, + query_hints: result.queryHints, + }; + }, + session_search: async (request) => { + if (!corpus) { + return { + status: "ok", + results: [], + corpus_refs: [], + truncated: false, + }; + } + const result = await corpus.search({ + rootSessionId: request.root_session_id, + query: request.query, + }); + return { + status: result.status, + results: result.results, + corpus_refs: result.corpusRefs, + truncated: result.truncated, + }; + }, + session_fetch_and_index: async (request) => { + if (!corpus) { + return { + status: "ok", + corpus_ref: makeCorpusRef( + groupId, + request.root_session_id, + "stub-fetch", + ), + summary: `Stub session_fetch_and_index accepted ${request.url}.`, + query_hints: [], + fetched_url: request.url, + content_type: "text/plain", + truncated: false, + }; + } + const result = await corpus.fetchAndIndex({ + rootSessionId: request.root_session_id, + url: request.url, + timeoutSeconds: request.timeout_seconds, + }); + return { + status: result.status, + corpus_ref: result.corpusRef, + summary: result.summary, + query_hints: result.queryHints, + fetched_url: result.fetchedUrl, + content_type: result.contentType, + truncated: result.truncated, + }; + }, + session_stats: async (request) => { + if (!corpus) { + return { + status: "ok", + counters: {}, + corpus_count: 0, + artifact_count: 0, + bytes_saved_estimate: 0, + }; + } + const stats = await corpus.getStats(request.root_session_id); + return { + status: "ok", + counters: stats.counters, + corpus_count: stats.corpusCount, + artifact_count: stats.artifactCount, + bytes_saved_estimate: stats.bytesSavedEstimate, + }; + }, + session_doctor: () => + Promise.resolve({ + status: "ok", + checks: [{ + name: "session-mcp-runtime", + status: "ok", + detail: "Stub runtime handlers are registered in-process.", + }], + redis: { + status: "not_checked", + detail: "Redis health is not checked by the Task 1 stub runtime.", + }, + graphiti_cache: { + status: 
"not_checked", + detail: + "Graphiti cache health is not checked by the Task 1 stub runtime.", + }, + runtime: { + status: "ok", + detail: "In-process session MCP runtime is active.", + }, + }), + }; + + const handlerMap: SessionMcpHandlerMap = { + ...defaultHandlers, + ...options.handlers, + }; + + const persistInlineArtifactIfPresent = async < + TToolName extends "session_execute" | "session_execute_file", + >( + toolName: TToolName, + response: SessionMcpResponseMap[TToolName], + rootSessionId: string, + ): Promise => { + const payload = extractInlineArtifactPayload(response.artifact_ref); + if (!payload) return response; + + const artifact = corpus + ? await corpus.storeArtifact({ + rootSessionId, + toolName, + body: payload, + }).catch(() => null) + : null; + const fallbackArtifactRef = await writeArtifact(toolName, payload); + const artifactRef = artifact?.artifactRef ?? fallbackArtifactRef; + + if (toolName === "session_execute") { + return { + ...response, + artifact_ref: artifactRef, + } as SessionMcpResponseMap[TToolName]; + } + + return { + ...response, + artifact_ref: artifactRef, + corpus_ref: (response as SessionMcpResponseMap["session_execute_file"]) + .corpus_ref ?? artifact?.corpusRef, + } as SessionMcpResponseMap[TToolName]; + }; + + const coerceOversizedResponse = async ( + toolName: TToolName, + response: SessionMcpResponseMap[TToolName], + rootSessionId: string, + ): Promise => { + const resolveArtifactBody = ( + payload: { summary: string; artifact_ref?: string }, + ) => extractInlineArtifactPayload(payload.artifact_ref) ?? payload.summary; + const resolveArtifactRef = ( + originalRef: string | undefined, + storedRef: string | undefined, + fallbackRef: string, + ) => + extractInlineArtifactPayload(originalRef) + ? (storedRef ?? fallbackRef) + : (originalRef ?? storedRef ?? 
fallbackRef); + + if (toolName === "session_execute") { + const oversized = response as SessionMcpResponseMap["session_execute"]; + const artifactBody = resolveArtifactBody(oversized); + const artifact = corpus + ? await corpus.storeArtifact({ + rootSessionId, + toolName, + body: artifactBody, + }).catch(() => null) + : null; + const fallbackArtifactRef = await writeArtifact(toolName, artifactBody); + const artifactRef = resolveArtifactRef( + oversized.artifact_ref, + artifact?.artifactRef, + fallbackArtifactRef, + ); + return { + ...oversized, + artifact_ref: artifactRef, + summary: `Oversized output moved to local artifact ${artifactRef}.`, + truncated: true, + } as SessionMcpResponseMap[TToolName]; + } + + if (toolName === "session_execute_file") { + const oversized = + response as SessionMcpResponseMap["session_execute_file"]; + const artifactBody = resolveArtifactBody(oversized); + const artifact = corpus + ? await corpus.storeArtifact({ + rootSessionId, + toolName, + body: artifactBody, + }).catch(() => null) + : null; + const fallbackArtifactRef = await writeArtifact(toolName, artifactBody); + const artifactRef = resolveArtifactRef( + oversized.artifact_ref, + artifact?.artifactRef, + fallbackArtifactRef, + ); + return { + ...oversized, + artifact_ref: artifactRef, + corpus_ref: oversized.corpus_ref ?? artifact?.corpusRef, + summary: `Oversized output moved to local artifact ${artifactRef}.`, + truncated: true, + } as SessionMcpResponseMap[TToolName]; + } + + if (toolName === "session_batch_execute") { + const oversized = + response as SessionMcpResponseMap["session_batch_execute"]; + const results = await Promise.all( + oversized.results.map(async (result) => { + const artifactBody = resolveArtifactBody(result); + const artifact = corpus + ? 
await corpus.storeArtifact({ + rootSessionId, + toolName: "session_execute", + body: artifactBody, + }).catch(() => null) + : null; + const fallbackArtifactRef = await writeArtifact( + "session_execute", + artifactBody, + ); + const artifactRef = resolveArtifactRef( + result.artifact_ref, + artifact?.artifactRef, + fallbackArtifactRef, + ); + return { + ...result, + artifact_ref: artifactRef, + summary: + `Oversized batch step output moved to local artifact ${artifactRef}.`, + truncated: true, + }; + }), + ); + return { + ...oversized, + summary: + `Batch output truncated to stay within ${SESSION_MCP_RESPONSE_BUDGET_BYTES} bytes.`, + results, + truncated: true, + } as SessionMcpResponseMap[TToolName]; + } + + if (toolName === "session_search") { + const oversized = response as SessionMcpResponseMap["session_search"]; + return { + ...oversized, + results: oversized.results.slice(0, 1).map(( + result: SessionMcpResponseMap["session_search"]["results"][number], + ) => ({ + ...result, + snippet: result.snippet.slice(0, 320), + })), + truncated: true, + } as SessionMcpResponseMap[TToolName]; + } + + return response; + }; + + const executeTool = async ( + toolName: TToolName, + rawRequest: unknown, + context: ToolContext, + ): Promise => { + const request = parseRequest(toolName, rawRequest); + let response = parseResponse( + toolName, + await (handlerMap[toolName] as ( + request: SessionMcpRequestMap[TToolName], + context: ToolContext, + ) => Promise)(request, context), + ); + + if (toolName === "session_execute") { + response = parseResponse( + toolName, + await persistInlineArtifactIfPresent( + toolName, + response as SessionMcpResponseMap["session_execute"], + request.root_session_id, + ), + ); + } + + if (toolName === "session_execute_file") { + response = parseResponse( + toolName, + await persistInlineArtifactIfPresent( + toolName, + response as SessionMcpResponseMap["session_execute_file"], + request.root_session_id, + ), + ); + } + + let serialized = 
serialize(response); + + if (!isWithinBudget(serialized)) { + response = parseResponse( + toolName, + await coerceOversizedResponse( + toolName, + response, + request.root_session_id, + ), + ); + serialized = serialize(response); + } + + if (!isWithinBudget(serialized)) { + throw new Error( + `${toolName} response exceeded ${SESSION_MCP_RESPONSE_BUDGET_BYTES} bytes`, + ); + } + + return serialized; + }; + + const descriptions: Record = { + session_execute: "Execute a bounded session command.", + session_execute_file: "Read local files through the session runtime.", + session_batch_execute: "Execute bounded session commands sequentially.", + session_index: "Index local content for the current root session.", + session_search: + "Search local indexed content for the current root session.", + session_fetch_and_index: + "Fetch content and index it for the current root session.", + session_stats: "Return local session MCP stats.", + session_doctor: "Return local session MCP health checks.", + }; + + const tools = Object.fromEntries( + SESSION_MCP_TOOL_NAMES.map((toolName) => [ + toolName, + tool({ + description: descriptions[toolName], + args: sessionMcpToolArgs[toolName], + execute: (args, context) => executeTool(toolName, args, context), + }), + ]), + ) as unknown as Record; + + let disposed = false; + + const dispose = async (): Promise => { + if (disposed) return; + disposed = true; + artifactStore.clear(); + await corpus?.dispose?.(); + }; + + const migrateRootSessionState = async ( + sourceRootSessionId: string, + targetRootSessionId: string, + ): Promise => { + await corpus?.migrateRootSessionState?.( + sourceRootSessionId, + targetRootSessionId, + ); + }; + + return { + tools, + dispose, + migrateRootSessionState, + }; +}; diff --git a/src/services/session-mcp-types.ts b/src/services/session-mcp-types.ts new file mode 100644 index 0000000..062d210 --- /dev/null +++ b/src/services/session-mcp-types.ts @@ -0,0 +1,170 @@ +import { z } from "zod"; +import type { + 
SessionMcpCheckStatus, + SessionMcpStatus, +} from "../types/index.ts"; + +export const SESSION_MCP_TOOL_NAMES = [ + "session_execute", + "session_execute_file", + "session_batch_execute", + "session_index", + "session_search", + "session_fetch_and_index", + "session_stats", + "session_doctor", +] as const; + +export type SessionMcpToolName = (typeof SESSION_MCP_TOOL_NAMES)[number]; + +export const sessionMcpStatusSchema = z.enum( + [ + "ok", + "error", + ] satisfies SessionMcpStatus[], +); + +export const sessionMcpCheckStatusSchema = z.enum( + [ + "ok", + "degraded", + "unavailable", + "not_checked", + ] satisfies SessionMcpCheckStatus[], +); + +const rootSessionIdShape = { + root_session_id: z.string().min(1), +}; + +const sessionExecuteStepSchema = z.object({ + command: z.string().min(1), + timeout_seconds: z.number().int().positive().max(120).optional(), +}).strict(); + +const searchResultSchema = z.object({ + corpus_ref: z.string().min(1), + snippet: z.string(), + score: z.number(), +}).strict(); + +const doctorCheckSchema = z.object({ + name: z.string().min(1), + status: sessionMcpCheckStatusSchema, + detail: z.string().min(1), +}).strict(); + +const doctorSubsystemSchema = z.object({ + status: sessionMcpCheckStatusSchema, + detail: z.string().min(1), +}).strict(); + +export const sessionMcpRequestSchemas = { + session_execute: z.object({ + ...rootSessionIdShape, + command: z.string().min(1), + timeout_seconds: z.number().int().positive().max(120).optional(), + }).strict(), + session_execute_file: z.object({ + ...rootSessionIdShape, + paths: z.array(z.string().min(1)).min(1), + }).strict(), + session_batch_execute: z.object({ + ...rootSessionIdShape, + commands: z.array(sessionExecuteStepSchema).min(1), + }).strict(), + session_index: z.object({ + ...rootSessionIdShape, + content: z.string(), + }).strict(), + session_search: z.object({ + ...rootSessionIdShape, + query: z.string().min(1), + }).strict(), + session_fetch_and_index: z.object({ + 
...rootSessionIdShape, + url: z.string().url(), + timeout_seconds: z.number().int().positive().max(120).optional(), + }).strict(), + session_stats: z.object({ + ...rootSessionIdShape, + }).strict(), + session_doctor: z.object({ + ...rootSessionIdShape, + }).strict(), +}; + +export const sessionExecuteResponseSchema = z.object({ + status: sessionMcpStatusSchema, + summary: z.string(), + artifact_ref: z.string().min(1).optional(), + exit_code: z.number().int(), + timed_out: z.boolean(), + truncated: z.boolean(), + bytes_captured: z.number().int().nonnegative(), +}).strict(); + +export const sessionMcpResponseSchemas = { + session_execute: sessionExecuteResponseSchema, + session_batch_execute: z.object({ + status: sessionMcpStatusSchema, + summary: z.string(), + results: z.array(sessionExecuteResponseSchema), + truncated: z.boolean(), + }).strict(), + session_execute_file: z.object({ + status: sessionMcpStatusSchema, + summary: z.string(), + artifact_ref: z.string().min(1).optional(), + corpus_ref: z.string().min(1).optional(), + file_count: z.number().int().nonnegative(), + truncated: z.boolean(), + }).strict(), + session_index: z.object({ + status: sessionMcpStatusSchema, + corpus_ref: z.string().min(1), + chunk_count: z.number().int().nonnegative(), + query_hints: z.array(z.string()), + }).strict(), + session_search: z.object({ + status: sessionMcpStatusSchema, + results: z.array(searchResultSchema), + corpus_refs: z.array(z.string()), + truncated: z.boolean(), + }).strict(), + session_fetch_and_index: z.object({ + status: sessionMcpStatusSchema, + corpus_ref: z.string().min(1), + summary: z.string(), + query_hints: z.array(z.string()), + fetched_url: z.string().min(1), + content_type: z.string().min(1), + truncated: z.boolean(), + }).strict(), + session_stats: z.object({ + status: sessionMcpStatusSchema, + counters: z.record(z.string(), z.number()), + corpus_count: z.number().int().nonnegative(), + artifact_count: z.number().int().nonnegative(), + 
bytes_saved_estimate: z.number().int().nonnegative(), + }).strict(), + session_doctor: z.object({ + status: sessionMcpStatusSchema, + checks: z.array(doctorCheckSchema), + redis: doctorSubsystemSchema, + graphiti_cache: doctorSubsystemSchema, + runtime: doctorSubsystemSchema, + }).strict(), +}; + +export type SessionMcpRequestMap = { + [K in SessionMcpToolName]: ReturnType< + (typeof sessionMcpRequestSchemas)[K]["parse"] + >; +}; + +export type SessionMcpResponseMap = { + [K in SessionMcpToolName]: ReturnType< + (typeof sessionMcpResponseSchemas)[K]["parse"] + >; +}; diff --git a/src/services/tool-guidance-cache.test.ts b/src/services/tool-guidance-cache.test.ts new file mode 100644 index 0000000..67a5342 --- /dev/null +++ b/src/services/tool-guidance-cache.test.ts @@ -0,0 +1,64 @@ +import { assertEquals } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; + +import type { ToolGuidanceType } from "./tool-guidance.ts"; +import { ToolGuidanceCache } from "./tool-guidance-cache.ts"; + +const READ_GUIDANCE_TYPE: ToolGuidanceType = "read"; +const GREP_GUIDANCE_TYPE: ToolGuidanceType = "grep"; +const BASH_GUIDANCE_TYPE: ToolGuidanceType = "bash"; + +describe("tool guidance cache", () => { + it("only emits a guidance type once per canonical root session", () => { + const cache = new ToolGuidanceCache(); + + assertEquals(cache.shouldEmit("root-session", READ_GUIDANCE_TYPE), true); + assertEquals(cache.shouldEmit("root-session", READ_GUIDANCE_TYPE), false); + }); + + it("allows different guidance types to emit independently for one canonical root session", () => { + const cache = new ToolGuidanceCache(); + + assertEquals(cache.shouldEmit("root-session", READ_GUIDANCE_TYPE), true); + assertEquals(cache.shouldEmit("root-session", GREP_GUIDANCE_TYPE), true); + assertEquals(cache.shouldEmit("root-session", BASH_GUIDANCE_TYPE), true); + + assertEquals(cache.shouldEmit("root-session", READ_GUIDANCE_TYPE), false); + 
assertEquals(cache.shouldEmit("root-session", GREP_GUIDANCE_TYPE), false); + assertEquals(cache.shouldEmit("root-session", BASH_GUIDANCE_TYPE), false); + }); + + it("does not share throttle state across canonical root sessions", () => { + const cache = new ToolGuidanceCache(); + + assertEquals(cache.shouldEmit("root-session-a", READ_GUIDANCE_TYPE), true); + assertEquals(cache.shouldEmit("root-session-b", READ_GUIDANCE_TYPE), true); + assertEquals(cache.shouldEmit("root-session-a", READ_GUIDANCE_TYPE), false); + assertEquals(cache.shouldEmit("root-session-b", READ_GUIDANCE_TYPE), false); + }); + + it("allows a session guidance type to emit again after clearSession", () => { + const cache = new ToolGuidanceCache(); + + assertEquals(cache.shouldEmit("root-session", READ_GUIDANCE_TYPE), true); + assertEquals(cache.shouldEmit("root-session", READ_GUIDANCE_TYPE), false); + + cache.clearSession("root-session"); + + assertEquals(cache.shouldEmit("root-session", READ_GUIDANCE_TYPE), true); + }); + + it("allows all session guidance types to emit again after clearAll", () => { + const cache = new ToolGuidanceCache(); + + assertEquals(cache.shouldEmit("root-session-a", READ_GUIDANCE_TYPE), true); + assertEquals(cache.shouldEmit("root-session-b", GREP_GUIDANCE_TYPE), true); + assertEquals(cache.shouldEmit("root-session-a", READ_GUIDANCE_TYPE), false); + assertEquals(cache.shouldEmit("root-session-b", GREP_GUIDANCE_TYPE), false); + + cache.clearAll(); + + assertEquals(cache.shouldEmit("root-session-a", READ_GUIDANCE_TYPE), true); + assertEquals(cache.shouldEmit("root-session-b", GREP_GUIDANCE_TYPE), true); + }); +}); diff --git a/src/services/tool-guidance-cache.ts b/src/services/tool-guidance-cache.ts new file mode 100644 index 0000000..737a280 --- /dev/null +++ b/src/services/tool-guidance-cache.ts @@ -0,0 +1,23 @@ +import type { ToolGuidanceType } from "./tool-guidance.ts"; + +export class ToolGuidanceCache { + private readonly emittedBySession = new Map>(); + + 
shouldEmit(sessionId: string, guidanceType: ToolGuidanceType): boolean { + const emitted = this.emittedBySession.get(sessionId); + if (emitted?.has(guidanceType)) return false; + + const next = emitted ?? new Set(); + next.add(guidanceType); + this.emittedBySession.set(sessionId, next); + return true; + } + + clearSession(sessionId: string): void { + this.emittedBySession.delete(sessionId); + } + + clearAll(): void { + this.emittedBySession.clear(); + } +} diff --git a/src/services/tool-guidance.ts b/src/services/tool-guidance.ts new file mode 100644 index 0000000..9d8e07e --- /dev/null +++ b/src/services/tool-guidance.ts @@ -0,0 +1,60 @@ +export const READ_GUIDANCE = + "Use Read only for a narrow, known file slice; prefer targeted reads over broad transcript-heavy file dumps."; + +export const GREP_GUIDANCE = + "Use Grep to locate exact matches first, then follow with the smallest Read needed for the matched lines."; + +export const BASH_GUIDANCE = + "Use ordinary Bash only for execution-oriented shell work; prefer Read, Grep, and Glob for file inspection to avoid noisy output."; + +export const ROUTING_BLOCK = `Routing note: prefer bounded tool usage. +- Use Read for targeted file slices, not broad dumps. +- Use Grep before Read when searching content. +- Use ordinary Bash for execution work, not codebase inspection. 
+- Keep tool output as small and task-focused as possible.`; + +export type ToolGuidanceType = "read" | "grep" | "bash"; + +export const TOOL_GUIDANCE_TEXT: Record = { + read: READ_GUIDANCE, + grep: GREP_GUIDANCE, + bash: BASH_GUIDANCE, +}; + +export type ContextGuidanceOutcome = { + decision: "context"; + guidanceType: ToolGuidanceType; + guidance: string; + sdkVisible: { + argsMutation: "none"; + immediateDelivery: "no-op"; + }; + continuity: { + recordRoutingNudge: true; + injectVia: "session_memory_next_turn"; + }; +}; + +/** + * Implementation refinement of plans/ContextOverhaul.md §5.2: + * `context` guidance remains a no-op for the current SDK-visible tool call, but + * is still materialized as a compact routing outcome so the normal + * `` continuity path can inject the once-per-session nudge on + * the next model turn. This refines delivery timing only; it does not change + * the plan's hot-path semantics. + */ +export const createContextGuidanceOutcome = ( + guidanceType: ToolGuidanceType, +): ContextGuidanceOutcome => ({ + decision: "context", + guidanceType, + guidance: TOOL_GUIDANCE_TEXT[guidanceType], + sdkVisible: { + argsMutation: "none", + immediateDelivery: "no-op", + }, + continuity: { + recordRoutingNudge: true, + injectVia: "session_memory_next_turn", + }, +}); diff --git a/src/services/tool-routing-outcome-cache.test.ts b/src/services/tool-routing-outcome-cache.test.ts new file mode 100644 index 0000000..495c68d --- /dev/null +++ b/src/services/tool-routing-outcome-cache.test.ts @@ -0,0 +1,55 @@ +import { assertEquals, assertStrictEquals } from "jsr:@std/assert@^1.0.0"; +import { afterEach, describe, it } from "jsr:@std/testing@^1.0.0/bdd"; + +import { + type ToolRoutingOutcome, + ToolRoutingOutcomeCache, +} from "./tool-routing-outcome-cache.ts"; + +describe("tool routing outcome cache", () => { + const cache = new ToolRoutingOutcomeCache(); + + afterEach(() => { + cache.clearAll(); + }); + + it("set(callId, outcome) stores a compact routing 
outcome", () => { + const outcome: ToolRoutingOutcome = { + source: "tool-routing", + action: "modify", + reason: "bash-network-rewrite", + }; + + cache.set("call-1", outcome); + + assertStrictEquals(cache.take("call-1"), outcome); + }); + + it("take(callId) returns and clears the stored outcome", () => { + const outcome: ToolRoutingOutcome = { + source: "tool-routing", + action: "context", + guidanceType: "read", + reason: "read-guidance", + }; + + cache.set("call-1", outcome); + + assertStrictEquals(cache.take("call-1"), outcome); + assertEquals(cache.take("call-1"), undefined); + }); + + it("take(callId) is safe to call repeatedly after the entry is cleared", () => { + const outcome: ToolRoutingOutcome = { + source: "tool-routing", + action: "deny", + reason: "webfetch-denied", + }; + + cache.set("call-1", outcome); + + assertStrictEquals(cache.take("call-1"), outcome); + assertEquals(cache.take("call-1"), undefined); + assertEquals(cache.take("call-1"), undefined); + }); +}); diff --git a/src/services/tool-routing-outcome-cache.ts b/src/services/tool-routing-outcome-cache.ts new file mode 100644 index 0000000..596ca44 --- /dev/null +++ b/src/services/tool-routing-outcome-cache.ts @@ -0,0 +1,26 @@ +import type { ToolGuidanceType } from "./tool-guidance.ts"; + +export type ToolRoutingOutcome = { + source: "tool-routing"; + action: "modify" | "deny" | "context"; + reason: string; + guidanceType?: ToolGuidanceType; +}; + +export class ToolRoutingOutcomeCache { + private readonly outcomes = new Map(); + + set(callId: string, outcome: ToolRoutingOutcome): void { + this.outcomes.set(callId, outcome); + } + + take(callId: string): ToolRoutingOutcome | undefined { + const outcome = this.outcomes.get(callId); + this.outcomes.delete(callId); + return outcome; + } + + clearAll(): void { + this.outcomes.clear(); + } +} diff --git a/src/services/tool-routing.test.ts b/src/services/tool-routing.test.ts new file mode 100644 index 0000000..565f298 --- /dev/null +++ 
b/src/services/tool-routing.test.ts @@ -0,0 +1,259 @@ +import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; + +import { ROUTING_BLOCK } from "./tool-guidance.ts"; +import { routeToolCall } from "./tool-routing.ts"; + +class MockGuidanceThrottle { + calls: Array<{ sessionId: string; guidanceType: string }> = []; + + constructor(private readonly nextResult = true) {} + + shouldEmit(sessionId: string, guidanceType: string): boolean { + this.calls.push({ sessionId, guidanceType }); + return this.nextResult; + } +} + +describe("tool routing", () => { + it("returns a Read guidance decision", () => { + const throttle = new MockGuidanceThrottle(true); + + const decision = routeToolCall({ + canonicalSessionId: "root-session", + toolName: "Read", + args: { filePath: "/tmp/example.ts", offset: 1, limit: 40 }, + guidanceThrottle: throttle, + }); + + assertEquals(decision.action, "context"); + if (decision.action !== "context") { + throw new Error(`Expected context decision, got ${decision.action}`); + } + assertEquals(decision.guidanceType, "read"); + assertEquals(decision.reason, "read-guidance"); + assertEquals(throttle.calls, [{ + sessionId: "root-session", + guidanceType: "read", + }]); + }); + + it("hard-denies WebFetch", () => { + const decision = routeToolCall({ + canonicalSessionId: "root-session", + toolName: "WebFetch", + args: { url: "https://example.com" }, + guidanceThrottle: new MockGuidanceThrottle(true), + }); + + assertEquals(decision.action, "deny"); + if (decision.action !== "deny") { + throw new Error(`Expected deny decision, got ${decision.action}`); + } + assertEquals(decision.reason, "webfetch-denied"); + assertStringIncludes(decision.guidance, "WebFetch"); + }); + + it("rewrites Bash curl commands", () => { + const decision = routeToolCall({ + canonicalSessionId: "root-session", + toolName: "Bash", + args: { command: "curl https://example.com/data.json" }, + 
guidanceThrottle: new MockGuidanceThrottle(true), + }); + + assertEquals(decision.action, "modify"); + if (decision.action !== "modify") { + throw new Error(`Expected modify decision, got ${decision.action}`); + } + assertEquals(decision.reason, "bash-network-rewrite"); + assertStringIncludes(String(decision.args.command), "Routing note"); + assertStringIncludes(String(decision.args.command), "network"); + }); + + it("rewrites Bash inline HTTP commands", () => { + const decision = routeToolCall({ + canonicalSessionId: "root-session", + toolName: "Bash", + args: { + command: 'node -e "fetch("https://example.com/api").then(console.log)"', + }, + guidanceThrottle: new MockGuidanceThrottle(true), + }); + + assertEquals(decision.action, "modify"); + if (decision.action !== "modify") { + throw new Error(`Expected modify decision, got ${decision.action}`); + } + assertEquals(decision.reason, "bash-inline-http-rewrite"); + assertStringIncludes(String(decision.args.command), "Routing note"); + assertStringIncludes(String(decision.args.command), "HTTP"); + }); + + it("rewrites Bash build-tool commands", () => { + const decision = routeToolCall({ + canonicalSessionId: "root-session", + toolName: "Bash", + args: { command: "./gradlew build" }, + guidanceThrottle: new MockGuidanceThrottle(true), + }); + + assertEquals(decision.action, "modify"); + if (decision.action !== "modify") { + throw new Error(`Expected modify decision, got ${decision.action}`); + } + assertEquals(decision.reason, "bash-build-rewrite"); + assertStringIncludes(String(decision.args.command), "build"); + assertStringIncludes(String(decision.args.command), "Routing note"); + }); + + it("returns ordinary Bash guidance as a fallback", () => { + const throttle = new MockGuidanceThrottle(true); + + const decision = routeToolCall({ + canonicalSessionId: "root-session", + toolName: "Bash", + args: { command: "deno test src/session.ts" }, + guidanceThrottle: throttle, + }); + + assertEquals(decision.action, 
"context"); + if (decision.action !== "context") { + throw new Error(`Expected context decision, got ${decision.action}`); + } + assertEquals(decision.guidanceType, "bash"); + assertEquals(decision.reason, "bash-guidance"); + assertEquals(throttle.calls, [{ + sessionId: "root-session", + guidanceType: "bash", + }]); + }); + + it("returns a Grep guidance decision", () => { + const throttle = new MockGuidanceThrottle(true); + + const decision = routeToolCall({ + canonicalSessionId: "root-session", + toolName: "Grep", + args: { pattern: "routeToolCall", include: "*.ts" }, + guidanceThrottle: throttle, + }); + + assertEquals(decision.action, "context"); + if (decision.action !== "context") { + throw new Error(`Expected context decision, got ${decision.action}`); + } + assertEquals(decision.guidanceType, "grep"); + assertEquals(decision.reason, "grep-guidance"); + assertEquals(throttle.calls, [{ + sessionId: "root-session", + guidanceType: "grep", + }]); + }); + + it("passes Glob through unchanged", () => { + const throttle = new MockGuidanceThrottle(true); + + const decision = routeToolCall({ + canonicalSessionId: "root-session", + toolName: "Glob", + args: { pattern: "src/**/*.ts" }, + guidanceThrottle: throttle, + }); + + assertEquals(decision, { action: "allow", reason: "glob-allow" }); + assertEquals(throttle.calls, []); + }); + + it("rewrites the Task prompt field", () => { + const decision = routeToolCall({ + canonicalSessionId: "root-session", + toolName: "Task", + args: { prompt: "Investigate the failing test", subagent_type: "leaf" }, + guidanceThrottle: new MockGuidanceThrottle(true), + }); + + assertEquals(decision.action, "modify"); + if (decision.action !== "modify") { + throw new Error(`Expected modify decision, got ${decision.action}`); + } + assertEquals(decision.reason, "task-routing-block"); + assertStringIncludes( + String(decision.args.prompt), + "Investigate the failing test", + ); + assertStringIncludes(String(decision.args.prompt), 
ROUTING_BLOCK); + assertEquals(decision.args.subagent_type, "leaf"); + }); + + it("rewrites the first present Task prompt field in priority order", () => { + const scenarios = [ + { + field: "prompt", + args: { + prompt: "p", + request: "r", + objective: "o", + question: "q", + query: "qq", + task: "t", + }, + }, + { + field: "request", + args: { + request: "r", + objective: "o", + question: "q", + query: "qq", + task: "t", + }, + }, + { + field: "objective", + args: { objective: "o", question: "q", query: "qq", task: "t" }, + }, + { + field: "question", + args: { question: "q", query: "qq", task: "t" }, + }, + { + field: "query", + args: { query: "qq", task: "t" }, + }, + { + field: "task", + args: { task: "t" }, + }, + ] as const; + + for (const scenario of scenarios) { + const decision = routeToolCall({ + canonicalSessionId: "root-session", + toolName: "Task", + args: { ...scenario.args }, + guidanceThrottle: new MockGuidanceThrottle(true), + }); + + assertEquals(decision.action, "modify"); + if (decision.action !== "modify") { + throw new Error(`Expected modify decision, got ${decision.action}`); + } + assertStringIncludes( + String(decision.args[scenario.field]), + ROUTING_BLOCK, + ); + } + }); + + it("fails open for unknown tools", () => { + const decision = routeToolCall({ + canonicalSessionId: "root-session", + toolName: "UnknownTool", + args: { anything: true }, + guidanceThrottle: new MockGuidanceThrottle(true), + }); + + assertEquals(decision, { action: "allow", reason: "unknown-tool-allow" }); + }); +}); diff --git a/src/services/tool-routing.ts b/src/services/tool-routing.ts new file mode 100644 index 0000000..2134c3c --- /dev/null +++ b/src/services/tool-routing.ts @@ -0,0 +1,204 @@ +import { + BASH_GUIDANCE, + createContextGuidanceOutcome, + GREP_GUIDANCE, + READ_GUIDANCE, + ROUTING_BLOCK, + type ToolGuidanceType, +} from "./tool-guidance.ts"; + +export type RoutingDecision = + | { action: "allow"; reason: string } + | { action: "modify"; args: 
Record; reason: string } + | { action: "deny"; guidance: string; reason: string } + | { + action: "context"; + guidance: string; + guidanceType: ToolGuidanceType; + reason: string; + sdkVisible: { + argsMutation: "none"; + immediateDelivery: "no-op"; + }; + continuity: { + recordRoutingNudge: true; + injectVia: "session_memory_next_turn"; + }; + }; + +export interface GuidanceThrottle { + shouldEmit(sessionId: string, guidanceType: ToolGuidanceType): boolean; +} + +export interface RouteToolCallInput { + canonicalSessionId: string; + toolName: string; + args: Record; + guidanceThrottle: GuidanceThrottle; +} + +const TASK_PROMPT_FIELDS = [ + "prompt", + "request", + "objective", + "question", + "query", + "task", +] as const; + +const withRoutingBlock = (value: string): string => + value.includes(ROUTING_BLOCK) ? value : `${value.trim()}\n\n${ROUTING_BLOCK}`; + +const buildGuidanceCommand = (details: string): string => { + const message = `${ROUTING_BLOCK}\n${details}`.replaceAll("'", "'\\''"); + return `printf '%s\n' '${message}'`; +}; + +const asCommand = (args: Record): string => { + const command = args.command; + return typeof command === "string" ? 
command : ""; +}; + +const contextDecision = ( + guidanceType: ToolGuidanceType, + reason: string, +): RoutingDecision => { + const outcome = createContextGuidanceOutcome(guidanceType); + return { + action: "context", + guidance: outcome.guidance, + guidanceType: outcome.guidanceType, + reason, + sdkVisible: outcome.sdkVisible, + continuity: outcome.continuity, + }; +}; + +const routeRead = ( + canonicalSessionId: string, + guidanceThrottle: GuidanceThrottle, +): RoutingDecision => { + if (guidanceThrottle.shouldEmit(canonicalSessionId, "read")) { + return contextDecision("read", "read-guidance"); + } + return { action: "allow", reason: "read-allow" }; +}; + +const routeGrep = ( + canonicalSessionId: string, + guidanceThrottle: GuidanceThrottle, +): RoutingDecision => { + if (guidanceThrottle.shouldEmit(canonicalSessionId, "grep")) { + return contextDecision("grep", "grep-guidance"); + } + return { action: "allow", reason: "grep-allow" }; +}; + +const routeWebFetch = (): RoutingDecision => ({ + action: "deny", + reason: "webfetch-denied", + guidance: + "WebFetch is blocked. 
Use a safer search/fetch flow instead of raw page fetches.", +}); + +const routeBash = ( + canonicalSessionId: string, + args: Record, + guidanceThrottle: GuidanceThrottle, +): RoutingDecision => { + const command = asCommand(args); + const normalized = command.toLowerCase(); + + if (/\b(curl|wget)\b/.test(normalized)) { + return { + action: "modify", + reason: "bash-network-rewrite", + args: { + ...args, + command: buildGuidanceCommand( + "Avoid raw network shell fetches here; use the safer fetch/search path instead.", + ), + }, + }; + } + + if ( + /https?:\/\//.test(normalized) || + /\bfetch\s*\(/.test(normalized) || + /axios\./.test(normalized) || + /requests\.(get|post|put|patch|delete)\s*\(/.test(normalized) + ) { + return { + action: "modify", + reason: "bash-inline-http-rewrite", + args: { + ...args, + command: buildGuidanceCommand( + "Avoid inline HTTP clients in Bash here; use a bounded fetch/search path instead.", + ), + }, + }; + } + + if (/\b(gradle|gradlew|mvn|mvnw)\b/.test(normalized)) { + return { + action: "modify", + reason: "bash-build-rewrite", + args: { + ...args, + command: buildGuidanceCommand( + "Avoid high-volume build-tool output in ordinary Bash; use a safer bounded execution path.", + ), + }, + }; + } + + if (guidanceThrottle.shouldEmit(canonicalSessionId, "bash")) { + return contextDecision("bash", "bash-guidance"); + } + + return { action: "allow", reason: "bash-allow" }; +}; + +const routeTask = (args: Record): RoutingDecision => { + for (const field of TASK_PROMPT_FIELDS) { + const value = args[field]; + if (typeof value !== "string" || value.trim().length === 0) continue; + return { + action: "modify", + reason: "task-routing-block", + args: { + ...args, + [field]: withRoutingBlock(value), + }, + }; + } + + return { action: "allow", reason: "task-allow" }; +}; + +export const routeToolCall = ({ + canonicalSessionId, + toolName, + args, + guidanceThrottle, +}: RouteToolCallInput): RoutingDecision => { + switch (toolName.toLowerCase()) 
{ + case "read": + return routeRead(canonicalSessionId, guidanceThrottle); + case "webfetch": + return routeWebFetch(); + case "bash": + return routeBash(canonicalSessionId, args, guidanceThrottle); + case "grep": + return routeGrep(canonicalSessionId, guidanceThrottle); + case "glob": + return { action: "allow", reason: "glob-allow" }; + case "task": + return routeTask(args); + default: + return { action: "allow", reason: "unknown-tool-allow" }; + } +}; + +export { BASH_GUIDANCE, GREP_GUIDANCE, READ_GUIDANCE }; diff --git a/src/session.test.ts b/src/session.test.ts new file mode 100644 index 0000000..19dcc08 --- /dev/null +++ b/src/session.test.ts @@ -0,0 +1,176 @@ +import { assertEquals, assertRejects } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { SessionManager } from "./session.ts"; +import { setSuppressConsoleWarningsDuringTestsOverride } from "./services/opencode-warning.ts"; +import { RedisClient } from "./services/redis-client.ts"; +import { createSessionCorpusService } from "./services/session-corpus.ts"; + +const createExplicitSessionNotFoundError = ( + details: Record = { status: 404 }, +): Error => Object.assign(new Error("Session not found"), details); + +describe("SessionManager Task 6 runtime migration", () => { + it("migrates temporary-root corpora and stats onto the canonical parent root", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 120, + groupId: "group-task-6", + }); + const manager = new SessionManager( + "group-task-6", + "user-task-6", + { + session: { + get() { + throw createExplicitSessionNotFoundError(); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + { + runtimeStateMigrator: { + migrateRootSessionState: ( + sourceSessionId: string, + canonicalSessionId: string, + ) => + corpus.migrateRootSessionState(sourceSessionId, canonicalSessionId), + }, + } as never, 
+ ); + + await manager.resolveCanonicalSessionId("child-session"); + const parentIndexed = await corpus.index({ + rootSessionId: "parent-session", + content: ["# Parent Root", "", "Canonical parent corpus lives here."] + .join("\n"), + }); + const childIndexed = await corpus.index({ + rootSessionId: "child-session", + content: ["# Child Root", "", "Temporary child corpus migrates here."] + .join("\n"), + }); + + manager.setParentId("parent-session", null); + manager.setParentId("child-session", "parent-session"); + const resolved = await manager.resolveSessionState("child-session"); + const search = await corpus.search({ + rootSessionId: "parent-session", + query: "canonical migrates", + }); + const stats = await corpus.getStats("parent-session"); + + assertEquals(resolved.canonicalSessionId, "parent-session"); + assertEquals(search.corpusRefs.includes(parentIndexed.corpusRef), true); + assertEquals(search.corpusRefs.includes(childIndexed.corpusRef), false); + assertEquals( + search.results.some((result) => + result.snippet.includes("Temporary child corpus migrates here") + ), + true, + ); + assertEquals(stats.corpusCount, 2); + }); + + it("does not delete root-owned corpora or stats when deleting a child after migration", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 120, + groupId: "group-task-6-delete", + }); + const manager = new SessionManager( + "group-task-6-delete", + "user-task-6-delete", + { + session: { + get() { + throw createExplicitSessionNotFoundError(); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + { + runtimeStateMigrator: { + migrateRootSessionState: ( + sourceSessionId: string, + canonicalSessionId: string, + ) => + corpus.migrateRootSessionState(sourceSessionId, canonicalSessionId), + }, + } as never, + ); + + await manager.resolveCanonicalSessionId("child-session"); + await corpus.storeArtifact({ + rootSessionId: 
"child-session", + toolName: "session_execute", + body: "child artifact body survives migration", + }); + + manager.setParentId("parent-session", null); + manager.setParentId("child-session", "parent-session"); + await manager.resolveSessionState("child-session"); + manager.deleteSession("child-session"); + + const search = await corpus.search({ + rootSessionId: "parent-session", + query: "artifact survives migration", + }); + const stats = await corpus.getStats("parent-session"); + + assertEquals(search.results.length > 0, true); + assertEquals(stats.artifactCount, 1); + assertEquals(stats.corpusCount, 1); + }); + + it("surfaces temporary-root runtime migration failures instead of continuing with split ownership", async () => { + const originalWarn = console.warn; + const warnCalls: unknown[][] = []; + console.warn = (...args: unknown[]) => { + warnCalls.push(args); + }; + setSuppressConsoleWarningsDuringTestsOverride(true); + + const manager = new SessionManager( + "group-task-6-failure", + "user-task-6-failure", + { + session: { + get() { + throw createExplicitSessionNotFoundError(); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + { + runtimeStateMigrator: { + migrateRootSessionState: () => + Promise.reject(new Error("migration failed")), + }, + } as never, + ); + + try { + await manager.resolveCanonicalSessionId("child-session"); + manager.setParentId("parent-session", null); + manager.setParentId("child-session", "parent-session"); + + await assertRejects( + () => manager.resolveCanonicalSessionId("child-session"), + Error, + "migration failed", + ); + assertEquals(warnCalls, []); + } finally { + setSuppressConsoleWarningsDuringTestsOverride(undefined); + console.warn = originalWarn; + } + }); +}); diff --git a/src/session.ts b/src/session.ts index b9ef1f5..a89d90f 100644 --- a/src/session.ts +++ b/src/session.ts @@ -200,10 +200,28 @@ export type SessionState = { type TimerHandle = ReturnType | number; +export interface 
SessionRuntimeStateMigrator { + migrateRootSessionState( + sourceRootSessionId: string, + targetRootSessionId: string, + ): Promise; +} + +type TemporaryRootRuntimeMigration = { + canonicalSessionId: string; + promise: Promise; +}; + export interface SessionManagerOptions { idleRetentionMs?: number; setTimer?: (callback: () => void, delayMs: number) => TimerHandle; clearTimer?: (timer: TimerHandle) => void; + runtimeStateMigrator?: SessionRuntimeStateMigrator; +} + +export interface ToolRoutingSessionCanonicalizer { + getCachedCanonicalSessionId(sessionId: string): string | undefined; + resolveCanonicalSessionId(sessionId: string): Promise; } type SessionLifecycle = { @@ -596,6 +614,11 @@ export class SessionManager { delayMs: number, ) => TimerHandle; private readonly clearTimerImpl: (timer: TimerHandle) => void; + private readonly runtimeStateMigrator?: SessionRuntimeStateMigrator; + private readonly temporaryRootRuntimeMigrations = new Map< + string, + TemporaryRootRuntimeMigration + >(); constructor( private readonly defaultGroupId: string, @@ -616,6 +639,7 @@ export class SessionManager { this.setTimerImpl, this.clearTimerImpl, ); + this.runtimeStateMigrator = options.runtimeStateMigrator; } createDefaultState(groupId: string, userGroupId: string): SessionState { @@ -653,6 +677,20 @@ export class SessionManager { this.sessions.set(sessionId, state); } + /** + * Hot-path-friendly canonical lookup for live tool routing. + * + * This method is intentionally cache-only and will never call + * `sdkClient.session.get()`. If a child session has not yet been seen by the + * existing async lineage path, this may return `undefined` on the first tool + * call. Callers that can afford async fallback should then use + * `resolveCanonicalSessionId()`, which may fetch uncached lineage data. 
+ */ + getCachedCanonicalSessionId(sessionId: string): string | undefined { + if (this.temporaryRootSessionIds.has(sessionId)) return undefined; + return this.canonicalSessionIdCache.get(sessionId); + } + markSessionActive(sessionId: string): void { this.markLifecycleActive(sessionId); const canonicalSessionId = this.canonicalSessionIdCache.get(sessionId); @@ -701,6 +739,7 @@ export class SessionManager { this.parentIdCache.set(sessionId, parentId); if (!parentId) { this.temporaryRootSessionIds.delete(sessionId); + this.temporaryRootRuntimeMigrations.delete(sessionId); this.canonicalSessionIdCache.set(sessionId, sessionId); return; } @@ -709,10 +748,22 @@ export class SessionManager { if (parentCanonical) { this.canonicalSessionIdCache.set(sessionId, parentCanonical); if (parentCanonical !== sessionId) { - this.migrateTemporaryRootRuntimeState(sessionId, parentCanonical); - } - if (wasTemporaryRoot) { - this.temporaryRootSessionIds.delete(sessionId); + this.mergeTemporaryRootInMemoryRuntimeState( + sessionId, + parentCanonical, + ); + if (wasTemporaryRoot) { + void this.ensureTemporaryRootRuntimeStateMigrated( + sessionId, + parentCanonical, + ).catch((err) => { + logger.warn("Temporary-root runtime migration failed", { + sessionId, + canonicalSessionId: parentCanonical, + err, + }); + }); + } } return; } @@ -754,7 +805,7 @@ export class SessionManager { ); } - private migrateTemporaryRootRuntimeState( + private mergeTemporaryRootInMemoryRuntimeState( sessionId: string, canonicalSessionId: string, ): void { @@ -785,6 +836,42 @@ export class SessionManager { } } + private async ensureTemporaryRootRuntimeStateMigrated( + sessionId: string, + canonicalSessionId: string, + ): Promise { + if (sessionId === canonicalSessionId) return; + + const existingMigration = this.temporaryRootRuntimeMigrations.get( + sessionId, + ); + if (existingMigration) { + if (existingMigration.canonicalSessionId !== canonicalSessionId) { + throw new Error( + `Temporary root ${sessionId} 
attempted to migrate to multiple canonical roots`, + ); + } + await existingMigration.promise; + return; + } + + this.mergeTemporaryRootInMemoryRuntimeState(sessionId, canonicalSessionId); + const promise = (async () => { + await this.runtimeStateMigrator?.migrateRootSessionState( + sessionId, + canonicalSessionId, + ); + this.temporaryRootSessionIds.delete(sessionId); + this.temporaryRootRuntimeMigrations.delete(sessionId); + })(); + + this.temporaryRootRuntimeMigrations.set(sessionId, { + canonicalSessionId, + promise, + }); + await promise; + } + async resolveParentId( sessionId: string, ): Promise { @@ -804,9 +891,13 @@ export class SessionManager { : (response as { parentID?: string }); if (!sessionInfo) return undefined; const parentId = sessionInfo.parentID ?? null; + const wasTemporaryRoot = this.temporaryRootSessionIds.has(sessionId); this.parentIdCache.set(sessionId, parentId); - this.temporaryRootSessionIds.delete(sessionId); + if (parentId === null || !wasTemporaryRoot) { + this.temporaryRootSessionIds.delete(sessionId); + } if (parentId === null) { + this.temporaryRootRuntimeMigrations.delete(sessionId); this.canonicalSessionIdCache.set(sessionId, sessionId); } else { this.canonicalSessionIdCache.delete(sessionId); @@ -833,8 +924,15 @@ export class SessionManager { visited: Set = new Set(), ): Promise { const cached = this.canonicalSessionIdCache.get(sessionId); + const hasPendingTemporaryRootMigration = + this.temporaryRootSessionIds.has(sessionId) && cached !== undefined && + cached !== sessionId; const hasProvisionalTemporaryRoot = this.temporaryRootSessionIds.has(sessionId) && cached === sessionId; + if (cached && hasPendingTemporaryRootMigration) { + await this.ensureTemporaryRootRuntimeStateMigrated(sessionId, cached); + return cached; + } if (cached && !hasProvisionalTemporaryRoot) return cached; if (visited.has(sessionId)) { logger.debug("Detected cycle while resolving canonical session", { @@ -845,6 +943,10 @@ export class SessionManager { } 
visited.add(sessionId); + // Async canonical resolution may fetch uncached session lineage through + // `sdkClient.session.get()`. Future hot-path callers such as + // `tool.execute.before` should consult `getCachedCanonicalSessionId()` + // first so the initial canonicalization tradeoff is explicit. const parentId = await this.resolveParentId(sessionId); if (parentId === undefined) { return hasProvisionalTemporaryRoot ? cached : undefined; @@ -860,8 +962,13 @@ export class SessionManager { ); if (!canonicalSessionId) return undefined; if (canonicalSessionId !== sessionId) { - this.migrateTemporaryRootRuntimeState(sessionId, canonicalSessionId); - this.temporaryRootSessionIds.delete(sessionId); + this.canonicalSessionIdCache.set(sessionId, canonicalSessionId); + if (this.temporaryRootSessionIds.has(sessionId)) { + await this.ensureTemporaryRootRuntimeStateMigrated( + sessionId, + canonicalSessionId, + ); + } } this.canonicalSessionIdCache.set(sessionId, canonicalSessionId); return canonicalSessionId; @@ -1051,6 +1158,7 @@ export class SessionManager { this.parentIdCache.delete(sessionId); this.canonicalSessionIdCache.delete(sessionId); this.temporaryRootSessionIds.delete(sessionId); + this.temporaryRootRuntimeMigrations.delete(sessionId); for ( const [childSessionId, parentId] of [...this.parentIdCache.entries()] ) { diff --git a/src/types/index.ts b/src/types/index.ts index 1b8b75e..1139546 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -201,3 +201,11 @@ export interface PreparedSessionMemory { nodeRefs: string[]; refreshDecision: CacheRefreshDecision; } + +export type SessionMcpStatus = "ok" | "error"; + +export type SessionMcpCheckStatus = + | "ok" + | "degraded" + | "unavailable" + | "not_checked"; From 545952e0555339e07c6fd2f80128e41cbe19a4eb Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Mon, 23 Mar 2026 00:44:19 +0800 Subject: [PATCH 04/38] fix: report session doctor cache health Expose Redis-backed graphiti cache health through session_doctor so live runtime checks reflect the actual local hot-tier state. --- src/index.test.ts | 2 + src/index.ts | 1 + src/services/session-mcp-runtime.test.ts | 194 +++++++++++++++++++++++ src/services/session-mcp-runtime.ts | 75 +++++++-- 4 files changed, 259 insertions(+), 13 deletions(-) diff --git a/src/index.test.ts b/src/index.test.ts index 8233660..a50bc76 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ -651,6 +651,7 @@ describe("index", () => { assertEquals(records.redisCloseCalls, 1); assertEquals(records.sessionMcpRuntimeArgs, [{ redisClient: records.redisClientInstances[0], + graphitiCache: records.redisCacheInstances[0], sessionTtlSeconds: config.redis.sessionTtlSeconds, groupId: "group-id", }]); @@ -905,6 +906,7 @@ describe("index", () => { assertEquals(records.sessionMcpRuntimeArgs, [{ redisClient: records.redisClientInstances[0], + graphitiCache: records.redisCacheInstances[0], sessionTtlSeconds: config.redis.sessionTtlSeconds, groupId: "group-id", }]); diff --git a/src/index.ts b/src/index.ts index 9fd4650..6e00fd9 100644 --- a/src/index.ts +++ b/src/index.ts @@ -206,6 +206,7 @@ export const graphiti: Plugin = ( ); const sessionMcpRuntime = dependencies.createSessionMcpRuntime({ redisClient, + graphitiCache: redisCache, sessionTtlSeconds: config.redis.sessionTtlSeconds, groupId: defaultGroupId, }); diff --git a/src/services/session-mcp-runtime.test.ts b/src/services/session-mcp-runtime.test.ts index 3a28715..1095ead 100644 --- a/src/services/session-mcp-runtime.test.ts +++ b/src/services/session-mcp-runtime.test.ts @@ -17,6 +17,100 @@ import { } from "./session-mcp-types.ts"; import { RedisClient } from "./redis-client.ts"; +type RedisEvent = "close" | "end" | "error" | "ready"; + +class DoctorRedisRuntime { + private readonly listeners = new Map< + RedisEvent, + Set<(...args: 
unknown[]) => void> + >(); + + connect(): Promise { + this.emit("ready"); + return Promise.resolve(); + } + + ping(): Promise<"PONG"> { + return Promise.resolve("PONG"); + } + + quit(): Promise<"OK"> { + return Promise.resolve("OK"); + } + + lpush(): Promise { + return Promise.resolve(0); + } + + rpush(): Promise { + return Promise.resolve(0); + } + + lmove(): Promise { + return Promise.resolve(null); + } + + lrange(): Promise { + return Promise.resolve([]); + } + + llen(): Promise { + return Promise.resolve(0); + } + + ltrim(): Promise { + return Promise.resolve(); + } + + lindex(): Promise { + return Promise.resolve(null); + } + + lset(): Promise { + return Promise.resolve(); + } + + get(): Promise { + return Promise.resolve(null); + } + + set(): Promise<"OK"> { + return Promise.resolve("OK"); + } + + expire(): Promise { + return Promise.resolve(1); + } + + del(): Promise { + return Promise.resolve(0); + } + + hset(): Promise { + return Promise.resolve(0); + } + + hgetall(): Promise> { + return Promise.resolve({}); + } + + on(event: RedisEvent, listener: (...args: unknown[]) => void): void { + const listeners = this.listeners.get(event) ?? new Set(); + listeners.add(listener); + this.listeners.set(event, listeners); + } + + off(event: RedisEvent, listener: (...args: unknown[]) => void): void { + this.listeners.get(event)?.delete(listener); + } + + private emit(event: RedisEvent, ...args: unknown[]): void { + for (const listener of this.listeners.get(event) ?? 
[]) { + listener(...args); + } + } +} + const textEncoder = new TextEncoder(); const toolContext = { @@ -106,6 +200,106 @@ describe("session-mcp-runtime", () => { } }); + it("reports live redis health in session_doctor when a redis client is provided", async () => { + const degradedRedis = new RedisClient({ endpoint: "redis://unused" }); + const degradedRuntime = createSessionMcpRuntime({ + redisClient: degradedRedis, + sessionTtlSeconds: 60, + }); + const connectedRedis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new DoctorRedisRuntime(), + }); + const connectedRuntime = createSessionMcpRuntime({ + redisClient: connectedRedis, + sessionTtlSeconds: 60, + }); + + try { + const degradedSerialized = await degradedRuntime.tools.session_doctor + .execute( + validRequests.session_doctor, + toolContext, + ); + const degraded = JSON.parse(degradedSerialized); + + assertEquals(degraded.runtime.status, "ok"); + assertEquals(degraded.redis.status, "degraded"); + + await connectedRedis.connect(); + + const connectedSerialized = await connectedRuntime.tools.session_doctor + .execute( + validRequests.session_doctor, + toolContext, + ); + const connected = JSON.parse(connectedSerialized); + + assertEquals(connected.runtime.status, "ok"); + assertEquals(connected.redis.status, "ok"); + assertEquals(connected.graphiti_cache.status, "not_checked"); + } finally { + await degradedRuntime.dispose(); + await degradedRedis.close(); + await connectedRuntime.dispose(); + await connectedRedis.close(); + } + }); + + it("reports local graphiti cache health in session_doctor", async () => { + const disconnectedRedis = new RedisClient({ endpoint: "redis://unused" }); + const connectedRedis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => new DoctorRedisRuntime(), + }); + + const noCacheRuntime = createSessionMcpRuntime(); + const degradedCacheRuntime = createSessionMcpRuntime({ + redisClient: disconnectedRedis, + sessionTtlSeconds: 60, + 
graphitiCache: {}, + }); + const connectedCacheRuntime = createSessionMcpRuntime({ + redisClient: connectedRedis, + sessionTtlSeconds: 60, + graphitiCache: {}, + }); + + try { + const noCache = JSON.parse( + await noCacheRuntime.tools.session_doctor.execute( + validRequests.session_doctor, + toolContext, + ), + ); + assertEquals(noCache.graphiti_cache.status, "not_checked"); + + const degradedCache = JSON.parse( + await degradedCacheRuntime.tools.session_doctor.execute( + validRequests.session_doctor, + toolContext, + ), + ); + assertEquals(degradedCache.graphiti_cache.status, "degraded"); + + await connectedRedis.connect(); + + const connectedCache = JSON.parse( + await connectedCacheRuntime.tools.session_doctor.execute( + validRequests.session_doctor, + toolContext, + ), + ); + assertEquals(connectedCache.graphiti_cache.status, "ok"); + } finally { + await noCacheRuntime.dispose(); + await degradedCacheRuntime.dispose(); + await connectedCacheRuntime.dispose(); + await disconnectedRedis.close(); + await connectedRedis.close(); + } + }); + it("caps serialized responses to the exact 8 KB budget", async () => { const runtime = createSessionMcpRuntime(); diff --git a/src/services/session-mcp-runtime.ts b/src/services/session-mcp-runtime.ts index bcedb9d..8ed33d4 100644 --- a/src/services/session-mcp-runtime.ts +++ b/src/services/session-mcp-runtime.ts @@ -4,6 +4,7 @@ import { type ToolDefinition, } from "@opencode-ai/plugin"; import type { RedisClient } from "./redis-client.ts"; +import type { RedisCacheService } from "./redis-cache.ts"; import { createSessionCorpusService, type SessionCorpusService, @@ -79,6 +80,7 @@ type SessionMcpHandlerMap = { type SessionMcpRuntimeOptions = { handlers?: Partial; redisClient?: RedisClient; + graphitiCache?: RedisCacheService | object; sessionTtlSeconds?: number; groupId?: string; createSessionCorpusService?: typeof createSessionCorpusService; @@ -93,6 +95,54 @@ export type SessionMcpRuntime = { ) => Promise; }; +const 
getRedisDoctorStatus = ( + redisClient: RedisClient | undefined, +): { status: "ok" | "degraded" | "not_checked"; detail: string } => { + if (!redisClient) { + return { + status: "not_checked", + detail: "Redis client is not configured for this runtime.", + }; + } + + if (redisClient.isConnected()) { + return { + status: "ok", + detail: "Redis hot tier is connected.", + }; + } + + return { + status: "degraded", + detail: "Redis hot tier is unavailable; using in-memory fallback.", + }; +}; + +const getGraphitiCacheDoctorStatus = ( + graphitiCache: SessionMcpRuntimeOptions["graphitiCache"], + redisClient: RedisClient | undefined, +): { status: "ok" | "degraded" | "not_checked"; detail: string } => { + if (!graphitiCache) { + return { + status: "not_checked", + detail: "Graphiti cache service is not configured for this runtime.", + }; + } + + if (redisClient?.isConnected()) { + return { + status: "ok", + detail: "Graphiti cache is backed by the connected Redis hot tier.", + }; + } + + return { + status: "degraded", + detail: + "Graphiti cache is configured but Redis is unavailable; cache access is degraded.", + }; +}; + const parseRequest = ( toolName: TToolName, rawRequest: unknown, @@ -293,28 +343,27 @@ export const createSessionMcpRuntime = ( bytes_saved_estimate: stats.bytesSavedEstimate, }; }, - session_doctor: () => - Promise.resolve({ + session_doctor: () => { + const redis = getRedisDoctorStatus(options.redisClient); + const graphitiCache = getGraphitiCacheDoctorStatus( + options.graphitiCache, + options.redisClient, + ); + return Promise.resolve({ status: "ok", checks: [{ name: "session-mcp-runtime", status: "ok", - detail: "Stub runtime handlers are registered in-process.", + detail: "In-process session MCP runtime handlers are registered.", }], - redis: { - status: "not_checked", - detail: "Redis health is not checked by the Task 1 stub runtime.", - }, - graphiti_cache: { - status: "not_checked", - detail: - "Graphiti cache health is not checked by the Task 
1 stub runtime.", - }, + redis, + graphiti_cache: graphitiCache, runtime: { status: "ok", detail: "In-process session MCP runtime is active.", }, - }), + }); + }, }; const handlerMap: SessionMcpHandlerMap = { From 127b159d32341d85f82bd9ece4c8722af8019a56 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Mon, 23 Mar 2026 02:24:50 +0800 Subject: [PATCH 05/38] chore: ignore local worktree directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a3ee37a..1018f29 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ dist/ node_modules/ +.worktrees/ From ee10d29ca3ca96888fa26ede25dcbb98766ce80a Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 00:08:51 +0800 Subject: [PATCH 06/38] fix: address review follow-up in drain and connection manager --- src/services/batch-drain.ts | 17 ++++++++++------- src/services/connection-manager.test.ts | 5 ++++- src/services/connection-manager.ts | 5 ++--- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/services/batch-drain.ts b/src/services/batch-drain.ts index 2ce89a9..9f6d50e 100644 --- a/src/services/batch-drain.ts +++ b/src/services/batch-drain.ts @@ -221,15 +221,18 @@ export class BatchDrainService { } const retryState = await this.getRetryState(groupId, batchKey); - if (retryState && retryState.nextAttemptAt > Date.now()) { - const retryAfterMs = Math.max(0, retryState.nextAttemptAt - Date.now()); - await this.events.releaseClaim(groupId, claimed.claimToken); - return { status: "backoff", drained: 0, retryAfterMs }; + if (retryState) { + const now = Date.now(); + if (retryState.nextAttemptAt > now) { + const retryAfterMs = Math.max(0, retryState.nextAttemptAt - now); + await this.events.releaseClaim(groupId, claimed.claimToken); + return { status: "backoff", drained: 0, retryAfterMs }; + } } let lostClaim = false; let claimRefreshChain: Promise = Promise.resolve(); - let heartbeatTimer: number | null = null; + let heartbeatTimer: ReturnType | null 
= null; let refreshClaimHeartbeatRunning = false; const refreshClaimOwnership = (): Promise => { const refreshTask = claimRefreshChain.then(async () => { @@ -260,7 +263,7 @@ export class BatchDrainService { heartbeatTimer = setTimeout( refreshClaimHeartbeat, this.getClaimHeartbeatIntervalMs(claimed.lockTtlSeconds), - ) as unknown as number; + ); } } }; @@ -274,7 +277,7 @@ export class BatchDrainService { heartbeatTimer = setTimeout( refreshClaimHeartbeat, this.getClaimHeartbeatIntervalMs(claimed.lockTtlSeconds), - ) as unknown as number; + ); let checkpointedCount = 0; try { diff --git a/src/services/connection-manager.test.ts b/src/services/connection-manager.test.ts index 2b2236f..8318f77 100644 --- a/src/services/connection-manager.test.ts +++ b/src/services/connection-manager.test.ts @@ -1,5 +1,6 @@ import { assertEquals, + assertInstanceOf, assertRejects, assertThrows, } from "jsr:@std/assert@^1.0.0"; @@ -913,7 +914,7 @@ describe("connection manager", () => { }); it("rejects invalid non-empty endpoints up front", () => { - assertThrows( + const error = assertThrows( () => new GraphitiConnectionManager({ endpoint: "not a valid url", @@ -926,6 +927,8 @@ describe("connection manager", () => { Error, 'Invalid Graphiti endpoint: "not a valid url"', ); + + assertInstanceOf(error.cause, TypeError); }); it("moves back offline when connectionFactory throws synchronously", async () => { diff --git a/src/services/connection-manager.ts b/src/services/connection-manager.ts index a9498a9..4c332f7 100644 --- a/src/services/connection-manager.ts +++ b/src/services/connection-manager.ts @@ -124,11 +124,10 @@ const validateEndpoint = (endpoint: string): string => { try { new URL(normalized); } catch (cause) { - const error = new Error( + throw new Error( `Invalid Graphiti endpoint: ${JSON.stringify(normalized)}`, + { cause }, ); - (error as Error & { cause?: unknown }).cause = cause; - throw error; } return normalized; From e4becc616b4af5ff174f5a8eb029a59d41c7c5e5 Mon Sep 17 
00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 01:54:47 +0800 Subject: [PATCH 07/38] feat: close narrowed MCP batch and index parity gaps --- README.md | 19 +- docs/ContextOverhaul.md | 9 +- docs/ContextOverhaulTests.md | 14 +- ...0-context-mode-mcp-first-implementation.md | 2 +- .../2026-03-20-context-mode-mcp-first.md | 2 +- ...23-context-mode-batch-index-gap-closure.md | 492 +++++++ ...6-03-23-clean-slate-architecture-design.md | 498 +++++++ src/handlers/tool-before.test.ts | 129 ++ src/handlers/tool-before.ts | 23 +- src/index.test.ts | 38 + src/index.ts | 7 + src/services/redis-client.test.ts | 103 ++ src/services/redis-client.ts | 246 ++++ src/services/session-corpus.test.ts | 188 +++ src/services/session-corpus.ts | 300 +++- src/services/session-executor.test.ts | 341 +++++ src/services/session-executor.ts | 569 ++++++++ src/services/session-mcp-runtime.test.ts | 1236 ++++++++++++++++- src/services/session-mcp-runtime.ts | 473 ++++++- src/services/session-mcp-types.ts | 176 ++- src/services/tool-routing.test.ts | 2 +- src/session.test.ts | 175 ++- src/session.ts | 43 +- 23 files changed, 4872 insertions(+), 213 deletions(-) create mode 100644 docs/superpowers/plans/2026-03-23-context-mode-batch-index-gap-closure.md create mode 100644 docs/superpowers/specs/2026-03-23-clean-slate-architecture-design.md create mode 100644 src/services/session-executor.test.ts create mode 100644 src/services/session-executor.ts diff --git a/README.md b/README.md index cf48b89..685c3d9 100644 --- a/README.md +++ b/README.md @@ -84,12 +84,24 @@ surface** for data-heavy work. These tools run in-process alongside the plugin hooks and share the same canonical root-session identity and Redis/FalkorDB hot tier. 
+The `session_*` tools also write into the same local continuity model as the +rest of the session: their bounded summaries are recorded as structured events, +folded into the local snapshot, and preserved through compaction under the same +`` envelope used for ordinary chat continuity. + - **Bounded execution** (`session_execute`, `session_execute_file`, `session_batch_execute`) — run commands or process files locally, store full output in the local corpus, and return only a bounded summary to the model. + `session_batch_execute` supports ordered mixed steps, so one request can + combine bounded command execution with local corpus search. - **Local indexing and search** (`session_index`, `session_search`, `session_fetch_and_index`) — index content into a per-session local corpus in - Redis/FalkorDB and search it with bounded result sets. + Redis/FalkorDB and search it with bounded result sets. The local corpus stays + local-first: indexing and retrieval happen against the session's local store, + while any Graphiti augmentation remains asynchronous and cache-backed. + `session_index` accepts either inline `content` or a local `path`; when the + same `source` and `label` are indexed again for one root session, the prior + logical document is replaced instead of appended. - **Diagnostics** (`session_stats`, `session_doctor`) — inspect session state and corpus health. @@ -311,8 +323,9 @@ Events are also queued for background ingestion into long-term memory: - **On idle** (`session.idle`): buffered events are sent to Graphiti and the priority-tiered snapshot is rebuilt. -- **Before compaction** (`session.compacted`): all pending events are sent - immediately so nothing is lost. +- **After compaction** (`session.compacted`): the compaction summary and any + pending continuity are scheduled for background Graphiti ingestion so nothing + is lost across compaction boundaries. 
### Compaction Preservation diff --git a/docs/ContextOverhaul.md b/docs/ContextOverhaul.md index 2266eb9..b9a48f5 100644 --- a/docs/ContextOverhaul.md +++ b/docs/ContextOverhaul.md @@ -7,8 +7,15 @@ - `docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md` (implementation) +> **Historical-only note:** This document preserves the earlier native-routing +> overhaul proposal and its original section numbering. Any implementation +> phases, file-change lists, or acceptance checklists below are historical notes +> only and are **not** the active backlog for the repository. For current +> architecture and acceptance criteria, use the two superseding MCP-first plan +> documents above together with `README.md`. + **Date:** 2026-03-20\ -**Canonical refs:** `AGENTS.md`, `README.md`, `docs/ContextOverhaulTests.md` +**Historical refs:** `README.md`, `docs/ContextOverhaulTests.md` --- diff --git a/docs/ContextOverhaulTests.md b/docs/ContextOverhaulTests.md index 0f4a94a..a0f5b47 100644 --- a/docs/ContextOverhaulTests.md +++ b/docs/ContextOverhaulTests.md @@ -11,6 +11,11 @@ superseded)\ > design. The active architecture is now MCP-first; see the implementation plan > linked above for the current acceptance criteria. The suites below remain as > historical reference for the original hot-path invariants. +> +> **Historical-only note:** Checklist items in this document are not the +> authoritative Task 7 gate. Use the implementation plan above for current +> MCP-first acceptance, including local-first ``, compaction +> continuity, and Graphiti-off-the-hot-path verification. --- @@ -209,7 +214,7 @@ and within budget. - [ ] B-3: Total injected payload (session + persistent) does not exceed 5% of a 128k-token model context (≈ 25 600 chars). - [ ] B-4: Snapshot XML conforms to the priority-tiered schema from - `ContextOverhaul.md` §8.3. + [`docs/ContextOverhaul.md` §8.3](ContextOverhaul.md#83-snapshot-policy). 
- [ ] B-5: Snapshot respects the 3 KB budget — lower-priority sections are truncated first. - [ ] B-6: Each `session_memory` always contains `last_request`; list sections @@ -541,12 +546,13 @@ parent. **Tier:** Unit + Integration -**Canonical design reference:** `docs/ContextOverhaul.md` §11.1 +**Historical design reference:** +[`docs/ContextOverhaul.md` §11.1](ContextOverhaul.md#111-kept-divergence) **Divergence note:** This behavior intentionally differs from official `mksglu/context-mode`, which treats subagent work as summarized tool events -rather than first-class session participants. See §11.1 of the design doc for -the rationale and alignment guidance. +rather than first-class session participants. See the historical §11.1 design +note above for the rationale and alignment guidance. #### Checklist diff --git a/docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md b/docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md index f8a4663..aa95648 100644 --- a/docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md +++ b/docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md @@ -1,6 +1,6 @@ # Context-Mode-Aligned MCP-First Replacement — Implementation Task Plan -**Status:** Planned\ +**Status:** Completed\ **Date:** 2026-03-20\ **Primary architecture:** `docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md`\ diff --git a/docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md b/docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md index 99d0425..9a8830c 100644 --- a/docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md +++ b/docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md @@ -1,6 +1,6 @@ # Context-Mode-Aligned MCP-First Replacement Plan -**Status:** Superseding plan\ +**Status:** Completed\ **Date:** 2026-03-20\ **Supersedes:** `plans/ContextOverhaul.md` and any in-progress Task 1 / Task 2 work derived from that native-hook-first plan\ diff --git 
a/docs/superpowers/plans/2026-03-23-context-mode-batch-index-gap-closure.md b/docs/superpowers/plans/2026-03-23-context-mode-batch-index-gap-closure.md new file mode 100644 index 0000000..f7d8f7d --- /dev/null +++ b/docs/superpowers/plans/2026-03-23-context-mode-batch-index-gap-closure.md @@ -0,0 +1,492 @@ +# Context-Mode Batch And Index Gap Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining verified context-mode parity gaps by adding mixed +command+search batching to `session_batch_execute` and critical +path/source/label replacement semantics to `session_index`, while keeping +`session_*`, skipping upgrade/update, and preserving the Redis/FalkorDB + +Graphiti architecture. + +**Architecture:** Keep the current MCP-first runtime and local corpus +architecture, but extend their contracts in two focused places only: batch step +dispatch and identity-aware indexing. Reuse existing bounded-response, artifact +spillover, local search, and root-session semantics instead of introducing a +second protocol or storage path. + +**Tech Stack:** Deno, TypeScript, OpenCode plugin tool APIs, Redis/FalkorDB +hot-tier storage, Graphiti async cache augmentation, existing `session_*` MCP +runtime and corpus services. + +--- + +## File structure and responsibility lock-in + +- `src/services/session-mcp-types.ts` + - Extend request/response contracts for mixed batch steps and critical index + parity fields. +- `src/services/session-mcp-runtime.ts` + - Extend runtime dispatch for mixed batch steps and path/source/label + indexing. +- `src/services/session-mcp-runtime.test.ts` + - Add runtime-level contract tests for mixed batch and index behavior. 
+- `src/services/session-executor.ts` + - Reuse existing safe file reading helpers if needed for path-based indexing + input normalization. +- `src/services/session-executor.test.ts` + - Add tests only if executor helpers are extended. +- `src/services/session-corpus.ts` + - Add identity-aware replacement bookkeeping and replacement-safe re-index + behavior. +- `src/services/session-corpus.test.ts` + - Add focused replacement and path-ingestion parity tests. +- `README.md` + - Update only the documented `session_batch_execute` and `session_index` + behavior. + +Do **not** rename `session_*` to `ctx_*`. Do **not** add `session_upgrade` / +`ctx_upgrade` / `ctx_update`. Do **not** broaden into clean-slate modularization +work. + +## Locked implementation decisions + +These are not left to the implementer; the plan is explicitly choosing them now. + +- **Mixed batch response shape:** `session_batch_execute.results` becomes a + discriminated union of typed step result items, not a homogeneous array of + execute responses. + +```ts +type SessionBatchStepResult = + | { kind: "command"; result: SessionExecuteResponse } + | { kind: "search"; result: SessionSearchResponse }; +``` + +- **Backward compatibility:** keep accepting the current `commands` request + field for command-only callers, and add `steps` for mixed callers. Normalize + both forms internally into one ordered step list. + +- **Mixed-step orchestration location:** mixed batch execution is coordinated in + `src/services/session-mcp-runtime.ts`, not by generalizing + `src/services/session-executor.ts` into a mixed command/search engine. + +- **Batch budgeting:** existing execute-only budget/coercion code in + `session-mcp-runtime.ts` and any reused helper in `session-executor.ts` must + be updated to branch by result kind rather than assuming every item is an + execute response. + +- **Index replacement model:** replacement happens at the logical indexed + document level. 
Old searchable state for the same + `(rootSessionId, source, + label)` must be removed before the replacement is + committed. Do not use tombstones or search-time filtering. + +- **Path resolution model:** `session_index` path ingestion must resolve against + the active worktree/directory from `ToolContext`. Paths inside the active root + are read directly; paths outside that root must follow the host permission + model by requesting the needed external-directory/read grants and returning a + structured bounded denial if permission is refused. + +## Task order + +### Task 1: Add mixed-step batch contracts + +**Files:** + +- Modify: `src/services/session-mcp-types.ts` +- Test: `src/services/session-mcp-runtime.test.ts` + +- [ ] **Step 1: Write the failing tests for mixed batch requests** + +Add tests covering: + +- mixed `command` + `search` steps are accepted +- legacy `commands` input remains accepted for command-only callers +- empty batch still rejects +- unknown step kinds reject + +Suggested test sketch: + +```ts +it("accepts mixed command and search batch steps", async () => { + const runtime = createSessionMcpRuntime({ ...deps }); + const handler = runtime.tools.session_batch_execute.execute; + const result = await handler( + { + root_session_id: "root-1", + steps: [ + { kind: "command", command: "pwd" }, + { kind: "search", query: "session continuity" }, + ], + }, + makeToolContext(), + ); + assertEquals(result.status, "ok"); +}); +``` + +- [ ] **Step 2: Run the focused test to verify it fails** + +Run: `deno test src/services/session-mcp-runtime.test.ts --filter "mixed|batch"` + +Expected: FAIL because `session_batch_execute` still only accepts homogeneous +command input and returns homogeneous execute-style results. 
+ +- [ ] **Step 3: Extend the batch schema minimally** + +Implement a mixed-step request shape in `src/services/session-mcp-types.ts`: + +```ts +type SessionBatchStep = + | { kind: "command"; command: string; timeout_seconds?: number } + | { kind: "search"; query: string }; + +type SessionBatchStepResult = + | { kind: "command"; result: SessionExecuteResponse } + | { kind: "search"; result: SessionSearchResponse }; +``` + +Keep backward compatibility explicitly: + +- `commands` remains valid for command-only callers +- `steps` becomes the new mixed-step shape +- runtime normalizes both forms internally + +- [ ] **Step 4: Run the focused test to verify schema acceptance now passes or + fails later in dispatch** + +Run: `deno test src/services/session-mcp-runtime.test.ts --filter "mixed|batch"` + +Expected: The request shape parses, but runtime behavior may still fail until +dispatch is implemented. + +### Task 2: Implement mixed-step batch dispatch and budgeting + +**Files:** + +- Modify: `src/services/session-mcp-runtime.ts` +- Modify: `src/services/session-executor.ts` (only if a small shared budgeting + helper extraction is clearly beneficial) +- Test: `src/services/session-mcp-runtime.test.ts` +- Test: `src/services/session-executor.test.ts` (only if helper extraction + happens) + +- [ ] **Step 1: Write failing dispatch tests for mixed command + search + execution** + +Add tests covering: + +- sequential step execution order is preserved +- search step uses local corpus search +- oversized command step still spills safely to artifacts +- typed per-step result items are preserved in `results` + +Suggested test sketch: + +```ts +it("executes mixed command and search steps in order", async () => { + // arrange indexed content first + // execute batch with command then search + // assert typed results in original order +}); +``` + +- [ ] **Step 2: Run the focused test to verify it fails** + +Run: +`deno test src/services/session-mcp-runtime.test.ts --filter 
"order|search step|mixed"` + +Expected: FAIL because runtime only supports command-only batch execution. + +- [ ] **Step 3: Implement the minimal mixed-step runtime dispatch** + +In `src/services/session-mcp-runtime.ts`: + +- iterate `steps` +- for `command`, reuse existing executor path +- for `search`, call `corpus.search(...)` +- preserve original order +- keep per-step results typed and bounded + +Also update existing execute-only assumptions in batch result budgeting and +coercion so search results are handled by kind rather than treated as execute +responses. + +Do not add parallel execution. + +- [ ] **Step 4: Run the focused batch tests** + +Run: +`deno test src/services/session-mcp-runtime.test.ts --filter "batch|search step|mixed|order"` + +Expected: PASS. + +### Task 3: Add critical index contract fields + +**Files:** + +- Modify: `src/services/session-mcp-types.ts` +- Test: `src/services/session-mcp-runtime.test.ts` + +- [ ] **Step 1: Write failing tests for path/source/label index requests** + +Add tests covering: + +- inline `content` still works +- `path` is accepted as an alternative content source +- `source` and `label` fields are accepted + +- [ ] **Step 2: Run the focused test to verify it fails** + +Run: `deno test src/services/session-mcp-runtime.test.ts --filter "index"` + +Expected: FAIL because current schema supports only inline `content`. + +- [ ] **Step 3: Extend the index schema minimally** + +Support the verified critical fields only: + +```ts +type SessionIndexRequest = { + root_session_id: string; + content?: string; + path?: string; + source?: string; + label?: string; +}; +``` + +Require at least one of `content` or `path`. + +- [ ] **Step 4: Re-run the focused test** + +Run: `deno test src/services/session-mcp-runtime.test.ts --filter "index"` + +Expected: request validation passes, but path/replacement behavior may still +fail. 
+ +### Task 4: Implement safe path-based indexing input resolution + +**Files:** + +- Modify: `src/services/session-mcp-runtime.ts` +- Modify: `src/services/session-executor.ts` (only if helper extraction is + needed) +- Test: `src/services/session-mcp-runtime.test.ts` +- Test: `src/services/session-executor.test.ts` (only if helper extraction is + needed) + +- [ ] **Step 1: Write a failing test for path-based indexing** + +Add a test that indexes a local file via `path` and confirms the content becomes +searchable. + +- [ ] **Step 2: Run the focused test to verify it fails** + +Run: +`deno test src/services/session-mcp-runtime.test.ts --filter "path-based indexing|indexes a local file"` + +Expected: FAIL because runtime does not yet resolve `path` input. + +- [ ] **Step 3: Implement minimal safe path ingestion** + +In `src/services/session-mcp-runtime.ts`: + +- when `path` is present, read the file through existing safe local file rules +- use `ToolContext` worktree/directory information for path resolution +- normalize that body into the same `corpus.index(...)` pipeline used for inline + content +- for out-of-workspace paths, request the needed host permissions and return a + structured bounded error if permission is refused + +If shared logic is clearly needed, extract a tiny helper from +`src/services/session-executor.ts`; otherwise keep the change local. + +- [ ] **Step 4: Run the focused test** + +Run: +`deno test src/services/session-mcp-runtime.test.ts --filter "path-based indexing|indexes a local file"` + +Expected: PASS. 
+ +### Task 5: Implement source/label replacement semantics in the corpus + +**Files:** + +- Modify: `src/services/session-corpus.ts` +- Test: `src/services/session-corpus.test.ts` +- Test: `src/services/session-mcp-runtime.test.ts` + +- [ ] **Step 1: Write failing corpus tests for replacement semantics** + +Add tests covering: + +- re-indexing the same `(rootSessionId, source, label)` replaces prior + searchable content +- old content is no longer returned by search +- replacement does not duplicate logical-document state + +Suggested test sketch: + +```ts +it("replaces prior content for the same source and label", async () => { + await corpus.index({ + rootSessionId: "root-1", + content: "old alpha body", + source: "build-log", + label: "latest", + }); + await corpus.index({ + rootSessionId: "root-1", + content: "new beta body", + source: "build-log", + label: "latest", + }); + const oldSearch = await corpus.search({ + rootSessionId: "root-1", + query: "alpha", + }); + const newSearch = await corpus.search({ + rootSessionId: "root-1", + query: "beta", + }); + assertEquals(oldSearch.results.length, 0); + assertEquals(newSearch.results.length > 0, true); +}); +``` + +- [ ] **Step 2: Run the focused corpus test to verify it fails** + +Run: +`deno test src/services/session-corpus.test.ts --filter "source and label|replaces prior content"` + +Expected: FAIL because indexing currently only appends. 
+ +- [ ] **Step 3: Implement minimal identity-aware replacement bookkeeping** + +In `src/services/session-corpus.ts`: + +- introduce a stable mapping for `(groupId, rootSessionId, source, label)` to + the current logical corpus/document identity +- on replacement: + - find the old logical document’s owned searchable state + - remove old searchable associations/postings/metadata before indexing the + replacement + - write the new canonical content + - update the identity mapping + +Keep this local to the corpus subsystem; do not implement replacement as +search-time filtering or tombstoning. + +- [ ] **Step 4: Run the focused replacement tests** + +Run: +`deno test src/services/session-corpus.test.ts --filter "source and label|replaces prior content|replacement"` + +Expected: PASS. + +### Task 6: Wire runtime-level index replacement behavior + +**Files:** + +- Modify: `src/services/session-mcp-runtime.ts` +- Test: `src/services/session-mcp-runtime.test.ts` + +- [ ] **Step 1: Write a failing runtime-level replacement test** + +Add a test that calls `session_index` twice with the same `source`/`label`, then +uses `session_search` to confirm only the new content remains visible. + +- [ ] **Step 2: Run the focused test to verify it fails** + +Run: +`deno test src/services/session-mcp-runtime.test.ts --filter "same source|same label|replacement"` + +Expected: FAIL until runtime passes the new identity fields through cleanly. + +- [ ] **Step 3: Implement the minimal runtime pass-through** + +Pass `source` and `label` from `session_index` requests to `corpus.index(...)`. + +- [ ] **Step 4: Run the focused test** + +Run: +`deno test src/services/session-mcp-runtime.test.ts --filter "same source|same label|replacement"` + +Expected: PASS. 
+ +### Task 7: Update docs for the narrowed parity closure only + +**Files:** + +- Modify: `README.md` + +- [ ] **Step 1: Write the doc changes** + +Update only the relevant sections to describe: + +- mixed command + search support in `session_batch_execute` +- `session_index` support for inline content or local `path` +- optional `source`/`label` replacement semantics + +Do not rename tools and do not mention any upgrade tool. + +- [ ] **Step 2: Verify docs stay aligned with the narrowed scope** + +Read back the changed sections and confirm they match the approved decisions: + +- `session_*` stays public +- no upgrade/update tool +- Graphiti + Redis/FalkorDB architecture unchanged + +### Task 8: Run final verification for the narrowed gap-closure work + +**Files:** + +- Test: `src/services/session-mcp-runtime.test.ts` +- Test: `src/services/session-corpus.test.ts` +- Test: `src/services/session-executor.test.ts` +- Test: `README.md` + +- [ ] **Step 1: Run focused parity verification** + +Run: + +```bash +deno test src/services/session-mcp-runtime.test.ts src/services/session-corpus.test.ts src/services/session-executor.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run full repo verification** + +Run: + +```bash +deno test && deno task check && deno task lint && deno fmt --check +``` + +Expected: all commands pass. + +- [ ] **Step 3: Re-check the original narrowed goals against the result** + +Confirm all of the following are true: + +- `session_batch_execute` supports mixed command + search steps +- `session_index` supports path ingestion +- `session_index` supports `source`/`label` replacement semantics +- `session_*` naming remains unchanged +- no upgrade/update tool was added +- Graphiti async ingestion + cached persistent-memory injection behavior remains intact + +## Exit criteria + +- Mixed command + search steps work in `session_batch_execute`. +- `session_index` accepts inline content or local path input.
+- Re-indexing the same `(source, label)` replaces prior indexed content for that + root session. +- README reflects the narrowed parity closure only. +- Full repo tests, check, lint, and format verification all pass. diff --git a/docs/superpowers/specs/2026-03-23-clean-slate-architecture-design.md b/docs/superpowers/specs/2026-03-23-clean-slate-architecture-design.md new file mode 100644 index 0000000..3190e89 --- /dev/null +++ b/docs/superpowers/specs/2026-03-23-clean-slate-architecture-design.md @@ -0,0 +1,498 @@ +# Clean-Slate Architecture Design + +## Goal + +Define the cleaner architecture this repository should have if redesigned from +scratch for long-term maintainability, while preserving the original product +intent: follow `context-mode` as closely as practical, with a few explicit local +touches. + +The clean-slate target keeps this exact intent: + +1. replace SQLite with FalkorDB/Redis hot-tier storage +2. rename `ctx_*` to `session_*` +3. skip the upgrade/update tool to keep the control-plane impact area smaller +4. retain the Graphiti feature intact for long-term memory, with async ingestion + and synchronous cached injection on the hot path +5. when either Graphiti or FalkorDB/Redis is degraded, fall back to base + OpenCode behavior with a warning instead of throwing + +This is a clean-slate design target, not an incremental refactor plan. + +## Design Principles + +1. **Capability-first modules, not service piles** + - Organize by product capability and ownership boundary, not by generic + “service” or “handler” categories. +2. **One owner per truth** + - Session identity, continuity assembly, corpus state, MCP protocol, routing + policy, and Graphiti augmentation should each have one authoritative home. +3. **Thin orchestration, thick domain modules** + - Bootstrap files should wire modules together, not contain business logic. +4. **Graphiti stays off the hot path** + - All synchronous hooks and MCP calls must remain local-first. +5.
**Explicit contracts at boundaries** + - Hooks, MCP tools, persistence adapters, and async workers communicate via + typed module contracts rather than cross-cutting internal calls. +6. **Graceful degradation over startup failure** + - If Graphiti or FalkorDB/Redis is unavailable, the plugin should warn and + degrade to base OpenCode-compatible behavior rather than throwing. + +## Recommended Top-Level Modules + +### `app/` + +Owns only plugin/runtime composition: + +- config loading +- dependency construction +- runtime lifecycle startup/shutdown +- teardown ordering +- OpenCode hook registration +- degraded-mode detection and warning emission + +This replaces the current overgrown orchestration role of `src/index.ts`. + +### `session/` + +Owns canonical session identity and session-local lifecycle rules: + +- canonical root resolution +- child/parent lineage +- temporary-root migration coordination +- session lifecycle activity tracking +- assistant buffering tied to session ownership + +Nothing outside `session/` should traverse parent chains or reason about +provisional-to-canonical migration. + +### `continuity/` + +Owns short-term memory composition: + +- event extraction from OpenCode/SDK payloads into continuity records +- event normalization for continuity-facing records +- snapshot building +- local memory-envelope assembly +- duplicate filtering / section shaping +- context-window budgeting for continuity payloads +- compaction continuity rules + +This module should answer: “given local events, cached persistent memory, and +session state, what exact memory envelope should the model see?” + +In the clean-slate architecture, synchronous injection still happens on the hot +path, but only from local state and cached Graphiti recall. Fresh Graphiti calls +must remain asynchronous.
+ +### `corpus/` + +Owns local knowledge storage and retrieval: + +- ingestion +- HTML/text normalization +- chunking +- lexical indexing/postings +- artifacts and bounded body spillover +- stats/accounting +- replacement semantics (`source`/`label`) +- root-session migration of corpus-owned state + +This should become a subsystem, not one giant file. + +### `mcp/` + +Owns the `session_*` public tool protocol: + +- tool registry +- request/response schema definitions +- request validation +- bounded response budgeting +- per-tool dispatch +- tool-facing diagnostics surfaces + +`mcp/` should not implement corpus/session internals directly; it should call +module interfaces. + +The public naming remains `session_*` even though the target capability set is +context-mode-inspired. + +### `routing/` + +Owns native-tool steering policy: + +- before-hook routing rules +- after-hook attribution metadata +- guidance throttling +- routing outcome cache +- policy explanation strings + +This module should be mostly pure policy code plus tiny caches. + +### `graphiti/` + +Owns the asynchronous long-term memory path: + +- episode draining/batching +- retry/recovery behavior +- Graphiti connection/client behavior +- refresh scheduling +- persistent-memory cache hydration + +This module must never be required for synchronous hook correctness. + +Its cached outputs are still consumed synchronously by `continuity/` when +assembling the memory envelope on the hot path. + +### `platform/` + +Owns external adapters: + +- Redis/FalkorDB adapter +- Graphiti transport adapter +- command execution adapter +- OpenCode warning/notification adapter + +The rest of the system should depend on interfaces, not on raw SDK/client +objects. + +Only the Redis config surface needs to remain canonical. No separate FalkorDB +config namespace is required in the clean-slate design.
+ +## Proposed Directory Shape + +```text +src/ + app/ + plugin.ts + runtime.ts + teardown.ts + config.ts + + session/ + canonicalizer.ts + lifecycle.ts + migration.ts + assistant-buffer.ts + types.ts + + continuity/ + event-extractor.ts + event-model.ts + event-normalizer.ts + budget.ts + snapshot-builder.ts + memory-builder.ts + memory-renderer.ts + injection-state.ts + types.ts + + corpus/ + ingest.ts + normalize.ts + chunking.ts + index-store.ts + search.ts + artifacts.ts + stats.ts + replacement.ts + migration.ts + types.ts + + mcp/ + registry.ts + schemas.ts + budgeting.ts + runtime.ts + tools/ + execute.ts + execute-file.ts + batch.ts + index.ts + search.ts + fetch-and-index.ts + stats.ts + doctor.ts + + routing/ + policy.ts + before-hook.ts + after-hook.ts + guidance-cache.ts + outcome-cache.ts + types.ts + + graphiti/ + client.ts + connection.ts + drain.ts + refresh.ts + cache-sync.ts + coordinator.ts + types.ts + + platform/ + redis/ + client.ts + hash.ts + list.ts + migration.ts + executor/ + runtime.ts + files.ts + opencode/ + warnings.ts + hooks.ts + normalize.ts + + shared/ + constants.ts + errors.ts + logger.ts + xml.ts + text.ts + ids.ts + types.ts +``` + +## Ownership Boundaries + +### `app` depends on everything; nothing depends on `app` + +`app` is the composition root. It wires modules and exposes plugin hooks/tool +registrations. It should contain almost no domain decisions. + +`app` also owns degraded startup/runtime policy: if Redis/FalkorDB or Graphiti +is unavailable, it emits warnings and composes the best available reduced +runtime instead of throwing. + +### `session` is the identity authority + +All code that needs a canonical root session asks `session`. No other module +inspects parent chains, caches provisional mappings, or owns retry semantics for +temporary-root migration. + +### `continuity` is the memory authority + +Handlers should delegate to `continuity` for assembling model-facing memory. 
+`continuity` should not own transport or storage clients directly; it should +depend on abstract event/snapshot/cache readers. + +`continuity` also owns: + +- extraction of raw SDK payloads into continuity-facing events +- context-window budgeting for local memory assembly + +### `corpus` is the local retrieval authority + +All indexing/search/artifact concerns live here. `mcp` and `routing` should not +duplicate chunking, budgeting, or identity-replacement logic. + +### `mcp` is the protocol authority + +`mcp` decides how public tool calls are parsed, validated, and encoded. It does +not decide search ranking, session migration, or routing policy. + +### `routing` is the policy authority + +The tool guidance system should be a pure policy layer with minimal state. It +should never need to know corpus internals beyond public capabilities. + +### `graphiti` is the long-term augmentation authority + +All episode flushing and refresh logic stays here. The only synchronous thing +the rest of the system should consume is cached recall data already materialized +locally. + +`graphiti/` owns its own transport-facing client/connection layer. `platform/` +does not need a separate Graphiti transport subtree in the clean-slate design. + +If `graphiti/` is degraded, the system continues without long-term augmentation +and without throwing; cached or absent persistent memory should be handled +gracefully. + +### `shared/` is the pure utility layer + +`shared/` contains domain-agnostic helpers only: + +- constants +- generic errors +- logging facade +- text helpers +- XML helpers +- shared IDs/types + +Nothing in `shared/` should import from any domain module. + +## What Changes From Today + +### Current `src/index.ts` + +Today it acts as all of the following: + +- composition root +- runtime lifecycle coordinator +- teardown scheduler +- dependency policy file + +In the clean-slate design it becomes a thin entrypoint delegating almost +entirely to `app/plugin.ts` and `app/runtime.ts`.
+ +### Current `src/session.ts` + +Today it mixes too many concerns: + +- canonical session identity +- lifecycle and activity retention +- assistant buffering +- memory composition +- XML rendering inputs +- persistent-memory assembly +- migration bookkeeping + +In the clean-slate design it is split mostly across `session/` and +`continuity/`. + +### Current `src/services/session-mcp-runtime.ts` + +Today it mixes: + +- tool registry +- schema bridging +- request validation +- response budgeting +- artifact fallback +- tool implementation logic +- stats wiring + +In the clean-slate design it becomes `mcp/runtime.ts` plus per-tool handlers and +shared protocol helpers. + +### Current `src/services/session-corpus.ts` + +Today it owns too much of the local retrieval system in one place: + +- normalization +- chunking +- indexing +- search ranking +- artifacts +- migration +- stats +- replacement logic + +In the clean-slate design it becomes a real `corpus/` subsystem with smaller, +individually testable components. + +### Current `src/handlers/*` + +Today the handler files contain a mix of adapter code and orchestration logic. +In the clean-slate design they become thin OpenCode-facing adapters under the +`platform/opencode/` boundary, delegating into: + +- `continuity/` for chat/message/compaction assembly +- `routing/` for native-tool policy and attribution +- `session/` for canonical root resolution when needed + +They should stop owning any meaningful business logic. + +## Data Flow in the Clean-Slate Design + +### Chat / transform / compaction hot path + +1. OpenCode hook enters `app` adapter. +2. `session` resolves canonical root. +3. `continuity` reads local events/snapshot/cache state. +4. `continuity` renders the local-first memory envelope, including cached + persistent memory when available. +5. Hook returns without any Graphiti dependency. + +### Tool execution path + +1. OpenCode MCP tool call enters `mcp/registry.ts`. +2. `mcp/runtime.ts` validates request and session root. +3.
Tool-specific handler dispatches to `corpus`, executor adapter, or session + contract. +4. `mcp/budgeting.ts` enforces bounded output. +5. Result returns with typed bounded payloads. + +### Native-tool routing path + +1. Before-hook enters `routing/policy.ts`. +2. Policy emits allow/deny/rewrite/guidance outcome. +3. After-hook records attribution metadata only. +4. Event capture stores compact continuity metadata only. + +### Async Graphiti path + +1. `event` path stores local events synchronously. +2. Async coordinator picks up buffered work later. +3. `graphiti/drain.ts` turns events into episodes. +4. `graphiti/refresh.ts` updates cached persistent memory. +5. Later hot-path injections consume only cached results. + +### Degraded startup/runtime path + +1. `app` detects Graphiti and Redis/FalkorDB availability independently. +2. If Redis/FalkorDB is unavailable, the plugin warns and falls back to the + minimum safe local/base-OpenCode-compatible mode rather than throwing. +3. If Graphiti is unavailable, the plugin warns and continues without persistent + memory augmentation. +4. If both are unavailable, the plugin still does not throw during startup; it + degrades to the least-capable safe mode and surfaces warnings. 
+ +## Testing Strategy + +### Unit-first module tests + +Each module should have strong direct tests: + +- `session`: canonicalization, migration retry, child deletion safety +- `continuity`: memory envelope composition and duplicate suppression +- `corpus`: indexing, replacement, migration, search ranking, artifact rules +- `mcp`: schema validation, budgeting, per-tool output shaping +- `routing`: policy verdicts and attribution metadata +- `graphiti`: retry/backoff/cache hydration semantics + +### Thin vertical slices + +Keep a smaller number of full-path integration tests for: + +- hot-path local-first memory injection +- compaction survival +- mixed MCP tool execution +- async Graphiti refresh/drain interactions + +This reduces the current tendency for a few giant files to accumulate too much +test surface. + +## Why This Is More Maintainable + +- A developer can reason about one capability at a time. +- Identity logic stops leaking across hooks, runtime, and migration code. +- Memory composition becomes a first-class subsystem instead of a side effect of + `SessionManager` growth. +- Local corpus evolution becomes easier because ingestion, search, replacement, + and migration are no longer one file. +- MCP feature work becomes additive: adding a new `session_*` tool mostly means + one new handler plus schema, not more branching in a central runtime file. +- Graphiti integration remains powerful but structurally quarantined from the + synchronous path. + +## Success Criteria For The Clean-Slate Architecture + +- The clean-slate architecture still reflects the original context-mode parity + intent, with the approved local touches. +- `app/` is only orchestration. +- `session/` owns identity and lifecycle. +- `continuity/` owns short-term memory assembly. +- `corpus/` owns local retrieval/index/artifact/stat semantics. +- `mcp/` owns the public `session_*` protocol surface. +- `routing/` owns native-tool policy. 
+- `graphiti/` owns asynchronous long-term augmentation. +- Synchronous injection remains available from local state and cached Graphiti + recall without synchronous Graphiti fetches. +- Redis/FalkorDB and Graphiti degradation both fall back with warnings rather + than throwing. +- No single file needs to understand all three of: OpenCode hooks, session + identity, and corpus internals at once. diff --git a/src/handlers/tool-before.test.ts b/src/handlers/tool-before.test.ts index 58f1a53..586f75b 100644 --- a/src/handlers/tool-before.test.ts +++ b/src/handlers/tool-before.test.ts @@ -188,6 +188,135 @@ describe("tool execute before handler", () => { assertEquals(routingOutcomes.take("call-6"), undefined); }); + it("injects canonical root_session_id into every session tool call", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + canonicalizer.cached.set("root-session", "root-session"); + const handler = createToolBeforeHandler({ + sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall, + }); + + const scenarios = [ + ["session_execute", { command: "pwd" }], + ["session_execute_file", { paths: ["README.md"] }], + ["session_batch_execute", { commands: [{ command: "pwd" }] }], + ["session_index", { content: "indexed content" }], + ["session_search", { query: "indexed" }], + ["session_fetch_and_index", { url: "https://example.com" }], + ["session_stats", {}], + ["session_doctor", {}], + ] as const; + + for (const [tool, args] of scenarios) { + const output: { args: Record } = { args: { ...args } }; + + await handler( + { + tool, + sessionID: "root-session", + callID: `${tool}-call`, + } as never, + output as never, + ); + + assertEquals(output.args.root_session_id, "root-session", tool); + } + }); + + it("injects the canonical parent root_session_id for child session tools", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + 
canonicalizer.resolved.set("child-session", "root-session"); + const handler = createToolBeforeHandler({ + sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall, + }); + const output: { args: Record } = { + args: { query: "indexed" }, + }; + + await handler( + { + tool: "session_search", + sessionID: "child-session", + callID: "call-8", + } as never, + output as never, + ); + + assertEquals(output.args.root_session_id, "root-session"); + assertEquals(canonicalizer.cachedCalls, ["child-session"]); + assertEquals(canonicalizer.resolveCalls, ["child-session"]); + assertEquals(routingOutcomes.take("call-8"), undefined); + }); + + it("normalizes an already-present mismatched root_session_id for session tools", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + canonicalizer.cached.set("child-session", "root-session"); + const handler = createToolBeforeHandler({ + sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall, + }); + const output = { + args: { root_session_id: "wrong-root", command: "pwd" }, + }; + + await handler( + { + tool: "session_execute", + sessionID: "child-session", + callID: "call-9", + } as never, + output as never, + ); + + assertEquals(output.args.root_session_id, "root-session"); + assertEquals(routingOutcomes.take("call-9"), undefined); + }); + + it("does not inject root_session_id into native tools", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + canonicalizer.cached.set("root-session", "root-session"); + const handler = createToolBeforeHandler({ + sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall, + }); + + const scenarios = [ + ["Read", { filePath: "/tmp/example.ts" }], + ["Bash", { command: "curl https://example.com/data.json" }], + ["Grep", { pattern: "routeToolCall", include: 
"*.ts" }], + ["Glob", { pattern: "src/**/*.ts" }], + ["WebFetch", { url: "https://example.com" }], + ["Task", { prompt: "Investigate the failing test" }], + ] as const; + + for (const [tool, args] of scenarios) { + const output = { args: { ...args } }; + try { + await handler( + { + tool, + sessionID: "root-session", + callID: `${tool}-native-call`, + } as never, + output as never, + ); + } catch { + // WebFetch is denied by design; we only care about root_session_id injection. + } + + assertEquals("root_session_id" in output.args, false, tool); + } + }); + it("does not perform Redis or Graphiti access on the before-hook path", async () => { const canonicalizer = new MockSessionCanonicalizer(); canonicalizer.cached.set("root-session", "root-session"); diff --git a/src/handlers/tool-before.ts b/src/handlers/tool-before.ts index 0a8dea5..dd3bbbf 100644 --- a/src/handlers/tool-before.ts +++ b/src/handlers/tool-before.ts @@ -5,6 +5,7 @@ import { type RouteToolCallInput, type RoutingDecision, } from "../services/tool-routing.ts"; +import { SESSION_MCP_TOOL_NAMES } from "../services/session-mcp-types.ts"; import type { ToolRoutingOutcomeCache } from "../services/tool-routing-outcome-cache.ts"; import type { ToolRoutingSessionCanonicalizer } from "../session.ts"; @@ -24,6 +25,21 @@ const toRecord = (value: unknown): Record => ? 
value as Record : {}; +const SESSION_MCP_TOOL_NAME_SET = new Set(SESSION_MCP_TOOL_NAMES); + +const isSessionMcpTool = (toolName: string): boolean => + SESSION_MCP_TOOL_NAME_SET.has( + toolName as typeof SESSION_MCP_TOOL_NAMES[number], + ); + +const injectRootSessionId = ( + args: Record, + canonicalSessionId: string, +): Record => ({ + ...args, + root_session_id: canonicalSessionId, +}); + const resolveCanonicalSessionId = async ( sessionCanonicalizer: ToolRoutingSessionCanonicalizer, sessionId: string, @@ -52,7 +68,12 @@ export function createToolBeforeHandler( deps.sessionCanonicalizer, sessionID, ); - const args = toRecord(output.args); + const args = isSessionMcpTool(tool) + ? injectRootSessionId(toRecord(output.args), canonicalSessionId) + : toRecord(output.args); + if (isSessionMcpTool(tool)) { + output.args = args; + } const decision = route({ canonicalSessionId, toolName: tool, diff --git a/src/index.test.ts b/src/index.test.ts index a50bc76..a527e32 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ -75,9 +75,14 @@ function createEntrypointHarnessWithOptions(options: { redisCloseCalls: 0, graphitiAsyncDisposeCalls: 0, graphitiAsyncFlushCalls: [] as string[][], + createSessionExecutorCalls: [] as Array< + Record | undefined + >, + sessionExecutorInstances: [] as unknown[], sessionMcpRuntimeArgs: [] as Array | undefined>, sessionMcpRuntimeDisposeCalls: 0, sessionMcpRuntimeInstances: [] as unknown[], + sessionMcpRuntimeCanonicalizerCalls: [] as unknown[], teardownTaskRuns: [] as string[], teardownRegistrations: [] as Array< { @@ -300,6 +305,17 @@ function createEntrypointHarnessWithOptions(options: { records.teardownTaskRuns.push("session-mcp-runtime"); return Promise.resolve(); } + + setSessionCanonicalizer(sessionCanonicalizer: unknown) { + records.sessionMcpRuntimeCanonicalizerCalls.push(sessionCanonicalizer); + } + } + + class MockSessionExecutor { + constructor(args?: Record) { + records.createSessionExecutorCalls.push(args); + 
records.sessionExecutorInstances.push(this); + } } const dependencies = { @@ -339,6 +355,8 @@ function createEntrypointHarnessWithOptions(options: { RedisCacheService: MockRedisCacheService, BatchDrainService: MockBatchDrainService, GraphitiAsyncService: MockGraphitiAsyncService, + createSessionExecutor: (args?: Record) => + new MockSessionExecutor(args), createSessionMcpRuntime: (args?: Record) => new MockSessionMcpRuntime(args), SessionManager: MockSessionManager, @@ -654,6 +672,8 @@ describe("index", () => { graphitiCache: records.redisCacheInstances[0], sessionTtlSeconds: config.redis.sessionTtlSeconds, groupId: "group-id", + sessionExecutor: records.sessionExecutorInstances[0], + createSessionExecutor: dependencies.createSessionExecutor, }]); assertStrictEquals( @@ -734,6 +754,10 @@ describe("index", () => { idleRetentionMs: config.redis.sessionTtlSeconds * 1000, runtimeStateMigrator: records.sessionMcpRuntimeInstances[0], }); + assertStrictEquals( + records.sessionMcpRuntimeCanonicalizerCalls[0], + records.sessionManagerInstances[0], + ); assertEquals(records.createEventHandlerArgs.length, 1); assertStrictEquals( @@ -909,6 +933,8 @@ describe("index", () => { graphitiCache: records.redisCacheInstances[0], sessionTtlSeconds: config.redis.sessionTtlSeconds, groupId: "group-id", + sessionExecutor: records.sessionExecutorInstances[0], + createSessionExecutor: dependencies.createSessionExecutor, }]); }); @@ -923,6 +949,18 @@ describe("index", () => { ); }); + it("wires the session manager into the runtime root validator explicitly after construction", async () => { + const { input, records, dependencies } = createEntrypointHarness(true); + + await invokeGraphiti(input, dependencies); + + assertEquals(records.sessionMcpRuntimeCanonicalizerCalls.length, 1); + assertStrictEquals( + records.sessionMcpRuntimeCanonicalizerCalls[0], + records.sessionManagerInstances[0], + ); + }); + it("does not leave runtime in stub corpus mode when redis is available", async () => { 
const { input, records, dependencies } = createEntrypointHarness(true); diff --git a/src/index.ts b/src/index.ts index 6e00fd9..b002c66 100644 --- a/src/index.ts +++ b/src/index.ts @@ -21,6 +21,7 @@ import { RedisEventsService } from "./services/redis-events.ts"; import { logger } from "./services/logger.ts"; import { RedisSnapshotService } from "./services/redis-snapshot.ts"; import { registerRuntimeTeardown } from "./services/runtime-teardown.ts"; +import { createSessionExecutor } from "./services/session-executor.ts"; import { createSessionMcpRuntime } from "./services/session-mcp-runtime.ts"; import { ToolGuidanceCache } from "./services/tool-guidance-cache.ts"; import { ToolRoutingOutcomeCache } from "./services/tool-routing-outcome-cache.ts"; @@ -46,6 +47,7 @@ type GraphitiDependencies = { RedisCacheService: typeof RedisCacheService; BatchDrainService: typeof BatchDrainService; GraphitiAsyncService: typeof GraphitiAsyncService; + createSessionExecutor: typeof createSessionExecutor; createSessionMcpRuntime: typeof createSessionMcpRuntime; SessionManager: typeof SessionManager; createEventHandler: typeof createEventHandler; @@ -101,6 +103,7 @@ const defaultGraphitiDependencies: GraphitiDependencies = { RedisCacheService, BatchDrainService, GraphitiAsyncService, + createSessionExecutor, createSessionMcpRuntime, SessionManager, createEventHandler, @@ -204,11 +207,14 @@ export const graphiti: Plugin = ( redisCache, batchDrain, ); + const sessionExecutor = dependencies.createSessionExecutor(); const sessionMcpRuntime = dependencies.createSessionMcpRuntime({ redisClient, graphitiCache: redisCache, sessionTtlSeconds: config.redis.sessionTtlSeconds, groupId: defaultGroupId, + sessionExecutor, + createSessionExecutor: dependencies.createSessionExecutor, }); const sessionManager = new dependencies.SessionManager( @@ -223,6 +229,7 @@ export const graphiti: Plugin = ( runtimeStateMigrator: sessionMcpRuntime, }, ); + 
sessionMcpRuntime.setSessionCanonicalizer(sessionManager); const toolGuidanceCache = new dependencies.ToolGuidanceCache(); const toolRoutingOutcomes = new dependencies.ToolRoutingOutcomeCache(); diff --git a/src/services/redis-client.test.ts b/src/services/redis-client.test.ts index f7a654b..1c4ba90 100644 --- a/src/services/redis-client.test.ts +++ b/src/services/redis-client.test.ts @@ -342,6 +342,109 @@ describe("redis client", () => { }); }); + it("increments hash fields atomically and preserves TTL", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + + await redis.incrementHashFields("memory-cache:group-1:stats", { + calls_total: 1, + bytes_total: 2.5, + }, 1); + await redis.incrementHashFields("memory-cache:group-1:stats", { + calls_total: 2, + bytes_total: 1.5, + }, 1); + + assertEquals(await redis.getHashAll("memory-cache:group-1:stats"), { + calls_total: "3", + bytes_total: "4", + }); + assertEquals( + await redis.snapshot("memory-cache:group-1:stats"), + { + kind: "hash", + values: { + calls_total: "3", + bytes_total: "4", + }, + ttlSeconds: 1, + }, + ); + }); + + it("applies migration units atomically and preserves original ownership on failure", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + + await redis.setString("session:src:artifact:body", "payload", 60); + await redis.appendToList("session:src:term:ttl", "chunk-1", 60); + await redis.setHashFields( + "session:dst:artifact:meta", + { existing: "1" }, + 60, + ); + + const originalApplySnapshotToStore = (redis as unknown as { + applySnapshotToStore: ( + store: unknown, + key: string, + snapshot: unknown, + ) => Promise; + }).applySnapshotToStore.bind(redis); + let injectedFailure = false; + (redis as unknown as { + applySnapshotToStore: ( + store: unknown, + key: string, + snapshot: unknown, + ) => Promise; + }).applySnapshotToStore = async (store, key, snapshot) => { + if (!injectedFailure && key === "session:dst:term:ttl") { + 
injectedFailure = true; + throw new Error("injected migration failure"); + } + await originalApplySnapshotToStore(store, key, snapshot); + }; + + await assertRejects( + () => + redis.applyMigrationUnit({ + writes: [ + { + key: "session:dst:artifact:body", + snapshot: { kind: "string", value: "payload", ttlSeconds: 60 }, + }, + { + key: "session:dst:term:ttl", + snapshot: { kind: "list", values: ["chunk-1"], ttlSeconds: 60 }, + }, + ], + deleteKeys: ["session:src:artifact:body", "session:src:term:ttl"], + }), + Error, + "injected migration failure", + ); + + assertEquals(await redis.getString("session:src:artifact:body"), "payload"); + assertEquals(await redis.getListRange("session:src:term:ttl", 0, 10), [ + "chunk-1", + ]); + assertEquals(await redis.getString("session:dst:artifact:body"), null); + assertEquals(await redis.getListRange("session:dst:term:ttl", 0, 10), []); + assertEquals(await redis.getHashAll("session:dst:artifact:meta"), { + existing: "1", + }); + + const sourceBody = await redis.snapshot("session:src:artifact:body"); + const sourcePosting = await redis.snapshot("session:src:term:ttl"); + assertEquals(sourceBody.kind, "string"); + assertEquals(sourcePosting.kind, "list"); + if (sourceBody.kind === "string") { + assertEquals((sourceBody.ttlSeconds ?? 0) > 0, true); + } + if (sourcePosting.kind === "list") { + assertEquals((sourcePosting.ttlSeconds ?? 
0) > 0, true); + } + }); + it("enforces TTL on in-memory hash fallbacks when the runtime lacks hash support", async () => { const redis = new RedisClient({ endpoint: "redis://unused", diff --git a/src/services/redis-client.ts b/src/services/redis-client.ts index c3c9998..61bb0d2 100644 --- a/src/services/redis-client.ts +++ b/src/services/redis-client.ts @@ -25,6 +25,12 @@ type RedisRuntime = { get(key: string): Promise; hset?(key: string, values: Record): Promise; hgetall?(key: string): Promise>; + hincrby?(key: string, field: string, increment: number): Promise; + hincrbyfloat?( + key: string, + field: string, + increment: number, + ): Promise; set( key: string, value: string, @@ -60,6 +66,11 @@ export type RedisKeySnapshot = | { kind: "list"; values: string[]; ttlSeconds?: number } | { kind: "hash"; values: Record; ttlSeconds?: number }; +export type RedisMigrationUnit = { + writes: Array<{ key: string; snapshot: RedisKeySnapshot }>; + deleteKeys: string[]; +}; + class InMemoryRedisStore implements RedisRuntime { private readonly values = new Map(); @@ -271,6 +282,24 @@ class InMemoryRedisStore implements RedisRuntime { return Promise.resolve(Object.fromEntries(existing.value.entries())); } + hincrby(key: string, field: string, increment: number): Promise { + this.cleanup(key); + const hash = this.ensureHash(key); + const current = Number(hash.get(field) ?? 0); + const next = current + increment; + hash.set(field, String(next)); + return Promise.resolve(next); + } + + hincrbyfloat(key: string, field: string, increment: number): Promise { + this.cleanup(key); + const hash = this.ensureHash(key); + const current = Number(hash.get(field) ?? 
0); + const next = current + increment; + hash.set(field, String(next)); + return Promise.resolve(String(next)); + } + set( key: string, value: string, @@ -705,6 +734,85 @@ export class RedisClient { }); } + private clearPendingFallbackReplaysForKey(key: string): void { + for ( + const replayKey of [ + `string:${key}`, + `hash:${key}`, + `list:${key}`, + `expire:${key}`, + `del:${key}`, + `compareAndTouch:${key}`, + `delIfValue:${key}`, + `snapshot:${key}`, + ] + ) { + this.pendingFallbackReplays.delete(replayKey); + } + } + + private async applySnapshotToStore( + store: RedisRuntime, + key: string, + snapshot: RedisKeySnapshot, + ): Promise { + if (snapshot.kind === "hash" && !store.hset) { + return; + } + + await store.del(key); + + switch (snapshot.kind) { + case "missing": + return; + case "string": + if (snapshot.ttlSeconds) { + await store.set(key, snapshot.value, "EX", snapshot.ttlSeconds); + return; + } + await store.set(key, snapshot.value); + return; + case "list": + if (snapshot.values.length === 0) return; + for (const value of snapshot.values) { + await store.rpush(key, value); + } + if (snapshot.ttlSeconds) { + await store.expire(key, snapshot.ttlSeconds); + } + return; + case "hash": + if (Object.keys(snapshot.values).length === 0) return; + if (!store.hset) return; + await store.hset(key, snapshot.values); + if (snapshot.ttlSeconds) { + await store.expire(key, snapshot.ttlSeconds); + } + return; + } + } + + private queuePendingSnapshotReplay(key: string): void { + this.clearPendingFallbackReplaysForKey(key); + this.queuePendingFallbackReplay(`snapshot:${key}`, async (runtime) => { + const snapshot = this.memory.snapshot(key); + if (snapshot.kind === "hash" && !runtime.hset) return; + await this.applySnapshotToStore(runtime, key, snapshot); + }); + } + + private async rollbackMigrationUnit( + store: RedisRuntime, + snapshots: Map, + ): Promise { + const rollbackEntries = [...snapshots.entries()].sort((left, right) => + left[0].localeCompare(right[0]) 
+ ); + for (const [key, snapshot] of rollbackEntries) { + await this.applySnapshotToStore(store, key, snapshot); + } + } + private isDurableDrainKey(key: string): boolean { return key.startsWith("drain:"); } @@ -1057,6 +1165,56 @@ export class RedisClient { }); } + async incrementHashFields( + key: string, + deltas: Record, + ttlSeconds?: number, + ): Promise> { + const increments = Object.entries(deltas).filter(([, value]) => + value !== undefined && value !== 0 + ) as Array<[string, number]>; + + if (increments.length === 0) { + if (ttlSeconds) await this.touch(key, ttlSeconds); + return await this.getHashAll(key); + } + + await this.useMutationRuntime([key], async (runtime) => { + const incrementField = async ( + target: RedisRuntime, + field: string, + delta: number, + ): Promise => { + if (Number.isInteger(delta) && target.hincrby) { + await target.hincrby(key, field, delta); + return; + } + if (target.hincrbyfloat) { + await target.hincrbyfloat(key, field, delta); + return; + } + + throw new Error("Redis runtime lacks hash increment support"); + }; + + for (const [field, delta] of increments) { + await incrementField(runtime, field, delta); + } + if (ttlSeconds) await runtime.expire(key, ttlSeconds); + + if (runtime !== this.memory && !this.isDurableDrainKey(key)) { + for (const [field, delta] of increments) { + await incrementField(this.memory, field, delta); + } + if (ttlSeconds) await this.memory.expire(key, ttlSeconds); + } + }, () => { + this.queuePendingHashSnapshotReplay(key); + }); + + return await this.getHashAll(key); + } + async compareAndTouch( key: string, expectedValue: string, @@ -1154,6 +1312,94 @@ export class RedisClient { } } + async applyMigrationUnit(unit: RedisMigrationUnit): Promise { + const writes = [...unit.writes].sort((left, right) => + left.key.localeCompare(right.key) + ); + const writeKeys = new Set(writes.map(({ key }) => key)); + const deleteKeys = [...new Set(unit.deleteKeys)] + .filter((key) => !writeKeys.has(key)) + 
.sort((left, right) => left.localeCompare(right)); + const affectedKeys = [...new Set([...writeKeys, ...deleteKeys])].sort(( + left, + right, + ) => left.localeCompare(right)); + + if (affectedKeys.length === 0) return; + + const beforeSnapshots = new Map( + await Promise.all( + affectedKeys.map(async (key) => + [key, await this.snapshot(key)] as const + ), + ), + ); + + const applyToStore = async (store: RedisRuntime): Promise => { + for (const { key, snapshot } of writes) { + await this.applySnapshotToStore(store, key, snapshot); + } + for (const key of deleteKeys) { + await store.del(key); + } + }; + + const runtime = this.connected ? this.redis : null; + + if (!runtime) { + try { + await applyToStore(this.memory); + } catch (error) { + await this.rollbackMigrationUnit(this.memory, beforeSnapshots).catch( + () => undefined, + ); + throw error; + } + + for (const key of affectedKeys) { + const snapshot = this.memory.snapshot(key); + if (snapshot.kind === "hash") { + this.hashFallbackKeys.add(key); + } else { + this.hashFallbackKeys.delete(key); + } + this.queuePendingSnapshotReplay(key); + } + return; + } + + try { + await applyToStore(runtime); + } catch (error) { + await this.rollbackMigrationUnit(runtime, beforeSnapshots).catch(() => + undefined + ); + throw error; + } + + try { + await applyToStore(this.memory); + } catch (error) { + await this.rollbackMigrationUnit(runtime, beforeSnapshots).catch(() => + undefined + ); + await this.rollbackMigrationUnit(this.memory, beforeSnapshots).catch(() => + undefined + ); + throw error; + } + + for (const key of affectedKeys) { + const snapshot = this.memory.snapshot(key); + if (snapshot.kind === "hash" && !runtime.hset) { + this.hashFallbackKeys.add(key); + } else { + this.hashFallbackKeys.delete(key); + } + this.clearPendingFallbackReplaysForKey(key); + } + } + async deleteKeyIfValue(key: string, expectedValue: string): Promise { return await this.useMutationRuntime([key], async (runtime) => { if (runtime === 
this.memory) { diff --git a/src/services/session-corpus.test.ts b/src/services/session-corpus.test.ts index da8f837..459bf82 100644 --- a/src/services/session-corpus.test.ts +++ b/src/services/session-corpus.test.ts @@ -9,6 +9,7 @@ import { RedisClient } from "./redis-client.ts"; import { createSessionCorpusService } from "./session-corpus.ts"; const wait = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); +const textEncoder = new TextEncoder(); describe("session-corpus", () => { it("fetches local HTTP content, normalizes it, and indexes it", async () => { @@ -785,6 +786,13 @@ describe("session-corpus", () => { "session:group-migrate:child-root:stats", ); + redis.restoreSnapshot = () => { + return Promise.reject(new Error("legacy restoreSnapshot path used")); + }; + redis.deleteKey = () => { + return Promise.reject(new Error("legacy deleteKey path used")); + }; + await corpus.migrateRootSessionState("child-root", "parent-root"); const parentSearch = await corpus.search({ @@ -827,6 +835,186 @@ describe("session-corpus", () => { } }); + it("tracks root-session-local corpus and artifact byte counters without duplicating full artifact bodies", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 120, + groupId: "group-stats", + }); + + await corpus.index({ + rootSessionId: "root-stats", + content: "# Corpus One\n\nfirst local corpus body", + }); + const artifact = await corpus.storeArtifact({ + rootSessionId: "root-stats", + toolName: "session_execute", + body: "artifact payload body\n" + "payload marker\n".repeat(40), + }); + + const stats = await corpus.getStats("root-stats"); + const artifactId = artifact.artifactRef.split("/").at(-1) ?? 
""; + const bodyKeys = await redis.keysByPrefix( + "session:group-stats:root-stats:artifact:", + ); + + assertEquals(stats.corpusCount, 2); + assertEquals(stats.artifactCount, 1); + assertEquals(stats.counters.corpus_count, 2); + assertEquals(stats.counters.artifact_count, 1); + assertEquals((stats.counters.bytes_indexed_total ?? 0) > 0, true); + assertEquals((stats.counters.bytes_saved_estimate ?? 0) > 0, true); + assertEquals( + stats.counters.bytes_saved_estimate, + textEncoder.encode( + "artifact payload body\n" + "payload marker\n".repeat(40), + ) + .byteLength, + ); + assertEquals( + bodyKeys.filter((key) => key.endsWith(":body")).length, + 1, + ); + assertEquals( + bodyKeys.some((key) => + key === `session:group-stats:root-stats:artifact:${artifactId}:body` + ), + true, + ); + }); + + it("replaces prior content for the same source and label", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-replacement", + }); + + const first = await corpus.index({ + rootSessionId: "root-replacement", + content: "old alpha body", + source: "build-log", + label: "latest", + }); + const second = await corpus.index({ + rootSessionId: "root-replacement", + content: "new beta body", + source: "build-log", + label: "latest", + }); + + const oldSearch = await corpus.search({ + rootSessionId: "root-replacement", + query: "alpha", + }); + const newSearch = await corpus.search({ + rootSessionId: "root-replacement", + query: "beta", + }); + + assertEquals(oldSearch.results.length, 0); + assertEquals(newSearch.results.length > 0, true); + assertEquals(newSearch.results[0]?.corpus_ref, second.corpusRef); + assertEquals(first.corpusRef === second.corpusRef, false); + }); + + it("removes prior postings and corpus metadata when replacing the same source and label", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = 
createSessionCorpusService({ + redis, + ttlSeconds: 60, + groupId: "group-replacement-cleanup", + }); + + const first = await corpus.index({ + rootSessionId: "root-replacement-cleanup", + content: "old alpha body", + source: "build-log", + label: "latest", + }); + const second = await corpus.index({ + rootSessionId: "root-replacement-cleanup", + content: "new beta body", + source: "build-log", + label: "latest", + }); + + const firstCorpusId = first.corpusRef.split(":").at(-2) ?? ""; + const secondCorpusId = second.corpusRef.split(":").at(-2) ?? ""; + const firstMeta = await redis.snapshot(first.corpusRef); + const secondMeta = await redis.snapshot(second.corpusRef); + const firstChunks = await redis.snapshot( + `session:group-replacement-cleanup:root-replacement-cleanup:corpus:${firstCorpusId}:chunks`, + ); + const secondChunks = await redis.snapshot( + `session:group-replacement-cleanup:root-replacement-cleanup:corpus:${secondCorpusId}:chunks`, + ); + const alphaPostings = await redis.getListRange( + "session:group-replacement-cleanup:root-replacement-cleanup:term:alpha", + 0, + 10, + ); + const betaPostings = await redis.getListRange( + "session:group-replacement-cleanup:root-replacement-cleanup:term:beta", + 0, + 10, + ); + + assertEquals(firstMeta.kind, "missing"); + assertEquals(firstChunks.kind, "missing"); + assertEquals(secondMeta.kind, "hash"); + assertEquals(secondChunks.kind, "list"); + assertEquals(alphaPostings, []); + assertEquals(betaPostings.length > 0, true); + }); + + it("composes concurrent stats deltas without losing increments", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const corpus = createSessionCorpusService({ + redis, + ttlSeconds: 120, + groupId: "group-atomic-stats", + }); + const trackedKey = "session:group-atomic-stats:root-atomic:stats"; + const originalGetHashAll = redis.getHashAll.bind(redis); + let blockStatsReads = true; + let blockedReads = 0; + let waitingResolvers: Array<() => void> = []; 
+ + redis.getHashAll = async (key) => { + if (blockStatsReads && key === trackedKey) { + blockedReads += 1; + await new Promise((resolve) => { + waitingResolvers.push(resolve); + if (blockedReads === 2) { + for (const resume of waitingResolvers) resume(); + waitingResolvers = []; + } + }); + } + return await originalGetHashAll(key); + }; + + await Promise.all([ + corpus.recordStats("root-atomic", { + artifact_count: 1, + bytes_saved_estimate: 10, + }), + corpus.recordStats("root-atomic", { + artifact_count: 2, + bytes_saved_estimate: 5, + }), + ]); + blockStatsReads = false; + + const stats = await corpus.getStats("root-atomic"); + + assertEquals(stats.counters.artifact_count, 3); + assertEquals(stats.counters.bytes_saved_estimate, 15); + }); + it("does not migrate sibling root keys that only share the same prefix", async () => { const redis = new RedisClient({ endpoint: "redis://unused" }); const corpus = createSessionCorpusService({ diff --git a/src/services/session-corpus.ts b/src/services/session-corpus.ts index a543e0a..04edc70 100644 --- a/src/services/session-corpus.ts +++ b/src/services/session-corpus.ts @@ -25,6 +25,8 @@ type IndexInput = { content: string; contentType?: string; title?: string; + source?: string; + label?: string; sourceUrl?: string; artifactId?: string; }; @@ -56,6 +58,8 @@ type CorpusMeta = { title: string; contentType: string; createdAt: number; + source?: string; + label?: string; sourceUrl?: string; truncated: boolean; artifactId?: string; @@ -729,6 +733,8 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { `${sessionPrefix(rootSessionId)}:corpus:${corpusId}:meta`; const corpusChunksKey = (rootSessionId: string, corpusId: string) => `${sessionPrefix(rootSessionId)}:corpus:${corpusId}:chunks`; + const corpusCounterKey = (rootSessionId: string) => + `${sessionPrefix(rootSessionId)}:corpus-counter`; const chunkKey = (rootSessionId: string, chunkId: string) => 
`${sessionPrefix(rootSessionId)}:chunk:${chunkId}`; const termKey = (rootSessionId: string, token: string) => @@ -745,6 +751,25 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { `${sessionPrefix(rootSessionId)}:artifact:${artifactId}:body`; const corpusRefFor = (rootSessionId: string, corpusId: string) => corpusMetaKey(rootSessionId, corpusId); + const identityKey = ( + rootSessionId: string, + source: string, + label: string, + ) => + `${sessionPrefix(rootSessionId)}:identity:${encodeURIComponent(source)}:${ + encodeURIComponent(label) + }`; + + const updateStats = async ( + rootSessionId: string, + deltas: Record, + ): Promise> => { + return await options.redis.incrementHashFields( + statsKey(rootSessionId), + deltas, + options.ttlSeconds, + ); + }; const maxTtl = (...values: Array): number | undefined => { let ttl: number | undefined; @@ -805,15 +830,12 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { }; const reserveCorpusId = async (rootSessionId: string): Promise => { - const listKey = corporaKey(rootSessionId); const index = await options.redis.appendToList( - listKey, - "__pending__", + corpusCounterKey(rootSessionId), + "__reserved__", options.ttlSeconds, ); - const corpusId = `corpus-${index}`; - await options.redis.setListItem(listKey, index - 1, corpusId); - return corpusId; + return `corpus-${index}`; }; const reserveChunkId = async ( @@ -824,10 +846,12 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { const index = await options.redis.appendToList( listKey, "__pending__", - options.ttlSeconds, ); const chunkId = `chunk-${corpusId}-${index}`; await options.redis.setListItem(listKey, index - 1, chunkId); + await options.redis.touch(listKey, options.ttlSeconds).catch(() => + undefined + ); return { chunkId, chunkIndex: index - 1 }; }; @@ -908,6 +932,8 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { title: input.title ?? 
normalized.title, contentType: normalized.contentType, createdAt, + source: input.source, + label: input.label, sourceUrl: input.sourceUrl, truncated: normalized.truncated, artifactId: input.artifactId, @@ -918,12 +944,19 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { meta.title, ); const corpusId = await reserveCorpusId(input.rootSessionId); + await options.redis.appendToList( + corporaKey(input.rootSessionId), + corpusId, + options.ttlSeconds, + ); await options.redis.setHashFields( corpusMetaKey(input.rootSessionId, corpusId), { title: meta.title, content_type: meta.contentType, source_type: sourceType, + source: meta.source, + label: meta.label, source_url: meta.sourceUrl, created_at: meta.createdAt, truncated: meta.truncated ? "1" : "0", @@ -1011,19 +1044,11 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { ); } - const currentStats = await options.redis.getHashAll( - statsKey(input.rootSessionId), - ); - await options.redis.setHashFields( - statsKey(input.rootSessionId), - { - corpus_count: Number(currentStats.corpus_count ?? 0) + 1, - chunk_count: Number(currentStats.chunk_count ?? 0) + chunks.length, - bytes_indexed_total: Number(currentStats.bytes_indexed_total ?? 
0) + - encoder.encode(normalized.body).byteLength, - }, - options.ttlSeconds, - ); + await updateStats(input.rootSessionId, { + corpus_count: 1, + chunk_count: chunks.length, + bytes_indexed_total: encoder.encode(normalized.body).byteLength, + }); await refreshCorpusFamily(input.rootSessionId, corpusId); return { @@ -1035,6 +1060,111 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { }; }; + const deleteListEntriesMatching = async ( + key: string, + predicate: (value: string) => boolean, + ): Promise => { + const snapshot = await options.redis.snapshot(key); + if (snapshot.kind !== "list") return; + const values = snapshot.values.filter((value) => !predicate(value)); + if (values.length === 0) { + await options.redis.deleteKey(key); + return; + } + await options.redis.restoreSnapshot(key, { + kind: "list", + values, + ttlSeconds: snapshot.ttlSeconds, + }); + }; + + const deleteHashFields = async ( + key: string, + fields: Iterable, + ): Promise => { + const snapshot = await options.redis.snapshot(key); + if (snapshot.kind !== "hash") return; + const nextValues = { ...snapshot.values }; + for (const field of fields) delete nextValues[field]; + const nextSnapshot: RedisKeySnapshot = Object.keys(nextValues).length === 0 + ? 
{ kind: "missing" } + : { + kind: "hash", + values: nextValues, + ttlSeconds: snapshot.ttlSeconds, + }; + await options.redis.restoreSnapshot(key, nextSnapshot); + }; + + const deleteCorpus = async ( + rootSessionId: string, + corpusId: string, + ): Promise => { + const metaKey = corpusMetaKey(rootSessionId, corpusId); + const chunksKey = corpusChunksKey(rootSessionId, corpusId); + const chunkIds = await options.redis.getListRange( + chunksKey, + 0, + SEARCH_SCAN_LIMIT, + ); + const chunkIdSet = new Set(chunkIds); + const termSet = new Set(); + const stemSet = new Set(); + const trigramSet = new Set(); + + for (const chunkId of chunkIds) { + const chunk = await loadChunk(rootSessionId, chunkId); + if (!chunk) continue; + for (const term of chunk.terms) termSet.add(term); + for (const stem of chunk.stems) stemSet.add(stem); + for (const trigram of chunk.trigrams) trigramSet.add(trigram); + await options.redis.deleteKey(chunkKey(rootSessionId, chunkId)); + } + + for (const term of termSet) { + await deleteListEntriesMatching( + termKey(rootSessionId, term), + (value) => chunkIdSet.has(value), + ); + } + for (const stem of stemSet) { + await deleteListEntriesMatching( + stemPostingKey(rootSessionId, stem), + (value) => chunkIdSet.has(value), + ); + } + for (const trigram of trigramSet) { + await deleteListEntriesMatching( + trigramKey(rootSessionId, trigram), + (value) => chunkIdSet.has(value), + ); + } + + const removableTerms: string[] = []; + for (const term of termSet) { + const remaining = await options.redis.getListRange( + termKey(rootSessionId, term), + 0, + 0, + ); + if (remaining.length === 0) removableTerms.push(term); + } + if (removableTerms.length > 0) { + await deleteHashFields(vocabKey(rootSessionId), removableTerms); + } + + await deleteListEntriesMatching( + corporaKey(rootSessionId), + (value) => value === corpusId, + ); + await options.redis.deleteKey(chunksKey); + await options.redis.deleteKey(metaKey); + await updateStats(rootSessionId, { + 
corpus_count: -1, + chunk_count: -chunkIds.length, + }); + }; + const loadChunk = async ( rootSessionId: string, chunkId: string, @@ -1084,7 +1214,6 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { return [key, snapshot] as const; })), ); - const originalTargetSnapshots = new Map(); const workingTargetSnapshots = new Map(); const handledSourceKeys = new Set(); @@ -1094,7 +1223,6 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { const existing = workingTargetSnapshots.get(key); if (existing) return existing; const snapshot = await options.redis.snapshot(key); - originalTargetSnapshots.set(key, snapshot); workingTargetSnapshots.set(key, snapshot); return snapshot; }; @@ -1103,9 +1231,6 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { key: string, snapshot: RedisKeySnapshot, ): void => { - if (!originalTargetSnapshots.has(key)) { - originalTargetSnapshots.set(key, { kind: "missing" }); - } workingTargetSnapshots.set(key, snapshot); }; @@ -1296,6 +1421,55 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { continue; } + if (sourceKey === corpusCounterKey(sourceRootSessionId)) { + const sourceCounter = requireSnapshotKind( + sourceKey, + sourceSnapshot, + "list", + ); + const targetKey = corpusCounterKey(targetRootSessionId); + const targetSnapshot = await getWorkingTargetSnapshot(targetKey); + const targetValues = targetSnapshot.kind === "list" + ? targetSnapshot.values + : targetSnapshot.kind === "missing" + ? [] + : (() => { + throw new Error(`Expected list snapshot for ${targetKey}`); + })(); + setWorkingTargetSnapshot(targetKey, { + kind: "list", + values: [...targetValues, ...sourceCounter.values], + ttlSeconds: maxTtl( + targetSnapshot.kind === "list" + ? 
targetSnapshot.ttlSeconds + : undefined, + sourceCounter.ttlSeconds, + ), + }); + handledSourceKeys.add(sourceKey); + continue; + } + + if (sourceKey.startsWith(`${sourcePrefix}:identity:`)) { + const sourceIdentity = requireSnapshotKind( + sourceKey, + sourceSnapshot, + "string", + ); + const targetKey = `${targetPrefix}${ + sourceKey.slice(sourcePrefix.length) + }`; + const targetCorpusId = corpusIdMap.get(sourceIdentity.value) ?? + sourceIdentity.value; + setWorkingTargetSnapshot(targetKey, { + kind: "string", + value: targetCorpusId, + ttlSeconds: sourceIdentity.ttlSeconds, + }); + handledSourceKeys.add(sourceKey); + continue; + } + if ( sourceKey.startsWith(`${sourcePrefix}:term:`) || sourceKey.startsWith(`${sourcePrefix}:tri:`) @@ -1390,40 +1564,35 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { ); } - const targetEntries = [...workingTargetSnapshots.entries()].sort(( - left, - right, - ) => left[0].localeCompare(right[0])); - const sourceEntries = [...sourceSnapshots.entries()].sort((left, right) => - right[0].localeCompare(left[0]) - ); - - try { - for (const [key, snapshot] of targetEntries) { - await options.redis.restoreSnapshot(key, snapshot); - } - for (const key of sourceKeys) { - await options.redis.deleteKey(key); - } - } catch (error) { - for (const [key] of [...targetEntries].reverse()) { - await options.redis.restoreSnapshot( - key, - originalTargetSnapshots.get(key) ?? 
{ kind: "missing" }, - ).catch(() => undefined); - } - for (const [key, snapshot] of sourceEntries) { - await options.redis.restoreSnapshot(key, snapshot).catch(() => - undefined - ); - } - throw error; - } + await options.redis.applyMigrationUnit({ + writes: [...workingTargetSnapshots.entries()].map(([key, snapshot]) => ({ + key, + snapshot, + })), + deleteKeys: sourceKeys, + }); }; return { async index(input: IndexInput) { + if (input.source && input.label) { + const currentCorpusId = await options.redis.getString( + identityKey(input.rootSessionId, input.source, input.label), + ); + if (currentCorpusId) { + await deleteCorpus(input.rootSessionId, currentCorpusId); + } + } + const result = await writeCorpus(input, "index"); + if (input.source && input.label) { + const corpusId = result.corpusRef.split(":").at(-2) ?? ""; + await options.redis.setString( + identityKey(input.rootSessionId, input.source, input.label), + corpusId, + options.ttlSeconds, + ); + } return { status: "ok" as const, ...result }; }, @@ -1511,18 +1680,10 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { options.ttlSeconds, ); - const currentStats = await options.redis.getHashAll( - statsKey(input.rootSessionId), - ); - await options.redis.setHashFields( - statsKey(input.rootSessionId), - { - artifact_count: Number(currentStats.artifact_count ?? 0) + 1, - bytes_saved_estimate: Number(currentStats.bytes_saved_estimate ?? 
0) + - encoder.encode(input.body).byteLength, - }, - options.ttlSeconds, - ); + await updateStats(input.rootSessionId, { + artifact_count: 1, + bytes_saved_estimate: encoder.encode(input.body).byteLength, + }); await refreshCorpusFamily( input.rootSessionId, @@ -1835,6 +1996,13 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { }; }, + async recordStats( + rootSessionId: string, + deltas: Record, + ) { + await updateStats(rootSessionId, deltas); + }, + migrateRootSessionState, dispose, }; diff --git a/src/services/session-executor.test.ts b/src/services/session-executor.test.ts new file mode 100644 index 0000000..31d9fe0 --- /dev/null +++ b/src/services/session-executor.test.ts @@ -0,0 +1,341 @@ +import { + assert, + assertEquals, + assertRejects, + assertStringIncludes, +} from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; +import { + createSessionExecutor, + SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES, +} from "./session-executor.ts"; +import { sessionMcpResponseSchemas } from "./session-mcp-types.ts"; + +const textEncoder = new TextEncoder(); +type ExecutorOptions = NonNullable[0]>; +type RunCommandInput = NonNullable extends ( + input: infer T, +) => Promise ? T + : never; +type StoreArtifactInput = NonNullable extends + ( + input: infer T, + ) => Promise ? 
T + : never; + +describe("session-executor", () => { + it("enforces command timeouts within the bounded executor", async () => { + const executor = createSessionExecutor({ + defaultCommandTimeoutSeconds: 1, + maxCommandTimeoutSeconds: 1, + runCommand: ({ signal }: RunCommandInput) => + new Promise((_resolve, reject) => { + signal.addEventListener("abort", () => { + reject(new DOMException("Aborted", "AbortError")); + }, { once: true }); + }), + readFile: () => Promise.reject(new Error("unexpected file read")), + storeArtifact: () => + Promise.resolve({ artifactRef: "local://session_execute/1" }), + }); + + const startedAt = Date.now(); + const response = await executor.executeCommand( + { + root_session_id: "root-timeout", + command: "sleep forever", + timeout_seconds: 120, + }, + { worktree: "/workspace/project", directory: "/workspace/project" }, + ); + + assert(Date.now() - startedAt < 1_500); + assertEquals(response.status, "error"); + assertEquals(response.timed_out, true); + assertEquals(response.exit_code, -1); + assertEquals(response.truncated, false); + assertStringIncludes(response.summary.toLowerCase(), "timed out"); + assertEquals( + sessionMcpResponseSchemas.session_execute.safeParse(response).success, + true, + ); + }); + + it("reads local files directly from the executor worktree", async () => { + const readPaths: string[] = []; + const executor = createSessionExecutor({ + runCommand: () => Promise.reject(new Error("unexpected command")), + readFile: (path: string) => { + readPaths.push(path); + return Promise.resolve("session executor file body"); + }, + storeArtifact: () => + Promise.resolve({ artifactRef: "local://session_execute_file/1" }), + }); + + const response = await executor.executeFile( + { + root_session_id: "root-file", + paths: ["notes/today.md"], + }, + { worktree: "/workspace/project", directory: "/workspace/project" }, + ); + + assertEquals(readPaths, ["/workspace/project/notes/today.md"]); + assertEquals(response.status, "ok"); + 
assertEquals(response.file_count, 1); + assertEquals(response.truncated, false); + assertStringIncludes(response.summary, "session executor file body"); + assertEquals( + sessionMcpResponseSchemas.session_execute_file.safeParse(response) + .success, + true, + ); + }); + + it("executes session batches sequentially through the shared command executor", async () => { + const executionOrder: string[] = []; + const executor = createSessionExecutor({ + runCommand: ({ command }: RunCommandInput) => { + executionOrder.push(command); + return Promise.resolve({ + exitCode: 0, + stdout: `${command}-out`, + stderr: "", + }); + }, + readFile: () => Promise.reject(new Error("unexpected file read")), + storeArtifact: () => + Promise.resolve({ artifactRef: "local://session_execute/1" }), + }); + + const response = await executor.executeBatch( + { + root_session_id: "root-batch", + commands: [{ command: "first" }, { command: "second" }], + }, + { worktree: "/workspace/project", directory: "/workspace/project" }, + ); + + assertEquals(executionOrder, ["first", "second"]); + assertEquals(response.status, "ok"); + assertEquals(response.truncated, false); + const parsed = sessionMcpResponseSchemas.session_batch_execute.safeParse( + response, + ); + assertEquals(parsed.success, true); + if (!parsed.success) return; + assertEquals( + parsed.data.results.map((result) => + result.kind === "command" + ? 
result.result.summary + : "unexpected-search-result" + ), + ["first-out", "second-out"], + ); + }); + + it("stores oversized command output behind a bounded artifact reference", async () => { + const storedBodies: string[] = []; + const executor = createSessionExecutor({ + runCommand: () => + Promise.resolve({ + exitCode: 0, + stdout: "x".repeat(SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES + 2_048), + stderr: "", + }), + readFile: () => Promise.reject(new Error("unexpected file read")), + storeArtifact: ({ body }: StoreArtifactInput) => { + storedBodies.push(body); + return Promise.resolve({ + artifactRef: "local://session_execute/overflow-1", + }); + }, + }); + + const response = await executor.executeCommand( + { + root_session_id: "root-command-overflow", + command: "big-output", + }, + { worktree: "/workspace/project", directory: "/workspace/project" }, + ); + + assertEquals(response.truncated, true); + assertEquals(response.artifact_ref, "local://session_execute/overflow-1"); + assertEquals(storedBodies.length, 1); + assert(storedBodies[0].length > SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES); + assert( + textEncoder.encode(JSON.stringify(response)).byteLength <= + SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES, + ); + }); + + it("stores oversized file output behind artifact and corpus references", async () => { + const storedBodies: string[] = []; + const executor = createSessionExecutor({ + runCommand: () => Promise.reject(new Error("unexpected command")), + readFile: () => + Promise.resolve( + "y".repeat(SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES + 2_048), + ), + storeArtifact: ({ body }: StoreArtifactInput) => { + storedBodies.push(body); + return Promise.resolve({ + artifactRef: "local://session_execute_file/overflow-1", + corpusRef: "session:group:root-file-overflow:corpus:corpus-1:meta", + }); + }, + }); + + const response = await executor.executeFile( + { + root_session_id: "root-file-overflow", + paths: ["notes/huge.md"], + }, + { worktree: "/workspace/project", directory: 
"/workspace/project" }, + ); + + assertEquals(response.truncated, true); + assertEquals( + response.artifact_ref, + "local://session_execute_file/overflow-1", + ); + assertEquals( + response.corpus_ref, + "session:group:root-file-overflow:corpus:corpus-1:meta", + ); + assertEquals(storedBodies.length, 1); + assert( + textEncoder.encode(JSON.stringify(response)).byteLength <= + SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES, + ); + }); + + it("passes bounded accounting metadata to artifact storage for oversized command and file responses", async () => { + const artifactInputs: StoreArtifactInput[] = []; + const executor = createSessionExecutor({ + runCommand: () => + Promise.resolve({ + exitCode: 0, + stdout: "x".repeat(SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES + 1_024), + stderr: "", + }), + readFile: () => + Promise.resolve( + "y".repeat(SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES + 1_024), + ), + storeArtifact: (input: StoreArtifactInput) => { + artifactInputs.push(input); + return Promise.resolve({ + artifactRef: `local://${input.toolName}/${artifactInputs.length}`, + corpusRef: + `session:group:${input.rootSessionId}:corpus:corpus-${artifactInputs.length}:meta`, + }); + }, + }); + + await executor.executeCommand( + { + root_session_id: "root-accounting", + command: "huge-command", + }, + { worktree: "/workspace/project", directory: "/workspace/project" }, + ); + await executor.executeFile( + { + root_session_id: "root-accounting", + paths: ["notes/huge.md"], + }, + { worktree: "/workspace/project", directory: "/workspace/project" }, + ); + + assertEquals(artifactInputs.length, 2); + assertEquals(artifactInputs[0].rootSessionId, "root-accounting"); + assertEquals(artifactInputs[0].toolName, "session_execute"); + assertEquals( + artifactInputs[0].maxNormalizedIndexedBodyBytes > + SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES, + true, + ); + assertEquals(artifactInputs[1].toolName, "session_execute_file"); + assertEquals( + artifactInputs.every((input) => input.body.length > 0), + true, 
+ ); + }); + + it("returns bounded schema-valid failures for command and file errors", async () => { + const executor = createSessionExecutor({ + runCommand: () => + Promise.resolve({ + exitCode: 17, + stdout: "", + stderr: "command failed loudly", + }), + readFile: () => Promise.reject(new Error("file missing")), + storeArtifact: () => + Promise.resolve({ artifactRef: "local://session_execute/unused" }), + }); + + const commandFailure = await executor.executeCommand( + { + root_session_id: "root-command-failure", + command: "explode", + }, + { worktree: "/workspace/project", directory: "/workspace/project" }, + ); + const fileFailure = await executor.executeFile( + { + root_session_id: "root-file-failure", + paths: ["missing.txt"], + }, + { worktree: "/workspace/project", directory: "/workspace/project" }, + ); + + assertEquals(commandFailure.status, "error"); + assertEquals(commandFailure.exit_code, 17); + assertEquals(commandFailure.timed_out, false); + assertStringIncludes(commandFailure.summary, "command failed loudly"); + assertEquals(fileFailure.status, "error"); + assertEquals(fileFailure.file_count, 0); + assertEquals(fileFailure.truncated, false); + assertStringIncludes(fileFailure.summary, "file missing"); + assertEquals( + sessionMcpResponseSchemas.session_execute.safeParse(commandFailure) + .success, + true, + ); + assertEquals( + sessionMcpResponseSchemas.session_execute_file.safeParse(fileFailure) + .success, + true, + ); + }); + + it("rejects invalid empty batch requests", async () => { + const executor = createSessionExecutor({ + runCommand: () => + Promise.resolve({ exitCode: 0, stdout: "ok", stderr: "" }), + readFile: () => Promise.resolve("ok"), + storeArtifact: () => + Promise.resolve({ artifactRef: "local://session_execute/1" }), + }); + + await assertRejects( + () => + executor.executeBatch( + { + root_session_id: "root-empty-batch", + commands: [], + }, + { + worktree: "/workspace/project", + directory: "/workspace/project", + }, + ), + 
Error, + "at least one command", + ); + }); +}); diff --git a/src/services/session-executor.ts b/src/services/session-executor.ts new file mode 100644 index 0000000..0a8a22d --- /dev/null +++ b/src/services/session-executor.ts @@ -0,0 +1,569 @@ +import path from "node:path"; +import type { + SessionMcpRequestMap, + SessionMcpResponseMap, +} from "./session-mcp-types.ts"; + +export const SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES = 8 * 1024; +export const SESSION_EXECUTOR_DEFAULT_COMMAND_TIMEOUT_SECONDS = 30; +export const SESSION_EXECUTOR_MAX_COMMAND_TIMEOUT_SECONDS = 120; +export const SESSION_EXECUTOR_MAX_NORMALIZED_INDEXED_BODY_BYTES = 512 * 1024; +export const SESSION_EXECUTOR_OUT_OF_WORKSPACE_MESSAGE = + "Path is outside the active workspace."; + +type SessionExecuteResponse = SessionMcpResponseMap["session_execute"]; +type SessionExecuteFileResponse = SessionMcpResponseMap["session_execute_file"]; +type SessionBatchExecuteResponse = + SessionMcpResponseMap["session_batch_execute"]; +type SessionExecuteRequest = SessionMcpRequestMap["session_execute"]; +type SessionExecuteFileRequest = SessionMcpRequestMap["session_execute_file"]; +type SessionBatchExecuteRequest = SessionMcpRequestMap["session_batch_execute"]; + +export type SessionExecutorContext = { + worktree?: string; + directory?: string; +}; + +type CommandExecutionResult = { + exitCode: number; + stdout: string; + stderr: string; +}; + +type StoredArtifact = { + artifactRef: string; + corpusRef?: string; +}; + +type SessionBatchCommandResult = { + kind: "command"; + result: SessionExecuteResponse; +}; + +type SessionBatchCommandResponse = { + status: SessionBatchExecuteResponse["status"]; + summary: string; + results: SessionBatchCommandResult[]; + truncated: boolean; +}; + +type SessionExecutorOptions = { + responseBudgetBytes?: number; + defaultCommandTimeoutSeconds?: number; + maxCommandTimeoutSeconds?: number; + maxNormalizedIndexedBodyBytes?: number; + runCommand?: (input: { + command: string; + cwd: 
string; + timeoutSeconds: number; + signal: AbortSignal; + }) => Promise; + readFile?: (path: string) => Promise; + storeArtifact?: (input: { + rootSessionId: string; + toolName: "session_execute" | "session_execute_file"; + body: string; + maxNormalizedIndexedBodyBytes: number; + }) => Promise; +}; + +export type SessionExecutor = { + executeCommand: ( + request: SessionExecuteRequest, + context: SessionExecutorContext, + ) => Promise; + executeFile: ( + request: SessionExecuteFileRequest, + context: SessionExecutorContext, + ) => Promise; + executeBatch: ( + request: SessionBatchExecuteRequest, + context: SessionExecutorContext, + executeStep?: ( + request: SessionExecuteRequest, + context: SessionExecutorContext, + ) => Promise, + ) => Promise; + readLocalFile?: ( + inputPath: string, + context: SessionExecutorContext, + ) => Promise; +}; + +const textEncoder = new TextEncoder(); +const textDecoder = new TextDecoder(); + +const byteLength = (value: string): number => + textEncoder.encode(value).byteLength; + +const serialize = (value: unknown): string => JSON.stringify(value); + +const clampTimeoutSeconds = ( + timeoutSeconds: number | undefined, + defaults: { + defaultCommandTimeoutSeconds: number; + maxCommandTimeoutSeconds: number; + }, +): number => + Math.min( + timeoutSeconds ?? defaults.defaultCommandTimeoutSeconds, + defaults.maxCommandTimeoutSeconds, + ); + +const defaultRunCommand: NonNullable = + async ({ command, cwd, signal }) => { + const shell = Deno.build.os === "windows" + ? 
{ executable: "cmd", args: ["/d", "/s", "/c", command] } + : { executable: "/bin/sh", args: ["-lc", command] }; + const output = await new Deno.Command(shell.executable, { + args: shell.args, + cwd, + stdin: "null", + stdout: "piped", + stderr: "piped", + signal, + }).output(); + + return { + exitCode: output.code, + stdout: textDecoder.decode(output.stdout), + stderr: textDecoder.decode(output.stderr), + }; + }; + +const defaultReadFile: NonNullable = ( + filePath, +) => Deno.readTextFile(filePath); + +const defaultStoreArtifact: NonNullable< + SessionExecutorOptions["storeArtifact"] +> = ({ body }) => + Promise.resolve({ + artifactRef: `inline://payload/${encodeURIComponent(body)}`, + }); + +const resolveCwd = (context: SessionExecutorContext): string => + context.worktree ?? context.directory ?? Deno.cwd(); + +const isWithinRoot = (rootPath: string, targetPath: string): boolean => { + const relative = path.relative(rootPath, targetPath); + return relative === "" || + (!relative.startsWith("..") && !path.isAbsolute(relative)); +}; + +const resolveWorkspaceRoot = (context: SessionExecutorContext): string => + path.resolve(resolveCwd(context)); + +const resolveFilePath = ( + context: SessionExecutorContext, + inputPath: string, +): string => { + const workspaceRoot = resolveWorkspaceRoot(context); + const baseDirectory = path.resolve(context.directory ?? workspaceRoot); + const candidatePath = path.isAbsolute(inputPath) + ? 
path.resolve(inputPath) + : path.resolve(baseDirectory, inputPath); + + if (!isWithinRoot(workspaceRoot, candidatePath)) { + throw new Error(SESSION_EXECUTOR_OUT_OF_WORKSPACE_MESSAGE); + } + + return candidatePath; +}; + +const summarizeCommandBody = (stdout: string, stderr: string): string => { + const body = stdout || stderr; + return body.trim() || "Command completed with no output."; +}; + +const summarizeFileBody = (paths: string[], contents: string[]): string => + paths.map((filePath, index) => `==> ${filePath} <==\n${contents[index]}`) + .join("\n\n").trim(); + +const isAbortError = (error: unknown): boolean => + error instanceof DOMException + ? error.name === "AbortError" + : error instanceof Error && error.name === "AbortError"; + +const truncateToBudget = (value: string, budgetBytes: number): string => { + if (byteLength(value) <= budgetBytes) return value; + let result = value; + while (result.length > 1 && byteLength(result) > budgetBytes) { + result = result.slice(0, Math.max(Math.floor(result.length * 0.8), 1)); + } + return result; +}; + +const createBoundedCommandArtifactResponse = async ( + response: SessionExecuteResponse, + request: SessionExecuteRequest, + options: Required< + Pick< + SessionExecutorOptions, + "responseBudgetBytes" | "maxNormalizedIndexedBodyBytes" | "storeArtifact" + > + >, +): Promise => { + const artifact = await options.storeArtifact({ + rootSessionId: request.root_session_id, + toolName: "session_execute", + body: response.summary, + maxNormalizedIndexedBodyBytes: options.maxNormalizedIndexedBodyBytes, + }); + return { + ...response, + artifact_ref: artifact.artifactRef, + summary: truncateToBudget( + `Oversized output moved to local artifact ${artifact.artifactRef}.`, + Math.floor(options.responseBudgetBytes / 2), + ), + truncated: true, + }; +}; + +const createBoundedFileArtifactResponse = async ( + response: SessionExecuteFileResponse, + request: SessionExecuteFileRequest, + options: Required< + Pick< + 
SessionExecutorOptions, + "responseBudgetBytes" | "maxNormalizedIndexedBodyBytes" | "storeArtifact" + > + >, +): Promise => { + const artifact = await options.storeArtifact({ + rootSessionId: request.root_session_id, + toolName: "session_execute_file", + body: response.summary, + maxNormalizedIndexedBodyBytes: options.maxNormalizedIndexedBodyBytes, + }); + return { + ...response, + artifact_ref: artifact.artifactRef, + corpus_ref: artifact.corpusRef, + summary: truncateToBudget( + `Oversized output moved to local artifact ${artifact.artifactRef}.`, + Math.floor(options.responseBudgetBytes / 2), + ), + truncated: true, + }; +}; + +const ensureCommandResponseWithinBudget = async ( + response: SessionExecuteResponse, + request: SessionExecuteRequest, + options: Required< + Pick< + SessionExecutorOptions, + "responseBudgetBytes" | "maxNormalizedIndexedBodyBytes" | "storeArtifact" + > + >, +): Promise => { + if (byteLength(serialize(response)) <= options.responseBudgetBytes) { + return response; + } + + const artifactResponse = await createBoundedCommandArtifactResponse( + response, + request, + options, + ); + if (byteLength(serialize(artifactResponse)) <= options.responseBudgetBytes) { + return artifactResponse; + } + + return { + ...artifactResponse, + summary: truncateToBudget( + artifactResponse.summary, + Math.floor(options.responseBudgetBytes / 4), + ), + }; +}; + +const ensureFileResponseWithinBudget = async ( + response: SessionExecuteFileResponse, + request: SessionExecuteFileRequest, + options: Required< + Pick< + SessionExecutorOptions, + "responseBudgetBytes" | "maxNormalizedIndexedBodyBytes" | "storeArtifact" + > + >, +): Promise => { + if (byteLength(serialize(response)) <= options.responseBudgetBytes) { + return response; + } + + const artifactResponse = await createBoundedFileArtifactResponse( + response, + request, + options, + ); + if (byteLength(serialize(artifactResponse)) <= options.responseBudgetBytes) { + return artifactResponse; + } + + return { 
+ ...artifactResponse, + summary: truncateToBudget( + artifactResponse.summary, + Math.floor(options.responseBudgetBytes / 4), + ), + }; +}; + +const createBoundedBatchStepResponse = async ( + response: SessionBatchCommandResult, + rootSessionId: string, + options: Required< + Pick< + SessionExecutorOptions, + "responseBudgetBytes" | "maxNormalizedIndexedBodyBytes" | "storeArtifact" + > + >, +): Promise => { + const artifactRef = response.result.artifact_ref ?? + (await options.storeArtifact({ + rootSessionId, + toolName: "session_execute", + body: response.result.summary, + maxNormalizedIndexedBodyBytes: options.maxNormalizedIndexedBodyBytes, + })).artifactRef; + const compacted: SessionBatchCommandResult = { + ...response, + result: { + ...response.result, + artifact_ref: artifactRef, + summary: truncateToBudget( + `Oversized batch step output moved to local artifact ${artifactRef}.`, + Math.floor(options.responseBudgetBytes / 4), + ), + truncated: true, + }, + }; + + if (byteLength(serialize(compacted)) <= options.responseBudgetBytes) { + return compacted; + } + + return { + ...compacted, + result: { + ...compacted.result, + summary: truncateToBudget(compacted.result.summary, 128), + }, + }; +}; + +const ensureBatchResponseWithinBudget = async ( + response: SessionBatchCommandResponse, + request: SessionBatchExecuteRequest, + options: Required< + Pick< + SessionExecutorOptions, + "responseBudgetBytes" | "maxNormalizedIndexedBodyBytes" | "storeArtifact" + > + >, +): Promise => { + if (byteLength(serialize(response)) <= options.responseBudgetBytes) { + return response; + } + + const results = [...response.results]; + const oversizedResultIndexes = results + .map((result, index) => ({ + index, + bytes: byteLength(serialize(result)), + summaryBytes: byteLength(result.result.summary), + })) + .sort((left, right) => + right.bytes - left.bytes || right.summaryBytes - left.summaryBytes + ); + + let compacted: SessionBatchCommandResponse = { + ...response, + summary: + 
`Batch output truncated to stay within ${options.responseBudgetBytes} bytes.`, + results, + truncated: true, + }; + + for (const candidate of oversizedResultIndexes) { + results[candidate.index] = await createBoundedBatchStepResponse( + results[candidate.index], + request.root_session_id, + options, + ); + compacted = { + ...compacted, + results: [...results], + }; + } + + if (byteLength(serialize(compacted)) <= options.responseBudgetBytes) { + return compacted; + } + + return { + ...compacted, + summary: truncateToBudget(compacted.summary, 128), + }; +}; + +export const createSessionExecutor = ( + options: SessionExecutorOptions = {}, +): SessionExecutor => { + const responseBudgetBytes = options.responseBudgetBytes ?? + SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES; + const defaultCommandTimeoutSeconds = options.defaultCommandTimeoutSeconds ?? + SESSION_EXECUTOR_DEFAULT_COMMAND_TIMEOUT_SECONDS; + const maxCommandTimeoutSeconds = options.maxCommandTimeoutSeconds ?? + SESSION_EXECUTOR_MAX_COMMAND_TIMEOUT_SECONDS; + const maxNormalizedIndexedBodyBytes = options.maxNormalizedIndexedBodyBytes ?? + SESSION_EXECUTOR_MAX_NORMALIZED_INDEXED_BODY_BYTES; + const runCommand = options.runCommand ?? defaultRunCommand; + const readFile = options.readFile ?? defaultReadFile; + const storeArtifact = options.storeArtifact ?? 
defaultStoreArtifact; + const readLocalFile = ( + inputPath: string, + context: SessionExecutorContext, + ) => readFile(resolveFilePath(context, inputPath)); + + const ensureCommand = ( + response: SessionExecuteResponse, + request: SessionExecuteRequest, + ) => + ensureCommandResponseWithinBudget(response, request, { + responseBudgetBytes, + maxNormalizedIndexedBodyBytes, + storeArtifact, + }); + + const ensureFile = ( + response: SessionExecuteFileResponse, + request: SessionExecuteFileRequest, + ) => + ensureFileResponseWithinBudget(response, request, { + responseBudgetBytes, + maxNormalizedIndexedBodyBytes, + storeArtifact, + }); + + return { + readLocalFile, + + async executeCommand(request, context) { + const timeoutSeconds = clampTimeoutSeconds(request.timeout_seconds, { + defaultCommandTimeoutSeconds, + maxCommandTimeoutSeconds, + }); + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(), + timeoutSeconds * 1000, + ); + + try { + const result = await runCommand({ + command: request.command, + cwd: resolveCwd(context), + timeoutSeconds, + signal: controller.signal, + }); + const summary = summarizeCommandBody(result.stdout, result.stderr); + return await ensureCommand({ + status: result.exitCode === 0 ? "ok" : "error", + summary, + exit_code: result.exitCode, + timed_out: false, + truncated: false, + bytes_captured: byteLength(result.stdout) + byteLength(result.stderr), + }, request); + } catch (error) { + if (isAbortError(error)) { + return await ensureCommand({ + status: "error", + summary: `Command timed out after ${timeoutSeconds} second(s).`, + exit_code: -1, + timed_out: true, + truncated: false, + bytes_captured: 0, + }, request); + } + + return await ensureCommand({ + status: "error", + summary: error instanceof Error ? 
error.message : String(error), + exit_code: -1, + timed_out: false, + truncated: false, + bytes_captured: 0, + }, request); + } finally { + clearTimeout(timeout); + } + }, + + async executeFile(request, context) { + try { + const contents = await Promise.all( + request.paths.map((inputPath) => readLocalFile(inputPath, context)), + ); + return await ensureFile({ + status: "ok", + summary: summarizeFileBody(request.paths, contents), + file_count: request.paths.length, + truncated: false, + }, request); + } catch (error) { + return await ensureFile({ + status: "error", + summary: error instanceof Error ? error.message : String(error), + file_count: 0, + truncated: false, + }, request); + } + }, + + async executeBatch(request, context, executeStep) { + if (request.commands.length === 0) { + throw new Error("session_batch_execute requires at least one command"); + } + + const stepExecutor = executeStep ?? + ((stepRequest, stepContext) => + this.executeCommand(stepRequest, stepContext)); + const results: SessionBatchCommandResult[] = []; + + for (const command of request.commands) { + const result = await stepExecutor({ + root_session_id: request.root_session_id, + command: command.command, + timeout_seconds: command.timeout_seconds, + }, context); + results.push( + { kind: "command", result }, + ); + } + + const batchResponse: SessionBatchCommandResponse = { + status: results.every((result) => result.result.status === "ok") + ? 
"ok" + : "error", + summary: `Completed ${results.length} command(s).`, + results, + truncated: false, + }; + + return await ensureBatchResponseWithinBudget(batchResponse, { + root_session_id: request.root_session_id, + commands: request.commands, + }, { + responseBudgetBytes, + maxNormalizedIndexedBodyBytes, + storeArtifact, + }) as SessionBatchExecuteResponse; + }, + }; +}; diff --git a/src/services/session-mcp-runtime.test.ts b/src/services/session-mcp-runtime.test.ts index 1095ead..906f40a 100644 --- a/src/services/session-mcp-runtime.test.ts +++ b/src/services/session-mcp-runtime.test.ts @@ -2,6 +2,7 @@ import { assert, assertEquals, assertExists, + assertRejects, assertStringIncludes, } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; @@ -9,6 +10,7 @@ import { createSessionMcpRuntime, SESSION_MCP_RESPONSE_BUDGET_BYTES, } from "./session-mcp-runtime.ts"; +import type { SessionExecutor } from "./session-executor.ts"; import { SESSION_MCP_TOOL_NAMES, sessionMcpRequestSchemas, @@ -16,10 +18,12 @@ import { type SessionMcpToolName, } from "./session-mcp-types.ts"; import { RedisClient } from "./redis-client.ts"; +import { SessionManager } from "../session.ts"; type RedisEvent = "close" | "end" | "error" | "ready"; class DoctorRedisRuntime { + private readonly hashes = new Map>(); private readonly listeners = new Map< RedisEvent, Set<(...args: unknown[]) => void> @@ -86,12 +90,41 @@ class DoctorRedisRuntime { return Promise.resolve(0); } - hset(): Promise { - return Promise.resolve(0); + hset(key: string, values: Record): Promise { + const hash = this.hashes.get(key) ?? new Map(); + let added = 0; + for (const [field, value] of Object.entries(values)) { + if (!hash.has(field)) added += 1; + hash.set(field, value); + } + this.hashes.set(key, hash); + return Promise.resolve(added); + } + + hgetall(key: string): Promise> { + return Promise.resolve( + Object.fromEntries((this.hashes.get(key) ?? 
new Map()).entries()), + ); + } + + hincrby(key: string, field: string, increment: number): Promise { + const hash = this.hashes.get(key) ?? new Map(); + const next = Number(hash.get(field) ?? 0) + increment; + hash.set(field, String(next)); + this.hashes.set(key, hash); + return Promise.resolve(next); } - hgetall(): Promise> { - return Promise.resolve({}); + hincrbyfloat( + key: string, + field: string, + increment: number, + ): Promise { + const hash = this.hashes.get(key) ?? new Map(); + const next = Number(hash.get(field) ?? 0) + increment; + hash.set(field, String(next)); + this.hashes.set(key, hash); + return Promise.resolve(String(next)); } on(event: RedisEvent, listener: (...args: unknown[]) => void): void { @@ -121,9 +154,19 @@ const toolContext = { worktree: "/workspace/project", abort: AbortSignal.timeout(1_000), metadata: () => {}, - ask: async () => {}, + ask: async (_input: { + permission: string; + patterns: string[]; + always: string[]; + metadata: Record; + }) => {}, }; +const createToolContext = (overrides: Partial = {}) => ({ + ...toolContext, + ...overrides, +}); + const validRequests: Record> = { session_execute: { root_session_id: "root-123", @@ -157,6 +200,96 @@ const validRequests: Record> = { }, }; +Deno.test("mixed|batch schema compatibility", () => { + const request = sessionMcpRequestSchemas.session_batch_execute.safeParse({ + root_session_id: "root-123", + steps: [ + { kind: "command", command: "pwd" }, + { kind: "search", query: "session continuity" }, + ], + }); + const response = sessionMcpResponseSchemas.session_batch_execute.safeParse({ + status: "ok", + summary: "Completed 2 step(s).", + results: [ + { + kind: "command", + result: { + status: "ok", + summary: "pwd", + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: 3, + }, + }, + { + kind: "search", + result: { + status: "ok", + results: [ + { + corpus_ref: "session:root:corpus:1", + snippet: "session continuity", + score: 0.9, + }, + ], + corpus_refs: 
["session:root:corpus:1"], + truncated: false, + }, + }, + ], + truncated: false, + }); + + assertEquals(request.success, true); + if (request.success) { + assertEquals(request.data.commands.length, 1); + assertEquals(request.data.commands[0]?.command, "pwd"); + assertEquals(request.data.steps, [ + { kind: "command", command: "pwd" }, + { kind: "search", query: "session continuity" }, + ]); + } + + assertEquals(response.success, true); +}); + +Deno.test("index schema compatibility accepts critical request fields", () => { + const inlineRequest = sessionMcpRequestSchemas.session_index.safeParse({ + root_session_id: "root-123", + content: "hello world", + }); + const pathRequest = sessionMcpRequestSchemas.session_index.safeParse({ + root_session_id: "root-123", + path: "docs/notes.md", + }); + const metadataRequest = sessionMcpRequestSchemas.session_index.safeParse({ + root_session_id: "root-123", + content: "hello world", + source: "local-file", + label: "notes", + }); + + assertEquals(inlineRequest.success, true); + assertEquals(pathRequest.success, true); + assertEquals(metadataRequest.success, true); + if (metadataRequest.success) { + assertEquals(metadataRequest.data.source, "local-file"); + assertEquals(metadataRequest.data.label, "notes"); + } +}); + +Deno.test("index schema compatibility rejects requests without content or path", () => { + const request = sessionMcpRequestSchemas.session_index.safeParse({ + root_session_id: "root-123", + source: "local-file", + label: "notes", + }); + + assertEquals(request.success, false); +}); + describe("session-mcp-runtime", () => { it("registers exactly the 8 session tools", () => { const runtime = createSessionMcpRuntime(); @@ -168,6 +301,84 @@ describe("session-mcp-runtime", () => { } }); + it("delegates execution tools to the injected shared executor when configured", async () => { + const calls: Array<{ tool: string; payload: unknown }> = []; + type ExecutorRequestMap = { + executeCommand: Parameters[0]; + 
executeFile: Parameters[0]; + }; + const executor: SessionExecutor = { + executeCommand(request: ExecutorRequestMap["executeCommand"]) { + calls.push({ tool: "session_execute", payload: request }); + return Promise.resolve({ + status: "ok", + summary: "executor command", + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: 16, + }); + }, + executeFile(request: ExecutorRequestMap["executeFile"]) { + calls.push({ tool: "session_execute_file", payload: request }); + return Promise.resolve({ + status: "ok", + summary: "executor file", + file_count: 1, + truncated: false, + }); + }, + executeBatch() { + return Promise.resolve({ + status: "ok", + summary: "executor batch", + results: [], + truncated: false, + }); + }, + }; + const runtime = createSessionMcpRuntime({ + sessionExecutor: executor, + } as never); + + try { + const command = JSON.parse( + await runtime.tools.session_execute.execute( + validRequests.session_execute, + toolContext, + ), + ); + const file = JSON.parse( + await runtime.tools.session_execute_file.execute( + validRequests.session_execute_file, + toolContext, + ), + ); + const batch = JSON.parse( + await runtime.tools.session_batch_execute.execute( + validRequests.session_batch_execute, + toolContext, + ), + ); + + assertEquals(calls.map((call) => call.tool), [ + "session_execute", + "session_execute_file", + "session_execute", + "session_execute", + ]); + assertEquals(command.summary, "executor command"); + assertEquals(file.summary, "executor file"); + assertEquals(batch.summary, "Completed 2 step(s)."); + assertEquals(batch.results.map((item: { kind: string }) => item.kind), [ + "command", + "command", + ]); + } finally { + await runtime.dispose(); + } + }); + it("rejects requests without root_session_id for every tool schema", () => { for (const toolName of SESSION_MCP_TOOL_NAMES) { const request = { ...validRequests[toolName] }; @@ -178,6 +389,121 @@ describe("session-mcp-runtime", () => { } }); + it("accepts mixed batch 
step requests via steps and normalizes them internally", () => { + const parsed = sessionMcpRequestSchemas.session_batch_execute.safeParse({ + root_session_id: "root-123", + steps: [ + { kind: "command", command: "pwd" }, + { kind: "search", query: "session continuity" }, + ], + }); + + assertEquals(parsed.success, true); + if (!parsed.success) return; + + assertEquals(parsed.data.commands, [{ + command: "pwd", + timeout_seconds: undefined, + }]); + assertEquals(parsed.data.steps, [ + { kind: "command", command: "pwd" }, + { kind: "search", query: "session continuity" }, + ]); + }); + + it("accepts legacy batch commands input and normalizes it to mixed steps", () => { + const parsed = sessionMcpRequestSchemas.session_batch_execute.safeParse({ + root_session_id: "root-123", + commands: [ + { command: "first" }, + { command: "second", timeout_seconds: 5 }, + ], + }); + + assertEquals(parsed.success, true); + if (!parsed.success) return; + + assertEquals(parsed.data.commands, [ + { command: "first" }, + { command: "second", timeout_seconds: 5 }, + ]); + assertEquals(parsed.data.steps, [ + { kind: "command", command: "first" }, + { kind: "command", command: "second", timeout_seconds: 5 }, + ]); + }); + + it("rejects empty batch requests", () => { + const emptySteps = sessionMcpRequestSchemas.session_batch_execute.safeParse( + { + root_session_id: "root-123", + steps: [], + }, + ); + const emptyCommands = sessionMcpRequestSchemas.session_batch_execute + .safeParse({ + root_session_id: "root-123", + commands: [], + }); + + assertEquals(emptySteps.success, false); + assertEquals(emptyCommands.success, false); + }); + + it("rejects unknown mixed batch step kinds", () => { + const parsed = sessionMcpRequestSchemas.session_batch_execute.safeParse({ + root_session_id: "root-123", + steps: [ + { kind: "command", command: "pwd" }, + { kind: "unknown", query: "session continuity" }, + ], + }); + + assertEquals(parsed.success, false); + }); + + it("validates mixed batch response 
results as a discriminated union", () => { + const parsed = sessionMcpResponseSchemas.session_batch_execute.safeParse({ + status: "ok", + summary: "Completed 2 step(s).", + results: [ + { + kind: "command", + result: { + status: "ok", + summary: "pwd", + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: 3, + }, + }, + { + kind: "search", + result: { + status: "ok", + results: [ + { + corpus_ref: "session:root:corpus:1", + snippet: "session continuity", + score: 0.9, + }, + ], + corpus_refs: ["session:root:corpus:1"], + truncated: false, + }, + }, + ], + truncated: false, + }); + + assertEquals(parsed.success, true); + if (!parsed.success) return; + + assertEquals(parsed.data.results[0]?.kind, "command"); + assertEquals(parsed.data.results[1]?.kind, "search"); + }); + it("returns minimal valid stub responses for all registered tools", async () => { const runtime = createSessionMcpRuntime(); @@ -200,6 +526,271 @@ describe("session-mcp-runtime", () => { } }); + it("rejects schema-valid caller/root mismatches before handler execution", async () => { + const manager = new SessionManager( + "group-runtime-mismatch", + "user-runtime-mismatch", + { + session: { + get() { + throw new Error("unexpected session lookup"); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + manager.setParentId("root-session", null); + manager.setParentId("child-session", "root-session"); + + let handlerCalls = 0; + const runtime = createSessionMcpRuntime({ + sessionCanonicalizer: manager, + handlers: { + session_execute: () => { + handlerCalls += 1; + return Promise.resolve({ + status: "ok", + summary: "should not execute", + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: 0, + }); + }, + }, + } as never); + + try { + await assertRejects( + () => + runtime.tools.session_execute.execute( + { + root_session_id: "wrong-root", + command: "pwd", + }, + { + ...toolContext, + sessionID: "child-session", + }, + ), + Error, + 
"root_session_id mismatch", + ); + assertEquals(handlerCalls, 0); + } finally { + await runtime.dispose(); + } + }); + + it("allows canonical child requests only when the injected root matches lineage", async () => { + const manager = new SessionManager( + "group-runtime-lineage", + "user-runtime-lineage", + { + session: { + get() { + throw new Error("unexpected session lookup"); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + manager.setParentId("root-session", null); + manager.setParentId("child-session", "root-session"); + + const runtime = createSessionMcpRuntime({ + sessionCanonicalizer: manager, + } as never); + + try { + const serialized = await runtime.tools.session_search.execute( + { + root_session_id: "root-session", + query: "indexed", + }, + { + ...toolContext, + sessionID: "child-session", + }, + ); + const parsed = JSON.parse(serialized); + + assertEquals(parsed.status, "ok"); + } finally { + await runtime.dispose(); + } + }); + + it("keeps provisional temporary-root requests valid until migration resolves them", async () => { + let childLookupCount = 0; + const manager = new SessionManager( + "group-runtime-provisional", + "user-runtime-provisional", + { + session: { + get({ path }: { path: { id: string } }) { + if (path.id === "child-session") { + childLookupCount += 1; + if (childLookupCount === 1) { + const error = Object.assign(new Error("Session not found"), { + status: 404, + }); + throw error; + } + return { data: { parentID: "parent-session" } }; + } + if (path.id === "parent-session") { + return { data: { parentID: null } }; + } + throw new Error(`Unexpected session lookup: ${path.id}`); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + const runtime = createSessionMcpRuntime({ + sessionCanonicalizer: manager, + } as never); + + try { + const provisionalSerialized = await runtime.tools.session_stats.execute( + { + root_session_id: "child-session", + }, + { + ...toolContext, + 
sessionID: "child-session", + }, + ); + const provisional = JSON.parse(provisionalSerialized); + assertEquals(provisional.status, "ok"); + + const canonicalSerialized = await runtime.tools.session_stats.execute( + { + root_session_id: "parent-session", + }, + { + ...toolContext, + sessionID: "child-session", + }, + ); + const canonical = JSON.parse(canonicalSerialized); + assertEquals(canonical.status, "ok"); + + await assertRejects( + () => + runtime.tools.session_stats.execute( + { + root_session_id: "child-session", + }, + { + ...toolContext, + sessionID: "child-session", + }, + ), + Error, + "root_session_id mismatch", + ); + } finally { + await runtime.dispose(); + } + }); + + it("does not consume leaked global runtime validators when none are scoped to the runtime", async () => { + const manager = new SessionManager( + "group-runtime-isolation", + "user-runtime-isolation", + { + session: { + get() { + throw new Error("unexpected session lookup"); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + manager.setParentId("root-session", null); + manager.setParentId("child-session", "root-session"); + + const runtime = createSessionMcpRuntime(); + + try { + const serialized = await runtime.tools.session_stats.execute( + { + root_session_id: "session-123", + }, + toolContext, + ); + const parsed = JSON.parse(serialized); + + assertEquals(parsed.status, "ok"); + } finally { + await runtime.dispose(); + } + }); + + it("enforces root validation only after an explicit canonicalizer is wired", async () => { + const manager = new SessionManager( + "group-runtime-explicit", + "user-runtime-explicit", + { + session: { + get() { + throw new Error("unexpected session lookup"); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + manager.setParentId("root-session", null); + manager.setParentId("child-session", "root-session"); + + const runtime = createSessionMcpRuntime(); + + try { + const uncheckedSerialized = await 
runtime.tools.session_stats.execute( + { + root_session_id: "wrong-root", + }, + { + ...toolContext, + sessionID: "child-session", + }, + ); + assertEquals(JSON.parse(uncheckedSerialized).status, "ok"); + + runtime.setSessionCanonicalizer(manager); + + await assertRejects( + () => + runtime.tools.session_stats.execute( + { + root_session_id: "wrong-root", + }, + { + ...toolContext, + sessionID: "child-session", + }, + ), + Error, + "root_session_id mismatch", + ); + } finally { + await runtime.dispose(); + } + }); + it("reports live redis health in session_doctor when a redis client is provided", async () => { const degradedRedis = new RedisClient({ endpoint: "redis://unused" }); const degradedRuntime = createSessionMcpRuntime({ @@ -266,37 +857,243 @@ describe("session-mcp-runtime", () => { }); try { - const noCache = JSON.parse( - await noCacheRuntime.tools.session_doctor.execute( - validRequests.session_doctor, - toolContext, - ), + const noCache = JSON.parse( + await noCacheRuntime.tools.session_doctor.execute( + validRequests.session_doctor, + toolContext, + ), + ); + assertEquals(noCache.graphiti_cache.status, "not_checked"); + + const degradedCache = JSON.parse( + await degradedCacheRuntime.tools.session_doctor.execute( + validRequests.session_doctor, + toolContext, + ), + ); + assertEquals(degradedCache.graphiti_cache.status, "degraded"); + + await connectedRedis.connect(); + + const connectedCache = JSON.parse( + await connectedCacheRuntime.tools.session_doctor.execute( + validRequests.session_doctor, + toolContext, + ), + ); + assertEquals(connectedCache.graphiti_cache.status, "ok"); + } finally { + await noCacheRuntime.dispose(); + await degradedCacheRuntime.dispose(); + await connectedCacheRuntime.dispose(); + await disconnectedRedis.close(); + await connectedRedis.close(); + } + }); + + it("returns schema-valid bounded doctor output after local stats wiring is active", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + 
const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + groupId: "group-doctor-bounded", + handlers: { + session_execute: () => + Promise.resolve({ + status: "ok", + summary: "z".repeat(SESSION_MCP_RESPONSE_BUDGET_BYTES + 2_048), + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: SESSION_MCP_RESPONSE_BUDGET_BYTES + 2_048, + }), + }, + } as never); + + try { + await runtime.tools.session_execute.execute( + validRequests.session_execute, + toolContext, + ); + const serialized = await runtime.tools.session_doctor.execute( + validRequests.session_doctor, + toolContext, + ); + const parsed = JSON.parse(serialized); + + assertEquals( + sessionMcpResponseSchemas.session_doctor.safeParse(parsed).success, + true, + ); + assert( + textEncoder.encode(serialized).byteLength <= + SESSION_MCP_RESPONSE_BUDGET_BYTES, + ); + assertEquals(parsed.status, "ok"); + assertEquals(parsed.runtime.status, "ok"); + } finally { + await runtime.dispose(); + } + }); + + it("reads live local counters through session_stats for every session_* call family", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const originalFetch = globalThis.fetch; + globalThis.fetch = () => + Promise.resolve( + new Response("runtime fetched body", { + headers: { "content-type": "text/plain; charset=utf-8" }, + }), + ); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + groupId: "group-live-stats", + } as never); + + try { + await runtime.tools.session_execute.execute( + validRequests.session_execute, + toolContext, + ); + await runtime.tools.session_execute_file.execute( + validRequests.session_execute_file, + toolContext, + ).catch(() => undefined); + await runtime.tools.session_batch_execute.execute( + validRequests.session_batch_execute, + toolContext, + ); + await runtime.tools.session_index.execute( + validRequests.session_index, + toolContext, + ); + await 
runtime.tools.session_search.execute( + validRequests.session_search, + toolContext, + ); + await runtime.tools.session_fetch_and_index.execute( + validRequests.session_fetch_and_index, + toolContext, + ); + const statsSerialized = await runtime.tools.session_stats.execute( + validRequests.session_stats, + toolContext, + ); + const stats = JSON.parse(statsSerialized); + + assertEquals(stats.status, "ok"); + assertEquals(stats.counters.session_execute_calls_total >= 1, true); + assertEquals(stats.counters.session_execute_file_calls_total >= 1, true); + assertEquals(stats.counters.session_batch_execute_calls_total >= 1, true); + assertEquals(stats.counters.session_index_calls_total >= 1, true); + assertEquals(stats.counters.session_search_calls_total >= 1, true); + assertEquals( + stats.counters.session_fetch_and_index_calls_total >= 1, + true, + ); + assertEquals(stats.counters.session_stats_calls_total >= 1, true); + assertEquals(stats.counters.bytes_returned_total > 0, true); + assertEquals(stats.counters.bytes_indexed_total > 0, true); + assertEquals(stats.counters.bytes_saved_estimate > 0, true); + assertEquals(stats.artifact_count >= 1, true); + assertEquals(stats.corpus_count >= 2, true); + } finally { + globalThis.fetch = originalFetch; + await runtime.dispose(); + } + }); + + it("does not duplicate full artifact bodies when an inline payload already provides the canonical stored body", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const hiddenPayload = "FULL CANONICAL PAYLOAD\n" + + "canonical marker\n".repeat(200); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + groupId: "group-no-dup-artifacts", + handlers: { + session_execute: () => + Promise.resolve({ + status: "ok", + summary: "Visible bounded summary only.", + artifact_ref: `inline://payload/${ + encodeURIComponent(hiddenPayload) + }`, + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: 
hiddenPayload.length, + }), + }, + } as never); + + try { + const executeSerialized = await runtime.tools.session_execute.execute( + validRequests.session_execute, + toolContext, + ); + const execute = JSON.parse(executeSerialized); + const artifactKeys = await redis.keysByPrefix( + "session:group-no-dup-artifacts:root-123:artifact:", + ); + const artifactBodies = artifactKeys.filter((key) => + key.endsWith(":body") + ); + + assertExists(execute.artifact_ref); + assertEquals(artifactBodies.length, 1); + } finally { + await runtime.dispose(); + } + }); + + it("records corpus-backed artifact stats when the executor already returned a non-inline artifact_ref", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const hiddenPayload = "PRE-STORED EXECUTOR PAYLOAD\n" + + "pre-stored marker\n".repeat(160); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + groupId: "group-prestored-artifact", + handlers: { + session_execute: () => + Promise.resolve({ + status: "ok", + summary: hiddenPayload, + artifact_ref: "local://session_execute/pre-existing", + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: hiddenPayload.length, + }), + }, + } as never); + + try { + const executeSerialized = await runtime.tools.session_execute.execute( + validRequests.session_execute, + toolContext, ); - assertEquals(noCache.graphiti_cache.status, "not_checked"); - - const degradedCache = JSON.parse( - await degradedCacheRuntime.tools.session_doctor.execute( - validRequests.session_doctor, - toolContext, - ), + const execute = JSON.parse(executeSerialized); + const statsSerialized = await runtime.tools.session_stats.execute( + validRequests.session_stats, + toolContext, ); - assertEquals(degradedCache.graphiti_cache.status, "degraded"); - - await connectedRedis.connect(); - - const connectedCache = JSON.parse( - await connectedCacheRuntime.tools.session_doctor.execute( - validRequests.session_doctor, - 
toolContext, - ), + const stats = JSON.parse(statsSerialized); + const artifactKeys = await redis.keysByPrefix( + "session:group-prestored-artifact:root-123:artifact:", ); - assertEquals(connectedCache.graphiti_cache.status, "ok"); + const artifactBodies = artifactKeys.filter((key) => + key.endsWith(":body") + ); + + assertEquals(execute.artifact_ref.length > 0, true); + assertEquals(stats.counters.bytes_saved_estimate > 0, true); + assertEquals(stats.artifact_count >= 1, true); + assertEquals(artifactBodies.length, 1); } finally { - await noCacheRuntime.dispose(); - await degradedCacheRuntime.dispose(); - await connectedCacheRuntime.dispose(); - await disconnectedRedis.close(); - await connectedRedis.close(); + await runtime.dispose(); } }); @@ -358,7 +1155,7 @@ describe("session-mcp-runtime", () => { } }); - it("executes session_batch_execute sequentially in request order", async () => { + it("executes sequential command groups in request order", async () => { const executionOrder: string[] = []; const runtime = createSessionMcpRuntime({ handlers: { @@ -392,7 +1189,9 @@ describe("session-mcp-runtime", () => { assertEquals(executionOrder, ["first", "second", "third"]); assertEquals( - parsed.results.map((item: { summary: string }) => item.summary), + parsed.results.map((item: { result: { summary: string } }) => + item.result.summary + ), [ "executed first", "executed second", @@ -611,7 +1410,203 @@ describe("session-mcp-runtime", () => { } }); - it("stores oversized session_batch_execute output behind bounded artifact refs instead of overflowing the response budget", async () => { + it("indexes a local file via path-based indexing and makes it searchable", async () => { + const worktreeDir = Deno.cwd(); + const localFile = `${worktreeDir}/src/services/session-mcp-runtime.ts`; + const askCalls: Array<{ + permission: string; + patterns: string[]; + always: string[]; + metadata: Record; + }> = []; + const redis = new RedisClient({ endpoint: "redis://unused" }); + 
const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + groupId: "group-path-index", + readSessionIndexFile: () => + Promise.resolve( + "Index local content for the current root session.", + ), + } as never); + + try { + await runtime.tools.session_index.execute( + { + root_session_id: "root-path-index", + path: localFile, + }, + createToolContext({ + worktree: worktreeDir, + directory: worktreeDir, + ask: (input) => { + askCalls.push(input); + return Promise.resolve(); + }, + }), + ); + + const searchSerialized = await runtime.tools.session_search.execute( + { + root_session_id: "root-path-index", + query: "Index local content for the current root session", + }, + createToolContext({ + worktree: worktreeDir, + directory: worktreeDir, + }), + ); + const search = JSON.parse(searchSerialized); + + assertEquals(search.status, "ok"); + assertEquals(search.results.length > 0, true); + assertStringIncludes( + search.results[0].snippet, + "Index local content", + ); + assertEquals(askCalls, [{ + permission: "read", + patterns: [localFile], + always: ["*"], + metadata: {}, + }]); + } finally { + await runtime.dispose(); + } + }); + + it("indexes an external file after requesting external_directory and read permissions", async () => { + const worktreeDir = Deno.cwd(); + const externalFile = + "/Users/vicary/Documents/Projects/vicary/opencode-graphiti/AGENTS.md"; + const externalParentDir = + "/Users/vicary/Documents/Projects/vicary/opencode-graphiti"; + const askCalls: Array<{ + permission: string; + patterns: string[]; + always: string[]; + metadata: Record; + }> = []; + const redis = new RedisClient({ endpoint: "redis://unused" }); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + groupId: "group-path-index-external", + readSessionIndexFile: () => + Promise.resolve("Graphiti is never on the hot path."), + } as never); + + try { + await runtime.tools.session_index.execute( + { + root_session_id: 
"root-path-index-external", + path: externalFile, + }, + createToolContext({ + worktree: worktreeDir, + directory: worktreeDir, + ask: (input) => { + askCalls.push(input); + return Promise.resolve(); + }, + }), + ); + + const searchSerialized = await runtime.tools.session_search.execute( + { + root_session_id: "root-path-index-external", + query: "Graphiti is never on the hot path", + }, + createToolContext({ + worktree: worktreeDir, + directory: worktreeDir, + }), + ); + const search = JSON.parse(searchSerialized); + + assertEquals(search.status, "ok"); + assertEquals(search.results.length > 0, true); + assertStringIncludes( + search.results[0].snippet, + "Graphiti is never on the hot path", + ); + assertEquals(askCalls.length, 2); + assertEquals(askCalls[0], { + permission: "external_directory", + patterns: [`${externalParentDir}/*`], + always: [`${externalParentDir}/*`], + metadata: { + filepath: externalFile, + parentDir: externalParentDir, + }, + }); + assertEquals(askCalls[1], { + permission: "read", + patterns: [externalFile], + always: ["*"], + metadata: {}, + }); + } finally { + await runtime.dispose(); + } + }); + + it("replaces prior indexed content when session_index repeats the same source and label", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + groupId: "group-runtime-replacement", + } as never); + + try { + await runtime.tools.session_index.execute( + { + root_session_id: "root-runtime-replacement", + content: "old alpha body", + source: "build-log", + label: "latest", + }, + toolContext, + ); + await runtime.tools.session_index.execute( + { + root_session_id: "root-runtime-replacement", + content: "new beta body", + source: "build-log", + label: "latest", + }, + toolContext, + ); + + const oldSearch = JSON.parse( + await runtime.tools.session_search.execute( + { + root_session_id: "root-runtime-replacement", + query: 
"alpha", + }, + toolContext, + ), + ); + const newSearch = JSON.parse( + await runtime.tools.session_search.execute( + { + root_session_id: "root-runtime-replacement", + query: "beta", + }, + toolContext, + ), + ); + + assertEquals(oldSearch.results.length, 0); + assertEquals(newSearch.results.length > 0, true); + assertStringIncludes(newSearch.results[0].snippet, "beta"); + } finally { + await runtime.dispose(); + } + }); + + it("stores oversized sequential command output behind bounded artifact refs instead of overflowing the response budget", async () => { const redis = new RedisClient({ endpoint: "redis://unused" }); const runtime = createSessionMcpRuntime({ redisClient: redis, @@ -649,12 +1644,177 @@ describe("session-mcp-runtime", () => { ); assertEquals(parsed.truncated, true); assertEquals(parsed.results.length, 2); - assertExists(parsed.results[0].artifact_ref); - assertExists(parsed.results[1].artifact_ref); + assertEquals(parsed.results[0].kind, "command"); + assertEquals(parsed.results[1].kind, "command"); + assertExists(parsed.results[0].result.artifact_ref); + assertExists(parsed.results[1].result.artifact_ref); + assertEquals( + parsed.results[0].result.artifact_ref.startsWith( + "local://session_execute/", + ), + true, + ); + } finally { + await runtime.dispose(); + } + }); + + it("executes mixed batch steps in order and preserves typed per-step results", async () => { + const executionOrder: string[] = []; + const redis = new RedisClient({ endpoint: "redis://unused" }); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + handlers: { + session_execute: (request: { command: string }) => { + executionOrder.push(`command:${request.command}`); + return Promise.resolve({ + status: "ok", + summary: `executed ${request.command}`, + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: request.command.length, + }); + }, + }, + } as never); + + try { + await runtime.tools.session_index.execute( + { + 
root_session_id: "root-mixed-order", + content: "session continuity is preserved in the local corpus", + }, + toolContext, + ); + + const serialized = await runtime.tools.session_batch_execute.execute( + { + root_session_id: "root-mixed-order", + steps: [ + { kind: "command", command: "first" }, + { kind: "search", query: "session continuity" }, + { kind: "command", command: "third" }, + ], + }, + toolContext, + ); + const parsed = JSON.parse(serialized); + + assertEquals(executionOrder, ["command:first", "command:third"]); + assertEquals(parsed.summary, "Completed 3 step(s)."); + assertEquals(parsed.results.map((item: { kind: string }) => item.kind), [ + "command", + "search", + "command", + ]); + assertEquals(parsed.results[0].result.summary, "executed first"); + assertEquals(parsed.results[1].result.results.length > 0, true); + assertStringIncludes( + parsed.results[1].result.results[0].snippet, + "session continuity", + ); + assertEquals(parsed.results[2].result.summary, "executed third"); + } finally { + await runtime.dispose(); + } + }); + + it("uses the local corpus for a mixed batch search step", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + } as never); + + try { + await runtime.tools.session_index.execute( + { + root_session_id: "root-search-step", + content: "local corpus search should find this indexed sentence", + }, + toolContext, + ); + + const serialized = await runtime.tools.session_batch_execute.execute( + { + root_session_id: "root-search-step", + steps: [{ kind: "search", query: "indexed sentence" }], + }, + toolContext, + ); + const parsed = JSON.parse(serialized); + + assertEquals(parsed.results[0].kind, "search"); + assertEquals(parsed.results[0].result.status, "ok"); + assertEquals(parsed.results[0].result.results.length > 0, true); + assertStringIncludes( + parsed.results[0].result.results[0].snippet, + "indexed 
sentence", + ); + } finally { + await runtime.dispose(); + } + }); + + it("keeps oversized mixed batch command steps safely spilled to artifacts", async () => { + const redis = new RedisClient({ endpoint: "redis://unused" }); + const runtime = createSessionMcpRuntime({ + redisClient: redis, + sessionTtlSeconds: 60, + handlers: { + session_execute: (request: { command: string }) => + Promise.resolve({ + status: "ok", + summary: `${request.command}: ` + "x".repeat(7_000), + exit_code: 0, + timed_out: false, + truncated: false, + bytes_captured: 7_010, + }), + }, + } as never); + + try { + await runtime.tools.session_index.execute( + { + root_session_id: "root-mixed-spill", + content: "spill search term remains locally searchable", + }, + toolContext, + ); + + const serialized = await runtime.tools.session_batch_execute.execute( + { + root_session_id: "root-mixed-spill", + steps: [ + { kind: "command", command: "first" }, + { kind: "search", query: "spill search term" }, + { kind: "command", command: "second" }, + ], + }, + toolContext, + ); + const parsed = JSON.parse(serialized); + + assert( + textEncoder.encode(serialized).byteLength <= + SESSION_MCP_RESPONSE_BUDGET_BYTES, + ); + assertEquals(parsed.truncated, true); + assertEquals(parsed.results[0].kind, "command"); + assertEquals(parsed.results[1].kind, "search"); + assertEquals(parsed.results[2].kind, "command"); + assertExists(parsed.results[0].result.artifact_ref); + assertExists(parsed.results[2].result.artifact_ref); assertEquals( - parsed.results[0].artifact_ref.startsWith("local://session_execute/"), + parsed.results[0].result.artifact_ref.startsWith( + "local://session_execute/", + ), true, ); + assertEquals(parsed.results[1].result.results.length > 0, true); } finally { await runtime.dispose(); } diff --git a/src/services/session-mcp-runtime.ts b/src/services/session-mcp-runtime.ts index 8ed33d4..dcb734a 100644 --- a/src/services/session-mcp-runtime.ts +++ b/src/services/session-mcp-runtime.ts @@ -9,6 
+9,13 @@ import { createSessionCorpusService, type SessionCorpusService, } from "./session-corpus.ts"; +import { + createSessionExecutor, + SESSION_EXECUTOR_DEFAULT_COMMAND_TIMEOUT_SECONDS, + SESSION_EXECUTOR_MAX_COMMAND_TIMEOUT_SECONDS, + SESSION_EXECUTOR_MAX_NORMALIZED_INDEXED_BODY_BYTES, + type SessionExecutor, +} from "./session-executor.ts"; import { SESSION_MCP_TOOL_NAMES, type SessionMcpRequestMap, @@ -17,6 +24,8 @@ import { sessionMcpResponseSchemas, type SessionMcpToolName, } from "./session-mcp-types.ts"; +import type { RuntimeRootSessionValidator } from "../session.ts"; +import path from "node:path"; export const SESSION_MCP_RESPONSE_BUDGET_BYTES = 8 * 1024; @@ -33,6 +42,13 @@ const pluginSessionExecuteStepSchema = pluginSchema.object({ timeout_seconds: pluginSchema.number().int().positive().max(120).optional(), }); +const pluginSessionBatchStepSchema = pluginSchema.object({ + kind: pluginSchema.string().min(1), + command: pluginSchema.string().min(1).optional(), + query: pluginSchema.string().min(1).optional(), + timeout_seconds: pluginSchema.number().int().positive().max(120).optional(), +}); + const sessionMcpToolArgs: Record = { session_execute: { ...pluginRootSessionIdArgs, @@ -45,11 +61,16 @@ const sessionMcpToolArgs: Record = { }, session_batch_execute: { ...pluginRootSessionIdArgs, - commands: pluginSchema.array(pluginSessionExecuteStepSchema).min(1), + commands: pluginSchema.array(pluginSessionExecuteStepSchema).min(1) + .optional(), + steps: pluginSchema.array(pluginSessionBatchStepSchema).min(1).optional(), }, session_index: { ...pluginRootSessionIdArgs, - content: pluginSchema.string(), + content: pluginSchema.string().optional(), + path: pluginSchema.string().min(1).optional(), + source: pluginSchema.string().min(1).optional(), + label: pluginSchema.string().min(1).optional(), }, session_search: { ...pluginRootSessionIdArgs, @@ -84,11 +105,34 @@ type SessionMcpRuntimeOptions = { sessionTtlSeconds?: number; groupId?: string; 
createSessionCorpusService?: typeof createSessionCorpusService; + createSessionExecutor?: typeof createSessionExecutor; + sessionExecutor?: SessionExecutor; + sessionCanonicalizer?: RuntimeRootSessionValidator; + readSessionIndexFile?: (filePath: string) => Promise; +}; + +type SessionExecuteResponse = SessionMcpResponseMap["session_execute"]; +type SessionSearchResponse = SessionMcpResponseMap["session_search"]; +type SessionBatchExecuteRequest = SessionMcpRequestMap["session_batch_execute"]; +type SessionBatchExecuteStep = NonNullable< + SessionBatchExecuteRequest["steps"] +>[number]; +type SessionBatchStepResultItem = + | { kind: "command"; result: SessionExecuteResponse } + | { kind: "search"; result: SessionSearchResponse }; +type SessionBatchExecuteResponse = { + status: "ok" | "error"; + summary: string; + results: SessionBatchStepResultItem[]; + truncated: boolean; }; export type SessionMcpRuntime = { tools: Record; dispose: () => Promise; + setSessionCanonicalizer: ( + sessionCanonicalizer: RuntimeRootSessionValidator | undefined, + ) => void; migrateRootSessionState: ( sourceRootSessionId: string, targetRootSessionId: string, @@ -159,6 +203,36 @@ const parseResponse = ( rawResponse, ) as SessionMcpResponseMap[TToolName]; +const validateResponsePreservingBatchShape = < + TToolName extends SessionMcpToolName, +>( + toolName: TToolName, + rawResponse: unknown, +): SessionMcpResponseMap[TToolName] => { + if (toolName !== "session_batch_execute") { + return parseResponse(toolName, rawResponse); + } + + sessionMcpResponseSchemas.session_batch_execute.parse(rawResponse); + return rawResponse as SessionMcpResponseMap[TToolName]; +}; + +const validateRuntimeRootSessionContract = async < + TToolName extends SessionMcpToolName, +>( + _toolName: TToolName, + request: SessionMcpRequestMap[TToolName], + context: ToolContext, + validator: RuntimeRootSessionValidator | undefined, +): Promise => { + const sessionId = context.sessionID; + if (!sessionId) return; + await 
validator?.validateRuntimeRootSessionId( + sessionId, + request.root_session_id, + ); +}; + const textEncoder = new TextEncoder(); const serialize = (value: unknown): string => JSON.stringify(value); @@ -177,15 +251,96 @@ const extractInlineArtifactPayload = ( const byteLength = (value: string): number => textEncoder.encode(value).byteLength; +const readTextFile = (filePath: string): Promise => + Deno.readTextFile(filePath); + +const createBoundedSessionIndexError = ( + code: "session_index_path_unreadable", + message: string, +): Error & { code: string; bounded: true } => + Object.assign(new Error(message), { code, bounded: true as const }); + const isWithinBudget = (value: string): boolean => byteLength(value) <= SESSION_MCP_RESPONSE_BUDGET_BYTES; +const resolveSessionIndexPath = ( + requestPath: string, + context: ToolContext, +): string => { + const workspaceRoot = path.resolve(context.worktree ?? context.directory); + const baseDirectory = path.resolve(context.directory ?? workspaceRoot); + return path.isAbsolute(requestPath) + ? path.resolve(requestPath) + : path.resolve(baseDirectory, requestPath); +}; + +const isWithinWorkspace = ( + workspaceRoot: string, + targetPath: string, +): boolean => { + const relative = path.relative(workspaceRoot, targetPath); + return relative === "" || + (!relative.startsWith("..") && !path.isAbsolute(relative)); +}; + +const requestSessionIndexPermissions = async ( + resolvedPath: string, + context: ToolContext, +): Promise => { + const workspaceRoot = path.resolve(context.worktree ?? 
context.directory); + if (!isWithinWorkspace(workspaceRoot, resolvedPath)) { + const parentDir = path.dirname(resolvedPath); + const glob = path.join(parentDir, "*").replaceAll("\\", "/"); + await context.ask({ + permission: "external_directory", + patterns: [glob], + always: [glob], + metadata: { + filepath: resolvedPath, + parentDir, + }, + }); + } + + await context.ask({ + permission: "read", + patterns: [resolvedPath], + always: ["*"], + metadata: {}, + }); +}; + +const readSessionIndexBody = async ( + request: SessionMcpRequestMap["session_index"], + context: ToolContext, + readSessionIndexFile: (filePath: string) => Promise, +): Promise => { + if (!request.path) return request.content; + + const resolvedPath = resolveSessionIndexPath(request.path, context); + + try { + await requestSessionIndexPermissions(resolvedPath, context); + return await readSessionIndexFile(resolvedPath); + } catch (error) { + throw createBoundedSessionIndexError( + "session_index_path_unreadable", + error instanceof Error + ? `session_index could not read path: ${resolvedPath}: ${error.message}` + : `session_index could not read path: ${String(error)}`, + ); + } +}; + const makeCorpusRef = ( groupId: string, rootSessionId: string, corpusId: string, ): string => `session:${groupId}:${rootSessionId}:corpus:${corpusId}:meta`; +const statsCounterKeyForTool = (toolName: SessionMcpToolName): string => + `${toolName}_calls_total`; + export const createSessionMcpRuntime = ( options: SessionMcpRuntimeOptions = {}, ): SessionMcpRuntime => { @@ -201,6 +356,10 @@ export const createSessionMcpRuntime = ( : null; let artifactCounter = 0; const artifactStore = new Map(); + const corpusBackedArtifactRefs = new Set(); + let sessionCanonicalizer = options.sessionCanonicalizer; + const createExecutor = options.createSessionExecutor ?? createSessionExecutor; + const readSessionIndexFile = options.readSessionIndexFile ?? 
readTextFile; const writeArtifact = ( toolName: SessionMcpToolName, @@ -211,45 +370,163 @@ export const createSessionMcpRuntime = ( return Promise.resolve(artifactRef); }; - const defaultHandlers: SessionMcpHandlerMap = { - session_execute: (request) => - Promise.resolve({ + const recordToolCall = async ( + rootSessionId: string, + toolName: SessionMcpToolName, + ): Promise => { + await corpus?.recordStats?.(rootSessionId, { + [statsCounterKeyForTool(toolName)]: 1, + }); + }; + + const recordReturnedBytes = async ( + rootSessionId: string, + serialized: string, + ): Promise => { + await corpus?.recordStats?.(rootSessionId, { + bytes_returned_total: byteLength(serialized), + }); + }; + + const rememberCorpusArtifactRef = (artifactRef: string | undefined): void => { + if (artifactRef) corpusBackedArtifactRefs.add(artifactRef); + }; + + const persistCanonicalLocalArtifactIfNeeded = async < + TToolName extends "session_execute" | "session_execute_file", + >( + toolName: TToolName, + response: SessionMcpResponseMap[TToolName], + rootSessionId: string, + ): Promise => { + if (!corpus) return; + if ( + toolName === "session_execute_file" && + (response as SessionMcpResponseMap["session_execute_file"]).corpus_ref + ) { + return; + } + if ( + response.artifact_ref && + corpusBackedArtifactRefs.has(response.artifact_ref) + ) { + return; + } + if (!response.summary.trim()) return; + const artifact = await corpus.storeArtifact({ + rootSessionId, + toolName, + body: response.summary, + }).catch(() => undefined); + rememberCorpusArtifactRef(artifact?.artifactRef); + }; + + const sessionExecutor = options.sessionExecutor ?? 
createExecutor({ + responseBudgetBytes: SESSION_MCP_RESPONSE_BUDGET_BYTES, + defaultCommandTimeoutSeconds: + SESSION_EXECUTOR_DEFAULT_COMMAND_TIMEOUT_SECONDS, + maxCommandTimeoutSeconds: SESSION_EXECUTOR_MAX_COMMAND_TIMEOUT_SECONDS, + maxNormalizedIndexedBodyBytes: + SESSION_EXECUTOR_MAX_NORMALIZED_INDEXED_BODY_BYTES, + storeArtifact: async ({ rootSessionId, toolName, body }) => { + const artifact = corpus + ? await corpus.storeArtifact({ + rootSessionId, + toolName, + body, + }).catch(() => null) + : null; + rememberCorpusArtifactRef(artifact?.artifactRef); + const fallbackArtifactRef = await writeArtifact(toolName, body); + return { + artifactRef: artifact?.artifactRef ?? fallbackArtifactRef, + corpusRef: artifact?.corpusRef, + }; + }, + }); + + const searchLocalCorpus = async ( + rootSessionId: string, + query: string, + ): Promise => { + if (!corpus) { + return { status: "ok", - summary: - `Stub session_execute accepted command for ${request.root_session_id}.`, - exit_code: 0, - timed_out: false, + results: [], + corpus_refs: [], truncated: false, - bytes_captured: 0, + }; + } + + const result = await corpus.search({ + rootSessionId, + query, + }); + return { + status: result.status, + results: result.results, + corpus_refs: result.corpusRefs, + truncated: result.truncated, + }; + }; + + const defaultHandlers: SessionMcpHandlerMap = { + session_execute: (request, context) => + sessionExecutor.executeCommand(request, { + worktree: context.worktree, + directory: context.directory, }), - session_execute_file: (request) => - Promise.resolve({ - status: "ok", - summary: - `Stub session_execute_file accepted ${request.paths.length} file(s).`, - file_count: request.paths.length, - truncated: false, + session_execute_file: (request, context) => + sessionExecutor.executeFile(request, { + worktree: context.worktree, + directory: context.directory, }), session_batch_execute: async (request, context) => { - const results: SessionMcpResponseMap["session_execute"][] = []; - 
for (const command of request.commands) { - results.push( - await handlerMap.session_execute({ - root_session_id: request.root_session_id, - command: command.command, - timeout_seconds: command.timeout_seconds, - }, context), + const steps = request.steps ?? request.commands.map((command) => ({ + kind: "command" as const, + ...command, + })); + if (steps.length === 0) { + throw new Error("session_batch_execute requires at least one step"); + } + + const results: SessionBatchStepResultItem[] = []; + for (const step of steps) { + if (step.kind === "command") { + const result = await handlerMap.session_execute( + { + root_session_id: request.root_session_id, + command: step.command, + timeout_seconds: step.timeout_seconds, + }, + context, + ); + results.push({ kind: "command", result }); + continue; + } + + const result = await searchLocalCorpus( + request.root_session_id, + step.query, ); + results.push({ kind: "search", result }); } + return { - status: "ok", - summary: - `Stub session_batch_execute completed ${results.length} command(s).`, + status: results.every((result) => result.result.status === "ok") + ? 
"ok" + : "error", + summary: `Completed ${results.length} step(s).`, results, truncated: false, - }; + } as SessionMcpResponseMap["session_batch_execute"]; }, - session_index: async (request) => { + session_index: async (request, context) => { + const content = await readSessionIndexBody( + request, + context, + readSessionIndexFile, + ); if (!corpus) { return { status: "ok", @@ -264,7 +541,9 @@ export const createSessionMcpRuntime = ( } const result = await corpus.index({ rootSessionId: request.root_session_id, - content: request.content, + content, + source: request.source, + label: request.label, }); return { status: result.status, @@ -274,24 +553,7 @@ export const createSessionMcpRuntime = ( }; }, session_search: async (request) => { - if (!corpus) { - return { - status: "ok", - results: [], - corpus_refs: [], - truncated: false, - }; - } - const result = await corpus.search({ - rootSessionId: request.root_session_id, - query: request.query, - }); - return { - status: result.status, - results: result.results, - corpus_refs: result.corpusRefs, - truncated: result.truncated, - }; + return await searchLocalCorpus(request.root_session_id, request.query); }, session_fetch_and_index: async (request) => { if (!corpus) { @@ -343,26 +605,37 @@ export const createSessionMcpRuntime = ( bytes_saved_estimate: stats.bytesSavedEstimate, }; }, - session_doctor: () => { + session_doctor: async (request) => { const redis = getRedisDoctorStatus(options.redisClient); const graphitiCache = getGraphitiCacheDoctorStatus( options.graphitiCache, options.redisClient, ); - return Promise.resolve({ + const stats = await corpus?.getStats(request.root_session_id); + return { status: "ok", - checks: [{ - name: "session-mcp-runtime", - status: "ok", - detail: "In-process session MCP runtime handlers are registered.", - }], + checks: [ + { + name: "session-mcp-runtime", + status: "ok", + detail: "In-process session MCP runtime handlers are registered.", + }, + ...(stats + ? 
[{ + name: "session-mcp-local-stats", + status: "ok" as const, + detail: + `Local stats available for ${request.root_session_id} (corpora=${stats.corpusCount}, artifacts=${stats.artifactCount}).`, + }] + : []), + ], redis, graphiti_cache: graphitiCache, runtime: { status: "ok", detail: "In-process session MCP runtime is active.", }, - }); + }; }, }; @@ -388,6 +661,7 @@ export const createSessionMcpRuntime = ( body: payload, }).catch(() => null) : null; + rememberCorpusArtifactRef(artifact?.artifactRef); const fallbackArtifactRef = await writeArtifact(toolName, payload); const artifactRef = artifact?.artifactRef ?? fallbackArtifactRef; @@ -433,6 +707,7 @@ export const createSessionMcpRuntime = ( body: artifactBody, }).catch(() => null) : null; + rememberCorpusArtifactRef(artifact?.artifactRef); const fallbackArtifactRef = await writeArtifact(toolName, artifactBody); const artifactRef = resolveArtifactRef( oversized.artifact_ref, @@ -458,6 +733,7 @@ export const createSessionMcpRuntime = ( body: artifactBody, }).catch(() => null) : null; + rememberCorpusArtifactRef(artifact?.artifactRef); const fallbackArtifactRef = await writeArtifact(toolName, artifactBody); const artifactRef = resolveArtifactRef( oversized.artifact_ref, @@ -474,11 +750,12 @@ export const createSessionMcpRuntime = ( } if (toolName === "session_batch_execute") { - const oversized = - response as SessionMcpResponseMap["session_batch_execute"]; - const results = await Promise.all( - oversized.results.map(async (result) => { - const artifactBody = resolveArtifactBody(result); + const oversized = response as unknown as SessionBatchExecuteResponse; + const results: SessionBatchStepResultItem[] = []; + + for (const result of oversized.results) { + if (result.kind === "command") { + const artifactBody = resolveArtifactBody(result.result); const artifact = corpus ? 
await corpus.storeArtifact({ rootSessionId, @@ -486,24 +763,42 @@ export const createSessionMcpRuntime = ( body: artifactBody, }).catch(() => null) : null; + rememberCorpusArtifactRef(artifact?.artifactRef); const fallbackArtifactRef = await writeArtifact( "session_execute", artifactBody, ); const artifactRef = resolveArtifactRef( - result.artifact_ref, + result.result.artifact_ref, artifact?.artifactRef, fallbackArtifactRef, ); - return { - ...result, - artifact_ref: artifactRef, - summary: - `Oversized batch step output moved to local artifact ${artifactRef}.`, + results.push({ + kind: "command", + result: { + ...result.result, + artifact_ref: artifactRef, + summary: + `Oversized batch step output moved to local artifact ${artifactRef}.`, + truncated: true, + }, + }); + continue; + } + + results.push({ + kind: "search", + result: { + ...result.result, + results: result.result.results.slice(0, 1).map((item) => ({ + ...item, + snippet: item.snippet.slice(0, 320), + })), truncated: true, - }; - }), - ); + }, + }); + } + return { ...oversized, summary: @@ -536,7 +831,14 @@ export const createSessionMcpRuntime = ( context: ToolContext, ): Promise => { const request = parseRequest(toolName, rawRequest); - let response = parseResponse( + await validateRuntimeRootSessionContract( + toolName, + request, + context, + sessionCanonicalizer, + ); + await recordToolCall(request.root_session_id, toolName); + let response = validateResponsePreservingBatchShape( toolName, await (handlerMap[toolName] as ( request: SessionMcpRequestMap[TToolName], @@ -569,7 +871,7 @@ export const createSessionMcpRuntime = ( let serialized = serialize(response); if (!isWithinBudget(serialized)) { - response = parseResponse( + response = validateResponsePreservingBatchShape( toolName, await coerceOversizedResponse( toolName, @@ -586,6 +888,24 @@ export const createSessionMcpRuntime = ( ); } + if (toolName === "session_execute") { + await persistCanonicalLocalArtifactIfNeeded( + toolName, + response 
as SessionMcpResponseMap["session_execute"], + request.root_session_id, + ); + } + + if (toolName === "session_execute_file") { + await persistCanonicalLocalArtifactIfNeeded( + toolName, + response as SessionMcpResponseMap["session_execute_file"], + request.root_session_id, + ); + } + + await recordReturnedBytes(request.root_session_id, serialized); + return serialized; }; @@ -622,6 +942,12 @@ export const createSessionMcpRuntime = ( await corpus?.dispose?.(); }; + const setSessionCanonicalizer = ( + nextSessionCanonicalizer: RuntimeRootSessionValidator | undefined, + ): void => { + sessionCanonicalizer = nextSessionCanonicalizer; + }; + const migrateRootSessionState = async ( sourceRootSessionId: string, targetRootSessionId: string, @@ -635,6 +961,7 @@ export const createSessionMcpRuntime = ( return { tools, dispose, + setSessionCanonicalizer, migrateRootSessionState, }; }; diff --git a/src/services/session-mcp-types.ts b/src/services/session-mcp-types.ts index 062d210..22eb45b 100644 --- a/src/services/session-mcp-types.ts +++ b/src/services/session-mcp-types.ts @@ -42,6 +42,39 @@ const sessionExecuteStepSchema = z.object({ timeout_seconds: z.number().int().positive().max(120).optional(), }).strict(); +export const sessionBatchCommandStepSchema = z.object({ + kind: z.literal("command"), + command: z.string().min(1), + timeout_seconds: z.number().int().positive().max(120).optional(), +}).strict(); + +export const sessionBatchSearchStepSchema = z.object({ + kind: z.literal("search"), + query: z.string().min(1), +}).strict(); + +export const sessionBatchStepSchema = z.discriminatedUnion("kind", [ + sessionBatchCommandStepSchema, + sessionBatchSearchStepSchema, +]); + +type SessionExecuteStep = z.infer; +type SessionBatchStep = z.infer; + +type SessionBatchExecuteRequest = { + root_session_id: string; + commands: SessionExecuteStep[]; + steps?: SessionBatchStep[]; +}; + +type SessionIndexRequest = { + root_session_id: string; + content: string; + path?: string; + 
source?: string; + label?: string; +}; + const searchResultSchema = z.object({ corpus_ref: z.string().min(1), snippet: z.string(), @@ -59,6 +92,62 @@ const doctorSubsystemSchema = z.object({ detail: z.string().min(1), }).strict(); +const sessionBatchExecuteLegacyRequestSchema = z.object({ + ...rootSessionIdShape, + commands: z.array(sessionExecuteStepSchema).min(1), +}).strict(); + +const sessionBatchExecuteMixedRequestSchema = z.object({ + ...rootSessionIdShape, + steps: z.array(sessionBatchStepSchema).min(1), +}).strict(); + +const sessionBatchExecuteRequestSchema = z.union([ + sessionBatchExecuteLegacyRequestSchema, + sessionBatchExecuteMixedRequestSchema, +]).transform((request) => { + if ("steps" in request) { + return { + root_session_id: request.root_session_id, + steps: request.steps, + commands: request.steps.flatMap((step) => + step.kind === "command" + ? [{ command: step.command, timeout_seconds: step.timeout_seconds }] + : [] + ), + }; + } + + return { + root_session_id: request.root_session_id, + commands: request.commands, + steps: request.commands.map((command) => ({ + kind: "command" as const, + ...command, + })), + }; +}); + +const sessionIndexRequestSchema = z.object({ + ...rootSessionIdShape, + content: z.string().optional(), + path: z.string().optional(), + source: z.string().optional(), + label: z.string().optional(), +}).strict().refine( + (request) => + typeof request.content === "string" || typeof request.path === "string", + { + message: "content or path is required", + }, +).transform((request) => ({ + root_session_id: request.root_session_id, + content: request.content ?? 
"", + path: request.path, + source: request.source, + label: request.label, +} satisfies SessionIndexRequest)); + export const sessionMcpRequestSchemas = { session_execute: z.object({ ...rootSessionIdShape, @@ -69,14 +158,8 @@ export const sessionMcpRequestSchemas = { ...rootSessionIdShape, paths: z.array(z.string().min(1)).min(1), }).strict(), - session_batch_execute: z.object({ - ...rootSessionIdShape, - commands: z.array(sessionExecuteStepSchema).min(1), - }).strict(), - session_index: z.object({ - ...rootSessionIdShape, - content: z.string(), - }).strict(), + session_batch_execute: sessionBatchExecuteRequestSchema, + session_index: sessionIndexRequestSchema, session_search: z.object({ ...rootSessionIdShape, query: z.string().min(1), @@ -104,12 +187,42 @@ export const sessionExecuteResponseSchema = z.object({ bytes_captured: z.number().int().nonnegative(), }).strict(); +export const sessionSearchResponseSchema = z.object({ + status: sessionMcpStatusSchema, + results: z.array(searchResultSchema), + corpus_refs: z.array(z.string()), + truncated: z.boolean(), +}).strict(); + +export const sessionBatchStepResultSchema = z.discriminatedUnion("kind", [ + z.object({ + kind: z.literal("command"), + result: sessionExecuteResponseSchema, + }).strict().transform((step) => ({ + ...step, + ...step.result, + })), + z.object({ + kind: z.literal("search"), + result: sessionSearchResponseSchema, + }).strict().transform((step) => ({ + ...step, + status: step.result.status, + summary: `Search returned ${step.result.results.length} result(s).`, + exit_code: -1, + timed_out: false, + truncated: step.result.truncated, + bytes_captured: 0, + artifact_ref: undefined as string | undefined, + })), +]); + export const sessionMcpResponseSchemas = { session_execute: sessionExecuteResponseSchema, session_batch_execute: z.object({ status: sessionMcpStatusSchema, summary: z.string(), - results: z.array(sessionExecuteResponseSchema), + results: z.array(sessionBatchStepResultSchema), truncated: 
z.boolean(), }).strict(), session_execute_file: z.object({ @@ -126,12 +239,7 @@ export const sessionMcpResponseSchemas = { chunk_count: z.number().int().nonnegative(), query_hints: z.array(z.string()), }).strict(), - session_search: z.object({ - status: sessionMcpStatusSchema, - results: z.array(searchResultSchema), - corpus_refs: z.array(z.string()), - truncated: z.boolean(), - }).strict(), + session_search: sessionSearchResponseSchema, session_fetch_and_index: z.object({ status: sessionMcpStatusSchema, corpus_ref: z.string().min(1), @@ -157,14 +265,48 @@ export const sessionMcpResponseSchemas = { }).strict(), }; -export type SessionMcpRequestMap = { +type SessionMcpInferredRequestMap = { [K in SessionMcpToolName]: ReturnType< (typeof sessionMcpRequestSchemas)[K]["parse"] >; }; -export type SessionMcpResponseMap = { +export type SessionMcpRequestMap = + & { + [ + K in Exclude< + SessionMcpToolName, + "session_batch_execute" | "session_index" + > + ]: SessionMcpInferredRequestMap[K]; + } + & { + session_batch_execute: SessionBatchExecuteRequest; + session_index: SessionIndexRequest; + }; + +type SessionExecuteResponse = z.infer; +type SessionSearchResponse = z.infer; +type SessionBatchStepResult = z.infer; + +type SessionBatchExecuteResponse = { + status: SessionMcpStatus; + summary: string; + results: Array; + truncated: boolean; +}; + +type SessionMcpInferredResponseMap = { [K in SessionMcpToolName]: ReturnType< (typeof sessionMcpResponseSchemas)[K]["parse"] >; }; + +export type SessionMcpResponseMap = + & { + [K in Exclude]: + SessionMcpInferredResponseMap[K]; + } + & { + session_batch_execute: SessionBatchExecuteResponse; + }; diff --git a/src/services/tool-routing.test.ts b/src/services/tool-routing.test.ts index 565f298..5003493 100644 --- a/src/services/tool-routing.test.ts +++ b/src/services/tool-routing.test.ts @@ -22,7 +22,7 @@ describe("tool routing", () => { const decision = routeToolCall({ canonicalSessionId: "root-session", toolName: "Read", - args: { 
filePath: "/tmp/example.ts", offset: 1, limit: 40 }, + args: { filePath: "/tmp/example.ts" }, guidanceThrottle: throttle, }); diff --git a/src/session.test.ts b/src/session.test.ts index 19dcc08..738b6d2 100644 --- a/src/session.test.ts +++ b/src/session.test.ts @@ -1,15 +1,42 @@ import { assertEquals, assertRejects } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import { SessionManager } from "./session.ts"; +import * as sessionModule from "./session.ts"; import { setSuppressConsoleWarningsDuringTestsOverride } from "./services/opencode-warning.ts"; import { RedisClient } from "./services/redis-client.ts"; import { createSessionCorpusService } from "./services/session-corpus.ts"; +const { SessionManager } = sessionModule; + const createExplicitSessionNotFoundError = ( details: Record = { status: 404 }, ): Error => Object.assign(new Error("Session not found"), details); describe("SessionManager Task 6 runtime migration", () => { + it("resolves child sessions to the canonical parent root session id", async () => { + const manager = new SessionManager( + "group-task-1", + "user-task-1", + { + session: { + get() { + throw createExplicitSessionNotFoundError(); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + manager.setParentId("root-session", null); + manager.setParentId("child-session", "root-session"); + + assertEquals( + await manager.resolveCanonicalSessionId("child-session"), + "root-session", + ); + }); + it("migrates temporary-root corpora and stats onto the canonical parent root", async () => { const redis = new RedisClient({ endpoint: "redis://unused" }); const corpus = createSessionCorpusService({ @@ -173,4 +200,150 @@ describe("SessionManager Task 6 runtime migration", () => { console.warn = originalWarn; } }); + + it("retries temporary-root runtime migration after a transient failure", async () => { + let childLookupCount = 0; + let migrationAttempts = 0; + const manager = new 
SessionManager( + "group-task-6-retry", + "user-task-6-retry", + { + session: { + get({ path }: { path: { id: string } }) { + if (path.id === "child-session") { + childLookupCount += 1; + if (childLookupCount === 1) { + throw createExplicitSessionNotFoundError(); + } + return { data: { parentID: "parent-session" } }; + } + if (path.id === "parent-session") { + return { data: { parentID: null } }; + } + throw new Error(`Unexpected session lookup: ${path.id}`); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + { + runtimeStateMigrator: { + migrateRootSessionState: () => { + migrationAttempts += 1; + if (migrationAttempts === 1) { + return Promise.reject(new Error("transient migration failure")); + } + return Promise.resolve(); + }, + }, + } as never, + ); + + await manager.resolveCanonicalSessionId("child-session"); + + await assertRejects( + () => manager.resolveCanonicalSessionId("child-session"), + Error, + "transient migration failure", + ); + assertEquals( + await manager.resolveCanonicalSessionId("child-session"), + "parent-session", + ); + assertEquals(migrationAttempts, 2); + }); + + it("accepts a canonical child root only when it matches the resolved lineage", async () => { + const manager = new SessionManager( + "group-task-2-lineage", + "user-task-2-lineage", + { + session: { + get() { + throw createExplicitSessionNotFoundError(); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + manager.setParentId("root-session", null); + manager.setParentId("child-session", "root-session"); + + assertEquals( + await manager.validateRuntimeRootSessionId( + "child-session", + "root-session", + ), + "root-session", + ); + await assertRejects( + () => manager.validateRuntimeRootSessionId("child-session", "wrong-root"), + Error, + "root_session_id mismatch", + ); + }); + + it("keeps provisional temporary roots valid until a canonical migration resolves them", async () => { + let childLookupCount = 0; + const manager = new 
SessionManager( + "group-task-2-provisional", + "user-task-2-provisional", + { + session: { + get({ path }: { path: { id: string } }) { + if (path.id === "child-session") { + childLookupCount += 1; + if (childLookupCount === 1) { + throw createExplicitSessionNotFoundError(); + } + return { data: { parentID: "parent-session" } }; + } + if (path.id === "parent-session") { + return { data: { parentID: null } }; + } + throw new Error(`Unexpected session lookup: ${path.id}`); + }, + }, + } as never, + {} as never, + {} as never, + {} as never, + ); + + assertEquals( + await manager.validateRuntimeRootSessionId( + "child-session", + "child-session", + ), + "child-session", + ); + assertEquals( + await manager.validateRuntimeRootSessionId( + "child-session", + "parent-session", + ), + "parent-session", + ); + await assertRejects( + () => + manager.validateRuntimeRootSessionId("child-session", "child-session"), + Error, + "root_session_id mismatch", + ); + }); + + it("does not expose the dead global runtime validator API", () => { + assertEquals( + "getRegisteredRuntimeRootSessionValidator" in sessionModule, + false, + ); + assertEquals( + "setRegisteredRuntimeRootSessionValidator" in sessionModule, + false, + ); + }); }); diff --git a/src/session.ts b/src/session.ts index a89d90f..c728fcb 100644 --- a/src/session.ts +++ b/src/session.ts @@ -224,6 +224,14 @@ export interface ToolRoutingSessionCanonicalizer { resolveCanonicalSessionId(sessionId: string): Promise; } +export interface RuntimeRootSessionValidator + extends ToolRoutingSessionCanonicalizer { + validateRuntimeRootSessionId( + sessionId: string, + rootSessionId: string, + ): Promise; +} + type SessionLifecycle = { activityGeneration: number; idleCleanupTimer: TimerHandle | null; @@ -857,12 +865,17 @@ export class SessionManager { this.mergeTemporaryRootInMemoryRuntimeState(sessionId, canonicalSessionId); const promise = (async () => { - await this.runtimeStateMigrator?.migrateRootSessionState( - sessionId, - 
canonicalSessionId, - ); - this.temporaryRootSessionIds.delete(sessionId); - this.temporaryRootRuntimeMigrations.delete(sessionId); + try { + await this.runtimeStateMigrator?.migrateRootSessionState( + sessionId, + canonicalSessionId, + ); + this.temporaryRootSessionIds.delete(sessionId); + this.temporaryRootRuntimeMigrations.delete(sessionId); + } catch (err) { + this.temporaryRootRuntimeMigrations.delete(sessionId); + throw err; + } })(); this.temporaryRootRuntimeMigrations.set(sessionId, { @@ -974,6 +987,24 @@ export class SessionManager { return canonicalSessionId; } + async validateRuntimeRootSessionId( + sessionId: string, + rootSessionId: string, + ): Promise { + const canonicalSessionId = await this.resolveCanonicalSessionId(sessionId); + if (!canonicalSessionId) { + throw new Error( + `Unable to validate root_session_id for session ${sessionId}`, + ); + } + if (canonicalSessionId !== rootSessionId) { + throw new Error( + `root_session_id mismatch for session ${sessionId}: expected ${canonicalSessionId}, received ${rootSessionId}`, + ); + } + return canonicalSessionId; + } + async resolveSessionState( sessionId: string, ): Promise<{ From a3c9bd2353d916a7dd35022213a62dee3c7e5d81 Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Tue, 24 Mar 2026 02:12:09 +0800 Subject: [PATCH 08/38] test: fix external path fixture after merge --- .../plans/2026-03-23-mcp-first-gap-closure.md | 450 ++++++++++++++++++ src/services/session-mcp-runtime.test.ts | 4 +- 2 files changed, 452 insertions(+), 2 deletions(-) create mode 100644 docs/superpowers/plans/2026-03-23-mcp-first-gap-closure.md diff --git a/docs/superpowers/plans/2026-03-23-mcp-first-gap-closure.md b/docs/superpowers/plans/2026-03-23-mcp-first-gap-closure.md new file mode 100644 index 0000000..7dfced9 --- /dev/null +++ b/docs/superpowers/plans/2026-03-23-mcp-first-gap-closure.md @@ -0,0 +1,450 @@ +# MCP-First Gap Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining MCP-first alignment gaps so the branch fully +matches the context-mode-style pivot plan for canonical root-session +enforcement, bounded execution, stats, and secondary hook enforcement. + +**Architecture:** Keep `session_*` as the primary bounded execution and +retrieval surface, with hooks limited to root-session injection, enforcement, +attribution, and continuity capture. Finish the missing execution layer +(`session-executor`), tighten canonical root enforcement at both the hook and +runtime layers, and complete local stats and tool-specific coverage without +moving Graphiti onto the hot path. + +**Tech Stack:** Deno, TypeScript, `@opencode-ai/plugin`, local in-process MCP +runtime, Redis/FalkorDB hot tier, zod-backed tool schemas. + +--- + +## File Structure + +### New files + +- `src/services/session-executor.ts` — bounded local execution/file-processing + implementation for `session_execute`, `session_execute_file`, and shared batch + execution primitives. 
+- `src/services/session-executor.test.ts` — exhaustive execution-layer tests for + bounded command/file processing, truncation, artifacts, and error paths. + +### Existing files to modify + +- `src/handlers/tool-before.ts` — inject canonical `root_session_id` into every + `session_*` call and keep native-tool logic secondary. +- `src/handlers/tool-before.test.ts` — verify root injection, canonicalization, + and that native tools do not receive MCP root fields. +- `src/handlers/tool-after.ts` — keep attribution-only behavior and extend + metadata expectations if needed. +- `src/handlers/tool-after.test.ts` — verify no output rewriting and compact + attribution behavior. +- `src/services/tool-routing.ts` — re-center routing around MCP-first guidance + and ensure `session_*` tools are pass-through with explicit root injection + handled in the before-hook. +- `src/services/tool-routing.test.ts` — verify MCP-first routing outcomes and + that native heavy tools are directed toward `session_*` tools. +- `src/services/session-mcp-runtime.ts` — enforce root-session contract after + schema validation, delegate execution/file paths to `session-executor`, and + complete `session_stats` / `session_doctor` behavior. +- `src/services/session-mcp-runtime.test.ts` — verify root mismatch rejection, + stats behavior, `session_execute_file`, and doctor/stats health contracts. +- `src/services/session-corpus.ts` — finish any remaining stats or bounded + artifact integration points needed by executor/runtime. +- `src/services/session-corpus.test.ts` — cover stats, bounded artifact + accounting, and any remaining edge cases surfaced by the gap audit. +- `src/session.ts` — preserve canonical lineage authority and support stricter + root enforcement paths. +- `src/session.test.ts` — validate canonical root sharing and temporary-root + compatibility with the stricter runtime rules. +- `src/index.ts` — wire the runtime with executor/cache dependencies only; keep + Graphiti off the hot path. 
+- `src/index.test.ts` — verify runtime wiring for executor/cache dependencies + and teardown. +- `README.md` — update MCP-first wording only if implementation details or + guarantees change materially. +- `docs/ContextOverhaulTests.md` — add/refresh acceptance coverage references if + new required tests are introduced. + +### Existing files to verify but avoid broad rewrites + +- `src/handlers/event.ts` +- `src/handlers/event.test.ts` +- `docs/ContextOverhaul.md` + +These should only change if the new execution/stat metadata requires it. + +--- + +### Task 1: Inject canonical `root_session_id` into all `session_*` calls + +**Files:** + +- Modify: `src/handlers/tool-before.ts` +- Modify: `src/handlers/tool-before.test.ts` +- Modify: `src/services/tool-routing.ts` +- Modify: `src/services/tool-routing.test.ts` +- Test: `src/session.test.ts` + +- [ ] **Step 1: Write the failing tests for MCP root injection** + +Add failing coverage for: + +- every `session_*` tool call receiving injected `root_session_id` +- canonical child-session calls resolving to the parent/root session ID +- native tools (`Read`, `Bash`, `Grep`, `Glob`, `WebFetch`, `Task`) not + receiving injected `root_session_id` +- already-present mismatched `root_session_id` values being normalized or + flagged according to the locked runtime contract + +- [ ] **Step 2: Run the targeted tests to verify they fail** + +Run: +`deno test src/handlers/tool-before.test.ts src/services/tool-routing.test.ts src/session.test.ts` +Expected: FAIL with missing `root_session_id` injection or incorrect MCP-first +routing behavior. 
+ +- [ ] **Step 3: Implement minimal before-hook/root-injection behavior** + +Implement these rules: + +- if `tool` is one of the `session_*` MCP tools, inject canonical + `root_session_id` +- use `SessionManager` canonical resolution only; do not create a second lineage + model +- keep non-`session_*` tools unchanged except for existing routing + guidance/rewrites +- keep `tool.execute.before` free of Graphiti/Redis I/O + +- [ ] **Step 4: Re-run the targeted tests to verify they pass** + +Run: +`deno test src/handlers/tool-before.test.ts src/services/tool-routing.test.ts src/session.test.ts` +Expected: PASS + +- [ ] **Step 5: Commit the task** + +```bash +git add src/handlers/tool-before.ts src/handlers/tool-before.test.ts src/services/tool-routing.ts src/services/tool-routing.test.ts src/session.test.ts +git commit -m "fix: inject canonical root ids for session tools" +``` + +### Task 2: Enforce runtime root-session contract strictly + +**Files:** + +- Modify: `src/services/session-mcp-runtime.ts` +- Modify: `src/services/session-mcp-runtime.test.ts` +- Modify: `src/session.ts` +- Test: `src/session.test.ts` + +- [ ] **Step 1: Write the failing tests for root mismatch rejection** + +Add failing coverage for: + +- missing `root_session_id` rejected by schema (already present; keep) +- mismatched caller/root combinations rejected after schema validation +- canonical child-session requests allowed only when the injected root matches + lineage +- temporary-root/provisional sessions remaining valid until migration resolves + them + +- [ ] **Step 2: Run the targeted tests to verify they fail** + +Run: `deno test src/services/session-mcp-runtime.test.ts src/session.test.ts` +Expected: FAIL with missing runtime mismatch enforcement. 
+ +- [ ] **Step 3: Implement minimal runtime root enforcement** + +Implement: + +- a runtime-level validation step after request parsing and before handler + execution +- rejection of mismatched `root_session_id` values with schema-valid error + responses or explicit runtime errors, matching existing runtime conventions +- no fallback that silently invents a different root ID + +- [ ] **Step 4: Re-run the targeted tests to verify they pass** + +Run: `deno test src/services/session-mcp-runtime.test.ts src/session.test.ts` +Expected: PASS + +- [ ] **Step 5: Commit the task** + +```bash +git add src/services/session-mcp-runtime.ts src/services/session-mcp-runtime.test.ts src/session.ts src/session.test.ts +git commit -m "fix: enforce canonical root ids in session runtime" +``` + +### Task 3: Implement the missing bounded execution layer + +**Files:** + +- Create: `src/services/session-executor.ts` +- Create: `src/services/session-executor.test.ts` +- Modify: `src/services/session-mcp-runtime.ts` +- Modify: `src/services/session-mcp-runtime.test.ts` +- Modify: `src/index.ts` +- Modify: `src/index.test.ts` + +Use the locked execution defaults from +`docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md` +§2.4 and §2.6 while implementing this task: 8 KB maximum serialized response +body, 30-second default command timeout, 120-second maximum command timeout, 512 +KB maximum normalized indexed body, local plugin-process execution, +`Deno.Command` for command execution, and direct Deno file APIs for file reads. 
+ +- [ ] **Step 1: Write the failing execution-layer tests** + +Add failing coverage for: + +- `session_execute` bounded command execution with timeout enforcement +- `session_execute_file` direct local file processing without using native + `Read` +- `session_batch_execute` sequential execution through the shared executor +- oversized command/file outputs stored as artifacts/corpus refs rather than + returned inline +- execution failures, timeout failures, and file-read failures returning + bounded, schema-valid results + +This task covers basic executor correctness. Keep deeper `session_execute_file` +edge cases and routing-layer integration for Task 5. + +- [ ] **Step 2: Run the targeted tests to verify they fail** + +Run: +`deno test src/services/session-executor.test.ts src/services/session-mcp-runtime.test.ts src/index.test.ts` +Expected: FAIL because `src/services/session-executor.ts` does not exist yet and +runtime delegation is incomplete. + +- [ ] **Step 3: Implement the minimal executor and wire it into the runtime** + +Implement: + +- `src/services/session-executor.ts` for shared command/file execution + primitives +- direct Deno file reads for `session_execute_file` +- bounded summaries + artifact/corpus writes for oversized outputs +- runtime delegation from `session-mcp-runtime.ts` into the executor +- `src/index.ts` wiring for any new executor dependency only inside the + in-process runtime path + +- [ ] **Step 4: Re-run the targeted tests to verify they pass** + +Run: +`deno test src/services/session-executor.test.ts src/services/session-mcp-runtime.test.ts src/index.test.ts` +Expected: PASS + +- [ ] **Step 5: Commit the task** + +```bash +git add src/services/session-executor.ts src/services/session-executor.test.ts src/services/session-mcp-runtime.ts src/services/session-mcp-runtime.test.ts src/index.ts src/index.test.ts +git commit -m "feat: add bounded session executor runtime" +``` + +### Task 4: Complete stats integration and bounded output 
accounting + +**Files:** + +- Modify: `src/services/session-corpus.ts` +- Modify: `src/services/session-corpus.test.ts` +- Modify: `src/services/session-mcp-runtime.ts` +- Modify: `src/services/session-mcp-runtime.test.ts` +- Modify: `src/services/session-executor.ts` +- Modify: `src/services/session-executor.test.ts` +- Modify: `src/handlers/event.ts` +- Modify: `src/handlers/event.test.ts` + +- [ ] **Step 1: Write the failing tests for stats and bounded accounting** + +Add failing coverage for: + +- `session_stats` reading real counters from local state +- `session_doctor` continuing to return bounded, schema-valid local health + responses after the new stats/accounting wiring +- stats counters for every `session_*` call family +- `artifact_count`, `corpus_count`, `bytes_indexed_total`, + `bytes_returned_total`, `bytes_saved_estimate` +- compact continuity metadata for execution/file/batch activity without raw + payload dumps +- no duplicate full-body artifact storage when one canonical artifact body is + enough + +- [ ] **Step 2: Run the targeted tests to verify they fail** + +Run: +`deno test src/services/session-corpus.test.ts src/services/session-mcp-runtime.test.ts src/services/session-executor.test.ts src/handlers/event.test.ts` +Expected: FAIL with missing or incomplete stats/accounting behavior. 
+ +- [ ] **Step 3: Implement minimal stats/accounting completion** + +Implement: + +- root-session-local counters in the corpus/stats layer +- direct `session_stats` reads from that local state +- preserve and extend `session_doctor` bounded local health behavior while + wiring real stats/accounting state +- executor/runtime updates for bytes captured/indexed/saved metadata +- compact event metadata only; no raw artifact bodies in continuity events + +- [ ] **Step 4: Re-run the targeted tests to verify they pass** + +Run: +`deno test src/services/session-corpus.test.ts src/services/session-mcp-runtime.test.ts src/services/session-executor.test.ts src/handlers/event.test.ts` +Expected: PASS + +- [ ] **Step 5: Commit the task** + +```bash +git add src/services/session-corpus.ts src/services/session-corpus.test.ts src/services/session-mcp-runtime.ts src/services/session-mcp-runtime.test.ts src/services/session-executor.ts src/services/session-executor.test.ts src/handlers/event.ts src/handlers/event.test.ts +git commit -m "fix: complete local session stats accounting" +``` + +### Task 5: Finish `session_execute_file` and MCP-first routing coverage + +**Files:** + +- Modify: `src/services/session-executor.test.ts` +- Modify: `src/services/session-mcp-runtime.test.ts` +- Modify: `src/services/tool-routing.ts` +- Modify: `src/services/tool-routing.test.ts` +- Modify: `src/handlers/tool-after.ts` +- Modify: `src/handlers/tool-after.test.ts` + +Use the enforcement defaults already locked in +`docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md` +§9.3 for this task: `session_*` calls must be allowed after root injection, +`WebFetch` denied toward `session_fetch_and_index`, data-heavy `Bash` routed +toward `session_execute` / `session_batch_execute`, large-analysis `Read` guided +toward `session_execute_file`, `Task` prompts rewritten with MCP-first guidance, +and `tool.execute.after` kept attribution-only. 
+ +- [ ] **Step 1: Write the failing tests for file-processing and enforcement + coverage** + +Add failing coverage for: + +- `session_execute_file` happy path on one file and multiple files +- nonexistent file path handling +- oversized file content bounded to artifact/corpus refs +- file processing preserving schema-valid metadata and bounded summaries +- routing guidance explicitly steering large-analysis `Read` usage toward + `session_execute_file` +- `tool.execute.after` remaining attribution-only with no visible output + rewriting + +This task covers the higher-variance `session_execute_file` edge cases and +MCP-first routing integration that build on Task 3's basic executor correctness. + +- [ ] **Step 2: Run the targeted tests to verify they fail** + +Run: +`deno test src/services/session-executor.test.ts src/services/session-mcp-runtime.test.ts src/services/tool-routing.test.ts src/handlers/tool-after.test.ts` +Expected: FAIL where file-path behavior or routing guidance is still incomplete. 
+ +- [ ] **Step 3: Implement minimal file/routing completion** + +Implement: + +- any remaining `session_execute_file` behavior missing from the + executor/runtime +- explicit MCP-first guidance toward `session_execute_file` for heavy + file-analysis cases +- no expansion of `tool.execute.after` beyond attribution metadata + +- [ ] **Step 4: Re-run the targeted tests to verify they pass** + +Run: +`deno test src/services/session-executor.test.ts src/services/session-mcp-runtime.test.ts src/services/tool-routing.test.ts src/handlers/tool-after.test.ts` +Expected: PASS + +- [ ] **Step 5: Commit the task** + +```bash +git add src/services/session-executor.test.ts src/services/session-mcp-runtime.test.ts src/services/tool-routing.ts src/services/tool-routing.test.ts src/handlers/tool-after.ts src/handlers/tool-after.test.ts +git commit -m "test: cover session execute file and routing edges" +``` + +### Task 6: Run the full MCP-first gap-closure verification matrix + +**Files:** + +- Modify only if verification exposes a concrete failure: + - `README.md` + - `docs/ContextOverhaulTests.md` + - any file touched above + +- [ ] **Step 1: Run the focused execution/runtime verification block** + +Run: +`deno test src/services/session-mcp-runtime.test.ts src/services/session-corpus.test.ts src/services/session-executor.test.ts src/session.test.ts src/index.test.ts` +Expected: PASS + +- [ ] **Step 2: Run the hook/routing verification block** + +Run: +`deno test src/handlers/tool-before.test.ts src/handlers/tool-after.test.ts src/services/tool-routing.test.ts src/handlers/event.test.ts` +Expected: PASS + +- [ ] **Step 3: Run the full repo test suite** + +Run: `deno test` Expected: PASS + +- [ ] **Step 4: Run build/type/lint/format verification** + +Run: `deno task build && deno task check && deno lint && deno fmt --check` +Expected: PASS + +- [ ] **Step 5: Update docs only if verification changed guarantees** + +If verification exposed stale wording, update `README.md` and/or 
+`docs/ContextOverhaulTests.md` to match the final behavior. Otherwise, make no +docs changes. + +- [ ] **Step 6: Commit the verification cleanup** + +```bash +git add README.md docs/ContextOverhaulTests.md +git commit -m "docs: finalize MCP-first gap closure verification" +``` + +Use this commit only if docs actually changed; otherwise skip the commit and +record that verification completed with no docs delta. + +--- + +## Final Verification Sequence + +Run this exact sequence after Task 6: + +```bash +deno test src/services/session-mcp-runtime.test.ts src/services/session-corpus.test.ts src/services/session-executor.test.ts src/session.test.ts src/index.test.ts +deno test src/handlers/tool-before.test.ts src/handlers/tool-after.test.ts src/services/tool-routing.test.ts src/handlers/event.test.ts +deno test +deno task build +deno task check +deno lint +deno fmt --check +``` + +## Completion Criteria + +This gap-closure plan is done only when all of the following are true: + +1. `tool.execute.before` injects canonical `root_session_id` into every + `session_*` call. +2. The runtime rejects mismatched `root_session_id` values instead of silently + accepting them. +3. `src/services/session-executor.ts` and + `src/services/session-executor.test.ts` exist and own bounded + execution/file-processing behavior. +4. `session_execute_file` has dedicated behavioral coverage, not only schema + coverage. +5. `session_stats` and `session_doctor` are backed by real local counters/health + signals and bounded accounting. +6. `tool-routing` is visibly secondary enforcement that points data-heavy work + toward `session_*` tools. +7. `tool.execute.after` remains attribution-only. +8. All verification commands above pass. 
diff --git a/src/services/session-mcp-runtime.test.ts b/src/services/session-mcp-runtime.test.ts index 906f40a..2e3c30e 100644 --- a/src/services/session-mcp-runtime.test.ts +++ b/src/services/session-mcp-runtime.test.ts @@ -1478,9 +1478,9 @@ describe("session-mcp-runtime", () => { it("indexes an external file after requesting external_directory and read permissions", async () => { const worktreeDir = Deno.cwd(); const externalFile = - "/Users/vicary/Documents/Projects/vicary/opencode-graphiti/AGENTS.md"; + "/Users/vicary/Documents/Projects/vicary/opencode/AGENTS.md"; const externalParentDir = - "/Users/vicary/Documents/Projects/vicary/opencode-graphiti"; + "/Users/vicary/Documents/Projects/vicary/opencode"; const askCalls: Array<{ permission: string; patterns: string[]; From a1b5bc3dba5140babc298ebc0143e2502c9c16ef Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 02:26:48 +0800 Subject: [PATCH 09/38] fix: batch chat event writes and trim config prefixes --- src/config.test.ts | 35 +++++++ src/config.ts | 4 +- src/handlers/chat.test.ts | 109 +++++++++++++++++++++- src/handlers/chat.ts | 17 ++-- src/services/redis-client.ts | 127 +++++++++++++++++++++++++ src/services/redis-events.test.ts | 149 ++++++++++++++++++++++++++++++ src/services/redis-events.ts | 41 ++++++++ 7 files changed, 468 insertions(+), 14 deletions(-) diff --git a/src/config.test.ts b/src/config.test.ts index 547172a..9d4ff5d 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -246,6 +246,41 @@ describe("config", () => { assertEquals(config.redis.endpoint, "redis://trimmed:6379"); }); + it("trims graphiti groupIdPrefix values with incidental surrounding whitespace", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + graphiti: { + groupIdPrefix: " nested-prefix ", + }, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.graphiti.groupIdPrefix, "nested-prefix"); + assertEquals(config.groupIdPrefix, "nested-prefix"); + }); 
+ + it("falls back to the default groupIdPrefix when the configured value is only whitespace", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + groupIdPrefix: " ", + graphiti: { + groupIdPrefix: "\n\t ", + }, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.graphiti.groupIdPrefix, "opencode"); + assertEquals(config.groupIdPrefix, "opencode"); + }); + it("fails open to defaults when config discovery search fails", () => { using _homedir = stub(os, "homedir", () => "/users/tester"); setConfigExplorerAdapterForTesting(() => diff --git a/src/config.ts b/src/config.ts index eb9b873..bbf92ba 100644 --- a/src/config.ts +++ b/src/config.ts @@ -91,7 +91,7 @@ const normalizeConfig = (value: unknown): RawGraphitiConfig => { const config: RawGraphitiConfig = { endpoint: readTrimmedString(value, "endpoint"), - groupIdPrefix: readString(value, "groupIdPrefix"), + groupIdPrefix: readTrimmedString(value, "groupIdPrefix"), driftThreshold: readNumber(value, "driftThreshold"), }; @@ -109,7 +109,7 @@ const normalizeConfig = (value: unknown): RawGraphitiConfig => { if (isRecord(value.graphiti)) { config.graphiti = compact({ endpoint: readTrimmedString(value.graphiti, "endpoint"), - groupIdPrefix: readString(value.graphiti, "groupIdPrefix"), + groupIdPrefix: readTrimmedString(value.graphiti, "groupIdPrefix"), driftThreshold: readNumber(value.graphiti, "driftThreshold"), }); } diff --git a/src/handlers/chat.test.ts b/src/handlers/chat.test.ts index 50940a4..ca6af25 100644 --- a/src/handlers/chat.test.ts +++ b/src/handlers/chat.test.ts @@ -1,4 +1,5 @@ import { assertEquals, assertStringIncludes } from "jsr:@std/assert@^1.0.0"; +import type { SessionEvent } from "../types/index.ts"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import { setSuppressConsoleWarningsDuringTestsOverride } from "../services/opencode-warning.ts"; import { createChatHandler } from "./chat.ts"; @@ -97,6 +98,11 @@ class MockSessionManager { 
class MockRedisEvents { calls: Array<{ sessionId: string; groupId: string; summary: string }> = []; + batchCalls: Array<{ + sessionId: string; + groupId: string; + summaries: string[]; + }> = []; recordEvent( sessionId: string, @@ -106,6 +112,22 @@ class MockRedisEvents { this.calls.push({ sessionId, groupId, summary: event.summary }); return this.calls.length; } + + recordEvents( + sessionId: string, + groupId: string, + events: SessionEvent[], + ) { + this.batchCalls.push({ + sessionId, + groupId, + summaries: events.map((event) => event.summary), + }); + for (const event of events) { + this.calls.push({ sessionId, groupId, summary: event.summary }); + } + return this.calls.length; + } } class MockGraphitiAsync { @@ -187,6 +209,91 @@ describe("chat handler", () => { ); assertEquals(redisEvents.calls.length, 3); + assertEquals(redisEvents.batchCalls, [{ + sessionId: "session-1", + groupId: "group-1", + summaries: [ + "Please keep Graphiti off the hot path", + "Please keep Graphiti off the hot path", + "Please keep Graphiti off the hot path", + ], + }]); + }); + + it("uses batched event recording for zero, one, and many extracted chat events", async () => { + const graphitiAsync = new MockGraphitiAsync(); + + const noEventSessionManager = new MockSessionManager(); + const noEventRedisEvents = new MockRedisEvents(); + const noEventHandler = createChatHandler({ + sessionManager: noEventSessionManager as never, + redisEvents: noEventRedisEvents as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, + }); + + await noEventHandler( + { sessionID: "session-1" }, + { + parts: [{ type: "text", text: "tool: apply_patch\n+line" }], + } as never, + ); + + assertEquals(noEventRedisEvents.batchCalls, [{ + sessionId: "session-1", + groupId: "group-1", + summaries: [], + }]); + assertEquals(noEventRedisEvents.calls, []); + + const oneEventSessionManager = new MockSessionManager(); + const oneEventRedisEvents = new MockRedisEvents(); + const oneEventHandler = 
createChatHandler({ + sessionManager: oneEventSessionManager as never, + redisEvents: oneEventRedisEvents as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, + }); + + await oneEventHandler( + { sessionID: "session-1" }, + { parts: [{ type: "text", text: "Neutral request only" }] } as never, + ); + + assertEquals(oneEventRedisEvents.batchCalls, [{ + sessionId: "session-1", + groupId: "group-1", + summaries: ["Neutral request only"], + }]); + + const manyEventSessionManager = new MockSessionManager(); + const manyEventRedisEvents = new MockRedisEvents(); + const manyEventHandler = createChatHandler({ + sessionManager: manyEventSessionManager as never, + redisEvents: manyEventRedisEvents as never, + graphitiAsync: graphitiAsync as never, + drainTriggerSize: 99, + }); + + await manyEventHandler( + { sessionID: "session-1" }, + { + parts: [{ + type: "text", + text: "Please keep Graphiti off the hot path", + }], + } as never, + ); + + assertEquals(manyEventRedisEvents.batchCalls, [{ + sessionId: "session-1", + groupId: "group-1", + summaries: [ + "Please keep Graphiti off the hot path", + "Please keep Graphiti off the hot path", + "Please keep Graphiti off the hot path", + ], + }]); }); it("routes child-session user prompts through the canonical parent session", async () => { @@ -260,7 +367,7 @@ describe("chat handler", () => { const handler = createChatHandler({ sessionManager: sessionManager as never, redisEvents: { - recordEvent() { + recordEvents() { return 3; }, } as never, diff --git a/src/handlers/chat.ts b/src/handlers/chat.ts index 7dcb7e4..f0172b3 100644 --- a/src/handlers/chat.ts +++ b/src/handlers/chat.ts @@ -42,22 +42,17 @@ export function createChatHandler(deps: ChatHandlerDeps): ChatMessageHook { state.latestUserRequest = sanitizedMessageText; state.latestRefreshQuery = sanitizedMessageText; - let queueLength = 0; - for ( - const event of extractStructuredEvents({ + const queueLength = await redisEvents.recordEvents( + 
canonicalSessionId, + state.groupId, + extractStructuredEvents({ eventType: "chat.message", sessionId: sessionID, messageText: sanitizedMessageText, messageCount: state.messageCount, role: "user", - }) - ) { - queueLength = await redisEvents.recordEvent( - canonicalSessionId, - state.groupId, - event, - ); - } + }), + ); const prepared = await sessionManager.prepareInjection( canonicalSessionId, diff --git a/src/services/redis-client.ts b/src/services/redis-client.ts index 61bb0d2..a9a4ada 100644 --- a/src/services/redis-client.ts +++ b/src/services/redis-client.ts @@ -44,6 +44,48 @@ type RedisRuntime = { off?(event: RedisEvent, listener: (...args: unknown[]) => void): unknown; }; +const PREPEND_TO_TWO_LISTS_SCRIPT = ` +local primaryLen = redis.call('LLEN', KEYS[1]) +local secondaryLen = redis.call('LLEN', KEYS[2]) +local index = 1 +local primaryTtl = tonumber(ARGV[index]) +index = index + 1 +local primaryCount = tonumber(ARGV[index]) +index = index + 1 +if primaryCount > 0 then + local primaryValues = {} + for i = 1, primaryCount do + primaryValues[i] = ARGV[index] + index = index + 1 + end + primaryLen = redis.call('LPUSH', KEYS[1], unpack(primaryValues)) + if primaryTtl > 0 then + redis.call('EXPIRE', KEYS[1], primaryTtl) + end +end +local secondaryTtl = tonumber(ARGV[index]) +index = index + 1 +local secondaryCount = tonumber(ARGV[index]) +index = index + 1 +if secondaryCount > 0 then + local secondaryValues = {} + for i = 1, secondaryCount do + secondaryValues[i] = ARGV[index] + index = index + 1 + end + secondaryLen = redis.call('LPUSH', KEYS[2], unpack(secondaryValues)) + if secondaryTtl > 0 then + redis.call('EXPIRE', KEYS[2], secondaryTtl) + end +end +return secondaryLen +`; + +const isUnsupportedEvalError = (error: unknown): boolean => + error instanceof Error && + (error.message === "not implemented" || + error.message === "unsupported eval script"); + type RedisRuntimeFactory = ( endpoint: string, ) => Promise | RedisRuntime; @@ -907,6 +949,91 @@ export 
class RedisClient { }); } + async prependToTwoLists( + primaryKey: string, + primaryValues: string[], + primaryTtlSeconds: number | undefined, + secondaryKey: string, + secondaryValues: string[], + secondaryTtlSeconds: number | undefined, + ): Promise { + return await this.useMutationRuntime( + [primaryKey, secondaryKey], + async (runtime) => { + const primaryTtl = primaryTtlSeconds ?? 0; + const secondaryTtl = secondaryTtlSeconds ?? 0; + + const secondaryLength = runtime.eval + ? await (async () => { + try { + return await runtime.eval!( + PREPEND_TO_TWO_LISTS_SCRIPT, + 2, + primaryKey, + secondaryKey, + String(primaryTtl), + String(primaryValues.length), + ...primaryValues, + String(secondaryTtl), + String(secondaryValues.length), + ...secondaryValues, + ); + } catch (error) { + if (!isUnsupportedEvalError(error)) throw error; + return await (async () => { + for (const value of primaryValues) { + await runtime.lpush(primaryKey, value); + } + if (primaryTtl > 0 && primaryValues.length > 0) { + await runtime.expire(primaryKey, primaryTtl); + } + + let length = await runtime.llen(secondaryKey); + for (const value of secondaryValues) { + length = await runtime.lpush(secondaryKey, value); + } + if (secondaryTtl > 0 && secondaryValues.length > 0) { + await runtime.expire(secondaryKey, secondaryTtl); + } + return length; + })(); + } + })() + : await (async () => { + for (const value of primaryValues) { + await runtime.lpush(primaryKey, value); + } + if (primaryTtl > 0 && primaryValues.length > 0) { + await runtime.expire(primaryKey, primaryTtl); + } + + let length = await runtime.llen(secondaryKey); + for (const value of secondaryValues) { + length = await runtime.lpush(secondaryKey, value); + } + if (secondaryTtl > 0 && secondaryValues.length > 0) { + await runtime.expire(secondaryKey, secondaryTtl); + } + return length; + })(); + + if (runtime !== this.memory && !this.isDurableDrainKey(primaryKey)) { + for (const value of primaryValues) { + await 
this.memory.lpush(primaryKey, value); + } + if (primaryTtl > 0 && primaryValues.length > 0) { + await this.memory.expire(primaryKey, primaryTtl); + } + } + + return secondaryLength; + }, + () => { + this.queuePendingListSnapshotReplay(primaryKey); + }, + ); + } + async getRecentList(key: string, limit: number): Promise { return await this.useRuntime((runtime) => runtime.lrange(key, 0, Math.max(limit - 1, 0)) diff --git a/src/services/redis-events.test.ts b/src/services/redis-events.test.ts index bc4d9d6..1d7d2ea 100644 --- a/src/services/redis-events.test.ts +++ b/src/services/redis-events.test.ts @@ -224,6 +224,58 @@ class ClaimRuntime extends ToggleRedisRuntime { } } +class BatchedEvalRuntime extends ToggleRedisRuntime { + evalCalls = 0; + + override eval( + script: string, + numKeys: number, + ...args: string[] + ): Promise { + this.ensureAvailable(); + this.evalCalls += 1; + + if ( + !script.includes("redis.call('LPUSH', KEYS[1], unpack(primaryValues))") + ) { + throw new Error("unsupported eval script"); + } + if (numKeys !== 2) { + throw new Error("unexpected key count"); + } + + const [primaryKey, secondaryKey] = args; + let index = 2; + const primaryTtl = Number(args[index++]); + const primaryCount = Number(args[index++]); + const primaryValues = args.slice(index, index + primaryCount); + index += primaryCount; + const secondaryTtl = Number(args[index++]); + const secondaryCount = Number(args[index++]); + const secondaryValues = args.slice(index, index + secondaryCount); + + const primaryLength = this.llen(primaryKey); + const secondaryLength = this.llen(secondaryKey); + + return Promise.all([primaryLength, secondaryLength]).then(async ([, _]) => { + let latestSecondaryLength = await this.llen(secondaryKey); + for (const value of primaryValues) { + await this.lpush(primaryKey, value); + } + if (primaryTtl > 0 && primaryValues.length > 0) { + await this.expire(primaryKey, primaryTtl); + } + for (const value of secondaryValues) { + latestSecondaryLength = 
await this.lpush(secondaryKey, value); + } + if (secondaryTtl > 0 && secondaryValues.length > 0) { + await this.expire(secondaryKey, secondaryTtl); + } + return latestSecondaryLength; + }); + } +} + describe("redis events", () => { it("degrades durable queue writes to a warning during a redis outage", async () => { const state = { available: true }; @@ -281,6 +333,103 @@ describe("redis events", () => { } }); + it("records batched events in order and returns the final pending queue length", async () => { + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => + new ToggleRedisRuntime({ available: true }) as never, + }); + await redis.connect(); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 60, + }); + const events = [{ + id: "event-1", + ts: Date.now(), + category: "decision", + priority: 0, + role: "system", + summary: "first", + }, { + id: "event-2", + ts: Date.now() + 1, + category: "preference", + priority: 0, + role: "user", + summary: "second", + }] satisfies SessionEvent[]; + + try { + const queueLength = await redisEvents.recordEvents( + "session-1", + "group-1", + events, + ); + + assertEquals(queueLength, 2); + assertEquals( + (await redisEvents.getRecentSessionEvents("session-1")).map((event) => + event.id + ), + ["event-1", "event-2"], + ); + assertEquals( + (await redis.getListRange(drainPendingKey("group-1"), 0, -1)).map(( + raw, + ) => JSON.parse(raw).event.id), + ["event-2", "event-1"], + ); + } finally { + await redis.close(); + } + }); + + it("uses a single eval call for multi-event live Redis batching", async () => { + const runtime = new BatchedEvalRuntime({ available: true }); + const redis = new RedisClient({ + endpoint: "redis://unused", + runtimeFactory: () => runtime as never, + }); + await redis.connect(); + const redisEvents = new RedisEventsService(redis, { + sessionTtlSeconds: 60, + }); + const events = [{ + id: "event-1", + ts: Date.now(), + category: "decision", + priority: 
0, + role: "system", + summary: "first", + }, { + id: "event-2", + ts: Date.now() + 1, + category: "preference", + priority: 0, + role: "user", + summary: "second", + }] satisfies SessionEvent[]; + + try { + const queueLength = await redisEvents.recordEvents( + "session-1", + "group-1", + events, + ); + + assertEquals(runtime.evalCalls, 1); + assertEquals(queueLength, 2); + assertEquals( + (await redisEvents.getRecentSessionEvents("session-1")).map((event) => + event.id + ), + ["event-1", "event-2"], + ); + } finally { + await redis.close(); + } + }); + it("dead-letters malformed claimed payloads and keeps valid entries claimable FIFO", async () => { class ClaimRuntime extends ToggleRedisRuntime { override lmove( diff --git a/src/services/redis-events.ts b/src/services/redis-events.ts index 84238b5..d947164 100644 --- a/src/services/redis-events.ts +++ b/src/services/redis-events.ts @@ -314,6 +314,47 @@ export class RedisEventsService { } } + async recordEvents( + sessionId: string, + groupId: string, + events: SessionEvent[], + ): Promise { + if (events.length === 0) return 0; + + const sanitizedEvents = events.map(sanitizeStoredEvent); + const sessionValues = sanitizedEvents.map((event) => JSON.stringify(event)); + const drainValues = sanitizedEvents.map((event) => + JSON.stringify( + { + sessionId, + groupId, + event, + } satisfies DrainQueueEntry, + ) + ); + + try { + return await this.redis.prependToTwoLists( + sessionEventsKey(sessionId), + sessionValues, + this.options.sessionTtlSeconds, + drainPendingKey(groupId), + drainValues, + DRAIN_TTL_SECONDS, + ); + } catch (error) { + if (!this.isDurableDrainMutationUnavailable(error)) { + throw error; + } + + let queueLength = 0; + for (const event of sanitizedEvents) { + queueLength = await this.recordEvent(sessionId, groupId, event); + } + return queueLength; + } + } + private isDurableDrainMutationUnavailable(error: unknown): boolean { return error instanceof Error && error.message === 
DURABLE_DRAIN_MUTATION_UNAVAILABLE; From f5acebac53c70decfbcba5e97bc0d41d63b01142 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 02:37:36 +0800 Subject: [PATCH 10/38] fix: restore dnt build compatibility --- CONTRIBUTING.md | 7 +++++++ docs/ContextOverhaul.md | 15 +++++++------- docs/ContextOverhaulTests.md | 1 + src/services/connection-manager.ts | 10 ++++++++-- src/services/session-executor.ts | 32 +++++++++++++++++++++++++++--- 5 files changed, 53 insertions(+), 12 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4592ab3..3d5fe83 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,6 +3,13 @@ ## Development ```bash +# Readiness check +deno test -A +deno run build +deno task check +deno task lint +deno fmt --check + # Format deno fmt diff --git a/docs/ContextOverhaul.md b/docs/ContextOverhaul.md index b9a48f5..aa36cc6 100644 --- a/docs/ContextOverhaul.md +++ b/docs/ContextOverhaul.md @@ -812,7 +812,7 @@ docs/ContextOverhaulTests.md — add Suite N (pre-tool routing) test cases - [ ] Policies exist for `Read`, `WebFetch`, `Bash`, `Grep`, `Glob`, `Task`. - [ ] Hard deny uses thrown errors from `tool.execute.before`. - [ ] Guidance is emitted at most once per canonical session lineage per type. -- [ ] `deno test` passes; `deno task check` passes. +- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes. ### Phase 2: Pre-tool hook wiring @@ -834,7 +834,7 @@ docs/ContextOverhaulTests.md — add Suite N (pre-tool routing) test cases - [ ] The hook fires for parent and child sessions. - [ ] `tool.execute.before` does not call FalkorDB or Graphiti. - [ ] Parent and child sessions share one guidance throttle namespace. -- [ ] `deno test` passes; `deno task check` passes. +- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes. ### Phase 3: Heavy-tool policies @@ -862,7 +862,7 @@ docs/ContextOverhaulTests.md — add Suite N (pre-tool routing) test cases args. 
- [ ] `Glob` does not rely on unsupported exclusion args. - [ ] `Task` rewrites delegated prompt text with routing instructions. -- [ ] `deno test` passes; `deno task check` passes. +- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes. ### Phase 4: Extraction tightening @@ -882,7 +882,7 @@ docs/ContextOverhaulTests.md — add Suite N (pre-tool routing) test cases - [ ] Denied tool calls produce a compact event with the denial reason. - [ ] Modified/context-guided tool calls produce a compact event noting the routing action. -- [ ] `deno test` passes; `deno task check` passes. +- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes. ### Phase 5: Snapshot tightening @@ -900,7 +900,7 @@ docs/ContextOverhaulTests.md — add Suite N (pre-tool routing) test cases - [ ] Snapshot with 50+ events (including routing events) stays within budget. - [ ] P0/P1 content (last request, active tasks, decisions) is never dropped. - [ ] Routing denial events are classified as P2. -- [ ] `deno test` passes; `deno task check` passes. +- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes. ### Phase 6: Integration validation + documentation @@ -917,8 +917,8 @@ docs/ContextOverhaulTests.md — add Suite N (pre-tool routing) test cases **Acceptance criteria:** - [ ] All §3.1.1 alignment criteria (A1–A8) are met. -- [ ] `deno test` passes; `deno task check` passes; `deno lint` passes; - `deno fmt --check` passes. +- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes; + `deno lint` passes; `deno fmt --check` passes. - [ ] `README.md` documents the pre-tool routing behavior. - [ ] `AGENTS.md` lists `tool.execute.before` in the hot-path section. - [ ] `docs/ContextOverhaulTests.md` includes Suite N with ≥ 10 test cases. 
@@ -955,6 +955,7 @@ Add to `docs/ContextOverhaulTests.md` as Suite N: Before merging any part of this plan: - `deno test` +- `deno run build` - `deno task check` - `deno lint` - `deno fmt --check` diff --git a/docs/ContextOverhaulTests.md b/docs/ContextOverhaulTests.md index a0f5b47..a2c69a7 100644 --- a/docs/ContextOverhaulTests.md +++ b/docs/ContextOverhaulTests.md @@ -666,6 +666,7 @@ deno test --allow-net --allow-env --filter "suite-k" src/ # Full run docker compose -f tests/docker-compose.yml up -d deno test --allow-net --allow-env src/ +deno run build ``` ### 9.2 CI Artifacts to Collect diff --git a/src/services/connection-manager.ts b/src/services/connection-manager.ts index 4c332f7..da9bb79 100644 --- a/src/services/connection-manager.ts +++ b/src/services/connection-manager.ts @@ -124,10 +124,16 @@ const validateEndpoint = (endpoint: string): string => { try { new URL(normalized); } catch (cause) { - throw new Error( + const error = new Error( `Invalid Graphiti endpoint: ${JSON.stringify(normalized)}`, - { cause }, ); + Object.defineProperty(error, "cause", { + value: cause, + writable: true, + configurable: true, + enumerable: false, + }); + throw error; } return normalized; diff --git a/src/services/session-executor.ts b/src/services/session-executor.ts index 0a8a22d..51b153e 100644 --- a/src/services/session-executor.ts +++ b/src/services/session-executor.ts @@ -110,12 +110,37 @@ const clampTimeoutSeconds = ( defaults.maxCommandTimeoutSeconds, ); +type DenoCommandOutput = { + code: number; + stdout: Uint8Array; + stderr: Uint8Array; +}; + +type DenoCommandInstance = { + output(): Promise<DenoCommandOutput>; +}; + +type DenoCommandConstructor = new ( + command: string | URL, + options?: { + args?: string[]; + cwd?: string; + stdin?: "null" | "piped" | "inherit"; + stdout?: "null" | "piped" | "inherit"; + stderr?: "null" | "piped" | "inherit"; + signal?: AbortSignal; + }, +) => DenoCommandInstance; + const defaultRunCommand: NonNullable = async ({ command, cwd, signal }) =>
{ const shell = Deno.build.os === "windows" ? { executable: "cmd", args: ["/d", "/s", "/c", command] } : { executable: "/bin/sh", args: ["-lc", command] }; - const output = await new Deno.Command(shell.executable, { + const DenoWithCommand = Deno as typeof Deno & { + Command: DenoCommandConstructor; + }; + const output = await new DenoWithCommand.Command(shell.executable, { args: shell.args, cwd, stdin: "null", @@ -556,14 +581,15 @@ export const createSessionExecutor = ( truncated: false, }; - return await ensureBatchResponseWithinBudget(batchResponse, { + const bounded = await ensureBatchResponseWithinBudget(batchResponse, { root_session_id: request.root_session_id, commands: request.commands, }, { responseBudgetBytes, maxNormalizedIndexedBodyBytes, storeArtifact, - }) as SessionBatchExecuteResponse; + }); + return bounded as unknown as SessionBatchExecuteResponse; }, }; }; From c8c089cc6023e2cb50eaf0221af6e63d567388d1 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 02:52:34 +0800 Subject: [PATCH 11/38] fix: address remaining review follow-ups --- docs/ReviewProtocol.md | 12 +++------ src/index.test.ts | 14 +++++++---- src/index.ts | 9 ++++--- src/services/context-limit.test.ts | 39 ++++++++++++++++++++++++++++++ src/services/context-limit.ts | 16 +++++++----- 5 files changed, 67 insertions(+), 23 deletions(-) diff --git a/docs/ReviewProtocol.md b/docs/ReviewProtocol.md index 366ccf2..1018531 100644 --- a/docs/ReviewProtocol.md +++ b/docs/ReviewProtocol.md @@ -8,7 +8,7 @@ request and review feedback needs to be handled systematically. 
- use live GitHub review state as the source of truth - verify each review claim before changing code - keep fixes narrow and scoped to the verified issue -- resolve handled review threads and request a fresh review cycle +- resolve handled review threads and leave review re-requesting to the user ## Required Unresolved-Batch Query @@ -97,16 +97,11 @@ deno eval 'const o="OWNER",r="REPO",n="PR_NUMBER",maxUnresolved=10,mq="query($o: 7. Commit and push. - Run focused validation on the touched files while iterating. - Before commit, run the full test suite and confirm it passes. + - Run `deno task build` as a readiness check before push. - Create a review-follow-up commit. - Push the branch to the PR remote. -8. Request a fresh review. - - Request a new review on the PR after the fixes are pushed using the - available GitHub tooling. - - If GitHub rejects the request because you still have a pending review, - submit the pending review first, then retry the fresh-review request. - -9. Report status. +8. Report status. 
- Include: - PR number and URL - unresolved items found @@ -114,7 +109,6 @@ deno eval 'const o="OWNER",r="REPO",n="PR_NUMBER",maxUnresolved=10,mq="query($o: - items resolved/commented - commit sha - push status - - fresh review request status - final unresolved review count ## Guardrails diff --git a/src/index.test.ts b/src/index.test.ts index a527e32..2f7fbb5 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ -972,7 +972,7 @@ describe("index", () => { assertEquals(args.groupId, "group-id"); }); - it("reports degraded startup once when both startup promises reject", async () => { + it("reports degraded startup separately for Graphiti and Redis when both startup promises reject", async () => { const { input, records, dependencies } = createEntrypointHarnessWithOptions({ readyError: new Error("graphiti startup failed"), @@ -983,10 +983,14 @@ describe("index", () => { await Promise.resolve(); await Promise.resolve(); - assertEquals( - records.graphitiWarnCalls.length + records.redisWarnCalls.length, - 1, - ); + assertEquals(records.graphitiWarnCalls, [{ + connected: false, + endpoint: "http://graphiti.test/mcp", + }]); + assertEquals(records.redisWarnCalls, [{ + connected: false, + endpoint: "redis://redis.test:6379", + }]); }); it("waits for previous runtime teardown before starting a new runtime", async () => { diff --git a/src/index.ts b/src/index.ts index b002c66..58b6a71 100644 --- a/src/index.ts +++ b/src/index.ts @@ -136,17 +136,20 @@ export const graphiti: Plugin = ( const config = dependencies.loadConfig(input.directory); dependencies.setOpenCodeClient(input.client); - let startupUnavailableReported = false; + let graphitiStartupUnavailableReported = false; + let redisStartupUnavailableReported = false; const reportStartupUnavailable = (service: "graphiti" | "redis") => { - if (startupUnavailableReported) return; - startupUnavailableReported = true; if (service === "graphiti") { + if (graphitiStartupUnavailableReported) return; + 
graphitiStartupUnavailableReported = true; dependencies.warnOnGraphitiStartupUnavailable( false, config.graphiti.endpoint, ); return; } + if (redisStartupUnavailableReported) return; + redisStartupUnavailableReported = true; dependencies.warnOnRedisStartupUnavailable(false, config.redis.endpoint); }; diff --git a/src/services/context-limit.test.ts b/src/services/context-limit.test.ts index f047077..da352d1 100644 --- a/src/services/context-limit.test.ts +++ b/src/services/context-limit.test.ts @@ -195,3 +195,42 @@ Deno.test("resolveContextLimit keeps positive cache entries without expiry re-pr assertEquals(calls, 1); }); + +Deno.test("resolveContextLimit re-probes when legacy numeric cache entry is non-positive", async () => { + const cache = new Map< + string, + number | { value: number; expiresAt?: number } + >(); + cache.set("openai/gpt-5", -1); + + let calls = 0; + const client = { + provider: { + list: () => { + calls += 1; + return Promise.resolve({ + providers: [ + { + id: "openai", + models: [{ id: "gpt-5", limit: { context: 456_000 } }], + }, + ], + }); + }, + }, + }; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + undefined, + cache, + ), + 456_000, + ); + + assertEquals(calls, 1); + assertEquals(cache.get("openai/gpt-5"), 456_000); +}); diff --git a/src/services/context-limit.ts b/src/services/context-limit.ts index c50f3fc..874fb04 100644 --- a/src/services/context-limit.ts +++ b/src/services/context-limit.ts @@ -40,14 +40,18 @@ export async function resolveContextLimit( const cached = cache.get(modelKey); if (cached !== undefined) { if (typeof cached === "number") { - return cached > 0 ? cached : DEFAULT_CONTEXT_LIMIT; - } + if (cached > 0) { + return cached; + } - if (cached.expiresAt === undefined || cached.expiresAt > Date.now()) { - return cached.value > 0 ? 
cached.value : DEFAULT_CONTEXT_LIMIT; - } + cache.delete(modelKey); + } else { + if (cached.expiresAt === undefined || cached.expiresAt > Date.now()) { + return cached.value > 0 ? cached.value : DEFAULT_CONTEXT_LIMIT; + } - cache.delete(modelKey); + cache.delete(modelKey); + } } try { From 8d4f355afc0ddf9ca1879ec09fa61b26dc08f1cd Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 03:25:40 +0800 Subject: [PATCH 12/38] fix: keep review follow-ups green in full validation --- .github/scripts/version.test.ts | 22 +++++++++++++++- .github/scripts/version.ts | 45 ++++++++++++++++++++++++++------- CONTRIBUTING.md | 2 +- src/config.test.ts | 2 +- src/config.ts | 4 ++- src/handlers/messages.test.ts | 23 +++++++++++++++++ src/handlers/messages.ts | 5 ++-- 7 files changed, 87 insertions(+), 16 deletions(-) diff --git a/.github/scripts/version.test.ts b/.github/scripts/version.test.ts index dca3423..960d289 100644 --- a/.github/scripts/version.test.ts +++ b/.github/scripts/version.test.ts @@ -5,7 +5,7 @@ */ import { describe, it } from "jsr:@std/testing@1/bdd"; -import { assertEquals } from "jsr:@std/assert@1"; +import { assertEquals, assertThrows } from "jsr:@std/assert@1"; import { analyzeCommits, applyBump, @@ -14,6 +14,7 @@ import { hasBreakingChangeBody, hasNonTestChanges, parseChangedFiles, + parseCommandOutput, parseSemver, run, } from "./version.ts"; @@ -922,3 +923,22 @@ describe("run", () => { ]); }); }); + +describe("parseCommandOutput", () => { + it("throws on non-zero exit with stderr included", () => { + assertThrows( + () => + parseCommandOutput( + ["git", "describe", "--tags"], + { + stdout: new TextEncoder().encode(""), + stderr: new TextEncoder().encode("boom"), + success: false, + code: 5, + }, + ), + Error, + "boom", + ); + }); +}); diff --git a/.github/scripts/version.ts b/.github/scripts/version.ts index 4a25011..9be7382 100644 --- a/.github/scripts/version.ts +++ b/.github/scripts/version.ts @@ -27,6 +27,41 @@ export interface 
VersionCliDeps { now: () => Date; } +export interface CommandOutputResult { + stdout: Uint8Array; + stderr: Uint8Array; + success: boolean; + code: number; +} + +export function parseCommandOutput( + command: string[], + result: CommandOutputResult, +): string { + const stdoutText = new TextDecoder().decode(result.stdout).trim(); + const stderrText = new TextDecoder().decode(result.stderr).trim(); + + if (!result.success) { + const stderrSuffix = stderrText ? `: ${stderrText}` : ""; + throw new Error( + `Command failed with exit code ${result.code} (${ + command.join(" ") + })${stderrSuffix}`, + ); + } + + return stdoutText; +} + +export async function runCommand(...command: string[]): Promise<string> { + const proc = new Deno.Command(command[0], { + args: command.slice(1), + stdout: "piped", + stderr: "piped", + }); + return parseCommandOutput(command, await proc.output()); +} + function stripJsonComments(text: string): string { let result = ""; let inString = false; @@ -108,15 +143,7 @@ function getPackageNameFromManifest(manifest: unknown): string | undefined { } const defaultVersionCliDeps: VersionCliDeps = { - cmd: async (...command: string[]): Promise<string> => { - const proc = new Deno.Command(command[0], { - args: command.slice(1), - stdout: "piped", - stderr: "piped", - }); - const { stdout } = await proc.output(); - return new TextDecoder().decode(stdout).trim(); - }, + cmd: (...command: string[]) => runCommand(...command), readTextFile: (filePath) => Deno.readTextFile(filePath), envGet: (name) => Deno.env.get(name), appendFile: (filePath, text) => { diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3d5fe83..5cd6291 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,7 +5,7 @@ ```bash # Readiness check deno test -A -deno run build +deno task build deno task check deno task lint deno fmt --check diff --git a/src/config.test.ts b/src/config.test.ts index 9d4ff5d..8343346 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -220,7 +220,7 @@
describe("config", () => { assertThrows( () => loadConfig(), ConfigLoadError, - "Invalid Graphiti config value for graphiti.endpoint", + 'Invalid Graphiti config value for graphiti.endpoint: expected a valid URL, received "not a valid url"', ); }); diff --git a/src/config.ts b/src/config.ts index bbf92ba..2bd5da0 100644 --- a/src/config.ts +++ b/src/config.ts @@ -141,7 +141,9 @@ const assertExplicitUrl = ( if (value === undefined) return; if (isValidUrlString(value)) return; throw new ConfigLoadError( - `Invalid Graphiti config value for ${fieldName}: expected a valid URL`, + `Invalid Graphiti config value for ${fieldName}: expected a valid URL, received ${ + JSON.stringify(value) + }`, { code: "config-invalid" }, ); }; diff --git a/src/handlers/messages.test.ts b/src/handlers/messages.test.ts index 2d743b5..db16f3a 100644 --- a/src/handlers/messages.test.ts +++ b/src/handlers/messages.test.ts @@ -965,4 +965,27 @@ describe("messages handler", () => { assertEquals(sessionManager.activeCalls, []); assertEquals(sessionManager.state.pendingInjection, undefined); }); + + it("skips transform work when the latest user text part is synthetic", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.prepareInjectionImpl = () => { + throw new Error("prepareInjection should not run"); + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ type: "text", text: "synthetic", synthetic: true }], + }], + }; + + await handler({ message: "should be ignored" } as never, output as never); + + assertEquals(sessionManager.activeCalls, []); + assertEquals(sessionManager.state.pendingInjection, undefined); + assertEquals(output.messages[0].parts[0].text, "synthetic"); + }); }); diff --git a/src/handlers/messages.ts b/src/handlers/messages.ts index 16696e5..229c915 100644 --- a/src/handlers/messages.ts +++ 
b/src/handlers/messages.ts @@ -64,8 +64,8 @@ export function createMessagesHandler( if (!lastUserEntry) return; const textPart = lastUserEntry.parts.find(isTextPart); - const latestUserText = textPart?.text; - if (latestUserText === undefined) return; + if (!textPart) return; + const latestUserText = textPart.text; const sourceSessionID = lastUserEntry.info.sessionID; @@ -93,7 +93,6 @@ export function createMessagesHandler( recallQuery, ); if (!prepared) return; - if (!textPart) return; const scrubbedUserText = scrubPromptMemoryText(latestUserText); const effectiveUserText = sanitizeMemoryInputPreservingMemoryBlocks( From b40f593291cf7a6df05642604d50f56580c7e77d Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 04:48:30 +0800 Subject: [PATCH 13/38] fix: fail soft when drain claim release errors --- src/services/batch-drain.test.ts | 33 +++++++++++++++++++++++++++++++ src/services/batch-drain.ts | 34 ++++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/src/services/batch-drain.test.ts b/src/services/batch-drain.test.ts index d01d986..09e4054 100644 --- a/src/services/batch-drain.test.ts +++ b/src/services/batch-drain.test.ts @@ -535,6 +535,39 @@ describe("batch drain", () => { assertEquals(await redis.getString(retryKey), JSON.stringify(retryState)); }); + it("returns backoff even if releasing the claim fails", async () => { + const { redis, events, drain } = await createDeps(); + const event = createSessionEvent("message", "user", { + summary: "wait before retry", + body: "wait before retry", + }); + await events.recordEvent("session-1", "group-1", event); + + const retryKey = drainRetryKey("group-1", `${event.id}:${event.id}`); + const retryState = { attempts: 1, nextAttemptAt: Date.now() + 60_000 }; + await redis.setString(retryKey, JSON.stringify(retryState), 60); + + const originalReleaseClaim = events.releaseClaim.bind(events); + let releaseAttempts = 0; + events.releaseClaim = async (...args) => { + 
releaseAttempts += 1; + await originalReleaseClaim(...args); + throw new Error("redis unavailable"); + }; + + const result = await drain.drainGroup("group-1", { + addMemory() { + throw new Error("should not drain while backing off"); + }, + } as never); + + assertEquals(result.status, "backoff"); + assertEquals(result.drained, 0); + assertEquals(releaseAttempts, 1); + assertEquals(await redis.getListLength(drainPendingKey("group-1")), 1); + assertEquals(await redis.getString(retryKey), JSON.stringify(retryState)); + }); + it("clears corrupted retry state before retrying a batch", async () => { const { redis, events, drain } = await createDeps(); const event = createSessionEvent("message", "user", { diff --git a/src/services/batch-drain.ts b/src/services/batch-drain.ts index 9f6d50e..e855625 100644 --- a/src/services/batch-drain.ts +++ b/src/services/batch-drain.ts @@ -190,6 +190,26 @@ export class BatchDrainService { ); } + private async releaseClaimSafely( + groupId: string, + claimToken: string, + context: "backoff" | "retry", + eventIds: string[], + ): Promise<boolean> { + try { + await this.events.releaseClaim(groupId, claimToken); + return true; + } catch (err) { + logger.warn("Failed to release drain claim", { + groupId, + context, + eventIds, + err, + }); + return false; + } + } + async drainGroup( groupId: string, graphiti: GraphitiMcpClient, @@ -225,7 +245,12 @@ export class BatchDrainService { const now = Date.now(); if (retryState.nextAttemptAt > now) { const retryAfterMs = Math.max(0, retryState.nextAttemptAt - now); - await this.events.releaseClaim(groupId, claimed.claimToken); + await this.releaseClaimSafely( + groupId, + claimed.claimToken, + "backoff", + eventIds, + ); return { status: "backoff", drained: 0, retryAfterMs }; } } @@ -353,7 +378,12 @@ export class BatchDrainService { return { status: "dead-letter", drained: drainedCount }; } - await this.events.releaseClaim(groupId, claimed.claimToken); + await this.releaseClaimSafely( + groupId, +
claimed.claimToken, + "retry", + eventIds, + ); await this.setRetryState(groupId, batchKey, { attempts, nextAttemptAt: Date.now() + 1_000 * (2 ** (attempts - 1)), From fdf8b854303ee45d3da645855cd77431a9626131 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 05:27:47 +0800 Subject: [PATCH 14/38] fix: harden injected memory and redact endpoints --- src/handlers/messages.test.ts | 115 ++++++++++++++++++++++++--------- src/handlers/messages.ts | 8 ++- src/index.test.ts | 116 ++++++++++++++++++++++++++++++++++ src/index.ts | 22 +++++-- 4 files changed, 227 insertions(+), 34 deletions(-) diff --git a/src/handlers/messages.test.ts b/src/handlers/messages.test.ts index db16f3a..a7e39f4 100644 --- a/src/handlers/messages.test.ts +++ b/src/handlers/messages.test.ts @@ -354,7 +354,7 @@ describe("messages handler", () => { } }); - it("preserves user-authored persistent memory blocks away from the reinjection prefix", async () => { + it("neutralizes user-authored persistent memory blocks away from the reinjection prefix", async () => { const sessionManager = new MockSessionManager(); sessionManager.state.pendingInjection = { envelope: @@ -388,11 +388,11 @@ describe("messages handler", () => { assertStringIncludes(output.messages[0].parts[0].text, "stale memory', + "<persistent_memory fact_uuids="fact-standalone-1,fact-standalone-2">stale memory</persistent_memory>", ); }); - it("preserves literal user-authored session memory XML in the latest user message", async () => { + it("neutralizes literal user-authored session memory XML in the latest user message", async () => { const sessionManager = new MockSessionManager(); sessionManager.state.pendingInjection = { envelope: @@ -426,11 +426,11 @@ describe("messages handler", () => { assertStringIncludes(output.messages[0].parts[0].text, "example', + "<session_memory version="1">example</session_memory>", ); }); - it("preserves leading user-authored session_memory blocks that do not match the injected shape", async 
() => { + it("neutralizes leading user-authored session_memory blocks that do not match the injected shape", async () => { const sessionManager = new MockSessionManager(); sessionManager.state.pendingInjection = { envelope: @@ -462,10 +462,63 @@ describe("messages handler", () => { await handler({} as never, output as never); - assertStringIncludes(output.messages[0].parts[0].text, userAuthoredBlock); + assertStringIncludes( + output.messages[0].parts[0].text, + "<session_memory version="1">user-authored example</session_memory>", + ); + }); + + it("neutralizes leading user-authored legacy and persistent memory blocks", async () => { + const cases = [ + { + input: "user-authored example", + escaped: "<memory>user-authored example</memory>", + }, + { + input: "user-authored example", + escaped: + "<persistent_memory>user-authored example</persistent_memory>", + }, + ]; + + for (const { input, escaped } of cases) { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'inspect example', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "inspect example", + }, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: `${input}\n\ninspect example`, + }], + }], + }; + + await handler({} as never, output as never); + + assertEquals( + output.messages[0].parts[0].text.includes(input), + false, + ); + assertStringIncludes(output.messages[0].parts[0].text, escaped); + } }); - it("preserves leading user-authored legacy and persistent memory blocks", async () => { + it("neutralizes leading user-authored non-empty legacy memory blocks without data-uuids", async () => { const sessionManager = new MockSessionManager(); sessionManager.state.pendingInjection = { envelope: @@ -483,29 
+536,26 @@ describe("messages handler", () => { sessionManager: sessionManager as never, }); - const cases = [ - "user-authored example", - "user-authored example", - ]; - - for (const userAuthoredBlock of cases) { - const output = { - messages: [{ - info: { role: "user", sessionID: "session-1" }, - parts: [{ - type: "text", - text: `${userAuthoredBlock}\n\ninspect example`, - }], + const userAuthoredBlock = "user-authored example"; + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: `${userAuthoredBlock}\n\ninspect example`, }], - }; + }], + }; - await handler({} as never, output as never); + await handler({} as never, output as never); - assertStringIncludes(output.messages[0].parts[0].text, userAuthoredBlock); - } + assertStringIncludes( + output.messages[0].parts[0].text, + "<memory>user-authored example</memory>", + ); }); - it("preserves leading user-authored non-empty legacy memory blocks without data-uuids", async () => { + it("preserves the canonical injected block while neutralizing user-authored memory-envelope tags", async () => { const sessionManager = new MockSessionManager(); sessionManager.state.pendingInjection = { envelope: @@ -523,20 +573,27 @@ describe("messages handler", () => { sessionManager: sessionManager as never, }); - const userAuthoredBlock = "user-authored example"; const output = { messages: [{ info: { role: "user", sessionID: "session-1" }, parts: [{ type: "text", - text: `${userAuthoredBlock}\n\ninspect example`, + text: + 'Inspect this literal XML:\n\nexample', }], }], }; await handler({} as never, output as never); - assertStringIncludes(output.messages[0].parts[0].text, userAuthoredBlock); + assertEquals( + output.messages[0].parts[0].text.match(/example
</session_memory>", + ); }); it("reports rewroteExistingMemory when canonical or legacy blocks were scrubbed", async () => { @@ -843,7 +900,7 @@ describe("messages handler", () => { assertEquals( output.messages[0].parts[0].text, - `continue\n\n${trailingExample}`, + 'continue\n\nkeep transcript\n\n<session_memory version="1">example</session_memory>', ); }); diff --git a/src/handlers/messages.ts b/src/handlers/messages.ts index 229c915..9f30b31 100644 --- a/src/handlers/messages.ts +++ b/src/handlers/messages.ts @@ -1,6 +1,7 @@ import type { Hooks } from "@opencode-ai/plugin"; import { logger } from "../services/logger.ts"; import { + escapeXml, sanitizeMemoryInput, sanitizeMemoryInputPreservingMemoryBlocks, stripInjectedMemoryBlocks, @@ -36,6 +37,8 @@ const LEADING_INJECTED_EMPTY_LEGACY_MEMORY_BLOCK = /^]*\bdata-uuids=)[^>]*>\s*<\/memory>(?:\r?\n){0,2}/; const LEADING_INJECTED_PERSISTENT_MEMORY_BLOCK = /^]*\b(?:node_refs|fact_uuids)=(["'])[^"']*\1)[^>]*>[\s\S]*?<\/persistent_memory>(?:\r?\n){0,2}/; +const USER_MEMORY_ENVELOPE_TAG_PATTERN = + /<\/?(?:session_memory|memory|persistent_memory)\b[^>]*>/gi; const scrubPromptMemoryText = (text: string): string => { let scrubbed = text; @@ -50,6 +53,9 @@ const scrubPromptMemoryText = (text: string): string => { } }; +const neutralizeUserMemoryEnvelopeTags = (text: string): string => + text.replace(USER_MEMORY_ENVELOPE_TAG_PATTERN, (tag) => escapeXml(tag)); + export function createMessagesHandler( deps: MessagesHandlerDeps, ): MessagesTransformHook { @@ -96,7 +102,7 @@ export function createMessagesHandler( const scrubbedUserText = scrubPromptMemoryText(latestUserText); const effectiveUserText = sanitizeMemoryInputPreservingMemoryBlocks( - scrubbedUserText, + neutralizeUserMemoryEnvelopeTags(scrubbedUserText), ); if (!effectiveUserText) { sessionManager.clearPendingInjection(state, prepared); diff --git a/src/index.test.ts b/src/index.test.ts index 2f7fbb5..24be321 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ 
-542,6 +542,64 @@ describe("index", () => { for (const task of scheduledTasks) task(); assertEquals(appLogCalls.length, 1); + assertEquals(appLogCalls, [{ + body: { + service: "graphiti", + level: "warn", + message: + "Graphiti MCP unavailable at http://graphiti.test/mcp; continuing without persistent memory.", + extra: { + endpoint: "http://graphiti.test/mcp", + }, + }, + }]); + assertEquals(toastCalls, [{ + body: { + message: + "Graphiti MCP unavailable at http://graphiti.test/mcp; continuing without persistent memory.", + variant: "warning", + }, + }]); + }); + + it("redacts URL credentials from Graphiti startup warnings", () => { + const appLogCalls: unknown[] = []; + const toastCalls: unknown[] = []; + const scheduledTasks: Array<() => void> = []; + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: (input: unknown) => { + appLogCalls.push(input); + }, + }, + tui: { + showToast: (input: unknown) => { + toastCalls.push(input); + }, + }, + }); + + warnOnGraphitiStartupUnavailable( + false, + "http://user:secret@graphiti.test/mcp", + ); + + for (const task of scheduledTasks) task(); + + assertEquals(appLogCalls, [{ + body: { + service: "graphiti", + level: "warn", + message: + "Graphiti MCP unavailable at http://graphiti.test/mcp; continuing without persistent memory.", + extra: { + endpoint: "http://graphiti.test/mcp", + }, + }, + }]); assertEquals(toastCalls, [{ body: { message: @@ -609,6 +667,64 @@ describe("index", () => { for (const task of scheduledTasks) task(); assertEquals(appLogCalls.length, 1); + assertEquals(appLogCalls, [{ + body: { + service: "graphiti", + level: "warn", + message: + "Redis unavailable at redis://redis.test:6379; continuing without persistent memory.", + extra: { + endpoint: "redis://redis.test:6379", + }, + }, + }]); + assertEquals(toastCalls, [{ + body: { + message: + "Redis unavailable at redis://redis.test:6379; continuing without persistent memory.", + variant: 
"warning", + }, + }]); + }); + + it("redacts URL credentials from Redis startup warnings", () => { + const appLogCalls: unknown[] = []; + const toastCalls: unknown[] = []; + const scheduledTasks: Array<() => void> = []; + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: (input: unknown) => { + appLogCalls.push(input); + }, + }, + tui: { + showToast: (input: unknown) => { + toastCalls.push(input); + }, + }, + }); + + warnOnRedisStartupUnavailable( + false, + "redis://user:secret@redis.test:6379", + ); + + for (const task of scheduledTasks) task(); + + assertEquals(appLogCalls, [{ + body: { + service: "graphiti", + level: "warn", + message: + "Redis unavailable at redis://redis.test:6379; continuing without persistent memory.", + extra: { + endpoint: "redis://redis.test:6379", + }, + }, + }]); assertEquals(toastCalls, [{ body: { message: diff --git a/src/index.ts b/src/index.ts index 58b6a71..0bd464c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -67,14 +67,27 @@ let activeRuntimeTeardown: | null = null; let runtimeInitialization = Promise.resolve(); +const redactEndpointUserInfo = (endpoint: string): string => { + try { + const url = new URL(endpoint); + if (!url.username && !url.password) return endpoint; + url.username = ""; + url.password = ""; + return url.toString(); + } catch { + return endpoint; + } +}; + export const warnOnGraphitiStartupUnavailable = ( connected: boolean, endpoint: string, ): void => { if (connected) return; + const redactedEndpoint = redactEndpointUserInfo(endpoint); notifyGraphitiAvailabilityIssue( - `Graphiti MCP unavailable at ${endpoint}; continuing without persistent memory.`, - { endpoint }, + `Graphiti MCP unavailable at ${redactedEndpoint}; continuing without persistent memory.`, + { endpoint: redactedEndpoint }, ); }; @@ -83,9 +96,10 @@ export const warnOnRedisStartupUnavailable = ( endpoint: string, ): void => { if (connected) return; + const redactedEndpoint = 
redactEndpointUserInfo(endpoint); notifyGraphitiAvailabilityIssue( - `Redis unavailable at ${endpoint}; continuing without persistent memory.`, - { endpoint }, + `Redis unavailable at ${redactedEndpoint}; continuing without persistent memory.`, + { endpoint: redactedEndpoint }, ); }; From d821ed79888f5fce7cd58d966934e31470018d5c Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 06:21:27 +0800 Subject: [PATCH 15/38] fix: redact invalid graphiti endpoint credentials --- src/services/connection-manager.test.ts | 4 ++-- src/services/connection-manager.ts | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/services/connection-manager.test.ts b/src/services/connection-manager.test.ts index 8318f77..bf59f18 100644 --- a/src/services/connection-manager.test.ts +++ b/src/services/connection-manager.test.ts @@ -917,7 +917,7 @@ describe("connection manager", () => { const error = assertThrows( () => new GraphitiConnectionManager({ - endpoint: "not a valid url", + endpoint: "http://user:secret@bad host", connectionFactory: () => ({ connect: () => Promise.resolve(), close: () => Promise.resolve(), @@ -925,7 +925,7 @@ describe("connection manager", () => { }), }), Error, - 'Invalid Graphiti endpoint: "not a valid url"', + 'Invalid Graphiti endpoint: "http://bad host"', ); assertInstanceOf(error.cause, TypeError); diff --git a/src/services/connection-manager.ts b/src/services/connection-manager.ts index da9bb79..b48319d 100644 --- a/src/services/connection-manager.ts +++ b/src/services/connection-manager.ts @@ -115,6 +115,21 @@ type PendingRequest = { type ConnectionFactory = (endpoint: string) => GraphitiConnection; +const redactEndpointUserInfo = (endpoint: string): string => { + try { + const url = new URL(endpoint); + if (!url.username && !url.password) return endpoint; + url.username = ""; + url.password = ""; + return url.toString(); + } catch { + return endpoint.replace( + /^([a-z][a-z0-9+.-]*:\/\/)(?:[^/?#@]*@)/i, + 
"$1", + ); + } +}; + const validateEndpoint = (endpoint: string): string => { const normalized = endpoint.trim(); if (!normalized) { @@ -125,7 +140,9 @@ const validateEndpoint = (endpoint: string): string => { new URL(normalized); } catch (cause) { const error = new Error( - `Invalid Graphiti endpoint: ${JSON.stringify(normalized)}`, + `Invalid Graphiti endpoint: ${ + JSON.stringify(redactEndpointUserInfo(normalized)) + }`, ); Object.defineProperty(error, "cause", { value: cause, From 57c221a366b3717bc3383bef9893a23f87dc41ba Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 06:42:37 +0800 Subject: [PATCH 16/38] fix: address remaining review follow-ups --- scripts/bench-falkordb.ts | 12 +- src/index.test.ts | 102 ++++++++- src/index.ts | 7 +- src/services/context-limit.test.ts | 324 ++++++++++++++++------------- src/services/context-limit.ts | 14 +- 5 files changed, 310 insertions(+), 149 deletions(-) diff --git a/scripts/bench-falkordb.ts b/scripts/bench-falkordb.ts index 01cf934..0332c01 100644 --- a/scripts/bench-falkordb.ts +++ b/scripts/bench-falkordb.ts @@ -13,6 +13,8 @@ type Stats = { type SampleMode = "set" | "get" | "del" | "ping"; +const CLEANUP_DELETE_BATCH_SIZE = 1_000; + // Default to localhost for safe contributor use. // Pass an explicit endpoint argument to target a different Redis host. const endpoint = Deno.args[0] ?? 
"redis://localhost:6379"; @@ -109,7 +111,15 @@ const run = async () => { (_, index) => `${keyPrefix}:${index}`, ); if (cleanupKeys.length) { - await redis.del(...cleanupKeys); + for ( + let index = 0; + index < cleanupKeys.length; + index += CLEANUP_DELETE_BATCH_SIZE + ) { + await redis.del( + ...cleanupKeys.slice(index, index + CLEANUP_DELETE_BATCH_SIZE), + ); + } } } catch { // ignore cleanup failures in benchmarking utility diff --git a/src/index.test.ts b/src/index.test.ts index 24be321..7c6b5b3 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ -609,6 +609,53 @@ describe("index", () => { }]); }); + it("redacts malformed Graphiti endpoint credentials in startup warnings", () => { + const appLogCalls: unknown[] = []; + const toastCalls: unknown[] = []; + const scheduledTasks: Array<() => void> = []; + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: (input: unknown) => { + appLogCalls.push(input); + }, + }, + tui: { + showToast: (input: unknown) => { + toastCalls.push(input); + }, + }, + }); + + warnOnGraphitiStartupUnavailable( + false, + "http://user:secret@graphiti.test:bad", + ); + + for (const task of scheduledTasks) task(); + + assertEquals(appLogCalls, [{ + body: { + service: "graphiti", + level: "warn", + message: + "Graphiti MCP unavailable at http://graphiti.test:bad; continuing without persistent memory.", + extra: { + endpoint: "http://graphiti.test:bad", + }, + }, + }]); + assertEquals(toastCalls, [{ + body: { + message: + "Graphiti MCP unavailable at http://graphiti.test:bad; continuing without persistent memory.", + variant: "warning", + }, + }]); + }); + it("does nothing when Graphiti is connected", () => { const appLogCalls: unknown[] = []; const toastCalls: unknown[] = []; @@ -672,7 +719,7 @@ describe("index", () => { service: "graphiti", level: "warn", message: - "Redis unavailable at redis://redis.test:6379; continuing without persistent memory.", + "Redis unavailable 
at redis://redis.test:6379; continuing with in-memory hot-tier fallback.", extra: { endpoint: "redis://redis.test:6379", }, @@ -681,7 +728,7 @@ describe("index", () => { assertEquals(toastCalls, [{ body: { message: - "Redis unavailable at redis://redis.test:6379; continuing without persistent memory.", + "Redis unavailable at redis://redis.test:6379; continuing with in-memory hot-tier fallback.", variant: "warning", }, }]); @@ -719,7 +766,7 @@ describe("index", () => { service: "graphiti", level: "warn", message: - "Redis unavailable at redis://redis.test:6379; continuing without persistent memory.", + "Redis unavailable at redis://redis.test:6379; continuing with in-memory hot-tier fallback.", extra: { endpoint: "redis://redis.test:6379", }, @@ -728,7 +775,54 @@ describe("index", () => { assertEquals(toastCalls, [{ body: { message: - "Redis unavailable at redis://redis.test:6379; continuing without persistent memory.", + "Redis unavailable at redis://redis.test:6379; continuing with in-memory hot-tier fallback.", + variant: "warning", + }, + }]); + }); + + it("redacts malformed Redis endpoint credentials in startup warnings", () => { + const appLogCalls: unknown[] = []; + const toastCalls: unknown[] = []; + const scheduledTasks: Array<() => void> = []; + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log: (input: unknown) => { + appLogCalls.push(input); + }, + }, + tui: { + showToast: (input: unknown) => { + toastCalls.push(input); + }, + }, + }); + + warnOnRedisStartupUnavailable( + false, + "redis://user:secret@redis.test:bad", + ); + + for (const task of scheduledTasks) task(); + + assertEquals(appLogCalls, [{ + body: { + service: "graphiti", + level: "warn", + message: + "Redis unavailable at redis://redis.test:bad; continuing with in-memory hot-tier fallback.", + extra: { + endpoint: "redis://redis.test:bad", + }, + }, + }]); + assertEquals(toastCalls, [{ + body: { + message: + "Redis 
unavailable at redis://redis.test:bad; continuing with in-memory hot-tier fallback.", variant: "warning", }, }]); diff --git a/src/index.ts b/src/index.ts index 0bd464c..40a3b4f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -75,7 +75,10 @@ const redactEndpointUserInfo = (endpoint: string): string => { url.password = ""; return url.toString(); } catch { - return endpoint; + return endpoint.replace( + /^([a-z][a-z0-9+.-]*:\/\/)(?:[^/?#@]*@)/i, + "$1", + ); } }; @@ -98,7 +101,7 @@ export const warnOnRedisStartupUnavailable = ( if (connected) return; const redactedEndpoint = redactEndpointUserInfo(endpoint); notifyGraphitiAvailabilityIssue( - `Redis unavailable at ${redactedEndpoint}; continuing without persistent memory.`, + `Redis unavailable at ${redactedEndpoint}; continuing with in-memory hot-tier fallback.`, { endpoint: redactedEndpoint }, ); }; diff --git a/src/services/context-limit.test.ts b/src/services/context-limit.test.ts index da352d1..84a1011 100644 --- a/src/services/context-limit.test.ts +++ b/src/services/context-limit.test.ts @@ -2,152 +2,159 @@ import { assertEquals } from "jsr:@std/assert@^1.0.0"; import { resolveContextLimit } from "./context-limit.ts"; Deno.test("resolveContextLimit re-probes after fallback cache expiry", async () => { - const originalNow = Date.now; let now = 100_000; - Date.now = () => now; - - try { - const cache = new Map< - string, - number | { value: number; expiresAt?: number } - >(); - let calls = 0; - const client = { - provider: { - list: () => { - calls += 1; - if (calls === 1) { - return Promise.reject(new Error("provider unavailable")); - } - - return Promise.resolve({ - providers: [ - { - id: "openai", - models: [{ id: "gpt-5", limit: { context: 123_456 } }], - }, - ], - }); - }, + const cache = new Map< + string, + number | { value: number; expiresAt?: number } + >(); + let calls = 0; + const client = { + provider: { + list: () => { + calls += 1; + if (calls === 1) { + return Promise.reject(new Error("provider 
unavailable")); + } + + return Promise.resolve({ + providers: [ + { + id: "openai", + models: [{ id: "gpt-5", limit: { context: 123_456 } }], + }, + ], + }); }, - }; - - assertEquals( - await resolveContextLimit( - "openai", - "gpt-5", - client as never, - undefined, - cache, - ), - 200_000, - ); - assertEquals( - await resolveContextLimit( - "openai", - "gpt-5", - client as never, - undefined, - cache, - ), - 200_000, - ); - assertEquals(calls, 1); - - now += 60_001; - - assertEquals( - await resolveContextLimit( - "openai", - "gpt-5", - client as never, - undefined, - cache, - ), - 123_456, - ); - assertEquals(calls, 2); - } finally { - Date.now = originalNow; - } + }, + }; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + undefined, + cache, + () => now, + ), + 200_000, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + undefined, + cache, + () => now, + ), + 200_000, + ); + assertEquals(calls, 1); + + now += 60_001; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + undefined, + cache, + () => now, + ), + 123_456, + ); + assertEquals(calls, 2); }); Deno.test("resolveContextLimit keeps fallback caches scoped per normalized directory until expiry", async () => { - const originalNow = Date.now; let now = 200_000; - Date.now = () => now; - - try { - const cache = new Map< - string, - number | { value: number; expiresAt?: number } - >(); - const calls: string[] = []; - const client = { - provider: { - list: ({ query }: { query?: { directory?: string } }) => { - calls.push(query?.directory ?? ""); - return Promise.reject(new Error("provider unavailable")); - }, + const cache = new Map< + string, + number | { value: number; expiresAt?: number } + >(); + const calls: string[] = []; + const client = { + provider: { + list: ({ query }: { query?: { directory?: string } }) => { + calls.push(query?.directory ?? 
""); + return Promise.reject(new Error("provider unavailable")); }, - }; - - assertEquals( - await resolveContextLimit( - "openai", - "gpt-5", - client as never, - "/tmp/project-a", - cache, - ), - 200_000, - ); - assertEquals( - await resolveContextLimit( - "openai", - "gpt-5", - client as never, - "/tmp/project-a", - cache, - ), - 200_000, - ); - assertEquals( - await resolveContextLimit( - "openai", - "gpt-5", - client as never, - " ", - cache, - ), - 200_000, - ); - assertEquals( - await resolveContextLimit("openai", "gpt-5", client as never, "", cache), - 200_000, - ); - assertEquals(calls, ["/tmp/project-a", ""]); - - now += 60_001; - - assertEquals( - await resolveContextLimit( - "openai", - "gpt-5", - client as never, - "/tmp/project-a", - cache, - ), - 200_000, - ); - assertEquals( - await resolveContextLimit("openai", "gpt-5", client as never, "", cache), - 200_000, - ); - assertEquals(calls, ["/tmp/project-a", "", "/tmp/project-a", ""]); - } finally { - Date.now = originalNow; - } + }, + }; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + "/tmp/project-a", + cache, + () => now, + ), + 200_000, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + "/tmp/project-a", + cache, + () => now, + ), + 200_000, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + " ", + cache, + () => now, + ), + 200_000, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + "", + cache, + () => now, + ), + 200_000, + ); + assertEquals(calls, ["/tmp/project-a", ""]); + + now += 60_001; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + "/tmp/project-a", + cache, + () => now, + ), + 200_000, + ); + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + "", + cache, + () => now, + ), + 200_000, + ); + assertEquals(calls, ["/tmp/project-a", "", 
"/tmp/project-a", ""]); }); Deno.test("resolveContextLimit keeps positive cache entries without expiry re-probes", async () => { @@ -234,3 +241,42 @@ Deno.test("resolveContextLimit re-probes when legacy numeric cache entry is non- assertEquals(calls, 1); assertEquals(cache.get("openai/gpt-5"), 456_000); }); + +Deno.test("resolveContextLimit re-probes when legacy object cache entry is non-positive without expiry", async () => { + const cache = new Map< + string, + number | { value: number; expiresAt?: number } + >(); + cache.set("openai/gpt-5", { value: -1 }); + + let calls = 0; + const client = { + provider: { + list: () => { + calls += 1; + return Promise.resolve({ + providers: [ + { + id: "openai", + models: [{ id: "gpt-5", limit: { context: 654_321 } }], + }, + ], + }); + }, + }, + }; + + assertEquals( + await resolveContextLimit( + "openai", + "gpt-5", + client as never, + undefined, + cache, + ), + 654_321, + ); + + assertEquals(calls, 1); + assertEquals(cache.get("openai/gpt-5"), 654_321); +}); diff --git a/src/services/context-limit.ts b/src/services/context-limit.ts index 874fb04..f3f7d33 100644 --- a/src/services/context-limit.ts +++ b/src/services/context-limit.ts @@ -30,6 +30,7 @@ export async function resolveContextLimit( client: OpencodeClient, directory: string | undefined, cache: Map, + now: () => number = Date.now, ): Promise { const normalizedDirectory = directory?.trim(); const modelKey = getContextLimitCacheKey( @@ -37,6 +38,7 @@ export async function resolveContextLimit( modelID, normalizedDirectory, ); + const currentTime = now(); const cached = cache.get(modelKey); if (cached !== undefined) { if (typeof cached === "number") { @@ -46,7 +48,13 @@ export async function resolveContextLimit( cache.delete(modelKey); } else { - if (cached.expiresAt === undefined || cached.expiresAt > Date.now()) { + if (cached.expiresAt === undefined) { + if (cached.value > 0) { + return cached.value; + } + + cache.delete(modelKey); + } else if (cached.expiresAt > 
currentTime) { return cached.value > 0 ? cached.value : DEFAULT_CONTEXT_LIMIT; } @@ -75,14 +83,14 @@ export async function resolveContextLimit( logger.warn("Failed to fetch provider context limit", err); cache.set(modelKey, { value: UNKNOWN_CONTEXT_LIMIT, - expiresAt: Date.now() + UNKNOWN_CONTEXT_LIMIT_TTL_MS, + expiresAt: currentTime + UNKNOWN_CONTEXT_LIMIT_TTL_MS, }); return DEFAULT_CONTEXT_LIMIT; } cache.set(modelKey, { value: UNKNOWN_CONTEXT_LIMIT, - expiresAt: Date.now() + UNKNOWN_CONTEXT_LIMIT_TTL_MS, + expiresAt: currentTime + UNKNOWN_CONTEXT_LIMIT_TTL_MS, }); return DEFAULT_CONTEXT_LIMIT; } From b2bbda12ba70427635a3bd04feb84ba0f9f4353a Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 12:28:50 +0800 Subject: [PATCH 17/38] refactor: share endpoint redaction helpers --- src/index.ts | 16 +--------------- src/services/connection-manager.ts | 16 +--------------- src/services/context-limit.ts | 2 +- src/services/endpoint-redaction.ts | 14 ++++++++++++++ 4 files changed, 17 insertions(+), 31 deletions(-) create mode 100644 src/services/endpoint-redaction.ts diff --git a/src/index.ts b/src/index.ts index 40a3b4f..131b4ef 100644 --- a/src/index.ts +++ b/src/index.ts @@ -11,6 +11,7 @@ import { BatchDrainService } from "./services/batch-drain.ts"; import { GraphitiConnectionManager } from "./services/connection-manager.ts"; import { GraphitiAsyncService } from "./services/graphiti-async.ts"; import { GraphitiMcpClient } from "./services/graphiti-mcp.ts"; +import { redactEndpointUserInfo } from "./services/endpoint-redaction.ts"; import { notifyGraphitiAvailabilityIssue, setOpenCodeClient, @@ -67,21 +68,6 @@ let activeRuntimeTeardown: | null = null; let runtimeInitialization = Promise.resolve(); -const redactEndpointUserInfo = (endpoint: string): string => { - try { - const url = new URL(endpoint); - if (!url.username && !url.password) return endpoint; - url.username = ""; - url.password = ""; - return url.toString(); - } catch { - return 
endpoint.replace( - /^([a-z][a-z0-9+.-]*:\/\/)(?:[^/?#@]*@)/i, - "$1", - ); - } -}; - export const warnOnGraphitiStartupUnavailable = ( connected: boolean, endpoint: string, diff --git a/src/services/connection-manager.ts b/src/services/connection-manager.ts index b48319d..06a27a0 100644 --- a/src/services/connection-manager.ts +++ b/src/services/connection-manager.ts @@ -1,6 +1,7 @@ import { Client } from "@modelcontextprotocol/sdk/client/index.js"; import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js"; import manifest from "../../deno.json" with { type: "json" }; +import { redactEndpointUserInfo } from "./endpoint-redaction.ts"; import { logger } from "./logger.ts"; export type GraphitiConnectionState = @@ -115,21 +116,6 @@ type PendingRequest = { type ConnectionFactory = (endpoint: string) => GraphitiConnection; -const redactEndpointUserInfo = (endpoint: string): string => { - try { - const url = new URL(endpoint); - if (!url.username && !url.password) return endpoint; - url.username = ""; - url.password = ""; - return url.toString(); - } catch { - return endpoint.replace( - /^([a-z][a-z0-9+.-]*:\/\/)(?:[^/?#@]*@)/i, - "$1", - ); - } -}; - const validateEndpoint = (endpoint: string): string => { const normalized = endpoint.trim(); if (!normalized) { diff --git a/src/services/context-limit.ts b/src/services/context-limit.ts index f3f7d33..457e56b 100644 --- a/src/services/context-limit.ts +++ b/src/services/context-limit.ts @@ -6,7 +6,7 @@ import { extractSdkProviders } from "./sdk-normalize.ts"; const UNKNOWN_CONTEXT_LIMIT = -1; const UNKNOWN_CONTEXT_LIMIT_TTL_MS = 60_000; -type ContextLimitCacheEntry = +export type ContextLimitCacheEntry = | number | { value: number; diff --git a/src/services/endpoint-redaction.ts b/src/services/endpoint-redaction.ts new file mode 100644 index 0000000..7c70d10 --- /dev/null +++ b/src/services/endpoint-redaction.ts @@ -0,0 +1,14 @@ +export const redactEndpointUserInfo = (endpoint: 
string): string => { + try { + const url = new URL(endpoint); + if (!url.username && !url.password) return endpoint; + url.username = ""; + url.password = ""; + return url.toString(); + } catch { + return endpoint.replace( + /^([a-z][a-z0-9+.-]*:\/\/)(?:[^/?#@]*@)/i, + "$1", + ); + } +}; From 6bd5dcca74f8226ceb18f5d28b991ebbb0539e19 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 12:39:15 +0800 Subject: [PATCH 18/38] fix: preserve session tool root ids after routing --- src/handlers/tool-before.test.ts | 65 ++++++++++++++++++++++++-------- src/handlers/tool-before.ts | 4 +- 2 files changed, 53 insertions(+), 16 deletions(-) diff --git a/src/handlers/tool-before.test.ts b/src/handlers/tool-before.test.ts index 586f75b..36851f3 100644 --- a/src/handlers/tool-before.test.ts +++ b/src/handlers/tool-before.test.ts @@ -279,6 +279,43 @@ describe("tool execute before handler", () => { assertEquals(routingOutcomes.take("call-9"), undefined); }); + it("preserves root_session_id when a session tool is modified by routing", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + canonicalizer.cached.set("child-session", "root-session"); + const handler = createToolBeforeHandler({ + sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall: () => ({ + action: "modify", + args: { query: "rewritten" }, + reason: "test-modify", + }), + }); + const output = { + args: { root_session_id: "wrong-root", query: "original" }, + }; + + await handler( + { + tool: "session_search", + sessionID: "child-session", + callID: "call-10", + } as never, + output as never, + ); + + assertEquals(output.args, { + root_session_id: "root-session", + query: "rewritten", + }); + assertEquals(routingOutcomes.take("call-10"), { + source: "tool-routing", + action: "modify", + reason: "test-modify", + }); + }); + it("does not inject root_session_id into native tools", async () => { const canonicalizer = new 
MockSessionCanonicalizer(); canonicalizer.cached.set("root-session", "root-session"); @@ -317,26 +354,16 @@ describe("tool execute before handler", () => { } }); - it("does not perform Redis or Graphiti access on the before-hook path", async () => { + it("runs the before-hook path for Read without unexpected side effects", async () => { const canonicalizer = new MockSessionCanonicalizer(); canonicalizer.cached.set("root-session", "root-session"); - const unexpectedCalls: string[] = []; const handler = createToolBeforeHandler({ sessionCanonicalizer: canonicalizer as never, guidanceThrottle: new ToolGuidanceCache(), routingOutcomes, routeToolCall, - redisEvents: { - recordEvent: () => { - unexpectedCalls.push("redisEvents.recordEvent"); - }, - }, - graphitiAsync: { - scheduleDrain: () => { - unexpectedCalls.push("graphitiAsync.scheduleDrain"); - }, - }, - } as never); + }); + const output = { args: { filePath: "/tmp/a.ts" } }; await handler( { @@ -344,9 +371,17 @@ describe("tool execute before handler", () => { sessionID: "root-session", callID: "call-7", } as never, - { args: { filePath: "/tmp/a.ts" } } as never, + output as never, ); - assertEquals(unexpectedCalls, []); + assertEquals(output.args.filePath, "/tmp/a.ts"); + assertEquals(canonicalizer.cachedCalls, ["root-session"]); + assertEquals(canonicalizer.resolveCalls, []); + assertEquals(routingOutcomes.take("call-7"), { + source: "tool-routing", + action: "context", + guidanceType: "read", + reason: "read-guidance", + }); }); }); diff --git a/src/handlers/tool-before.ts b/src/handlers/tool-before.ts index dd3bbbf..bfb1a5e 100644 --- a/src/handlers/tool-before.ts +++ b/src/handlers/tool-before.ts @@ -85,7 +85,9 @@ export function createToolBeforeHandler( case "allow": return; case "modify": - output.args = decision.args; + output.args = isSessionMcpTool(tool) + ? 
injectRootSessionId(toRecord(decision.args), canonicalSessionId) + : decision.args; deps.routingOutcomes.set(callID, { source: "tool-routing", action: "modify", From 208bfac7ac6549aaba7fe83a1f2ad70361e6035e Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 14:14:04 +0800 Subject: [PATCH 19/38] fix: align no-tag version baselines --- .github/scripts/version.test.ts | 41 +++++++++++++++++++++++++++++---- .github/scripts/version.ts | 2 +- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/.github/scripts/version.test.ts b/.github/scripts/version.test.ts index 960d289..a186634 100644 --- a/.github/scripts/version.test.ts +++ b/.github/scripts/version.test.ts @@ -812,7 +812,7 @@ describe("run", () => { "npm view fallback-package version": "0.1.0", "git log --format=%s": "docs: note fallback behavior", "git log --format=%b": "", - "git show --format= --name-only HEAD": "src/mod.ts\n", + "git log --format= --name-only": "src/mod.ts\n", }, now: new Date("2026-02-12T09:14:29Z"), }); @@ -848,7 +848,7 @@ describe("run", () => { "npm view commented-package version": "0.2.0", "git log --format=%s": "docs: note jsonc support", "git log --format=%b": "", - "git show --format= --name-only HEAD": ".github/scripts/version.ts\n", + "git log --format= --name-only": ".github/scripts/version.ts\n", }, now: new Date("2026-02-12T09:14:29Z"), }); @@ -908,8 +908,7 @@ describe("run", () => { "npm view fallback-package version": "0.1.0", "git log --format=%s": "docs: note fallback behavior", "git log --format=%b": "", - "git show --format= --name-only HEAD": - ".github/scripts/version.test.ts\n", + "git log --format= --name-only": ".github/scripts/version.test.ts\n", }, now: new Date("2026-02-12T09:14:29Z"), }); @@ -922,6 +921,40 @@ describe("run", () => { "No release-triggering commits since initial, skipping", ]); }); + + it("does not skip in the no-tag fallback when earlier unreleased commits changed non-test files", async () => { + const cli = makeCliDeps({ + 
env: { + GITHUB_EVENT_NAME: "pull_request", + GITHUB_OUTPUT: "/tmp/github-output", + }, + files: { + "package.json": JSON.stringify({ name: "fallback-package" }), + }, + commands: { + "git rev-parse HEAD": "abcdef1234567890", + "git describe --tags --abbrev=0 --match v*": new Error("no tags"), + "npm view fallback-package version": "0.1.0", + "git log --format=%s": + "docs: follow-up test coverage\nfeat: ship fallback alignment", + "git log --format=%b": "\n", + "git log --format= --name-only": + ".github/scripts/version.test.ts\nsrc/mod.ts\n.github/scripts/version.test.ts\n", + }, + now: new Date("2026-02-12T09:14:29Z"), + }); + + await run([], cli.deps); + + assertEquals(cli.outputs, [ + "version=0.1.1-canary.abcdef1.20260212091429\n", + "tag=canary\n", + ]); + assertEquals( + cli.logs.at(-1), + "Canary version: 0.1.1-canary.abcdef1.20260212091429", + ); + }); }); describe("parseCommandOutput", () => { diff --git a/.github/scripts/version.ts b/.github/scripts/version.ts index 9be7382..23020c9 100644 --- a/.github/scripts/version.ts +++ b/.github/scripts/version.ts @@ -408,7 +408,7 @@ export async function run( subjects = (await cmd("git", "log", "--format=%s")).split("\n"); bodies = (await cmd("git", "log", "--format=%b")).split("\n"); changedFiles = parseChangedFiles( - await cmd("git", "show", "--format=", "--name-only", "HEAD"), + await cmd("git", "log", "--format=", "--name-only"), ); noGitTags = true; } else { From 87cce930fdb88e9672a21d5ce7b1a712cb42dc1e Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Tue, 24 Mar 2026 15:23:48 +0800 Subject: [PATCH 20/38] fix: abort timed out graphiti requests --- src/services/connection-manager.test.ts | 67 +++++++++++++++++++++++++ src/services/connection-manager.ts | 45 +++++++++++++++-- 2 files changed, 109 insertions(+), 3 deletions(-) diff --git a/src/services/connection-manager.test.ts b/src/services/connection-manager.test.ts index bf59f18..3be0f95 100644 --- a/src/services/connection-manager.test.ts +++ b/src/services/connection-manager.test.ts @@ -128,6 +128,8 @@ type FakeConnection = { callTool: (request: { name: string; arguments?: Record; + }, options?: { + signal?: AbortSignal; }) => Promise; }; @@ -242,6 +244,38 @@ describe("connection manager", () => { await assertRejects(() => request, GraphitiRequestTimeoutError); }); + it("aborts the underlying connected call when the request deadline expires", async () => { + const clock = new FakeClock(); + let aborted = false; + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + requestDeadlineMs: 10, + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => Promise.resolve(), + callTool: (_request, options) => + new Promise((_resolve, reject) => { + options?.signal?.addEventListener("abort", () => { + aborted = true; + reject(options.signal?.reason); + }, { once: true }); + }), + }), + now: clock.nowFn, + setTimer: clock.setTimer, + clearTimer: clock.clearTimer, + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const request = manager.callTool("search", {}); + await clock.advanceBy(10); + + await assertRejects(() => request, GraphitiRequestTimeoutError); + assertEquals(aborted, true); + }); + it("times out already-connected calls at a per-request override", async () => { const clock = new FakeClock(); const manager = new GraphitiConnectionManager({ @@ -725,6 +759,39 @@ describe("connection manager", () => { assertEquals(clock.timers.size, 0); }); + it("stop aborts active in-flight connected 
calls", async () => { + const callGate = deferred(); + let aborted = false; + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => Promise.resolve(), + callTool: (_request, options) => + new Promise((_resolve, reject) => { + options?.signal?.addEventListener("abort", () => { + aborted = true; + reject(options.signal?.reason); + }, { once: true }); + void callGate.promise; + }), + }), + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const request = manager.callTool("search", {}); + await manager.stop(); + + const error = await assertRejects( + () => request, + GraphitiOfflineError, + ); + assertEquals(error.state, "closing"); + assertEquals(aborted, true); + }); + it("stop keeps reconnect from transitioning back to connected", async () => { let connectionIndex = 0; let failed = false; diff --git a/src/services/connection-manager.ts b/src/services/connection-manager.ts index 06a27a0..cd8f86a 100644 --- a/src/services/connection-manager.ts +++ b/src/services/connection-manager.ts @@ -88,10 +88,17 @@ export type GraphitiToolRequest = { arguments?: Record; }; +export type GraphitiRequestOptions = { + signal?: AbortSignal; +}; + export interface GraphitiConnection { connect(): Promise; close(): Promise; - callTool(request: GraphitiToolRequest): Promise; + callTool( + request: GraphitiToolRequest, + options?: GraphitiRequestOptions, + ): Promise; } export interface GraphitiToolCaller { @@ -168,7 +175,8 @@ function createMcpConnection(endpoint: string): GraphitiConnection { return { connect: () => client.connect(transport), close: () => client.close(), - callTool: (request) => client.callTool(request), + callTool: (request, options) => + client.callTool(request, undefined, options), }; } @@ -254,6 +262,7 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { private started = false; private flushingQueue = false; private 
stopPromise: Promise | null = null; + private activeRequestControllers = new Set(); constructor(options: GraphitiConnectionManagerOptions) { this.endpoint = validateEndpoint(options.endpoint); @@ -309,6 +318,12 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { "Graphiti connection manager is closing", ), ); + this.abortActiveRequests( + new GraphitiOfflineError( + "closing", + "Graphiti connection manager is closing", + ), + ); this.resolveReadyWaiters(false); const connection = this.connection; @@ -471,9 +486,15 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { } try { + const controller = new AbortController(); + this.activeRequestControllers.add(controller); return await this.runWithRequestDeadline( - this.connection.callTool({ name, arguments: args }), + this.connection.callTool( + { name, arguments: args }, + { signal: controller.signal }, + ), deadlineMs, + controller, ); } catch (err) { if (isRequestTimeout(err)) { @@ -551,10 +572,16 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { private runWithRequestDeadline( task: Promise, deadlineMs: number, + controller?: AbortController, ): Promise { return new Promise((resolve, reject) => { let settled = false; let timer: TimerHandle | null = null; + const finish = () => { + if (controller) { + this.activeRequestControllers.delete(controller); + } + }; const clearDeadlineTimer = () => { if (timer !== null) { this.clearTimerImpl(timer); @@ -566,6 +593,8 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { if (settled) return; settled = true; clearDeadlineTimer(); + controller?.abort(new GraphitiRequestTimeoutError()); + finish(); reject(new GraphitiRequestTimeoutError()); }, deadlineMs); @@ -574,18 +603,28 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { if (settled) return; settled = true; clearDeadlineTimer(); + finish(); resolve(value); }, (error) => { if (settled) return; settled = true; 
clearDeadlineTimer(); + finish(); reject(error); }, ); }); } + private abortActiveRequests(reason: unknown): void { + const controllers = [...this.activeRequestControllers]; + this.activeRequestControllers.clear(); + for (const controller of controllers) { + controller.abort(reason); + } + } + private enqueueRequest( name: string, args: Record, From 2e9a5b063e98bd1875d9c67900a79fa78edc49f4 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 15:41:56 +0800 Subject: [PATCH 21/38] docs: clarify local build version fallback --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 685c3d9..3c26cb6 100644 --- a/README.md +++ b/README.md @@ -155,8 +155,9 @@ Add the plugin to your `opencode.json` (or `opencode.jsonc`): Local distributable builds are not a routine local setup step: `deno task build` -requires an explicit `VERSION` via `dnt.ts`. If you already have a built -artifact, add it to your `opencode.json`: +uses the `VERSION` environment variable when set and otherwise falls back to the +`version` in `deno.json` via `dnt.ts`. If you already have a built artifact, add +it to your `opencode.json`: ```jsonc { From 39fa3fc02ddd5f1ee42e6926e859a43743d2a87b Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 15:59:38 +0800 Subject: [PATCH 22/38] fix: tighten follow-up review edge cases --- README.md | 4 ++++ src/services/connection-manager.test.ts | 28 +++++++++++++++++++++++ src/services/connection-manager.ts | 17 ++++++++++++++ src/services/context-limit.test.ts | 30 ++++++++----------------- 4 files changed, 58 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 3c26cb6..1610e1e 100644 --- a/README.md +++ b/README.md @@ -231,6 +231,10 @@ top-level aliases remain supported for backward compatibility. Precedence is: 1. `redis.*` (canonical) 2. 
top-level Graphiti aliases such as `endpoint` and `groupIdPrefix` +Endpoint values must be valid URLs, so include the scheme explicitly - for +example `redis://localhost:6379` for Redis and `http://localhost:8000/mcp` for +Graphiti. + ### Legacy Top-Level Keys For backward compatibility, the following original Graphiti top-level keys are diff --git a/src/services/connection-manager.test.ts b/src/services/connection-manager.test.ts index 3be0f95..4c62eec 100644 --- a/src/services/connection-manager.test.ts +++ b/src/services/connection-manager.test.ts @@ -792,6 +792,34 @@ describe("connection manager", () => { assertEquals(aborted, true); }); + it("maps AbortError rejections during shutdown to GraphitiOfflineError", async () => { + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => Promise.resolve(), + callTool: (_request, options) => + new Promise((_resolve, reject) => { + options?.signal?.addEventListener("abort", () => { + reject(new DOMException("aborted", "AbortError")); + }, { once: true }); + }), + }), + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + const request = manager.callTool("search", {}); + await manager.stop(); + + const error = await assertRejects( + () => request, + GraphitiOfflineError, + ); + assertEquals(error.state, "closing"); + }); + it("stop keeps reconnect from transitioning back to connected", async () => { let connectionIndex = 0; let failed = false; diff --git a/src/services/connection-manager.ts b/src/services/connection-manager.ts index cd8f86a..9809a94 100644 --- a/src/services/connection-manager.ts +++ b/src/services/connection-manager.ts @@ -214,6 +214,15 @@ function isSessionExpired(err: unknown): boolean { ); } +function isAbortError(err: unknown): boolean { + if (!err || typeof err !== "object") return false; + if (typeof (err as { name?: unknown }).name === "string") { + return (err as { name: 
string }).name === "AbortError"; + } + return typeof DOMException !== "undefined" && err instanceof DOMException && + err.name === "AbortError"; +} + function isTransportFailure(err: unknown): boolean { if (!err) return false; if (isRequestTimeout(err) || isSessionExpired(err)) return false; @@ -497,6 +506,14 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { controller, ); } catch (err) { + if (err instanceof GraphitiOfflineError) { + throw err; + } + + if (this.stopPromise && isAbortError(err)) { + throw new GraphitiOfflineError("closing"); + } + if (isRequestTimeout(err)) { throw new GraphitiRequestTimeoutError( getErrorMessage(err) || undefined, diff --git a/src/services/context-limit.test.ts b/src/services/context-limit.test.ts index 84a1011..e6386a5 100644 --- a/src/services/context-limit.test.ts +++ b/src/services/context-limit.test.ts @@ -1,12 +1,12 @@ import { assertEquals } from "jsr:@std/assert@^1.0.0"; -import { resolveContextLimit } from "./context-limit.ts"; +import { + type ContextLimitCacheEntry, + resolveContextLimit, +} from "./context-limit.ts"; Deno.test("resolveContextLimit re-probes after fallback cache expiry", async () => { let now = 100_000; - const cache = new Map< - string, - number | { value: number; expiresAt?: number } - >(); + const cache = new Map(); let calls = 0; const client = { provider: { @@ -70,10 +70,7 @@ Deno.test("resolveContextLimit re-probes after fallback cache expiry", async () Deno.test("resolveContextLimit keeps fallback caches scoped per normalized directory until expiry", async () => { let now = 200_000; - const cache = new Map< - string, - number | { value: number; expiresAt?: number } - >(); + const cache = new Map(); const calls: string[] = []; const client = { provider: { @@ -158,10 +155,7 @@ Deno.test("resolveContextLimit keeps fallback caches scoped per normalized direc }); Deno.test("resolveContextLimit keeps positive cache entries without expiry re-probes", async () => { - const cache = 
new Map< - string, - number | { value: number; expiresAt?: number } - >(); + const cache = new Map(); let calls = 0; const client = { provider: { @@ -204,10 +198,7 @@ Deno.test("resolveContextLimit keeps positive cache entries without expiry re-pr }); Deno.test("resolveContextLimit re-probes when legacy numeric cache entry is non-positive", async () => { - const cache = new Map< - string, - number | { value: number; expiresAt?: number } - >(); + const cache = new Map(); cache.set("openai/gpt-5", -1); let calls = 0; @@ -243,10 +234,7 @@ Deno.test("resolveContextLimit re-probes when legacy numeric cache entry is non- }); Deno.test("resolveContextLimit re-probes when legacy object cache entry is non-positive without expiry", async () => { - const cache = new Map< - string, - number | { value: number; expiresAt?: number } - >(); + const cache = new Map(); cache.set("openai/gpt-5", { value: -1 }); let calls = 0; From 080030df0391a5b16773ae4861552734cde47193 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 16:12:06 +0800 Subject: [PATCH 23/38] fix: clarify config and tool denial behavior --- src/config.ts | 5 ++--- src/handlers/tool-before.test.ts | 4 ++-- src/handlers/tool-before.ts | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/config.ts b/src/config.ts index 2bd5da0..f4ed017 100644 --- a/src/config.ts +++ b/src/config.ts @@ -170,9 +170,8 @@ const resolveConfig = (value: RawGraphitiConfig | null): GraphitiConfig => { const resolvedCacheTtlSeconds = resolveNumber(raw.redis?.cacheTtlSeconds); const resolvedDrainRetryMax = resolveNumber(raw.redis?.drainRetryMax); const requestedGraphitiEndpoint = raw.graphiti?.endpoint ?? raw.endpoint; - const resolvedGraphitiEndpoint = isValidUrlString(requestedGraphitiEndpoint) - ? requestedGraphitiEndpoint - : DEFAULT_CONFIG.graphiti.endpoint; + const resolvedGraphitiEndpoint = requestedGraphitiEndpoint ?? 
+ DEFAULT_CONFIG.graphiti.endpoint; const resolvedGroupIdPrefix = raw.graphiti?.groupIdPrefix ?? raw.groupIdPrefix ?? DEFAULT_CONFIG.graphiti.groupIdPrefix; diff --git a/src/handlers/tool-before.test.ts b/src/handlers/tool-before.test.ts index 36851f3..98f66db 100644 --- a/src/handlers/tool-before.test.ts +++ b/src/handlers/tool-before.test.ts @@ -55,7 +55,7 @@ describe("tool execute before handler", () => { { args: { url: "https://example.com" } } as never, ), Error, - "WebFetch", + "Tool denied (WebFetch):", ); assertEquals(routingOutcomes.take("call-1"), { @@ -86,7 +86,7 @@ describe("tool execute before handler", () => { { args: { url: "https://example.com" } } as never, ), Error, - "WebFetch", + "Tool denied (WebFetch):", ); assertEquals(canonicalizer.cachedCalls, ["child-session"]); diff --git a/src/handlers/tool-before.ts b/src/handlers/tool-before.ts index bfb1a5e..6fd9fa9 100644 --- a/src/handlers/tool-before.ts +++ b/src/handlers/tool-before.ts @@ -108,7 +108,7 @@ export function createToolBeforeHandler( action: "deny", reason: decision.reason, }); - throw new Error(decision.guidance); + throw new Error(`Tool denied (${tool}): ${decision.guidance}`); } }; } From 0ae3086cc647f36f9a0723e10fb61116c1816bd6 Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Tue, 24 Mar 2026 16:36:44 +0800 Subject: [PATCH 24/38] fix: sweep recurring review issues repo-wide --- AGENTS.md | 12 +- deno.lock | 21 +- docs/ContextOverhaulTests.md | 766 ------------------ ...03-24-agentic-runtime-test-plan-rewrite.md | 463 +++++++++++ ...-03-24-agentic-runtime-test-plan-design.md | 304 +++++++ ...6-03-24-review-issue-class-sweep-design.md | 229 ++++++ src/config.test.ts | 22 + src/config.ts | 3 +- src/services/batch-drain.test.ts | 3 +- src/services/connection-manager.test.ts | 34 +- src/services/connection-manager.ts | 14 +- src/services/graphiti-mcp.test.ts | 77 +- src/services/graphiti-mcp.ts | 8 +- src/services/hot-tier-slice.test.ts | 3 +- src/services/redis-cache.test.ts | 3 +- src/services/redis-client.test.ts | 3 +- src/services/redis-events.test.ts | 3 +- src/services/session-corpus.ts | 3 +- src/services/session-executor.test.ts | 3 +- src/services/session-executor.ts | 8 +- src/services/session-mcp-runtime.test.ts | 32 +- src/services/session-mcp-runtime.ts | 4 +- src/services/test-helpers.ts | 1 + src/utils.test.ts | 17 + src/utils.ts | 22 + 25 files changed, 1248 insertions(+), 810 deletions(-) delete mode 100644 docs/ContextOverhaulTests.md create mode 100644 docs/superpowers/plans/2026-03-24-agentic-runtime-test-plan-rewrite.md create mode 100644 docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md create mode 100644 docs/superpowers/specs/2026-03-24-review-issue-class-sweep-design.md create mode 100644 src/services/test-helpers.ts diff --git a/AGENTS.md b/AGENTS.md index 0b5c531..fcb0f71 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -22,7 +22,7 @@ asynchronously on idle or after compaction. ### Hot Path -- **Redis/FalkorDB** only. ioredis TCP client at `localhost:6379` +- **Redis/FalkorDB** only. ioredis client at `redis://localhost:6379` (configurable). - Stores: session events, snapshots, memory cache, pending drain batches. 
- Used by: `chat.message`, `messages.transform`, `session.compacting`, event @@ -30,7 +30,8 @@ asynchronously on idle or after compaction. ### Async Tier (Background) -- **Graphiti MCP** HTTP endpoint (default `localhost:8000/mcp`, configurable). +- **Graphiti MCP** HTTP endpoint (default `http://localhost:8000/mcp`, + configurable). - Async drain service: batches buffered events, retries on failure, flushes on idle or post-compaction. - Background cache refresh: searches Graphiti when topic drift is detected, @@ -142,8 +143,10 @@ Canonical shape (nested): "redis": { "endpoint": "redis://localhost:6379", "batchSize": 20, + "batchMaxBytes": 51200, "sessionTtlSeconds": 86400, - "cacheTtlSeconds": 600 + "cacheTtlSeconds": 600, + "drainRetryMax": 3 }, "graphiti": { "endpoint": "http://localhost:8000/mcp", @@ -153,6 +156,9 @@ Canonical shape (nested): } ``` +Endpoint values must be explicit URLs with schemes, for example +`redis://localhost:6379` for Redis and `http://localhost:8000/mcp` for Graphiti. 
+ ## Key Files & Their Scope | File | Purpose | diff --git a/deno.lock b/deno.lock index 0ede3b8..c5d9fe7 100644 --- a/deno.lock +++ b/deno.lock @@ -13,6 +13,7 @@ "jsr:@std/testing@1": "1.0.17", "jsr:@ts-morph/bootstrap@0.27": "0.27.0", "jsr:@ts-morph/common@0.27": "0.27.0", + "npm:@modelcontextprotocol/sdk@1.3.0": "1.3.0", "npm:@modelcontextprotocol/sdk@^1.25.2": "1.25.2_zod@4.3.6_ajv@8.17.1_express@5.2.1", "npm:@opencode-ai/plugin@1.2.26": "1.2.26", "npm:@opencode-ai/sdk@1.2.26": "1.2.26", @@ -119,7 +120,16 @@ "pkce-challenge", "raw-body", "zod@4.3.6", - "zod-to-json-schema" + "zod-to-json-schema@3.25.1_zod@4.3.6" + ] + }, + "@modelcontextprotocol/sdk@1.3.0": { + "integrity": "sha512-no7j22gAE5dYZ65PppPfbnevO5GiX8b53LA7tb6KfUrmHMFac4ciESZctoivC97aoH4i0xQBtTzli1Q+wpMy5w==", + "dependencies": [ + "content-type", + "raw-body", + "zod@3.25.76", + "zod-to-json-schema@3.25.1_zod@3.25.76" ] }, "@opencode-ai/plugin@1.2.26": { @@ -711,12 +721,21 @@ "wrappy@1.0.2": { "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" }, + "zod-to-json-schema@3.25.1_zod@3.25.76": { + "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==", + "dependencies": [ + "zod@3.25.76" + ] + }, "zod-to-json-schema@3.25.1_zod@4.3.6": { "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==", "dependencies": [ "zod@4.3.6" ] }, + "zod@3.25.76": { + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==" + }, "zod@4.1.8": { "integrity": "sha512-5R1P+WwQqmmMIEACyzSvo4JXHY5WiAFHRMg+zBZKgKS+Q1viRa0C1hmUKtHltoIFKtIdki3pRxkmpP74jnNYHQ==" }, diff --git a/docs/ContextOverhaulTests.md b/docs/ContextOverhaulTests.md deleted file mode 100644 index a2c69a7..0000000 --- a/docs/ContextOverhaulTests.md +++ /dev/null @@ -1,766 +0,0 @@ -# Context Overhaul — Test Plan - -**Status:** Superseded — 
retained as historical context only\ -**Date:** 2026-03-14\ -**Original design:** [`docs/ContextOverhaul.md`](ContextOverhaul.md) (also -superseded)\ -**Active acceptance matrix:** -`docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md` - -> **Note:** This test plan was written for the native-hook-first hot-path -> design. The active architecture is now MCP-first; see the implementation plan -> linked above for the current acceptance criteria. The suites below remain as -> historical reference for the original hot-path invariants. -> -> **Historical-only note:** Checklist items in this document are not the -> authoritative Task 7 gate. Use the implementation plan above for current -> MCP-first acceptance, including local-first `<session_memory>`, compaction -> continuity, and Graphiti-off-the-hot-path verification. - ---- - -## 1 Purpose - -Verify that the Context Overhaul implementation delivers on its four core -promises: - -1. **Zero Graphiti on the hot path** — no synchronous MCP/Graphiti call blocks - any hook return. -2. **High-quality session continuity** — compact `session_memory` envelopes - restore task state, decisions, files, and rules after compaction or restart. -3. **High-quality cross-session persistent memory** — `persistent_memory` - surfaces relevant project-bound facts from the Graphiti cache without noise. -4. **Graceful degradation** — the plugin remains functional when Redis or - Graphiti is unavailable. - -Secondary goals: - -- Confirm the implementation avoids legacy verbose `<memory data-uuids="...">` - hot-path injection. -- Confirm context payloads stay within budget and do not regress in size or - latency. -- Produce CI-friendly artifacts (timing logs, payload snapshots, pass/fail exit - codes). - ---- - -## 2 Non-Goals / Scope Boundaries - -- [ ] **Not testing Graphiti internals** — entity extraction quality, vector - search recall, or FalkorDB query plans are out of scope.
-- [ ] **Not testing OpenCode core** — compaction summarizer quality, hook - dispatch ordering, or provider prefix caching are assumed correct. -- [ ] **Not testing MCP protocol compliance** — the MCP transport layer is - covered by `ConnectionManager` tests. -- [ ] **Not benchmarking LLM output quality** — we test structural properties of - injected context, not whether the LLM "understands" it. -- [ ] **Not covering UI/UX** — no visual or interactive-shell UX assertions. - ---- - -## 3 Test Environment / Dependencies - -### 3.1 Required Services - -| Service | Purpose | Test mode | -| -------- | --------------------------- | ---------------------------------------------------------- | -| FalkorDB | Redis-protocol hot tier | Real instance (Docker) or `MockRedisClient` for unit tests | -| Graphiti | Async consolidation backend | Real MCP endpoint or stub/mock for isolation tests | -| Deno | Runtime | `deno test` with `--allow-net --allow-env` | - -### 3.2 Test Tiers - -| Tier | Scope | External deps | Speed | -| ----------- | --------------------------- | ------------- | ------- | -| Unit | Pure functions, extractors | None (mocks) | < 5 s | -| Integration | Redis read/write, MCP calls | FalkorDB | < 30 s | -| System | Full hook lifecycle | Both services | < 120 s | -| Regression | Size/latency budgets | Both services | < 60 s | - -### 3.3 CI Matrix - -```yaml -# Suggested GitHub Actions matrix -strategy: - matrix: - tier: [unit, integration, system, regression] - redis: [real, mock] - graphiti: [real, stub] - exclude: - - tier: unit - redis: real - - tier: unit - graphiti: real -``` - ---- - -## 4 Required Fixtures and Seeded Memory Data - -### 4.1 Redis Fixtures - -| Fixture key | Content | Used by suites | -| ------------------------------ | ------------------------------------------------------------- | -------------------------------- | -| `session:test-1:events` | 15 `SessionEvent` objects spanning all `EventCategory` values | Continuity, compaction, snapshot 
| -| `session:test-1:snapshot` | Pre-built priority-tiered XML snapshot (< 3 KB) | Compaction, restart/recovery | -| `memory-cache:test-group` | Serialized Graphiti search results (3 facts, 2 nodes) | Persistent memory, drift refresh | -| `memory-cache:test-group:meta` | `lastQuery`, `lastRefresh`, `factUuids` hash | Drift detection, staleness | -| `drain:pending:test-group` | 5 serialized drain-batch entries | Drain, crash recovery | -| `drain:cursor:test-group` | Event ID of last drained event | Drain resume | - -### 4.2 Graphiti Stub Responses - -| MCP tool call | Stub response | -| --------------------- | ---------------------------------------------------------------------- | -| `search_memory_facts` | 3 facts with UUIDs, validity dates, and relevance scores | -| `search_nodes` | 2 entity nodes with summaries | -| `get_episodes` | 1 recent session snapshot episode | -| `add_memory` | Success acknowledgment (or configurable failure for degradation tests) | -| `get_status` | Health OK (or configurable timeout/error) | - -### 4.3 Legacy Fixture - -A message array containing a -`verbose block...` -part, used to verify migration/compatibility behavior. - ---- - -## 5 Observability / Instrumentation - -Tests must capture and assert on the following observable signals: - -### 5.1 Timing - -- [ ] Wall-clock time of every hook return (`chat.message`, - `messages.transform`, `session.compacting`). -- [ ] Async operation durations (drain batch, cache refresh) logged but not on - the critical path. - -### 5.2 Payload Snapshots - -- [ ] Serialized `session_memory` envelope captured as a CI artifact on every - injection. -- [ ] Snapshot XML captured on every `session.idle` and `session.compacted` - event. -- [ ] Byte size of each injected payload recorded for regression tracking. - -### 5.3 Structured Logs - -- [ ] All Redis reads/writes logged with key name and byte size. -- [ ] All async MCP calls logged with tool name, duration, and success/failure. 
-- [ ] Drift detection decisions logged with Jaccard score and refresh trigger. - -### 5.4 CI Artifact Collection - -``` -artifacts/ - timing-report.json # per-hook wall-clock times - payload-snapshots/ # serialized XML/envelope per test case - size-regression.csv # payload byte sizes across runs - coverage-report/ # deno test --coverage output -``` - ---- - -## 6 Test Suites - -### Suite A: Hot-Path No-Graphiti Guarantee - -**Goal:** Prove that no synchronous Graphiti/MCP call occurs during any hot-path -hook. - -**Tier:** Unit + Integration - -**Method:** Instrument the MCP client with a call counter. Assert the counter is -zero after each hot-path hook completes. - -#### Checklist - -- [ ] A-1: `chat.message` handler completes without any MCP `callTool` - invocation. -- [ ] A-2: `experimental.chat.messages.transform` completes without any MCP - `callTool` invocation. -- [ ] A-3: `experimental.session.compacting` completes without any MCP - `callTool` invocation. -- [ ] A-4: `event: message.updated` handler completes without any MCP `callTool` - invocation. -- [ ] A-5: `event: session.compacted` synchronous portion completes without any - MCP `callTool` invocation. -- [ ] A-6: `event: session.idle` synchronous portion completes without any MCP - `callTool` invocation. -- [ ] A-7: All hot-path hooks return within 5 ms when Redis is available - (wall-clock assertion). -- [ ] A-8: Async MCP calls (drain, cache refresh) are confirmed to fire _after_ - the hook returns, via event ordering in the log. - -**Automation:** Fully automatable with mock MCP client and `MockRedisClient`. - ---- - -### Suite B: Compact Memory Payloads - -**Goal:** Verify injected `session_memory` envelopes are compact, structured, -and within budget. - -**Tier:** Unit - -#### Checklist - -- [ ] B-1: `session_memory` envelope byte size is <= 2 400 chars (1 600 session - guide + 800 snapshot). 
-- [ ] B-2: `persistent_memory` section, when present, fits within the remainder - of the 5% context budget. -- [ ] B-3: Total injected payload (session + persistent) does not exceed 5% of a - 128k-token model context (≈ 25 600 chars). -- [ ] B-4: Snapshot XML conforms to the priority-tiered schema from - [`docs/ContextOverhaul.md` §8.3](ContextOverhaul.md#83-snapshot-policy). -- [ ] B-5: Snapshot respects the 3 KB budget — lower-priority sections are - truncated first. -- [ ] B-6: Each `session_memory` always contains `last_request`; list sections - (`active_tasks`, `key_decisions`, `files_in_play`, `project_rules`) are - present only when they have content and are omitted when empty. -- [ ] B-7: Optional sections (`unresolved_errors`, `git_state`, `subagent_work`, - `session_snapshot`, `persistent_memory`) appear only when source data - exists. -- [ ] B-8: No raw tool output, raw transcript text, or multi-KB body content - appears in the injected envelope. - -**Automation:** Fully automatable — parse XML, measure byte sizes, assert -structure. - ---- - -### Suite C: No Raw Tool/Transcript Dumps in Hot-Tier State - -**Goal:** Confirm the implementation follows the context-mode strategy of -capturing structured events rather than raw transcripts. - -**Tier:** Unit - -#### Checklist - -- [ ] C-1: `SessionEvent.body` field is truncated to <= 4 KB per the schema. -- [ ] C-2: Events extracted from tool-result messages store a summary (≤ 200 - chars) and metadata, not the full tool output. -- [ ] C-3: `session:{id}:events` list entries do not contain raw assistant - message text longer than the `body` limit. -- [ ] C-4: The priority-tiered snapshot contains no raw tool output — only - summaries, file paths, and structured state. -- [ ] C-5: Compaction context (`session.compacting` output) contains no raw - transcript replay — only the canonical `session_memory` envelope. 
-- [ ] C-6: `memory-cache:{groupId}` stores parsed/structured Graphiti results, - not raw MCP response JSON. - -**Automation:** Fully automatable — inspect serialized Redis values and hook -outputs. - ---- - -### Suite D: Session Continuity Quality - -**Goal:** Verify that within a single session, the injected context accurately -reflects the conversation state. - -**Tier:** Integration - -#### Checklist - -- [ ] D-1: After 5 user/assistant exchanges, `session_memory` reflects the - current task, recent decisions, and touched files. -- [ ] D-2: After a user correction ("actually, use X instead of Y"), the next - `session_memory` includes the correction in `key_decisions`. -- [ ] D-3: After a file edit event, `files_in_play` lists the edited file. -- [ ] D-4: After an error event, `unresolved_errors` appears in the envelope. -- [ ] D-5: After the error is resolved, `unresolved_errors` is removed from - subsequent envelopes. -- [ ] D-6: `last_request` always reflects the most recent user message intent, - not a stale prior message. -- [ ] D-7: Session events are ordered chronologically in Redis (`LRANGE` returns - FIFO order). -- [ ] D-8: The `session_memory` envelope is idempotent — calling - `prepareInjection` twice with the same state produces identical output. - -**Automation:** Automatable with simulated hook sequences against -`MockRedisClient`. - ---- - -### Suite E: Compaction Continuity - -**Goal:** Verify that context survives compaction with no loss of critical -state. - -**Tier:** Integration - -#### Checklist - -- [ ] E-1: `session.compacting` hook injects a `session_memory` envelope into - `output.context`. -- [ ] E-2: The compaction-injected envelope contains the same required sections - as chat-time injection (B-6). -- [ ] E-3: After `session.compacted` fires, a new snapshot is built from - surviving events and stored in Redis. 
-- [ ] E-4: The post-compaction snapshot preserves P0 content (decisions, - constraints, active task) even when lower-priority sections are truncated. -- [ ] E-5: A `chat.message` arriving after compaction produces a - `session_memory` that includes the post-compaction snapshot. -- [ ] E-6: Compaction summary is enqueued to `drain:pending:{groupId}` for async - Graphiti ingestion. -- [ ] E-7: Multiple sequential compactions do not cause snapshot drift — each - rebuild uses the current event list. -- [ ] E-8: Compaction with an empty `memory-cache` (cold Graphiti) still - produces a valid `session_memory` and omits `<persistent_memory>`. - -**Automation:** Automatable with simulated compaction lifecycle against mocks. - ---- - -### Suite F: Cross-Session Project-Bound Persistent Memory - -**Goal:** Verify that `persistent_memory` surfaces relevant project-scoped facts -from the Graphiti cache and that cross-session recall works. - -**Tier:** Integration + System - -#### Checklist - -- [ ] F-1: On a new session with a warm `memory-cache:{groupId}`, the first - `messages.transform` includes `persistent_memory` with cached facts. -- [ ] F-2: On a new session with a cold cache, the first turn omits - `persistent_memory`; subsequent turns include it after async warmup - completes. -- [ ] F-3: `persistent_memory` omits legacy `fact_uuids`; the emitted shape uses - `node_refs` only. -- [ ] F-4: Facts from a different `groupId` (different project) do not appear in - `persistent_memory`. -- [ ] F-5: Stale facts (older than `factStaleDays`) are annotated or filtered - per configuration. -- [ ] F-6: `persistent_memory` content is a structured summary, not raw Graphiti - JSON. -- [ ] F-7: After draining events to Graphiti and refreshing the cache, newly - created fact/node summaries appear in `persistent_memory` on subsequent - sessions. -- [ ] F-8: The `node_refs` attribute in `persistent_memory` lists entity node - references when present.
- -**Automation:** F-1 through F-6 automatable with mocks. F-7 requires a real -Graphiti endpoint (system tier). F-8 automatable with stub responses. - ---- - -### Suite G: Memory Relevance / Anti-Noise - -**Goal:** Confirm that injected memory is relevant to the current conversation -and does not include noise. - -**Tier:** Unit + Integration - -#### Checklist - -- [ ] G-1: When the user asks about "Redis configuration", `persistent_memory` - does not include facts about unrelated topics (e.g., "CSS styling - preferences"). -- [ ] G-2: Duplicate facts (same UUID) are never injected twice in a single - envelope. -- [ ] G-3: The `visibleFactUuids` tracking prevents re-injection of - already-visible facts within the same session. -- [ ] G-4: `persistent_memory` respects the budget remainder — it does not crowd - out `session_memory` core sections. -- [ ] G-5: When cached persistent memory has zero relevant results, - `persistent_memory` is omitted entirely (not rendered as an empty tag). -- [ ] G-6: The legacy `<memory data-uuids="...">` block is never emitted by the new - implementation — only `<session_memory>` with optional - `<persistent_memory>`. - -**Automation:** G-1 requires semantic evaluation (semi-automated with keyword -matching on stub data). G-2 through G-6 fully automatable. - ---- - -### Suite H: Drift Refresh Behavior - -**Goal:** Verify that topic drift triggers an async cache refresh and that the -refreshed cache is used on the next turn. - -**Tier:** Integration - -#### Checklist - -- [ ] H-1: When Jaccard similarity between current query text and cached query - text drops below `driftThreshold`, an async cache refresh is scheduled. -- [ ] H-2: The current (stale) cache is still injected on the drift-triggering - message (one-message staleness tradeoff). -- [ ] H-3: On the next `chat.message` after the refresh completes, the updated - cache is injected. -- [ ] H-4: When Jaccard similarity is above `driftThreshold`, no refresh is - scheduled.
-- [ ] H-5: Drift detection uses the cached query metadata in - `memory-cache:{groupId}:meta`, not a live Graphiti query. -- [ ] H-6: Rapid successive messages with different topics do not cause - thundering-herd refresh calls — only one refresh is in flight per group at - a time, with newer queries picked up after the in-flight refresh settles. - -**Automation:** Fully automatable with mock MCP client tracking call counts and -timing. - ---- - -### Suite I: Restart / Recovery Behavior - -**Goal:** Verify that plugin restart recovers state from Redis and resumes -normal operation. - -**Tier:** Integration - -#### Checklist - -- [ ] I-1: After plugin restart, `drain:pending:{groupId}` is read and pending - events are re-drained. -- [ ] I-2: After plugin restart, `drain:cursor:{groupId}` is read and only - events after the cursor are drained. -- [ ] I-3: After plugin restart, `session:{id}:snapshot` is available for the - next session's compaction context. -- [ ] I-4: Duplicate drain (events re-sent due to cursor not advancing) is - handled idempotently by Graphiti (UUID-keyed). -- [ ] I-5: After plugin restart with Redis available but Graphiti down, the - plugin operates in degraded mode (session continuity works, drain queues - up). -- [ ] I-6: TTL expiry of session keys (24h for events, 48h for snapshots) does - not cause errors — the plugin handles missing keys gracefully. -- [ ] I-7: `memory-cache:{groupId}` TTL expiry (10 min) results in omitted - `persistent_memory`, not an error. - -**Automation:** Automatable by resetting plugin state and re-initializing -against pre-seeded Redis fixtures. - ---- - -### Suite J: Redis Outage / Graphiti Outage Degradation - -**Goal:** Verify graceful degradation when one or both backends are unavailable. - -**Tier:** Integration + System - -#### Checklist - -- [ ] J-1: **Redis down at startup:** plugin logs error, falls back to in-memory - event buffer, hooks still fire. 
-- [ ] J-2: **Redis down at startup:** `session_memory` is still produced from - in-memory state (degraded but functional). -- [ ] J-3: **Redis down mid-session:** ioredis auto-reconnect fires; events - buffered in memory during outage. -- [ ] J-4: **Redis down mid-session:** after reconnect, state rebuilds and - subsequent hooks use Redis again. -- [ ] J-5: **Graphiti down at startup:** plugin logs warning, continues; - `persistent_memory` is omitted. -- [ ] J-6: **Graphiti down mid-session:** drain retries with exponential - backoff; cache stales out after TTL. -- [ ] J-7: **Graphiti down mid-session:** `session_memory` (Redis-sourced) is - unaffected. -- [ ] J-8: **Both down:** plugin operates with in-memory buffer only; equivalent - to no-plugin-at-all baseline. -- [ ] J-9: **Graphiti returns after outage:** drain resumes; cache refreshes on - next trigger. -- [ ] J-10: **Redis returns after outage:** state rebuilds; no duplicate events - from the in-memory buffer period. -- [ ] J-11: Dead-letter batches (`drain:dead:{groupId}`) are created after 3 - failed drain attempts. -- [ ] J-12: No hook throws an unhandled exception during any outage scenario — - all failures are caught and logged. - -**Automation:** J-1 through J-8 automatable by controlling mock service -availability. J-9, J-10 require timed reconnection simulation. J-11, J-12 fully -automatable. - ---- - -### Suite K: Context-Size / Latency Regression Detection - -**Goal:** Detect regressions in injected payload size and hook latency across -commits. - -**Tier:** Regression - -#### Checklist - -- [ ] K-1: `session_memory` envelope byte size is recorded per test run and - compared against a baseline. -- [ ] K-2: A > 20% increase in envelope size from baseline fails the regression - check. -- [ ] K-3: `chat.message` hook wall-clock time is recorded and compared against - a 5 ms threshold (Redis available). 
-- [ ] K-4: `messages.transform` hook wall-clock time is recorded and compared - against a 3 ms threshold. -- [ ] K-5: `session.compacting` hook wall-clock time is recorded and compared - against a 5 ms threshold. -- [ ] K-6: Async drain batch duration is recorded (informational, no hard - threshold — Graphiti latency varies). -- [ ] K-7: Payload size CSV is published as a CI artifact for trend analysis. -- [ ] K-8: Latency percentiles (p50, p95, p99) are computed over 100 iterations - of each hook. - -**Automation:** Fully automatable once a baseline file -(`tests/baselines/payload-sizes.json`) is created and checked into the repo -(proposed infrastructure). - ---- - -### Suite L: Migration / Compatibility — Legacy `data-uuids` - -**Goal:** Verify that the new implementation correctly handles legacy -`<memory data-uuids="...">` blocks and does not emit them. - -**Tier:** Unit - -#### Checklist - -- [ ] L-1: The `messages.transform` handler extracts `fact_uuids` from legacy - `<memory data-uuids="...">` blocks found in existing message history. -- [ ] L-2: Extracted legacy UUIDs are added to `visibleFactUuids` to prevent - re-injection. -- [ ] L-3: The new implementation never emits a `<memory data-uuids="...">` - block — only `<session_memory>` with `<persistent_memory>`. -- [ ] L-4: A message array containing both legacy `<memory data-uuids="...">` and new - `<session_memory>` blocks is handled without errors. -- [ ] L-5: Legacy `data-uuids` remain parse-only compatibility input; - `<persistent_memory>` itself emits `node_refs` only. -- [ ] L-6: Legacy config keys (`endpoint`, `groupIdPrefix`, `driftThreshold`) at - the top level are resolved correctly when nested `graphiti.*` keys are - absent. -- [ ] L-7: When both legacy top-level and nested config keys are present, nested - values take precedence. -- [ ] L-8: No verbose multi-paragraph memory block (characteristic of the legacy - Graphiti injection) appears in any hot-path output. - -**Automation:** Fully automatable — existing test in `messages.test.ts` already -covers L-1/L-2 partially.
- ---- - -### Suite M: Child / Subagent Session Routing - -**Goal:** Verify that child/subagent sessions are resolved to the canonical root -session and that their activity flows through the same memory pipeline as the -parent. - -**Tier:** Unit + Integration - -**Historical design reference:** -[`docs/ContextOverhaul.md` §11.1](ContextOverhaul.md#111-kept-divergence) - -**Divergence note:** This behavior intentionally differs from official -`mksglu/context-mode`, which treats subagent work as summarized tool events -rather than first-class session participants. See the historical §11.1 design -note above for the rationale and alignment guidance. - -#### Checklist - -- [x] M-1: `session.created` with a `parentID` caches the parent/child linkage - and resolves the canonical (root) session ID. -- [x] M-2: `chat.message` from a child session records events under the - canonical root session's `session:{canonicalId}:events` key. -- [x] M-3: `experimental.chat.messages.transform` from a child session injects - the root session's `` envelope. -- [x] M-4: `experimental.session.compacting` from a child session uses the root - session's state and snapshot. -- [x] M-5: `message.updated` from a child session finalizes the assistant - message under the canonical root session. -- [x] M-6: `message.part.updated` from a child session buffers assistant text - under the canonical root session ID. -- [x] M-7: `session.deleted` for a child session removes only the child's local - bookkeeping (parent-ID cache, canonical-ID cache, buffered messages) and - does **not** delete the root session's state, events, or snapshot. -- [x] M-8: Child-derived events appear in the priority-tiered snapshot when it - is rebuilt at `session.idle` or `session.compacted`. -- [x] M-9: Future `` injections for the parent session include - events that originated from child sessions. 
-- [x] M-10: Canonical ID resolution handles multi-level nesting (grandchild → - child → root) and detects cycles without infinite loops. - -**Automation:** Fully automatable with mock SDK client and `MockRedisClient`. -Tests exist in `event.test.ts`, `chat.test.ts`, `messages.test.ts`, -`compacting.test.ts`, and `session-snapshot.test.ts`. - ---- - -## 7 Metrics and Thresholds - -| Metric | Threshold | Source | Action on breach | -| -------------------------------------- | ---------------------- | ---------------------- | -------------------- | -| Hot-path hook wall-clock (p95) | < 5 ms (Redis up) | Timing instrumentation | Fail CI | -| `session_memory` envelope size | <= 2 400 chars | Payload snapshot | Fail CI | -| Total injected payload size | <= 5% of context limit | Payload snapshot | Fail CI | -| Snapshot XML size | <= 3 072 bytes (3 KB) | Redis `GET` | Fail CI | -| `SessionEvent.summary` length | <= 200 chars | Event extractor output | Fail CI | -| `SessionEvent.body` length | <= 4 096 bytes (4 KB) | Event extractor output | Fail CI | -| Async drain batch duration (p95) | < 5 000 ms | Async timing log | Warn (informational) | -| Cache refresh duration (p95) | < 2 000 ms | Async timing log | Warn (informational) | -| MCP calls during hot-path hooks | 0 | Call counter | Fail CI | -| Payload size regression (vs. baseline) | < 20% increase | Size regression CSV | Fail CI | -| Dead-letter batches per session | 0 (healthy run) | Redis key count | Warn (informational) | - ---- - -## 8 Pass / Fail Criteria - -### 8.1 Overall Pass - -All of the following must be true: - -- [ ] All Suite A checks pass (zero Graphiti on hot path). -- [ ] All Suite B checks pass (compact payloads within budget). -- [ ] All Suite C checks pass (no raw tool/transcript dumps). -- [ ] All Suite L checks pass (no legacy `data-uuids` emission). -- [ ] All Suite K thresholds are within bounds (no regressions). -- [ ] No unhandled exceptions in any degradation scenario (Suite J-12). 
-- [ ] Test coverage for hot-path code paths >= 90%. - -### 8.2 Conditional Pass (with known gaps) - -The following suites may have items that require manual verification or a real -interactive shell lifecycle: - -- Suite D (session continuity quality) — D-1 through D-5 require multi-turn - simulation. -- Suite F (cross-session persistent memory) — F-7 requires real Graphiti. -- Suite J (degradation) — J-9, J-10 require timed reconnection. - -These items are tracked as known gaps (see §10) and do not block CI pass if the -automatable subset passes. - -### 8.3 Fail - -Any of the following triggers a fail: - -- Any MCP call detected during a hot-path hook (Suite A). -- Injected payload exceeds budget (Suite B, K). -- Legacy `` block emitted by new code (Suite L-3). -- Unhandled exception during degradation (Suite J-12). -- Hot-path hook latency exceeds 5 ms p95 (Suite K-3 through K-5). - ---- - -## 9 CI/CD Automation Strategy (Proposed) - -> **Status:** Not yet implemented. The following sections describe the -> _intended_ CI/CD flow. Docker Compose fixtures (`tests/docker-compose.yml`) -> and baseline files (`tests/baselines/payload-sizes.json`) do not yet exist. -> Current runnable tasks available in `deno.json`: `build`, `deploy`, `dev`, -> `check`, `lint`, `fmt`. 
- -### 9.1 Test Execution (Proposed) - -```bash -# Unit tests (no external deps) -deno test --allow-env --filter "suite-[a-c,g,l]" src/ - -# Integration tests (requires FalkorDB) -docker compose -f tests/docker-compose.yml up -d falkordb -deno test --allow-net --allow-env --filter "suite-[d-f,h-j]" src/ - -# Regression tests (requires both services) -docker compose -f tests/docker-compose.yml up -d -deno test --allow-net --allow-env --filter "suite-k" src/ - -# Full run -docker compose -f tests/docker-compose.yml up -d -deno test --allow-net --allow-env src/ -deno run build -``` - -### 9.2 CI Artifacts to Collect - -| Artifact | Format | Purpose | -| ------------------------- | ------ | ----------------------------------------- | -| `timing-report.json` | JSON | Per-hook latency data for trend analysis | -| `payload-snapshots/*.xml` | XML | Injected envelopes for manual review | -| `size-regression.csv` | CSV | Payload sizes for cross-commit comparison | -| `coverage-report/` | HTML | Deno test coverage output | -| `test-results.json` | JSON | Structured pass/fail per checklist item | -| `dead-letter-report.json` | JSON | Dead-letter batches created during run | - -### 9.3 Suggested CI Pipeline - -```mermaid -graph LR - A[Push / PR] --> B[Unit Tests
no deps] - B --> C{Pass?} - C -->|Yes| D[Start FalkorDB
Docker] - C -->|No| X[Fail] - D --> E[Integration Tests] - E --> F{Pass?} - F -->|Yes| G[Start Graphiti
Docker] - F -->|No| X - G --> H[System + Regression] - H --> I{Pass?} - I -->|Yes| J[Collect Artifacts
Publish Report] - I -->|No| X -``` - -### 9.4 Baseline Management (Proposed) - -- Payload size baselines _would be_ stored in - `tests/baselines/payload-sizes.json` (file does not yet exist). -- Baselines _would be_ updated manually via `deno task update-baselines` (task - not yet available) after intentional size changes. -- CI _would_ compare current sizes against the checked-in baseline and fail on > - 20% regression once infrastructure is available. - ---- - -## 10 Remaining Gaps / Hard-to-Automate Tests - -### 10.1 Tests Requiring a True Interactive Shell Lifecycle - -The following tests cannot be fully automated within the current OpenCode plugin -test harness because they require a real OpenCode session lifecycle (hook -dispatch, compaction trigger, multi-turn LLM interaction): - -| Test ID | Description | Approximation strategy | -| ------- | ------------------------------------------- | -------------------------------------------------------------------------------- | -| D-1 | Multi-turn continuity after 5 exchanges | Simulate by calling hook handlers sequentially with synthetic payloads. | -| D-2 | User correction reflected in next injection | Simulate with synthetic `decision` event insertion. | -| E-5 | Post-compaction chat uses new snapshot | Simulate by calling compaction handler then chat handler in sequence. | -| F-2 | Cold-start first turn, warm second turn | Simulate with timed async warmup and sequential handler calls. | -| F-7 | Cross-session fact recall after drain | Requires real Graphiti; approximate with stub that returns pre-seeded facts. | -| J-9 | Graphiti recovery triggers drain resume | Simulate by toggling mock MCP availability and advancing timers. | -| J-10 | Redis recovery rebuilds state | Simulate by toggling mock Redis availability and verifying event list integrity. 
| - -### 10.2 Tests Requiring Real Services - -| Test ID | Description | Why | -| ------- | --------------------------------------- | -------------------------------------------------------------- | -| F-7 | End-to-end cross-session recall | Needs real Graphiti entity extraction and vector search. | -| K-6 | Async drain batch duration | Meaningful only against real Graphiti (LLM-backed extraction). | -| K-8 | Latency percentiles over 100 iterations | Meaningful only against real services under realistic load. | - -### 10.3 Tests Requiring Manual / Exploratory Verification - -| Area | What to verify | -| ----------------------------- | ------------------------------------------------------------------------------------------------ | -| LLM continuity quality | Does the LLM actually "feel" continuous after compaction? Requires human judgment. | -| Memory relevance (semantic) | Are the right facts surfaced for a given topic? Keyword matching approximates. | -| Multi-agent orchestration | Subagent events in a real swarm session. Unit-level child-session routing is covered by Suite M. | -| Long-running session (> 1 hr) | TTL expiry, cache staleness, and drift behavior over extended use. | - -### 10.4 OpenCode Shell Model Limitations - -The current OpenCode plugin architecture has these constraints for test -automation: - -1. **No programmatic session creation** — tests cannot create a real OpenCode - session; they must simulate hook calls. -2. **No compaction trigger API** — compaction is triggered by OpenCode - internally; tests simulate `session.compacting` and `session.compacted` - events. -3. **No multi-session orchestration** — testing cross-session behavior requires - separate test runs or simulated session boundaries. -4. **Hook dispatch is synchronous in tests** — async fire-and-forget behavior - must be verified by awaiting explicit flush/drain calls rather than relying - on event-loop timing. 
- -**Mitigation:** The test harness simulates the hook lifecycle by calling handler -functions directly with synthetic inputs. This covers ~85% of the test plan. The -remaining ~15% (marked in §10.1–10.3) requires either real services, real -OpenCode sessions, or human judgment. diff --git a/docs/superpowers/plans/2026-03-24-agentic-runtime-test-plan-rewrite.md b/docs/superpowers/plans/2026-03-24-agentic-runtime-test-plan-rewrite.md new file mode 100644 index 0000000..c3dc2a6 --- /dev/null +++ b/docs/superpowers/plans/2026-03-24-agentic-runtime-test-plan-rewrite.md @@ -0,0 +1,463 @@ +# Agentic Runtime Test Plan Rewrite Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace the historical `docs/ContextOverhaulTests.md` with a new +authoritative, execution-ready MCP-first agentic runtime test plan, including +mandatory live multi-agent validation and explicit `` +coverage. + +**Architecture:** Keep the deliverable narrowly scoped to documentation +authority and validation procedure rather than code changes to the runtime +itself. Rewrite the test plan in place at `docs/ContextOverhaulTests.md`, +restructure it around automated verification and live agentic runtime scenarios, +and clean up stale repository references that would misdirect future operators +or test authors. + +**Tech Stack:** Markdown documentation, existing repository architecture docs, +Deno task/test commands, OpenCode live subagent runtime assumptions, +Redis/FalkorDB and Graphiti local service defaults. + +--- + +## File structure and responsibility lock-in + +- `docs/ContextOverhaulTests.md` + - The authoritative living runtime test plan. This file must be fully + rewritten in place, not replaced with a dated path. 
+- `AGENTS.md` + - Cleanup only: correct stale references to the old + `plans/ContextOverhaul*.md` paths so the repository reading order, + validation notes, and key-file hints point to the authoritative `docs/` + paths. +- `docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md` + - Read-only grounding spec for the rewrite. Do not rewrite the spec during + implementation unless the user explicitly asks for spec changes. + +`docs/ContextOverhaul.md` is a historical design document. Do not broaden this +task into updating its historical references unless the user explicitly asks for +that cleanup as separate work. Treat any stale references encountered there as +deferred follow-up documentation cleanup, not as part of this plan's required +edits. + +Known deferred stale references outside `AGENTS.md` may still exist in code or +tests (for example references inside `src/services/` tests/guidance comments). +Those are follow-up cleanup candidates, not part of this documentation-focused +rewrite unless the user explicitly expands scope. + +Do not broaden this work into runtime code changes, unrelated docs refreshes, or +clean-slate architecture edits. 
+ +### Task 1: Add guardrail tests for stale doc-path references + +**Files:** + +- Modify: `AGENTS.md` +- Test: repository-wide path/reference verification via `grep` + +- [ ] **Step 1: Write down the failing reference checks to satisfy** + +Capture these reference expectations before editing: + +- `AGENTS.md` must not reference the stale non-existent path + `plans/ContextOverhaulTests.md` +- `AGENTS.md` must not reference the stale non-existent path + `plans/ContextOverhaul.md` +- `AGENTS.md` must reference `docs/ContextOverhaulTests.md` wherever it points + to the authoritative test plan +- `AGENTS.md` must reference `docs/ContextOverhaul.md` wherever it points to the + historical design document + +- [ ] **Step 2: Run the failing reference search** + +Run: +`grep -n "plans/ContextOverhaul\.md\|plans/ContextOverhaulTests\.md\|docs/ContextOverhaul\.md\|docs/ContextOverhaulTests\.md" AGENTS.md` + +Expected: FAIL in the sense that the output still shows stale `plans/` path +references that need correction. + +- [ ] **Step 3: Make the minimal doc cleanup in `AGENTS.md`** + +Update only the stale path references in: + +- Validation Expectations +- Resume-Reading Order +- Key Files table + +Also add a `docs/ContextOverhaulTests.md` row to the Key Files table if the +table would otherwise omit the repository's authoritative runtime test plan. +Also update the existing stale `plans/ContextOverhaul.md` Key Files row to +`docs/ContextOverhaul.md` rather than removing that historical design entry. + +Do not rewrite surrounding architecture guidance. + +- [ ] **Step 4: Re-run the reference search to verify the cleanup** + +Run: +`grep -n "plans/ContextOverhaul\.md\|plans/ContextOverhaulTests\.md\|docs/ContextOverhaul\.md\|docs/ContextOverhaulTests\.md" AGENTS.md` + +Expected: PASS in the sense that only `docs/ContextOverhaul.md` and +`docs/ContextOverhaulTests.md` remain as the authoritative/historical `docs/` +paths. 
+ +### Task 2: Build the new test-plan outline with mandatory sections + +**Files:** + +- Modify: `docs/ContextOverhaulTests.md` +- Grounding: + `docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md` +- Grounding: `README.md` +- Grounding: `AGENTS.md` +- Grounding: `docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md` +- Grounding: + `docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md` +- Grounding: + `docs/superpowers/plans/2026-03-23-context-mode-batch-index-gap-closure.md` +- Grounding: `docs/superpowers/plans/2026-03-23-mcp-first-gap-closure.md` +- Grounding: + `docs/superpowers/specs/2026-03-23-clean-slate-architecture-design.md` + +- [ ] **Step 1: Write the failing outline checklist** + +Before rewriting the file, make a checklist of the required sections from the +spec that the current document does not satisfy: + +- Purpose and Authority +- Runtime Guarantees Under Test +- Test Environment and Operators +- Evidence Model +- Automated Verification Matrix +- Live Agentic Runtime Scenarios +- Coverage Map +- Release Gates + +- [ ] **Step 2: Confirm the current document fails the new shape** + +If `docs/ContextOverhaulTests.md` already exists, run a manual read of it +against the spec and record which required sections are missing or +historical-only. If the file is absent in the working tree, treat that absence +itself as a failing precondition that the rewrite must correct by creating the +authoritative file at that path. + +Expected: FAIL because the existing document is explicitly historical and does +not provide the new authoritative MCP-first structure, or because the +authoritative file is absent and must be created. 
+ +- [ ] **Step 3: Rewrite the document header and section skeleton in place** + +The replacement must include near the top: + +- `Status: Active` +- `Last Updated: 2026-03-24` (or the actual rewrite date if implementation + slips) +- `Replaces: historical native-hook-first test plan` +- a short note about the file carrying both historical and replacement-era git + history + +Then create the full mandatory section structure before filling in all test +content, including an explicit `Runtime Guarantees Under Test` section scaffold +that later automated/live sections can reference. + +The scaffold must explicitly name the proof targets from the spec, including: + +- `session_*` as the primary bounded execution surface +- `session_batch_execute` mixed command/search ordering, boundedness, and typed + results +- `session_index` replacement semantics for the same + `(rootSessionId, source, + label)` logical document +- canonical root-session sharing across parent/child agents +- local-first bounded corpus behavior +- Graphiti off the hot path +- optional bounded `<persistent_memory>` behavior +- compaction continuity +- restart and degradation expectations, including combined-backend boundaries + +- [ ] **Step 4: Re-read the rewritten skeleton against the spec** + +Expected: PASS in structure only — every mandatory top-level section exists, +even if the detailed test content is not complete yet.
+ +### Task 3: Author the test environment and operator model + +**Files:** + +- Modify: `docs/ContextOverhaulTests.md` +- Verify against: `README.md`, `AGENTS.md`, + `docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md` + +- [ ] **Step 1: Write the failing environment/operator checklist** + +List the section content that must be written explicitly: + +- required services and default endpoints/configuration assumptions +- any version/runtime assumptions the operator must know +- artifact capture locations +- CI-runnable versus live-runtime-only boundaries +- operator roles (`human operator`, `root agent`, `child agent`, + `observer/evidence collector`) + +- [ ] **Step 2: Confirm the current document does not provide this content** + +Expected: FAIL because the historical document does not define the current +MCP-first runtime environment model or the required operator-role split. + +- [ ] **Step 3: Write the `Test Environment and Operators` section** + +Use: + +- `README.md` for service defaults such as Redis/FalkorDB on `localhost:6379` + and Graphiti MCP on `http://localhost:8000/mcp` +- `AGENTS.md` for hot-path, async-tier, and continuity constraints +- the spec for required operator-role definitions and CI-vs-live distinctions + +- [ ] **Step 4: Re-read the section for execution readiness** + +Expected: PASS if a fresh operator can tell what services must be running, what +assumptions hold, who performs each role in live testing, and what can run in CI +versus only in a live OpenCode runtime. 
+ +### Task 4: Author the automated verification matrix + +**Files:** + +- Modify: `docs/ContextOverhaulTests.md` +- Verify against: `README.md`, `AGENTS.md`, + `docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md`, + `docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md`, + `docs/superpowers/plans/2026-03-23-context-mode-batch-index-gap-closure.md`, + `docs/superpowers/plans/2026-03-23-mcp-first-gap-closure.md`, + `docs/superpowers/specs/2026-03-23-clean-slate-architecture-design.md` + +- [ ] **Step 1: Write the failing automated-coverage checklist** + +List the mandatory automated suite groups that must exist, including: + +- per-tool `session_*` contract coverage +- explicit `session_batch_execute` mixed command/search ordering, boundedness, + and typed-result coverage +- bounded output and artifact spillover +- local corpus search/ranking/replacement semantics +- explicit `session_index` replacement semantics coverage for the same + `(rootSessionId, source, label)` logical document +- `<persistent_memory>` cache-hit, cold-cache, refresh, omission, and stale-data + behavior +- root-session propagation/lifecycle +- hook enforcement/attribution +- continuity assembly/compaction survival +- async Graphiti drain/cache refresh +- restart/recovery/degradation +- regression thresholds for payload size, latency, and storage growth + +- [ ] **Step 2: Verify the current automated section is insufficient** + +Compare the historical suites to the new checklist. + +Expected: FAIL because the historical suites are hot-path/native-hook-first in +framing and under-specify current MCP-first runtime obligations.
+ +- [ ] **Step 3: Write the automated matrix with execution-ready detail** + +For each automated suite, include: + +- objective +- prerequisites +- exact commands, primarily using the repo's existing `deno task` commands plus + the built-in `deno test` command where test execution is required +- expected result +- artifacts/evidence to save +- common failure signatures +- release-gate severity + +Also ensure the document states clearly when an additional helper harness would +need explicit justification rather than being assumed by default, and do not +invent a new `deno task test` alias as part of this docs-only rewrite. + +The automated matrix documents expected verification commands and procedures for +future test execution. Do not create new test files or expand into runtime-test +implementation as part of this documentation rewrite. + +- [ ] **Step 4: Re-read the automated matrix for architecture alignment** + +Expected: PASS if every active runtime guarantee has at least one automated +proof path and none of the automated sections drift back to native-hook-first +framing. 
+ +### Task 5: Author the live agentic runtime scenarios + +**Files:** + +- Modify: `docs/ContextOverhaulTests.md` +- Verify against: `README.md`, `AGENTS.md`, + `docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md`, + `docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md`, + `docs/superpowers/plans/2026-03-23-context-mode-batch-index-gap-closure.md`, + `docs/superpowers/plans/2026-03-23-mcp-first-gap-closure.md`, + `docs/superpowers/specs/2026-03-23-clean-slate-architecture-design.md` + +- [ ] **Step 1: Write the failing live-scenario checklist** + +List the required live runtime scenario families, including: + +- two-subagent parallel investigation with root-session continuity roll-up +- child `session_search` / `session_index` effects visible to parent/root +- live mixed `session_batch_execute` + search workflow +- delegated work leading to later bounded `<persistent_memory>` recall +- native-tool fallback and routing/enforcement toward `session_*` +- compaction after delegated work and resumed execution from preserved memory +- restart after delegated/indexed work with continuity/corpus recovery +- Graphiti-unavailable delegated work with local-first continuity +- Redis/FalkorDB degradation or reconnect during delegated work +- combined-backend degradation, or explicit justification for automated-only + coverage +- high-volume artifact generation proving boundedness in real agent use + +- [ ] **Step 2: Verify the historical document does not satisfy live proof** + +Expected: FAIL because the historical plan does not make full live agentic +runtime validation mandatory and does not provide execution-ready multi-agent +procedures.
+ +- [ ] **Step 3: Write the live agentic runtime scenarios in full** + +Requirements: + +- use a root agent and at least two child agents unless a scenario explicitly + justifies a single-child exception +- make at least one scenario fully concrete with exact prompts, topology, + evidence capture, and pass/fail interpretation +- fully flesh out every scenario as an executable procedure, not a stub +- include operator roles (`human operator`, `root agent`, `child agent`, + `observer/evidence collector`) where relevant + +- [ ] **Step 4: Re-read the live section for true runtime proof** + +Expected: PASS if the section proves real delegation behavior rather than merely +rephrasing mock or synthetic-hook coverage. + +### Task 6: Add the evidence model, coverage map, and release gates + +**Files:** + +- Modify: `docs/ContextOverhaulTests.md` + +- [ ] **Step 1: Write the failing proof-model checklist** + +The document must explicitly define: + +- mandatory evidence classes +- anti-evidence rules +- a table-based coverage map +- ship/no-ship release gates + +The coverage map must include explicit rows for: + +- `session_batch_execute` mixed-step behavior +- `session_index` replacement semantics +- `<persistent_memory>` presence/omission and bounded formatting +- stale-cache behavior +- cross-session recall +- Graphiti-unavailable degradation +- combined-backend degradation boundaries or explicit automated-only + justification + +- [ ] **Step 2: Confirm the current document fails the proof-model checklist** + +Expected: FAIL because the historical plan does not define the new evidence +model or the required coverage mapping between automated and live proof.
+ +- [ ] **Step 3: Write the evidence model, table coverage map, and release + gates** + +Be explicit that the following do **not** count as sufficient proof on their +own: + +- mocked child-session routing +- passing unit tests alone +- synthetic hook invocation alone +- transcript claims without tool/log/state evidence when runtime proof is being + claimed + +Release gates should identify: + +- the minimum automated suites that must pass +- the mandatory live scenarios that must pass +- degradation expectations +- allowed known gaps and their justification +- conditions that immediately fail release readiness + +- [ ] **Step 4: Re-read the proof-model sections for completeness** + +Expected: PASS if an operator can tell exactly what evidence must be collected, +what coverage exists, and what blocks release. + +### Task 7: Final consistency pass and repository-facing cleanup + +**Files:** + +- Modify: `docs/ContextOverhaulTests.md` +- Modify: `AGENTS.md` + +- [ ] **Step 1: Run a final doc consistency review against the grounding spec** + +Read `docs/ContextOverhaulTests.md` side by side with +`docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md`. + +Check for: + +- missing mandatory sections +- MCP-first drift +- missing `<persistent_memory>` coverage +- live multi-agent scenarios that are still too abstract +- stale path references in `AGENTS.md` + +- [ ] **Step 2: Run repository verification commands** + +Run: `deno task check && deno task lint && deno task fmt` + +Expected: PASS. These tasks exist in `deno.json` for this repository. If +formatting changes are applied by `deno task fmt`, review the doc diff and +ensure only intended documentation formatting changed. + +These are repository-health checks for the docs rewrite itself. They are not the +same as the `deno test` commands documented inside +`docs/ContextOverhaulTests.md` for future operators, and this plan does not +require adding a new `deno task test` alias.
+ +- [ ] **Step 3: Run final reference searches** + +Run: +`grep -n "plans/ContextOverhaul\.md\|plans/ContextOverhaulTests\.md\|docs/ContextOverhaul\.md\|docs/ContextOverhaulTests\.md" AGENTS.md` + +Expected: PASS in the sense that `AGENTS.md` points only at +`docs/ContextOverhaul.md` and `docs/ContextOverhaulTests.md` for these +historical/authoritative references. + +- [ ] **Step 4: Perform a final manual release-gate check** + +Confirm the finished document now provides: + +- an authoritative living runtime test plan +- extensive automated verification procedures +- extensive live agentic runtime procedures +- explicit `` validation +- proof/evidence criteria and release gates + +- [ ] **Step 5: Commit the task** + +Only perform this step if the user explicitly asks for a commit in the +implementation session. + +```bash +git add docs/ContextOverhaulTests.md AGENTS.md +git commit -m "docs: rewrite the agentic runtime test plan" +``` + +If you intentionally updated +`docs/superpowers/plans/2026-03-24-agentic-runtime-test-plan-rewrite.md` during +execution (for example by marking checkboxes), stage it separately before +committing. Otherwise leave the plan file out of the commit. diff --git a/docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md b/docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md new file mode 100644 index 0000000..71410ac --- /dev/null +++ b/docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md @@ -0,0 +1,304 @@ +# Agentic Runtime Test Plan Design + +## Goal + +Replace the legacy historical test plan with a new authoritative, +execution-ready test plan for the current MCP-first agentic runtime. + +The replacement document must validate the product as it now exists: an OpenCode +plugin whose primary execution surface is `session_*` MCP tools, whose +continuity model is rooted in canonical root-session state, and whose Graphiti +integration remains asynchronous and off the hot path. 
+ +The new plan must not be a loose strategy memo. It must be usable by an agent or +operator as a verification manual with exact setup, commands, procedures, +evidence requirements, and pass/fail gates. + +## Why Replace The Old Plan + +The existing `docs/ContextOverhaulTests.md` no longer matches the active product +center of gravity: + +- it is explicitly marked superseded/historical +- it was written for the native-hook-first overhaul rather than the MCP-first + runtime that now defines the product +- it under-specifies live multi-agent runtime validation, which is now a hard + requirement + +The replacement plan must therefore be written from scratch, even if some test +ideas are adapted and re-scoped. + +The replacement document should live at the stable authoritative path +`docs/ContextOverhaulTests.md` by fully overwriting the historical content at +that path. It should not be moved to a dated filename because it is intended to +remain the living source of truth for runtime verification. + +## Required Inputs + +The new plan must be grounded in the current superpowers-era architecture and +gap-closure work, especially: + +- `README.md` +- `AGENTS.md` +- `docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md` +- `docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md` +- `docs/superpowers/plans/2026-03-23-context-mode-batch-index-gap-closure.md` +- `docs/superpowers/plans/2026-03-23-mcp-first-gap-closure.md` +- `docs/superpowers/specs/2026-03-23-clean-slate-architecture-design.md` + +These sources define what must be proven: bounded MCP-first execution, local +corpus behavior, canonical root-session sharing across subagents, continuity +capture through compaction, asynchronous Graphiti augmentation, and correct +optional `` behavior when Graphiti-backed recall is +available. + +## Non-Negotiable Design Decisions + +1. The new test plan is the authoritative runtime test plan for the repository. +2. 
Full agentic runtime testing is mandatory. +3. Mock-only or unit-only child-session coverage does not satisfy the + multi-agent requirement. +4. The document must remain split for operator convenience: + - automated verification + - live agentic runtime verification +5. The document must be execution-ready rather than descriptive. +6. The document must state exactly what evidence counts as proof and what does + not. +7. The replacement should correct stale legacy references encountered in the + grounding docs when those references would otherwise misdirect future test + authors or operators. In particular, stale `plans/ContextOverhaulTests.md` + references in `AGENTS.md` should be treated as cleanup items during the + rewrite or in immediate follow-up documentation work. + +## Required Document Shape + +All sections below are mandatory unless this spec explicitly marks them as +optional. + +### 1. Purpose And Authority + +- explain that the document replaces the legacy plan +- state that it is the current source of truth for validation +- identify the active architecture promises it proves +- include stable living-document metadata near the top of the replacement plan, + at minimum: + - `Status: Active` + - `Last Updated: YYYY-MM-DD` + - `Replaces: historical native-hook-first test plan` + - a short note that git history at `docs/ContextOverhaulTests.md` will include + both the historical and replacement eras of the document + +### 2. 
Runtime Guarantees Under Test + +Map the architecture to explicit proof targets, such as: + +- `session_*` tools are the primary bounded execution surface +- `session_batch_execute` mixed command/search steps preserve order, + boundedness, and typed results in both automated and live runtime usage +- `session_index` replacement semantics for the same + `(rootSessionId, source, + label)` replace prior logical documents rather than + appending duplicates +- risky native tools are enforced or redirected by hooks rather than becoming + the primary execution path +- parent and child agents share one canonical root-session continuity model +- local corpus indexing/search stays local-first and bounded +- Graphiti never blocks hot-path correctness +- `<persistent_memory>` appears only when supported by the current cache/runtime + state, stays bounded/structured, and never becomes a hot-path dependency +- compaction preserves continuity for both direct and delegated work +- restart behavior preserves safe operation and state recovery expectations +- Graphiti-unavailable behavior degrades to local-first continuity without + breaking hot-path correctness +- Redis/FalkorDB-unavailable behavior degrades safely according to the active + runtime fallback rules +- combined-backend degradation boundaries are explicitly tested or explicitly + justified as non-live-only coverage + +### 3. 
Test Environment And Operators + +Define: + +- required services and optional degraded variants, including minimum expected + endpoints/configuration and any version assumptions the operator must satisfy +- test accounts / local runtime assumptions +- artifact capture locations +- operator roles when live sessions require a root agent plus two child agents: + - `human operator`: starts/stops services, launches the root session, issues + scripted prompts when a manual trigger is required, and records evidence + - `root agent`: receives the primary task and delegates to child agents + - `child agent`: executes delegated work inside the same canonical runtime + model + - `observer/evidence collector`: may be the human operator or a separate + agentic step, but the plan must say who captures logs, tool results, and + state evidence for each scenario +- what can run in CI versus what requires a live OpenCode runtime + +### 4. Evidence Model + +Specify these mandatory evidence classes: + +- command output +- `session_*` tool responses +- emitted `<session_memory>` envelopes +- emitted optional `<persistent_memory>` sections and their surrounding + `<session_memory>` context +- Redis/FalkorDB state observations +- Graphiti cache/drain observations +- logs and warnings +- screenshots or copied transcripts only when necessary + +Also define these mandatory anti-evidence rules: + +- mocked child-session routing is supporting evidence only +- passing unit tests alone do not prove multi-agent runtime behavior +- synthetic hook invocation alone does not prove real delegation continuity +- transcript claims without corresponding tool/log/state evidence do not satisfy + release gates when the scenario claims runtime proof + +### 5. Automated Verification Matrix + +This section should contain extensive, execution-ready suites with exact +commands, setup, assertions, expected artifacts, and pass/fail criteria. 
+ +Automated suites should run through the repository's existing test +infrastructure by default, using `deno test` unless the plan explicitly +justifies an additional helper harness. + +Recommended suite groups: + +- runtime contract tests for each `session_*` tool +- bounded output and artifact spillover tests +- local corpus indexing/search ranking and replacement tests +- `<persistent_memory>` cache-hit, cold-cache, refresh, omission, and + stale-data-behavior tests +- root-session propagation and lifecycle tests +- hook enforcement and attribution tests +- continuity assembly and compaction survival tests +- async Graphiti drain/cache refresh tests +- restart/recovery/degradation tests +- regression thresholds for payload size, latency, and storage growth + +The automated section should be at least as broad as the architecture coverage +represented by the historical plan, but rewritten for the MCP-first runtime and +its current `session_*` contracts rather than copied mechanically. + +Each suite should include: + +- objective +- prerequisites +- exact command(s) +- expected result +- artifacts/evidence to save +- common failure signatures +- severity / release gate classification + +### 6. Live Agentic Runtime Scenarios + +This is the mandatory section that makes the plan truly agentic. + +It must define real session scenarios that exercise live delegation rather than +simulated hooks alone. Unless the scenario is explicitly justified as a +single-child exception, each scenario should use a root agent and at least two +subagent sessions. + +At least one scenario template in the final plan must be fully concrete rather +than abstract, including exact prompts, expected subagent topology, evidence +capture steps, and pass/fail interpretation. The rest of the scenarios may reuse +that template shape, but they must still be fully fleshed out as executable +procedures rather than stubs. 
+ +Recommended scenario groups: + +- two-subagent parallel investigation with root-session continuity roll-up +- child agent uses `session_search` and `session_index`, parent later sees the + shared continuity effects +- mixed `session_batch_execute` + corpus search workflow in live runtime +- delegated work creates or refreshes Graphiti-backed recall that later appears + as bounded `<persistent_memory>` in a subsequent live session +- native-tool fallback attempt followed by routing/enforcement toward + `session_*` +- compaction after delegated work, followed by resumed execution from preserved + memory +- session restart after delegated/indexed work with corpus and continuity + recovery +- Graphiti unavailable during delegated work, followed by local-first continuity + and later recovery +- Redis/FalkorDB unavailable or reconnecting during delegated work, followed by + safe degraded operation and recovery evidence +- combined backend degradation boundaries, if safely reproducible in the live + runtime harness; otherwise the final plan must explicitly justify why that + proof remains automated-only +- high-volume artifact generation proving bounded response behavior under real + agent usage + +Every live scenario must specify: + +- objective +- topology of root and child agents +- exact operator prompts or scripted actions +- expected runtime observations +- expected root-session state sharing behavior +- evidence to collect +- failure signatures and likely fault domains + +### 7. Coverage Map + +Add a matrix that maps each architecture promise to one or more automated suites +and one or more live runtime scenarios. + +This should be presented as a table so operators can quickly verify that every +critical guarantee has both a proof path and an evidence path. + +This section ensures nothing critical is validated only in mocks when it should +also be proven live. 
+ +The coverage map should include explicit rows for `<persistent_memory>` +presence/omission behavior, bounded formatting, stale-cache behavior, +cross-session recall, and Graphiti-unavailable degradation. + +### 8. Release Gates + +Define clear ship/no-ship criteria, for example: + +- minimum automated suite pass set +- mandatory live scenario pass set +- degradation expectations +- allowed known gaps and why +- conditions that immediately fail the release + +## Authoring Principles For The Replacement Test Plan + +The new plan should be: + +- specific enough that a fresh agent can execute it without inventing missing + procedure +- honest about what requires real runtime proof +- explicit about evidence and artifacts +- architecture-aligned to the current MCP-first product +- strict about boundedness, continuity, and off-hot-path Graphiti behavior +- broad enough to cover failure recovery, not just happy-path success +- maintainable as a living document: when `session_*` contracts, continuity + guarantees, or degradation behavior change, the authoritative plan should be + updated in the same change stream or explicitly flagged for follow-up + +## Out Of Scope For The Replacement Plan + +The new plan should not: + +- drift back into native-hook-first framing +- rely on undocumented OpenCode APIs +- present speculative future architecture as a test obligation unless already + adopted in active plans +- treat clean-slate modularization as a prerequisite for runtime verification + +## Expected Outcome + +After implementation, the repository should have: + +1. the legacy `docs/ContextOverhaulTests.md` removed or fully replaced +2. a new authoritative runtime test plan written from scratch in its place +3. an execution-ready verification manual with extensive automated and live + agentic test cases +4. 
an explicit proof model for multi-agent runtime behavior that can be reused + by future implementers and reviewers diff --git a/docs/superpowers/specs/2026-03-24-review-issue-class-sweep-design.md b/docs/superpowers/specs/2026-03-24-review-issue-class-sweep-design.md new file mode 100644 index 0000000..fd127db --- /dev/null +++ b/docs/superpowers/specs/2026-03-24-review-issue-class-sweep-design.md @@ -0,0 +1,229 @@ +# Review Issue-Class Sweep Design + +## Goal + +Systematically hunt for every instance of each issue class raised in resolved PR +review comments and fix them repo-wide wherever local evidence supports the +change. The sweep operates on issue _classes_, not individual review threads: a +class may yield fixes in files that were never mentioned in the original review. + +## Scope + +### Issue Classes + +Each class below is an independent sweep track. A dedicated subagent session +handles exactly one class. + +#### 1. Abort/Cancellation Semantics and Typed Abort Normalization + +**Pattern**: The codebase has two independent `isAbortError` implementations +(`connection-manager.ts:217` and `session-executor.ts:208`) with slightly +different shapes. Abort reasons are constructed ad-hoc +(`new GraphitiRequestTimeoutError()`, raw +`DOMException("aborted", +"AbortError")` in tests). + +**Sweep target**: + +- Unify `isAbortError` into a single shared utility (likely in `utils.ts` or a + dedicated abort module). +- Audit every `AbortController.abort(reason)` call site to ensure the abort + reason is a typed error, not a bare string or untyped value. +- Verify test doubles construct abort errors via the shared helper or the + canonical `DOMException` constructor. + +**Files likely in scope**: `src/services/connection-manager.ts`, +`src/services/session-executor.ts`, `src/services/session-corpus.ts`, +`src/services/connection-manager.test.ts`, +`src/services/session-executor.test.ts`, +`src/services/session-mcp-runtime.test.ts`, `src/utils.ts`. + +#### 2. 
Endpoint Validation/Redaction Consistency + +**Pattern**: `redactEndpointUserInfo` is applied in `index.ts` startup warnings +and `connection-manager.ts` logging, but not necessarily at every other site +that formats an endpoint for user-visible output (log messages, error messages, +warning toasts). + +**Sweep target**: + +- Grep every log/warn/error/toast call that interpolates a config endpoint + string. +- Ensure `redactEndpointUserInfo` is applied before any endpoint reaches a + user-visible surface. +- Verify `isValidUrlString` is used consistently for explicit-URL validation + (config layer) and that no call site silently swallows malformed URLs outside + the config validator. + +**Files likely in scope**: `src/index.ts`, `src/config.ts`, +`src/services/connection-manager.ts`, `src/services/endpoint-redaction.ts`, +`src/services/graphiti-mcp.ts`, `src/services/opencode-warning.ts`, +`src/services/redis-client.ts`, `src/services/session-mcp-runtime.ts`. + +#### 3. Stable User-Facing Denial/Error Messaging + +**Pattern**: Denial and degradation messages must be stable strings that do not +leak internal state. The codebase already uses structured patterns like +`"Graphiti MCP unavailable at …; continuing without persistent memory."` and +`"Redis unavailable at …; continuing with in-memory hot-tier fallback."` but +`graphiti-mcp.ts` uses shorter forms like +`"Graphiti unavailable; memory was not +saved."`. + +**Sweep target**: + +- Audit every user-facing warning/error string for consistency in phrasing, + structure, and information density. +- Ensure no message leaks raw error `.message` content, stack traces, or + internal keys to the user-visible surface (logger.warn payloads shown via + toast vs. structured-only fields). +- Verify tool denial messages in `tool-routing.ts` / `tool-guidance.ts` are + stable and do not embed variable internal state. 
+ +**Files likely in scope**: `src/services/graphiti-mcp.ts`, +`src/services/opencode-warning.ts`, `src/services/redis-client.ts`, +`src/services/redis-events.ts`, `src/services/session-mcp-runtime.ts`, +`src/services/tool-routing.ts`, `src/services/tool-guidance.ts`, `src/index.ts`. + +#### 4. Public Type Reuse in Tests + +**Pattern**: Multiple test files independently define identical local types +(`type RedisEvent = "close" | "end" | "error" | "ready"` appears in at least 5 +test files). Test-local type aliases for public shapes create maintenance drift. + +**Sweep target**: + +- Identify types duplicated across test files that mirror or subset public + exports from `src/types/index.ts` or service modules. +- Extract shared test-utility types to a common test-helper module or re-export + from the source module, whichever is simpler. +- The `RedisEvent` union duplicated in `batch-drain.test.ts`, + `redis-events.test.ts`, `session-mcp-runtime.test.ts`, `redis-client.test.ts`, + `hot-tier-slice.test.ts`, `redis-cache.test.ts` is the primary target. + +**Files likely in scope**: All `*.test.ts` files under `src/services/` that +define `type RedisEvent`, plus any shared test-helper file created or extended. + +#### 5. Config/Docs Consistency and Dead-Path Simplification + +**Pattern**: Config defaults live in `config.ts` (`DEFAULT_CONFIG`), in +`AGENTS.md` (§ Configuration), and in `README.md` (§ Configuration). The three +must agree. Legacy config paths and deprecated keys may still be referenced in +docs but removed from code, or vice versa. + +**Sweep target**: + +- Cross-check `DEFAULT_CONFIG` values in `config.ts` against every doc that + states defaults (`AGENTS.md`, `README.md`). +- Verify documented config keys match the actual `RawGraphitiConfig` / + `GraphitiConfig` types — no phantom keys, no missing keys. +- Identify dead code paths in `config.ts` (e.g. 
`loadLegacyConfig` if legacy + path is no longer documented or tested) and simplify or document their + retention rationale. +- Check `deno.json` task names match any doc references. + +**Files likely in scope**: `src/config.ts`, `src/config.test.ts`, `AGENTS.md`, +`README.md`, `deno.json`. + +## Non-Goals + +- Unrelated refactors outside the five issue classes. +- Stylistic churn (formatting, import ordering, naming preferences) unless it is + directly part of an issue-class fix. +- Speculative API redesigns or public contract changes. +- Touching files that are dirty in the worktree for reasons unrelated to the + sweep. + +## Execution Model + +``` +Main session +├── Class 1 subagent ─ abort/cancellation +├── Class 2 subagent ─ endpoint validation/redaction +├── Class 3 subagent ─ user-facing messaging +├── Class 4 subagent ─ type reuse in tests +├── Class 5 subagent ─ config/docs consistency +│ +├── Integration pass ─ merge non-conflicting changes, resolve overlaps +├── Broad verification ─ full test suite, type check, lint +└── Thread follow-up ─ update unresolved review threads if changes apply +``` + +Each subagent: + +1. Receives its single issue class, the target file list, and the evidence + standard. +2. Greps/reads to find all instances of the class pattern. +3. Fixes only instances with clear local evidence. +4. Runs targeted verification (the specific test files affected). +5. Returns: changed files, verification commands + results, any instances it + chose _not_ to fix with rationale. + +The main session: + +1. Reviews each subagent's summary for correctness. +2. Integrates changes, resolving file overlaps (especially `utils.ts`, + `connection-manager.ts`, `index.ts` which appear in multiple classes). +3. Runs `deno task check` and `deno test` across the full repo. +4. Only after green: updates review threads with evidence of repo-wide fixes. 
+ +## Evidence Standard + +A fix is applied only when: + +- The code pattern matches the issue class definition above. +- The fix is locally verifiable (tests pass, types check, behavior is equivalent + or strictly improved). +- No intentional contract is changed (e.g., a message string that is part of a + stable API or documented interface must not be altered without explicit + confirmation). + +If no further instances exist beyond what was already fixed in the review, the +subagent reports "no further instances found" and exits cleanly. + +## Verification Strategy + +**Per-class (subagent)**: + +- Run only the test files that import or exercise the changed modules. +- Run `deno check` on changed files. +- Report exact commands and their exit codes. + +**Integrated (main session)**: + +- `deno task check` — full type check. +- `deno test` — full test suite. +- `deno task build` — DNT build (catches Node.js compat regressions). +- Report pass/fail with truncated output on failure. + +## Git Hygiene + +- `git diff --name-only` before and after sweep to confirm only sweep-related + files are touched. +- Do not `git add` files that were already dirty before the sweep started. +- Commit only validated changes. One commit per integrated sweep is acceptable; + per-class commits are preferred if they are independently green. + +## Risks + +| Risk | Mitigation | +| ------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------- | +| Overlapping files between classes (e.g., `connection-manager.ts` touched by class 1 and class 2) | Main session integrates sequentially; later class rebases on earlier class's changes. | +| False positives from grep (pattern match ≠ actual issue) | Evidence standard requires local verification, not just pattern match. 
| +| Changing intentional contracts (stable error messages used in downstream parsing) | Check for downstream consumers before changing any string constant. | +| Review-thread state drifts during sweep (new comments, re-reviews) | Fetch fresh thread state immediately before posting follow-up; skip threads that have new activity. | +| Subagent scope creep | Each subagent prompt includes explicit non-goals and a "stop if unsure" directive. | + +## Deliverables + +Per class: + +- List of changed files with one-line description of each change. +- Verification commands executed and their results. +- List of instances inspected but intentionally not changed, with rationale. + +Integrated: + +- Final `deno task check` + `deno test` + `deno task build` results. +- Combined changed-files summary. +- Live unresolved-thread status after any follow-up posts. diff --git a/src/config.test.ts b/src/config.test.ts index 8343346..25838ff 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -44,6 +44,10 @@ describe("config", () => { assertEquals(config.graphiti.driftThreshold, 0.5); assertEquals(config.redis.endpoint, "redis://localhost:6379"); assertEquals(config.redis.batchSize, 20); + assertEquals(config.redis.batchMaxBytes, 51_200); + assertEquals(config.redis.sessionTtlSeconds, 86_400); + assertEquals(config.redis.cacheTtlSeconds, 600); + assertEquals(config.redis.drainRetryMax, 3); }); it("prefers nested graphiti and redis values over legacy top-level graphiti keys", () => { @@ -224,6 +228,24 @@ describe("config", () => { ); }); + it("redacts credentials from malformed configured endpoint errors", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + graphiti: { + endpoint: "http://user:secret@bad host", + }, + }, + }) + ); + + assertThrows( + () => loadConfig(), + ConfigLoadError, + 'Invalid Graphiti config value for graphiti.endpoint: expected a valid URL, received "http://bad host"', + ); + }); + it("accepts endpoint-like config 
values with incidental surrounding whitespace", () => { setConfigExplorerAdapterForTesting(() => makeAdapter({ diff --git a/src/config.ts b/src/config.ts index f4ed017..211f4e3 100644 --- a/src/config.ts +++ b/src/config.ts @@ -1,6 +1,7 @@ import os from "node:os"; import { createRequire } from "node:module"; import { join } from "node:path"; +import { redactEndpointUserInfo } from "./services/endpoint-redaction.ts"; import { logger } from "./services/logger.ts"; import type { GraphitiConfig, RawGraphitiConfig } from "./types/index.ts"; @@ -142,7 +143,7 @@ const assertExplicitUrl = ( if (isValidUrlString(value)) return; throw new ConfigLoadError( `Invalid Graphiti config value for ${fieldName}: expected a valid URL, received ${ - JSON.stringify(value) + JSON.stringify(redactEndpointUserInfo(value)) }`, { code: "config-invalid" }, ); diff --git a/src/services/batch-drain.test.ts b/src/services/batch-drain.test.ts index 09e4054..57a1240 100644 --- a/src/services/batch-drain.test.ts +++ b/src/services/batch-drain.test.ts @@ -17,8 +17,7 @@ import { drainRetryKey, RedisEventsService, } from "./redis-events.ts"; - -type RedisEvent = "close" | "end" | "error" | "ready"; +import type { RedisEvent } from "./test-helpers.ts"; setSuppressConsoleWarningsDuringTestsOverride(true); diff --git a/src/services/connection-manager.test.ts b/src/services/connection-manager.test.ts index 4c62eec..cd6d10f 100644 --- a/src/services/connection-manager.test.ts +++ b/src/services/connection-manager.test.ts @@ -14,6 +14,7 @@ import { GraphitiTransportError, } from "./connection-manager.ts"; import { logger } from "./logger.ts"; +import { createAbortError } from "../utils.ts"; const originalLogger = { ...logger }; logger.info = () => {}; @@ -801,7 +802,7 @@ describe("connection manager", () => { callTool: (_request, options) => new Promise((_resolve, reject) => { options?.signal?.addEventListener("abort", () => { - reject(new DOMException("aborted", "AbortError")); + 
reject(createAbortError("aborted")); }, { once: true }); }), }), @@ -1026,6 +1027,37 @@ describe("connection manager", () => { assertInstanceOf(error.cause, TypeError); }); + it("redacts endpoint credentials in successful connection logs", async () => { + const infoCalls: unknown[][] = []; + const originalInfo = logger.info; + logger.info = (...args: unknown[]) => { + infoCalls.push(args); + }; + + try { + const manager = new GraphitiConnectionManager({ + endpoint: "http://user:secret@test", + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => Promise.resolve(), + callTool: () => Promise.resolve({ ok: true }), + }), + }); + + manager.start(); + assertEquals(await manager.ready(10), true); + + assertEquals(infoCalls, [[ + "Connected to Graphiti MCP server at", + "http://test/", + ]]); + + await manager.stop(); + } finally { + logger.info = originalInfo; + } + }); + it("moves back offline when connectionFactory throws synchronously", async () => { const clock = new FakeClock(); const manager = new GraphitiConnectionManager({ diff --git a/src/services/connection-manager.ts b/src/services/connection-manager.ts index 9809a94..88a5723 100644 --- a/src/services/connection-manager.ts +++ b/src/services/connection-manager.ts @@ -1,6 +1,7 @@ import { Client } from "@modelcontextprotocol/sdk/client/index.js"; import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js"; import manifest from "../../deno.json" with { type: "json" }; +import { isAbortError } from "../utils.ts"; import { redactEndpointUserInfo } from "./endpoint-redaction.ts"; import { logger } from "./logger.ts"; @@ -214,15 +215,6 @@ function isSessionExpired(err: unknown): boolean { ); } -function isAbortError(err: unknown): boolean { - if (!err || typeof err !== "object") return false; - if (typeof (err as { name?: unknown }).name === "string") { - return (err as { name: string }).name === "AbortError"; - } - return typeof DOMException !== 
"undefined" && err instanceof DOMException && - err.name === "AbortError"; -} - function isTransportFailure(err: unknown): boolean { if (!err) return false; if (isRequestTimeout(err) || isSessionExpired(err)) return false; @@ -245,6 +237,7 @@ function isTransportFailure(err: unknown): boolean { export class GraphitiConnectionManager implements GraphitiToolCaller { private readonly endpoint: string; + private readonly redactedEndpoint: string; private readonly requestDeadlineMs: number; private readonly queueCapacity: number; private readonly startupTimeoutMs: number; @@ -275,6 +268,7 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { constructor(options: GraphitiConnectionManagerOptions) { this.endpoint = validateEndpoint(options.endpoint); + this.redactedEndpoint = redactEndpointUserInfo(this.endpoint); this.requestDeadlineMs = options.requestDeadlineMs ?? 15_000; this.queueCapacity = options.queueCapacity ?? 32; this.startupTimeoutMs = options.startupTimeoutMs ?? 
this.requestDeadlineMs; @@ -457,7 +451,7 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { this.state = "connected"; this.reconnectDelayMs = this.reconnectInitialDelayMs; this.resolveReadyWaiters(true); - logger.info("Connected to Graphiti MCP server at", this.endpoint); + logger.info("Connected to Graphiti MCP server at", this.redactedEndpoint); void this.flushPendingQueue(); return true; } catch (err) { diff --git a/src/services/graphiti-mcp.test.ts b/src/services/graphiti-mcp.test.ts index 4c65393..06f8ed1 100644 --- a/src/services/graphiti-mcp.test.ts +++ b/src/services/graphiti-mcp.test.ts @@ -1,4 +1,8 @@ -import { assertEquals, assertRejects } from "jsr:@std/assert@^1.0.0"; +import { + assertEquals, + assertRejects, + assertStringIncludes, +} from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import { GraphitiOfflineError } from "./connection-manager.ts"; import { GraphitiMcpClient } from "./graphiti-mcp.ts"; @@ -110,6 +114,77 @@ describe("GraphitiMcpClient", () => { .extra.operation, "searchNodesWithStatus", ); + assertEquals( + (appLogCalls[0] as { body: { message: string } }).body.message, + "Graphiti MCP unavailable; continuing without memory nodes.", + ); + } finally { + setOpenCodeClient(undefined); + setWarningTaskScheduler(undefined); + setSuppressConsoleWarningsDuringTestsOverride(undefined); + } + }); + + it("uses stable Graphiti MCP availability messages across degraded operations", async () => { + const scheduledTasks: Array<() => void> = []; + const appLogCalls: Array< + { body: { message: string; extra?: { operation?: string } } } + > = []; + setSuppressConsoleWarningsDuringTestsOverride(true); + setWarningTaskScheduler((callback) => { + scheduledTasks.push(callback); + }); + setOpenCodeClient({ + app: { + log(input: unknown) { + appLogCalls.push( + input as { + body: { message: string; extra?: { operation?: string } }; + }, + ); + }, + }, + }); + + try { + const client = new 
GraphitiMcpClient({ + start() {}, + stop() { + return Promise.resolve(); + }, + ready() { + return Promise.resolve(true); + }, + callTool() { + return Promise.reject(new GraphitiOfflineError("offline", "offline")); + }, + }); + + await assertRejects( + () => client.addMemory({ name: "test", episodeBody: "body" }), + GraphitiOfflineError, + ); + assertEquals(await client.searchMemoryFacts({ query: "test" }), []); + assertEquals(await client.getEpisodes({ groupId: "group" }), []); + + assertEquals(scheduledTasks.length, 3); + for (const task of scheduledTasks) task(); + + assertEquals( + appLogCalls.map((call) => call.body.message), + [ + "Graphiti MCP unavailable; persistent memory was not saved.", + "Graphiti MCP unavailable; continuing without memory facts.", + "Graphiti MCP unavailable; continuing without episode history.", + ], + ); + assertEquals( + appLogCalls.map((call) => call.body.extra?.operation), + ["addMemory", "searchMemoryFacts", "getEpisodes"], + ); + for (const call of appLogCalls) { + assertStringIncludes(call.body.message, "Graphiti MCP unavailable;"); + } } finally { setOpenCodeClient(undefined); setWarningTaskScheduler(undefined); diff --git a/src/services/graphiti-mcp.ts b/src/services/graphiti-mcp.ts index e36f590..9855171 100644 --- a/src/services/graphiti-mcp.ts +++ b/src/services/graphiti-mcp.ts @@ -118,7 +118,7 @@ export class GraphitiMcpClient { err instanceof GraphitiSessionExpiredError ) { notifyGraphitiAvailabilityIssue( - "Graphiti unavailable; memory was not saved.", + "Graphiti MCP unavailable; persistent memory was not saved.", { operation: "addMemory", err, @@ -159,7 +159,7 @@ export class GraphitiMcpClient { err instanceof GraphitiSessionExpiredError ) { notifyGraphitiAvailabilityIssue( - "Graphiti unavailable; continuing without memory facts.", + "Graphiti MCP unavailable; continuing without memory facts.", { operation: "searchMemoryFacts", err, @@ -212,7 +212,7 @@ export class GraphitiMcpClient { err instanceof 
GraphitiSessionExpiredError ) { notifyGraphitiAvailabilityIssue( - "Graphiti unavailable; continuing without memory nodes.", + "Graphiti MCP unavailable; continuing without memory nodes.", { operation: "searchNodesWithStatus", err, @@ -245,7 +245,7 @@ export class GraphitiMcpClient { err instanceof GraphitiSessionExpiredError ) { notifyGraphitiAvailabilityIssue( - "Graphiti unavailable; continuing without episode history.", + "Graphiti MCP unavailable; continuing without episode history.", { operation: "getEpisodes", err, diff --git a/src/services/hot-tier-slice.test.ts b/src/services/hot-tier-slice.test.ts index d4c0f2c..3988183 100644 --- a/src/services/hot-tier-slice.test.ts +++ b/src/services/hot-tier-slice.test.ts @@ -17,8 +17,7 @@ import { RedisCacheService } from "./redis-cache.ts"; import { RedisClient } from "./redis-client.ts"; import { RedisEventsService } from "./redis-events.ts"; import { RedisSnapshotService } from "./redis-snapshot.ts"; - -type RedisEvent = "close" | "end" | "error" | "ready"; +import type { RedisEvent } from "./test-helpers.ts"; setSuppressConsoleWarningsDuringTestsOverride(true); diff --git a/src/services/redis-cache.test.ts b/src/services/redis-cache.test.ts index 055e736..3794ae3 100644 --- a/src/services/redis-cache.test.ts +++ b/src/services/redis-cache.test.ts @@ -4,8 +4,7 @@ import { setLoggerSilentOverride } from "./logger.ts"; import { RedisCacheService } from "./redis-cache.ts"; import { RedisClient } from "./redis-client.ts"; import { memoryCacheMetaKey } from "./redis-events.ts"; - -type RedisEvent = "close" | "end" | "error" | "ready"; +import type { RedisEvent } from "./test-helpers.ts"; class HashRedisRuntime { private readonly values = new Map(); diff --git a/src/services/redis-client.test.ts b/src/services/redis-client.test.ts index 1c4ba90..8981a57 100644 --- a/src/services/redis-client.test.ts +++ b/src/services/redis-client.test.ts @@ -3,8 +3,7 @@ import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import 
{ setSuppressConsoleWarningsDuringTestsOverride } from "./opencode-warning.ts"; import { RedisClient } from "./redis-client.ts"; import { RedisSnapshotService } from "./redis-snapshot.ts"; - -type RedisEvent = "close" | "end" | "error" | "ready"; +import type { RedisEvent } from "./test-helpers.ts"; setSuppressConsoleWarningsDuringTestsOverride(true); diff --git a/src/services/redis-events.test.ts b/src/services/redis-events.test.ts index 1d7d2ea..2b3535a 100644 --- a/src/services/redis-events.test.ts +++ b/src/services/redis-events.test.ts @@ -14,8 +14,7 @@ import { drainPendingKey, RedisEventsService, } from "./redis-events.ts"; - -type RedisEvent = "close" | "end" | "error" | "ready"; +import type { RedisEvent } from "./test-helpers.ts"; setSuppressConsoleWarningsDuringTestsOverride(true); diff --git a/src/services/session-corpus.ts b/src/services/session-corpus.ts index 04edc70..aadb8ec 100644 --- a/src/services/session-corpus.ts +++ b/src/services/session-corpus.ts @@ -1,4 +1,5 @@ import type { RedisClient, RedisKeySnapshot } from "./redis-client.ts"; +import { createAbortError } from "../utils.ts"; const MAX_INDEXED_BODY_BYTES = 512 * 1024; const SEARCH_RESULT_LIMIT = 5; @@ -1599,7 +1600,7 @@ export const createSessionCorpusService = (options: SessionCorpusOptions) => { async fetchAndIndex(input: FetchAndIndexInput) { const controller = new AbortController(); const timeout = setTimeout( - () => controller.abort(), + () => controller.abort(createAbortError("Fetch timed out")), (input.timeoutSeconds ?? 
15) * 1000, ); try { diff --git a/src/services/session-executor.test.ts b/src/services/session-executor.test.ts index 31d9fe0..cbd680f 100644 --- a/src/services/session-executor.test.ts +++ b/src/services/session-executor.test.ts @@ -10,6 +10,7 @@ import { SESSION_EXECUTOR_RESPONSE_BUDGET_BYTES, } from "./session-executor.ts"; import { sessionMcpResponseSchemas } from "./session-mcp-types.ts"; +import { createAbortError } from "../utils.ts"; const textEncoder = new TextEncoder(); type ExecutorOptions = NonNullable[0]>; @@ -31,7 +32,7 @@ describe("session-executor", () => { runCommand: ({ signal }: RunCommandInput) => new Promise((_resolve, reject) => { signal.addEventListener("abort", () => { - reject(new DOMException("Aborted", "AbortError")); + reject(createAbortError("Aborted")); }, { once: true }); }), readFile: () => Promise.reject(new Error("unexpected file read")), diff --git a/src/services/session-executor.ts b/src/services/session-executor.ts index 51b153e..9e528f6 100644 --- a/src/services/session-executor.ts +++ b/src/services/session-executor.ts @@ -1,4 +1,5 @@ import path from "node:path"; +import { createAbortError, isAbortError } from "../utils.ts"; import type { SessionMcpRequestMap, SessionMcpResponseMap, @@ -205,11 +206,6 @@ const summarizeFileBody = (paths: string[], contents: string[]): string => paths.map((filePath, index) => `==> ${filePath} <==\n${contents[index]}`) .join("\n\n").trim(); -const isAbortError = (error: unknown): boolean => - error instanceof DOMException - ? 
error.name === "AbortError" - : error instanceof Error && error.name === "AbortError"; - const truncateToBudget = (value: string, budgetBytes: number): string => { if (byteLength(value) <= budgetBytes) return value; let result = value; @@ -485,7 +481,7 @@ export const createSessionExecutor = ( }); const controller = new AbortController(); const timeout = setTimeout( - () => controller.abort(), + () => controller.abort(createAbortError("Command timed out")), timeoutSeconds * 1000, ); diff --git a/src/services/session-mcp-runtime.test.ts b/src/services/session-mcp-runtime.test.ts index 2e3c30e..7fb1f47 100644 --- a/src/services/session-mcp-runtime.test.ts +++ b/src/services/session-mcp-runtime.test.ts @@ -19,8 +19,7 @@ import { } from "./session-mcp-types.ts"; import { RedisClient } from "./redis-client.ts"; import { SessionManager } from "../session.ts"; - -type RedisEvent = "close" | "end" | "error" | "ready"; +import type { RedisEvent } from "./test-helpers.ts"; class DoctorRedisRuntime { private readonly hashes = new Map>(); @@ -1551,6 +1550,35 @@ describe("session-mcp-runtime", () => { } }); + it("returns a stable bounded error when session_index cannot read the requested path", async () => { + const runtime = createSessionMcpRuntime({ + readSessionIndexFile: () => + Promise.reject(new Error("EACCES: secret detail")), + } as never); + + try { + const error = await assertRejects( + () => + runtime.tools.session_index.execute( + { + root_session_id: "root-path-error", + path: "README.md", + }, + toolContext, + ), + ) as Error & { code?: string; bounded?: boolean }; + + assertEquals( + error.message, + "session_index could not read the requested path.", + ); + assertEquals(error.code, "session_index_path_unreadable"); + assertEquals(error.bounded, true); + } finally { + await runtime.dispose(); + } + }); + it("replaces prior indexed content when session_index repeats the same source and label", async () => { const redis = new RedisClient({ endpoint: 
"redis://unused" }); const runtime = createSessionMcpRuntime({ diff --git a/src/services/session-mcp-runtime.ts b/src/services/session-mcp-runtime.ts index dcb734a..0355ea6 100644 --- a/src/services/session-mcp-runtime.ts +++ b/src/services/session-mcp-runtime.ts @@ -325,9 +325,7 @@ const readSessionIndexBody = async ( } catch (error) { throw createBoundedSessionIndexError( "session_index_path_unreadable", - error instanceof Error - ? `session_index could not read path: ${resolvedPath}: ${error.message}` - : `session_index could not read path: ${String(error)}`, + "session_index could not read the requested path.", ); } }; diff --git a/src/services/test-helpers.ts b/src/services/test-helpers.ts new file mode 100644 index 0000000..ad27b0c --- /dev/null +++ b/src/services/test-helpers.ts @@ -0,0 +1 @@ +export type RedisEvent = "close" | "end" | "error" | "ready"; diff --git a/src/utils.test.ts b/src/utils.test.ts index d653334..67bf662 100644 --- a/src/utils.test.ts +++ b/src/utils.test.ts @@ -4,7 +4,9 @@ import { stub } from "jsr:@std/testing@^1.0.0/mock"; import type { Part, TextPart } from "@opencode-ai/sdk"; import os from "node:os"; import { + createAbortError, extractTextFromParts, + isAbortError, isTextPart, makeGroupId, makeUserGroupId, @@ -105,6 +107,21 @@ describe("utils", () => { }); }); + describe("abort helpers", () => { + it("creates canonical abort-shaped errors", () => { + const error = createAbortError("timed out"); + + assertEquals(error.name, "AbortError"); + assertEquals(error.message, "timed out"); + assertEquals(isAbortError(error), true); + }); + + it("rejects non-abort errors", () => { + assertEquals(isAbortError(new Error("boom")), false); + assertEquals(isAbortError("AbortError"), false); + }); + }); + describe("group id helpers", () => { it("normalizes Windows-style project paths", () => { assertEquals( diff --git a/src/utils.ts b/src/utils.ts index 3527bf0..8a5021d 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -89,6 +89,28 @@ export 
const isTextPart = (value: unknown): value is Part & { export const extractTextFromParts = (parts: Part[]): string => parts.filter(isTextPart).map((part) => part.text).join(" ").trim(); +/** + * Construct a canonical abort-shaped error for use as an abort reason or test double. + */ +export const createAbortError = (message = "Aborted"): Error => { + if (typeof DOMException !== "undefined") { + return new DOMException(message, "AbortError"); + } + + const error = new Error(message); + error.name = "AbortError"; + return error; +}; + +/** + * Narrow unknown values to abort-shaped errors across runtimes. + */ +export const isAbortError = (error: unknown): boolean => { + if (!error || typeof error !== "object") return false; + return (error instanceof DOMException && error.name === "AbortError") || + (error instanceof Error && error.name === "AbortError"); +}; + /** * Truncate `text` to at most `budget` characters without cutting mid-line. * Prefers to break at the last newline within the budget window; falls back From f32e0b9b9c898e27fc58fe30db2f874b511c8d18 Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Tue, 24 Mar 2026 17:29:49 +0800 Subject: [PATCH 25/38] fix: address remaining review follow-ups --- AGENTS.md | 11 +- docs/ContextOverhaulTests.md | 16 ++ docs/ReviewProtocol.md | 97 +++++-- ...duped-issue-class-sweeps-implementation.md | 254 ++++++++++++++++++ ...tocol-deduped-issue-class-sweeps-design.md | 204 ++++++++++++++ src/config.test.ts | 22 +- src/config.ts | 2 +- src/services/batch-drain.test.ts | 43 +++ src/services/batch-drain.ts | 30 ++- 9 files changed, 645 insertions(+), 34 deletions(-) create mode 100644 docs/ContextOverhaulTests.md create mode 100644 docs/superpowers/plans/2026-03-24-review-protocol-deduped-issue-class-sweeps-implementation.md create mode 100644 docs/superpowers/specs/2026-03-24-review-protocol-deduped-issue-class-sweeps-design.md diff --git a/AGENTS.md b/AGENTS.md index fcb0f71..7824570 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -87,7 +87,7 @@ asynchronously on idle or after compaction. in-memory fallback. Graphiti is optional; plugin continues with local-only mode if unavailable. - **Compaction survival**: Snapshots and events must persist across compaction - cycles. Test via `plans/ContextOverhaulTests.md`. + cycles. Test via `docs/ContextOverhaulTests.md`. - **Concurrency**: Multiple child sessions should not corrupt root snapshot. Serialize child event writes to avoid race conditions. @@ -122,10 +122,10 @@ When starting work, read in this order: transform. 7. **src/services/** — Redis clients, batch drain, Graphiti async worker, cache management. -8. **plans/ContextOverhaul.md** — full design rationale (especially for async +8. **docs/ContextOverhaul.md** — full design rationale (especially for async decisions and event taxonomy). -9. **plans/ContextOverhaulTests.md** — test expectations and validation - checklist. +9. **docs/ContextOverhaulTests.md** — runtime validation entry point and rewrite + status. 10. **deno.json** — dependencies and build tasks. 
## Configuration @@ -174,7 +174,8 @@ Endpoint values must be explicit URLs with schemes, for example | `src/services/graphiti-async.ts` | Async drain worker, Graphiti interaction. | | `src/services/connection-manager.ts` | Graphiti MCP health checks. | | `src/services/batch-drain.ts` | Event batching, retry logic. | -| `plans/ContextOverhaul.md` | Full design document. | +| `docs/ContextOverhaul.md` | Full design document. | +| `docs/ContextOverhaulTests.md` | Runtime validation entry point and rewrite status. | | `docs/ReviewProtocol.md` | PR review handling workflow. | --- diff --git a/docs/ContextOverhaulTests.md b/docs/ContextOverhaulTests.md new file mode 100644 index 0000000..332f412 --- /dev/null +++ b/docs/ContextOverhaulTests.md @@ -0,0 +1,16 @@ +# Context Overhaul Test Plan + +This file is the stable `docs/` entry point for context-overhaul runtime +validation references. + +The active rewrite of the authoritative test plan is tracked in +`docs/superpowers/plans/2026-03-24-agentic-runtime-test-plan-rewrite.md`. + +Supporting context lives in: + +- `docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md` for + the rewrite scope and acceptance shape. +- `docs/ContextOverhaul.md` for the historical architecture rationale. + +Until the full rewrite lands here, use this path as the canonical non-dated +reference from repository docs and operator guides. diff --git a/docs/ReviewProtocol.md b/docs/ReviewProtocol.md index 1018531..268cf14 100644 --- a/docs/ReviewProtocol.md +++ b/docs/ReviewProtocol.md @@ -7,7 +7,11 @@ request and review feedback needs to be handled systematically. 
- use live GitHub review state as the source of truth - verify each review claim before changing code -- keep fixes narrow and scoped to the verified issue +- treat verified claims as seed evidence for repo-wide issue-class sweeps, not + as the full endpoint of work +- dedupe verified claims into issue classes and sweep the repo for each class +- keep per-thread verification evidence narrow; keep class-sweep fixes within + the evidence-supported issue class - resolve handled review threads and leave review re-requesting to the user ## Required Unresolved-Batch Query @@ -62,23 +66,75 @@ deno eval 'const o="OWNER",r="REPO",n="PR_NUMBER",maxUnresolved=10,mq="query($o: 3. Create a working checklist. - Write the current unresolved items into a local artifact or todo list. - - Treat the checklist as execution tracking only, not as the code-change - plan. + - The checklist tracks execution state, not the code-change plan. It must + carry three distinct state layers: + - per-thread verification status (verified / already satisfied / stale / + invalid / unclear) + - deduped issue classes discovered in the batch (populated after Step 4a) + - per-class sweep outcomes (populated after Step 4b) + - These layers may live in the same artifact but must remain distinguishable. -4. Verify each unresolved claim independently. +4. Verify claims and sweep issue classes. + + **4a. Verify each unresolved claim independently.** - Spawn one swarm session per unresolved review item. - Run independent sessions concurrently when scopes do not overlap. - Serialize items that touch the same risky area. 
- Each session must: - verify the claim against the current working tree - - classify it as verified, already satisfied, stale, invalid, or unclear - - apply a narrow fix only if verified - - add or update focused tests when needed - - run targeted validation for the touched scope + - classify it as: verified, already satisfied, stale, invalid, or unclear + - A verified classification means the claim is confirmed as a real issue in + the current working tree. It becomes seed evidence for the class-sweep + phase, not the endpoint of work. + - Non-verified classifications (already satisfied, stale, invalid, unclear) + proceed directly to thread handling in Step 5. + + **4b. Dedupe verified claims into issue classes and dispatch class sweeps.** + + _Zero-verified short-circuit:_ if no claims in the current batch are + classified as `verified`, skip this sub-step entirely and proceed to Step 5. + + For all `verified` claims, normalize into deduped issue classes. Each class + entry must capture: + - issue-class label + - seed review thread IDs + - seed files / evidence locations + - risky area / likely search scope + - whether the class can run in parallel with other classes + + Multiple verified comments that describe the same underlying pattern must be + collapsed into one issue class. Do not launch duplicate class sweeps for the + same issue class within one batch. + + Dispatch one subagent per deduped verified issue class: + - Launch all non-overlapping class sweeps in parallel. + - Serialize classes that overlap. Overlap is defined conservatively as: + - any shared seed or touched file already known from verification, or + - the same explicitly identified risky area / search scope. + - If overlap between two classes is unknown, serialize rather than guess. + - This dispatch-time serialization rule is authoritative for the review + protocol, even if earlier repo-wide sweep examples resolved overlap at + integration time instead. 
+ + Each class-sweep subagent must: + 1. take the verified review comment(s) as seed evidence + 2. identify the reusable issue-class definition from those seeds + 3. search the repo for the same class of issue + 4. fix all locally-supported matches within scope, not just the seed location + 5. add or extend focused tests where appropriate + 6. run targeted validation for every touched scope + 7. report touched files, validations, and any residual risk or skipped + matches + + The sweep is repo-wide within the evidence-supported scope, but not a license + for unrelated cleanup. If the sweep subagent finds no further instances + beyond the seed fix, it may report "no further instances found" and exit + successfully. 5. Resolve review items on GitHub. - For each handled item: - - reply if a short explanation is useful + - reply if a short explanation is useful; cite the repo-wide class-sweep + result where applicable rather than only the seed fix - resolve the review thread when the issue is fixed or already satisfied - If a claim is exaggerated, stale, or invalid, leave a brief factual reply before resolving when appropriate. @@ -88,6 +144,9 @@ deno eval 'const o="OWNER",r="REPO",n="PR_NUMBER",maxUnresolved=10,mq="query($o: - If replies were added through a pending personal review, explicitly submit that review so the comments are visible to reviewers and no replies remain stuck in `PENDING` state. + - Thread resolution remains a per-thread artifact. The broader class-sweep + outcome is valid evidence for the reply, but each thread is still resolved + individually. 6. Re-check live review state. - Query GitHub again. @@ -104,18 +163,26 @@ deno eval 'const o="OWNER",r="REPO",n="PR_NUMBER",maxUnresolved=10,mq="query($o: 8. Report status. 
- Include: - PR number and URL - - unresolved items found - - items fixed - - items resolved/commented + - unresolved threads found + - per-thread verification classifications + - deduped verified issue classes + - repo-wide sweep fixes per class (files touched, validations, residual + risk) + - threads resolved / replied to - commit sha - push status - - final unresolved review count + - final unresolved review count with reasons for any remaining items ## Guardrails - always use live `gh` data, not stale local notes, as the source of truth - preserve unrelated uncommitted work - do not perform opportunistic refactors while addressing reviews -- keep fixes local to the verified claim -- prefer focused tests and validation per review item before broader checks +- keep per-thread verification evidence narrow and local to the specific claim +- once a claim is verified, the resulting class sweep may expand repo-wide but + only within the evidence-supported issue class +- never launch duplicate sweeps for the same verified issue class in one batch +- serialize overlapping or unknown-overlap class sweeps at dispatch time +- prefer focused tests and validation per review item and per class sweep before + broader checks - treat resolved or outdated threads as historical context, not current work diff --git a/docs/superpowers/plans/2026-03-24-review-protocol-deduped-issue-class-sweeps-implementation.md b/docs/superpowers/plans/2026-03-24-review-protocol-deduped-issue-class-sweeps-implementation.md new file mode 100644 index 0000000..908a5e1 --- /dev/null +++ b/docs/superpowers/plans/2026-03-24-review-protocol-deduped-issue-class-sweeps-implementation.md @@ -0,0 +1,254 @@ +# Review Protocol Deduped Issue-Class Sweeps Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Update `docs/ReviewProtocol.md` so verified review comments are +deduped into issue classes and each non-overlapping class is swept repo-wide by +one subagent in parallel. + +**Architecture:** Keep the change doc-only and local to the review protocol. +Preserve the existing live-GitHub query and per-thread verification flow, then +extend Step 4 into a two-phase model (`4a` verification, `4b` deduped class +sweeps), update downstream thread-resolution/reporting language, and align the +guardrails so they no longer contradict repo-wide class sweeps. + +**Tech Stack:** Markdown docs, existing review workflow in +`docs/ReviewProtocol.md`, approved spec in +`docs/superpowers/specs/2026-03-24-review-protocol-deduped-issue-class-sweeps-design.md`. + +--- + +## File Structure And Responsibility Lock-In + +- Modify: `docs/ReviewProtocol.md` + - The only implementation target. It must absorb the approved workflow changes + without drifting into a broader process rewrite. +- Reference: + `docs/superpowers/specs/2026-03-24-review-protocol-deduped-issue-class-sweeps-design.md` + - The authoritative design for the protocol update. +- Reference: + `docs/superpowers/specs/2026-03-24-review-issue-class-sweep-design.md` + - Supporting evidence for the repo-wide issue-class sweep model and expected + subagent contract. + +No code files should change. No additional docs should be created during +implementation. + +### Task 1: Update Purpose And Step 4 Workflow + +**Files:** + +- Modify: `docs/ReviewProtocol.md` +- Reference: + `docs/superpowers/specs/2026-03-24-review-protocol-deduped-issue-class-sweeps-design.md` + +- [ ] **Step 1: Read the current protocol and approved spec side by side** + +Read: + +- `docs/ReviewProtocol.md` +- `docs/superpowers/specs/2026-03-24-review-protocol-deduped-issue-class-sweeps-design.md` + +Expected: identify exactly which bullets in `Purpose` and `Workflow` still +describe a per-thread-only process. 
+ +- [ ] **Step 2: Write the failing expectation checklist in notes** + +Capture these required deltas before editing: + +- `Purpose` must no longer imply the workflow stops at the single verified + claim. +- Step `4` must become two explicit sub-steps: `4a` per-thread verification and + `4b` deduped issue-class sweeps. +- `4b` must state one subagent per deduped verified class. +- `4b` must state all non-overlapping classes run in parallel. +- `4b` must state overlapping risky areas/files are serialized. +- `4b` must state unknown overlap is serialized rather than guessed. +- the protocol must make dispatch-time serialization authoritative even if older + sweep examples discuss overlap resolution later during integration. + +- [ ] **Step 3: Edit the Purpose bullets minimally** + +Update `docs/ReviewProtocol.md` so `Purpose` still emphasizes verification and +narrow evidence, but no longer contradicts repo-wide issue-class sweeps. + +Expected result: + +- verification remains the gate +- repo-wide class sweeps are allowed only after `verified` classification +- thread handling remains explicit + +- [ ] **Step 4: Rewrite Workflow Step 4 into `4a` and `4b`** + +Implement this structure in `docs/ReviewProtocol.md`: + +- `4a.` verify each unresolved claim independently +- `4b.` dedupe all `verified` claims into issue classes, then dispatch one + subagent per deduped class + +The `4a` bullets must keep the existing classifications: + +- `verified` +- `already satisfied` +- `stale` +- `invalid` +- `unclear` + +The `4b` bullets must include: + +- zero-verified short-circuit +- per-class normalized fields (`class label`, `seed thread ids`, `seed files`, + `risky area/search scope`, `parallel eligibility`) +- dedupe rule (no duplicate class sweeps in one batch) +- parallel/non-parallel dispatch rule +- serialize-when-unknown default +- dispatch-time serialization authority over older integration-time examples + +- [ ] **Step 5: Verify the doc now mentions the zero-verified edge case** + +Run: +
+```bash +rg -n "zero|verified|issue class|dedupe|parallel|serialize" docs/ReviewProtocol.md +``` + +Expected: the rewritten Step `4` includes a clear skip path when no claims are +classified as `verified`. + +### Task 2: Update Class-Sweep Contract, Thread Handling, And Reporting + +**Files:** + +- Modify: `docs/ReviewProtocol.md` +- Reference: + `docs/superpowers/specs/2026-03-24-review-protocol-deduped-issue-class-sweeps-design.md` +- Reference: + `docs/superpowers/specs/2026-03-24-review-issue-class-sweep-design.md` + +- [ ] **Step 1: Update Step `3` so the working checklist carries the status + model** + +Edit Step `3` in `docs/ReviewProtocol.md` so the working checklist/local +artifact explicitly tracks distinct states for: + +- per-thread verification status +- deduped issue classes for the batch +- per-class sweep outcomes + +Expected: the protocol says these states may live in the same artifact, but must +remain distinct, and the existing "execution tracking only" wording is softened +enough to allow classification and sweep-outcome tracking without turning the +checklist into a separate implementation plan. 
+ +- [ ] **Step 2: Add the class-sweep subagent contract to Step `4b`** + +Edit `docs/ReviewProtocol.md` so the class-sweep subagent responsibilities +explicitly include: + +- using verified review comments as seed evidence +- identifying the reusable issue-class definition from those seeds +- searching the repo for the same issue class +- fixing all locally-supported matches in scope +- adding/extending focused tests when needed +- running targeted validation for touched scope +- reporting touched files, validation, and residual risk +- allowing a clean "no further instances found" outcome + +- [ ] **Step 3: Make the overlap rules explicit and authoritative** + +Edit `docs/ReviewProtocol.md` so `4b` explicitly says: + +- known overlap means shared seed/touched files or the same risky area/search + scope +- unknown overlap must be serialized rather than guessed +- this dispatch-time serialization rule is authoritative for the review + protocol, even if older sweep examples handled overlap later during + integration + +- [ ] **Step 4: Update Step `5` thread-resolution wording** + +Edit Step `5` so thread replies and resolutions can cite repo-wide class-sweep +evidence where useful, while preserving thread-level accountability. + +Expected: Step `5` still resolves GitHub review threads individually, but the +prose now recognizes broader class-sweep fixes as valid evidence. + +- [ ] **Step 5: Update Step `8` reporting bullets** + +Edit the report section so it requires: + +- unresolved threads found +- per-thread classifications +- deduped verified issue classes +- repo-wide sweep fixes per class +- threads resolved/commented +- final unresolved review count + +- [ ] **Step 6: Re-read the step numbering for consistency** + +Manual check: + +- top-level steps remain `1` through `8` +- Step `4` uses substeps `4a` and `4b` +- downstream steps remain `5`, `6`, `7`, `8` + +Expected: no accidental renumbering drift. 
+ +### Task 3: Align Guardrails And Verify The Final Document + +**Files:** + +- Modify: `docs/ReviewProtocol.md` +- Test: manual doc read + targeted `rg` + +- [ ] **Step 1: Update the contradictory guardrails** + +Adjust the guardrails so they say, in effect: + +- avoid opportunistic unrelated refactors +- keep per-thread verification evidence local and narrow +- allow repo-wide fixes only within the verified, evidence-supported issue class +- never launch duplicate sweeps for the same class in one batch + +- [ ] **Step 2: Run targeted verification searches** + +Run: + +```bash +rg -n "verified|issue class|dedupe|parallel|serialize|guardrails|local to the verified claim|duplicate sweeps" docs/ReviewProtocol.md +``` + +Expected: all accepted concepts from the spec are visibly present in the final +doc and the old contradictory guardrail text is gone or qualified. + +- [ ] **Step 3: Manually read the final protocol end to end** + +Read `docs/ReviewProtocol.md` from top to bottom. + +Expected checks: + +- the workflow is still concise +- the new class-sweep rules are understandable without reading the spec +- the guardrails no longer contradict the workflow +- thread-level verification and class-level execution are clearly distinct + +- [ ] **Step 4: Verify the implementation against the acceptance checklist** + +Check each item from +`docs/superpowers/specs/2026-03-24-review-protocol-deduped-issue-class-sweeps-design.md`: + +1. verified comments are seed evidence, not the endpoint +2. same-class verified comments are deduped +3. one subagent per deduped verified class +4. non-overlapping classes run in parallel +5. overlapping scopes serialize +6. each class sweep searches repo-wide in evidence-supported scope +7. reporting distinguishes per-thread and per-class results +8. zero-verified batches skip class-sweep dispatch +9. overlap is defined conservatively +10. 
guardrails do not contradict the sweep behavior + +Expected: every item is satisfied directly in `docs/ReviewProtocol.md`. diff --git a/docs/superpowers/specs/2026-03-24-review-protocol-deduped-issue-class-sweeps-design.md b/docs/superpowers/specs/2026-03-24-review-protocol-deduped-issue-class-sweeps-design.md new file mode 100644 index 0000000..f178119 --- /dev/null +++ b/docs/superpowers/specs/2026-03-24-review-protocol-deduped-issue-class-sweeps-design.md @@ -0,0 +1,204 @@ +# Review Protocol Deduped Issue-Class Sweep Design + +## Goal + +Update `docs/ReviewProtocol.md` so PR review handling no longer stops at +per-comment verification. Once a review claim is found `verified`, the protocol +must require a repo-wide sweep for the same issue class, deduped across all +verified comments in the current batch, with all resulting class sweeps launched +in parallel when their scopes do not overlap. + +## Why + +The current review protocol treats each unresolved review thread as an isolated +fix unit. That is useful for truth-tracking, but it leaves an execution gap: +when one review comment exposes a broader issue pattern, the protocol does not +require the agent to search for and fix the same class elsewhere in the repo. + +That gap has already shown up in practice. The repository now has an explicit +issue-class sweep design in +`docs/superpowers/specs/2026-03-24-review-issue-class-sweep-design.md`, but +`docs/ReviewProtocol.md` still documents the older per-thread-only workflow. The +protocol should reflect the stronger workflow so future review handling is +systematic rather than opportunistic. + +## Non-Goals + +- Do not change the authoritative live-GitHub query requirement. +- Do not remove per-comment verification as the first gate. +- Do not require broad speculative refactors unrelated to a verified issue + class. +- Do not force parallel execution when verified issue classes overlap in the + same risky area. + +## Required Workflow Changes + +### 1. 
Preserve per-comment verification as the first gate + +The protocol must continue to verify each unresolved review claim against the +current working tree before any broader action is taken. + +Each review-item verification session still needs to classify the claim as one +of: + +- `verified` +- `already satisfied` +- `stale` +- `invalid` +- `unclear` + +Only `verified` claims are eligible to seed repo-wide class sweeps. + +### 2. Add a deduped issue-class normalization phase + +After the per-comment verification pass completes for the current unresolved +batch, the main flow must group all `verified` claims into deduped issue +classes. + +The protocol should require each class entry to capture at least: + +- issue-class label +- seed review thread ids +- seed files / evidence locations +- risky area / likely search scope +- whether the class can run in parallel with other classes + +Multiple verified comments that describe the same underlying pattern must be +collapsed into one issue class for that batch. The protocol must explicitly say +that the agent should not launch duplicate repo-wide sweep subagents for the +same class. + +If the current batch produces zero `verified` claims, this normalization phase +must be skipped entirely and the protocol should continue with thread handling +for the non-verified classifications only. + +### 3. Require one subagent per deduped verified class + +For the class-sweep phase, the protocol must require: + +- one subagent per deduped verified issue class +- all non-overlapping class sweeps launched at the same time +- overlapping classes serialized when they touch the same risky area or files + +For this protocol update, overlap should be defined conservatively as either: + +- any shared touched or seed file already known from verification, or +- the same explicitly identified risky area / search scope + +If overlap is unknown, the protocol should direct the agent to serialize rather +than guess. 
+ +This changes the current mental model from “one review comment equals one full +fix unit” to “one review comment verifies a seed example, and one deduped class +sweep handles the repo-wide fix unit.” + +This dispatch-time serialization rule should be treated as authoritative for the +review protocol, even if earlier repo-wide sweep examples resolved overlap at +integration time instead. + +### 4. Define each class-sweep subagent’s contract + +The protocol should state that each issue-class sweep subagent must: + +1. take the verified review comment(s) as seed evidence +2. identify the reusable class definition from those seeds +3. search the repo for the same class of issue +4. fix all locally-supported matches within scope, not just the seed location +5. add or extend focused tests where appropriate +6. run targeted validation for every touched scope +7. report touched files, validations, and any residual risk or skipped matches + +The wording should be explicit that the sweep is repo-wide within the evidence- +supported scope, but not a license for unrelated cleanup. + +The protocol should also allow a clean no-op outcome: if the sweep subagent +finds no further supported instances beyond the seed fix, it may report “no +further instances found” and exit successfully. + +### 5. Separate thread handling from class-sweep execution + +The protocol must continue to resolve GitHub review threads as thread-level +artifacts, but the implementation evidence used in replies/resolution should now +reference the broader class-sweep result where applicable. + +That means the status model should distinguish: + +- per-thread verification status +- deduped issue classes discovered in the batch +- repo-wide class-sweep outcomes per class + +The protocol should explicitly say this intermediate model can live in the same +working checklist or local artifact already created for unresolved items, as +long as thread-level and class-level states stay distinct. + +### 6. 
Update reporting requirements + +The final report section in `docs/ReviewProtocol.md` should now require: + +- unresolved threads found +- per-thread classifications +- deduped verified issue classes +- repo-wide sweep fixes per class +- threads resolved / replied to +- remaining unresolved threads with reasons + +## Recommended `docs/ReviewProtocol.md` Edit Shape + +The document should stay concise and procedural. The best update is to revise +the existing workflow steps rather than bolt on a disconnected appendix. + +Recommended structure: + +- keep Steps 1-3 mostly unchanged +- rewrite Step 4 into two phases: + - `4a.` verify each unresolved claim independently + - `4b.` dedupe verified claims into issue classes and launch one sweep + subagent per class +- keep existing top-level numbering after Step 4 unchanged; `4a` and `4b` are + sub-steps, not new top-level numbered steps +- update Step 5 to mention thread replies/resolution can cite the repo-wide + sweep result for that class +- update Step 8 reporting bullets to include per-thread classifications and + deduped issue classes +- update the guardrails so “keep fixes local to the verified claim” applies to + the verification decision itself, while verified class sweeps are allowed to + expand repo-wide within the evidence-supported issue class +- add a guardrail that duplicate sweeps for the same verified class are not + allowed within one batch + +## Acceptance Criteria + +The update is complete when `docs/ReviewProtocol.md` clearly states all of the +following: + +1. verified review comments are only the seed, not the full endpoint of work +2. verified comments in the same issue class must be deduped +3. one subagent per deduped verified class is required +4. all non-overlapping class sweeps run in parallel +5. overlapping risky areas are serialized +6. each class-sweep subagent must search the repo for the same issue class and + fix all supported matches in scope +7. 
final reporting distinguishes per-thread results from per-class sweep results +8. the protocol explicitly skips class-sweep dispatch when no claims are + classified as `verified` +9. the protocol defines overlap conservatively enough to avoid conflicting + parallel edits +10. the guardrails no longer contradict the required repo-wide sweep behavior + +## Risks and Mitigations + +- **Over-broad sweeps** + - Mitigation: require issue classes to be seeded by verified comments and keep + the search bounded to evidence-supported patterns. +- **Duplicate or conflicting edits** + - Mitigation: dedupe verified comments into one class before dispatch and + serialize overlapping risky areas. +- **Loss of review-thread accountability** + - Mitigation: preserve per-thread verification status as a first-class output + even though execution expands to class-level sweeps. + +## Implementation Note + +This design updates the protocol document only. It does not itself require +changes to runtime code or GitHub automation; it changes the documented review +handling workflow that future agents must follow. 
diff --git a/src/config.test.ts b/src/config.test.ts index 25838ff..8c1aeec 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -224,7 +224,25 @@ describe("config", () => { assertThrows( () => loadConfig(), ConfigLoadError, - 'Invalid Graphiti config value for graphiti.endpoint: expected a valid URL, received "not a valid url"', + 'Invalid config value for graphiti.endpoint: expected a valid URL, received "not a valid url"', + ); + }); + + it("uses the same neutral validation wording for invalid redis endpoints", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + redis: { + endpoint: "not a valid redis url", + }, + }, + }) + ); + + assertThrows( + () => loadConfig(), + ConfigLoadError, + 'Invalid config value for redis.endpoint: expected a valid URL, received "not a valid redis url"', ); }); @@ -242,7 +260,7 @@ describe("config", () => { assertThrows( () => loadConfig(), ConfigLoadError, - 'Invalid Graphiti config value for graphiti.endpoint: expected a valid URL, received "http://bad host"', + 'Invalid config value for graphiti.endpoint: expected a valid URL, received "http://bad host"', ); }); diff --git a/src/config.ts b/src/config.ts index 211f4e3..cd68ba7 100644 --- a/src/config.ts +++ b/src/config.ts @@ -142,7 +142,7 @@ const assertExplicitUrl = ( if (value === undefined) return; if (isValidUrlString(value)) return; throw new ConfigLoadError( - `Invalid Graphiti config value for ${fieldName}: expected a valid URL, received ${ + `Invalid config value for ${fieldName}: expected a valid URL, received ${ JSON.stringify(redactEndpointUserInfo(value)) }`, { code: "config-invalid" }, diff --git a/src/services/batch-drain.test.ts b/src/services/batch-drain.test.ts index 57a1240..3df78ba 100644 --- a/src/services/batch-drain.test.ts +++ b/src/services/batch-drain.test.ts @@ -369,6 +369,49 @@ describe("batch drain", () => { assertEquals(maxInFlight, 1); }); + it("cancels heartbeat rescheduling once drain cleanup begins", 
async () => { + const { events, drain } = await createDeps({ + events: { claimLockTtlSeconds: 2 }, + drain: { batchSize: 1, claimHeartbeatIntervalMs: 250 }, + }); + const event = createSessionEvent("message", "user", { + summary: "cleanup race", + body: "cleanup race", + }); + await events.recordEvent("session-1", "group-1", event); + + const originalRefreshClaimLease = events.refreshClaimLease.bind(events); + let releaseRefresh!: () => void; + const refreshBlocked = new Promise((resolve) => { + releaseRefresh = resolve; + }); + let refreshCalls = 0; + events.refreshClaimLease = async (...args) => { + refreshCalls += 1; + if (refreshCalls === 1) { + await refreshBlocked; + } + return await originalRefreshClaimLease(...args); + }; + + const drainPromise = drain.drainGroup("group-1", { + addMemory() { + return Promise.resolve(); + }, + } as never); + + await new Promise((resolve) => setTimeout(resolve, 300)); + releaseRefresh(); + + const result = await drainPromise; + assertEquals(result, { status: "success", drained: 1 }); + + const callsAtCompletion = refreshCalls; + assertEquals(callsAtCompletion >= 4, true); + await new Promise((resolve) => setTimeout(resolve, 300)); + assertEquals(refreshCalls, callsAtCompletion); + }); + it("limits batches using serialized Graphiti episode bodies", async () => { const first = createSessionEvent("message", "user", { summary: "first", diff --git a/src/services/batch-drain.ts b/src/services/batch-drain.ts index e855625..b770cfc 100644 --- a/src/services/batch-drain.ts +++ b/src/services/batch-drain.ts @@ -258,7 +258,15 @@ export class BatchDrainService { let lostClaim = false; let claimRefreshChain: Promise = Promise.resolve(); let heartbeatTimer: ReturnType | null = null; + let cancelHeartbeat = false; let refreshClaimHeartbeatRunning = false; + const scheduleHeartbeat = (): void => { + if (cancelHeartbeat || lostClaim) return; + heartbeatTimer = setTimeout( + refreshClaimHeartbeat, + 
this.getClaimHeartbeatIntervalMs(claimed.lockTtlSeconds), + ); + }; const refreshClaimOwnership = (): Promise => { const refreshTask = claimRefreshChain.then(async () => { if (lostClaim) return false; @@ -284,12 +292,7 @@ export class BatchDrainService { await refreshClaimOwnership(); } finally { refreshClaimHeartbeatRunning = false; - if (!lostClaim) { - heartbeatTimer = setTimeout( - refreshClaimHeartbeat, - this.getClaimHeartbeatIntervalMs(claimed.lockTtlSeconds), - ); - } + scheduleHeartbeat(); } }; const confirmClaimOwnership = (): Promise => @@ -299,10 +302,7 @@ export class BatchDrainService { throw new DrainClaimLostError(); } }; - heartbeatTimer = setTimeout( - refreshClaimHeartbeat, - this.getClaimHeartbeatIntervalMs(claimed.lockTtlSeconds), - ); + scheduleHeartbeat(); let checkpointedCount = 0; try { @@ -391,8 +391,16 @@ export class BatchDrainService { logger.warn("Drain batch failed; will retry later", { groupId, err }); return { status: "retry", drained: 0 }; } finally { - if (heartbeatTimer !== null) clearTimeout(heartbeatTimer); + cancelHeartbeat = true; + if (heartbeatTimer !== null) { + clearTimeout(heartbeatTimer); + heartbeatTimer = null; + } await claimRefreshChain; + if (heartbeatTimer !== null) { + clearTimeout(heartbeatTimer); + heartbeatTimer = null; + } } } } From 1abe4a93e06ae5ea6416400a3a5543f5fae84992 Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Tue, 24 Mar 2026 17:40:57 +0800 Subject: [PATCH 26/38] fix: close follow-up review gaps --- src/config.test.ts | 8 ++++++++ src/config.ts | 14 +++++--------- src/handlers/tool-before.test.ts | 18 +++++++++++++----- src/handlers/tool-before.ts | 7 ++++--- 4 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/config.test.ts b/src/config.test.ts index 8c1aeec..5dfd5e6 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -398,4 +398,12 @@ describe("config", () => { assertEquals(error.cause, cause); assert(!Object.prototype.propertyIsEnumerable.call(error, "cause")); }); + + it("omits cause when no wrapped error is provided", () => { + const error = new ConfigLoadError("Unable to discover Graphiti config", { + code: "config-discovery-search", + }); + + assert(!Object.hasOwn(error, "cause")); + }); }); diff --git a/src/config.ts b/src/config.ts index cd68ba7..9be4d60 100644 --- a/src/config.ts +++ b/src/config.ts @@ -36,17 +36,13 @@ export class ConfigLoadError extends Error { message: string, options: { cause?: unknown; code: ConfigLoadErrorCode }, ) { - super(message); + if (options.cause === undefined) { + super(message); + } else { + super(message, { cause: options.cause }); + } this.name = "ConfigLoadError"; this.code = options.code; - if (options.cause !== undefined) { - Object.defineProperty(this, "cause", { - value: options.cause, - writable: true, - configurable: true, - enumerable: false, - }); - } } } diff --git a/src/handlers/tool-before.test.ts b/src/handlers/tool-before.test.ts index 98f66db..8ae7b21 100644 --- a/src/handlers/tool-before.test.ts +++ b/src/handlers/tool-before.test.ts @@ -282,15 +282,19 @@ describe("tool execute before handler", () => { it("preserves root_session_id when a session tool is modified by routing", async () => { const canonicalizer = new MockSessionCanonicalizer(); canonicalizer.cached.set("child-session", "root-session"); + let routedArgs: Record | undefined; const handler = createToolBeforeHandler({ 
sessionCanonicalizer: canonicalizer as never, guidanceThrottle: new ToolGuidanceCache(), routingOutcomes, - routeToolCall: () => ({ - action: "modify", - args: { query: "rewritten" }, - reason: "test-modify", - }), + routeToolCall: ({ args }) => { + routedArgs = args; + return { + action: "modify", + args: { query: "rewritten" }, + reason: "test-modify", + }; + }, }); const output = { args: { root_session_id: "wrong-root", query: "original" }, @@ -305,6 +309,10 @@ describe("tool execute before handler", () => { output as never, ); + assertEquals(routedArgs, { + root_session_id: "root-session", + query: "original", + }); assertEquals(output.args, { root_session_id: "root-session", query: "rewritten", diff --git a/src/handlers/tool-before.ts b/src/handlers/tool-before.ts index 6fd9fa9..088e885 100644 --- a/src/handlers/tool-before.ts +++ b/src/handlers/tool-before.ts @@ -68,10 +68,11 @@ export function createToolBeforeHandler( deps.sessionCanonicalizer, sessionID, ); - const args = isSessionMcpTool(tool) + const sessionTool = isSessionMcpTool(tool); + const args = sessionTool ? injectRootSessionId(toRecord(output.args), canonicalSessionId) : toRecord(output.args); - if (isSessionMcpTool(tool)) { + if (sessionTool) { output.args = args; } const decision = route({ @@ -85,7 +86,7 @@ export function createToolBeforeHandler( case "allow": return; case "modify": - output.args = isSessionMcpTool(tool) + output.args = sessionTool ? injectRootSessionId(toRecord(decision.args), canonicalSessionId) : decision.args; deps.routingOutcomes.set(callID, { From 5ad04b7f5b6bc8cbc0812d165b66144f8ba6ca9e Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Tue, 24 Mar 2026 17:45:51 +0800 Subject: [PATCH 27/38] fix: keep config errors dnt-compatible --- src/config.test.ts | 2 +- src/config.ts | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/config.test.ts b/src/config.test.ts index 5dfd5e6..fbd4c35 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -388,7 +388,7 @@ describe("config", () => { assertEquals(config.redis.endpoint, "redis://localhost:6379"); }); - it("uses standard Error.cause when wrapping config load failures", () => { + it("preserves Error.cause semantics when wrapping config load failures", () => { const cause = new Error("search failed"); const error = new ConfigLoadError("Unable to discover Graphiti config", { cause, diff --git a/src/config.ts b/src/config.ts index 9be4d60..c853b61 100644 --- a/src/config.ts +++ b/src/config.ts @@ -36,13 +36,20 @@ export class ConfigLoadError extends Error { message: string, options: { cause?: unknown; code: ConfigLoadErrorCode }, ) { - if (options.cause === undefined) { - super(message); - } else { - super(message, { cause: options.cause }); - } + super(message); this.name = "ConfigLoadError"; this.code = options.code; + if (options.cause !== undefined) { + // dnt's Node-side type check still narrows Error to the legacy + // single-argument constructor here, so preserve standard cause semantics + // manually while keeping the generated build green. + Object.defineProperty(this, "cause", { + value: options.cause, + writable: true, + configurable: true, + enumerable: false, + }); + } } } From 4060cc6370648d634ee791f96f50ec62b2b86833 Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Tue, 24 Mar 2026 17:55:42 +0800 Subject: [PATCH 28/38] fix: close runtime and denial review gaps --- src/handlers/tool-before.test.ts | 40 +++- src/handlers/tool-before.ts | 2 +- src/index.test.ts | 69 ++++++- src/index.ts | 308 ++++++++++++++++++------------- src/services/batch-drain.ts | 7 +- 5 files changed, 287 insertions(+), 139 deletions(-) diff --git a/src/handlers/tool-before.test.ts b/src/handlers/tool-before.test.ts index 8ae7b21..b59773e 100644 --- a/src/handlers/tool-before.test.ts +++ b/src/handlers/tool-before.test.ts @@ -55,7 +55,7 @@ describe("tool execute before handler", () => { { args: { url: "https://example.com" } } as never, ), Error, - "Tool denied (WebFetch):", + "Tool denied (WebFetch)", ); assertEquals(routingOutcomes.take("call-1"), { @@ -86,7 +86,7 @@ describe("tool execute before handler", () => { { args: { url: "https://example.com" } } as never, ), Error, - "Tool denied (WebFetch):", + "Tool denied (WebFetch)", ); assertEquals(canonicalizer.cachedCalls, ["child-session"]); @@ -98,6 +98,42 @@ describe("tool execute before handler", () => { }); }); + it("throws a stable denial message without embedding guidance text", async () => { + const canonicalizer = new MockSessionCanonicalizer(); + canonicalizer.cached.set("root-session", "root-session"); + const handler = createToolBeforeHandler({ + sessionCanonicalizer: canonicalizer as never, + guidanceThrottle: new ToolGuidanceCache(), + routingOutcomes, + routeToolCall: () => ({ + action: "deny", + reason: "test-deny", + guidance: + "Dynamic guidance details that should stay out of the thrown error.", + }), + }); + + const error = await assertRejects( + () => + handler( + { + tool: "Bash", + sessionID: "root-session", + callID: "call-stable-deny", + } as never, + { args: { command: "curl https://example.com" } } as never, + ), + Error, + "Tool denied (Bash)", + ); + + assertEquals(error.message, "Tool denied (Bash)"); + assertStringIncludes( + 
String(routingOutcomes.take("call-stable-deny")?.reason), + "test-deny", + ); + }); + it("mutates args for Bash rewrite cases", async () => { const canonicalizer = new MockSessionCanonicalizer(); canonicalizer.cached.set("root-session", "root-session"); diff --git a/src/handlers/tool-before.ts b/src/handlers/tool-before.ts index 088e885..6a82c16 100644 --- a/src/handlers/tool-before.ts +++ b/src/handlers/tool-before.ts @@ -109,7 +109,7 @@ export function createToolBeforeHandler( action: "deny", reason: decision.reason, }); - throw new Error(`Tool denied (${tool}): ${decision.guidance}`); + throw new Error(`Tool denied (${tool})`); } }; } diff --git a/src/index.test.ts b/src/index.test.ts index 7c6b5b3..c072512 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ -1,4 +1,8 @@ -import { assertEquals, assertStrictEquals } from "jsr:@std/assert@^1.0.0"; +import { + assertEquals, + assertRejects, + assertStrictEquals, +} from "jsr:@std/assert@^1.0.0"; import { afterEach, describe, it } from "jsr:@std/testing@^1.0.0/bdd"; import { graphiti, @@ -27,6 +31,9 @@ function createEntrypointHarnessWithOptions(options: { redisConnectError?: Error; teardownRun?: () => Promise; teardownDispose?: () => void; + createSessionMcpRuntimeError?: Error; + createEventHandlerError?: Error; + teardownRunError?: Error; }) { const connected = options.connected ?? true; const config = { @@ -129,6 +136,7 @@ function createEntrypointHarnessWithOptions(options: { createToolAfterHandlerArgs: [] as Array>, toolGuidanceCacheInstances: [] as unknown[], toolRoutingOutcomeCacheInstances: [] as unknown[], + teardownDisposeCalls: 0, }; class MockGraphitiConnectionManager { @@ -340,11 +348,16 @@ function createEntrypointHarnessWithOptions(options: { const registration = { run: options.teardownRun ?? (async () => { + if (options.teardownRunError) { + throw options.teardownRunError; + } for (const task of tasks) { await task.run(); } }), - dispose: options.teardownDispose ?? 
(() => {}), + dispose: options.teardownDispose ?? (() => { + records.teardownDisposeCalls += 1; + }), }; records.teardownRegistrations.push({ tasks, registration }); return registration; @@ -358,9 +371,17 @@ function createEntrypointHarnessWithOptions(options: { createSessionExecutor: (args?: Record) => new MockSessionExecutor(args), createSessionMcpRuntime: (args?: Record) => - new MockSessionMcpRuntime(args), + (() => { + if (options.createSessionMcpRuntimeError) { + throw options.createSessionMcpRuntimeError; + } + return new MockSessionMcpRuntime(args); + })(), SessionManager: MockSessionManager, createEventHandler: (args: Record) => { + if (options.createEventHandlerError) { + throw options.createEventHandlerError; + } records.createEventHandlerArgs.push(args); return hooks.event; }, @@ -1296,5 +1317,47 @@ describe("index", () => { assertEquals(firstHarness.records.connectionStopCalls, 1); assertEquals(firstHarness.records.redisCloseCalls, 1); }); + + it("best-effort cleans up partial resources when setup fails before teardown registration", async () => { + const { input, records, dependencies } = + createEntrypointHarnessWithOptions({ + createSessionMcpRuntimeError: new Error("runtime setup failed"), + }); + + await assertRejects( + () => invokeGraphiti(input, dependencies), + Error, + "runtime setup failed", + ); + + assertEquals(records.teardownRegistrations.length, 0); + assertEquals(records.graphitiAsyncDisposeCalls, 1); + assertEquals(records.connectionStopCalls, 1); + assertEquals(records.redisCloseCalls, 1); + assertEquals(records.sessionMcpRuntimeDisposeCalls, 0); + }); + + it("runs registered teardown when setup fails after teardown registration", async () => { + const { input, records, dependencies } = + createEntrypointHarnessWithOptions({ + createEventHandlerError: new Error("event handler setup failed"), + }); + + await assertRejects( + () => invokeGraphiti(input, dependencies), + Error, + "event handler setup failed", + ); + + 
assertEquals(records.teardownRegistrations.length, 1); + assertEquals(records.teardownDisposeCalls, 1); + assertEquals(records.teardownTaskRuns, [ + "graphiti-drain-flush", + "graphiti-async", + "session-mcp-runtime", + "graphiti", + "redis", + ]); + }); }); }); diff --git a/src/index.ts b/src/index.ts index 131b4ef..483c48f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -156,151 +156,201 @@ export const graphiti: Plugin = ( dependencies.warnOnRedisStartupUnavailable(false, config.redis.endpoint); }; - const connectionManager = new dependencies.GraphitiConnectionManager({ - endpoint: config.graphiti.endpoint, - }); - connectionManager.start(); - void connectionManager.ready() - .then((connected) => { - if (!connected) { - reportStartupUnavailable("graphiti"); - } - }) - .catch(() => { - reportStartupUnavailable("graphiti"); - }); + const startupCleanupTasks: Array<{ + name: string; + run: () => void | Promise; + }> = []; + let startupTeardown: ReturnType | null = + null; - const redisClient = new dependencies.RedisClient({ - endpoint: config.redis.endpoint, - }); - void redisClient.connect() - .catch(() => { - reportStartupUnavailable("redis"); + try { + const connectionManager = new dependencies.GraphitiConnectionManager({ + endpoint: config.graphiti.endpoint, }); - const graphitiClient = new dependencies.GraphitiMcpClient( - connectionManager, - ); - const redisEvents = new dependencies.RedisEventsService(redisClient, { - sessionTtlSeconds: config.redis.sessionTtlSeconds, - }); - const redisSnapshot = new dependencies.RedisSnapshotService(redisClient, { - ttlSeconds: config.redis.sessionTtlSeconds * 2, - }); - const redisCache = new dependencies.RedisCacheService(redisClient, { - ttlSeconds: config.redis.cacheTtlSeconds, - driftThreshold: config.graphiti.driftThreshold, - }); - const batchDrain = new dependencies.BatchDrainService( - redisClient, - redisEvents, - { - batchSize: config.redis.batchSize, - batchMaxBytes: config.redis.batchMaxBytes, - drainRetryMax: 
config.redis.drainRetryMax, - }, - ); - const defaultGroupId = dependencies.makeGroupId( - config.graphiti.groupIdPrefix, - input.directory, - ); - const defaultUserGroupId = dependencies.makeUserGroupId( - config.graphiti.groupIdPrefix, - input.directory, - ); - - const graphitiAsync = new dependencies.GraphitiAsyncService( - graphitiClient, - redisCache, - batchDrain, - ); - const sessionExecutor = dependencies.createSessionExecutor(); - const sessionMcpRuntime = dependencies.createSessionMcpRuntime({ - redisClient, - graphitiCache: redisCache, - sessionTtlSeconds: config.redis.sessionTtlSeconds, - groupId: defaultGroupId, - sessionExecutor, - createSessionExecutor: dependencies.createSessionExecutor, - }); + startupCleanupTasks.unshift({ + name: "graphiti", + run: () => connectionManager.stop(), + }); + connectionManager.start(); + void connectionManager.ready() + .then((connected) => { + if (!connected) { + reportStartupUnavailable("graphiti"); + } + }) + .catch(() => { + reportStartupUnavailable("graphiti"); + }); - const sessionManager = new dependencies.SessionManager( - defaultGroupId, - defaultUserGroupId, - input.client, - redisEvents, - redisSnapshot, - redisCache, - { - idleRetentionMs: config.redis.sessionTtlSeconds * 1000, - runtimeStateMigrator: sessionMcpRuntime, - }, - ); - sessionMcpRuntime.setSessionCanonicalizer(sessionManager); - const toolGuidanceCache = new dependencies.ToolGuidanceCache(); - const toolRoutingOutcomes = new dependencies.ToolRoutingOutcomeCache(); + const redisClient = new dependencies.RedisClient({ + endpoint: config.redis.endpoint, + }); + startupCleanupTasks.unshift({ + name: "redis", + run: () => redisClient.close(), + }); + void redisClient.connect() + .catch(() => { + reportStartupUnavailable("redis"); + }); + const graphitiClient = new dependencies.GraphitiMcpClient( + connectionManager, + ); + const redisEvents = new dependencies.RedisEventsService(redisClient, { + sessionTtlSeconds: config.redis.sessionTtlSeconds, + 
}); + const redisSnapshot = new dependencies.RedisSnapshotService(redisClient, { + ttlSeconds: config.redis.sessionTtlSeconds * 2, + }); + const redisCache = new dependencies.RedisCacheService(redisClient, { + ttlSeconds: config.redis.cacheTtlSeconds, + driftThreshold: config.graphiti.driftThreshold, + }); + const batchDrain = new dependencies.BatchDrainService( + redisClient, + redisEvents, + { + batchSize: config.redis.batchSize, + batchMaxBytes: config.redis.batchMaxBytes, + drainRetryMax: config.redis.drainRetryMax, + }, + ); + const defaultGroupId = dependencies.makeGroupId( + config.graphiti.groupIdPrefix, + input.directory, + ); + const defaultUserGroupId = dependencies.makeUserGroupId( + config.graphiti.groupIdPrefix, + input.directory, + ); - activeRuntimeTeardown = dependencies.registerRuntimeTeardown([ - { - name: "graphiti-drain-flush", - run: () => - graphitiAsync.flushPendingGroups( - sessionManager.getTrackedGroupIds(), - ), - }, - { + const graphitiAsync = new dependencies.GraphitiAsyncService( + graphitiClient, + redisCache, + batchDrain, + ); + startupCleanupTasks.unshift({ name: "graphiti-async", run: () => graphitiAsync.dispose(), - }, - { + }); + const sessionExecutor = dependencies.createSessionExecutor(); + const sessionMcpRuntime = dependencies.createSessionMcpRuntime({ + redisClient, + graphitiCache: redisCache, + sessionTtlSeconds: config.redis.sessionTtlSeconds, + groupId: defaultGroupId, + sessionExecutor, + createSessionExecutor: dependencies.createSessionExecutor, + }); + startupCleanupTasks.unshift({ name: "session-mcp-runtime", run: () => sessionMcpRuntime.dispose(), - }, - { - name: "graphiti", - run: () => connectionManager.stop(), - }, - { - name: "redis", - run: () => redisClient.close(), - }, - ]); + }); - return { - event: dependencies.createEventHandler({ - sessionManager, - redisEvents, - redisCache, - redisSnapshot, - graphitiAsync, + const sessionManager = new dependencies.SessionManager( defaultGroupId, defaultUserGroupId, 
- sdkClient: input.client, - directory: input.directory, - }), - "chat.message": dependencies.createChatHandler({ - sessionManager, + input.client, redisEvents, - graphitiAsync, - drainTriggerSize: config.redis.batchSize, - }), - "experimental.session.compacting": dependencies - .createCompactingHandler({ + redisSnapshot, + redisCache, + { + idleRetentionMs: config.redis.sessionTtlSeconds * 1000, + runtimeStateMigrator: sessionMcpRuntime, + }, + ); + sessionMcpRuntime.setSessionCanonicalizer(sessionManager); + const toolGuidanceCache = new dependencies.ToolGuidanceCache(); + const toolRoutingOutcomes = new dependencies.ToolRoutingOutcomeCache(); + + startupTeardown = dependencies.registerRuntimeTeardown([ + { + name: "graphiti-drain-flush", + run: () => + graphitiAsync.flushPendingGroups( + sessionManager.getTrackedGroupIds(), + ), + }, + { + name: "graphiti-async", + run: () => graphitiAsync.dispose(), + }, + { + name: "session-mcp-runtime", + run: () => sessionMcpRuntime.dispose(), + }, + { + name: "graphiti", + run: () => connectionManager.stop(), + }, + { + name: "redis", + run: () => redisClient.close(), + }, + ]); + activeRuntimeTeardown = startupTeardown; + + return { + event: dependencies.createEventHandler({ sessionManager, + redisEvents, + redisCache, + redisSnapshot, + graphitiAsync, + defaultGroupId, + defaultUserGroupId, + sdkClient: input.client, + directory: input.directory, }), - "experimental.chat.messages.transform": dependencies - .createMessagesHandler({ + "chat.message": dependencies.createChatHandler({ sessionManager, + redisEvents, + graphitiAsync, + drainTriggerSize: config.redis.batchSize, }), - tool: sessionMcpRuntime.tools, - "tool.execute.before": dependencies.createToolBeforeHandler({ - sessionCanonicalizer: sessionManager, - guidanceThrottle: toolGuidanceCache, - routingOutcomes: toolRoutingOutcomes, - }), - "tool.execute.after": dependencies.createToolAfterHandler({ - routingOutcomes: toolRoutingOutcomes, - }), - }; + 
"experimental.session.compacting": dependencies + .createCompactingHandler({ + sessionManager, + }), + "experimental.chat.messages.transform": dependencies + .createMessagesHandler({ + sessionManager, + }), + tool: sessionMcpRuntime.tools, + "tool.execute.before": dependencies.createToolBeforeHandler({ + sessionCanonicalizer: sessionManager, + guidanceThrottle: toolGuidanceCache, + routingOutcomes: toolRoutingOutcomes, + }), + "tool.execute.after": dependencies.createToolAfterHandler({ + routingOutcomes: toolRoutingOutcomes, + }), + }; + } catch (err) { + if (startupTeardown) { + if (activeRuntimeTeardown === startupTeardown) { + activeRuntimeTeardown = null; + } + startupTeardown.dispose(); + try { + await startupTeardown.run(); + } catch (cleanupErr) { + logger.warn("Runtime setup cleanup rejected", cleanupErr); + } + } else { + for (const task of startupCleanupTasks) { + try { + await task.run(); + } catch (cleanupErr) { + logger.warn("Runtime setup cleanup failed", { + resource: task.name, + err: cleanupErr, + }); + } + } + } + throw err; + } }); runtimeInitialization = setup.then(() => undefined, () => undefined); diff --git a/src/services/batch-drain.ts b/src/services/batch-drain.ts index b770cfc..2be7cf0 100644 --- a/src/services/batch-drain.ts +++ b/src/services/batch-drain.ts @@ -396,11 +396,10 @@ export class BatchDrainService { clearTimeout(heartbeatTimer); heartbeatTimer = null; } + // Wait for any in-flight lease refresh to finish after cancellation so its + // finally block cannot race with claim cleanup. No second clearTimeout is + // needed because scheduleHeartbeat() is a no-op once cancelHeartbeat=true. await claimRefreshChain; - if (heartbeatTimer !== null) { - clearTimeout(heartbeatTimer); - heartbeatTimer = null; - } } } } From 781f33451b148e0d07e964193928fee4aa202405 Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Tue, 24 Mar 2026 18:28:20 +0800 Subject: [PATCH 29/38] fix: close review follow-up edge cases --- scripts/bench-falkordb-format.ts | 4 ++++ scripts/bench-falkordb.test.ts | 20 +++++++++++++++++ scripts/bench-falkordb.ts | 3 ++- src/handlers/messages.test.ts | 37 ++++++++++++++++++++++++++++++++ src/handlers/messages.ts | 2 +- src/services/batch-drain.test.ts | 29 ++++++++++++++++++++++++- src/services/batch-drain.ts | 17 ++++++++++++++- 7 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 scripts/bench-falkordb-format.ts create mode 100644 scripts/bench-falkordb.test.ts diff --git a/scripts/bench-falkordb-format.ts b/scripts/bench-falkordb-format.ts new file mode 100644 index 0000000..660aaf0 --- /dev/null +++ b/scripts/bench-falkordb-format.ts @@ -0,0 +1,4 @@ +import { redactEndpointUserInfo } from "../src/services/endpoint-redaction.ts"; + +export const formatEndpointForDisplay = (endpoint: string): string => + redactEndpointUserInfo(endpoint); diff --git a/scripts/bench-falkordb.test.ts b/scripts/bench-falkordb.test.ts new file mode 100644 index 0000000..c1d97c9 --- /dev/null +++ b/scripts/bench-falkordb.test.ts @@ -0,0 +1,20 @@ +import { assertEquals } from "jsr:@std/assert@^1.0.0"; +import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; + +import { formatEndpointForDisplay } from "./bench-falkordb-format.ts"; + +describe("bench-falkordb", () => { + it("redacts Redis endpoint credentials before display", () => { + assertEquals( + formatEndpointForDisplay("redis://user:secret@redis.test:6379"), + "redis://redis.test:6379", + ); + }); + + it("leaves credential-free endpoints unchanged", () => { + assertEquals( + formatEndpointForDisplay("redis://redis.test:6379"), + "redis://redis.test:6379", + ); + }); +}); diff --git a/scripts/bench-falkordb.ts b/scripts/bench-falkordb.ts index 0332c01..af29adc 100644 --- a/scripts/bench-falkordb.ts +++ b/scripts/bench-falkordb.ts @@ -1,4 +1,5 @@ import RedisModule from "ioredis"; +import { 
formatEndpointForDisplay } from "./bench-falkordb-format.ts"; const Redis = RedisModule as unknown as typeof import("ioredis").default; @@ -88,7 +89,7 @@ const run = async () => { samples.del.push(performance.now() - started); } - console.log(`Endpoint: ${endpoint}`); + console.log(`Endpoint: ${formatEndpointForDisplay(endpoint)}`); console.log(`Iterations: ${iterations}`); console.log(""); diff --git a/src/handlers/messages.test.ts b/src/handlers/messages.test.ts index a7e39f4..6179a7f 100644 --- a/src/handlers/messages.test.ts +++ b/src/handlers/messages.test.ts @@ -904,6 +904,43 @@ describe("messages handler", () => { ); }); + it("scrubs leading local-first session_memory envelopes regardless of source/version values", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'continue', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "continue", + }, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: + 'stale\n\ncontinue', + }], + }], + }; + + await handler({} as never, output as never); + + assertEquals( + output.messages[0].parts[0].text, + 'continue\n\ncontinue', + ); + }); + it("remains compatible with extended prepareInjection results", async () => { const prepared = { envelope: '', diff --git a/src/handlers/messages.ts b/src/handlers/messages.ts index 9f30b31..8fd9a67 100644 --- a/src/handlers/messages.ts +++ b/src/handlers/messages.ts @@ -30,7 +30,7 @@ const getTransformMessage = (input: unknown): string | undefined => { }; const LEADING_INJECTED_SESSION_MEMORY_BLOCK = - /^]*\bsource=(['"])graphiti\1)(?=[^>]*\bversion=(['"])1\2)[^>]*>[\s\S]*?<\/session_memory>(?:\r?\n){0,2}/; + 
/^<session_memory(?=[^>]*\bsource=(['"])[^'"]+\1)(?=[^>]*\bversion=(['"])[^'"]+\2)[^>]*>[\s\S]*?<\/session_memory>(?:\r?\n){0,2}/; const LEADING_INJECTED_LEGACY_MEMORY_BLOCK_WITH_UUIDS = /^<memory(?=[^>]*\bdata-uuids=(["'])(?:[^"']*)\1)[^>]*>[\s\S]*?<\/memory>(?:\r?\n){0,2}/; const LEADING_INJECTED_EMPTY_LEGACY_MEMORY_BLOCK = diff --git a/src/services/batch-drain.test.ts b/src/services/batch-drain.test.ts index 3df78ba..12fbecb 100644 --- a/src/services/batch-drain.test.ts +++ b/src/services/batch-drain.test.ts @@ -1,6 +1,6 @@ import { assertEquals } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import { spy } from "jsr:@std/testing@^1.0.0/mock"; +import { spy, stub } from "jsr:@std/testing@^1.0.0/mock"; import { BatchDrainService } from "./batch-drain.ts"; import { createSessionEvent } from "./event-extractor.ts"; import { logger } from "./logger.ts"; @@ -547,6 +547,33 @@ describe("batch drain", () => { assertEquals(await redis.getListLength(drainDeadKey("group-1")), 1); }); + it("adds bounded jitter to retry scheduling", async () => { + const { redis, events, drain } = await createDeps(); + const event = createSessionEvent("error", "tool", { + summary: "failing batch", + body: "failing batch", + metadata: { resolved: false }, + }); + await events.recordEvent("session-1", "group-1", event); + + using _dateNow = stub(Date, "now", () => 10_000); + using _random = stub(Math, "random", () => 1); + + const result = await drain.drainGroup("group-1", { + addMemory() { + throw new Error("boom"); + }, + } as never); + + assertEquals(result, { status: "retry", drained: 0 }); + assertEquals( + await redis.getString( + drainRetryKey("group-1", `${event.id}:${event.id}`), + ), + JSON.stringify({ attempts: 1, nextAttemptAt: 11_250 }), + ); + }); + it("backs off and releases the claim when retry state is scheduled for later", async () => { const { redis, events, drain } = await createDeps(); const event = createSessionEvent("message", "user", { diff --git
a/src/services/batch-drain.ts b/src/services/batch-drain.ts index 2be7cf0..cea34a2 100644 --- a/src/services/batch-drain.ts +++ b/src/services/batch-drain.ts @@ -23,6 +23,9 @@ export interface BatchDrainServiceOptions { type RetryState = { attempts: number; nextAttemptAt: number }; +const RETRY_BACKOFF_BASE_MS = 1_000; +const RETRY_BACKOFF_JITTER_RATIO = 0.25; + const isValidRetryState = (value: unknown): value is RetryState => { if (!value || typeof value !== "object") return false; const state = value as Partial<RetryState>; @@ -190,6 +193,18 @@ export class BatchDrainService { ); } + private getRetryDelayMs(attempts: number): number { + const baseDelayMs = RETRY_BACKOFF_BASE_MS * (2 ** (attempts - 1)); + const jitterWindowMs = Math.round( + baseDelayMs * RETRY_BACKOFF_JITTER_RATIO, + ); + const minDelayMs = Math.max(1, baseDelayMs - jitterWindowMs); + const maxDelayMs = baseDelayMs + jitterWindowMs; + return Math.round( + minDelayMs + (Math.random() * (maxDelayMs - minDelayMs)), + ); + } + private async releaseClaimSafely( groupId: string, claimToken: string, @@ -386,7 +401,7 @@ export class BatchDrainService { ); await this.setRetryState(groupId, batchKey, { attempts, - nextAttemptAt: Date.now() + 1_000 * (2 ** (attempts - 1)), + nextAttemptAt: Date.now() + this.getRetryDelayMs(attempts), }); logger.warn("Drain batch failed; will retry later", { groupId, err }); return { status: "retry", drained: 0 }; From 9fe46a490887ccfec690c6132e523a6f9acf61e8 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 18:46:56 +0800 Subject: [PATCH 30/38] docs: clarify review query and root exports --- README.md | 4 ++++ docs/ReviewProtocol.md | 17 ++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1610e1e..d8fb3d3 100644 --- a/README.md +++ b/README.md @@ -145,6 +145,10 @@ long-term knowledge graph on top of the same backend.
Add the plugin to your `opencode.json` (or `opencode.jsonc`): +The package root intentionally exports only the `graphiti` plugin entrypoint. +Helper symbols under `src/` are internal implementation details and are not a +supported public import surface. + ```jsonc { "plugin": ["opencode-graphiti"] diff --git a/docs/ReviewProtocol.md b/docs/ReviewProtocol.md index 268cf14..08c3e0d 100644 --- a/docs/ReviewProtocol.md +++ b/docs/ReviewProtocol.md @@ -16,11 +16,13 @@ request and review feedback needs to be handled systematically. ## Required Unresolved-Batch Query -Use this command exactly as written for metadata-first traversal across -review-thread pages until it collects the first 10 unresolved threads. The 10 -unresolved items may be sparse, non-contiguous, and spread across multiple -pages. After that metadata pass, fetch narrow details only for that unresolved -batch. Do not rewrite, broaden, or replace it with an equivalent query. +Use this command with `OWNER`, `REPO`, and `PR_NUMBER` replaced by the actual +repository owner, repository name, and PR number. Otherwise keep the command as +written for metadata-first traversal across review-thread pages until it +collects the first 10 unresolved threads. The 10 unresolved items may be sparse, +non-contiguous, and spread across multiple pages. After that metadata pass, +fetch narrow details only for that unresolved batch. Do not rewrite, broaden, or +replace it with an equivalent query. If this command fails for any reason, stop and report the failure explicitly before taking any further review-handling action. @@ -40,8 +42,9 @@ deno eval 'const o="OWNER",r="REPO",n="PR_NUMBER",maxUnresolved=10,mq="query($o: - Use GraphQL `reviewThreads` as the source of truth for unresolved state; REST review comments do not expose thread resolution and cannot be filtered to unresolved-only. - - Run the required unresolved-batch query command exactly as written in - `Required Unresolved-Batch Query`. 
+ - Run the required unresolved-batch query command from + `Required Unresolved-Batch Query`, replacing only the `OWNER`, `REPO`, and + `PR_NUMBER` placeholders with the active PR values. - If the command fails, stop and report the failure explicitly. - Keep GraphQL payloads narrow: request small pages (`first: 20` or similar) and fetch only thread metadata first (`id`, `isResolved`, `isOutdated`, From 1c38b70718bae6838e09422e9b10e9fe63712eb2 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 19:10:19 +0800 Subject: [PATCH 31/38] fix: close remaining review follow-ups --- .github/scripts/version.test.ts | 34 + .github/scripts/version.ts | 63 +- deno.json | 1 - deno.lock | 15 + docs/ConnectionManager.md | 297 ----- docs/ContextOverhaul.md | 1014 ----------------- docs/ContextOverhaulTests.md | 16 - ...03-24-agentic-runtime-test-plan-rewrite.md | 122 +- src/services/batch-drain.test.ts | 154 ++- src/services/batch-drain.ts | 48 +- 10 files changed, 281 insertions(+), 1483 deletions(-) delete mode 100644 docs/ConnectionManager.md delete mode 100644 docs/ContextOverhaul.md delete mode 100644 docs/ContextOverhaulTests.md diff --git a/.github/scripts/version.test.ts b/.github/scripts/version.test.ts index a186634..2cfc590 100644 --- a/.github/scripts/version.test.ts +++ b/.github/scripts/version.test.ts @@ -865,6 +865,40 @@ describe("run", () => { ); }); + it("reads the package name from deno.jsonc with trailing commas", async () => { + const cli = makeCliDeps({ + env: { + GITHUB_OUTPUT: "/tmp/github-output", + }, + files: { + "deno.jsonc": `{ + // Package metadata for release automation. 
+ "name": "commented-package", + "version": "0.0.0-development", +}`, + }, + commands: { + "git describe --tags --abbrev=0 --match v*": new Error("no tags"), + "npm view commented-package version": "0.2.0", + "git log --format=%s": "docs: note jsonc support", + "git log --format=%b": "", + "git log --format= --name-only": ".github/scripts/version.ts\n", + }, + now: new Date("2026-02-12T09:14:29Z"), + }); + + await run(["pull_request", "abcdef1234567890"], cli.deps); + + assertEquals(cli.outputs, [ + "version=0.2.1-canary.abcdef1.20260212091429\n", + "tag=canary\n", + ]); + assertEquals( + cli.calls.includes("npm view commented-package version"), + true, + ); + }); + it("emits skip=true when only test files changed", async () => { const cli = makeCliDeps({ env: { diff --git a/.github/scripts/version.ts b/.github/scripts/version.ts index 23020c9..05c455a 100644 --- a/.github/scripts/version.ts +++ b/.github/scripts/version.ts @@ -10,6 +10,8 @@ * COMMIT_SHA - override for GITHUB_SHA (e.g. PR head SHA) */ +import { parse as parseJsonc } from "jsr:@std/jsonc@^1.0.2"; + /** Semantic version bump type. 
*/ export type Bump = "major" | "minor" | "patch" | "none"; @@ -62,68 +64,9 @@ export async function runCommand(...command: string[]): Promise { return parseCommandOutput(command, await proc.output()); } -function stripJsonComments(text: string): string { - let result = ""; - let inString = false; - let escaped = false; - - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const nextChar = text[index + 1]; - - if (inString) { - result += char; - if (escaped) { - escaped = false; - } else if (char === "\\") { - escaped = true; - } else if (char === '"') { - inString = false; - } - continue; - } - - if (char === '"') { - inString = true; - result += char; - continue; - } - - if (char === "/" && nextChar === "/") { - index += 2; - while (index < text.length && text[index] !== "\n") { - index += 1; - } - if (index < text.length) { - result += text[index]; - } - continue; - } - - if (char === "/" && nextChar === "*") { - index += 2; - while ( - index < text.length - 1 && - !(text[index] === "*" && text[index + 1] === "/") - ) { - if (text[index] === "\n") { - result += "\n"; - } - index += 1; - } - index += 1; - continue; - } - - result += char; - } - - return result; -} - function parsePackageManifest(text: string, filePath: string): unknown { if (filePath.endsWith(".jsonc")) { - return JSON.parse(stripJsonComments(text)); + return parseJsonc(text); } return JSON.parse(text); diff --git a/deno.json b/deno.json index 4142565..98ba481 100644 --- a/deno.json +++ b/deno.json @@ -6,7 +6,6 @@ "tasks": { "build": "deno run -A dnt.ts", "check": "deno check src/index.ts", - "deploy": "deno eval \"throw new Error('Manual publish is disabled. 
Use the GitHub Actions publish workflow for releases.')\"", "dev": "deno run --allow-all src/index.ts", "lint": "deno lint", "fmt": "deno fmt" diff --git a/deno.lock b/deno.lock index c5d9fe7..10350f1 100644 --- a/deno.lock +++ b/deno.lock @@ -8,9 +8,12 @@ "jsr:@std/fmt@1": "1.0.9", "jsr:@std/fs@1": "1.0.22", "jsr:@std/internal@^1.0.12": "1.0.12", + "jsr:@std/json@^1.0.2": "1.0.3", + "jsr:@std/jsonc@^1.0.2": "1.0.2", "jsr:@std/path@1": "1.1.4", "jsr:@std/path@^1.1.4": "1.1.4", "jsr:@std/testing@1": "1.0.17", + "jsr:@std/ulid@1": "1.0.0", "jsr:@ts-morph/bootstrap@0.27": "0.27.0", "jsr:@ts-morph/common@0.27": "0.27.0", "npm:@modelcontextprotocol/sdk@1.3.0": "1.3.0", @@ -54,6 +57,15 @@ "@std/internal@1.0.12": { "integrity": "972a634fd5bc34b242024402972cd5143eac68d8dffaca5eaa4dba30ce17b027" }, + "@std/json@1.0.3": { + "integrity": "97d5710996293a027b7aa5f0d1f4fa29f246f269e6b5597e08807613f37d426c" + }, + "@std/jsonc@1.0.2": { + "integrity": "909605dae3af22bd75b1cbda8d64a32cf1fd2cf6efa3f9e224aba6d22c0f44c7", + "dependencies": [ + "jsr:@std/json" + ] + }, "@std/path@1.1.4": { "integrity": "1d2d43f39efb1b42f0b1882a25486647cb851481862dc7313390b2bb044314b5", "dependencies": [ @@ -67,6 +79,9 @@ "jsr:@std/internal" ] }, + "@std/ulid@1.0.0": { + "integrity": "d41c3d27a907714413649fee864b7cde8d42ee68437d22b79d5de4f81d808780" + }, "@ts-morph/bootstrap@0.27.0": { "integrity": "b8d7bc8f7942ce853dde4161b28f9aa96769cef3d8eebafb379a81800b9e2448", "dependencies": [ diff --git a/docs/ConnectionManager.md b/docs/ConnectionManager.md deleted file mode 100644 index d8a3ff7..0000000 --- a/docs/ConnectionManager.md +++ /dev/null @@ -1,297 +0,0 @@ -# Graphiti Connection Manager Refactor Plan - -## Goal - -Remove session-creation stalls caused by Graphiti connection setup by moving MCP -transport lifecycle management into a dedicated connection manager that starts -on plugin launch, stays alive for the process lifetime, reconnects -automatically, buffers requests while connecting, and 
transparently drops new -requests while offline so higher-level memory features fail open. - -## Current Problem - -- `src/index.ts` awaits `client.connect()` during plugin initialization. -- OpenCode appears to instantiate the plugin lazily on first real session use, - so the first session pays the MCP connection warmup cost. -- Higher-level methods in `src/services/client.ts` mix transport lifecycle, - retry logic, request execution, and response parsing in one class. -- Timeouts and disconnects are handled per call, but there is no separate - always-on connection state machine. - -## Target Design - -Introduce a dedicated `GraphitiConnectionManager` layer under `src/services/`. - -Responsibilities: - -- Own the MCP `Client` and `StreamableHTTPClientTransport` lifecycle. -- Start connecting as soon as the plugin launches, without blocking hook - registration. -- Maintain explicit connection state: `connecting`, `connected`, `offline`, and - `closing`. -- Auto-reconnect after disconnect with exponential backoff (see - [Reconnect Strategy](#reconnect-strategy)). -- Classify transport-level failures (session expiry, network errors, timeouts) - internally so callers never inspect raw transport errors. -- Queue requests that arrive while state is `connecting`, subject to per-request - deadlines. -- Reject requests that arrive while state is `offline` with a typed error, - allowing higher-level APIs to degrade gracefully instead of stalling. -- Expose a readiness signal (`ready(): Promise`) that resolves when the - first connection succeeds or a caller-supplied timeout elapses, for - diagnostics and background coordination only — never to gate hot-path memory - injection. -- Expose a single request API for tool execution so `GraphitiClient` becomes a - thin domain adapter. - -Non-goals: - -- No durable disk-backed queue. -- No guaranteed delivery while Graphiti is offline. 
-- No change to memory search, injection, or compaction semantics beyond their - behavior during transport failure. - -## Proposed Architecture - -### 1. New connection-manager service - -Create `src/services/connection-manager.ts` with: - -- A connection-state union type: - `"connecting" | "connected" | "offline" | - "closing"`. -- A manager class that stores: - - endpoint - - MCP client instance - - transport instance - - current state - - in-flight connect promise (serialized; see below) - - bounded queue of pending requests created during `connecting` - - reconnect backoff metadata (attempt count, next delay, timer handle) - - a readiness `Promise` that resolves on first successful connect or - on a configurable startup timeout -- Methods: - - `start()` — begin background connection on plugin launch; transitions - immediately to `connecting`. - - `stop()` — transition to `closing`, drain or reject queued requests, close - the MCP client, cancel any pending reconnect timer, then become inert. After - `stop()` all subsequent `callTool` calls reject immediately. - - `ready(timeoutMs?)` — returns a promise that resolves `true` when the - manager reaches `connected`, or `false` if the timeout elapses first. This - is an observability/background coordination helper, not a hot-path gating - primitive. - - `callTool(name, args, deadlineMs?)` — route requests according to current - state; accepts an optional per-request deadline. - - `reconnect()` — rebuild client and transport after disconnect/session loss. - Serialized: concurrent callers share a single in-flight attempt. - -#### State behavior - -- **`connecting`** — execute `client.connect()`. Incoming `callTool` requests - are enqueued. Each queued request carries a per-request deadline (default: - configurable, e.g. 15 s). If the deadline fires before the connection is - established, the request rejects with a typed timeout error so hook flows do - not hang indefinitely. 
-- **`connected`** — execute `callTool` immediately. If a call fails with a - transport error (network reset, socket hang-up, etc.) or an MCP 404 - session-expiry error, the manager transitions to `connecting` and triggers a - serialized reconnect. The failed request is retried once after the reconnect - succeeds. -- **`offline`** — the manager enters this state when a connect or reconnect - attempt fails after exhausting the current backoff step. Incoming `callTool` - requests reject immediately with a typed offline error. A background reconnect - timer continues with exponential backoff; on success the manager transitions - back to `connected`. -- **`closing`** — entered by `stop()`. All queued requests are rejected. No new - requests are accepted. The MCP client is closed and the reconnect timer is - cancelled. - -#### Failure classification - -The connection manager owns all transport-error classification so that callers -never inspect raw error shapes: - -- **Session expiry** — MCP error code 404. Action: rebuild client + transport, - retry the request once. -- **Transport failure** — network errors, socket resets, connection refused, - unexpected stream termination. Action: transition to `connecting`, trigger - serialized reconnect. -- **Request timeout** — MCP error code -32001 or message matching - `request timed out`. Action: surface to caller as a typed timeout error (no - reconnect needed). - -This keeps transport concerns encapsulated inside the connection manager. - -#### Serialized reconnects - -All reconnect triggers (failed requests, transport errors, backoff timer) funnel -through a single `reconnect()` path that deduplicates concurrent attempts. If a -reconnect is already in flight, additional callers await the same promise. This -prevents thundering-herd behavior when multiple concurrent requests fail -simultaneously. - -#### Reconnect strategy - -Auto-reconnect is mandatory, not optional. 
Use exponential backoff with jitter: - -- Initial delay: 1 s. -- Max delay: 60 s. -- Multiplier: 2. -- Jitter: +/- 25%. -- Reset delay to initial on successful connect. - -The backoff timer runs in `offline` state. On each tick the manager transitions -to `connecting` and attempts a reconnect. If the attempt fails, the manager -returns to `offline` with an increased delay. - -### 2. Refactor GraphitiClient into a domain adapter - -Update `src/services/client.ts` so it: - -- Depends on the new connection manager instead of directly owning MCP transport - state. -- Keeps response parsing and Graphiti-specific helpers such as `searchFacts`, - `searchNodes`, `getEpisodes`, and `addEpisode`. -- Treats offline errors as soft failures for **read** operations by returning - empty results and logging at warn/debug level. -- Treats offline errors as soft failures for **write** operations by logging and - **re-throwing** the error so higher-level code can decide whether to retry. In - particular, `SessionManager.flushPendingMessages` already re-queues messages - on failure; silently dropping writes here would break that retry path. The - connection manager's typed offline error makes it easy for callers to - distinguish "server unreachable" from permanent failures. - -### 3. Update plugin initialization and impacted files - -**`src/index.ts`** — primary changes: - -- Construct the connection manager first. -- Call `connectionManager.start()` without awaiting a full connect. -- Pass the manager into `GraphitiClient`. -- Optionally expose a cleanup hook that calls `connectionManager.stop()` if the - plugin API supports lifecycle teardown. - -**`src/session.ts`** — `SessionManager.flushPendingMessages` already re-queues -messages on `addEpisode` failure. No semantic change needed, but verify that the -new typed offline error propagates correctly through the catch block so the -re-queue path still triggers. 
- -**`src/handlers/event.ts`** — calls `flushPendingMessages` and -`client.addEpisode` in session-idle and session-delete flows. These call sites -should continue to catch and log failures; no behavioral change beyond receiving -typed errors instead of raw transport errors. - -**`src/handlers/chat.ts`** — hot-path memory injection must remain Redis/cache -only. The chat handler should not call `searchFacts`, `searchNodes`, or -`connectionManager.ready(timeoutMs)` before injection; Graphiti warmup and -refresh remain background-only. - -**`src/handlers/compacting.ts`** — compaction injection should use the same -Redis snapshot + cached-memory inputs as chat-time injection. It must not make -synchronous Graphiti reads on the hot path. - -**`src/services/client.ts`** — refactored as described in section 2. - -### 4. Error model - -Add typed internal errors or discriminators for: - -- **offline** — request rejected because the manager is in `offline` or - `closing` state. -- **queue-timeout** — request was queued during `connecting` but its per-request - deadline elapsed before the connection was established. -- **transport-failure** — a connected call failed due to a network-level error - (not a Graphiti application error); the manager is now reconnecting. -- **session-expired** — MCP 404; the manager is rebuilding the session. - -These typed errors let `GraphitiClient` and `SessionManager` distinguish -transient transport problems from permanent failures without inspecting raw -error text. - -### 5. Queue policy - -Use a small bounded in-memory queue only for the `connecting` state. - -- FIFO dispatch order. -- Cap queue length (e.g. 32) to avoid unbounded growth if many requests arrive - during a slow connect. -- Each queued request carries a per-request deadline (default configurable, e.g. - 15 s). When the deadline fires, the request is removed from the queue and - rejected with a `queue-timeout` error. 
-- When the queue is full, **drop the oldest entry** (reject it with a - `queue-timeout` error) and enqueue the new request. Rationale: in a - hook-driven system the most recent request is likelier to carry the most - relevant context (e.g. the latest user message). Older queued requests are - already stale by the time the connection recovers. - -This preserves the requested semantics: buffering while connecting, but -rejecting requests when the manager is offline. - -## Implementation Steps - -1. Add `src/services/connection-manager.ts` with state machine, queue with - per-request deadlines, serialized reconnect, exponential backoff, readiness - signal, and typed error classes. -2. Refactor `src/services/client.ts` to delegate raw tool calls to the manager. - Remove transport/session-expiry logic from `GraphitiClient`. Preserve - write-error propagation for `addEpisode` so - `SessionManager.flushPendingMessages` retry semantics are maintained. -3. Update `src/index.ts` to construct the connection manager, call `start()` - without awaiting, and pass it into `GraphitiClient`. -4. Verify `src/session.ts` — confirm `flushPendingMessages` catch block handles - the new typed offline error correctly (re-queue path). -5. Verify `src/handlers/event.ts`, `src/handlers/chat.ts`, and - `src/handlers/compacting.ts` — confirm read-path fail-open behavior is - unchanged, and do not add a pre-injection `ready()` call in `chat.ts`. -6. Update tests in `src/services/client.test.ts` and add focused tests for the - connection manager (see [Testing Plan](#testing-plan)). -7. Run `deno test`, `deno check src/index.ts`, and any relevant linting. 
- -## Testing Plan - -Add or update tests for: - -- startup does not block on a successful or failed background connect -- `ready()` resolves `true` on successful connect, `false` on timeout -- requests issued during `connecting` are queued and later resolved -- queued requests that exceed their per-request deadline reject with - `queue-timeout` -- requests issued during `offline` reject immediately with typed offline error -- mid-session transport disconnect triggers serialized reconnect and retries the - failed request once -- expired-session (MCP 404) errors trigger one reconnect and one retry -- concurrent transport failures share a single reconnect attempt (no thundering - herd) -- auto-reconnect backoff fires in `offline` state and transitions back to - `connected` on success -- read APIs return empty collections on offline/timeout conditions -- write APIs (`addEpisode`) propagate offline errors so - `SessionManager.flushPendingMessages` can re-queue -- queue-full policy drops oldest entry, not newest -- `stop()` transitions to `closing`, rejects queued requests, cancels reconnect - timer - -## Resolved Design Decisions - -- **Auto-reconnect is mandatory.** The manager always runs exponential backoff - in `offline` state. There is no "stay offline until explicit trigger" mode. -- **No `idle` state.** `start()` transitions directly to `connecting`. Before - `start()` is called the manager does not exist; after `stop()` it is inert. -- **Write errors propagate to callers.** `addEpisode` failures (offline or - otherwise) throw so that higher-level retry logic such as - `SessionManager.flushPendingMessages` can re-queue. Read operations continue - to fail open with empty results. - -## Open Questions - -- Exact default values for per-request deadline and queue capacity (proposed: 15 - s and 32; confirm during implementation). -- Whether `ready()` timeout should be configurable per call site or set once at - construction. 
- -## Expected Outcome - -The first OpenCode session should no longer stall on Graphiti warmup. Graphiti -availability becomes a background concern managed by one process-wide transport -layer, while memory features continue to operate on a best-effort basis with -fast failure when the backend is unavailable. diff --git a/docs/ContextOverhaul.md b/docs/ContextOverhaul.md deleted file mode 100644 index aa36cc6..0000000 --- a/docs/ContextOverhaul.md +++ /dev/null @@ -1,1014 +0,0 @@ -# Context Overhaul — Context-Mode-Aligned Hot Path on FalkorDB - -**Status:** Superseded — retained as historical context only\ -**Superseded by:** - -- `docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md` (architecture) -- `docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md` - (implementation) - -> **Historical-only note:** This document preserves the earlier native-routing -> overhaul proposal and its original section numbering. Any implementation -> phases, file-change lists, or acceptance checklists below are historical notes -> only and are **not** the active backlog for the repository. For current -> architecture and acceptance criteria, use the two superseding MCP-first plan -> documents above together with `README.md`. - -**Date:** 2026-03-20\ -**Historical refs:** `README.md`, `docs/ContextOverhaulTests.md` - ---- - -## 1 Problem - -The current plugin is only partially aligned with the hot-path behavior that -makes `context-mode` effective. 
- -Today, this repository does well at: - -- keeping Graphiti off the steady-state chat path -- extracting compact continuity events instead of replaying full transcripts -- rebuilding a deterministic `<session_memory>` envelope from local state - -But it still falls short in the most important real-time token-saving area: - -- native heavy tool calls are usually allowed to run first -- large tool outputs can still enter the live OpenCode transcript -- the plugin mainly compresses what it remembers and re-injects later -- it does not yet consistently prevent high-volume context from being created at - the source - -In contrast, `context-mode` achieves most of its context savings by intercepting -tool calls before execution and routing them toward lighter, bounded behavior. -For this plugin to follow that design closely enough, the hot path must shift -from "compact after the fact" to "prevent or bound transcript growth before it -happens." - -This plan updates the architecture to target at least **80% behavioral -alignment** with `context-mode` on the hot path while preserving this repo's two -intentional differences: - -1. **Storage layer:** short-term state remains in FalkorDB via the Redis - protocol and existing `redis.*` config keys. -2. **Session lineage model:** child sessions remain first-class participants in - the root session's continuity state rather than being reduced to summarized - agent-tool output only. - ---- - -## 2 Goals - -1. **Adopt source-side token reduction.** Heavy native tool calls must be - intercepted before execution and denied, bounded, or rewritten so raw - payloads do not enter the live transcript unnecessarily. -2. **Reach >=80% context-mode hot-path alignment.** Match `context-mode` on - pre-tool routing, deterministic enforcement, compact event extraction, and - conservative session snapshotting. -3.
**Keep Graphiti off the hot path.** No synchronous Graphiti call may block - `tool.execute.before`, `chat.message`, `messages.transform`, - `session.compacting`, or any per-message event hook. -4. **Keep short-term state in FalkorDB.** The hot tier continues to use the - Redis-compatible FalkorDB endpoint configured through canonical `redis.*` - settings only. -5. **Preserve session continuity.** The plugin must still inject deterministic - `<session_memory>` derived from local typed events, snapshots, and optional - cached Graphiti recall. -6. **Preserve intentional divergence for child sessions.** Child/subagent work - must continue to accumulate into the canonical root session instead of being - flattened to opaque tool summaries. - ---- - -## 3 Alignment Target - -### 3.1 What "80% aligned" means - -This repo does **not** need to become a clone of `context-mode`. It does need to -match its core hot-path mechanics closely enough that the same practical -benefits appear in OpenCode sessions. - -The required alignment surface is: - -- **Pre-tool interception** for heavy tools -- **Deterministic routing policy** implemented in code, not by a separate LLM -- **Allow / modify / deny** style decisions at tool-call time -- **Compact post-tool continuity extraction** from metadata and short summaries -- **Priority-tiered session snapshot building** from typed events -- **Stable reinjection** of compact continuity state before LLM calls - -#### 3.1.1 Concrete alignment checklist - -The 80% target is met when **all** of the following are true: - -| # | Criterion | Measurement | -| -- | ----------------------------------- | -------------------------------------------------------------------------------------------------------------- | -| A1 | Pre-tool interception exists | `tool.execute.before` hook is registered and exercised for every tool in the minimum set (§6.2).
| -| A2 | Deterministic routing decisions | Each tool in the minimum set has a coded policy that returns allow / modify / deny without calling an LLM. | -| A3 | Source-side token prevention | At least one heavy-tool class (`Read`, `Bash`, `WebFetch`) is demonstrably bounded or denied before execution. | -| A4 | Compact event extraction | No `SessionEvent.body` exceeds 4 KB; no raw tool output stored as a hot-tier event. | -| A5 | Priority-tiered snapshot | Snapshot respects P0–P3 tiers and stays within `SNAPSHOT_BODY_BUDGET`. | -| A6 | Stable reinjection | `` is injected on every `messages.transform` and `session.compacting` call. | -| A7 | No Graphiti on hot path | Zero synchronous MCP calls during any hook return (existing invariant, must not regress). | -| A8 | Context-mode-style routing guidance | Read/Grep/Bash guidance is injected once per session; WebFetch is blocked; Task prompt routing is rewritten. | - -Criteria A1–A3 are the **new** requirements from this plan. Criteria A4–A7 are -**existing** invariants that must not regress. Criterion A8 captures the -session-scoped guidance and prompt-rewrite mechanics that make `context-mode`'s -OpenCode routing practical without replacing native tools. 
- -The allowed divergence surface is: - -- FalkorDB/Redis instead of SQLite for local state -- root-session promotion for child/subagent continuity -- Graphiti-backed async long-term memory and cache refresh -- this repo's existing `` envelope instead of `context-mode`'s - `` format - -### 3.2 Non-goals - -This plan does **not** include: - -- replacing FalkorDB with SQLite -- moving Graphiti back onto the hot path -- removing the existing Graphiti async drain/cache architecture -- reverting child-session aggregation to summarized-only agent events -- introducing a second LLM summarization pass for the hot tier - ---- - -## 4 Architecture - -```text -opencode-graphiti plugin (TypeScript / Deno) - | - |- Hot path — OpenCode hooks + FalkorDB over Redis protocol - | |- tool.execute.before - | | - inspect native tool call - | | - allow / modify / deny based on deterministic routing rules - | | - prevent oversized raw outputs from entering transcript - | | - | |- event / chat.message / messages.transform / session.compacting - | | - extract typed continuity events - | | - rebuild compact snapshot from FalkorDB state - | | - inject canonical - | | - | '- FalkorDB storage via Redis commands - | - session events - | - snapshots - | - memory cache - | - pending async drain batches - | - '- Async tier — Graphiti MCP - - drain semantic episodes - - refresh cached long-term recall - - prime cold sessions opportunistically - - never block hook returns -``` - -### 4.1 Architectural shift - -The old hot-path posture was: - -- let native tools run -- observe their output afterward -- store only a compact continuity representation - -The revised hot-path posture becomes: - -- intercept the tool call first -- prevent or rewrite the expensive form when appropriate -- only then observe the resulting bounded tool activity -- store compact continuity from the bounded result - -This is the single biggest design change in the plan. 
- ---- - -## 5 Hook Model - -### 5.1 Required hooks - -| Hook | Purpose | -| -------------------------------------- | -------------------------------------------------------------- | -| `tool.execute.before` | Pre-tool routing, deny/modify/allow decisions | -| `event` | Session lifecycle + typed event capture | -| `chat.message` | Prepare local continuity state for the current turn | -| `experimental.chat.messages.transform` | Inject canonical `` into the last user message | -| `experimental.session.compacting` | Inject the same continuity envelope into compaction | - -### 5.2 Hook API contract (from `@opencode-ai/plugin@1.2.26`) - -The OpenCode plugin SDK exposes these tool-lifecycle hooks: - -```ts -// tool.execute.before — fires before tool execution -"tool.execute.before"?: ( - input: { tool: string; sessionID: string; callID: string }, - output: { args: any }, -) => Promise; - -// tool.execute.after — fires after tool execution -"tool.execute.after"?: ( - input: { tool: string; sessionID: string; callID: string; args: any }, - output: { title: string; output: string; metadata: any }, -) => Promise; -``` - -**Key constraint:** `tool.execute.before` can only mutate `output.args`. There -is no first-class `deny` return value in the SDK. - -`context-mode`'s published OpenCode plugin resolves this by **throwing an -Error** from `tool.execute.before` for `deny` and `ask` decisions, and by using -in-place arg mutation for `modify` decisions. This plan adopts the same -mechanism. - -Therefore: - -1. **Hard deny** = throw an error from `tool.execute.before`. -2. **Modify** = mutate args in place before native tool execution. -3. **Context guidance** = no-op at the SDK layer; routing guidance should reach - the model through `AGENTS.md`, injected subagent prompt blocks, or bounded - tool-arg rewrites. 
- -### 5.3 `tool.execute.after` - -Unlike the speculative earlier draft, `context-mode`'s OpenCode plugin does not -use `tool.execute.after` to rewrite or truncate visible tool output. It uses the -after-hook for continuity capture only. - -This plan follows that design: - -- `tool.execute.after` remains available for event extraction and metadata - capture -- it is **not** part of the primary routing/token-reduction mechanism -- source-side prevention must happen in `tool.execute.before` - -### 5.4 New hot-path invariant - -`tool.execute.before` becomes part of the core hot-path contract. - -No heavy native tool class should be considered fully supported unless it has: - -1. an explicit routing decision policy -2. tests for allow / modify / deny behavior -3. a documented bounded-output rationale - ---- - -## 6 Pre-Tool Routing Design - -### 6.1 Decision model - -The plugin should adopt a `context-mode`-style routing engine that returns one -of these decisions: - -- `allow` — safe to run unchanged -- `modify` — safe only after input is rewritten or bounded -- `deny` — unsafe/raw-output-heavy; reject with actionable guidance - -Unlike `context-mode`, this repo does not need to reproduce every external -sandbox tool. But it must reproduce the same **mechanical behavior**: - -- decisions are deterministic and local -- decisions happen before execution -- decisions are based on tool name, arguments, and risk heuristics -- denial/modification prevents transcript blow-up at the source - -#### 6.1.1 Routing principles - -1. **Deterministic, not heuristic-heavy.** Each tool's policy is a short - decision tree based on argument inspection (file extension, path pattern, - presence/absence of `limit`, command prefix). No LLM calls, no embedding - lookups. -2. 
**Follow context-mode's guidance-first posture.** Prefer a once-per-session - routing nudge for broad native tools (`Read`, `Grep`, general `Bash`) and - reserve hard blocks for tools/patterns that are known context sinks (for - example `WebFetch`, raw `curl`, `wget`, and certain build-tool invocations). -3. **Composable policies.** Each tool's policy is a pure function - `(toolName, args) => RoutingDecision`. The routing engine dispatches by tool - name and delegates to the per-tool policy. New tools are added by registering - a new policy function. -4. **No cross-tool state.** Routing decisions are stateless per call. The engine - does not track how many times a tool has been called or accumulate context - across calls. -5. **Fail-open for unknown tools.** Tools not in the minimum set (§6.2) are - allowed unchanged. This mirrors `context-mode`'s OpenCode posture more - closely than an aggressive deny-by-default design. - -#### 6.1.2 `RoutingDecision` type - -```ts -type RoutingDecision = - | { action: "allow" } - | { action: "modify"; args: Record; reason: string } - | { action: "deny"; guidance: string } - | { action: "context"; guidance: string }; -``` - -The routing engine applies the decision: - -- `allow` → no mutation to `output.args` -- `modify` → replace `output.args` with the rewritten args -- `deny` → throw an error from `tool.execute.before` -- `context` → deliver once-per-session guidance through the routing layer - -#### 6.1.3 Session-scoped guidance throttling - -`context-mode` only emits its advisory guidance once per session so the model is -nudged without flooding the transcript with repeated routing instructions. This -plan should do the same. - -The guidance throttle should be: - -- keyed by the **canonical root session ID**, not the raw child session ID -- keyed by guidance type (`read`, `grep`, `bash`, etc.) 
-- held in local process state only; no FalkorDB round-trip is required -- shared across parent and child sessions in the same lineage because child work - contributes to the same continuity stream - -This preserves the intentional child-session model while still matching -`context-mode`'s once-per-session guidance behavior closely. - -### 6.2 Tool classes in scope - -Initial routing coverage must include at least: - -- `Read` -- `WebFetch` -- `Bash` -- `Grep` -- `Glob` -- `Task` - -Additional coverage may later expand to tools such as browser snapshots or other -large-payload integrations, but these six are the minimum alignment set. - -### 6.3 Routing policy matrix - -The following matrix defines the concrete routing policy for each tool in the -minimum set. Each row describes the argument conditions that trigger each -decision. - -#### Summary matrix - -| Tool | Allow when | Modify when | Deny when | -| ---------- | ------------------------------------------------------ | -------------------------------------------- | ------------------------------------------------------------------ | -| `Read` | Usually allow | Never rewrite args by default | Never hard deny by default; emit once-per-session guidance | -| `WebFetch` | — | — | Always hard deny and redirect to the safer context-mode-style path | -| `Bash` | Allow by default | Rewrite known bad patterns to safe guidance | Hard deny only for explicit security/policy matches | -| `Grep` | Usually allow | Never rewrite args by default | Never hard deny by default; emit once-per-session guidance | -| `Glob` | Allow | Optionally scope `path` only if clearly safe | Avoid speculative rewrites; do not invent unsupported excludes | -| `Task` | Allow, but rewrite delegated prompt with routing block | Rewrite prompt field to append routing block | — | - -#### `Read` — detailed policy - -```text -if tool is Read: - → emit a once-per-session routing guidance block that nudges the agent toward - the safer 
bounded/file-processing path - → otherwise allow the native tool call to proceed unchanged -``` - -This follows `context-mode`'s OpenCode behavior more closely than silently -rewriting read limits. The goal is to change agent behavior at the source while -preserving the native tool contract unless a stricter block is truly necessary. - -#### `WebFetch` — detailed policy - -```text -if tool is WebFetch: - → hard deny by throwing an error - → denial guidance must redirect to the safer fetch/index/search flow rather - than allowing raw page content into transcript -``` - -This is the clearest source-side prevention mechanism in `context-mode`'s -OpenCode plugin and should be copied directly. - -#### `Bash` — detailed policy - -```text -if command hits explicit security-policy deny pattern: - → hard deny (same as context-mode security layer) - -if command contains raw network patterns (`curl`, `wget`, inline HTTP clients): - → modify command into a short guidance command that redirects to the safer - fetch/index or sandbox-execute path - -if command invokes high-volume build tools (`gradle`, `mvn`, wrappers): - → modify command into a short guidance command that redirects to a safer - sandboxed execution path - -otherwise: - → allow, but emit once-per-session routing guidance for Bash -``` - -**Design note:** `Bash` should follow `context-mode`'s actual OpenCode strategy: -pattern-based rewrites for the worst offenders, not an oversized allowlist plus -post-hoc truncation design. - -**Ordering note:** the Bash policy should run in this order: - -1. repo security-policy deny/ask checks -2. raw network rewrite checks (`curl`, `wget`, inline HTTP) -3. high-volume build-tool rewrite checks -4. once-per-session Bash guidance fallback -5. otherwise passthrough - -This keeps security authoritative while preserving the same routing shape as -`context-mode`. 
- -#### `Grep` — detailed policy - -```text -if tool is Grep: - → emit a once-per-session routing guidance block that nudges the agent toward - safer bounded execution/search behavior - → otherwise allow the native tool call to proceed unchanged -``` - -#### `Glob` — detailed policy - -```text -if `path` is omitted: - → allow (native tool already defaults to cwd) - -if `pattern` is pathologically broad: - → prefer guidance in docs/tests rather than speculative arg mutation - -→ allow unless a future verified-safe rewrite exists -``` - -**Implementation note:** the native OpenCode `Glob` tool only accepts `pattern` -and optional `path`. It has no exclusion parameter, so this plan should not rely -on synthetic exclude rewrites. - -#### `Task` — detailed policy - -```text -detect prompt field (`prompt`, `request`, `objective`, `question`, `query`, or -`task`) -append a routing block to the delegated prompt -preserve `subagent_type` unless a validated future change is explicitly chosen -→ modify -``` - -**Rationale:** this follows `context-mode`'s actual delegated-prompt rewrite -mechanic while preserving this repo's child-session-first continuity model. - -### 6.4 Guardrails against over-copying `context-mode` - -This section documents where this repo **intentionally does not** follow -`context-mode`, even when the behavior looks similar: - -1. **No SQLite local store.** `context-mode` uses SQLite for local state. This - repo uses FalkorDB via Redis protocol. The routing engine must not assume - SQLite-style queries or schema. -2. **No second LLM summarization pass.** `context-mode` may use an LLM to - summarize tool output. This repo's hot tier is deterministic and - programmatic. Summaries come from structured event extraction, not LLM calls. -3. **No flattened subagent events.** `context-mode` records subagent work as - summarized tool events. This repo promotes child sessions to first-class - participants in the root session (§11). -4. 
**No `` envelope.** This repo uses `` with - its own section taxonomy (§9). The envelope shape is not a copy target. -5. **No external sandbox tools in Phase 1.** `context-mode` routes users toward - its own custom tooling. This repo copies the pre-tool mechanics first without - requiring the full tool ecosystem in the initial phase. -6. **Hard deny is supported by thrown errors.** This repo should follow - `context-mode`'s OpenCode implementation and treat thrown errors in - `tool.execute.before` as the authoritative deny mechanism. - -### 6.5 User-facing denial behavior - -When a tool call is denied, the plugin should return a concise actionable error -that explains the safer bounded path. - -The goal is not just to block. It is to steer the agent toward the same safer -workflow that `context-mode` would have chosen. - -Denial messages must: - -- be ≤ 200 characters -- name the denied tool and the problematic argument -- suggest a concrete alternative (e.g. "Use Read with limit=200 instead") -- not include raw argument values that could themselves be large - -Guidance messages should follow the same philosophy: - -- concise enough to fit comfortably in a single tool result or prompt suffix -- specific about the safer path to take next -- stable across repeated runs so tests can assert against them - ---- - -## 7 Short-Term Storage Layer - -### 7.1 Storage decision - -Short-term state remains in FalkorDB, accessed over the Redis protocol using the -existing `RedisClient` and canonical `redis.*` config. - -There is **no new `falkordb.*` config section** in the revised plan. 
- -`redis.*` remains canonical because: - -- the transport is Redis-compatible -- the runtime already uses Redis-oriented primitives -- FalkorDB is the deployment choice behind that endpoint - -### 7.2 Key layout - -| Key | Type | Purpose | -| ----------------------------- | ------ | ------------------------------------------ | -| `session:{id}:events` | List | typed hot-tier continuity events | -| `session:{id}:snapshot` | String | compact snapshot XML | -| `memory-cache:{groupId}` | String | cached Graphiti-derived recall | -| `memory-cache:{groupId}:meta` | Hash | cache query / refresh metadata | -| `drain:pending:{groupId}` | List | queued semantic drain entries for Graphiti | -| `drain:dead:{groupId}` | List | dead-lettered drain entries | - -### 7.3 Invariant - -FalkorDB is the hot-path system of record for: - -- session continuity -- compact restore snapshots -- cached long-term memory projections -- pending async Graphiti consolidation work - -Graphiti is never required for the current turn to proceed. - ---- - -## 8 Revised Hot-Tier Data Contract - -### 8.1 Event contract - -The hot tier should continue using compact typed events rather than raw copied -transcripts, but the contract becomes stricter: - -1. **pre-tool routing first** -2. **sanitize before extraction** -3. **extract compact typed events only** -4. **build conservative snapshot** -5. **inject stable canonical memory envelope** -6. 
**drain semantic episodes asynchronously** - -### 8.2 Event policy - -Keep: - -- file paths -- search queries -- tool names -- exit/error signals -- explicit task/decision state -- terse subagent summaries -- concrete environment/git state - -Reject as durable hot-tier memory: - -- raw file contents from `Read` -- large shell/web transcripts -- wrapper tags like `` / `` when they come from replayed output -- assistant operational narration -- previously injected memory blocks -- verbose delegated reports - -### 8.3 Snapshot policy - -The snapshot should move even closer to `context-mode`'s priority-tiered style: - -- P0/P1: last request, active tasks, user decisions, files in play, rules -- P2: unresolved blockers, environment, git state -- P3: subagent summaries, low-volume integration markers -- drop low-value residue aggressively under budget pressure - -The point is resumability, not archival completeness. - ---- - -## 9 Injection Strategy - -The canonical injected shape remains: - -```xml - - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - -``` - -This is intentionally different from `context-mode`'s resume envelope, but it -must be generated from the same style of compact typed state. - -### 9.1 Important distinction - -This plugin's injection layer is **not** the primary token-saving mechanism. - -Under the revised plan, token savings come from two layers together: - -1. **source-side prevention** via `tool.execute.before` -2. **compact continuity reinjection** via `` - -Without the first layer, alignment remains incomplete. - ---- - -## 10 Async Tier - -The async tier remains structurally the same: - -- Graphiti MCP drains semantic episodes in the background -- cache refreshes happen asynchronously on drift or after new facts land -- primers remain best-effort -- no Graphiti request may block a hot-path hook return - -This is an intentional divergence from `context-mode`, not an alignment gap. 
- ---- - -## 10A Hook Interaction Model - -This section documents how the new `tool.execute.before` and -`tool.execute.after` hooks interact with the existing hook pipeline. - -### 10A.1 Hook execution order (per user turn) - -```text -1. chat.message - → Prepare session state from FalkorDB. - → Stage for injection. - -2. experimental.chat.messages.transform - → Inject into last user message. - → LLM generates response (may include tool calls). - -3. [For each tool call in the LLM response:] - a. tool.execute.before ← NEW: routing decision - → allow / modify / deny the tool call args, with optional once-per-session guidance. - b. [Native tool executes with (possibly modified) args.] - c. tool.execute.after ← continuity capture / metadata only - → Observe resulting bounded tool activity. - d. event (tool.called / tool.completed) - → Extract compact SessionEvent from tool activity. - → Store in FalkorDB via RedisEventsService. - -4. event (message.updated) - → Finalize assistant message as SessionEvent. - -5. [If idle:] event (session.idle) - → Drain pending events to Graphiti (async). - → Rebuild snapshot. - -6. [If compacting:] experimental.session.compacting - → Inject into compaction context. - → event (session.compacted) → async drain + snapshot rebuild. 
-``` - -### 10A.2 Data flow between hooks - -| Producer hook | Data produced | Consumer hook | -| ------------------------ | ----------------------------------------------- | -------------------------------------------------------------- | -| `chat.message` | Staged `` envelope | `messages.transform` | -| `tool.execute.before` | Modified args / thrown deny / one-time guidance | Native tool execution, routed failure, or prompt/tool guidance | -| `tool.execute.after` | Tool metadata for continuity capture | `event` extraction / hot-tier state | -| `event` (tool.completed) | Compact `SessionEvent` | FalkorDB → snapshot → next `chat.message` | -| `session.compacting` | Injected compaction context | OpenCode compaction summarizer | - -### 10A.3 Invariants across hooks - -1. **No hook reads Graphiti synchronously.** This applies to the new hooks too. -2. **`tool.execute.before` must not call FalkorDB.** Routing decisions are pure - functions of tool name and args. No Redis round-trip. -3. **No hook-level output rewriting is required for alignment.** - `tool.execute.after` may remain metadata/event focused; token prevention - should be achieved in `tool.execute.before`. -4. **Event extraction happens after tool execution or routed denial handling**, - not during routing policy evaluation. `tool.execute.before` may cache compact - routing metadata, but routed `SessionEvent`s are only emitted later through - `tool.execute.after` and the existing event extraction pipeline. - ---- - -## 11 Session Lifecycle and Child Sessions - -### 11.1 Kept divergence - -This repo continues to resolve child/subagent sessions to a canonical root -session and stores their work as first-class continuity events in the root -session state. - -This diverges from `context-mode`, which summarizes subagent work more narrowly, -but the divergence remains intentional and in-scope. 
- -### 11.2 Constraint on new routing logic - -The new pre-tool routing layer must work correctly for both parent and child -sessions. - -Specifically: - -- routed decisions should be evaluated per live tool call regardless of lineage -- post-tool compact event extraction should still aggregate into the root - session -- child session teardown must never delete canonical root continuity state - ---- - -## 12 Configuration - -Canonical config shape remains: - -```jsonc -{ - "redis": { - "endpoint": "redis://localhost:6379", - "batchSize": 20, - "batchMaxBytes": 51200, - "sessionTtlSeconds": 86400, - "cacheTtlSeconds": 600, - "drainRetryMax": 3 - }, - "graphiti": { - "endpoint": "http://localhost:8000/mcp", - "groupIdPrefix": "opencode", - "driftThreshold": 0.5 - } -} -``` - -### 12.1 Config decision - -- `redis.*` stays canonical for the FalkorDB-backed hot tier -- `graphiti.*` stays canonical for async Graphiti integration -- legacy compatibility may remain temporarily in implementation if needed, but - the plan no longer treats `falkordb.*` as a target configuration shape - ---- - -## 13 File Changes - -### 13.1 New files - -```text -src/handlers/tool-before.ts — OpenCode tool.execute.before hook handler -src/services/tool-routing.ts — deterministic routing engine + per-tool policy functions -src/services/tool-guidance.ts — shared once-per-session guidance blocks / routing text -src/services/tool-guidance-cache.ts — in-memory per-session guidance throttle keyed by canonical session -``` - -### 13.2 Modified files - -```text -src/index.ts — register tool.execute.before; wire routing deps -src/handlers/event.ts — extract compact events from routed tool activity (deny/modify/context signals) -src/handlers/chat.ts — no structural change; continues local prep from FalkorDB state -src/handlers/messages.ts — no structural change; continues canonical injection from local state -src/handlers/compacting.ts — no structural change; continues local-only compaction injection 
-src/services/event-extractor.ts — add extraction rules for routing denial/modification events -src/services/redis-snapshot.ts — classify routing events as P2; tighten budget enforcement -src/session.ts — ensure routing hooks resolve canonical session ID for child sessions and guidance throttling -README.md — document source-side routing and updated hot-path mechanics -AGENTS.md — add tool.execute.before to hot-path section -docs/ContextOverhaulTests.md — add Suite N (pre-tool routing) test cases -``` - ---- - -## 14 Implementation Phases - -### Phase 1: Routing contract - -**Scope:** `src/services/tool-routing.ts`, `src/services/tool-guidance.ts` - -**Tasks:** - -1. Implement the `tool.execute.before` deny path by throwing an error, matching - `context-mode`'s OpenCode plugin (§5.2). -2. Define the `RoutingDecision` type (§6.1.2). -3. Implement the routing engine: dispatch by tool name, delegate to per-tool - policy functions. -4. Implement once-per-session guidance for `Read`, `Grep`, and general `Bash`. -5. Implement hard deny for `WebFetch`. -6. Implement delegated prompt rewriting for `Task`. -7. Implement the guidance throttle keyed by canonical root session ID. -8. Write unit tests for the engine dispatch and the `RoutingDecision` type. - -**Acceptance criteria:** - -- [ ] `RoutingDecision` type exists and is exported. -- [ ] Routing engine accepts `(toolName: string, args: unknown)` and returns - `RoutingDecision`. -- [ ] Policies exist for `Read`, `WebFetch`, `Bash`, `Grep`, `Glob`, `Task`. -- [ ] Hard deny uses thrown errors from `tool.execute.before`. -- [ ] Guidance is emitted at most once per canonical session lineage per type. -- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes. - -### Phase 2: Pre-tool hook wiring - -**Scope:** `src/handlers/tool-before.ts`, `src/index.ts` - -**Tasks:** - -1. Create `tool-before.ts` handler that calls the routing engine and applies the - decision to `output.args` or throws for deny. -2. 
Wire the hook in `src/index.ts` alongside the existing hooks. -3. Ensure the hook resolves the canonical session ID via `SessionManager` so - child sessions are handled correctly. -4. Thread canonical session identity into the guidance throttle so parent and - child sessions share the same once-per-session routing nudges. - -**Acceptance criteria:** - -- [ ] `tool.execute.before` hook is registered in the plugin return value. -- [ ] The hook fires for parent and child sessions. -- [ ] `tool.execute.before` does not call FalkorDB or Graphiti. -- [ ] Parent and child sessions share one guidance throttle namespace. -- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes. - -### Phase 3: Heavy-tool policies - -**Scope:** `src/services/tool-routing.ts`, `src/services/tool-guidance.ts` - -**Tasks:** - -1. Implement the `Read` guidance policy per §6.3. -2. Implement the `WebFetch` policy per §6.3. -3. Implement the `Bash` policy per §6.3 with command-pattern rewrites and - once-per-session guidance. -4. Implement the `Grep` and `Glob` policies per §6.3. -5. Implement the `Task` prompt-rewrite policy. -6. Write unit tests for each policy covering allow, modify, and deny cases. - -**Acceptance criteria:** - -- [ ] Each tool in the minimum set has ≥ 3 test cases (allow, modify, deny). -- [ ] `Read` emits guidance once per session and otherwise preserves native - args. -- [ ] `WebFetch` is denied with actionable redirect guidance. -- [ ] `Bash` rewrites `curl`/`wget`/inline HTTP/build-tool patterns. -- [ ] Bash routing preserves the documented evaluation order from §6.3. -- [ ] `Grep` emits guidance once per session and otherwise preserves native - args. -- [ ] `Glob` does not rely on unsupported exclusion args. -- [ ] `Task` rewrites delegated prompt text with routing instructions. -- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes. 
- -### Phase 4: Extraction tightening - -**Scope:** `src/handlers/event.ts`, `src/services/event-extractor.ts` - -**Tasks:** - -1. Ensure `tool.called` and `tool.completed` events from routed tool calls - extract only compact metadata (tool name, file path, exit code, summary). -2. Verify that `SessionEvent.body` never contains raw tool output. -3. Add extraction rules for the new `tool.execute.before` deny/modify/context - signals so they appear as lightweight events. - -**Acceptance criteria:** - -- [ ] No `SessionEvent.body` exceeds 4 KB after routing is active. -- [ ] Denied tool calls produce a compact event with the denial reason. -- [ ] Modified/context-guided tool calls produce a compact event noting the - routing action. -- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes. - -### Phase 5: Snapshot tightening - -**Scope:** `src/services/redis-snapshot.ts` - -**Tasks:** - -1. Review snapshot builder against the P0–P3 tier definitions in §8.3. -2. Ensure routing-related events (denials, modifications, guidance nudges) are - classified as P2 or P3 and dropped first under budget pressure. -3. Verify snapshot stays within `SNAPSHOT_BODY_BUDGET` with the new event types. - -**Acceptance criteria:** - -- [ ] Snapshot with 50+ events (including routing events) stays within budget. -- [ ] P0/P1 content (last request, active tasks, decisions) is never dropped. -- [ ] Routing denial events are classified as P2. -- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes. - -### Phase 6: Integration validation + documentation - -**Scope:** tests, `README.md`, `docs/ContextOverhaulTests.md`, `AGENTS.md` - -**Tasks:** - -1. Add Suite N (pre-tool routing) to `docs/ContextOverhaulTests.md`. -2. Run the full test suite including new routing tests. -3. Update `README.md` to document source-side routing. -4. Update `AGENTS.md` hot-path section to include `tool.execute.before`. -5. Verify all alignment checklist items from §3.1.1. 
- -**Acceptance criteria:** - -- [ ] All §3.1.1 alignment criteria (A1–A8) are met. -- [ ] `deno test` passes; `deno run build` passes; `deno task check` passes; - `deno lint` passes; `deno fmt --check` passes. -- [ ] `README.md` documents the pre-tool routing behavior. -- [ ] `AGENTS.md` lists `tool.execute.before` in the hot-path section. -- [ ] `docs/ContextOverhaulTests.md` includes Suite N with ≥ 10 test cases. - ---- - -## 15 Validation Requirements - -### 15.1 Required tests — Suite N (Pre-Tool Routing) - -Add to `docs/ContextOverhaulTests.md` as Suite N: - -| ID | Test case | Tier | -| ---- | -------------------------------------------------------------------- | ----------- | -| N-1 | `Read` with ordinary args passes through after guidance handling | Unit | -| N-2 | `Read` emits guidance once, then falls through | Unit | -| N-3 | `WebFetch` throws hard deny with actionable guidance | Unit | -| N-4 | `Bash` with `curl` rewrites to guidance command | Unit | -| N-5 | `Bash` with inline HTTP rewrites to guidance command | Unit | -| N-6 | `Bash` with build tool command rewrites to guidance command | Unit | -| N-7 | `Bash` with ordinary command emits guidance once, then falls through | Unit | -| N-8 | `Grep` emits guidance once, then falls through | Unit | -| N-9 | `Glob` with ordinary args passes through unchanged | Unit | -| N-10 | `Task` appends routing block to delegated prompt | Unit | -| N-11 | guidance throttle emits once per canonical root session | Unit | -| N-12 | child-session tool calls share the same guidance throttle | Integration | -| N-13 | `Task` preserves child-session-first continuity model | Integration | -| N-14 | `tool.execute.before` does not call FalkorDB | Unit | -| N-15 | `tool.execute.before` fires for child session tool calls | Integration | -| N-16 | Unknown tool name → allow (fail-open) | Unit | - -### 15.2 Required full-suite checks - -Before merging any part of this plan: - -- `deno test` -- `deno run build` -- `deno task check` -- 
`deno lint` -- `deno fmt --check` - -### 15.3 Behavioral success criteria - -The implementation is only considered successful when all of these are true: - -1. large native tool outputs are materially reduced because the expensive call - is prevented or bounded before execution -2. hot-tier memory no longer depends on observing large transcript dumps first -3. `` remains compact and deterministic -4. Graphiti remains fully async -5. FalkorDB remains the hot-tier storage backend through `redis.*` -6. child-session aggregation still works as designed -7. all §3.1.1 alignment criteria (A1–A8) are met -8. Suite N tests all pass - ---- - -## 16 Tradeoffs - -| Tradeoff | Impact | Mitigation | -| ------------------------------- | ------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | -| More pre-tool blocking | Some previously tolerated raw tool usage will now be rejected | Return clear actionable denial messages and safe bounded defaults | -| More policy complexity | Routing adds maintenance cost | Centralize all heuristics in `tool-routing.ts` and `tool-guidance.ts` | -| Not a full context-mode clone | Some behavior still differs | Alignment target is explicit: hot-path mechanics, not storage or session-lineage identity | -| Bounded results may omit detail | Some calls will return less raw data | Agent can make additional focused bounded calls when needed | - ---- - -## 17 Confirmed Decisions - -- The repo should move to **>=80% context-mode alignment on the hot path**. -- The key missing mechanic to copy is **pre-tool routing and source-side token - prevention**. -- The storage layer remains **FalkorDB over the Redis protocol**. -- Canonical config remains **`redis.*` + `graphiti.*`** only. -- Graphiti remains **async-only**. -- Child sessions remain **first-class entries in root continuity state**. 
-- The hot tier remains **deterministic and programmatic**, not LLM-summarized. - ---- - -## 18 Immediate Next Step - -Implement Phase 1 first: - -1. **Implement thrown-error deny** in `tool.execute.before`, matching - `context-mode`'s OpenCode plugin. -2. **Define `RoutingDecision`** and the routing engine dispatch. -3. **Implement actual context-mode-aligned baseline policies** for `Read`, - `WebFetch`, `Bash`, `Grep`, `Glob`, and `Task`. -4. **Write unit tests** for the engine and these baseline policies. -5. **Cleanly document any repo-specific divergence** only where required by the - child-session model or FalkorDB storage boundary. diff --git a/docs/ContextOverhaulTests.md b/docs/ContextOverhaulTests.md deleted file mode 100644 index 332f412..0000000 --- a/docs/ContextOverhaulTests.md +++ /dev/null @@ -1,16 +0,0 @@ -# Context Overhaul Test Plan - -This file is the stable `docs/` entry point for context-overhaul runtime -validation references. - -The active rewrite of the authoritative test plan is tracked in -`docs/superpowers/plans/2026-03-24-agentic-runtime-test-plan-rewrite.md`. - -Supporting context lives in: - -- `docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md` for - the rewrite scope and acceptance shape. -- `docs/ContextOverhaul.md` for the historical architecture rationale. - -Until the full rewrite lands here, use this path as the canonical non-dated -reference from repository docs and operator guides. diff --git a/docs/superpowers/plans/2026-03-24-agentic-runtime-test-plan-rewrite.md b/docs/superpowers/plans/2026-03-24-agentic-runtime-test-plan-rewrite.md index c3dc2a6..1ce2720 100644 --- a/docs/superpowers/plans/2026-03-24-agentic-runtime-test-plan-rewrite.md +++ b/docs/superpowers/plans/2026-03-24-agentic-runtime-test-plan-rewrite.md @@ -5,17 +5,18 @@ > superpowers:executing-plans to implement this plan task-by-task. Steps use > checkbox (`- [ ]`) syntax for tracking. 
-**Goal:** Replace the historical `docs/ContextOverhaulTests.md` with a new -authoritative, execution-ready MCP-first agentic runtime test plan, including -mandatory live multi-agent validation and explicit `` -coverage. +**Goal:** Replace the retiring `ContextOverhaul` docs with a new authoritative, +execution-ready MCP-first agentic runtime validation manual at +`docs/SmokeTests.md`, including mandatory live multi-agent validation and +explicit `` coverage. **Architecture:** Keep the deliverable narrowly scoped to documentation authority and validation procedure rather than code changes to the runtime -itself. Rewrite the test plan in place at `docs/ContextOverhaulTests.md`, -restructure it around automated verification and live agentic runtime scenarios, -and clean up stale repository references that would misdirect future operators -or test authors. +itself. Produce a new `docs/SmokeTests.md` validation manual, restructure it +around automated verification and live agentic runtime scenarios, and clean up +repository references that would misdirect future operators or test authors now +that `docs/ContextOverhaul.md` and `docs/ContextOverhaulTests.md` are being +removed. **Tech Stack:** Markdown documentation, existing repository architecture docs, Deno task/test commands, OpenCode live subagent runtime assumptions, @@ -25,23 +26,20 @@ Redis/FalkorDB and Graphiti local service defaults. ## File structure and responsibility lock-in -- `docs/ContextOverhaulTests.md` - - The authoritative living runtime test plan. This file must be fully - rewritten in place, not replaced with a dated path. +- `docs/SmokeTests.md` + - The authoritative living runtime validation manual. Create this file as the + stable replacement for the retiring `ContextOverhaul` docs. 
- `AGENTS.md` - - Cleanup only: correct stale references to the old - `plans/ContextOverhaul*.md` paths so the repository reading order, - validation notes, and key-file hints point to the authoritative `docs/` - paths. + - Cleanup only: correct stale references so the repository reading order, + validation notes, and key-file hints point to `docs/SmokeTests.md` instead + of the retiring `ContextOverhaul` docs. - `docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md` - Read-only grounding spec for the rewrite. Do not rewrite the spec during implementation unless the user explicitly asks for spec changes. -`docs/ContextOverhaul.md` is a historical design document. Do not broaden this -task into updating its historical references unless the user explicitly asks for -that cleanup as separate work. Treat any stale references encountered there as -deferred follow-up documentation cleanup, not as part of this plan's required -edits. +`docs/ContextOverhaul.md` and `docs/ContextOverhaulTests.md` are being removed. +This plan should treat references to them as stale documentation links that need +to be redirected or removed where they would mislead operators. Known deferred stale references outside `AGENTS.md` may still exist in code or tests (for example references inside `src/services/` tests/guidance comments). @@ -62,22 +60,21 @@ clean-slate architecture edits. 
Capture these reference expectations before editing: -- `AGENTS.md` must not reference the stale non-existent path - `plans/ContextOverhaulTests.md` -- `AGENTS.md` must not reference the stale non-existent path - `plans/ContextOverhaul.md` -- `AGENTS.md` must reference `docs/ContextOverhaulTests.md` wherever it points - to the authoritative test plan -- `AGENTS.md` must reference `docs/ContextOverhaul.md` wherever it points to the - historical design document +- `AGENTS.md` must not reference `plans/ContextOverhaulTests.md` +- `AGENTS.md` must not reference `plans/ContextOverhaul.md` +- `AGENTS.md` must not reference `docs/ContextOverhaulTests.md` +- `AGENTS.md` must not reference `docs/ContextOverhaul.md` +- `AGENTS.md` must reference `docs/SmokeTests.md` wherever it points to the + authoritative runtime validation manual - [ ] **Step 2: Run the failing reference search** Run: -`grep -n "plans/ContextOverhaul\.md\|plans/ContextOverhaulTests\.md\|docs/ContextOverhaul\.md\|docs/ContextOverhaulTests\.md" AGENTS.md` +`grep -n "plans/ContextOverhaul\.md\|plans/ContextOverhaulTests\.md\|docs/ContextOverhaul\.md\|docs/ContextOverhaulTests\.md\|docs/SmokeTests\.md" AGENTS.md` -Expected: FAIL in the sense that the output still shows stale `plans/` path -references that need correction. +Expected: FAIL in the sense that the output still shows retiring +`ContextOverhaul` references that need correction and/or is missing the new +`docs/SmokeTests.md` target. - [ ] **Step 3: Make the minimal doc cleanup in `AGENTS.md`** @@ -87,27 +84,25 @@ Update only the stale path references in: - Resume-Reading Order - Key Files table -Also add a `docs/ContextOverhaulTests.md` row to the Key Files table if the -table would otherwise omit the repository's authoritative runtime test plan. -Also update the existing stale `plans/ContextOverhaul.md` Key Files row to -`docs/ContextOverhaul.md` rather than removing that historical design entry. 
+Add or replace entries so the authoritative validation document is +`docs/SmokeTests.md`. Remove `ContextOverhaul` doc references from these AGENTS +sections if they would otherwise point at deleted files. Do not rewrite surrounding architecture guidance. - [ ] **Step 4: Re-run the reference search to verify the cleanup** Run: -`grep -n "plans/ContextOverhaul\.md\|plans/ContextOverhaulTests\.md\|docs/ContextOverhaul\.md\|docs/ContextOverhaulTests\.md" AGENTS.md` +`grep -n "plans/ContextOverhaul\.md\|plans/ContextOverhaulTests\.md\|docs/ContextOverhaul\.md\|docs/ContextOverhaulTests\.md\|docs/SmokeTests\.md" AGENTS.md` -Expected: PASS in the sense that only `docs/ContextOverhaul.md` and -`docs/ContextOverhaulTests.md` remain as the authoritative/historical `docs/` -paths. +Expected: PASS in the sense that stale `ContextOverhaul` references are gone +from `AGENTS.md` and `docs/SmokeTests.md` remains as the active validation path. -### Task 2: Build the new test-plan outline with mandatory sections +### Task 2: Build the new smoke-test manual outline with mandatory sections **Files:** -- Modify: `docs/ContextOverhaulTests.md` +- Create: `docs/SmokeTests.md` - Grounding: `docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md` - Grounding: `README.md` @@ -137,15 +132,13 @@ spec that the current document does not satisfy: - [ ] **Step 2: Confirm the current document fails the new shape** -If `docs/ContextOverhaulTests.md` already exists, run a manual read of it -against the spec and record which required sections are missing or -historical-only. If the file is absent in the working tree, treat that absence -itself as a failing precondition that the rewrite must correct by creating the -authoritative file at that path. +Confirm that `docs/SmokeTests.md` does not yet provide the required +authoritative structure. If the file is absent in the working tree, treat that +absence itself as the failing precondition that this task must correct by +creating it. 
-Expected: FAIL because the existing document is explicitly historical and does -not provide the new authoritative MCP-first structure, or because the -authoritative file is absent and must be created. +Expected: FAIL because the authoritative smoke-test manual is absent or does not +yet provide the required MCP-first structure. - [ ] **Step 3: Rewrite the document header and section skeleton in place** @@ -155,8 +148,8 @@ The replacement must include near the top: - `Last Updated: 2026-03-24` (or the actual rewrite date if implementation slips) - `Replaces: historical native-hook-first test plan` -- a short note about the file carrying both historical and replacement-era git - history +- a short note that `docs/SmokeTests.md` replaces the retiring + `docs/ContextOverhaulTests.md` as the authoritative validation manual Then create the full mandatory section structure before filling in all test content, including an explicit `Runtime Guarantees Under Test` section scaffold @@ -186,7 +179,7 @@ even if the detailed test content is not complete yet. **Files:** -- Modify: `docs/ContextOverhaulTests.md` +- Modify: `docs/SmokeTests.md` - Verify against: `README.md`, `AGENTS.md`, `docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md` @@ -225,7 +218,7 @@ versus only in a live OpenCode runtime. **Files:** -- Modify: `docs/ContextOverhaulTests.md` +- Modify: `docs/SmokeTests.md` - Verify against: `README.md`, `AGENTS.md`, `docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md`, `docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md`, @@ -291,7 +284,7 @@ framing. **Files:** -- Modify: `docs/ContextOverhaulTests.md` +- Modify: `docs/SmokeTests.md` - Verify against: `README.md`, `AGENTS.md`, `docs/superpowers/plans/2026-03-20-context-mode-mcp-first.md`, `docs/superpowers/plans/2026-03-20-context-mode-mcp-first-implementation.md`, @@ -343,7 +336,7 @@ rephrasing mock or synthetic-hook coverage. 
**Files:** -- Modify: `docs/ContextOverhaulTests.md` +- Modify: `docs/SmokeTests.md` - [ ] **Step 1: Write the failing proof-model checklist** @@ -399,12 +392,12 @@ what coverage exists, and what blocks release. **Files:** -- Modify: `docs/ContextOverhaulTests.md` +- Modify: `docs/SmokeTests.md` - Modify: `AGENTS.md` - [ ] **Step 1: Run a final doc consistency review against the grounding spec** -Read `docs/ContextOverhaulTests.md` side by side with +Read `docs/SmokeTests.md` side by side with `docs/superpowers/specs/2026-03-24-agentic-runtime-test-plan-design.md`. Check for: @@ -424,18 +417,17 @@ formatting changes are applied by `deno task fmt`, review the doc diff and ensure only intended documentation formatting changed. These are repository-health checks for the docs rewrite itself. They are not the -same as the `deno test` commands documented inside -`docs/ContextOverhaulTests.md` for future operators, and this plan does not -require adding a new `deno task test` alias. +same as the `deno test` commands documented inside `docs/SmokeTests.md` for +future operators, and this plan does not require adding a new `deno task test` +alias. - [ ] **Step 3: Run final reference searches** Run: -`grep -n "plans/ContextOverhaul\.md\|plans/ContextOverhaulTests\.md\|docs/ContextOverhaul\.md\|docs/ContextOverhaulTests\.md" AGENTS.md` +`grep -n "plans/ContextOverhaul\.md\|plans/ContextOverhaulTests\.md\|docs/ContextOverhaul\.md\|docs/ContextOverhaulTests\.md\|docs/SmokeTests\.md" AGENTS.md` -Expected: PASS in the sense that `AGENTS.md` points only at -`docs/ContextOverhaul.md` and `docs/ContextOverhaulTests.md` for these -historical/authoritative references. +Expected: PASS in the sense that `AGENTS.md` no longer points at retiring +`ContextOverhaul` docs and now points at `docs/SmokeTests.md` for validation. 
- [ ] **Step 4: Perform a final manual release-gate check** @@ -453,7 +445,7 @@ Only perform this step if the user explicitly asks for a commit in the implementation session. ```bash -git add docs/ContextOverhaulTests.md AGENTS.md +git add docs/SmokeTests.md AGENTS.md git commit -m "docs: rewrite the agentic runtime test plan" ``` diff --git a/src/services/batch-drain.test.ts b/src/services/batch-drain.test.ts index 12fbecb..46b3052 100644 --- a/src/services/batch-drain.test.ts +++ b/src/services/batch-drain.test.ts @@ -211,6 +211,23 @@ const createDeps = async (options?: { return { redis, events, drain }; }; +const drainRetryAliasKey = (groupId: string, eventIds: string[]): string => + `drain:retry-alias:${groupId}:${eventIds.join(",") || "empty"}`; + +const seedRetryStateForEvents = async ( + redis: RedisClient, + groupId: string, + eventIds: string[], + batchKey: string, + state: { attempts: number; nextAttemptAt: number }, +): Promise<{ aliasKey: string; retryKey: string }> => { + const aliasKey = drainRetryAliasKey(groupId, eventIds); + const retryKey = drainRetryKey(groupId, batchKey); + await redis.setString(aliasKey, batchKey, 60); + await redis.setString(retryKey, JSON.stringify(state), 60); + return { aliasKey, retryKey }; +}; + describe("batch drain", () => { it("uses a sub-TTL default heartbeat when the claim TTL is small", () => { const drain = new BatchDrainService( @@ -534,8 +551,11 @@ describe("batch drain", () => { assertEquals(first.status, "retry"); assertEquals(await redis.getListLength(drainPendingKey("group-1")), 1); + const aliasKey = drainRetryAliasKey("group-1", [event.id]); + const batchKey = await redis.getString(aliasKey); + if (!batchKey) throw new Error("Expected retry alias key to be set"); await redis.setString( - drainRetryKey("group-1", `${event.id}:${event.id}`), + drainRetryKey("group-1", batchKey), JSON.stringify({ attempts: 1, nextAttemptAt: 0 }), 60, ); @@ -547,6 +567,71 @@ describe("batch drain", () => { assertEquals(await 
redis.getListLength(drainDeadKey("group-1")), 1); }); + it("reuses the first monotonic ULID batch key across retries", async () => { + const { redis, events, drain } = await createDeps(); + const event = createSessionEvent("error", "tool", { + summary: "retry with stable key", + body: "retry with stable key", + metadata: { resolved: false }, + }); + await events.recordEvent("session-1", "group-1", event); + + const failingGraphiti = { + addMemory() { + throw new Error("boom"); + }, + }; + + const first = await drain.drainGroup("group-1", failingGraphiti as never); + assertEquals(first.status, "retry"); + + const aliasKey = drainRetryAliasKey("group-1", [event.id]); + const batchKey = await redis.getString(aliasKey); + if (!batchKey) throw new Error("Expected retry alias key to be set"); + assertEquals(/^[0-9A-HJKMNP-TV-Z]{26}$/.test(batchKey), true); + assertEquals(batchKey === `${event.id}:${event.id}`, false); + await redis.setString( + drainRetryKey("group-1", batchKey), + JSON.stringify({ attempts: 1, nextAttemptAt: 0 }), + 60, + ); + + const second = await drain.drainGroup("group-1", failingGraphiti as never); + assertEquals(second.status, "dead-letter"); + assertEquals(await redis.getString(aliasKey), null); + assertEquals( + await redis.getString(drainRetryKey("group-1", batchKey)), + null, + ); + }); + + it("preserves a previously assigned batch key when retry state already exists", async () => { + const { redis, events, drain } = await createDeps(); + const event = createSessionEvent("message", "user", { + summary: "stable retry key", + body: "stable retry key", + }); + await events.recordEvent("session-1", "group-1", event); + + const stableBatchKey = "01ARZ3NDEKTSV4RRFFQ69G5FC0"; + const { retryKey } = await seedRetryStateForEvents( + redis, + "group-1", + [event.id], + stableBatchKey, + { attempts: 1, nextAttemptAt: 0 }, + ); + + const result = await drain.drainGroup("group-1", { + addMemory() { + throw new Error("boom"); + }, + } as never); + + 
assertEquals(result.status, "dead-letter"); + assertEquals(await redis.getString(retryKey), null); + }); + it("adds bounded jitter to retry scheduling", async () => { const { redis, events, drain } = await createDeps(); const event = createSessionEvent("error", "tool", { @@ -566,10 +651,11 @@ describe("batch drain", () => { } as never); assertEquals(result, { status: "retry", drained: 0 }); + const aliasKey = drainRetryAliasKey("group-1", [event.id]); + const batchKey = await redis.getString(aliasKey); + if (!batchKey) throw new Error("Expected retry alias key to be set"); assertEquals( - await redis.getString( - drainRetryKey("group-1", `${event.id}:${event.id}`), - ), + await redis.getString(drainRetryKey("group-1", batchKey)), JSON.stringify({ attempts: 1, nextAttemptAt: 11_250 }), ); }); @@ -582,9 +668,14 @@ describe("batch drain", () => { }); await events.recordEvent("session-1", "group-1", event); - const retryKey = drainRetryKey("group-1", `${event.id}:${event.id}`); const retryState = { attempts: 1, nextAttemptAt: Date.now() + 60_000 }; - await redis.setString(retryKey, JSON.stringify(retryState), 60); + const { retryKey } = await seedRetryStateForEvents( + redis, + "group-1", + [event.id], + "01ARZ3NDEKTSV4RRFFQ69G5FAX", + retryState, + ); let addMemoryCalls = 0; const result = await drain.drainGroup("group-1", { @@ -612,9 +703,14 @@ describe("batch drain", () => { }); await events.recordEvent("session-1", "group-1", event); - const retryKey = drainRetryKey("group-1", `${event.id}:${event.id}`); const retryState = { attempts: 1, nextAttemptAt: Date.now() + 60_000 }; - await redis.setString(retryKey, JSON.stringify(retryState), 60); + const { retryKey } = await seedRetryStateForEvents( + redis, + "group-1", + [event.id], + "01ARZ3NDEKTSV4RRFFQ69G5FAY", + retryState, + ); const originalReleaseClaim = events.releaseClaim.bind(events); let releaseAttempts = 0; @@ -645,7 +741,13 @@ describe("batch drain", () => { }); await events.recordEvent("session-1", 
"group-1", event); - const retryKey = drainRetryKey("group-1", `${event.id}:${event.id}`); + const { retryKey } = await seedRetryStateForEvents( + redis, + "group-1", + [event.id], + "01ARZ3NDEKTSV4RRFFQ69G5FAZ", + { attempts: 1, nextAttemptAt: 0 }, + ); await redis.setString(retryKey, "{not-json", 60); let calls = 0; @@ -675,7 +777,13 @@ describe("batch drain", () => { }); await events.recordEvent("session-1", "group-1", event); - const retryKey = drainRetryKey("group-1", `${event.id}:${event.id}`); + const { retryKey } = await seedRetryStateForEvents( + redis, + "group-1", + [event.id], + "01ARZ3NDEKTSV4RRFFQ69G5FB0", + { attempts: 1, nextAttemptAt: 0 }, + ); await redis.setString(retryKey, JSON.stringify(invalidState), 60); let calls = 0; @@ -706,10 +814,12 @@ describe("batch drain", () => { }); await events.recordEvent("session-1", "group-1", first); await events.recordEvent("session-1", "group-1", second); - await redis.setString( - drainRetryKey("group-1", `${first.id}:${second.id}`), - JSON.stringify({ attempts: 1, nextAttemptAt: 0 }), - 60, + await seedRetryStateForEvents( + redis, + "group-1", + [first.id, second.id], + "01ARZ3NDEKTSV4RRFFQ69G5FB1", + { attempts: 1, nextAttemptAt: 0 }, ); let calls = 0; @@ -735,10 +845,12 @@ describe("batch drain", () => { metadata: { resolved: false }, }); await events.recordEvent("session-1", "group-1", event); - await redis.setString( - drainRetryKey("group-1", `${event.id}:${event.id}`), - JSON.stringify({ attempts: 1, nextAttemptAt: 0 }), - 60, + const { retryKey } = await seedRetryStateForEvents( + redis, + "group-1", + [event.id], + "01ARZ3NDEKTSV4RRFFQ69G5FB2", + { attempts: 1, nextAttemptAt: 0 }, ); const deadLetterSpy = spy(events, "moveBatchToDeadLetter"); @@ -758,9 +870,7 @@ describe("batch drain", () => { assertEquals(await redis.getListLength(drainPendingKey("group-1")), 0); assertEquals(await redis.getListLength(drainDeadKey("group-1")), 0); assertEquals( - await redis.getString( - drainRetryKey("group-1", 
`${event.id}:${event.id}`), - ), + await redis.getString(retryKey), null, ); assertEquals( @@ -950,7 +1060,7 @@ describe("batch drain", () => { assertEquals(result.status, "retry"); assertEquals( await redis.getString( - drainRetryKey("group-1", `${first.id}:${second.id}`), + drainRetryAliasKey("group-1", [first.id, second.id]), ), null, ); diff --git a/src/services/batch-drain.ts b/src/services/batch-drain.ts index cea34a2..07f0f1b 100644 --- a/src/services/batch-drain.ts +++ b/src/services/batch-drain.ts @@ -1,3 +1,4 @@ +import * as ulid from "jsr:@std/ulid@^1.0.0"; import { type DrainQueueEntry, getSessionEventRecallText, @@ -25,6 +26,7 @@ type RetryState = { attempts: number; nextAttemptAt: number }; const RETRY_BACKOFF_BASE_MS = 1_000; const RETRY_BACKOFF_JITTER_RATIO = 0.25; +const RETRY_STATE_TTL_SECONDS = 7 * 24 * 60 * 60; const isValidRetryState = (value: unknown): value is RetryState => { if (!value || typeof value !== "object") return false; @@ -43,8 +45,11 @@ class DrainClaimLostError extends Error { } } -const makeBatchKey = (entries: DrainQueueEntry[]): string => - `${entries[0]?.event.id ?? "empty"}:${entries.at(-1)?.event.id ?? 
"empty"}`; +const makeBatchFingerprint = (entries: DrainQueueEntry[]): string => + entries.map((entry) => entry.event.id).join(",") || "empty"; + +const drainRetryAliasKey = (groupId: string, fingerprint: string): string => + `drain:retry-alias:${groupId}:${fingerprint}`; type PreparedDrainEntry = { entry: DrainQueueEntry; @@ -189,10 +194,37 @@ export class BatchDrainService { await this.redis.setString( drainRetryKey(groupId, batchKey), JSON.stringify(state), - 7 * 24 * 60 * 60, + RETRY_STATE_TTL_SECONDS, ); } + private async resolveBatchKey( + groupId: string, + entries: DrainQueueEntry[], + ): Promise<{ aliasKey: string; batchKey: string }> { + const aliasKey = drainRetryAliasKey(groupId, makeBatchFingerprint(entries)); + let batchKey = await this.redis.getString(aliasKey); + if (batchKey) { + return { aliasKey, batchKey }; + } + + batchKey = ulid.monotonicUlid(); + + await this.redis.setString(aliasKey, batchKey, RETRY_STATE_TTL_SECONDS); + return { aliasKey, batchKey }; + } + + private async clearRetryState( + groupId: string, + aliasKey: string, + batchKey: string, + ): Promise<void> { + await Promise.all([ + this.redis.deleteKey(aliasKey), + this.redis.deleteKey(drainRetryKey(groupId, batchKey)), + ]); + } + private getRetryDelayMs(attempts: number): number { const baseDelayMs = RETRY_BACKOFF_BASE_MS * (2 ** (attempts - 1)); const jitterWindowMs = Math.round( @@ -246,12 +278,12 @@ export class BatchDrainService { const batch = claimed.entries; const preparedBatch = prepareDrainEntries(batch); - const batchKey = makeBatchKey(batch); + const { aliasKey, batchKey } = await this.resolveBatchKey(groupId, batch); const eventIds = batch.map((entry) => entry.event.id); const drainableEntryIds = getDrainableEntryIds(preparedBatch); if (drainableEntryIds.size === 0) { await this.events.markBatchSuccess(groupId, claimed.claimToken, batch); - await this.redis.deleteKey(drainRetryKey(groupId, batchKey)); + await this.clearRetryState(groupId, aliasKey, batchKey); return { 
status: "success", drained: 0 }; } @@ -343,7 +375,7 @@ export class BatchDrainService { } await assertClaimOwnership(); await this.events.markBatchSuccess(groupId, claimed.claimToken, batch); - await this.redis.deleteKey(drainRetryKey(groupId, batchKey)); + await this.clearRetryState(groupId, aliasKey, batchKey); return { status: "success", drained: drainableEntryIds.size }; } catch (err) { const lostOwnership = err instanceof DrainClaimLostError; @@ -362,7 +394,7 @@ export class BatchDrainService { eventIds, }); } - await this.redis.deleteKey(drainRetryKey(groupId, batchKey)); + await this.clearRetryState(groupId, aliasKey, batchKey); logger.warn( "Drain batch failed after claim loss; waiting for recovery", { @@ -389,7 +421,7 @@ export class BatchDrainService { claimed.claimToken, batch, ); - await this.redis.deleteKey(drainRetryKey(groupId, batchKey)); + await this.clearRetryState(groupId, aliasKey, batchKey); return { status: "dead-letter", drained: drainedCount }; } From ab7545194ec057630e85459dc8848c049bc70be3 Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Tue, 24 Mar 2026 20:16:33 +0800 Subject: [PATCH 32/38] fix: isolate drain retry batch keys --- src/services/batch-drain.test.ts | 37 ++++++++++++++++++++++++++++++++ src/services/batch-drain.ts | 1 + 2 files changed, 38 insertions(+) diff --git a/src/services/batch-drain.test.ts b/src/services/batch-drain.test.ts index 46b3052..a8b764e 100644 --- a/src/services/batch-drain.test.ts +++ b/src/services/batch-drain.test.ts @@ -632,6 +632,43 @@ describe("batch drain", () => { assertEquals(await redis.getString(retryKey), null); }); + it("refreshes the alias TTL when reusing an existing batch key", async () => { + const { redis, events, drain } = await createDeps(); + const event = createSessionEvent("message", "user", { + summary: "refresh alias ttl", + body: "refresh alias ttl", + }); + await events.recordEvent("session-1", "group-1", event); + + const touchSpy = spy(redis, "touch"); + try { + await seedRetryStateForEvents( + redis, + "group-1", + [event.id], + "01ARZ3NDEKTSV4RRFFQ69G5FC1", + { attempts: 1, nextAttemptAt: Date.now() + 60_000 }, + ); + + const result = await drain.drainGroup("group-1", { + addMemory() { + throw new Error("should not run during backoff"); + }, + } as never); + + assertEquals(result.status, "backoff"); + assertEquals( + touchSpy.calls.some((call) => + call.args[0] === drainRetryAliasKey("group-1", [event.id]) && + call.args[1] === 7 * 24 * 60 * 60 + ), + true, + ); + } finally { + touchSpy.restore(); + } + }); + it("adds bounded jitter to retry scheduling", async () => { const { redis, events, drain } = await createDeps(); const event = createSessionEvent("error", "tool", { diff --git a/src/services/batch-drain.ts b/src/services/batch-drain.ts index 07f0f1b..f82ccd5 100644 --- a/src/services/batch-drain.ts +++ b/src/services/batch-drain.ts @@ -205,6 +205,7 @@ export class BatchDrainService { const aliasKey = drainRetryAliasKey(groupId, makeBatchFingerprint(entries)); let batchKey = await this.redis.getString(aliasKey); if (batchKey) 
{ + await this.redis.touch(aliasKey, RETRY_STATE_TTL_SECONDS); return { aliasKey, batchKey }; } From 11637979c21c8ed011f91be1d9b15563afad5ba6 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 20:18:49 +0800 Subject: [PATCH 33/38] fix: harden connection manager request cleanup --- deno.json | 1 - src/services/connection-manager.test.ts | 26 +++++++++++++++++++++++++ src/services/connection-manager.ts | 13 ++++++++++--- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/deno.json b/deno.json index 98ba481..6418d80 100644 --- a/deno.json +++ b/deno.json @@ -4,7 +4,6 @@ "version": "0.0.0-development", "license": "MIT", "tasks": { - "build": "deno run -A dnt.ts", "check": "deno check src/index.ts", "dev": "deno run --allow-all src/index.ts", "lint": "deno lint", diff --git a/src/services/connection-manager.test.ts b/src/services/connection-manager.test.ts index cd6d10f..923c767 100644 --- a/src/services/connection-manager.test.ts +++ b/src/services/connection-manager.test.ts @@ -1009,6 +1009,32 @@ describe("connection manager", () => { ); }); + it("does not leak active request controllers when callTool throws synchronously", async () => { + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => Promise.resolve(), + callTool: () => { + throw new Error("sync boom"); + }, + }), + }); + const internals = manager as unknown as { + activeRequestControllers: Set<AbortController>; + }; + + manager.start(); + assertEquals(await manager.ready(10), true); + + await assertRejects( + () => manager.callTool("search", {}), + Error, + "sync boom", + ); + assertEquals(internals.activeRequestControllers.size, 0); + }); + it("rejects invalid non-empty endpoints up front", () => { const error = assertThrows( () => diff --git a/src/services/connection-manager.ts b/src/services/connection-manager.ts index 88a5723..62cdc10 100644 --- a/src/services/connection-manager.ts +++ 
b/src/services/connection-manager.ts @@ -491,11 +491,18 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { try { const controller = new AbortController(); this.activeRequestControllers.add(controller); - return await this.runWithRequestDeadline( - this.connection.callTool( + let task: Promise; + try { + task = this.connection.callTool( { name, arguments: args }, { signal: controller.signal }, - ), + ); + } catch (err) { + this.activeRequestControllers.delete(controller); + throw err; + } + return await this.runWithRequestDeadline( + task, deadlineMs, controller, ); From 4c333b47177268c731ee09f3a1de732e953c36b9 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 20:21:53 +0800 Subject: [PATCH 34/38] build: restore dnt task --- deno.json | 1 + 1 file changed, 1 insertion(+) diff --git a/deno.json b/deno.json index 6418d80..98ba481 100644 --- a/deno.json +++ b/deno.json @@ -4,6 +4,7 @@ "version": "0.0.0-development", "license": "MIT", "tasks": { + "build": "deno run -A dnt.ts", "check": "deno check src/index.ts", "dev": "deno run --allow-all src/index.ts", "lint": "deno lint", From 309012c4e025d5a633bc89345c8508f6916b5910 Mon Sep 17 00:00:00 2001 From: "Vicary A." 
Date: Tue, 24 Mar 2026 20:38:43 +0800 Subject: [PATCH 35/38] refactor: centralize connection controller cleanup --- src/services/connection-manager.test.ts | 43 +++++++++++++++++++++++++ src/services/connection-manager.ts | 24 ++++---------- 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/src/services/connection-manager.test.ts b/src/services/connection-manager.test.ts index 923c767..69dca0f 100644 --- a/src/services/connection-manager.test.ts +++ b/src/services/connection-manager.test.ts @@ -1035,6 +1035,49 @@ describe("connection manager", () => { assertEquals(internals.activeRequestControllers.size, 0); }); + it("removes active request controllers in finally for sync throw and settled calls", async () => { + const barrier = Promise.withResolvers(); + let releaseCalls = 0; + const manager = new GraphitiConnectionManager({ + endpoint: "http://test", + connectionFactory: () => ({ + connect: () => Promise.resolve(), + close: () => Promise.resolve(), + callTool: (_request, options) => { + if (releaseCalls === 0) { + releaseCalls += 1; + throw new Error("sync boom"); + } + return new Promise((resolve, reject) => { + options?.signal?.addEventListener("abort", () => { + reject(options.signal?.reason ?? 
new Error("aborted")); + }, { once: true }); + barrier.promise.then(() => resolve({ ok: true })); + }); + }, + }), + }); + const internals = manager as unknown as { + activeRequestControllers: Set; + }; + + manager.start(); + assertEquals(await manager.ready(10), true); + + await assertRejects( + () => manager.callTool("search", {}), + Error, + "sync boom", + ); + assertEquals(internals.activeRequestControllers.size, 0); + + const pending = manager.callTool("search", {}); + assertEquals(internals.activeRequestControllers.size, 1); + barrier.resolve(); + await pending; + assertEquals(internals.activeRequestControllers.size, 0); + }); + it("rejects invalid non-empty endpoints up front", () => { const error = assertThrows( () => diff --git a/src/services/connection-manager.ts b/src/services/connection-manager.ts index 62cdc10..357e4c8 100644 --- a/src/services/connection-manager.ts +++ b/src/services/connection-manager.ts @@ -491,21 +491,19 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { try { const controller = new AbortController(); this.activeRequestControllers.add(controller); - let task: Promise; try { - task = this.connection.callTool( + const task = this.connection.callTool( { name, arguments: args }, { signal: controller.signal }, ); - } catch (err) { + return await this.runWithRequestDeadline( + task, + deadlineMs, + controller, + ); + } finally { this.activeRequestControllers.delete(controller); - throw err; } - return await this.runWithRequestDeadline( - task, - deadlineMs, - controller, - ); } catch (err) { if (err instanceof GraphitiOfflineError) { throw err; @@ -595,11 +593,6 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { return new Promise((resolve, reject) => { let settled = false; let timer: TimerHandle | null = null; - const finish = () => { - if (controller) { - this.activeRequestControllers.delete(controller); - } - }; const clearDeadlineTimer = () => { if (timer !== null) { 
this.clearTimerImpl(timer); @@ -612,7 +605,6 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { settled = true; clearDeadlineTimer(); controller?.abort(new GraphitiRequestTimeoutError()); - finish(); reject(new GraphitiRequestTimeoutError()); }, deadlineMs); @@ -621,14 +613,12 @@ export class GraphitiConnectionManager implements GraphitiToolCaller { if (settled) return; settled = true; clearDeadlineTimer(); - finish(); resolve(value); }, (error) => { if (settled) return; settled = true; clearDeadlineTimer(); - finish(); reject(error); }, ); From 57649cc68d021f5dbc847dcd135799da54cc7200 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 21:46:20 +0800 Subject: [PATCH 36/38] fix: harden config and drain validation --- src/config.test.ts | 58 ++++++++++++++++++++++++++++++++ src/config.ts | 44 +++++++++++++++++------- src/services/batch-drain.test.ts | 29 +++++++++++----- src/services/batch-drain.ts | 16 ++++++--- 4 files changed, 123 insertions(+), 24 deletions(-) diff --git a/src/config.test.ts b/src/config.test.ts index fbd4c35..45a31f3 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -246,6 +246,64 @@ describe("config", () => { ); }); + it("rejects graphiti endpoints with non-http schemes", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + graphiti: { + endpoint: "redis://wrong-scheme:6379", + }, + }, + }) + ); + + assertThrows( + () => loadConfig(), + ConfigLoadError, + 'Invalid config value for graphiti.endpoint: expected URL scheme "http" or "https", received "redis://wrong-scheme:6379"', + ); + }); + + it("rejects redis endpoints with non-redis schemes", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + redis: { + endpoint: "http://wrong-scheme.example", + }, + }, + }) + ); + + assertThrows( + () => loadConfig(), + ConfigLoadError, + 'Invalid config value for redis.endpoint: expected URL scheme "redis" or "rediss", received 
"http://wrong-scheme.example"', + ); + }); + + it("accepts supported endpoint schemes for each setting", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + endpoint: "https://legacy.example/mcp", + graphiti: { + endpoint: "http://nested.example/mcp", + }, + redis: { + endpoint: "rediss://cache.example:6379", + }, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.endpoint, "http://nested.example/mcp"); + assertEquals(config.graphiti.endpoint, "http://nested.example/mcp"); + assertEquals(config.redis.endpoint, "rediss://cache.example:6379"); + }); + it("redacts credentials from malformed configured endpoint errors", () => { setConfigExplorerAdapterForTesting(() => makeAdapter({ diff --git a/src/config.ts b/src/config.ts index c853b61..a6b9cee 100644 --- a/src/config.ts +++ b/src/config.ts @@ -128,35 +128,55 @@ const isUnitInterval = (value: number | undefined): value is number => typeof value === "number" && Number.isFinite(value) && value >= 0 && value <= 1; -const isValidUrlString = (value: string | undefined): value is string => { - if (!value) return false; +const parseUrlString = (value: string | undefined): URL | null => { + if (!value) return null; try { - new URL(value); - return true; + return new URL(value); } catch { - return false; + return null; } }; const assertExplicitUrl = ( value: string | undefined, fieldName: string, + allowedSchemes?: string[], ): void => { if (value === undefined) return; - if (isValidUrlString(value)) return; + const url = parseUrlString(value); + if (!url) { + throw new ConfigLoadError( + `Invalid config value for ${fieldName}: expected a valid URL, received ${ + JSON.stringify(redactEndpointUserInfo(value)) + }`, + { code: "config-invalid" }, + ); + } + if ( + !allowedSchemes || + allowedSchemes.includes(url.protocol.slice(0, -1)) + ) { + return; + } throw new ConfigLoadError( - `Invalid config value for ${fieldName}: expected a valid URL, received ${ - 
JSON.stringify(redactEndpointUserInfo(value)) - }`, + `Invalid config value for ${fieldName}: expected URL scheme ${ + allowedSchemes.map((scheme) => JSON.stringify(scheme)).join(" or ") + }, received ${JSON.stringify(redactEndpointUserInfo(value))}`, { code: "config-invalid" }, ); }; const validateExplicitConfig = (value: RawGraphitiConfig | null): void => { if (!value) return; - assertExplicitUrl(value.endpoint, "endpoint"); - assertExplicitUrl(value.graphiti?.endpoint, "graphiti.endpoint"); - assertExplicitUrl(value.redis?.endpoint, "redis.endpoint"); + assertExplicitUrl(value.endpoint, "endpoint", ["http", "https"]); + assertExplicitUrl(value.graphiti?.endpoint, "graphiti.endpoint", [ + "http", + "https", + ]); + assertExplicitUrl(value.redis?.endpoint, "redis.endpoint", [ + "redis", + "rediss", + ]); }; const resolveNumber = ( diff --git a/src/services/batch-drain.test.ts b/src/services/batch-drain.test.ts index a8b764e..dc8b1d5 100644 --- a/src/services/batch-drain.test.ts +++ b/src/services/batch-drain.test.ts @@ -183,6 +183,8 @@ const createDeps = async (options?: { batchMaxBytes?: number; batchSize?: number; claimHeartbeatIntervalMs?: number | null; + now?: () => number; + random?: () => number; }; }) => { const redis = new RedisClient({ @@ -198,6 +200,8 @@ const createDeps = async (options?: { batchSize: options?.drain?.batchSize ?? 2, batchMaxBytes: options?.drain?.batchMaxBytes ?? 
20_000, drainRetryMax: 2, + now: options?.drain?.now, + random: options?.drain?.random, }; const heartbeatIntervalMs = options?.drain?.claimHeartbeatIntervalMs; const drain = new BatchDrainService( @@ -670,7 +674,12 @@ describe("batch drain", () => { }); it("adds bounded jitter to retry scheduling", async () => { - const { redis, events, drain } = await createDeps(); + const { redis, events, drain } = await createDeps({ + drain: { + now: () => 10_000, + random: () => 1, + }, + }); const event = createSessionEvent("error", "tool", { summary: "failing batch", body: "failing batch", @@ -678,9 +687,6 @@ describe("batch drain", () => { }); await events.recordEvent("session-1", "group-1", event); - using _dateNow = stub(Date, "now", () => 10_000); - using _random = stub(Math, "random", () => 1); - const result = await drain.drainGroup("group-1", { addMemory() { throw new Error("boom"); @@ -698,14 +704,17 @@ describe("batch drain", () => { }); it("backs off and releases the claim when retry state is scheduled for later", async () => { - const { redis, events, drain } = await createDeps(); + const now = 50_000; + const { redis, events, drain } = await createDeps({ + drain: { now: () => now }, + }); const event = createSessionEvent("message", "user", { summary: "wait before retry", body: "wait before retry", }); await events.recordEvent("session-1", "group-1", event); - const retryState = { attempts: 1, nextAttemptAt: Date.now() + 60_000 }; + const retryState = { attempts: 1, nextAttemptAt: now + 60_000 }; const { retryKey } = await seedRetryStateForEvents( redis, "group-1", @@ -723,6 +732,7 @@ describe("batch drain", () => { assertEquals(result.status, "backoff"); assertEquals(result.drained, 0); + assertEquals(result.retryAfterMs, 60_000); if (result.retryAfterMs === undefined || result.retryAfterMs <= 0) { throw new Error("Expected backoff result to include retryAfterMs"); } @@ -733,14 +743,17 @@ describe("batch drain", () => { }); it("returns backoff even if releasing 
the claim fails", async () => { - const { redis, events, drain } = await createDeps(); + const now = 75_000; + const { redis, events, drain } = await createDeps({ + drain: { now: () => now }, + }); const event = createSessionEvent("message", "user", { summary: "wait before retry", body: "wait before retry", }); await events.recordEvent("session-1", "group-1", event); - const retryState = { attempts: 1, nextAttemptAt: Date.now() + 60_000 }; + const retryState = { attempts: 1, nextAttemptAt: now + 60_000 }; const { retryKey } = await seedRetryStateForEvents( redis, "group-1", diff --git a/src/services/batch-drain.ts b/src/services/batch-drain.ts index f82ccd5..fc648f1 100644 --- a/src/services/batch-drain.ts +++ b/src/services/batch-drain.ts @@ -20,6 +20,8 @@ export interface BatchDrainServiceOptions { batchMaxBytes: number; drainRetryMax: number; claimHeartbeatIntervalMs?: number; + now?: () => number; + random?: () => number; } type RetryState = { attempts: number; nextAttemptAt: number }; @@ -121,11 +123,17 @@ const shouldDrainEntry = (entry: PreparedDrainEntry): boolean => { }; export class BatchDrainService { + private readonly now: () => number; + private readonly random: () => number; + constructor( private readonly redis: RedisClient, private readonly events: RedisEventsService, private readonly options: BatchDrainServiceOptions, - ) {} + ) { + this.now = options.now ?? Date.now; + this.random = options.random ?? 
Math.random; + } private getClaimHeartbeatIntervalMs(lockTtlSeconds: number): number { const ttlMs = Math.max(1_000, Math.floor(lockTtlSeconds * 1000)); @@ -234,7 +242,7 @@ export class BatchDrainService { const minDelayMs = Math.max(1, baseDelayMs - jitterWindowMs); const maxDelayMs = baseDelayMs + jitterWindowMs; return Math.round( - minDelayMs + (Math.random() * (maxDelayMs - minDelayMs)), + minDelayMs + (this.random() * (maxDelayMs - minDelayMs)), ); } @@ -290,7 +298,7 @@ export class BatchDrainService { const retryState = await this.getRetryState(groupId, batchKey); if (retryState) { - const now = Date.now(); + const now = this.now(); if (retryState.nextAttemptAt > now) { const retryAfterMs = Math.max(0, retryState.nextAttemptAt - now); await this.releaseClaimSafely( @@ -434,7 +442,7 @@ export class BatchDrainService { ); await this.setRetryState(groupId, batchKey, { attempts, - nextAttemptAt: Date.now() + this.getRetryDelayMs(attempts), + nextAttemptAt: this.now() + this.getRetryDelayMs(attempts), }); logger.warn("Drain batch failed; will retry later", { groupId, err }); return { status: "retry", drained: 0 }; From da8afe9e9c0ee84d1d87751cad99f1573661e12f Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Tue, 24 Mar 2026 23:36:47 +0800 Subject: [PATCH 37/38] fix: harden retry alias canonicalization and endpoint coercion --- AGENTS.md | 6 +- README.md | 11 +- src/config.test.ts | 78 ++++++++++ src/config.ts | 95 +++++++++---- src/services/batch-drain.test.ts | 101 +++++++++++-- src/services/batch-drain.ts | 211 +++++++++++++++++++++++++--- src/services/session-mcp-runtime.ts | 2 +- 7 files changed, 443 insertions(+), 61 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 7824570..a986c42 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -156,8 +156,10 @@ Canonical shape (nested): } ``` -Endpoint values must be explicit URLs with schemes, for example -`redis://localhost:6379` for Redis and `http://localhost:8000/mcp` for Graphiti. 
+Endpoint values must resolve to valid URLs. Config loading performs best-effort +coercion by adding the expected scheme when omitted and defaulting the port only +for scheme-less inputs that do not already include one (`6379` for Redis and +`8000` for Graphiti); explicit disallowed schemes still fail validation. ## Key Files & Their Scope diff --git a/README.md b/README.md index d8fb3d3..d506bdf 100644 --- a/README.md +++ b/README.md @@ -235,9 +235,14 @@ top-level aliases remain supported for backward compatibility. Precedence is: 1. `redis.*` (canonical) 2. top-level Graphiti aliases such as `endpoint` and `groupIdPrefix` -Endpoint values must be valid URLs, so include the scheme explicitly - for -example `redis://localhost:6379` for Redis and `http://localhost:8000/mcp` for -Graphiti. +Endpoint values must resolve to valid URLs. The loader applies best-effort +coercion for endpoint-like inputs by trimming whitespace, adding the expected +scheme when omitted, and filling the default port only when a missing-scheme +input also omits a port. For example, `localhost` under `redis.endpoint` +resolves to `redis://localhost:6379`, `cache.internal:6380` resolves to +`redis://cache.internal:6380`, and `graphiti.internal/mcp` under +`graphiti.endpoint` resolves to `http://graphiti.internal:8000/mcp`. Inputs that +still fail URL parsing, or that use a disallowed explicit scheme, are rejected. 
### Legacy Top-Level Keys diff --git a/src/config.test.ts b/src/config.test.ts index 45a31f3..5f6b4a9 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -304,6 +304,84 @@ describe("config", () => { assertEquals(config.redis.endpoint, "rediss://cache.example:6379"); }); + it("best-effort coerces missing schemes for graphiti and redis endpoints", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + endpoint: "legacy.example/mcp", + redis: { + endpoint: "cache.internal", + }, + graphiti: { + endpoint: "graphiti.internal/mcp", + }, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.endpoint, "http://graphiti.internal:8000/mcp"); + assertEquals(config.graphiti.endpoint, "http://graphiti.internal:8000/mcp"); + assertEquals(config.redis.endpoint, "redis://cache.internal:6379"); + }); + + it("preserves an explicit port on scheme-less redis endpoints", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + redis: { + endpoint: "cache.internal:6380", + }, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.redis.endpoint, "redis://cache.internal:6380"); + }); + + it("preserves explicit schemes while still requiring an allowed protocol", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + graphiti: { + endpoint: "https://secure.example/mcp", + }, + redis: { + endpoint: "rediss://cache.example", + }, + }, + }) + ); + + const config = loadConfig(); + + assertEquals(config.graphiti.endpoint, "https://secure.example/mcp"); + assertEquals(config.redis.endpoint, "rediss://cache.example"); + }); + + it("coerces scheme-relative endpoint inputs before validation", () => { + setConfigExplorerAdapterForTesting(() => + makeAdapter({ + searchResult: { + graphiti: { + endpoint: "//graphiti.internal/mcp", + }, + redis: { + endpoint: "//cache.internal", + }, + }, + }) + ); + + const config = loadConfig(); + + 
assertEquals(config.graphiti.endpoint, "http://graphiti.internal:8000/mcp"); + assertEquals(config.redis.endpoint, "redis://cache.internal:6379"); + }); + it("redacts credentials from malformed configured endpoint errors", () => { setConfigExplorerAdapterForTesting(() => makeAdapter({ diff --git a/src/config.ts b/src/config.ts index a6b9cee..4061830 100644 --- a/src/config.ts +++ b/src/config.ts @@ -137,13 +137,25 @@ const parseUrlString = (value: string | undefined): URL | null => { } }; -const assertExplicitUrl = ( +const URL_SCHEME_PREFIX = /^[A-Za-z][A-Za-z\d+\-.]*:\/\//; + +const coerceConfiguredUrl = ( value: string | undefined, fieldName: string, - allowedSchemes?: string[], -): void => { - if (value === undefined) return; - const url = parseUrlString(value); + options: { + allowedSchemes?: string[]; + defaultScheme: string; + defaultPort?: string; + }, +): string | undefined => { + if (value === undefined) return undefined; + + const hasExplicitScheme = URL_SCHEME_PREFIX.test(value); + const candidate = hasExplicitScheme + ? 
value + : `${options.defaultScheme}://${value.replace(/^\/\//, "")}`; + + const url = parseUrlString(candidate); if (!url) { throw new ConfigLoadError( `Invalid config value for ${fieldName}: expected a valid URL, received ${ @@ -153,30 +165,62 @@ const assertExplicitUrl = ( ); } if ( - !allowedSchemes || - allowedSchemes.includes(url.protocol.slice(0, -1)) + !options.allowedSchemes || + options.allowedSchemes.includes(url.protocol.slice(0, -1)) ) { - return; + if (!hasExplicitScheme && options.defaultPort && !url.port) { + url.port = options.defaultPort; + } + return url.toString(); } + throw new ConfigLoadError( `Invalid config value for ${fieldName}: expected URL scheme ${ - allowedSchemes.map((scheme) => JSON.stringify(scheme)).join(" or ") + options.allowedSchemes.map((scheme) => JSON.stringify(scheme)).join( + " or ", + ) }, received ${JSON.stringify(redactEndpointUserInfo(value))}`, { code: "config-invalid" }, ); }; -const validateExplicitConfig = (value: RawGraphitiConfig | null): void => { - if (!value) return; - assertExplicitUrl(value.endpoint, "endpoint", ["http", "https"]); - assertExplicitUrl(value.graphiti?.endpoint, "graphiti.endpoint", [ - "http", - "https", - ]); - assertExplicitUrl(value.redis?.endpoint, "redis.endpoint", [ - "redis", - "rediss", - ]); +const normalizeConfiguredEndpoints = ( + value: RawGraphitiConfig | null, +): RawGraphitiConfig | null => { + if (!value) return value; + + return { + ...value, + endpoint: coerceConfiguredUrl(value.endpoint, "endpoint", { + allowedSchemes: ["http", "https"], + defaultScheme: "http", + defaultPort: "8000", + }), + graphiti: value.graphiti + ? { + ...value.graphiti, + endpoint: coerceConfiguredUrl( + value.graphiti.endpoint, + "graphiti.endpoint", + { + allowedSchemes: ["http", "https"], + defaultScheme: "http", + defaultPort: "8000", + }, + ), + } + : value.graphiti, + redis: value.redis + ? 
{ + ...value.redis, + endpoint: coerceConfiguredUrl(value.redis.endpoint, "redis.endpoint", { + allowedSchemes: ["redis", "rediss"], + defaultScheme: "redis", + defaultPort: "6379", + }), + } + : value.redis, + }; }; const resolveNumber = ( @@ -288,8 +332,9 @@ const loadConfigFile = ( ): RawGraphitiConfig | null => { try { const loaded = adapter?.load(filePath); - const normalized = loaded ? normalizeConfig(loaded.config) : null; - validateExplicitConfig(normalized); + const normalized = loaded + ? normalizeConfiguredEndpoints(normalizeConfig(loaded.config)) + : null; return normalized; } catch (err) { if (err instanceof ConfigLoadError) throw err; @@ -314,8 +359,9 @@ const searchConfig = ( ): RawGraphitiConfig | null => { try { const loaded = adapter.search(directory); - const normalized = loaded ? normalizeConfig(loaded.config) : null; - validateExplicitConfig(normalized); + const normalized = loaded + ? normalizeConfiguredEndpoints(normalizeConfig(loaded.config)) + : null; return normalized; } catch (err) { if (err instanceof ConfigLoadError) throw err; @@ -349,7 +395,6 @@ export function loadConfig(directory?: string): GraphitiConfig { const adapter = getConfigExplorerAdapter(); const loaded = searchConfig(adapter, directory); const resolved = loaded ?? 
loadLegacyConfig(adapter); - validateExplicitConfig(resolved); return resolveConfig(resolved); } catch (error) { if ( diff --git a/src/services/batch-drain.test.ts b/src/services/batch-drain.test.ts index dc8b1d5..060897a 100644 --- a/src/services/batch-drain.test.ts +++ b/src/services/batch-drain.test.ts @@ -1,6 +1,6 @@ import { assertEquals } from "jsr:@std/assert@^1.0.0"; import { describe, it } from "jsr:@std/testing@^1.0.0/bdd"; -import { spy, stub } from "jsr:@std/testing@^1.0.0/mock"; +import { spy } from "jsr:@std/testing@^1.0.0/mock"; import { BatchDrainService } from "./batch-drain.ts"; import { createSessionEvent } from "./event-extractor.ts"; import { logger } from "./logger.ts"; @@ -215,8 +215,11 @@ const createDeps = async (options?: { return { redis, events, drain }; }; -const drainRetryAliasKey = (groupId: string, eventIds: string[]): string => - `drain:retry-alias:${groupId}:${eventIds.join(",") || "empty"}`; +const drainRetryAliasKey = (groupId: string, eventId: string): string => + `drain:retry-alias:${groupId}:event:${eventId}`; + +const drainRetryMembersKey = (groupId: string, batchKey: string): string => + `drain:retry-members:${groupId}:${batchKey}`; const seedRetryStateForEvents = async ( redis: RedisClient, @@ -225,9 +228,16 @@ const seedRetryStateForEvents = async ( batchKey: string, state: { attempts: number; nextAttemptAt: number }, ): Promise<{ aliasKey: string; retryKey: string }> => { - const aliasKey = drainRetryAliasKey(groupId, eventIds); + const aliasKey = drainRetryAliasKey(groupId, eventIds[0]!); const retryKey = drainRetryKey(groupId, batchKey); - await redis.setString(aliasKey, batchKey, 60); + for (const eventId of eventIds) { + await redis.setString(drainRetryAliasKey(groupId, eventId), batchKey, 60); + } + await redis.setString( + drainRetryMembersKey(groupId, batchKey), + JSON.stringify(eventIds), + 60, + ); await redis.setString(retryKey, JSON.stringify(state), 60); return { aliasKey, retryKey }; }; @@ -555,7 +565,7 @@ 
describe("batch drain", () => { assertEquals(first.status, "retry"); assertEquals(await redis.getListLength(drainPendingKey("group-1")), 1); - const aliasKey = drainRetryAliasKey("group-1", [event.id]); + const aliasKey = drainRetryAliasKey("group-1", event.id); const batchKey = await redis.getString(aliasKey); if (!batchKey) throw new Error("Expected retry alias key to be set"); await redis.setString( @@ -589,7 +599,7 @@ describe("batch drain", () => { const first = await drain.drainGroup("group-1", failingGraphiti as never); assertEquals(first.status, "retry"); - const aliasKey = drainRetryAliasKey("group-1", [event.id]); + const aliasKey = drainRetryAliasKey("group-1", event.id); const batchKey = await redis.getString(aliasKey); if (!batchKey) throw new Error("Expected retry alias key to be set"); assertEquals(/^[0-9A-HJKMNP-TV-Z]{26}$/.test(batchKey), true); @@ -663,7 +673,7 @@ describe("batch drain", () => { assertEquals(result.status, "backoff"); assertEquals( touchSpy.calls.some((call) => - call.args[0] === drainRetryAliasKey("group-1", [event.id]) && + call.args[0] === drainRetryAliasKey("group-1", event.id) && call.args[1] === 7 * 24 * 60 * 60 ), true, @@ -673,6 +683,77 @@ describe("batch drain", () => { } }); + it("canonicalizes conflicting retry aliases onto the strongest batch key", async () => { + const now = 50_000; + const { redis, events, drain } = await createDeps({ + drain: { batchSize: 2, now: () => now }, + }); + const first = createSessionEvent("message", "user", { + summary: "first retry alias", + body: "first retry alias", + }); + const second = createSessionEvent("message", "user", { + summary: "second retry alias", + body: "second retry alias", + }); + await events.recordEvent("session-1", "group-1", first); + await events.recordEvent("session-1", "group-1", second); + + const weakerBatchKey = "01ARZ3NDEKTSV4RRFFQ69G5FC2"; + const strongerBatchKey = "01ARZ3NDEKTSV4RRFFQ69G5FC3"; + await seedRetryStateForEvents( + redis, + "group-1", + 
[first.id], + weakerBatchKey, + { attempts: 1, nextAttemptAt: 0 }, + ); + await seedRetryStateForEvents( + redis, + "group-1", + [second.id], + strongerBatchKey, + { attempts: 2, nextAttemptAt: now + 60_000 }, + ); + + const result = await drain.drainGroup("group-1", { + addMemory() { + throw new Error("should not run during backoff"); + }, + } as never); + + assertEquals(result, { + status: "backoff", + drained: 0, + retryAfterMs: 60_000, + }); + assertEquals( + await redis.getString(drainRetryAliasKey("group-1", first.id)), + strongerBatchKey, + ); + assertEquals( + await redis.getString(drainRetryAliasKey("group-1", second.id)), + strongerBatchKey, + ); + assertEquals( + await redis.getString(drainRetryKey("group-1", weakerBatchKey)), + null, + ); + assertEquals( + await redis.getString(drainRetryMembersKey("group-1", weakerBatchKey)), + null, + ); + assertEquals( + JSON.parse( + await redis.getString( + drainRetryMembersKey("group-1", strongerBatchKey), + ) ?? + "[]", + ).sort(), + [first.id, second.id].sort(), + ); + }); + it("adds bounded jitter to retry scheduling", async () => { const { redis, events, drain } = await createDeps({ drain: { @@ -694,7 +775,7 @@ describe("batch drain", () => { } as never); assertEquals(result, { status: "retry", drained: 0 }); - const aliasKey = drainRetryAliasKey("group-1", [event.id]); + const aliasKey = drainRetryAliasKey("group-1", event.id); const batchKey = await redis.getString(aliasKey); if (!batchKey) throw new Error("Expected retry alias key to be set"); assertEquals( @@ -1110,7 +1191,7 @@ describe("batch drain", () => { assertEquals(result.status, "retry"); assertEquals( await redis.getString( - drainRetryAliasKey("group-1", [first.id, second.id]), + drainRetryAliasKey("group-1", first.id), ), null, ); diff --git a/src/services/batch-drain.ts b/src/services/batch-drain.ts index fc648f1..1475fe6 100644 --- a/src/services/batch-drain.ts +++ b/src/services/batch-drain.ts @@ -47,11 +47,44 @@ class DrainClaimLostError 
extends Error { } } -const makeBatchFingerprint = (entries: DrainQueueEntry[]): string => - entries.map((entry) => entry.event.id).join(",") || "empty"; +const drainRetryAliasKey = (groupId: string, eventId: string): string => + `drain:retry-alias:${groupId}:event:${eventId}`; -const drainRetryAliasKey = (groupId: string, fingerprint: string): string => - `drain:retry-alias:${groupId}:${fingerprint}`; +const drainRetryMembersKey = (groupId: string, batchKey: string): string => + `drain:retry-members:${groupId}:${batchKey}`; + +const dedupeEventIds = ( + entries: DrainQueueEntry[], +): string[] => [...new Set(entries.map((entry) => entry.event.id))]; + +const parseRetryMembers = (value: unknown): string[] | null => { + if (!Array.isArray(value)) return null; + const members = value.filter((member): member is string => { + return typeof member === "string" && member.length > 0; + }); + return members.length === value.length ? members : null; +}; + +const sameStringSet = (left: string[], right: string[]): boolean => { + if (left.length !== right.length) return false; + const rightSet = new Set(right); + return left.every((value) => rightSet.has(value)); +}; + +const compareRetryStates = ( + left: RetryState | null, + right: RetryState | null, +): number => { + if (left && !right) return 1; + if (!left && right) return -1; + if (!left && !right) return 0; + const leftState = left as RetryState; + const rightState = right as RetryState; + if (leftState.attempts !== rightState.attempts) { + return leftState.attempts - rightState.attempts; + } + return leftState.nextAttemptAt - rightState.nextAttemptAt; +}; type PreparedDrainEntry = { entry: DrainQueueEntry; @@ -206,31 +239,169 @@ export class BatchDrainService { ); } + private async getRetryMembers( + groupId: string, + batchKey: string, + ): Promise { + const key = drainRetryMembersKey(groupId, batchKey); + const raw = await this.redis.getString(key); + if (!raw) return null; + try { + const parsed = JSON.parse(raw); + 
const members = parseRetryMembers(parsed); + if (members) return members; + await this.redis.deleteKey(key); + logger.warn("Cleared invalid drain retry member state", { + groupId, + batchKey, + }); + return null; + } catch { + await this.redis.deleteKey(key); + logger.warn("Cleared corrupted drain retry member state", { + groupId, + batchKey, + }); + return null; + } + } + + private async setRetryMembers( + groupId: string, + batchKey: string, + eventIds: string[], + ): Promise { + await this.redis.setString( + drainRetryMembersKey(groupId, batchKey), + JSON.stringify(eventIds), + RETRY_STATE_TTL_SECONDS, + ); + } + + private async syncRetryMembers( + groupId: string, + batchKey: string, + eventIds: string[], + existingMembers: string[] | null, + ): Promise { + if (existingMembers && sameStringSet(existingMembers, eventIds)) { + await this.redis.touch( + drainRetryMembersKey(groupId, batchKey), + RETRY_STATE_TTL_SECONDS, + ); + return; + } + + await this.setRetryMembers(groupId, batchKey, eventIds); + } + + private async syncRetryAliases( + groupId: string, + batchKey: string, + eventIds: string[], + ): Promise { + await Promise.all(eventIds.map(async (eventId) => { + const aliasKey = drainRetryAliasKey(groupId, eventId); + const existingBatchKey = await this.redis.getString(aliasKey); + if (existingBatchKey === batchKey) { + await this.redis.touch(aliasKey, RETRY_STATE_TTL_SECONDS); + return; + } + + await this.redis.setString(aliasKey, batchKey, RETRY_STATE_TTL_SECONDS); + })); + } + private async resolveBatchKey( groupId: string, entries: DrainQueueEntry[], - ): Promise<{ aliasKey: string; batchKey: string }> { - const aliasKey = drainRetryAliasKey(groupId, makeBatchFingerprint(entries)); - let batchKey = await this.redis.getString(aliasKey); - if (batchKey) { - await this.redis.touch(aliasKey, RETRY_STATE_TTL_SECONDS); - return { aliasKey, batchKey }; + ): Promise<{ batchKey: string }> { + const eventIds = dedupeEventIds(entries); + const discoveredBatchKeys = 
new Set(); + for (const eventId of eventIds) { + const batchKey = await this.redis.getString( + drainRetryAliasKey(groupId, eventId), + ); + if (batchKey) discoveredBatchKeys.add(batchKey); } - batchKey = ulid.monotonicUlid(); + if (discoveredBatchKeys.size === 0) { + const batchKey = ulid.monotonicUlid(); + await Promise.all([ + this.setRetryMembers(groupId, batchKey, eventIds), + this.syncRetryAliases(groupId, batchKey, eventIds), + ]); + return { batchKey }; + } - await this.redis.setString(aliasKey, batchKey, RETRY_STATE_TTL_SECONDS); - return { aliasKey, batchKey }; + const candidateBatchKeys = [...discoveredBatchKeys]; + const candidateMembers = new Map(); + let batchKey = candidateBatchKeys[0]; + let batchRetryState: RetryState | null = null; + for (const candidate of candidateBatchKeys) { + const [candidateState, members] = await Promise.all([ + this.getRetryState(groupId, candidate), + this.getRetryMembers(groupId, candidate), + ]); + candidateMembers.set(candidate, members ?? []); + if (candidate === batchKey) { + batchRetryState = candidateState; + continue; + } + if (compareRetryStates(candidateState, batchRetryState) > 0) { + batchKey = candidate; + batchRetryState = candidateState; + } + } + + if (candidateBatchKeys.length > 1) { + logger.warn("Canonicalized conflicting drain retry aliases", { + groupId, + eventIds, + batchKeys: candidateBatchKeys, + chosenBatchKey: batchKey, + }); + } + + const mergedEventIds = [ + ...new Set([ + ...candidateBatchKeys.flatMap((candidate) => + candidateMembers.get(candidate) ?? [] + ), + ...eventIds, + ]), + ]; + await Promise.all([ + this.syncRetryMembers( + groupId, + batchKey, + mergedEventIds, + candidateMembers.get(batchKey) ?? 
null, + ), + this.syncRetryAliases(groupId, batchKey, mergedEventIds), + ...candidateBatchKeys + .filter((candidate) => candidate !== batchKey) + .flatMap((candidate) => [ + this.redis.deleteKey(drainRetryKey(groupId, candidate)), + this.redis.deleteKey(drainRetryMembersKey(groupId, candidate)), + ]), + ]); + return { batchKey }; } private async clearRetryState( groupId: string, - aliasKey: string, batchKey: string, + fallbackEventIds: string[] = [], ): Promise { + const memberIds = await this.getRetryMembers(groupId, batchKey) ?? []; + const aliasIds = [...new Set([...memberIds, ...fallbackEventIds])]; await Promise.all([ - this.redis.deleteKey(aliasKey), this.redis.deleteKey(drainRetryKey(groupId, batchKey)), + this.redis.deleteKey(drainRetryMembersKey(groupId, batchKey)), + ...aliasIds.map((eventId) => + this.redis.deleteKey(drainRetryAliasKey(groupId, eventId)) + ), ]); } @@ -287,12 +458,12 @@ export class BatchDrainService { const batch = claimed.entries; const preparedBatch = prepareDrainEntries(batch); - const { aliasKey, batchKey } = await this.resolveBatchKey(groupId, batch); + const { batchKey } = await this.resolveBatchKey(groupId, batch); const eventIds = batch.map((entry) => entry.event.id); const drainableEntryIds = getDrainableEntryIds(preparedBatch); if (drainableEntryIds.size === 0) { await this.events.markBatchSuccess(groupId, claimed.claimToken, batch); - await this.clearRetryState(groupId, aliasKey, batchKey); + await this.clearRetryState(groupId, batchKey, eventIds); return { status: "success", drained: 0 }; } @@ -384,7 +555,7 @@ export class BatchDrainService { } await assertClaimOwnership(); await this.events.markBatchSuccess(groupId, claimed.claimToken, batch); - await this.clearRetryState(groupId, aliasKey, batchKey); + await this.clearRetryState(groupId, batchKey, eventIds); return { status: "success", drained: drainableEntryIds.size }; } catch (err) { const lostOwnership = err instanceof DrainClaimLostError; @@ -403,7 +574,7 @@ export 
class BatchDrainService { eventIds, }); } - await this.clearRetryState(groupId, aliasKey, batchKey); + await this.clearRetryState(groupId, batchKey, eventIds); logger.warn( "Drain batch failed after claim loss; waiting for recovery", { @@ -430,7 +601,7 @@ export class BatchDrainService { claimed.claimToken, batch, ); - await this.clearRetryState(groupId, aliasKey, batchKey); + await this.clearRetryState(groupId, batchKey, eventIds); return { status: "dead-letter", drained: drainedCount }; } diff --git a/src/services/session-mcp-runtime.ts b/src/services/session-mcp-runtime.ts index 0355ea6..6b9de4e 100644 --- a/src/services/session-mcp-runtime.ts +++ b/src/services/session-mcp-runtime.ts @@ -322,7 +322,7 @@ const readSessionIndexBody = async ( try { await requestSessionIndexPermissions(resolvedPath, context); return await readSessionIndexFile(resolvedPath); - } catch (error) { + } catch { throw createBoundedSessionIndexError( "session_index_path_unreadable", "session_index could not read the requested path.", From 497b6261be5f8d47e7dd95dec115326b75bb2296 Mon Sep 17 00:00:00 2001 From: "Vicary A." Date: Wed, 25 Mar 2026 02:27:51 +0800 Subject: [PATCH 38/38] fix: harden prompt memory review follow-ups --- AGENTS.md | 7 ++-- src/handlers/messages.test.ts | 74 +++++++++++++++++++++++++++++++++++ src/handlers/messages.ts | 68 +++++++++++++++++++++++++++----- 3 files changed, 137 insertions(+), 12 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index a986c42..9fe95ac 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -74,9 +74,10 @@ asynchronously on idle or after compaction. ### GitHub PR Review Handling - **See `docs/ReviewProtocol.md`** for the complete workflow. -- Detect active PR → fetch unresolved review comments → spawn concurrent swarm - sessions per item → verify claims → apply narrow fixes → resolve threads → - push → request fresh review. 
+- Detect active PR → fetch unresolved review comments → verify claims → dedupe + verified claims into issue classes → run repo-wide class sweeps with + conservative parallelization/serialization → resolve threads → push → request + fresh review. ## Validation Expectations diff --git a/src/handlers/messages.test.ts b/src/handlers/messages.test.ts index 6179a7f..d64374c 100644 --- a/src/handlers/messages.test.ts +++ b/src/handlers/messages.test.ts @@ -941,6 +941,80 @@ describe("messages handler", () => { ); }); + it("scrubs multiple sequential leading session_memory envelopes even when later blocks omit attrs", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'continue', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "continue", + }, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: + 'stale\n\nolder stale\n\ncontinue', + }], + }], + }; + + await handler({} as never, output as never); + + assertEquals( + output.messages[0].parts[0].text, + 'continue\n\ncontinue', + ); + }); + + it("scrubs leading standalone persistent_memory envelopes even without identifying attrs", async () => { + const sessionManager = new MockSessionManager(); + sessionManager.state.pendingInjection = { + envelope: + 'continue', + nodeRefs: [], + refreshDecision: { + classification: "aligned", + shouldRefresh: false, + similarity: 1, + threshold: 0.5, + cachedQuery: "continue", + }, + }; + const handler = createMessagesHandler({ + sessionManager: sessionManager as never, + }); + + const output = { + messages: [{ + info: { role: "user", sessionID: "session-1" }, + parts: [{ + type: "text", + text: + "stale cached recall\n\ncontinue", + }], + }], + }; + + await 
handler({} as never, output as never); + + assertEquals( + output.messages[0].parts[0].text, + 'continue\n\ncontinue', + ); + }); + it("remains compatible with extended prepareInjection results", async () => { const prepared = { envelope: '', diff --git a/src/handlers/messages.ts b/src/handlers/messages.ts index 8fd9a67..cf08c81 100644 --- a/src/handlers/messages.ts +++ b/src/handlers/messages.ts @@ -29,27 +29,77 @@ const getTransformMessage = (input: unknown): string | undefined => { return typeof message === "string" ? message : undefined; }; -const LEADING_INJECTED_SESSION_MEMORY_BLOCK = - /^]*\bsource=(['"])[^'"]+\1)(?=[^>]*\bversion=(['"])[^'"]+\2)[^>]*>[\s\S]*?<\/session_memory>(?:\r?\n){0,2}/; +const LEADING_SESSION_MEMORY_BLOCK = + /^]*>[\s\S]*?<\/session_memory>(?:\r?\n){0,2}/; const LEADING_INJECTED_LEGACY_MEMORY_BLOCK_WITH_UUIDS = /^]*\bdata-uuids=(["'])(?:[^"']*)\1)[^>]*>[\s\S]*?<\/memory>(?:\r?\n){0,2}/; const LEADING_INJECTED_EMPTY_LEGACY_MEMORY_BLOCK = /^]*\bdata-uuids=)[^>]*>\s*<\/memory>(?:\r?\n){0,2}/; -const LEADING_INJECTED_PERSISTENT_MEMORY_BLOCK = - /^]*\b(?:node_refs|fact_uuids)=(["'])[^"']*\1)[^>]*>[\s\S]*?<\/persistent_memory>(?:\r?\n){0,2}/; +const LEADING_PERSISTENT_MEMORY_BLOCK = + /^]*>[\s\S]*?<\/persistent_memory>(?:\r?\n){0,2}/; +const SESSION_MEMORY_SOURCE_ATTR_PATTERN = + /]*\bsource=(['"])[^'"]+\1/i; +const SESSION_MEMORY_GENERATED_SECTION_PATTERN = + /<(?:session_snapshot|persistent_memory)\b/i; +const PERSISTENT_MEMORY_GENERATED_CONTENT_PATTERN = /<(?:node|fact|episode)\b/i; const USER_MEMORY_ENVELOPE_TAG_PATTERN = /<\/?(?:session_memory|memory|persistent_memory)\b[^>]*>/gi; +const looksLikeInjectedSessionMemoryBlock = ( + block: string, + allowAttrlessFollowup: boolean, +): boolean => + SESSION_MEMORY_SOURCE_ATTR_PATTERN.test(block) || + SESSION_MEMORY_GENERATED_SECTION_PATTERN.test(block) || + allowAttrlessFollowup; + +const looksLikeInjectedPersistentMemoryBlock = (block: string): boolean => + 
PERSISTENT_MEMORY_GENERATED_CONTENT_PATTERN.test(block); + const scrubPromptMemoryText = (text: string): string => { let scrubbed = text; + let scrubbedInjectedPrefix = false; + while (true) { + const leadingSessionMemory = scrubbed.match(LEADING_SESSION_MEMORY_BLOCK) + ?.[0]; + if ( + leadingSessionMemory && + // Once we have confirmed an injected prefix, immediately following + // attrless session_memory blocks are treated as stale reinjections too. + looksLikeInjectedSessionMemoryBlock( + leadingSessionMemory, + scrubbedInjectedPrefix, + ) + ) { + scrubbed = scrubbed.slice(leadingSessionMemory.length); + scrubbedInjectedPrefix = true; + continue; + } + const next = scrubbed - .replace(LEADING_INJECTED_SESSION_MEMORY_BLOCK, "") .replace(LEADING_INJECTED_LEGACY_MEMORY_BLOCK_WITH_UUIDS, "") - .replace(LEADING_INJECTED_EMPTY_LEGACY_MEMORY_BLOCK, "") - .replace(LEADING_INJECTED_PERSISTENT_MEMORY_BLOCK, ""); - if (next === scrubbed) return scrubbed; - scrubbed = next; + .replace(LEADING_INJECTED_EMPTY_LEGACY_MEMORY_BLOCK, ""); + if (next !== scrubbed) { + scrubbed = next; + scrubbedInjectedPrefix = true; + continue; + } + + const leadingPersistentMemory = scrubbed.match( + LEADING_PERSISTENT_MEMORY_BLOCK, + ) + ?.[0]; + if ( + leadingPersistentMemory && + looksLikeInjectedPersistentMemoryBlock(leadingPersistentMemory) + ) { + scrubbed = scrubbed.slice(leadingPersistentMemory.length); + scrubbedInjectedPrefix = true; + continue; + } + + return scrubbed; } };