diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7a34cf1..3d3940a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,8 +41,8 @@ jobs: # NOTE: this means CI does NOT currently exercise the zero-dependency # graceful-degradation paths (blake2b/unsigned/keyword) — several tests # assume the optional deps are present rather than skipif-guarding their - # absence. Adding a second minimal-install matrix leg to cover the - # degradation paths is a tracked follow-up (see docs/03-roadmap.md). + # absence. A second minimal-install matrix leg to cover those paths is a + # known follow-up. pip install -e ".[dev,crypto,smart]" - name: Run tests diff --git a/README.md b/README.md index 789d7bb..7bc3ef9 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ You can change anything after install, e.g. `komi-learn config set recall.semant 3. Curate: over time it merges overlapping lessons and archives stale ones. 4. Share (optional): general lessons can be contributed to the community pool, but only ones you approve. -It tries not to learn the wrong things. Secrets, machine-specific paths, one-off failures, and "tool X is broken" complaints are filtered out by a deterministic check before the LLM ever sees them. Design notes: [docs/02-architecture.md](docs/02-architecture.md). +It tries not to learn the wrong things. Secrets, machine-specific paths, one-off failures, and "tool X is broken" complaints are filtered out by a deterministic check before the LLM ever sees them. ## Community pool (optional) @@ -79,11 +79,6 @@ It runs two sessions: you correct the agent in the first, and the second shows i The engine has no required dependencies. Optional extras add real signing (`pip install komi-learn[crypto]`) and local semantic recall (`[smart]`); without them it falls back to a stdlib hash and keyword search. 
-## Docs - -- [docs/02-architecture.md](docs/02-architecture.md) — system design -- [docs/03-roadmap.md](docs/03-roadmap.md) — what's built and what's next -- [docs/05-adr-log.md](docs/05-adr-log.md) — key decisions and trade-offs -- [pool-repo-template/](pool-repo-template/) — drop-in contents to run your own pool +To run your own pool, see [pool-repo-template/](pool-repo-template/). MIT. Issues and PRs welcome. diff --git a/docs/00-hermes-source-notes.md b/docs/00-hermes-source-notes.md deleted file mode 100644 index 9984750..0000000 --- a/docs/00-hermes-source-notes.md +++ /dev/null @@ -1,305 +0,0 @@ -# HERMES AGENT: MEMORY AND LEARNING SYSTEM — TECHNICAL WRITEUP - -## 1. THE NUDGE MECHANISM - -### What is a Nudge? - -A nudge is an inactivity-triggered background review spawning a forked AIAgent that replays the conversation with a special review prompt. - -### Trigger Thresholds - -**MEMORY NUDGE** — Turn-based counter in agent._memory_nudge_interval: -- Default: 10 turns -- Config key: memory.nudge_interval -- Set at: agent/agent_init.py line 1067 -- Checked at: agent/conversation_loop.py:548-556 -- Logic: agent._turns_since_memory increments each turn; when >= nudge_interval, spawn background review and reset - -**SKILL NUDGE** — Iteration-based counter in agent._skill_nudge_interval: -- Default: 10 tool iterations -- Config key: skills.creation_nudge_interval -- Set at: agent/agent_init.py line 1187 -- Checked at: agent/codex_runtime.py:124-129 - -### NUDGE PROMPT TEXT (EXACT QUOTES) - -**Memory Review Prompt** (agent/background_review.py:31-45): - -"Review the conversation above and consider saving to memory if appropriate. - -Focus on: -1. Has the user revealed things about themselves — their persona, desires, preferences, or personal details worth remembering? -2. Has the user expressed expectations about how you should behave, their work style, or ways they want you to operate? - -If something stands out, save it using the memory tool. 
If nothing is worth saving, just say 'Nothing to save.' and stop." - -**Skill Review Prompt** (agent/background_review.py:47-159, 113 lines): - -Opens: "Review the conversation above and update the skill library. Be ACTIVE — most sessions produce at least one skill update, even if small..." - -Key directives: -- TARGET SHAPE: CLASS-LEVEL SKILLS with rich SKILL.md + references/ -- NOT a flat list of one-session-one-skill entries -- Hard rule 1: DO NOT touch bundled/hub-installed skills -- Hard rule 2: DO NOT delete (archive only) -- Hard rule 3: DO NOT touch pinned skills -- Hard rule 4: DO NOT judge on usage counters alone -- Hard rule 5: DO NOT reject on basis of distinct triggers - -Consolidation strategy: Identify PREFIX CLUSTERS, ask "what UMBRELLA CLASS?", pick one of 3 methods: - a) MERGE INTO EXISTING UMBRELLA — patch, add labeled section, archive sibling - b) CREATE NEW UMBRELLA SKILL — skill_manage action=create, archive siblings - c) DEMOTE TO REFERENCES/TEMPLATES/SCRIPTS — move to umbrella's subdirs - -Expected output (EXACT YAML format, curator.py:468-489): -\\\yaml -consolidations: - - from: - into: - reason: -prunings: - - name: - reason: -\\\ - -### When Nudges Trigger - -After every turn completes. Runs in DAEMON THREAD (background, non-blocking): -- Called at: agent/conversation_loop.py:4578-4584 -- Function: agent._spawn_background_review(messages_snapshot, review_memory, review_skills) -- Forked agent: tool whitelist (memory + skill only), nudges disabled recursively -- No observable UI — only side effects are disk writes - ---- - -## 2. MEMORY LAYERS (FILE PATHS & SCHEMAS) - -### Exact Paths - -Built-in stores: -- MEMORY.md: ~/.hermes/memories/MEMORY.md (2200 chars, config: memory.memory_char_limit) -- USER.md: ~/.hermes/memories/USER.md (1375 chars, config: memory.user_char_limit) -- Locks: MEMORY.md.lock, USER.md.lock (fcntl Unix / msvcrt Windows) -- Drift backups: MEMORY.md.bak., USER.md.bak. 
- -Curator: -- State: ~/.hermes/skills/.curator_state (JSON) -- Reports: ~/.hermes/logs/curator/<timestamp>/REPORT.md - -External provider: -- Location: ~/.hermes/plugins/memory/<provider>/ -- Config: config.yaml key memory.provider -- Secrets: ~/.hermes/.env (mode 0600) - -### File Formats - -Built-in memory files: Markdown with § delimiters (Unicode U+00A7) - -ENTRY_DELIMITER = \"\n§\n\" (tools/memory_tool.py:55) - -Example MEMORY.md: -\\\ -PostgreSQL 16: BETWEEN excludes upper bound, use >= and <= -§ -Project uses Go 1.22 + sqlc; migrations in migrations/ -§ -User prefers direct answers, no verbose explanations -\\\ - -Curator state JSON: -\\\json -{ - "last_run_at": "2024-05-29T14:15:32.123456+00:00", - "last_run_duration_seconds": 127, - "last_run_summary": "Consolidated PR skills into pr-triage umbrella", - "last_run_summary_shown_at": null, - "last_report_path": "~/.hermes/logs/curator/20240529-141532/REPORT.md", - "paused": false, - "run_count": 5 -} -\\\ - -### Schema - -MemoryStore class (tools/memory_tool.py): -- memory_entries: List[str] (live state, mutable) -- user_entries: List[str] (live state, mutable) -- memory_char_limit: int (2200 default) -- user_char_limit: int (1375 default) -- _system_prompt_snapshot: Dict[str, str] (frozen at load, immutable during session) - -Methods: -- load_from_disk() — read, dedupe, scan threats, freeze snapshot -- add(target, content) → Dict — append, check limits, persist -- replace(target, old_text, new_content) → Dict — substring match, swap, persist -- remove(target, old_text) → Dict — substring match, delete, persist -- format_for_system_prompt(target) → Optional[str] — return frozen snapshot -- save_to_disk(target) — atomic tempfile + os.replace - ---- - -## 3. 
CURATION LOGIC - -### How Curator Decides WHAT to Write - -**Memory nudge**: User guidance in prompt — focus on persona, preferences, expectations about behavior - -**Skill nudge**: 113-line detailed prompt with explicit signals: -- User corrected style/tone/format (FIRST-CLASS) -- User corrected workflow/approach -- Non-trivial technique/fix/workaround -- Loaded skill turned out wrong/missing (patch NOW) - -Preference order (from prompt): -1. UPDATE CURRENTLY-LOADED SKILL (check /skill-name or skill_view in conversation) -2. UPDATE EXISTING UMBRELLA (skills_list + skill_view search) -3. ADD SUPPORT FILE under umbrella (references/, templates/, scripts/) -4. CREATE NEW CLASS-LEVEL UMBRELLA (name must be class-level, NOT PR#/error/codename/'fix-X') - -### Automatic State Transitions (Non-LLM) - -Function: curator.apply_automatic_transitions(now) (curator.py:273-314) - -Pure state machine (no LLM call): -- ACTIVE → STALE: no activity >= 30 days (config: curator.stale_after_days) -- ACTIVE/STALE → ARCHIVED: no activity >= 90 days (config: curator.archive_after_days) -- STALE → ACTIVE: activity after being stale (automatic reactivation) - -Pinned skills: Never transitioned (bypass all auto-transitions) - -Constants (curator.py:56-59): -- DEFAULT_STALE_AFTER_DAYS = 30 -- DEFAULT_ARCHIVE_AFTER_DAYS = 90 -- DEFAULT_INTERVAL_HOURS = 24 * 7 (7 days) -- DEFAULT_MIN_IDLE_HOURS = 2 - -### Curator Review (LLM-Driven Consolidation) - -When: Triggered by maybe_run_curator() when: -- curator.enabled == True (config) -- Not paused -- Last run >= get_interval_hours() ago (default: 168 hours = 7 days) -- Agent idle >= get_min_idle_hours() (default: 2 hours) - -Prompt (curator.py:309-543): - -"You are running as Hermes' background skill CURATOR. This is an UMBRELLA-BUILDING consolidation pass, not a passive audit..." - -"The goal of the skill collection is a LIBRARY OF CLASS-LEVEL INSTRUCTIONS AND EXPERIENTIAL KNOWLEDGE. 
A collection of hundreds of narrow skills where each one captures one session's specific bug is a FAILURE of the library — not a feature." - -Hard rules (non-negotiable): -1. DO NOT touch bundled/hub-installed skills -2. DO NOT delete (archive only) -3. DO NOT touch pinned skills -4. DO NOT judge on usage counters alone (judge on CONTENT) -5. DO NOT reject consolidation for pairwise distinctness - -Consolidation workflow: -1. Scan full list, identify PREFIX CLUSTERS -2. For each 2+ member cluster: ask "what UMBRELLA CLASS?" -3. Choose consolidation path (a/b/c above) -4. Emit structured YAML block with consolidations + prunings lists - ---- - -## 4. MEMORY.MD / USER.MD PERSISTENCE - -### What Writes Them - -memory_tool(action=add) called by background review agent after nudge trigger - -### Structure - -Entries: Freeform text, delimited by \n§\n - -MEMORY.md examples: -- Environment facts ("PostgreSQL 16: BETWEEN excludes upper") -- Project conventions ("Go + sqlc, migrations in migrations/ dir") -- Tool quirks ("Docker Desktop required for docker daemon on Mac") -- Lessons learned ("Nil dereference panics in Go") - -USER.md examples: -- Persona ("Works in fintech, prefers Rust, timezone PST") -- Preferences ("Concise output, direct, no verbose explanations") -- Communication style ("Pragmatic, impatient with basics") -- Pet peeves ("Don't explain basic concepts") - -### How They're Injected - -**Frozen snapshot pattern**: -1. Session start: MemoryStore.load_from_disk() reads MEMORY.md + USER.md -2. Entries scanned for threats (injection/exfil patterns) — strict scope -3. Any threat-matched entry replaced with [BLOCKED: ...] in snapshot only -4. Frozen snapshot captured and immutable during entire session -5. format_for_system_prompt() returns snapshot for injection -6. Mid-session tool calls mutate live memory_entries/user_entries + disk, NOT snapshot -7. 
Next session: new snapshot from updated disk - -Injection point (system_prompt.py:306-311): -\\\python -if agent._memory_store: - mem_block = agent._memory_store.format_for_system_prompt("memory") - if mem_block: - parts["memory"] = mem_block - - user_block = agent._memory_store.format_for_system_prompt("user") - if user_block: - parts["user"] = user_block -\\\ - -Rendered format (memory_tool.py:431-449): -\\\ -════════════════════════════════════════════════ -MEMORY (your personal notes) [50% — 1100/2200 chars] -════════════════════════════════════════════════ -Entry 1 -§ -Entry 2 -\\\ - -No truncation; entire snapshot injected (within char limits). - ---- - -## 5. RETRIEVAL - -### Full File Injection at Session Start - -Built-in: Complete frozen snapshot injected verbatim, no ranking/search - -External provider: Optional prefetch() method called before each turn -- Retrieved context wrapped in fences -- Scrubbed by StreamingContextScrubber during streaming -- NOT injected into system prompt (no prefix cache invalidation) - -### On-Session-End - -Optional provider hook: on_session_end(messages) -- Called at real boundaries (CLI exit, /reset, gateway timeout) -- NOT called after every turn -- Provider can extract/summarize full conversation - ---- - -## 6. THRESHOLDS (EXACT NUMBERS) - -- Memory nudge: 10 turns (agent_init.py:1067) -- Skill nudge: 10 iterations (agent_init.py:1187) -- MEMORY.md char limit: 2200 (agent_init.py:1071) -- USER.md char limit: 1375 (agent_init.py:1072) -- Curator interval: 168 hours = 7 days (curator.py:56) -- Curator min idle: 2 hours (curator.py:57) -- Skill stale: 30 days (curator.py:58) -- Skill archive: 90 days (curator.py:59) - ---- - -## 7. CRITICAL INVARIANTS - -1. Frozen snapshot — system prompt never mutates mid-session (prefix cache stable) -2. Immediate atomic writes — memory tool uses tempfile + os.replace for safety -3. Concurrent drift detection — file locking + round-trip check prevents data loss -4. 
Background isolation — nudge agents tool-whitelisted, nudges disabled recursively -5. Pinned skills never auto-transitioned — curator skips any pinned -6. One external provider max — MemoryManager enforces mutual exclusion -7. Threat scanning on load — poisoned entries replaced with [BLOCKED: ...] in snapshot only diff --git a/docs/01-research.md b/docs/01-research.md deleted file mode 100644 index 6960a9f..0000000 --- a/docs/01-research.md +++ /dev/null @@ -1,158 +0,0 @@ -# komi-learn — Deep Research Report -### Continuous, zero-friction learning for AI agents - -*Research compiled 2026-05-29. Primary sources: the Hermes Agent source tree (nousresearch/hermes-agent v0.15.x), official Claude Code & Agent SDK docs (2026), and recent academic work on agent memory & skill learning.* - ---- - -## 0. The thesis - -Today's AI agents are **amnesiac geniuses**. Each session starts from zero. They re-derive the same workarounds, re-learn your preferences, repeat corrected mistakes, and never compound. The fix is an externalized learning loop: the agent watches its own work, distills durable lessons, persists them outside the context window, and reloads them next time — **automatically, with no command to invoke.** - -Hermes Agent proved this can be a *first-class architectural feature* rather than a bolt-on. Letta proved it produces **measurable capability gains** (+36.8% relative on Terminal Bench 2.0). A 2026 protocol paper ("Portable Agent Memory") proved learnings can be made **portable and provenance-verified across vendors** — which is exactly what a shared "Global Learnings" pool requires. - -**komi-learn** is the synthesis: Hermes' loop, made model-agnostic, universal across personas, and extended with a public, anonymized, cryptographically-trustworthy global knowledge layer. - ---- - -## 1. How Hermes Agent's learning system actually works - -> Reverse-engineered from source. 
Several widely-repeated blog claims are **wrong**; the real mechanics are below with file references. - -### 1.1 The background review loop — the heart of it - -The single most important mechanism is in `agent/background_review.py`. After a turn, the main loop *may* call `spawn_background_review`, which: - -1. Spawns a **daemon thread** running a **forked copy of the agent** (`_run_review_in_thread`). -2. The fork **replays the conversation snapshot** and is asked one question: *"should any skill/memory be saved or updated?"* -3. The fork runs with a **tool whitelist of only the memory + skill tools** — every other tool is denied at runtime. -4. Writes go straight to the memory/skill stores on disk. **The main conversation and its prompt cache are never touched.** - -Three design choices make this cheap and safe: - -- **Prefix-cache reuse.** The fork inherits the parent's *cached system prompt verbatim* (`review_agent._cached_system_prompt = agent._cached_system_prompt`) and pins `session_start`/`session_id`, so its outbound request hits the **same Anthropic prefix cache** the parent warmed. The source cites a measured **~26% end-to-end cost reduction** from this alone (PR #17276). This is a critical lesson: *a learning pass that reuses the warm cache is nearly free.* -- **No side effects.** `skip_memory=True` stops the fork from polluting external memory providers; `max_iterations=16`; `quiet_mode`; dangerous commands auto-deny (the fork can't prompt a human). -- **One source of truth for prompts.** Three prompt variants — `_MEMORY_REVIEW_PROMPT`, `_SKILL_REVIEW_PROMPT`, `_COMBINED_REVIEW_PROMPT` — selected by which trigger fired. 
- -### 1.2 The triggers (myth-corrected) - -| Mechanism | Reality (from source) | Common blog myth | -|---|---|---| -| Memory nudge | every **10 turns** (`memory.nudge_interval`) | — | -| Skill nudge | every **10 tool iterations** (`skills.creation_nudge_interval`) | "after 5+ tool calls" ✗ | -| Curator (consolidation) | every **7 days** (168h), min idle 2h; stale 30d, archive 90d | — | -| Insights | **on-demand only**, pure SQL aggregation, **no LLM**, **not persisted** | "every 15 tasks the agent reflects" ✗ | - -The takeaway: the *real* loop is **turn/iteration-cadence background review** plus a **slow periodic curator**, not a task-counter reflection. - -### 1.3 What the review prompt teaches — and what it forbids - -The `_SKILL_REVIEW_PROMPT` is a masterclass. Its philosophy: - -- **Be ACTIVE.** *"most sessions produce at least one skill update… A pass that does nothing is a missed learning opportunity, not a neutral outcome."* `'Nothing to save.'` is allowed but is explicitly *not the default*. -- **User frustration is a FIRST-CLASS skill signal.** *"stop doing X", "this is too verbose", "why are you explaining", "you always do Y and I hate it"* → embed the corrected preference **in the skill that governs that class of task**, so the next session starts already fixed. (Memory says *who the user is*; skills say *how to do this task for this user*.) -- **Preference order for where a lesson goes:** (1) patch a currently-loaded skill → (2) patch an existing umbrella skill → (3) add a support file (`references/`, `templates/`, `scripts/`) → (4) only then create a new class-level skill. - -The **anti-capture list** is as valuable as the capture logic — these are the failure modes that make naive "save everything" systems rot: - -- ❌ **Environment-dependent failures** (missing binaries, "command not found", unconfigured creds). *The user can fix these — they aren't durable rules.* -- ❌ **Negative claims about tools** ("browser tools don't work", "X is broken"). 
*"These harden into refusals the agent cites against itself for months after the actual problem was fixed."* -- ❌ **Transient errors that resolved.** If a retry worked, the lesson is the *retry pattern*, not the failure. -- ❌ **One-off task narratives** ("summarize today's market" is not a class of work). -- ✅ When a tool fails due to setup, capture the **FIX** (the install command, the env var) under a troubleshooting skill — never the bare "this doesn't work". - -> This list is the difference between a system that gets *smarter* and one that accumulates **self-imposed constraints** until it's afraid of its own tools. komi-learn adopts it wholesale. - -### 1.4 Memory model - -- **Two built-in stores:** `MEMORY.md` (the agent's own notes) and `USER.md` (the user profile), at `~/.hermes/memories/`. Plain Markdown, entries separated by `\n§\n` (U+00A7). Soft char limits (~2200 / ~1375). -- **Full-file injection via a frozen snapshot.** At session start: load → dedupe (`dict.fromkeys`) → threat-scan (replace injection-y entries with `[BLOCKED]` *in the snapshot only*) → **freeze**. The frozen block goes into the system prompt and is *immutable for the whole session* (so the prefix cache holds). Mid-session writes hit disk immediately but don't change the live snapshot — they land next session. -- **Cross-session recall:** sessions indexed in **SQLite + FTS5**; optional external providers (Honcho, Mem0, Hindsight) can add semantic prefetch. - -### 1.5 Skill model — "umbrellas", not snowflakes - -- **Class-level skills.** The target library shape is a *small* set of rich, class-level skills, each a `SKILL.md` plus `references/` (session-specific detail + condensed knowledge banks), `templates/` (copy-and-modify starters), `scripts/` (re-runnable probes). 
The explicit anti-goal: *"a collection of hundreds of narrow skills where each one captures one session's specific bug is a FAILURE of the library."* -- **The curator** runs slowly (7-day cadence), finds **prefix-clusters** of overlapping skills, and **consolidates them into umbrellas** or demotes them to support files. It **archives, never deletes** (max destructive action). Bundled/hub-installed skills are protected; pinned skills can be content-updated but not archived. -- **Frontmatter** follows the `agentskills.io` open standard: `name`, `description` (with an embedded *"Use when…"*), `version`, `author`, `license`, `platforms`, `metadata..tags`, `related_skills`, `prerequisites`. - -### 1.6 User modeling - -Hermes uses **Honcho** (optional) for *dialectic* user modeling: a **user peer** and an **AI peer** per session, with knobs for cadence (how often it reasons), depth (1–3 reasoning passes), and intensity, plus cold-start vs warm-start prompt strategies. ("Hermes profiles" are something different — fully isolated agent instances, not a learned model.) - ---- - -## 2. The broader field — what else informs the design - -### 2.1 Letta "Skill Learning" (2026) — empirical proof + the reflection pattern - -- **Two-stage learning:** (1) **Reflection** — given the agent's trajectory, evaluate whether it solved the task, whether each step was justified, and what repeats could be abstracted; optionally enrich with verifier feedback. (2) **Creation** — feed the reflection to a learning agent that uses a skill-creator to write a skill with *approaches, pitfalls, and verification strategies*. -- **Results (Terminal Bench 2.0, 89 tasks, Sonnet 4.5 + extended thinking):** trajectory-only skills → **+21.1% relative** (and −15.7% cost, −10.4% tool calls); trajectory **+ feedback** → **+36.8% relative**. 
-- **Key finding:** *feedback-informed skills that encode failure modes are more robust than success-only skills.* → komi-learn must capture **what went wrong and how it was fixed**, not just what worked. (This independently validates Hermes' frustration-as-first-class-signal.) -- **Memory hierarchy:** an evolving **system prompt** (agent-specific state) + evolving **skill files** (task-specific, interchangeable between agents). Model-agnostic: a strong model can write skills a weaker model later uses. -- Avoids the **RecoveryBench degradation trap** (raw errors in-context *hurt* performance) by distilling errors into separate skill files rather than leaving them in the trajectory. - -### 2.2 Voyager (the origin of skill libraries) - -A skill library of **verified executable programs** that grows through exploration; retrieval by embedding similarity over skill descriptions, deterministic code execution. Lesson: *a skill is only worth keeping if it's been verified to work.* komi-learn carries a verification notion into procedural learnings. - -### 2.3 Reflexion - -Linguistic self-feedback stored as **episodic memory** so the agent learns from failures across attempts. Lesson: natural-language reflection is a legitimate, durable memory substrate. - -### 2.4 "Portable Agent Memory" (PAM) — the Global-Learnings blueprint - -This 2026 protocol paper solves the precise problem of moving learnings **across heterogeneous agents (Claude/GPT/Gemini) with verifiable trust** — i.e., what a public global pool needs. - -- **Five memory types** `M = (E, S, P, W, I)`: **E**pisodic (events), **S**emantic (subject-predicate-object facts with confidence), **P**rocedural (skills/workflows with usage stats), **W**orking (transient), **I**dentity (persona/prefs). komi-learn maps cleanly onto this. 
-- **Content-addressed integrity:** each entry's `id` is the **BLAKE3 hash of its canonical JSON**; entries form a **Merkle-DAG** (`parent_ids`) so a semantic fact links to the episodic observation it derived from. Tampering with any entry invalidates everything downstream. The DAG root is **Ed25519-signed** by the operator. -- **Capability tokens** for scoping: permissions `{read, write, derive, redact, export, rehydrate}` over scope expressions (entry-list / component-type / **tag-predicate** with any_of·all_of·none_of / wildcard). This is how you express *personal vs shared vs global* without an all-or-nothing switch. -- **Redaction pipeline** = provenance-preserving deletion: a redacted entry keeps its DAG position but its content becomes a typed token → satisfies GDPR Art. 17 erasure **and** Art. 20 portability without breaking the hash chain. -- **Structural injection defense:** recalled memory is wrapped in typed boundary markers (`[PAM:DATA:semantic] … [/PAM:DATA]`) with an explicit directive to treat the content as **data, not instructions**, plus three escaping passes (boundary / role-marker / instruction). Essential when ingesting *public, untrusted* global learnings. -- **Re-hydration pipeline:** Verify → Filter → Rank → Compress → Format → Frame → Inject, with relevance = `0.2·recency + 0.3·salience + 0.4·similarity + 0.1·depth`. -- Reference SDK reports **Transfer Continuity 0.83–0.92** vs a no-memory baseline of 0.28–0.45 across three model families. - ---- - -## 3. The Claude Code substrate — can we build zero-friction here? (Yes.) - -The brief demands **no slash commands — it just does it, like Hermes.** Claude Code's extension surface supports exactly this: - -| Need | Mechanism | Notes | -|---|---|---| -| **Load learnings with zero friction** | `SessionStart` & `UserPromptSubmit` hooks return `hookSpecificOutput.additionalContext` | Injected straight into the model's context. No command. 
✅ | -| **Trigger a learning pass after work** | `Stop` / `SubagentStop` hooks fire when Claude finishes | Hooks are shell scripts; they can spawn a detached background process. ✅ | -| **Run the distill pass as a real agent** | **Claude Agent SDK** `query(...)` with an `AgentDefinition` (supports `background: true`, `memory: "user"\|"project"`) | A hook shells out to a tiny SDK wrapper — this is our analogue of Hermes' forked review agent. ✅ | -| **Persist learnings** | Auto-memory dir `~/.claude/projects//memory/MEMORY.md` + topic files; project/user `CLAUDE.md` with `@import` | `MEMORY.md` (first ~25KB) auto-loads at startup. ✅ | -| **Cross-session recall** | Transcripts are **JSONL** at `~/.claude/projects//.jsonl` | No built-in index — we build one (SQLite/FTS). Default retention 30d. | -| **Distribute the whole thing** | **Plugin**: `.claude-plugin/plugin.json` bundling `hooks/`, `skills/`, `agents/`, `.mcp.json` — hooks auto-register when enabled | This is komi-learn's shipping vehicle. ✅ | - -**Hard constraints discovered (these shape the design):** - -1. **Hooks can't spawn subagents directly** — they're shell scripts. We need a small **Agent SDK wrapper** (Python/TS) that the hook invokes. (Or do the distill with a direct Anthropic API call — see architecture doc.) -2. **Background subagents auto-deny permission prompts** (can't ask a human) — so the distill pass must run with a **read-mostly + write-to-learning-store-only** tool set. Mirrors Hermes' whitelist exactly. -3. **Subagents can't spawn subagents** — only the top level can; no nested delegation. -4. **Transcripts are machine-local** and expire (default 30d) — cross-machine/global sync must mirror deliberately. -5. The **frozen-snapshot lesson from Hermes applies here too**: inject learnings at `SessionStart`, don't mutate context mid-turn, to preserve Claude Code's own prefix cache. - ---- - -## 4. Synthesis — the principles komi-learn inherits - -1. 
**Background, forked, cache-warm distillation.** Learn in a separate pass that reuses the warm prompt prefix; never disturb the live turn. *(Hermes)* -2. **Capture corrections & failure-fixes, not just successes.** Feedback-informed learnings are measurably more robust. *(Letta + Hermes)* -3. **A strict anti-capture list.** Refuse environment failures, negative tool claims, transient errors, one-off narratives. This is what prevents rot. *(Hermes)* -4. **Umbrellas over snowflakes + a slow curator.** Consolidate toward few rich class-level skills; archive, never delete. *(Hermes)* -5. **Separate "who the user is" (Identity/USER) from "how to do this task" (Procedural/skills).** Two stores, two purposes. *(Hermes + PAM)* -6. **Zero friction via hooks.** Inject at `SessionStart`, distill at `Stop`, persist to the auto-memory dir. No slash commands. *(Claude Code)* -7. **Verified, provenance-carrying, injection-safe units for anything shared.** Content-addressed IDs, derivation DAG, capability-scoped sharing, redaction for erasure, data-not-instructions framing. *(PAM)* -8. **Model-agnostic substrate.** Learnings are plain files/records, portable across Claude/Codex/others; a strong model can teach a weaker one. *(Letta + PAM)* - -These eight principles drive the architecture in `02-architecture.md`. - ---- - -## Appendix — source map - -- **Hermes loop:** `agent/background_review.py` (review fork + the three prompts), `agent/curator.py` (7-day consolidation), `agent/insights.py` (on-demand aggregation, no LLM), `agent/skill_utils.py` (frontmatter parsing), `tools/memory_tool.py` (`§` delimiter, frozen snapshot, char limits). -- **Claude Code:** hooks / skills / memory / plugins / sub-agents / settings docs + Agent SDK (subagents, sessions) — code.claude.com/docs (2026). -- **Academic:** Letta "Skill Learning" (letta.com/blog/skill-learning); "Portable Agent Memory" (arXiv 2605.11032); "Externalization in LLM Agents" (arXiv 2604.08224); Voyager; Reflexion. 
diff --git a/docs/02-architecture.md b/docs/02-architecture.md deleted file mode 100644 index e230b5a..0000000 --- a/docs/02-architecture.md +++ /dev/null @@ -1,373 +0,0 @@ -# komi-learn — Architecture Specification (v1) - -*Status: draft for review. Companion to `01-research.md`. Decisions locked 2026-05-29; see §10 for the ones open to your override.* - ---- - -## 1. What we're building - -A **continuous-learning layer** that rides underneath an AI agent and, with **zero user friction**: - -1. **Observes** each session's work. -2. **Distills** durable lessons in a background pass (cache-warm, never disturbing the live turn). -3. **Classifies** each lesson by scope — `personal` / `project` / `global-candidate` — and by category. -4. **Persists** lessons to the right store, consolidating toward few rich "umbrella" skills (a slow curator prevents rot). -5. **Recalls** the relevant lessons into the next session automatically. -6. Optionally **contributes** scrubbed, provenance-verified, anonymized lessons to a **public Global Learnings pool**, and **pulls** trusted global lessons back down. - -v1 ships as a **Claude Code plugin**. The learning *engine* is host-agnostic; Claude Code is the first adapter. - -### Naming -- **Learning** — one durable unit of knowledge (the atom). Maps to a memory entry or a skill patch. -- **Skill / umbrella** — a class-level procedural document (`SKILL.md` + `references/`/`templates/`/`scripts/`). -- **Identity (USER)** — facts about who the user is and how they want to be served. -- **Pool** — the shared/global knowledge store. - ---- - -## 2. 
The three planes - -``` -┌──────────────────────────────────────────────────────────────────────┐ -│ HOST PLANE (Claude Code today; Codex/others later) │ -│ hooks ─ SessionStart (recall in) · Stop/SubagentStop (distill out) │ -│ skills ─ umbrellas live as Claude Code skills (auto-triggered) │ -└───────────────▲───────────────────────────────────┬───────────────────┘ - │ additionalContext │ transcript (JSONL) -┌───────────────┴───────────────────────────────────▼───────────────────┐ -│ ENGINE PLANE (host-agnostic, the product) │ -│ ┌─────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌───────────┐ │ -│ │ Recall │ │ Distiller│ │ Classifier│ │ Curator │ │ Store API │ │ -│ │ (in) │ │ (LLM) │ │ (rules+LLM)│ │ (slow) │ │ (CRUD) │ │ -│ └─────────┘ └──────────┘ └──────────┘ └──────────┘ └───────────┘ │ -│ local stores: USER.md · MEMORY.md · skills/ · index.db (FTS) │ -└───────────────▲───────────────────────────────────┬───────────────────┘ - │ pull trusted globals │ contribute scrubbed -┌───────────────┴───────────────────────────────────▼───────────────────┐ -│ POOL PLANE (public Global Learnings — PAM-style) │ -│ scrub → sign → DAG → moderate → publish; query → verify → rehydrate │ -└────────────────────────────────────────────────────────────────────────┘ -``` - -The three planes are deliberately decoupled: the **engine** never assumes Claude Code, and the **pool** never trusts the engine (it re-verifies everything). - ---- - -## 3. Data model - -One schema underlies everything, mapping onto the PAM five-type model (`E/S/P/W/I`). A **Learning** is the unit. (komi-learn focuses on the durable types — Identity, Semantic, Procedural — and uses Episodic only as transient distill input.) 
- -### 3.1 The Learning record - -```jsonc -{ - "id": "blake3:9f86d081…", // content-addressed: BLAKE3(canonical_json(everything below except id/sig)) - "schema": "komi.learning/1", - "type": "identity | semantic | procedural", // PAM I / S / P - "scope": "personal | project | global", - "category": "tooling | workflow | preference | domain-knowledge | pitfall | environment", - "title": "Run pytest with -p no:cacheprovider in this monorepo", - "body": "…the actual lesson, written as data, not instructions…", - "trigger": "When running tests in a uv-managed monorepo", // 'use when' — drives recall - "confidence": 0.0, // 0–1, raised by repeat observation / successful reuse - "evidence": { // provenance, kept LOCAL; never published - "session_id": "…", - "observed_at": "2026-05-29T14:00:00Z", - "signal": "user-correction | technique | fix | repeated-pattern", - "transcript_span": [124, 161] // line range in the JSONL, for audit - }, - "provenance": { // populated only when shared (PAM) - "parent_ids": [], // Merkle-DAG: which learnings/observations this derived from - "origin": "agent:claude-code", - "signature": null // Ed25519 over the root, set at publish time - }, - "usage": { "recalled": 0, "reused": 0, "last_used": null }, - "lifecycle": { "created_at": "…", "updated_at": "…", "state": "active | archived" }, - "tags": ["pytest", "uv", "monorepo"] -} -``` - -Design notes: -- **`id` is the BLAKE3 of the content** → any edit changes the id → tamper-evident, dedup-by-content for free. -- **`trigger` is the recall key** — mirrors a skill's "Use when…". Recall ranks on trigger/tag/body similarity. -- **`evidence` stays local forever.** It carries the raw provenance (session, transcript span) used for auditing *your own* learnings; it is **stripped before any contribution** to the pool. -- **`confidence`** starts low and only rises with corroboration (seen again) or successful reuse — this lets the curator prune low-confidence noise. 
- -### 3.2 Physical stores (Claude Code adapter) - -| Store | Path | Format | Loaded into context | -|---|---|---|---| -| Identity (USER) | `~/.claude/komi/USER.md` | Markdown, `§`-delimited entries | Full, at SessionStart | -| Personal memory | `~/.claude/komi/MEMORY.md` | Markdown, `§`-delimited | Index (~first 25KB) at SessionStart | -| Project memory | `<project>/.claude/komi/MEMORY.md` | Markdown | At SessionStart when in project | -| Skills (umbrellas) | `~/.claude/skills/<name>/SKILL.md` + project `.claude/skills/` | SKILL.md + `references/`/`templates/`/`scripts/` | Descriptions always; bodies on trigger | -| Structured index | `~/.claude/komi/index.db` | **SQLite + FTS5** | Queried by Recall; not injected | -| Global cache | `~/.claude/komi/pool/` | Learning records (JSON) + signatures | Pulled subset → eligible for recall | -| Review queue | `~/.claude/komi/queue/` | Pending contributions (JSON) | Never auto-injected | - -> We deliberately reuse Claude Code's existing auto-memory conventions and the `agentskills.io` skill format so komi-learn's output is *also* useful even with the plugin disabled. Nothing is locked in a proprietary blob. - -The **`index.db`** is the engine's brain: every Learning (across USER/MEMORY/skills) is mirrored as a row with its `trigger`, `tags`, `body`, `scope`, `confidence`, `usage`, indexed in an FTS5 table for fast recall and stored in a regular table for the curator's clustering. The Markdown files remain the human-readable source of truth; `index.db` is a derived cache that can be rebuilt by re-scanning the files. - ---- - -## 4. The learning loop (engine) - -Faithful to Hermes' background-review design, adapted to Claude Code's hook + Agent SDK surface. - -### 4.1 Recall — `SessionStart` & `UserPromptSubmit` - -``` -SessionStart hook - → engine.recall(cwd, recent_context) - 1. load USER.md (full) → identity block - 2. load MEMORY.md index + project MEMORY.md → memory block - 3. 
query index.db (FTS5) for top-K by relevance → just-in-time block - relevance = 0.2·recency + 0.3·salience(confidence·reuse) - + 0.4·similarity(cwd, recent_files, prompt) + 0.1·depth - 4. frame everything as DATA-not-instructions (PAM markers) - → emit hookSpecificOutput.additionalContext -``` - -- **Zero friction:** nothing typed; learnings appear in context as the session opens. -- **Frozen-snapshot discipline (Hermes lesson):** recall happens at `SessionStart`; we do **not** mutate context mid-turn, preserving Claude Code's prefix cache. `UserPromptSubmit` recall is *optional* and used only for sharply on-topic just-in-time pulls (off by default in v1 to keep the cache warm). -- Global learnings that were pulled into the local cache are eligible here, but **clearly labelled** as community knowledge and wrapped in the data-not-instructions frame (they're untrusted input). - -### 4.2 Distill — `Stop` / `SubagentStop` - -This is the analogue of Hermes' forked review agent. - -``` -Stop hook (Claude finished responding) - → if turns_since_distill ≥ NUDGE_TURNS (default 8) OR session ending: - spawn DETACHED background process: komi-distill - (hook returns immediately — never blocks the user) - -komi-distill (tiny Agent SDK / API wrapper — the "review fork") - 1. read transcript JSONL (the conversation snapshot) - 2. run the DISTILL PROMPT (see §5) → candidate learnings[] (structured output) - 3. for each candidate → Classifier (§6) → {drop | personal | project | global-candidate} - 4. write survivors via Store API: - - identity/preference → USER.md - - durable fact → MEMORY.md (personal or project) - - technique/pitfall → patch an existing umbrella skill, else queue a new one - 5. global-candidates → review queue (NEVER auto-published) - 6. 
update index.db; bump confidence on corroborated repeats -``` - -Cost-warm trick (Hermes' ~26% saving): when the distiller runs through the Agent SDK against the same model/provider, we **reuse the system-prompt prefix** so the distill request hits the warm cache. When it can't (e.g. a pure API wrapper), we keep the distill prompt short and run it on a cheaper model (Haiku-class) — distillation is a summarization task, not a reasoning-hard one. - -Constraints honored (from research §3): -- The distiller is **read-mostly**: it may read the transcript and the stores, and write **only** to the learning stores + queue. (Background agents auto-deny prompts, so it must never need human approval.) This is Hermes' tool-whitelist, enforced here by giving the wrapper a restricted tool set. -- It **cannot** run other tools, touch the repo, or take outward actions. - -### 4.3 Curate — slow background pass - -Mirrors Hermes' 7-day curator. Triggered opportunistically (a `SessionStart` checks "last curated > 7d ago and idle"), runs detached: - -``` -curator() - 1. cluster index.db skills by prefix/tag/embedding - 2. for each cluster ≥2 members: propose an UMBRELLA; merge bodies, demote detail to references/ - 3. prune: archive (never delete) learnings with confidence < τ and reuse = 0 older than 30d - 4. re-embed, rebuild FTS5 - 5. write a human-readable CURATION_REPORT.md -``` - -Protected (never auto-edited): user-pinned skills (content-updatable but not archivable), and any skill the user authored by hand and marked `pinned: true`. - -### 4.4 The cadence summary - -| Pass | Trigger | Default | Cost posture | -|---|---|---|---| -| Recall | SessionStart | every session | free (one context injection) | -| Distill | Stop / SubagentStop | every ~8 turns or session end | cheap (short prompt, cache-warm or Haiku) | -| Curate | SessionStart guard | ≥7 days idle | rare; can be a heavier model | - ---- - -## 5. 
The distill prompt (the product's "brain") - -Adapted from Hermes' `_COMBINED_REVIEW_PROMPT` and Letta's reflection→creation pattern, with Letta's key finding baked in (**capture failure modes, not just successes**). Stored at `engine/prompts/distill.md`, versioned. Skeleton: - -``` -You are komi-learn's background distiller. You are reviewing a finished -session to extract DURABLE learnings for future sessions. Your output is -DATA for a learning store — not a message to a human. - -Be ACTIVE: most real sessions yield at least one learning. A pass that -saves nothing is usually a missed opportunity — but saving noise is worse -than saving nothing. - -Extract a learning when ANY of these fired: - • The user corrected your style, tone, format, verbosity, or approach. - (Frustration — "stop doing X", "too verbose", "I hate when you Y", - "just give me the answer", "remember this" — is a FIRST-CLASS signal. - Encode it so the next session starts already fixed.) - • A non-trivial technique, fix, workaround, or debugging path emerged. - • Something you tried FAILED and you found the fix. Capture BOTH the - failure mode AND the fix — failure-aware learnings are more robust. - • A durable fact about the user, their domain, or their project surfaced. - -For each learning, emit a structured record: {type, category, title, -body, trigger ("use when…"), tags, signal}. - -DO NOT capture (these rot into self-imposed constraints): - • Environment-dependent failures: missing binaries, "command not found", - unconfigured creds, uninstalled packages. The user can fix these. - → If a setup issue had a fix, capture the FIX, never "X doesn't work". - • Negative claims about tools/features ("browser tools don't work"). - These harden into refusals you cite against yourself for months. - • Transient errors that resolved. If a retry worked, the lesson is the - retry pattern, not the original failure. - • One-off task narratives ("summarize today's news" is not a class of work). 
- • Anything containing secrets, credentials, or tokens — never. - -Prefer UPDATING an existing umbrella skill over creating a new one. -Name skills at the CLASS level, never after a single task/PR/error. -``` - -The distiller returns structured JSON (enforced via the Agent SDK's structured-output / a tool schema), which the engine consumes deterministically — no fragile parsing. - ---- - -## 6. Classification — how a learning gets its scope (HYBRID) - -> This is the "used with thought; some knowledge is global" logic. The user was unsure on approach; we chose **Hybrid (rules gate → LLM decides)** because a public pool needs a *hard safety floor* AND nuance. Open to override (§10). - -``` -classify(learning) → - STAGE 1 — DETERMINISTIC SAFETY FLOOR (cannot be reasoned around) - reject-to-personal if body/title/tags match ANY: - • secret/credential patterns (API keys, tokens, JWT, PEM, .env values) - • PII (emails, names, phone, addresses) via detectors - • machine/user-specific identifiers (absolute home paths, usernames, - hostnames, internal URLs, private IPs) - • repo/org/project proper nouns from the local git remote + cwd - → if matched: scope = personal (or project if only project-identifiers). STOP. - - STAGE 2 — LLM SCOPE JUDGMENT (only on survivors) - Ask: "Is this lesson GENERALLY TRUE and USEFUL to anyone doing this class - of work, independent of this user/project/machine? Or is it specific to - THIS project's conventions?" - • general technique / language-or-tool behavior / broadly-applicable - pitfall, with NO identifiers → global-candidate - • depends on this project's structure, naming, or choices → project - • about the user themselves / their preferences → personal (Identity) - Also assign `category` and a generalization rewrite: the LLM REWRITES a - global-candidate to strip residual specificity ("in this repo" → "in a - uv-managed monorepo") so the published form is genuinely general. - - STAGE 3 — never auto-publish. 
global-candidates land in the REVIEW QUEUE. -``` - -Rationale recorded for review: Stage 1 guarantees no identifier/secret can *ever* reach the pool even if the LLM misjudges; Stage 2 supplies the nuance Hermes relies on. Strictly safer than LLM-only, strictly more nuanced than rules-only. - ---- - -## 7. Global Learnings — the public pool (PAM-style, full trust pipeline) - -The killer feature, and the part that most needs to be right. It must be: **anonymous, tamper-evident, injection-safe, moderatable, and erasable.** Design follows the "Portable Agent Memory" protocol (arXiv 2605.11032). - -> **v1 implementation decision (2026-05-29): the pool is a GitHub repo of `.md` files — no custom server.** A dedicated repo (`kurikomi-labs/komi-pool`) holds one Markdown file per learning under `learnings//.md`; each file carries the human-readable lesson plus the verifiable signed envelope in a fenced ` ```komi ` block. **Contribution = human-approved Pull Request** (`gh pr create`). **Consumption = periodic `git` sync to a local cache + local re-verification.** This gives free hosting, public auditability, PR-based moderation, and CDN distribution, and reuses the exact verification the protocol below specifies — only the transport is git instead of a bespoke API. The repo's CI (`.github/workflows/verify.yml`) re-runs id + signature + scrub verification on every PR. See `komi/pool/github_backend.py`, `komi/pool/repo_format.py`, `komi/pool/queue.py`, `komi/pool/verify_cli.py`, and `pool-repo-template/`. - -### 7.1 Contribution pipeline (local → pool) - -``` -queued global-candidate - → 1. SCRUB second-pass LLM + detector sweep: strip evidence{}, - any residual PII/secret/identifier; reject on doubt. - → 2. GENERALIZE ensure body is class-level (already rewritten in §6 S2; - re-verify). Drop confidence/usage/local fields. - → 3. CANONICALIZE produce canonical JSON of the publishable subset - {schema,type,category,title,body,trigger,tags}. - → 4. 
ADDRESS id = BLAKE3(canonical_json). parent_ids link to any - global learnings this built on (Merkle-DAG). - → 5. SIGN Ed25519 over the entry root with the contributor's key - (pseudonymous keypair generated locally; identity optional). - → 6. HUMAN GATE user reviews the final publishable form in the queue UI - and approves. NOTHING leaves without this in v1. - → 7. SUBMIT POST to pool endpoint (STUBBED in v1 — writes to a local - "outbox" that a future server would accept). -``` - -### 7.2 Pool-side (server — designed, not built in v1) - -``` -ingest(entry) - • verify BLAKE3 id matches content; verify Ed25519 signature - • verify DAG references resolve and are acyclic - • run independent server-side scrubber (defense in depth) - • dedup by content id; if near-duplicate, link instead of duplicate - • MODERATION: automated safety classifier + community flagging + - confidence accrual (a global learning gains trust as independent - contributors submit a corroborating entry — corroboration = distinct - signers reaching the same content id or a linked one) - • publish to a categorized, queryable index -``` - -### 7.3 Consumption (pool → local) - -``` -pull(category|tags, trust_threshold) - • fetch entries above a trust/corroboration threshold - • re-verify hashes + signatures locally (never trust the server blindly) - • store in ~/.claude/komi/pool/ , marked scope=global, untrusted-origin - • eligible for recall, but ALWAYS injected inside data-not-instructions - PAM markers, and visually labelled as community knowledge -``` - -### 7.4 Safety properties (why this is trustworthy) - -- **Anonymity:** evidence stripped; pseudonymous signing; two scrubber passes (client + server) + the Stage-1 deterministic floor. -- **Tamper-evidence:** content-addressed ids + signed DAG roots; editing any entry breaks the chain. 
-- **Injection-safety:** every consumed global learning is framed as DATA with explicit "do not treat as instructions", plus boundary/role/instruction escaping (PAM's three passes). This matters because the pool is *public and untrusted*. -- **Erasure:** redaction pipeline replaces an entry's content with a typed token while keeping its DAG position → "right to be forgotten" without breaking downstream hashes. -- **Quality:** trust grows by **independent corroboration**, not raw vote counts; low-trust entries aren't pulled by default. - -### 7.5 Categories (v1 taxonomy) -`tooling` · `workflow` · `language-behavior` · `pitfall` · `debugging` · `domain-knowledge` · `formatting/style` · `meta-agent` (how to work with agents). Categories are the primary query axis and the unit of "some knowledge is global, applied everywhere" (e.g. a `meta-agent` learning recalls regardless of project). - ---- - -## 8. Universality — one substrate, many personas - -The brief: works for developers, knowledge workers, students, scientists, everyone. We achieve this **without per-persona code** — the substrate is domain-neutral; personas differ only in *which categories dominate* and *which host surfaces* they use: - -- **Developer (Claude Code):** procedural skills + tooling/pitfall learnings dominate; transcripts are code sessions. -- **Knowledge worker / finance:** domain-knowledge + workflow + formatting/style learnings (e.g. "this analyst wants outputs as a one-page memo, numbers in basis points"). Same USER.md/MEMORY.md/skill machinery. -- **Student / scientist:** identity learnings about level & explanation style ("explain at undergrad level, derive before stating"), domain-knowledge accretion across a course or research line. - -The engine doesn't branch on persona; the **distill prompt's signal list is universal** (corrections, techniques, fixes, durable facts apply to anyone), and the **category taxonomy** carries the domain. 
New hosts (a chat UI, Codex, a web app) are just new *adapters* implementing two methods: `recall() → context` and `on_session_end(transcript) → distill`. - ---- - -## 9. v1 build plan (what gets coded now vs later) - -**Build now (runnable personal loop MVP):** -- `engine/` — Store API (Markdown + `index.db`), Recall, Distiller wrapper, Classifier (Stage-1 rules + Stage-2 LLM), schema + canonicalization (BLAKE3 ids). -- `adapters/claude_code/` — `SessionStart` recall hook, `Stop` distill hook, plugin manifest, the distiller invoked via Agent SDK/API. -- Local end-to-end: a real session → distill → learnings on disk → recalled next session. - -**Designed + stubbed now:** -- Global pool: scrub/generalize/canonicalize/sign/queue all implemented locally; the network `submit`/`pull` write to a local outbox/inbox (no server). Ed25519 keypair generated locally. - -**Later (post-review loop):** -- The pool server + moderation + corroboration trust. -- Additional host adapters (Codex, chat). -- Embedding-based recall/clustering (v1 uses FTS5 + heuristics; embeddings are an upgrade). -- A `verify`/inspection UI for the review queue. - ---- - -## 10. Decisions open to your override (after review) - -1. **Classification = Hybrid** (rules floor → LLM). Alt: LLM-only (more nuance, less safe) or rules-only (predictable, blunt). *My pick stands unless you say otherwise.* -2. **Distill cadence = every ~8 turns + session end.** Hermes uses 10 turns / 10 iterations; tune to taste. -3. **Distiller model.** Cache-warm same-model (cheapest if Agent SDK path) vs. dedicated cheap model (Haiku-class). v1 supports both; default = whatever the host session uses, via SDK. -4. **`UserPromptSubmit` just-in-time recall** off by default (protects prefix cache). Toggle on if you want sharper mid-session pulls. -5. 
**Human gate before publish = mandatory in v1.** Could later offer an "auto-publish high-confidence, fully-scrubbed `meta-agent`/`language-behavior` learnings" mode — but not until trust is proven. - ---- - -*Next: build `engine/` + the Claude Code adapter per §9, then we review and loop.* diff --git a/docs/03-roadmap.md b/docs/03-roadmap.md deleted file mode 100644 index b42fe96..0000000 --- a/docs/03-roadmap.md +++ /dev/null @@ -1,77 +0,0 @@ -# komi-learn — Roadmap - -*Living document. Reflects what's actually built (verified against the code), not aspiration.* - -## ✅ Shipped - -**Phase 0 — Research & architecture.** Hermes/Letta/PAM synthesis; full spec. (`docs/01`, `docs/02`) - -**Phase 1 — Personal learning loop.** Content-addressed `Learning` model (BLAKE3, tamper-evident); Store (Markdown + skills/ + SQLite FTS); distiller with anti-capture rules; hybrid classifier (deterministic safety floor → LLM scope); relevance-ranked recall with data-not-instructions framing. - -**Phase 2 — Global Learnings pool (GitHub-backed).** A repo of signed `.md` files — no server. PR-based contribution, periodic git sync, local re-verification, CI safety gate. **Live** on `kurikomi-labs/komi-pool` (real PR → CI green → merge → pull, proven). - -**Phase 3 — One-command install + OAuth.** `komi-learn install/doctor/status/sync/login/curate`. OAuth-first distillation (free, no key) with API-key fallback; **strict requirements gate** (real model-call verification, fails loudly, no silent degradation); runtime stays safe (never crashes a session). Live and verified on a real machine. - -**Phase 4 — The Curator.** Slow (~weekly) consolidation pass: deterministic pruning (archive, never delete, stale+unused+low-confidence; pinned/pool exempt) + LLM "umbrella" consolidation of overlapping skills. Cadence-guarded at SessionStart; writes `CURATION_REPORT.md`; `komi-learn curate`. 
*(Also closed a gap here: procedural learnings now persist as `skills//SKILL.md`.)* - -**Reviews (4 lenses).** Adversarial correctness/security bug-hunt, then AI-Engineer, Security-Engineer, and Software-Architect persona reviews — all real findings fixed + regression-tested (incl. a CRITICAL recall prompt-injection fence-escape). See `docs/04-ai-engineering-review.md`, `docs/05-adr-log.md`. - -**Phase 5 review (3 lenses, on corroboration + semantic clustering).** Security/AI/Architect personas re-reviewed the Phase 5b + clustering work; all findings fixed + regression-tested. Highlights: **Sybil interim hardening** (corroboration clamped to 3, advisory-only, never a hard gate — keys are free to mint, so distinct-key ≠ distinct-person; GitHub-account binding deferred to Phase 7 — see ADR-9); pull made crash-proof against a malformed pool file (one bad file no longer disables all community recall); signature-array + parsed-block **DoS caps**; **`(id, origin_root)` composite index identity** (a pool copy no longer overwrites/evicts the user's local learning of the same id); `corroboration` made structurally transient (never deserialized from content, never written to Markdown); clustering threshold **re-calibrated 0.45→0.58** against the real model with a labeled-set regression test, mutual-similarity clustering (no star-FPs), and a stronger consolidator contract; curator clustering vectorized (numpy) + reuses persisted vectors; CI **append-only signature** check + branch-protection guidance; shared adapter **config schema** (Codex had silently dropped 6 env vars). - -**Phase 6 — Second host adapter (OpenAI Codex CLI).** Proves the engine is genuinely host-agnostic. `komi/adapters/base.py` Adapter ABC made real; `komi/adapters/hooklib.py` holds the host-neutral hook logic; `komi/adapters/codex/` is a THIN shim (CodexAdapter + ~/.codex paths + OpenAI/codex LLM + hooks). `komi-learn install --host codex`. 
**Demonstrated** end-to-end: a learning distilled in a Codex session is recalled in the next, same engine, files under `$CODEX_HOME`, zero Claude Code (`examples/demo_codex_host.py`, `tests/test_codex_adapter.py`). *(Live Codex auth not exercised from the build sandbox — same caveat as Claude Code distill; verify in a real Codex session.)* - -## 🔜 Next - -**Phase 5 — Trust & quality at scale** -- ✅ **Semantic recall (done).** Meaning-based recall via a local embedding model - (`komi-learn[smart]`), keyword fallback when absent. `engine/embed.py`, - `vector_search`, semantic-first `_candidate_hits`. Verified with the real model. -- ✅ **Corroboration-based trust (done).** A pool learning carries a `signatures` - array — one per distinct contributor who independently signed the same - content-addressed lesson. `pull` counts *distinct valid* signers and gates on - `pool.min_corroboration`; recall adds a small log-dampened bonus so - well-corroborated community knowledge ranks higher (never overriding relevance). - Publishing an already-present learning by a new signer *appends* their signature - (corroboration ↑) instead of being a no-op. Legacy single-signer files stay valid - (no re-signing); the vendored CI verifier counts corroboration in lockstep - (parity-tested). No new dependencies. `pool/corroboration.py`, `engine/recall.py`, - `engine/store.py` (corroboration column), `tests/test_corroboration.py`. -- ✅ **Embedding-based clustering (done).** When the embedding model is present the - curator clusters procedural learnings by *meaning* (cosine ≥ threshold, initially - calibrated ~0.45 and later re-calibrated to 0.58 against the real model) instead of shared title-word/tag — so conceptually - related lessons that share no surface form (e.g. "ripgrep" vs "ag" for code search) - get proposed for the same umbrella. Deterministic greedy seed-based grouping; - lexical clustering stays as the zero-dep fallback; the LLM consolidator remains the - real merge gate. 
`engine/curator.py` (`_cluster_semantic`), `tests/test_semantic_clustering.py`. - -**Phase 5 is complete.** ✅ Semantic recall · ✅ Corroboration trust · ✅ Semantic clustering. - -**Phase 6 — Second host adapter** *(proves "works for every agent")* -- A non–Claude-Code adapter (Codex, or a chat UI) behind the same two-method interface (`recall()` + `on_session_end()`). The real test that the substrate isn't Claude-specific. -- Persona validation (developer / finance / student / scientist on one substrate). - -**Phase 7 — Polish & open up** *(largely done)* -- ✅ Lean, install-first README; root MIT LICENSE; public repo metadata. -- ✅ **PyPI** — `pip install komi-learn` is live (v0.1.0), clean-venv verified. -- ✅ **Signer↔GitHub-account binding** (the Sybil fix): signatures bind `github_user`, - corroboration counts distinct *accounts* not keys, CI enforces PR-author == signer - + an account-age bar. Back-compat preserved (empty username → pre-Phase-7 bytes). ADR-9. -- ✅ **Review-queue inspection CLI** — `komi-learn queue list|approve|reject` - (approve signs with your username + opens a PR). -- ✅ **Erasure / right-to-be-forgotten** — `komi-learn forget <id>` (archive, - or `--hard` for true deletion via `Store.delete`); pool-shared items print the - removal-PR path. -- 🔜 Plugin-marketplace distribution; docs site. *(external polish.)* -- 🔜 GitHub branch protection on both repos (UI; require the "Verify learnings" check). -- 🔜 CI: add a minimal-install matrix leg (no optional extras) to actually exercise - the zero-dependency degradation paths (blake2b/unsigned/keyword). The current CI - installs `[dev,crypto,smart]` so several signing/embedding tests run as written; - proving the dep-absent paths needs either skipif-guards or a second install - profile. (The engine *is* zero-dep; this is a test-coverage gap, not a code gap.) 
- -## Known gaps / honest notes -- Recall ranking is semantic (embeddings) when the model is installed, keyword FTS otherwise; both feed the same blend + a corroboration bonus. -- Trust now has corroboration weighting (distinct-signer count), but the pool is young — `min_corroboration` defaults to 1 until enough lessons have independent signers to make a higher gate meaningful. -- Two hosts proven (Claude Code + Codex) via the shared engine; broader persona validation (finance/student/scientist) still unproven end-to-end. -- The pool repo's vendored `verify.py` must stay in sync with the engine's verification + corroboration logic (parity-tested in `tests/test_review_fixes.py` and `tests/test_corroboration.py`). After any signing-scheme change, re-run `pool-repo-template/.github/scripts/resign_seeds.py`. -- Repos are public (Phase 7). Corroboration's distinct-signer count is Sybil-forgeable until signer↔account binding lands — it's clamped + advisory-only meanwhile (ADR-9). diff --git a/docs/04-ai-engineering-review.md b/docs/04-ai-engineering-review.md deleted file mode 100644 index 03fc0cd..0000000 --- a/docs/04-ai-engineering-review.md +++ /dev/null @@ -1,92 +0,0 @@ -# komi-learn — Review through the "AI Engineer" lens - -*Re-review of the codebase against the obligations of the `engineering-ai-engineer` -persona: bias/fairness, privacy-preserving ML, interpretability/transparency, model -drift, inference latency/cost, MLOps lifecycle, adversarial robustness.* - -This is deliberately a **different lens** from the earlier correctness/security review -(`docs` + `test_review_fixes.py`). That pass found bugs; this one asks "is this a -well-behaved *ML system*?" I'm honest below about which persona obligations genuinely -apply to komi-learn and which don't — komi-learn orchestrates LLM calls and ranks -recalled context, it doesn't train models, so some classical-ML concerns map and some -don't. 
- ---- - -## How each persona obligation maps to komi-learn - -| Persona obligation | Maps? | Why | -|---|---|---| -| **Bias / fairness across demographic groups** | ⚠️ Reframed | There are no demographic groups or protected classes here. But there IS a *ranking-fairness* analogue: the recall ranker can develop a **rich-get-richer popularity bias** that buries newer/rarer-but-relevant learnings. That's the real "fairness" issue for this system. | -| **Privacy-preserving techniques** | ✅ Strong | Central to the design — the deterministic safety floor, evidence-stripping, pseudonymous signing. Already a first-class concern. | -| **Interpretability / transparency** | ⚠️ Partial | The curation report + `[community]` labels + human-readable Markdown give good transparency. But the **recall ranker is a black box to the user** — they can't see *why* a learning surfaced. | -| **Model drift detection + retraining triggers** | ⚠️ Missing analogue | No model is trained, so no weight drift. But there's a real analogue: **the learning corpus drifts** (stale facts accrete, the user's preferences change) and nothing surfaces it. | -| **Inference latency / cost** | ❌ Gap | The distiller and judge make real LLM calls with **no cost/latency tracking, no caching on the CLI path**. An ML engineer would never ship inference with zero observability. | -| **MLOps lifecycle / monitoring** | ⚠️ Partial | `doctor`/`status` give some health view; there's no metrics on the loop itself (how many distills, hit rate, cost). | -| **Adversarial robustness** | ✅ Mostly | Recalled pool content is framed as data-not-instructions (good). But there's **no cap/rate-limit on how much untrusted community content floods a single recall**, and no dedup of recalled content. | -| **A/B testing / accuracy metrics** | ❌ Doesn't map | No served model to A/B; "accuracy" of a learning isn't measurable the way a classifier's is. Honestly not applicable — I won't invent it. 
| - ---- - -## Findings (ordered by real impact) - -### 1. Recall popularity-bias feedback loop *(the "fairness" issue)* -**`komi/engine/recall.py`** — `salience = confidence·(1+reused)`, and recall calls -`_mark_recalled`, and corroboration bumps confidence. The loop: - -> a learning surfaces → it's marked recalled / reused → its salience rises → it -> surfaces *more* → newer or rarer-but-relevant learnings are crowded out. - -This is the classic recommender popularity-bias trap. Over months, recall ossifies -around a handful of "greatest hits" and stops surfacing fresh knowledge. **Fix:** -dampen the reuse term (log, not linear), and separate "was shown" (weak signal) from -"was actually useful" (strong signal) so merely-surfacing doesn't inflate rank. - -### 2. Identity recall is unbounded -**`recall.py`** — *every* active identity learning is injected each session (only -char-truncated at the very end). As the user model grows (it's designed to grow -forever), the identity block bloats the prompt and **older identity facts never age -out**, even when contradicted by newer ones (preference drift). **Fix:** rank + cap -identity like JIT learnings; let recency/confidence decide which persona facts lead. - -### 3. No inference cost / latency observability -**`distill.py` / `llm_cli.py`** — the distiller and judge make LLM calls with no -record of tokens, latency, or count. An ML engineer's first instinct: you can't -optimize or trust what you don't measure. **Fix:** record per-pass distill telemetry -(count, duration, candidate count) to state; surface in `status`. - -### 4. Distiller candidates not capped or deduped -**`distill.py`** — `_parse_candidates` accepts whatever the model returns; a -misbehaving or prompt-injected model could emit hundreds of "learnings" in one pass, -all written to disk. **Fix:** cap candidates per pass (e.g. 12) and dedup by content. - -### 5. 
No corpus-drift surfacing -Nothing tells the user (or the curator) that the corpus is going stale — e.g. "60% of -your learnings are >90 days old and never reused." **Fix:** compute a cheap drift/health -metric (age + confidence + reuse distribution) and surface it in `doctor`/`curate`. - -### 6. Untrusted community content not rate-limited in recall -**`recall.py`** — pool (`scope=global`) learnings are framed as data, but a single -recall could be dominated by community items if they out-rank personal ones. Defense -in depth for a *public, untrusted* source argues for a **cap on community items per -recall** and dedup of recalled content by id. **Fix:** cap community share; dedup. - -### 7. Recall opacity (interpretability) -The user can't see why something surfaced. Low priority, but the persona values it: -optionally annotate recalled items with the dominant ranking reason (e.g. "matched -your current files"). Deferred — nice-to-have, not a correctness issue. - ---- - -## What I deliberately did NOT add (honesty over persona-cosplay) - -- **Demographic bias testing** — there are no demographic features in this data. Adding - a "fairness metric across groups" would be theater. The ranking-bias fix (#1) is the - honest version of the obligation. -- **A/B testing framework** — nothing to A/B; no served model with measurable accuracy. -- **Model retraining triggers** — no model weights. The corpus-drift surfacing (#5) is - the real analogue. - -Applying a persona well means honoring its *intent* where it maps and refusing to fake -it where it doesn't. Fixes #1–#6 are implemented; see `test_ai_eng_fixes` (and the -git history) for the changes. diff --git a/docs/05-adr-log.md b/docs/05-adr-log.md deleted file mode 100644 index 295e784..0000000 --- a/docs/05-adr-log.md +++ /dev/null @@ -1,139 +0,0 @@ -# komi-learn — Architecture Decision Log - -Lightweight ADRs: the significant decisions and **why** (not just what), plus the -trade-off each one accepts. 
Written after the Software Architect review flagged -these as load-bearing-but-undocumented. Newest decisions can be appended. - -Format per entry: **Decision · Context · Trade-off accepted · Reversibility.** - ---- - -## ADR-1 — Engine injects the LLM via a Protocol, not a concrete client -**Decision.** `LLMClient` / `ScopeJudge` are `Protocol` types; the engine never -imports the Anthropic SDK. Adapters/tests pass a concrete client. -**Context.** The engine must run host-agnostic and with **zero required deps** -(stdlib-only fallback), and be testable with a deterministic mock. -**Trade-off.** Gain: testability, no vendor coupling, offline core. Give up: -a tiny bit of indirection; no compile-time guarantee the client is "complete." -**Reversibility.** Easy — concrete clients already satisfy the Protocol. - -## ADR-2 — Markdown is the source of truth; SQLite FTS is a derived cache -**Decision.** Learnings persist as human-readable Markdown (`USER.md`, -`MEMORY.md`, `skills//SKILL.md`); `index.db` is rebuilt from them. -**Context.** Two consumers with opposite needs — humans (readable, hand-editable, -survives the plugin being disabled) and the engine (fast recall, clustering). -**Trade-off.** Gain: both audiences served; the index is disposable/rebuildable. -Give up: must keep two stores in sync (mitigated — every `upsert` writes both, -`reindex` rebuilds from Markdown and *preserves* DB-only telemetry); Markdown -upsert is O(n) per write (fine <~1000 learnings; revisit at scale). -**Reversibility.** Medium — the `.md` format is now load-bearing (see ADR-5). - -## ADR-3 — Distillation runs detached, never blocking the session -**Decision.** The `Stop` hook spawns the distiller as a detached process and -returns immediately. -**Context.** A session must never wait on (or be broken by) a background learning -pass; and the live prompt cache must not be disturbed. -**Trade-off.** Gain: zero added latency, failure isolation, prefix-cache intact. 
-Give up: a distiller crash is *silent* to the user (mitigated by graceful no-op -on missing model; a failure log is a known follow-up). -**Reversibility.** Easy — cadence/threshold are config. - -## ADR-4 — Mandatory human gate before any pool contribution -**Decision.** Global-candidate learnings sit in a local review queue; nothing is -pushed to the public pool without explicit approval. -**Context.** The pool is shared, public, and signed — irreversibly. One bad/leaky -learning harms everyone and is costly to retract. -**Trade-off.** Gain: trust, user agency, no accidental leaks. Give up: friction / -slower pool growth. (A future opt-in auto-publish for high-confidence, -fully-scrubbed `meta-agent` learnings is possible but deliberately not built.) -**Reversibility.** Easy to relax later; hard to claw back over-shared data — so we -start strict. - -## ADR-5 — Content-addressing (BLAKE3) + the `.md` envelope format -**Decision.** A learning's id is `BLAKE3(canonical_json(content))`; the pool -stores one `.md` per learning at `learnings//.md` with a verifiable -` ```komi ` JSON block. -**Context.** Need dedup (same lesson → same id → corroboration, not duplication), -tamper-evidence, and a portable, reviewable, server-less pool. -**Trade-off.** Gain: free dedup, tamper-evidence, git-as-database, PR-reviewable -diffs. Give up: the format is now **load-bearing** for the live pool — changing it -needs a migration + dual-read. blake2b fallback exists for no-blake3 hosts but a -blake3 id can't be verified without blake3 (consumers need the `crypto` extra). -**Reversibility.** **Hard.** Treat the format + signing scheme as a stable API. -(We already paid one migration cost when the signing message changed — see git log.) - -## ADR-6 — The pool CI verifier is a vendored copy, not an import -**Decision.** `pool-repo-template/.github/scripts/verify.py` re-implements -canonicalization / id / signature / scrub instead of importing `komi`. 
-**Context.** The pool repo must verify itself with no dependency on the code -package (decoupled repos; the pool's CI installs only `blake3`+`pynacl`, never -`komi`). Keeps the two repos independently releasable. -**Trade-off.** Gain: pool independence, CI works standalone. Give up: duplication -that can drift. **Mitigation:** a parity test (`tests/test_review_fixes.py`) -asserts the vendored detectors/canonicalization/id/signing match the engine — -it has already caught one real drift. -**Reversibility.** Easy — could publish a tiny verifier package later and import it. - -## ADR-7 — Strict install gate; runtime degrades safely -**Decision.** `komi-learn install` verifies every requirement *for real* (incl. a -live model call) and **fails loudly** if unmet; but at *runtime* a hook never -crashes the session — it no-ops. -**Context.** "No hacks": if install says OK, it works. But a background hook must -never break the user's live agent. -**Trade-off.** Gain: honest setup + un-killable sessions. Give up: install can -refuse on a flaky/restricted environment (escape hatch: `--allow-incomplete`). -**Reversibility.** Easy — gate strictness is policy, not structure. - -## ADR-8 — One Adapter contract; host plumbing stays in the adapter -**Decision.** `komi.adapters.base.Adapter` (ABC) defines `recall()` + -`on_session_end()`; `ClaudeCodeAdapter` implements it. The engine never imports an -adapter (dependency points adapter → engine only). -**Context.** "Works everywhere" needs the engine to be genuinely host-agnostic and -a second host to be a known surface, not copy-paste. -**Trade-off.** Gain: provable universality, clean dependency direction, Phase-6 -ready. Give up: a little indirection now for a host (Claude Code) that's the only -one today. -**Reversibility.** Easy — additive. 
- -## ADR-9 — Corroboration is a transient count of distinct signers, never part of the id -**Decision.** A pool learning carries a `signatures` array (one entry per distinct -contributor who signed the *same* content). Its corroboration level = the number of -*distinct, valid* signers, computed at pull time and attached to the in-memory -`Learning` (`corroboration`) + the index column. It is **excluded** from -`content_view()` / the content-addressed id. The legacy single-`signer` shape is -treated as signature #1 (back-compatible); the array is authoritative when present. -**Context.** "Verified" (valid signature) ≠ "good." Independent agreement is a real -trust signal, and the content-addressed id already makes it *mechanically* -detectable: two people who distill the same lesson produce the same file, and each -signs a message binding their own pubkey (so signatures can't be replayed under -another identity — ADR re: signing scheme). But the same lesson must hash -*identically* regardless of how many have signed it — otherwise corroboration would -fork the very files it's meant to merge. -**Trade-off.** Gain: a trust *hint* (`pool.min_corroboration`) + a recall ranking -nudge, with **zero new dependencies** and no id churn; old files and the live pool -stay valid. Give up: the count is recomputed on every pull (cheap) rather than -stored in the content; a signing-scheme change still invalidates signatures and -needs a re-sign pass (`resign_seeds.py`) — corroboration doesn't change that. -**Reversibility.** Easy — the array is additive; drop the bonus/gate and the system -reverts to binary verified/not. The authoritative-array rule is what keeps the -identity-swap defense intact (legacy-field tampering is ignored; parity-tested). 
- -**Sybil resistance — distinct key ≠ distinct person, so count distinct ACCOUNTS.** -A contributor key is an Ed25519 keypair generated locally for free, so "N distinct -keys" is forgeable: one attacker mints N keys and signs the same content under each -to fabricate a high count. Flagged Critical in the 3-persona security review. -**Fix (shipped, Phase 7):** each signature binds the contributor's GitHub username -(`github_user`) *inside the signed message* (so it can't be swapped post-signature), -and corroboration counts **distinct accounts, not keys** (`_identity` in -`corroboration.py`) — one person's many keys under one account count once. The pool's -CI enforces it: a `--identity` step requires every signature a PR *adds* to be bound -to the **PR author's** account (hard fail otherwise) and clears an account-age bar -(`--identity`/`check_author_binding` in the vendored `verify.py`; mirrored in the -engine). Sybil now costs N established GitHub accounts that each open a PR, not N free -keys. **Defense in depth retained:** the count is still clamped to -`MAX_COUNTED_SIGNERS` (3) and recall only *filters/down-weights* on corroboration, -never *admits* otherwise-excluded content; the recall bonus is bounded (≈0.11 max). -**Back-compat:** `github_user` is added to the signed bytes only when non-empty, so -every pre-Phase-7 signature (and the seeds) still verifies byte-identically; a legacy -unbound signature still counts (by key) but earns no *account-verified* corroboration. -A pool wanting the strong guarantee requires `github_user` via CI + branch protection. diff --git a/komi/adapters/claude_code/paths.py b/komi/adapters/claude_code/paths.py index 24566fc..caefc51 100644 --- a/komi/adapters/claude_code/paths.py +++ b/komi/adapters/claude_code/paths.py @@ -3,7 +3,7 @@ Personal scope lives under ``~/.claude/komi``; project scope under ``/.claude/komi`` so it can be committed and shared with a team. 
A single shared ``index.db`` (the "one brain") lives at the personal root and records each -row's own scope. Mirrors docs/02-architecture.md §3.2. +row's own scope. """ from __future__ import annotations diff --git a/komi/engine/classify.py b/komi/engine/classify.py index 38eadfd..3a14e86 100644 --- a/komi/engine/classify.py +++ b/komi/engine/classify.py @@ -18,7 +18,7 @@ residual specificity from a global candidate. The LLM is injected as a callable so the engine runs (and tests pass) with a -deterministic mock; the real one is wired in adapters. See docs/02-architecture.md §6. +deterministic mock; the real one is wired in adapters. """ from __future__ import annotations diff --git a/komi/engine/distill.py b/komi/engine/distill.py index 803441e..ba1843e 100644 --- a/komi/engine/distill.py +++ b/komi/engine/distill.py @@ -12,7 +12,7 @@ with a deterministic mock and host-agnostic in production (Claude Agent SDK, the Anthropic API, or any other backend wire in the same interface). The distiller itself takes no outward actions and writes only to the learning stores + queue — -matching the read-mostly tool whitelist from docs/02-architecture.md §4.2. +matching the read-mostly tool whitelist. """ from __future__ import annotations diff --git a/komi/engine/model.py b/komi/engine/model.py index b2e3061..3b817b4 100644 --- a/komi/engine/model.py +++ b/komi/engine/model.py @@ -10,7 +10,7 @@ means two agents that independently distill the same lesson arrive at the same id, which is what makes pool dedup and cross-agent corroboration work. -See docs/02-architecture.md §3 for the schema rationale. +The schema is kept JSON-trivial and forward-compatible. """ from __future__ import annotations @@ -136,7 +136,7 @@ def content_view(self) -> dict[str, Any]: Excludes id/signature and every local-only or mutable field, so the same lesson distilled by two different agents hashes identically. Tags are sorted and lowercased so trivial ordering/case differences don't fork - the id. 
This is the canonical content per docs/02-architecture.md §3.1. + the id. This is the canonical content. """ return { "schema": self.schema, diff --git a/komi/engine/recall.py b/komi/engine/recall.py index e4f57af..b62982a 100644 --- a/komi/engine/recall.py +++ b/komi/engine/recall.py @@ -11,7 +11,7 @@ Everything recalled is wrapped in PAM-style *data-not-instructions* framing, and anything sourced from the public pool is additionally labelled as untrusted community knowledge — because recalled text (especially global) is untrusted -input and must never be able to hijack the agent. See docs/02-architecture.md §4.1, §7.4. +input and must never be able to hijack the agent. Critical discipline (the Hermes frozen-snapshot lesson): recall runs ONCE at session start so the injected prefix stays byte-stable and the host's prompt diff --git a/komi/engine/store.py b/komi/engine/store.py index f677b9f..3690053 100644 --- a/komi/engine/store.py +++ b/komi/engine/store.py @@ -15,7 +15,7 @@ always be rebuilt from the Markdown by :meth:`reindex`. Writes are atomic (temp file + os.replace) and deduped by content id, following -the patterns in docs/02-architecture.md §3.2. +the standard write-to-temp-then-rename pattern. """ from __future__ import annotations diff --git a/komi/pool/contribute.py b/komi/pool/contribute.py index 0bb117a..332b336 100644 --- a/komi/pool/contribute.py +++ b/komi/pool/contribute.py @@ -1,6 +1,6 @@ """komi-learn pool — contribution & consumption pipeline (PAM-style). 
-Contribution (local → pool), per docs/02-architecture.md §7.1: +Contribution (local → pool): scrub → generalize-check → canonicalize → content-address → sign → HUMAN GATE → outbox -Consumption (pool → local), §7.3: +Consumption (pool → local): diff --git a/komi/pool/corroboration.py b/komi/pool/corroboration.py index 3dff23d..6cb8082 100644 --- a/komi/pool/corroboration.py +++ b/komi/pool/corroboration.py @@ -38,7 +38,7 @@ cue), and recall only ever *down-weights/filters* on corroboration, never *admits* untrusted content it would otherwise exclude. Legacy signatures with no ``github_user`` still count (by key) but are NOT account-verified — a pool that -wants the strong guarantee requires github_user via CI. See docs/05-adr-log.md ADR-9. +wants the strong guarantee requires github_user via CI. """ from __future__ import annotations diff --git a/komi/pool/identity.py b/komi/pool/identity.py index e0d50f9..cdc8c00 100644 --- a/komi/pool/identity.py +++ b/komi/pool/identity.py @@ -7,7 +7,7 @@ fall back to a clearly-labelled unsigned mode so the MVP still runs (the pool server would reject unsigned entries — that's the point of the label). -See docs/02-architecture.md §7.1 step 5. +The public key is the only thing that travels. """ from __future__ import annotations diff --git a/komi/pool/queue.py b/komi/pool/queue.py index 76a9006..0ed70c1 100644 --- a/komi/pool/queue.py +++ b/komi/pool/queue.py @@ -11,7 +11,7 @@ This module lists the queue, lets a caller approve/reject, and publishes approved items through a :class:`GitHubPool` (opening a PR, or committing in local mode). Publishing re-prepares + re-signs the envelope so the freshest scrub runs at the -moment of contribution. See docs/02-architecture.md §7.1. +moment of contribution. """ from __future__ import annotations diff --git a/komi/pool/repo_format.py b/komi/pool/repo_format.py index 81a014c..eadd58a 100644 --- a/komi/pool/repo_format.py +++ b/komi/pool/repo_format.py @@ -22,7 +22,7 @@ ``signatures[0]`` mirrors it. 
The count of *distinct, valid* signers is the corroboration level, computed on pull (never stored in the content id). -See docs/02-architecture.md §7 and the komi-pool repo template. +See the komi-pool repo template. """ from __future__ import annotations diff --git a/tests/test_ai_eng_fixes.py b/tests/test_ai_eng_fixes.py index 3784e94..22e5116 100644 --- a/tests/test_ai_eng_fixes.py +++ b/tests/test_ai_eng_fixes.py @@ -1,4 +1,4 @@ -"""Regression tests for the AI-Engineer-lens review fixes (docs/04-ai-engineering-review.md): +"""Regression tests for the AI-Engineer-lens review fixes: popularity-bias damping, identity/community bounding, candidate cap+dedup, corpus drift.""" import time diff --git a/tests/test_secarch_fixes.py b/tests/test_secarch_fixes.py index caf00cb..83dc780 100644 --- a/tests/test_secarch_fixes.py +++ b/tests/test_secarch_fixes.py @@ -1,5 +1,5 @@ -"""Regression tests for the Security-Engineer + Software-Architect review fixes -(docs/04..05). The headline one: a malicious pool learning must not break out of +"""Regression tests for the Security-Engineer + Software-Architect review fixes. +The headline one: a malicious pool learning must not break out of the recall data fence (prompt injection).""" import sys