kurikomi-labs
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎komi/adapters/claude_code/hook_capture.py‎
Lines changed: 95 additions & 0 deletions b/‎komi/adapters/claude_code/hook_capture.py‎
Lines changed: 95 additions & 0 deletions
diff --git a/‎komi/adapters/claude_code/hook_compact.py‎
Lines changed: 5 additions & 1 deletion b/‎komi/adapters/claude_code/hook_compact.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎komi/adapters/claude_code/hook_recall.py‎
Lines changed: 132 additions & 21 deletions b/‎komi/adapters/claude_code/hook_recall.py‎
Lines changed: 132 additions & 21 deletions
diff --git a/‎komi/adapters/claude_code/paths.py‎
Lines changed: 36 additions & 0 deletions b/‎komi/adapters/claude_code/paths.py‎
Lines changed: 36 additions & 0 deletions
@@ -34,7 +34,7 @@ pip install -e .
 
 ```bash
 komi-learn doctor      # check the install and what to fix
-komi-learn update      # upgrade to the latest version (--check to only look)
+komi-learn update      # upgrade komi-learn + the agent's hooks (--check to only look)
 komi-learn status      # config + how much it has learned
 komi-learn config      # change any setting (menu, or `config set <key> <val>`)
 komi-learn sync        # pull the latest community learnings
 
@@ -0,0 +1,95 @@
+"""Diagnostic hook — capture the RAW payload Claude Code sends, then behave normally.
+
+Purpose: the compaction re-injection feature is built on assumptions about which
+events fire on a ``/compact`` and what field names they carry (``hook_event_name``,
+``source`` vs ``trigger``, whether ``additionalContext`` / plain stdout actually
+reach the model). We have never observed a real payload. This hook records the exact
+stdin we receive — keyed by which entry point invoked it — to a JSONL file, then
+delegates to the normal recall path so the session is unaffected.
+
+Enable it with ``komi-learn capture on`` (which re-points the SessionStart +
+PostCompact hooks here), run ``/compact`` in a real Claude Code session, then
+``komi-learn capture show`` to inspect what actually fired. ``komi-learn capture
+off`` restores the normal hooks.
+
+Two entry points so we can tell SessionStart from PostCompact even if the payload
+omits the event name:
+  ``python -m komi.adapters.claude_code.hook_capture``            (SessionStart)
+  ``python -m komi.adapters.claude_code.hook_capture --compact``  (PostCompact)
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+import time
+
+from . import paths
+
+
+def capture_path():
+    """Return the JSONL file that captured hook payloads are appended to.
+
+    The file is ``_hook_capture.jsonl`` directly under ``paths.personal_root()``.
+    """
+    root = paths.personal_root()
+    return root / "_hook_capture.jsonl"
+
+
+def _capture(entry_event: str, raw: str) -> None:
+    """Append one JSON line describing the payload this hook invocation received.
+
+    ``entry_event`` is the event implied by the entry point that ran; ``raw`` is the
+    stdin text exactly as it was read. The record keeps the raw text (capped at 8192
+    characters), its full length and, when the text parses to a JSON object, the
+    sorted key names plus a few top-level fields of interest. Any failure (parse,
+    mkdir, write) is swallowed: this diagnostic must never raise into the hook.
+    """
+    try:
+        try:
+            decoded = json.loads(raw) if raw.strip() else {}
+        except Exception:
+            decoded = None                  # not valid JSON; only the raw text is kept
+        is_object = isinstance(decoded, dict)
+
+        def field(name: str):
+            # Top-level value from an object payload; None for any other shape.
+            return decoded.get(name) if is_object else None
+
+        record = {
+            "ts": time.time(),
+            "entry_event": entry_event,     # which entry point ran (authoritative)
+            "raw_len": len(raw),
+            "raw": raw[:8192],              # cap; payloads are tiny
+            "parsed_keys": sorted(decoded.keys()) if is_object else None,
+            "hook_event_name": field("hook_event_name"),
+            "source": field("source"),
+            "trigger": field("trigger"),
+            "session_id": field("session_id"),
+        }
+        target = capture_path()
+        target.parent.mkdir(parents=True, exist_ok=True)
+        with open(target, "a", encoding="utf-8") as out:
+            out.write(json.dumps(record) + "\n")
+    except Exception:
+        pass
+
+
+def main(default_event: str = "") -> int:
+    """Capture the raw hook payload, then run the normal recall hook on it.
+
+    Stdin is read ONCE (bounded by ``hook_recall._MAX_STDIN_BYTES``; an oversized
+    or unreadable payload is treated as empty), recorded via ``_capture``, and then
+    handed to ``hook_recall.main`` by temporarily replacing its stdin reader.
+
+    ``default_event`` is the event implied by the entry point ("PostCompact" for
+    ``--compact``); an empty value is recorded as "SessionStart".
+
+    Always returns 0 when delegation fails, so a broken diagnostic never breaks the
+    session.
+    """
+    from . import hook_recall
+    raw = ""
+    try:
+        raw = sys.stdin.read(hook_recall._MAX_STDIN_BYTES + 1)
+        if len(raw) > hook_recall._MAX_STDIN_BYTES:
+            raw = ""                      # oversized/garbage → safe no-op
+    except Exception:
+        raw = ""
+    _capture(default_event or "SessionStart", raw)
+
+    # Parse once up front. The recall path calls ``.get`` on the payload, so a
+    # valid-but-non-object JSON document (list, string, number) must become {}.
+    payload = _safe_json(raw)
+    if not isinstance(payload, dict):
+        payload = {}
+
+    # Delegate to the normal recall, feeding it the payload we already consumed.
+    original_reader = getattr(hook_recall, "_read_stdin_json", None)
+    try:
+        hook_recall._read_stdin_json = lambda: payload  # type: ignore
+        return hook_recall.main(default_event=default_event)
+    except Exception:
+        # Even if delegation fails, never break the session. Resolve the event the
+        # same way the recall hook does (payload field, then entry-point default).
+        event = payload.get("hook_event_name") or default_event or "SessionStart"
+        if event != "PostCompact":
+            # On PostCompact, stdout is appended verbatim to the model's context,
+            # so a bare "{}" would pollute it; emit nothing on that route.
+            try:
+                sys.stdout.write("{}")
+                sys.stdout.flush()
+            except Exception:
+                pass                      # closed pipe: nothing useful left to do
+        return 0
+    finally:
+        # Undo the monkeypatch so importing callers do not keep the stub reader.
+        if original_reader is not None:
+            hook_recall._read_stdin_json = original_reader
+
+
+def _safe_json(raw: str) -> dict:
+    """Parse ``raw`` as a JSON object, returning ``{}`` for anything else.
+
+    Blank input, invalid JSON, and valid JSON whose top-level value is not an
+    object (a list, string, number, ``null`` …) all yield ``{}``. Callers use the
+    result with ``.get``, so a non-dict return would raise further down.
+    """
+    try:
+        parsed = json.loads(raw) if raw.strip() else {}
+    except Exception:
+        return {}
+    return parsed if isinstance(parsed, dict) else {}
+
+
+if __name__ == "__main__":
+    # ``--compact`` as the first argument marks the PostCompact entry point; any
+    # other invocation is treated as SessionStart.
+    if sys.argv[1:2] == ["--compact"]:
+        ev = "PostCompact"
+    else:
+        ev = "SessionStart"
+    raise SystemExit(main(default_event=ev))
@@ -14,4 +14,8 @@
 from .hook_recall import main
 
 if __name__ == "__main__":
-    raise SystemExit(main())
+    # This entry point IS the PostCompact hook — assert that identity rather than
+    # re-deriving it from stdin. If the host omits ``hook_event_name`` on the
+    # PostCompact payload, main() would otherwise default to SessionStart and emit
+    # the wrong format on the very path this hook exists to serve.
+    raise SystemExit(main(default_event="PostCompact"))
@@ -34,10 +34,10 @@
 from . import paths
 
 
-def main() -> int:
+def main(default_event: str = "") -> int:
     payload = _read_stdin_json()
     cwd = payload.get("cwd", "") or ""
-    event, source = _classify_event(payload)
+    event, source = _classify_event(payload, default_event)
     is_compaction = (event == "PostCompact") or (event == "SessionStart" and source == "compact")
 
     # Background maintenance (pool sync ~12h, curator ~7d) belongs to a genuine
@@ -51,26 +51,51 @@ def main() -> int:
         except Exception:
             pass
 
+    # Double-injection guard: we register BOTH SessionStart(compact) and PostCompact
+    # for one compaction (a host-reliability hedge — see module docstring). On a host
+    # that honors both channels the block would otherwise be injected twice. If a
+    # sibling event already served THIS compaction moments ago, no-op.
+    if is_compaction and _compaction_already_served(payload, event):
+        _emit({}, note="komi recall: compaction already re-injected by a sibling event",
+              event=event)
+        return 0
+
     try:
         # Recompute the block FRESH every time — at compaction this picks up anything
-        # learned earlier this session. Cheap: a local store read.
-        block = build_block(cwd, payload)
+        # learned earlier this session. On a genuine session start we rebuild the
+        # index from Markdown (fresh=True); on a compaction we query the index that
+        # was already built at session start (fresh=False) — rebuilding mid-session
+        # would be a synchronous reindex + pool re-mirror in the hook's critical path.
+        block = build_block(cwd, payload, fresh=not is_compaction)
     except Exception as e:
         # Never break the session because recall failed — emit nothing.
-        _emit({}, note=f"komi recall skipped: {e}")
+        _emit({}, note=f"komi recall skipped: {e}", event=event)
         return 0
 
     if not block:
-        _emit({})
+        _emit({}, event=event)
         return 0
-
-    _emit_block(block, event, is_compaction)
+    # _emit_block is internally format-correct per event; the no-op/diagnostic emits
+    # above go through _emit, which is now event-aware + pipe-safe (suppresses the
+    # JSON diagnostic on PostCompact, where stdout is appended verbatim to context).
+    try:
+        _emit_block(block, event, is_compaction)
+        if is_compaction:
+            _record_compaction_served(payload, event)
+    except Exception:
+        pass    # a broken pipe / write error must never wedge the session
     return 0
 
 
-def build_block(cwd: str, payload: dict) -> str:
-    """Build the recall context block from the merged store. Reusable across events."""
-    store = _merged_store(cwd)
+def build_block(cwd: str, payload: dict, *, fresh: bool = True) -> str:
+    """Build the recall context block from the merged store. Reusable across events.
+
+    ``fresh`` rebuilds this store's index slice + re-mirrors the pool from disk (the
+    right thing at a genuine session start). When False (compaction), we skip that
+    rebuild and query the existing shared index — it was already populated at session
+    start, and a mid-session reindex is needless synchronous work in the hook path.
+    """
+    store = _merged_store(cwd, fresh=fresh)
     return recall(
         store,
         cwd=cwd,
@@ -80,16 +105,73 @@ def build_block(cwd: str, payload: dict) -> str:
     )
 
 
-def _classify_event(payload: dict) -> tuple[str, str]:
+def _classify_event(payload: dict, default_event: str = "") -> tuple[str, str]:
     """Return (event, source). ``event`` is the hook event name (SessionStart /
     PostCompact / …); ``source`` is the SessionStart trigger (startup/resume/clear/
-    compact) or the compaction trigger (manual/auto), empty if absent. Defaults to
+    compact) or the compaction trigger (manual/auto), empty if absent.
+
+    ``default_event`` is supplied by the invoking entry point (e.g. hook_compact
+    passes "PostCompact") and WINS over a missing/absent ``hook_event_name`` — the
+    entry point knows its own identity, so we never misroute a real PostCompact to
+    the SessionStart format just because the host omitted the field. Falls back to
     SessionStart so a bare/legacy payload behaves exactly as before."""
-    event = payload.get("hook_event_name") or "SessionStart"
+    event = payload.get("hook_event_name") or default_event or "SessionStart"
     source = payload.get("source") or payload.get("trigger") or ""
     return event, source
 
 
+# How close two events must be (seconds) to count as serving the SAME compaction.
+# SessionStart(compact) and PostCompact fire within moments of one /compact; a later
+# genuine compaction is many seconds away. Generous enough to dedup siblings, tight
+# enough not to swallow a real subsequent compaction.
+_COMPACTION_DEDUP_WINDOW = 45.0
+
+
+def _compaction_key(payload: dict) -> str:
+    """Return the dedup key for a compaction payload.
+
+    The first truthy value of ``session_id`` / ``sessionId`` is used, stringified.
+    When neither is present the constant ``"_nosid"`` is returned, so dedup still
+    operates per time window without an id.
+    """
+    for field in ("session_id", "sessionId"):
+        value = payload.get(field)
+        if value:
+            return str(value)
+    return "_nosid"
+
+
+def _compaction_already_served(payload: dict, event: str) -> bool:
+    """Return True if a sibling event already re-injected for THIS compaction.
+
+    "Already served" means the ``last_compact_reinject`` breadcrumb in state has
+    the same key as this payload, was written by a DIFFERENT event, and is within
+    ``_COMPACTION_DEDUP_WINDOW`` seconds of now. Read-only and best-effort: any
+    unreadable or malformed state yields False, so the caller re-injects rather
+    than silently dropping the block.
+    """
+    import time
+    key = _compaction_key(payload)
+    try:
+        state = paths.read_state()       # read-only: no lock-and-rewrite
+    except Exception:
+        return False
+    last = state.get("last_compact_reinject") if isinstance(state, dict) else None
+    if not isinstance(last, dict):
+        return False  # absent or corrupted breadcrumb — nothing to dedup against
+    if last.get("key") != key:
+        return False
+    if last.get("event") == event:
+        return False  # the SAME event re-firing (e.g. retry) — let it re-inject
+    try:
+        age = time.time() - float(last.get("ts", 0))
+    except Exception:
+        return False
+    # Compare the magnitude: a small backwards clock step still dedups, while a
+    # breadcrumb stamped far in the future cannot suppress re-injection forever.
+    return abs(age) < _COMPACTION_DEDUP_WINDOW
+
+
+def _record_compaction_served(payload: dict, event: str) -> None:
+    """Write the ``last_compact_reinject`` breadcrumb for this compaction.
+
+    The breadcrumb holds the compaction key, the event that re-injected, and the
+    current time. A sibling event reads it to avoid a second injection, and it
+    also shows on-device which path actually fired. Failures to persist are
+    ignored.
+    """
+    import time
+    served_key = _compaction_key(payload)
+    served_at = time.time()
+
+    def _stamp(state: dict):
+        state.update(
+            last_compact_reinject={"key": served_key, "event": event, "ts": served_at}
+        )
+        return None
+
+    try:
+        paths.update_state(_stamp)
+    except Exception:
+        pass
+
+
 def _emit_block(block: str, event: str, is_compaction: bool) -> None:
     """Emit the recall block in the form the given event supports.
 
@@ -124,16 +206,25 @@ def _emit_block(block: str, event: str, is_compaction: bool) -> None:
                                   "additionalContext": ctx}})
 
 
-def _merged_store(cwd: str) -> Store:
+def _merged_store(cwd: str, *, fresh: bool = True) -> Store:
     """Personal store is the base; if in a project, its learnings share the same
     index so a single recall query sees both. We open the personal store (which
     owns index.db) and ensure the project store + synced global pool are mirrored
-    into the shared index so one recall query sees personal + project + global."""
+    into the shared index so one recall query sees personal + project + global.
+
+    When ``fresh`` is False (a compaction re-inject), we SKIP the project reindex
+    and the pool re-mirror: the shared index was already built at session start, and
+    those operations are a full DELETE+re-INSERT (project Markdown) plus up to a
+    500-row pool mirror — too heavy to run synchronously in the hook's critical path
+    on every mid-session compaction. We just query what's already indexed.
+    """
     personal = Store(paths.personal_root(), index_path=paths.index_path())
+    if not fresh:
+        return personal
     proot = paths.project_root(cwd)
     if proot is not None:
         proj = Store(proot, index_path=paths.index_path())
-        # cheap: make sure project rows are present in the shared index
+        # make sure project rows are present in the shared index (session start only)
         proj.reindex()
     _mirror_pool_into_index(personal)
     return personal
@@ -226,19 +317,39 @@ def _recent_files(payload: dict) -> list[str]:
     return []
 
 
+_MAX_STDIN_BYTES = 4 * 1024 * 1024  # hook payloads are tiny; cap to avoid a runaway read
+
+
 def _read_stdin_json() -> dict:
+    """Read the hook payload from stdin and return it as a dict.
+
+    Returns ``{}`` for blank input, invalid JSON, a read error, an oversized
+    payload, or valid JSON whose top-level value is not an object. The size cap
+    is applied to ``len(data)``; NOTE(review): on a text-mode stdin that counts
+    characters, not bytes — confirm the name ``_MAX_STDIN_BYTES`` is intended.
+    """
     try:
-        data = sys.stdin.read()
+        data = sys.stdin.read(_MAX_STDIN_BYTES + 1)
+        if len(data) > _MAX_STDIN_BYTES:
+            return {}                     # oversized/garbage payload → safe no-op
-        return json.loads(data) if data.strip() else {}
+        parsed = json.loads(data) if data.strip() else {}
+        # main() calls payload.get(...) outside any try; a JSON list/string/number
+        # would raise there, so coerce every non-object payload to the no-op {}.
+        return parsed if isinstance(parsed, dict) else {}
     except Exception:
         return {}
 
 
-def _emit(obj: dict, *, note: str = "") -> None:
+def _emit(obj: dict, *, note: str = "", event: str = "") -> None:
+    """Emit a no-op / diagnostic result. Event-aware and pipe-safe:
+
+    - For PostCompact, stdout is appended VERBATIM to the model's context (its
+      documented add-to-context path), so a diagnostic JSON blob like
+      ``{"_note": ...}`` would pollute the context. Emit nothing on PostCompact.
+    - For SessionStart, additionalContext comes from a structured JSON object, so a
+      bare ``{}`` (optionally with a ``_note``) is the correct no-op.
+    - A closed stdout (BrokenPipeError, host hung up early) must never wedge the
+      session — swallow any write error.
+    """
+    if event == "PostCompact":
+        return
     if note:
         obj = {**obj, "_note": note}
-    sys.stdout.write(json.dumps(obj))
-    sys.stdout.flush()
+    try:
+        sys.stdout.write(json.dumps(obj))
+        sys.stdout.flush()
+    except Exception:
+        pass
 
 
 if __name__ == "__main__":
 
@@ -124,6 +124,41 @@ def update_state(mutator):
                 fh.close()
 
 
+def read_state() -> dict:
+    """Return the contents of state.json without writing anything back.
+
+    ``update_state`` always does a full read-modify-write, even for an identity
+    mutator; callers that only inspect state (e.g. the compaction dedup check)
+    use this instead. The sibling ``.lock`` file is still locked for the duration
+    of the read so a reader never sees a half-replaced file.
+
+    Best-effort: returns ``{}`` when the state file does not exist, cannot be read
+    or parsed, or does not contain a JSON object.
+    """
+    import json
+
+    state_file = state_path()
+    if not state_file.exists():
+        return {}
+    handle = None
+    try:
+        handle = open(state_file.with_suffix(".lock"), "a+")
+        _lock_file(handle)
+        loaded = json.loads(state_file.read_text(encoding="utf-8")) or {}
+        if isinstance(loaded, dict):
+            return loaded
+        return {}
+    except Exception:
+        # Covers decode/IO errors as well as failures to open or lock.
+        return {}
+    finally:
+        if handle is not None:
+            try:
+                _unlock_file(handle)
+            finally:
+                handle.close()
+
+
 def _lock_file(fh) -> None:
     """Acquire an exclusive advisory lock (blocking). No-op if locking is unavailable."""
     try:
@@ -153,4 +188,5 @@ def _unlock_file(fh) -> None:
 __all__ = [
     "claude_home", "personal_root", "project_root", "index_path",
     "queue_dir", "outbox_dir", "inbox_dir", "keys_dir", "state_path", "update_state",
+    "read_state",
 ]