thedotmack · surfingdoggo · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 8, 2026
diff --git a/docs/public/docs.json b/docs/public/docs.json
@@ -43,6 +43,7 @@
           "usage/claude-desktop",
           "usage/private-tags",
           "usage/export-import",
+          "usage/memory-ingest",
           "usage/manual-recovery",
           "usage/folder-context",
           "beta-features",

diff --git a/docs/public/usage/memory-ingest.mdx b/docs/public/usage/memory-ingest.mdx
@@ -0,0 +1,115 @@
+---
+title: "Memory Ingest"
+description: "Import Claude Code's auto-memory markdown files directly into claude-mem as observations — no model spend"
+---
+
+# Memory Ingest
+
+Claude Code maintains its own **auto-memory** — markdown it distills for itself — under:
+
+```
+~/.claude/projects/<encoded-cwd>/memory/MEMORY.md   (a link-only index)
+~/.claude/projects/<encoded-cwd>/memory/<topic>.md  (distilled prose, one fact per file)
+```
+
+`<encoded-cwd>` is the repo's absolute path with every `/` replaced by `-` (e.g.
+`/home/you/code/app` → `-home-you-code-app`).
+
+`claude-mem memory ingest` imports those topic files **directly** into your
+memory database as observations.
+
+## Why it doesn't cost anything
+
+Auto-memory is **already distilled** — each topic file is the same *kind* of
+artifact the observation generator produces. So memory-ingest does **not** run
+the Haiku generation pipeline. It stores each file's prose directly through
+claude-mem's existing observation seam (content-hash dedup + Chroma sync).
+Re-running generation on already-distilled prose would be lossy: you would pay
+model cost for a worse result.
+
+The `MEMORY.md` index is skipped — it's just links and carries no knowledge of
+its own.
+
+## Usage
+
+Always **dry-run first** — it's a zero-spend, DB-free scan + count:
+
+```bash
+# Scan the current repo's memory dir and report what would be stored
+npx claude-mem memory ingest --dry-run
+
+# Sweep every project under ~/.claude/projects/*/memory/
+npx claude-mem memory ingest --all --dry-run
+```
+
+Then run the real ingest:
+
+```bash
+# Ingest the current repo's memory (default source = cwd)
+npx claude-mem memory ingest
+
+# Ingest from an explicit directory
+npx claude-mem memory ingest --source ~/.claude/projects/-home-you-code-app/memory
+
+# Ingest everything
+npx claude-mem memory ingest --all
+```
+
+### Flags
+
+| Flag | Effect |
+|------|--------|
+| *(none)* | Source = the current repo's memory dir, resolved from `cwd`. |
+| `--source <dir>` | Ingest from an explicit `memory/` directory. |
+| `--all` | Sweep every `~/.claude/projects/*/memory/` directory. Takes precedence over `--source` if both are given. |
+| `--dry-run` | Zero-spend parse + count only. No worker, no DB writes. Run this first. |
+| `--require-cwd` | Skip orphaned project dirs whose originating `cwd` cannot be resolved (instead of ingesting them under a fallback project). |
+
+## How it runs
+
+- **`--dry-run`** is pure parse + count — it runs entirely in the CLI process,
+  touches no worker and no database, and spends nothing.
+- The **real ingest** stores into the SQLite observation database, which lives
+  in the worker. The CLI starts the worker if needed and drives the import over
+  HTTP (`POST /api/memory/ingest`), mirroring how transcript ingest and
+  summaries reach the worker. The request is sent without a timeout, so long
+  bulk imports are not cut off.
+
+## Frontmatter
+
+Topic files carry a small YAML frontmatter block, which is preserved as
+observation metadata:
+
+```markdown
+---
+name: recent-work
+description: "What was done in the most recent session"
+metadata:
+  node_type: memory
+  type: project
+  originSessionId: 74e59070-...
+---
+
+<the distilled prose — stored as the observation body>
+```
+
+`metadata.type` (e.g. `project`, `feedback`, `reference`, `user`) and
+`metadata.originSessionId` are carried through; files without frontmatter are
+stored using their body as-is.
+
+## Idempotency
+
+Ingest is safe to re-run. Observations are content-hash deduplicated on insert,
+so files that were imported before are counted as `already-imported` and are not
+stored again — only new or changed files are stored.
+
+The summary line reports the outcome:
+
+```
+MEMORY INGEST: 12 stored, 38 already-imported, 0 skipped, 0 failed, of 50 files across 1 dirs
+```
+
+## Related
+
+- [Memory Export/Import](/usage/export-import) — share memory sets between installations.
+- Transcript backfill (`claude-mem transcript ingest`) — the sibling path that
+  imports raw Claude Code session JSONL and *does* run generation.
diff --git a/src/npx-cli/commands/runtime.ts b/src/npx-cli/commands/runtime.ts
@@ -263,3 +263,7 @@ export function runTranscriptWatchCommand(): void {
     process.exit(exitCode ?? 0);
   });
 }
+
+/**
+ * CLI entry for `npx claude-mem memory ingest`.
+ *
+ * Hands the `memory` command to `spawnBunWorkerCommand`, with `ingest` as the
+ * first argument followed by the caller's remaining arguments, unchanged.
+ *
+ * @param extraArgs - Arguments that followed `memory ingest` on the command line.
+ */
+export function runMemoryIngestCommand(extraArgs: string[] = []): void {
+  const workerArgs = ['ingest'].concat(extraArgs);
+  spawnBunWorkerCommand('memory', workerArgs);
+}
diff --git a/src/npx-cli/index.ts b/src/npx-cli/index.ts
@@ -209,6 +209,19 @@ async function main(): Promise<void> {
       break;
     }
 
+    case 'memory': {
+      const subCommand = args[1]?.toLowerCase();
+      if (subCommand === 'ingest') {
+        const { runMemoryIngestCommand } = await import('./commands/runtime.js');
+        runMemoryIngestCommand(args.slice(2));
+      } else {
+        console.error(pc.red(`Unknown memory subcommand: ${subCommand ?? '(none)'}`));
+        console.error(`Usage: npx claude-mem memory ingest [--source <dir> | --all] [--dry-run] [--require-cwd]`);
+        process.exit(1);
+      }
+      break;
+    }
+
     default: {
       console.error(pc.red(`Unknown command: ${command}`));
       console.error(`Run ${pc.bold('npx claude-mem --help')} for usage information.`);

diff --git a/src/services/memory/cli.ts b/src/services/memory/cli.ts
@@ -0,0 +1,89 @@
+import { resolve as resolvePath } from 'path';
+
+import {
+  dryRunMemorySource,
+  formatMemoryDryRunReport,
+  memoryDirForCwd,
+  claudeProjectsDir,
+  type MemoryIngestReport,
+} from './ingest.js';
+import { ensureWorkerRunning, workerHttpRequest } from '../../shared/worker-utils.js';
+
+/**
+ * Look up the token that immediately follows the first occurrence of a flag.
+ *
+ * The following token is returned verbatim; it is not checked for being a
+ * flag itself, so `['--source', '--dry-run']` yields `'--dry-run'`.
+ *
+ * @param args - CLI arguments to search.
+ * @param name - Exact flag to look for (e.g. `--source`).
+ * @returns The token after the flag, or `null` when the flag is absent or is
+ *   the last argument.
+ */
+function getArgValue(args: string[], name: string): string | null {
+  const flagPosition = args.indexOf(name);
+  if (flagPosition < 0) {
+    return null;
+  }
+  const value = args[flagPosition + 1];
+  return value ?? null;
+}
+
+/**
+ * Report whether a flag appears anywhere in the argument list.
+ *
+ * Matching is by exact token equality; prefixes such as `--all-projects` do
+ * not count as `--all`.
+ *
+ * @param args - CLI arguments to search.
+ * @param name - Exact flag to look for.
+ */
+function hasFlag(args: string[], name: string): boolean {
+  return args.some((token) => token === name);
+}
+
+/**
+ * Help text for `claude-mem memory ingest`, printed when the subcommand is
+ * missing or unknown. One line per option, including `--source <dir>`.
+ */
+const USAGE = [
+  'Usage: claude-mem memory ingest [--source <dir> | --all] [--dry-run] [--require-cwd]',
+  "  default: the current repo's memory dir (cwd)",
+  '  --source <dir>: ingest from an explicit memory/ directory',
+  '  --all:   sweep every ~/.claude/projects/*/memory/',
+  '  --dry-run:     zero-spend scan + count (do this first)',
+  '  --require-cwd: skip orphaned dirs whose cwd cannot be resolved',
+].join('\n');
+
+/**
+ * Resolve the effective source dir from flags.
+ *
+ * Precedence: `--all` (the Claude projects root) wins over `--source <dir>`,
+ * which wins over the default (the current repo's memory dir, derived from
+ * `process.cwd()`). A `--source` with a missing or empty value falls through
+ * to the default.
+ *
+ * @param args - CLI arguments following `memory ingest`.
+ * @returns The directory to scan and whether it is an all-projects sweep.
+ */
+function resolveSource(args: string[]): { source: string; all: boolean } {
+  if (hasFlag(args, '--all')) return { source: claudeProjectsDir(), all: true };
+  const explicit = getArgValue(args, '--source');
+  // Make an explicit path absolute here, in the CLI process. The real ingest
+  // forwards `source` to the worker over HTTP, and the worker is a separate
+  // process that may not share this working directory; resolving up front
+  // keeps dry-run and real ingest pointed at the same directory.
+  if (explicit) return { source: resolvePath(explicit), all: false };
+  return { source: memoryDirForCwd(process.cwd()), all: false };
+}
+
+/**
+ * Run a `claude-mem memory <subcommand>` invocation and return its exit code.
+ *
+ * `ingest --dry-run` reports locally via `dryRunMemorySource`. Without
+ * `--dry-run`, the import is POSTed to the worker at `/api/memory/ingest` and
+ * the returned report is printed. Any other subcommand prints the usage text.
+ *
+ * @param subcommand - Word following `memory` on the command line, if any.
+ * @param args - Remaining CLI arguments (flags and their values).
+ * @returns 0 on success; 1 on a usage error, a worker/request failure, or when
+ *   the report counts at least one failed file. An unknown subcommand returns
+ *   1; a missing subcommand returns 0.
+ */
+export async function runMemoryCommand(subcommand: string | undefined, args: string[]): Promise<number> {
+  switch (subcommand) {
+    case 'ingest': {
+      // `--source` with nothing usable after it would otherwise fall back to
+      // the cwd default, or take the next flag as a path. Reject it up front.
+      // `--all` overrides `--source`, so the check only applies without it.
+      if (hasFlag(args, '--source') && !hasFlag(args, '--all')) {
+        const sourceValue = getArgValue(args, '--source');
+        if (!sourceValue || sourceValue.startsWith('--')) {
+          console.error('--source requires a directory argument.');
+          console.error(USAGE);
+          return 1;
+        }
+      }
+
+      const { source, all } = resolveSource(args);
+
+      // Dry-run is pure parse + count — no worker, no spend — so run it here.
+      if (hasFlag(args, '--dry-run')) {
+        try {
+          console.log(formatMemoryDryRunReport(dryRunMemorySource(source, { all })));
+          return 0;
+        } catch (error) {
+          console.error(error instanceof Error ? error.message : String(error));
+          return 1;
+        }
+      }
+
+      // Real ingest stores into the SQLite observation DB, which lives in the
+      // worker. Drive it over HTTP (mirroring transcript ingest + summaries).
+      // Thrown errors (connection failure, unparsable body) become exit code 1,
+      // matching the dry-run path, instead of rejecting the returned promise.
+      let report: MemoryIngestReport;
+      try {
+        const workerReady = await ensureWorkerRunning();
+        if (!workerReady) {
+          console.error('Worker is not running and could not be started. Cannot ingest.');
+          return 1;
+        }
+        const response = await workerHttpRequest('/api/memory/ingest', {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({ source, all, requireCwd: hasFlag(args, '--require-cwd') }),
+          timeoutMs: 0, // bulk import can be long; do not time out
+        });
+        if (!response.ok) {
+          console.error(`Memory ingest failed: HTTP ${response.status} ${await response.text()}`);
+          return 1;
+        }
+        report = (await response.json()) as MemoryIngestReport;
+      } catch (error) {
+        console.error(`Memory ingest failed: ${error instanceof Error ? error.message : String(error)}`);
+        return 1;
+      }
+
+      // Per-file lines are printed only for stored and failed files; the
+      // other outcomes appear only in the summary counts.
+      for (const f of report.files) {
+        if (f.status === 'stored' || f.status === 'failed') {
+          console.log(`${f.project}/${f.file}: ${f.status}${f.reason ? ` (${f.reason})` : ''}` +
+            (f.observationId ? ` -> obs #${f.observationId}` : ''));
+        }
+      }
+      console.log(
+        `MEMORY INGEST: ${report.stored} stored, ${report.deduped} already-imported, ` +
+          `${report.skipped} skipped, ${report.failed} failed, of ${report.found} files ` +
+          `across ${report.dirs} dirs` +
+          (report.cwdUnresolvedDirs ? ` (${report.cwdUnresolvedDirs} orphaned/cwd-unresolved)` : '')
+      );
+      return report.failed > 0 ? 1 : 0;
+    }
+    default:
+      console.log(USAGE);
+      return subcommand ? 1 : 0;
+  }
+}