@@ -195,6 +195,29 @@ enum Commands {
195195 /// Show global statistics
196196 Stats ,
197197
198+ /// Process the async extraction queue (LLM-backed). Reads pending
199+ /// raw tool outputs captured by hooks when
200+ /// `extraction.summarizer.provider != none` and runs the configured
201+ /// LLM CLI to extract facts. Designed to be invoked from a cron, a
202+ /// SessionEnd async fork, or manually.
203+ ExtractPending {
204+ /// Maximum rows to process in this run.
205+ #[ arg( short, long, default_value = "10" ) ]
206+ limit : usize ,
207+
208+ /// Optional CLI override of `extraction.summarizer.provider`.
209+ #[ arg( long) ]
210+ provider : Option < String > ,
211+
212+ /// Optional CLI override of `extraction.summarizer.model`.
213+ #[ arg( long) ]
214+ model : Option < String > ,
215+
216+ /// Don't actually call the LLM — just print what would be sent.
217+ #[ arg( long) ]
218+ dry_run : bool ,
219+ } ,
220+
198221 /// Apply temporal decay to memory weights
199222 Decay {
200223 /// Decay factor (default: 0.95)
@@ -1179,6 +1202,19 @@ fn main() -> Result<()> {
11791202 } => cmd_extract_patterns ( & store, & topic, memoir. as_deref ( ) , min_cluster_size) ,
11801203 Commands :: Topics => cmd_topics ( & store) ,
11811204 Commands :: Stats => cmd_stats ( & store) ,
1205+ Commands :: ExtractPending {
1206+ limit,
1207+ provider,
1208+ model,
1209+ dry_run,
1210+ } => cmd_extract_pending (
1211+ & store,
1212+ & cfg. extraction . summarizer ,
1213+ limit,
1214+ provider. as_deref ( ) ,
1215+ model. as_deref ( ) ,
1216+ dry_run,
1217+ ) ,
11821218 Commands :: Decay { factor } => cmd_decay ( & store, factor) ,
11831219 Commands :: Prune { threshold, dry_run } => cmd_prune ( & store, threshold, dry_run) ,
11841220 Commands :: Consolidate {
@@ -1377,6 +1413,7 @@ fn main() -> Result<()> {
13771413 & cfg. memory ,
13781414 extract_every,
13791415 cfg. extraction . store_raw ,
1416+ & cfg. extraction . summarizer ,
13801417 )
13811418 }
13821419 HookCommands :: Compact => {
@@ -1400,7 +1437,7 @@ fn main() -> Result<()> {
14001437 let emb_ref = embedder. as_ref ( ) . map ( |e| e as & dyn icm_core:: Embedder ) ;
14011438 #[ cfg( not( feature = "embeddings" ) ) ]
14021439 let emb_ref: Option < & dyn icm_core:: Embedder > = None ;
1403- cmd_hook_end ( & store, emb_ref, & cfg. memory )
1440+ cmd_hook_end ( & store, emb_ref, & cfg. memory , & cfg . extraction . summarizer )
14041441 }
14051442 } ,
14061443 #[ cfg( feature = "tui" ) ]
@@ -2334,12 +2371,25 @@ fn extract_tool_output(json: &Value) -> Option<&str> {
23342371
23352372/// PostToolUse hook: auto-extract context every N tool calls.
23362373/// Reads JSON from stdin. Runs extraction asynchronously.
2374+ ///
2375+ /// Two paths are wired up:
2376+ ///
2377+ /// 1. **Async path** (`extraction.summarizer.provider != "none"`).
2378+ /// The hook stores the raw tool output verbatim in
2379+ /// `pending_extractions` (~50ms / fire, no embedder load) and a
2380+ /// separate worker (`icm extract-pending` or the SessionEnd async
2381+ /// fork) dequeues it later and runs the configured LLM CLI.
2382+ ///
2383+ /// 2. **Inline path** (default, `provider = "none"`). Current
2384+ /// fastembed semantic-scoring extractor — multilingual, but pays
2385+ /// a ~3.7s model-load cost per process.
23372386fn cmd_hook_post (
23382387 store : & SqliteStore ,
23392388 embedder : Option < & dyn icm_core:: Embedder > ,
23402389 memory_cfg : & crate :: config:: MemoryConfig ,
23412390 extract_every : usize ,
23422391 store_raw : bool ,
2392+ extraction_summarizer : & crate :: config:: SummarizerConfig ,
23432393) -> Result < ( ) > {
23442394 let Some ( input) = read_stdin_utf8_lossy ( ) else {
23452395 return Ok ( ( ) ) ;
@@ -2387,7 +2437,35 @@ fn cmd_hook_post(
23872437 . and_then ( |p| p. file_name ( ) . map ( |n| n. to_string_lossy ( ) . to_string ( ) ) )
23882438 . unwrap_or_else ( || "project" . to_string ( ) ) ;
23892439
2390- // Extract facts and store (with raw text fallback if enabled).
2440+ // Async path: enqueue raw output and return without loading the
2441+ // embedder. The worker (`icm extract-pending` / SessionEnd fork) will
2442+ // dequeue and run the configured LLM CLI. ~50ms / fire vs ~3.7s
2443+ // for the inline fastembed path below.
2444+ if extraction_summarizer. provider != "none" {
2445+ // Cap to 8 KB to keep the queue reasonable. LLM extraction works
2446+ // fine on the most recent slice; very long outputs are rare and
2447+ // their tail is what matters most for auto-context anyway.
2448+ let capped = if tool_output. len ( ) > 8192 {
2449+ & tool_output[ tool_output. len ( ) - 8192 ..]
2450+ } else {
2451+ tool_output
2452+ } ;
2453+ match store. enqueue_pending_extraction ( & project, tool_name, capped) {
2454+ Ok ( _) => {
2455+ eprintln ! (
2456+ "[icm] enqueued raw output for async LLM extraction (provider={})" ,
2457+ extraction_summarizer. provider,
2458+ ) ;
2459+ }
2460+ Err ( e) => {
2461+ eprintln ! ( "[icm] enqueue failed, falling back inline: {e}" ) ;
2462+ // Fall through to inline path on storage failure.
2463+ }
2464+ }
2465+ return Ok ( ( ) ) ;
2466+ }
2467+
2468+ // Inline path: current behavior, fastembed semantic scoring.
23912469 // Cap auto-extracted importance at Medium: tool output is untrusted
23922470 // (a malicious tool could emit decision-keyword text to poison wake-up).
23932471 // Pass the embedder so non-English content is also scored: the keyword
@@ -2434,7 +2512,58 @@ fn cmd_hook_end(
24342512 store : & SqliteStore ,
24352513 embedder : Option < & dyn icm_core:: Embedder > ,
24362514 memory_cfg : & crate :: config:: MemoryConfig ,
2515+ extraction_summarizer : & crate :: config:: SummarizerConfig ,
24372516) -> Result < ( ) > {
2517+ // Async path: when a provider is configured, drain the
2518+ // pending_extractions queue in a detached subprocess and return
2519+ // immediately so Claude Code doesn't kill us with "Hook cancelled".
2520+ // The transcript-extract path below stays for back-compat (it's
2521+ // still cheap when --no-embeddings is set).
2522+ if extraction_summarizer. provider != "none" {
2523+ if let Ok ( self_path) = std:: env:: current_exe ( ) {
2524+ // `nohup`-style detach: redirect std{in,out,err} to /dev/null
2525+ // and let the child outlive us. The child reads the same
2526+ // config so it picks up the same provider.
2527+ let mut cmd = std:: process:: Command :: new ( & self_path) ;
2528+ cmd. arg ( "extract-pending" ) . arg ( "--limit" ) . arg ( "20" ) ;
2529+ cmd. stdin ( std:: process:: Stdio :: null ( ) )
2530+ . stdout ( std:: process:: Stdio :: null ( ) )
2531+ . stderr ( std:: process:: Stdio :: null ( ) ) ;
2532+ // On Unix, set a new session so the child survives our exit.
2533+ #[ cfg( unix) ]
2534+ {
2535+ use std:: os:: unix:: process:: CommandExt ;
2536+ unsafe {
2537+ cmd. pre_exec ( || {
2538+ // Detach from controlling tty / process group.
2539+ libc:: setsid ( ) ;
2540+ Ok ( ( ) )
2541+ } ) ;
2542+ }
2543+ }
2544+ match cmd. spawn ( ) {
2545+ Ok ( _) => {
2546+ eprintln ! (
2547+ "[icm] session-end: forked async LLM worker (provider={})" ,
2548+ extraction_summarizer. provider,
2549+ ) ;
2550+ }
2551+ Err ( e) => {
2552+ eprintln ! (
2553+ "[icm] session-end: fork failed ({e}), falling back to inline transcript extract" ,
2554+ ) ;
2555+ return extract_from_hook_transcript (
2556+ store,
2557+ embedder,
2558+ memory_cfg,
2559+ "session-end" ,
2560+ ) ;
2561+ }
2562+ }
2563+ return Ok ( ( ) ) ;
2564+ }
2565+ }
2566+ // Inline path (legacy): scan transcript and extract via fastembed.
24382567 extract_from_hook_transcript ( store, embedder, memory_cfg, "session-end" )
24392568}
24402569
@@ -4562,6 +4691,146 @@ fn resolve_consolidate_provider(
45624691 } )
45634692}
45644693
4694+ /// Process the async extraction queue.
4695+ ///
4696+ /// Reads up to `limit` oldest pending rows from `pending_extractions`,
4697+ /// concatenates their raw outputs, asks the configured LLM CLI to extract
4698+ /// decisions / architecture / preferences, parses the bullet response,
4699+ /// and stores the results as Memory rows. Successfully-processed rows
4700+ /// are deleted from the queue regardless of whether facts were
4701+ /// extracted (so an output with no extractable content doesn't loop
4702+ /// forever).
4703+ #[ allow( clippy:: too_many_arguments) ]
4704+ fn cmd_extract_pending (
4705+ store : & SqliteStore ,
4706+ cfg : & config:: SummarizerConfig ,
4707+ limit : usize ,
4708+ cli_provider : Option < & str > ,
4709+ cli_model : Option < & str > ,
4710+ dry_run : bool ,
4711+ ) -> Result < ( ) > {
4712+ let pending = store. list_pending_extractions ( limit) ?;
4713+ if pending. is_empty ( ) {
4714+ println ! ( "No pending extractions." ) ;
4715+ return Ok ( ( ) ) ;
4716+ }
4717+
4718+ let provider_kind = resolve_consolidate_provider ( cfg, cli_provider) ?;
4719+ if matches ! ( provider_kind, summarizer:: ProviderKind :: None ) {
4720+ bail ! (
4721+ "extraction.summarizer.provider = \" none\" — nothing would be \
4722+ extracted. Set it to auto/claude/codex/gemini/ollama, or \
4723+ pass --provider on this command."
4724+ ) ;
4725+ }
4726+
4727+ // Build a single LLM prompt covering all rows. The prompt asks for
4728+ // a structured bullet list so we can deterministically split into
4729+ // facts. Each bullet becomes one Memory.
4730+ let mut joined = String :: new ( ) ;
4731+ let mut ids: Vec < String > = Vec :: new ( ) ;
4732+ let mut project_for_each: Vec < String > = Vec :: new ( ) ;
4733+ for ( id, project, tool_name, raw, _ts) in & pending {
4734+ joined. push_str ( & format ! ( "=== tool={tool_name} project={project} ===\n " ) ) ;
4735+ joined. push_str ( raw) ;
4736+ joined. push_str ( "\n \n " ) ;
4737+ ids. push ( id. clone ( ) ) ;
4738+ project_for_each. push ( project. clone ( ) ) ;
4739+ }
4740+
4741+ let model_owned: Option < String > = cli_model. map ( |s| s. to_string ( ) ) . or_else ( || {
4742+ if cfg. model . is_empty ( ) {
4743+ None
4744+ } else {
4745+ Some ( cfg. model . clone ( ) )
4746+ }
4747+ } ) ;
4748+ let max_tokens = cfg. max_tokens ;
4749+
4750+ let prompt = format ! (
4751+ "From the tool outputs below, extract durable facts that an AI agent \
4752+ should remember across sessions: architecture decisions, resolved \
4753+ errors, user preferences, project-specific context.\n \
4754+ \n \
4755+ Output format: one fact per line, prefixed with `- `. Each fact \
4756+ must be a complete, standalone sentence — no pronouns referring to \
4757+ missing context. Skip routine noise (file listings, build progress, \
4758+ git status). If nothing durable is present, output exactly `- (none)`.\n \
4759+ \n \
4760+ {joined}",
4761+ ) ;
4762+
4763+ if dry_run {
4764+ println ! ( "=== Dry run ===" ) ;
4765+ println ! ( "provider: {provider_kind:?}" ) ;
4766+ println ! (
4767+ "model: {}" ,
4768+ model_owned. as_deref( ) . unwrap_or( "<provider default>" )
4769+ ) ;
4770+ println ! ( "rows: {}" , pending. len( ) ) ;
4771+ println ! ( "--- prompt ---" ) ;
4772+ println ! ( "{prompt}" ) ;
4773+ return Ok ( ( ) ) ;
4774+ }
4775+
4776+ let provider = summarizer:: make_summarizer ( provider_kind) ?;
4777+ let req = summarizer:: SummarizeRequest {
4778+ prompt : & prompt,
4779+ model : model_owned. as_deref ( ) ,
4780+ max_tokens,
4781+ timeout : std:: time:: Duration :: from_secs ( cfg. timeout_secs ) ,
4782+ } ;
4783+ let response = match provider. summarize ( & req) {
4784+ Ok ( s) if !s. trim ( ) . is_empty ( ) => s,
4785+ Ok ( _) => {
4786+ eprintln ! ( "[extract-pending] provider returned empty output" ) ;
4787+ // Still drop the rows so we don't loop forever on bad inputs.
4788+ store. delete_pending_extractions ( & ids) ?;
4789+ return Ok ( ( ) ) ;
4790+ }
4791+ Err ( e) => {
4792+ eprintln ! ( "[extract-pending] provider failed: {e}" ) ;
4793+ // Don't delete — let the next run retry.
4794+ return Err ( e) ;
4795+ }
4796+ } ;
4797+
4798+ // Parse bullet output into individual facts.
4799+ let mut stored = 0usize ;
4800+ for line in response. lines ( ) {
4801+ let line = line. trim ( ) ;
4802+ let fact = line
4803+ . strip_prefix ( "- " )
4804+ . or_else ( || line. strip_prefix ( "* " ) )
4805+ . unwrap_or ( line)
4806+ . trim ( ) ;
4807+ if fact. is_empty ( ) || fact == "(none)" || fact. eq_ignore_ascii_case ( "none" ) {
4808+ continue ;
4809+ }
4810+ // Use the first row's project as the topic anchor — most batches
4811+ // will be from a single session anyway. Multi-project batches
4812+ // get a slightly weaker per-fact attribution; not worth more
4813+ // ceremony in v1.
4814+ let project = project_for_each
4815+ . first ( )
4816+ . map ( |s| s. as_str ( ) )
4817+ . unwrap_or ( "project" ) ;
4818+ let topic = format ! ( "context-{project}" ) ;
4819+ let mem = Memory :: new ( topic, fact. to_string ( ) , Importance :: Medium ) ;
4820+ store. store ( mem) ?;
4821+ stored += 1 ;
4822+ }
4823+
4824+ let deleted = store. delete_pending_extractions ( & ids) ?;
4825+ println ! (
4826+ "Processed {} rows, extracted {} facts, dequeued {}." ,
4827+ pending. len( ) ,
4828+ stored,
4829+ deleted,
4830+ ) ;
4831+ Ok ( ( ) )
4832+ }
4833+
45654834/// Lexical fallback: concat all summaries with " | " — the historical behavior
45664835/// preserved as a safe baseline when no LLM is configured or available.
45674836fn lexical_consolidate ( memories : & [ Memory ] ) -> String {
0 commit comments