Skip to content

Commit 04b1fdf

Browse files
authored
Merge pull request #220 from rtk-ai/develop
Release prep: merge develop → main for v0.10.48 (async LLM extraction)
2 parents 702da80 + 9774aba commit 04b1fdf

6 files changed

Lines changed: 385 additions & 2 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/icm-cli/src/config.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,16 @@ pub struct ExtractionConfig {
7979
pub extract_every: usize,
8080
/// Store raw text as fallback when no facts are extracted.
8181
pub store_raw: bool,
82+
/// LLM-backed extraction provider. When set to anything other than
83+
/// `none`, hooks switch to the **fast async path**: tool output is
84+
/// stored verbatim into a `pending_extractions` queue (~50ms / fire,
85+
/// no embedder load) and a separate worker dequeues it later via
86+
/// the configured LLM CLI. See `icm extract-pending` and the
87+
/// SessionEnd async trigger.
88+
///
89+
/// `provider = "none"` (default) keeps the existing inline fastembed
90+
/// behavior so this feature is fully opt-in and zero-regression.
91+
pub summarizer: SummarizerConfig,
8292
}
8393

8494
/// Context recall/injection settings (Layer 2).
@@ -225,6 +235,8 @@ impl Default for ExtractionConfig {
225235
max_facts: 20,
226236
extract_every: 3,
227237
store_raw: true,
238+
// Default = none → inline fastembed path, no behavior change.
239+
summarizer: SummarizerConfig::default(),
228240
}
229241
}
230242
}

crates/icm-cli/src/main.rs

Lines changed: 271 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,29 @@ enum Commands {
195195
/// Show global statistics
196196
Stats,
197197

198+
/// Process the async extraction queue (LLM-backed). Reads pending
199+
/// raw tool outputs captured by hooks when
200+
/// `extraction.summarizer.provider != none` and runs the configured
201+
/// LLM CLI to extract facts. Designed to be invoked from a cron, a
202+
/// SessionEnd async fork, or manually.
203+
ExtractPending {
204+
/// Maximum rows to process in this run.
205+
#[arg(short, long, default_value = "10")]
206+
limit: usize,
207+
208+
/// Optional CLI override of `extraction.summarizer.provider`.
209+
#[arg(long)]
210+
provider: Option<String>,
211+
212+
/// Optional CLI override of `extraction.summarizer.model`.
213+
#[arg(long)]
214+
model: Option<String>,
215+
216+
/// Don't actually call the LLM — just print what would be sent.
217+
#[arg(long)]
218+
dry_run: bool,
219+
},
220+
198221
/// Apply temporal decay to memory weights
199222
Decay {
200223
/// Decay factor (default: 0.95)
@@ -1179,6 +1202,19 @@ fn main() -> Result<()> {
11791202
} => cmd_extract_patterns(&store, &topic, memoir.as_deref(), min_cluster_size),
11801203
Commands::Topics => cmd_topics(&store),
11811204
Commands::Stats => cmd_stats(&store),
1205+
Commands::ExtractPending {
1206+
limit,
1207+
provider,
1208+
model,
1209+
dry_run,
1210+
} => cmd_extract_pending(
1211+
&store,
1212+
&cfg.extraction.summarizer,
1213+
limit,
1214+
provider.as_deref(),
1215+
model.as_deref(),
1216+
dry_run,
1217+
),
11821218
Commands::Decay { factor } => cmd_decay(&store, factor),
11831219
Commands::Prune { threshold, dry_run } => cmd_prune(&store, threshold, dry_run),
11841220
Commands::Consolidate {
@@ -1377,6 +1413,7 @@ fn main() -> Result<()> {
13771413
&cfg.memory,
13781414
extract_every,
13791415
cfg.extraction.store_raw,
1416+
&cfg.extraction.summarizer,
13801417
)
13811418
}
13821419
HookCommands::Compact => {
@@ -1400,7 +1437,7 @@ fn main() -> Result<()> {
14001437
let emb_ref = embedder.as_ref().map(|e| e as &dyn icm_core::Embedder);
14011438
#[cfg(not(feature = "embeddings"))]
14021439
let emb_ref: Option<&dyn icm_core::Embedder> = None;
1403-
cmd_hook_end(&store, emb_ref, &cfg.memory)
1440+
cmd_hook_end(&store, emb_ref, &cfg.memory, &cfg.extraction.summarizer)
14041441
}
14051442
},
14061443
#[cfg(feature = "tui")]
@@ -2334,12 +2371,25 @@ fn extract_tool_output(json: &Value) -> Option<&str> {
23342371

23352372
/// PostToolUse hook: auto-extract context every N tool calls.
23362373
/// Reads JSON from stdin. Runs extraction asynchronously.
2374+
///
2375+
/// Two paths are wired up:
2376+
///
2377+
/// 1. **Async path** (`extraction.summarizer.provider != "none"`).
2378+
/// The hook stores the raw tool output verbatim in
2379+
/// `pending_extractions` (~50ms / fire, no embedder load) and a
2380+
/// separate worker (`icm extract-pending` or the SessionEnd async
2381+
/// fork) dequeues it later and runs the configured LLM CLI.
2382+
///
2383+
/// 2. **Inline path** (default, `provider = "none"`). Current
2384+
/// fastembed semantic-scoring extractor — multilingual, but pays
2385+
/// a ~3.7s model-load cost per process.
23372386
fn cmd_hook_post(
23382387
store: &SqliteStore,
23392388
embedder: Option<&dyn icm_core::Embedder>,
23402389
memory_cfg: &crate::config::MemoryConfig,
23412390
extract_every: usize,
23422391
store_raw: bool,
2392+
extraction_summarizer: &crate::config::SummarizerConfig,
23432393
) -> Result<()> {
23442394
let Some(input) = read_stdin_utf8_lossy() else {
23452395
return Ok(());
@@ -2387,7 +2437,35 @@ fn cmd_hook_post(
23872437
.and_then(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
23882438
.unwrap_or_else(|| "project".to_string());
23892439

2390-
// Extract facts and store (with raw text fallback if enabled).
2440+
// Async path: enqueue raw output and return without loading the
2441+
// embedder. The worker (`icm extract-pending` / SessionEnd fork) will
2442+
// dequeue and run the configured LLM CLI. ~50ms / fire vs ~3.7s
2443+
// for the inline fastembed path below.
2444+
if extraction_summarizer.provider != "none" {
2445+
// Cap to 8 KB to keep the queue reasonable. LLM extraction works
2446+
// fine on the most recent slice; very long outputs are rare and
2447+
// their tail is what matters most for auto-context anyway.
2448+
let capped = if tool_output.len() > 8192 {
2449+
&tool_output[tool_output.len() - 8192..]
2450+
} else {
2451+
tool_output
2452+
};
2453+
match store.enqueue_pending_extraction(&project, tool_name, capped) {
2454+
Ok(_) => {
2455+
eprintln!(
2456+
"[icm] enqueued raw output for async LLM extraction (provider={})",
2457+
extraction_summarizer.provider,
2458+
);
2459+
}
2460+
Err(e) => {
2461+
eprintln!("[icm] enqueue failed, falling back inline: {e}");
2462+
// Fall through to inline path on storage failure.
2463+
}
2464+
}
2465+
return Ok(());
2466+
}
2467+
2468+
// Inline path: current behavior, fastembed semantic scoring.
23912469
// Cap auto-extracted importance at Medium: tool output is untrusted
23922470
// (a malicious tool could emit decision-keyword text to poison wake-up).
23932471
// Pass the embedder so non-English content is also scored: the keyword
@@ -2434,7 +2512,58 @@ fn cmd_hook_end(
24342512
store: &SqliteStore,
24352513
embedder: Option<&dyn icm_core::Embedder>,
24362514
memory_cfg: &crate::config::MemoryConfig,
2515+
extraction_summarizer: &crate::config::SummarizerConfig,
24372516
) -> Result<()> {
2517+
// Async path: when a provider is configured, drain the
2518+
// pending_extractions queue in a detached subprocess and return
2519+
// immediately so Claude Code doesn't kill us with "Hook cancelled".
2520+
// The transcript-extract path below stays for back-compat (it's
2521+
// still cheap when --no-embeddings is set).
2522+
if extraction_summarizer.provider != "none" {
2523+
if let Ok(self_path) = std::env::current_exe() {
2524+
// `nohup`-style detach: redirect std{in,out,err} to /dev/null
2525+
// and let the child outlive us. The child reads the same
2526+
// config so it picks up the same provider.
2527+
let mut cmd = std::process::Command::new(&self_path);
2528+
cmd.arg("extract-pending").arg("--limit").arg("20");
2529+
cmd.stdin(std::process::Stdio::null())
2530+
.stdout(std::process::Stdio::null())
2531+
.stderr(std::process::Stdio::null());
2532+
// On Unix, set a new session so the child survives our exit.
2533+
#[cfg(unix)]
2534+
{
2535+
use std::os::unix::process::CommandExt;
2536+
unsafe {
2537+
cmd.pre_exec(|| {
2538+
// Detach from controlling tty / process group.
2539+
libc::setsid();
2540+
Ok(())
2541+
});
2542+
}
2543+
}
2544+
match cmd.spawn() {
2545+
Ok(_) => {
2546+
eprintln!(
2547+
"[icm] session-end: forked async LLM worker (provider={})",
2548+
extraction_summarizer.provider,
2549+
);
2550+
}
2551+
Err(e) => {
2552+
eprintln!(
2553+
"[icm] session-end: fork failed ({e}), falling back to inline transcript extract",
2554+
);
2555+
return extract_from_hook_transcript(
2556+
store,
2557+
embedder,
2558+
memory_cfg,
2559+
"session-end",
2560+
);
2561+
}
2562+
}
2563+
return Ok(());
2564+
}
2565+
}
2566+
// Inline path (legacy): scan transcript and extract via fastembed.
24382567
extract_from_hook_transcript(store, embedder, memory_cfg, "session-end")
24392568
}
24402569

@@ -4562,6 +4691,146 @@ fn resolve_consolidate_provider(
45624691
})
45634692
}
45644693

4694+
/// Process the async extraction queue.
4695+
///
4696+
/// Reads up to `limit` oldest pending rows from `pending_extractions`,
4697+
/// concatenates their raw outputs, asks the configured LLM CLI to extract
4698+
/// decisions / architecture / preferences, parses the bullet response,
4699+
/// and stores the results as Memory rows. Successfully-processed rows
4700+
/// are deleted from the queue regardless of whether facts were
4701+
/// extracted (so an output with no extractable content doesn't loop
4702+
/// forever).
4703+
#[allow(clippy::too_many_arguments)]
4704+
fn cmd_extract_pending(
4705+
store: &SqliteStore,
4706+
cfg: &config::SummarizerConfig,
4707+
limit: usize,
4708+
cli_provider: Option<&str>,
4709+
cli_model: Option<&str>,
4710+
dry_run: bool,
4711+
) -> Result<()> {
4712+
let pending = store.list_pending_extractions(limit)?;
4713+
if pending.is_empty() {
4714+
println!("No pending extractions.");
4715+
return Ok(());
4716+
}
4717+
4718+
let provider_kind = resolve_consolidate_provider(cfg, cli_provider)?;
4719+
if matches!(provider_kind, summarizer::ProviderKind::None) {
4720+
bail!(
4721+
"extraction.summarizer.provider = \"none\" — nothing would be \
4722+
extracted. Set it to auto/claude/codex/gemini/ollama, or \
4723+
pass --provider on this command."
4724+
);
4725+
}
4726+
4727+
// Build a single LLM prompt covering all rows. The prompt asks for
4728+
// a structured bullet list so we can deterministically split into
4729+
// facts. Each bullet becomes one Memory.
4730+
let mut joined = String::new();
4731+
let mut ids: Vec<String> = Vec::new();
4732+
let mut project_for_each: Vec<String> = Vec::new();
4733+
for (id, project, tool_name, raw, _ts) in &pending {
4734+
joined.push_str(&format!("=== tool={tool_name} project={project} ===\n"));
4735+
joined.push_str(raw);
4736+
joined.push_str("\n\n");
4737+
ids.push(id.clone());
4738+
project_for_each.push(project.clone());
4739+
}
4740+
4741+
let model_owned: Option<String> = cli_model.map(|s| s.to_string()).or_else(|| {
4742+
if cfg.model.is_empty() {
4743+
None
4744+
} else {
4745+
Some(cfg.model.clone())
4746+
}
4747+
});
4748+
let max_tokens = cfg.max_tokens;
4749+
4750+
let prompt = format!(
4751+
"From the tool outputs below, extract durable facts that an AI agent \
4752+
should remember across sessions: architecture decisions, resolved \
4753+
errors, user preferences, project-specific context.\n\
4754+
\n\
4755+
Output format: one fact per line, prefixed with `- `. Each fact \
4756+
must be a complete, standalone sentence — no pronouns referring to \
4757+
missing context. Skip routine noise (file listings, build progress, \
4758+
git status). If nothing durable is present, output exactly `- (none)`.\n\
4759+
\n\
4760+
{joined}",
4761+
);
4762+
4763+
if dry_run {
4764+
println!("=== Dry run ===");
4765+
println!("provider: {provider_kind:?}");
4766+
println!(
4767+
"model: {}",
4768+
model_owned.as_deref().unwrap_or("<provider default>")
4769+
);
4770+
println!("rows: {}", pending.len());
4771+
println!("--- prompt ---");
4772+
println!("{prompt}");
4773+
return Ok(());
4774+
}
4775+
4776+
let provider = summarizer::make_summarizer(provider_kind)?;
4777+
let req = summarizer::SummarizeRequest {
4778+
prompt: &prompt,
4779+
model: model_owned.as_deref(),
4780+
max_tokens,
4781+
timeout: std::time::Duration::from_secs(cfg.timeout_secs),
4782+
};
4783+
let response = match provider.summarize(&req) {
4784+
Ok(s) if !s.trim().is_empty() => s,
4785+
Ok(_) => {
4786+
eprintln!("[extract-pending] provider returned empty output");
4787+
// Still drop the rows so we don't loop forever on bad inputs.
4788+
store.delete_pending_extractions(&ids)?;
4789+
return Ok(());
4790+
}
4791+
Err(e) => {
4792+
eprintln!("[extract-pending] provider failed: {e}");
4793+
// Don't delete — let the next run retry.
4794+
return Err(e);
4795+
}
4796+
};
4797+
4798+
// Parse bullet output into individual facts.
4799+
let mut stored = 0usize;
4800+
for line in response.lines() {
4801+
let line = line.trim();
4802+
let fact = line
4803+
.strip_prefix("- ")
4804+
.or_else(|| line.strip_prefix("* "))
4805+
.unwrap_or(line)
4806+
.trim();
4807+
if fact.is_empty() || fact == "(none)" || fact.eq_ignore_ascii_case("none") {
4808+
continue;
4809+
}
4810+
// Use the first row's project as the topic anchor — most batches
4811+
// will be from a single session anyway. Multi-project batches
4812+
// get a slightly weaker per-fact attribution; not worth more
4813+
// ceremony in v1.
4814+
let project = project_for_each
4815+
.first()
4816+
.map(|s| s.as_str())
4817+
.unwrap_or("project");
4818+
let topic = format!("context-{project}");
4819+
let mem = Memory::new(topic, fact.to_string(), Importance::Medium);
4820+
store.store(mem)?;
4821+
stored += 1;
4822+
}
4823+
4824+
let deleted = store.delete_pending_extractions(&ids)?;
4825+
println!(
4826+
"Processed {} rows, extracted {} facts, dequeued {}.",
4827+
pending.len(),
4828+
stored,
4829+
deleted,
4830+
);
4831+
Ok(())
4832+
}
4833+
45654834
/// Lexical fallback: concat all summaries with " | " — the historical behavior
45664835
/// preserved as a safe baseline when no LLM is configured or available.
45674836
fn lexical_consolidate(memories: &[Memory]) -> String {

crates/icm-store/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ chrono = { workspace = true }
1313
tracing = { workspace = true }
1414
lru = { workspace = true }
1515
sha2 = { workspace = true }
16+
ulid = { workspace = true }
1617

1718
[dev-dependencies]
1819
tempfile = "3"

0 commit comments

Comments
 (0)