Skip to content

Commit 0f1805b

Browse files
committed
feat: implement command-aware content classification heuristics for improved pipeline accuracy
1 parent dbded1f commit 0f1805b

10 files changed

Lines changed: 420 additions & 380 deletions

File tree

benches/pipeline.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ fn bench_classify(c: &mut Criterion) {
2525

2626
for (name, input) in &fixtures {
2727
c.bench_with_input(BenchmarkId::new("classify", name), input, |b, i| {
28-
b.iter(|| classifier::classify(i))
28+
b.iter(|| classifier::classify(i, None))
2929
});
3030
}
3131
}
@@ -35,7 +35,7 @@ fn bench_full_pipeline(c: &mut Criterion) {
3535

3636
c.bench_function("full_pipeline_cargo_build", |b| {
3737
b.iter(|| {
38-
let ctype = classifier::classify(input);
38+
let ctype = classifier::classify(input, None);
3939
let segments = scorer::score_segments(input, &ctype, None);
4040
let distiller = distillers::get_distiller(&ctype);
4141
distiller.distill(&segments, input, None)
@@ -49,7 +49,7 @@ fn bench_hook_roundtrip(c: &mut Criterion) {
4949

5050
c.bench_function("hook_roundtrip_50kb", |b| {
5151
b.iter(|| {
52-
let ctype = classifier::classify(&large_input);
52+
let ctype = classifier::classify(&large_input, None);
5353
let segments = scorer::score_segments(&large_input, &ctype, None);
5454
let distiller = distillers::get_distiller(&ctype);
5555
distiller.distill(&segments, &large_input, None)

src/cli/stats.rs

Lines changed: 61 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -137,13 +137,21 @@ pub fn run(args: &[String], store: &Store) -> Result<()> {
137137
return Ok(());
138138
}
139139

140-
// Mode detection
141-
let mode = if args.iter().any(|a| a == "--detail") {
140+
let detail_flag = args.iter().any(|a| a == "--detail");
141+
let type_flag = args.iter().any(|a| a == "--by-type");
142+
let json_flag = args.iter().any(|a| a == "--json");
143+
let filter_flag = args
144+
.iter()
145+
.any(|a| a == "--today" || a == "--week" || a == "--month" || a == "--all-commands");
146+
147+
let mode = if detail_flag {
142148
"detail"
143-
} else if args.iter().any(|a| a == "--by-type") {
149+
} else if type_flag {
144150
"by-type"
145-
} else if args.iter().any(|a| a == "--json") {
151+
} else if json_flag {
146152
"json"
153+
} else if filter_flag {
154+
"detail" // Implicit detail mode for scoped queries
147155
} else {
148156
"default"
149157
};
@@ -227,7 +235,7 @@ fn run_default(store: &Store) -> Result<()> {
227235

228236
if !top_types.is_empty() {
229237
println!("\n {}", "Top Savings by Type:".bold().bright_white());
230-
for (content_type, count, pct, _) in &top_types {
238+
for (content_type, count, pct, commands) in &top_types {
231239
let bar = format_bar_with_empty(*pct);
232240
let bar_colored = if *pct > 80.0 {
233241
bar.bright_green()
@@ -237,12 +245,23 @@ fn run_default(store: &Store) -> Result<()> {
237245
bar.bright_red()
238246
};
239247

248+
let label_display = if content_type == "Unknown" {
249+
let cmds = truncate_commands(commands, 2);
250+
if !cmds.is_empty() {
251+
format!("Unknown ({})", cmds)
252+
} else {
253+
"Unknown".to_string()
254+
}
255+
} else {
256+
content_type.clone()
257+
};
258+
240259
println!(
241260
" {:<13} {} {:>5.1}% ({}x)",
242-
content_type.bright_cyan(),
261+
label_display.bright_cyan(),
243262
bar_colored,
244263
pct,
245-
count,
264+
count
246265
);
247266
}
248267
}
@@ -368,13 +387,18 @@ fn run_detail(args: &[String], store: &Store) -> Result<()> {
368387
);
369388
}
370389

371-
// By Command — top 10, filter 0% savings
390+
// By Command — top 10 (or all if requested), filter 0% savings
372391
let filters = store.filter_breakdown(since)?;
373-
let display_filters: Vec<_> = filters
374-
.iter()
375-
.filter(|(_, _, pct)| *pct > 0.0)
376-
.take(10)
377-
.collect();
392+
let all_flag = args.iter().any(|a| a == "--all-commands");
393+
let display_filters: Vec<_> = if all_flag {
394+
filters.iter().collect()
395+
} else {
396+
filters
397+
.iter()
398+
.filter(|(_, _, pct)| *pct > 0.0)
399+
.take(10)
400+
.collect()
401+
};
378402

379403
if !display_filters.is_empty() {
380404
println!("\n {}", "By Command:".bold().bright_white());
@@ -416,16 +440,30 @@ fn run_detail(args: &[String], store: &Store) -> Result<()> {
416440
);
417441
}
418442

419-
if filters.len() > 10 {
420-
println!(
421-
"\n {}",
422-
format!(
423-
"Run `omni stats --detail --all-commands` for all {} commands.",
424-
filters.len()
425-
)
426-
.bright_black()
427-
.italic()
428-
);
443+
if !all_flag {
444+
let filtered_count = filters.iter().filter(|(_, _, pct)| *pct > 0.0).count();
445+
let hidden_zero = filters.len() - filtered_count;
446+
447+
if filtered_count > 10 {
448+
println!(
449+
"\n {}",
450+
format!(
451+
"Showing top 10 of {} commands with active savings.",
452+
filtered_count
453+
)
454+
.bright_black()
455+
.italic()
456+
);
457+
}
458+
459+
if hidden_zero > 0 {
460+
println!(
461+
" {}",
462+
format!("({} noise commands with 0% savings hidden. Use --all-commands to see all).", hidden_zero)
463+
.bright_black()
464+
.italic()
465+
);
466+
}
429467
}
430468
}
431469

src/hooks/pipe.rs

Lines changed: 58 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -179,47 +179,65 @@ fn distill(
179179
}
180180
}
181181

182-
let (output, filter_name, ctype, rewind_hash, kept_count, dropped_count) =
183-
if let Some(filter) = matched_toml {
184-
let out = filter.apply(&input_text);
185-
(out, filter.name.clone(), ContentType::Unknown, None, 0, 0)
186-
} else {
187-
let c = classifier::classify(&input_text);
188-
189-
let collapse_result = collapse::collapse(&input_text, &c);
190-
let effective_input = collapse_result.collapsed_lines.join("\n");
191-
192-
let active_session_opt = session.as_ref().and_then(|m| m.lock().ok());
193-
let scored_segments =
194-
scorer::score_segments(&effective_input, &c, active_session_opt.as_deref());
195-
drop(active_session_opt);
196-
197-
let compose_config = composer::ComposeConfig::default();
198-
let decision = composer::decide_rewind(&scored_segments, &c);
199-
200-
let k_count = scored_segments
201-
.iter()
202-
.filter(|s| s.final_score() >= compose_config.threshold)
203-
.count();
204-
let d_count = scored_segments.len() - k_count;
205-
206-
let store_for_compose = if decision.should_store { store } else { None };
207-
208-
let (out, r_hash) = composer::compose(
209-
scored_segments,
210-
if decision.should_store {
211-
Some(input_text.clone())
212-
} else {
213-
None
214-
}, // Temporary clone for compose drops
215-
&compose_config,
216-
store_for_compose,
217-
&input_text,
218-
&c,
219-
);
182+
let (output, filter_name, ctype, rewind_hash, kept_count, dropped_count) = if let Some(filter) =
183+
matched_toml
184+
{
185+
let out = filter.apply(&input_text);
186+
(out, filter.name.clone(), ContentType::Unknown, None, 0, 0)
187+
} else {
188+
let c = classifier::classify(&input_text, command_name);
220189

221-
(out, format!("{:?}", c), c, r_hash, k_count, d_count)
222-
};
190+
let collapse_result = collapse::collapse(&input_text, &c);
191+
let effective_input = collapse_result.collapsed_lines.join("\n");
192+
193+
let active_session_opt = session.as_ref().and_then(|m| m.lock().ok());
194+
let scored_segments =
195+
scorer::score_segments(&effective_input, &c, active_session_opt.as_deref());
196+
197+
let distiller = crate::distillers::get_distiller(&c);
198+
let mut out =
199+
distiller.distill(&scored_segments, &input_text, active_session_opt.as_deref());
200+
201+
let compose_config = composer::ComposeConfig::default();
202+
let decision = composer::decide_rewind(&scored_segments, &c);
203+
204+
let k_count = scored_segments
205+
.iter()
206+
.filter(|s| s.final_score() >= compose_config.threshold)
207+
.count();
208+
let d_count = scored_segments.len() - k_count;
209+
210+
crate::pipeline::composer::evaluate_learning(
211+
&c,
212+
&input_text,
213+
scored_segments.len(),
214+
d_count,
215+
command_name.unwrap_or(""),
216+
);
217+
218+
let mut r_hash = None;
219+
if decision.should_store
220+
&& let Some(s) = store
221+
{
222+
let hash = s.store_rewind(&input_text);
223+
out.push_str(&format!(
224+
"\n{} {} {} {} lines. The hash {} stores the full output in RewindStore for retrieval.\n",
225+
"⏺".cyan(),
226+
"OMNI".bold().bright_white(),
227+
"distilled".bright_green(),
228+
d_count,
229+
hash.cyan().bold()
230+
));
231+
r_hash = Some(hash);
232+
}
233+
234+
if out.len() > compose_config.max_output_chars {
235+
out.truncate(compose_config.max_output_chars);
236+
out.push_str("\n[OMNI: output truncated]\n");
237+
}
238+
239+
(out, format!("{:?}", c), c, r_hash, k_count, d_count)
240+
};
223241

224242
PipelineResult {
225243
session_id,

src/hooks/post_tool.rs

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ pub fn process_payload(
115115
(output, fname, None)
116116
} else {
117117
// Fallback to Rust distiller pipeline
118-
let ctype = classifier::classify(&content);
118+
let ctype = classifier::classify(&content, Some(&command));
119119

120120
// Pre-processing: collapse repetitive lines before scoring
121121
let collapse_result = collapse::collapse(&content, &ctype);
@@ -158,28 +158,35 @@ pub fn process_payload(
158158

159159
let decision = composer::decide_rewind(&scored_segments, ctype);
160160

161-
if decision.should_store {
162-
if let Some(ref s) = store {
163-
let hash = s.store_rewind(&content);
164-
let dropped_lines = scored_segments
165-
.iter()
166-
.filter(|s| s.final_score() < decision.threshold)
167-
.map(|s| s.content.lines().count())
168-
.sum::<usize>();
169-
170-
final_out.push_str(&format!(
171-
"\n[OMNI: {} lines omitted — omni_retrieve(\"{}\") for full output]",
172-
dropped_lines, hash
173-
));
174-
rewind_hash = hash;
175-
} else {
176-
let dropped_lines = scored_segments
177-
.iter()
178-
.filter(|s| s.final_score() < decision.threshold)
179-
.map(|s| s.content.lines().count())
180-
.sum::<usize>();
181-
final_out.push_str(&format!("\n[OMNI: {} lines omitted]", dropped_lines));
182-
}
161+
let dropped_lines = scored_segments
162+
.iter()
163+
.filter(|s| s.final_score() < decision.threshold)
164+
.map(|s| s.content.lines().count())
165+
.sum::<usize>();
166+
167+
// Trigger Auto-Learn
168+
crate::pipeline::composer::evaluate_learning(
169+
ctype,
170+
&content,
171+
scored_segments.len(),
172+
scored_segments
173+
.iter()
174+
.filter(|s| s.final_score() < decision.threshold)
175+
.count(),
176+
&command,
177+
);
178+
179+
if decision.should_store
180+
&& let Some(ref s) = store
181+
{
182+
let hash = s.store_rewind(&content);
183+
final_out.push_str(&format!(
184+
"\n[OMNI: {} lines omitted — omni_retrieve(\"{}\") for full output]",
185+
dropped_lines, hash
186+
));
187+
rewind_hash = hash;
188+
} else if decision.should_store {
189+
final_out.push_str(&format!("\n[OMNI: {} lines omitted]", dropped_lines));
183190
}
184191

185192
// Update session state (only for Rust pipeline)
@@ -243,6 +250,13 @@ pub fn process_payload(
243250
}
244251
}
245252

253+
// Safety Truncation
254+
let max_chars = composer::ComposeConfig::default().max_output_chars;
255+
if final_out.len() > max_chars {
256+
final_out.truncate(max_chars);
257+
final_out.push_str("\n[OMNI: output truncated]");
258+
}
259+
246260
serde_json::to_string(&HookOutput {
247261
hook_specific_output: HookSpecificOutput {
248262
hook_event_name: "PostToolUse",

src/mcp/server.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ impl OmniServer {
9898
description = "Measure how much signal vs noise in text"
9999
)]
100100
pub async fn omni_density(&self, #[tool(param)] text: String) -> String {
101-
let content_type = classify(&text);
101+
let content_type = classify(&text, None);
102102
let current_session = self.session.lock().unwrap().clone();
103103

104104
let segments = score_segments(&text, &content_type, Some(&current_session));

0 commit comments

Comments
 (0)