Skip to content

Commit a46a64a

Browse files
authored
Enhance command-based content type inference and distillation logic (#56)
<!-- AI-PR-DESCRIPTION-START -->
## PR Auto Describe

## Summary
This PR overhauls Omni's core pipeline and CLI stack, replacing the legacy content-type classification model with command-aware routing, cleaning up deprecated code, and streamlining user-facing reporting and performance.

---

## Key Changes
1. Replaced legacy content-based pipeline routing with command-first profile resolution
2. Rewrote `omni stats` CLI: removed `--by-type` flag, added `--project` per-project analytics
3. Deleted unused distillers and deprecated classifier/composer modules
4. Updated hook logic with auto-sibling command detection for pipe mode

---

## Detailed Breakdown

### Core Pipeline Overhaul
- Removed entire `src/pipeline/classifier.rs` legacy content classifier
- Rewrote distiller system: removed `content_type()` from `Distiller` trait, replaced `get_distiller()` with command-based `distill_with_command()` in `src/distillers/mod.rs`
- Deleted unused distillers: `infra.rs`, `log.rs`, `tabular.rs`
- Updated all distiller tests to use command strings instead of ContentType, updated snapshot outputs for docker/kubectl/nginx workflows
- Rewrote `collapse.rs` to use `CollapseMode` enum instead of ContentType for content-aware collapsing
- Removed `src/pipeline/composer.rs`, moving rewind/learning logic directly into hook code

### CLI & Stats Updates
- **`src/cli/stats.rs`**: Removed `--by-type` mode, added `--project` flag for per-project stats, reworked default view to show top commands, updated JSON output to return command stats instead of content-type data
- Replaced hardcoded $3/1M token cost calculation with config-driven pricing via new `src/guard/config.rs`
- Renamed all Indonesian-named test functions in `init.rs`, `session.rs`, `stats.rs` to English
- Deleted old `truncate_commands()` function and associated tests
- Updated bench targets in `benches/pipeline.rs` from `bench_classify` to `bench_registry_resolve` testing `scorer::score_with_command`

### Hook & Runtime Changes
- Rewrote `src/hooks/pipe.rs` and `src/hooks/post_tool.rs`: removed classifier calls, added auto-sibling command detection for pipe mode, updated rewind/route logic and auto-learn triggers
- Added `src/guard/config.rs` for loading user pricing/config settings
- Updated `src/main.rs` to detect pipe commands via env vars, added background DB cleanup thread
- Added new MCP `omni_search` tool in `src/mcp/server.rs`
- Removed Intelligence Consistency check from `src/cli/doctor.rs`

---

## Notes
- Distiller snapshot outputs have been updated for docker, kubectl, and nginx workflows to match new command-based distillation logic

---

## Breaking Changes
1. **Public API Break**: Removed `pipeline::classifier::classify`, `distillers::get_distiller()`, and deprecated `ContentType` enum usage across core code
2. **CLI Break**: Removed `--by-type` flag from `omni stats`; JSON output now returns `commands` instead of `content_types`
3. **Distiller Trait Break**: `Distiller` trait no longer includes the `content_type()` method
4. **Hook Logic Break**: Pipe mode now requires command context, and pipeline results no longer include ContentType fields

_Last updated: 2026-04-12 04:30:58_
<!-- AI-PR-DESCRIPTION-END -->
2 parents 2eea101 + a4f4013 commit a46a64a

40 files changed

Lines changed: 1238 additions & 2031 deletions

benches/pipeline.rs

Lines changed: 14 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,18 @@
11
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
22
use omni::distillers;
3-
use omni::pipeline::{classifier, scorer};
3+
use omni::pipeline::scorer;
44
use std::time::Duration;
55

6-
fn bench_classify(c: &mut Criterion) {
7-
let fixtures = [
8-
(
9-
"git_diff",
10-
include_str!("../tests/fixtures/git_diff_multi_file.txt"),
11-
),
12-
(
13-
"cargo_build",
14-
include_str!("../tests/fixtures/cargo_build_errors.txt"),
15-
),
16-
(
17-
"kubectl",
18-
include_str!("../tests/fixtures/kubectl_pods_mixed.txt"),
19-
),
20-
(
21-
"nginx_log",
22-
include_str!("../tests/fixtures/nginx_access_log.txt"),
23-
),
6+
fn bench_registry_resolve(c: &mut Criterion) {
7+
let commands = [
8+
("git_diff", "git diff HEAD~1"),
9+
("cargo_build", "cargo build"),
10+
("kubectl", "kubectl get pods"),
2411
];
2512

26-
for (name, input) in &fixtures {
27-
c.bench_with_input(BenchmarkId::new("classify", name), input, |b, i| {
28-
b.iter(|| classifier::classify(i, None))
13+
for (name, cmd) in &commands {
14+
c.bench_with_input(BenchmarkId::new("registry_resolve", name), cmd, |b, &i| {
15+
b.iter(|| scorer::score_with_command("", i, None))
2916
});
3017
}
3118
}
@@ -35,10 +22,8 @@ fn bench_full_pipeline(c: &mut Criterion) {
3522

3623
c.bench_function("full_pipeline_cargo_build", |b| {
3724
b.iter(|| {
38-
let ctype = classifier::classify(input, None);
39-
let segments = scorer::score_segments(input, &ctype, None);
40-
let distiller = distillers::get_distiller(&ctype);
41-
distiller.distill(&segments, input, None)
25+
let segments = scorer::score_with_command(input, "cargo build", None);
26+
distillers::distill_with_command(&segments, input, "cargo build", None)
4227
})
4328
});
4429
}
@@ -49,10 +34,8 @@ fn bench_hook_roundtrip(c: &mut Criterion) {
4934

5035
c.bench_function("hook_roundtrip_50kb", |b| {
5136
b.iter(|| {
52-
let ctype = classifier::classify(&large_input, None);
53-
let segments = scorer::score_segments(&large_input, &ctype, None);
54-
let distiller = distillers::get_distiller(&ctype);
55-
distiller.distill(&segments, &large_input, None)
37+
let segments = scorer::score_with_command(&large_input, "git diff", None);
38+
distillers::distill_with_command(&segments, &large_input, "git diff", None)
5639
})
5740
});
5841
}
@@ -62,6 +45,6 @@ criterion_group! {
6245
config = Criterion::default()
6346
.measurement_time(Duration::from_secs(5))
6447
.warm_up_time(Duration::from_secs(2));
65-
targets = bench_classify, bench_full_pipeline, bench_hook_roundtrip
48+
targets = bench_registry_resolve, bench_full_pipeline, bench_hook_roundtrip
6649
}
6750
criterion_main!(benches);

src/cli/doctor.rs

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -463,47 +463,6 @@ pub fn run(args: &[String]) -> anyhow::Result<()> {
463463
println!(" {:<15} none", "Project:".bright_black());
464464
}
465465

466-
// 10. Intelligence Consistency
467-
println!("\n {}", "Intelligence:".bold().bright_white());
468-
if let Ok(store) = Store::open() {
469-
if fix_mode {
470-
match store.reclassify_historical_data() {
471-
Ok(count) => {
472-
if count > 0 {
473-
println!(
474-
" {:<15} {} records upgraded to new categories {}",
475-
"Upgrade:".bright_black(),
476-
count.to_string().yellow().bold(),
477-
"[FIXED]".green().bold()
478-
);
479-
} else {
480-
println!(
481-
" {:<15} historical statistics are up to date {}",
482-
"Status:".bright_black(),
483-
"[OK]".green().bold()
484-
);
485-
}
486-
}
487-
Err(e) => {
488-
println!(
489-
" {:<15} upgrade failed: {} {}",
490-
"Upgrade:".bright_black(),
491-
e,
492-
"[ERROR]".red().bold()
493-
);
494-
}
495-
}
496-
} else {
497-
// Diagnostic mode: check how many could be upgraded
498-
// For now, simpler to just say 'Run with --fix to upgrade'
499-
println!(
500-
" {:<15} Run with {} to upgrade historical stats",
501-
"Status:".bright_black(),
502-
"--fix".cyan()
503-
);
504-
}
505-
}
506-
507466
if let Some(latest) = crate::guard::update::check() {
508467
crate::guard::update::print_notification(&latest);
509468
}

src/cli/init.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ mod tests {
415415
}
416416

417417
#[test]
418-
fn test_init_hook_idempotent_run_2x_tidak_duplicate() {
418+
fn test_init_hook_idempotent_run_2x_not_duplicate() {
419419
let mut val = json!({});
420420
install_omni_hooks(&mut val, "/usr/bin/omni");
421421

src/cli/learn.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ pub fn run_learn(args: &[String]) -> Result<()> {
9191
.bright_white()
9292
);
9393
let report = crate::pipeline::toml_filter::run_inline_tests(
94-
&crate::pipeline::toml_filter::load_all_filters(),
94+
crate::pipeline::toml_filter::load_all_filters(),
9595
);
9696
let total = report.passes + report.failures.len();
9797

src/cli/session.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ mod tests {
496496
}
497497

498498
#[test]
499-
fn test_session_command_tidak_crash_jika_tidak_ada_session() {
499+
fn test_session_command_not_crash_jika_not_ada_session() {
500500
let (store, _dir) = get_store();
501501
let args = vec!["session".to_string()];
502502
let res = run_session(&args, store);

0 commit comments

Comments
 (0)