Skip to content

Commit 928a4a2

Browse files
tmchow and claude committed
feat(gain): add --quality flag for filter quality analysis
Add `rtk gain --quality` / `rtk gain -Q`, which analyzes tracking data for filter quality signals:

- Retry detection: commands re-run within 60 seconds (possible retries caused by insufficient filtered output)
- Low savings detection: filters consistently delivering <30% savings (excludes proxy/passthrough commands)
- Parse failures summary: filters that fell back to raw output
- Net savings estimate: gross savings minus retry overhead

Uses the LAG window function for O(n log n) retry detection with base-command grouping (e.g., "git diff" groups all git diff variants).

Addresses #831 (AI retry loops from over-filtering) and #839 (empirical benchmarks for savings claims).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2d2f199 commit 928a4a2

3 files changed

Lines changed: 258 additions & 0 deletions

File tree

src/analytics/gain.rs

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,3 +725,129 @@ fn show_failures(tracker: &Tracker) -> Result<()> {
725725

726726
Ok(())
727727
}
728+
729+
pub fn show_quality(tracker: &Tracker) -> Result<()> {
730+
let retries = tracker
731+
.get_retry_commands()
732+
.context("Failed to load retry data")?;
733+
let low_savings = tracker
734+
.get_low_savings_commands()
735+
.context("Failed to load low-savings data")?;
736+
let pf_summary = tracker
737+
.get_parse_failure_summary()
738+
.context("Failed to load parse failure data")?;
739+
let gross = tracker
740+
.get_gross_savings()
741+
.context("Failed to load gross savings")?;
742+
743+
println!("{}", styled("RTK Filter Quality Report", true));
744+
println!("{}", "═".repeat(60));
745+
println!();
746+
747+
if retries.is_empty() {
748+
println!("{}", styled("Retry Detection", true));
749+
println!("{}", "─".repeat(60));
750+
println!(" No retries detected (commands re-run within 60s).");
751+
println!();
752+
} else {
753+
println!(
754+
"{}",
755+
styled("Retry Detection (commands re-run within 60s)", true)
756+
);
757+
println!("{}", "─".repeat(60));
758+
for r in &retries {
759+
let rate = if r.total_runs > 0 {
760+
(r.retry_count as f64 / r.total_runs as f64) * 100.0
761+
} else {
762+
0.0
763+
};
764+
let retry_word = if r.retry_count == 1 {
765+
"retry "
766+
} else {
767+
"retries"
768+
};
769+
println!(
770+
" {:<20} {} {} / {} runs ({:.1}% retry rate)",
771+
r.base_cmd, r.retry_count, retry_word, r.total_runs, rate
772+
);
773+
}
774+
println!();
775+
}
776+
777+
if low_savings.is_empty() {
778+
println!("{}", styled("Low Savings", true));
779+
println!("{}", "─".repeat(60));
780+
println!(" All filters achieving 30%+ savings.");
781+
println!();
782+
} else {
783+
println!(
784+
"{}",
785+
styled("Low Savings (below 30% — excludes proxy/passthrough)", true)
786+
);
787+
println!("{}", "─".repeat(60));
788+
for ls in &low_savings {
789+
println!(
790+
" {:<20} {:.0}% avg savings (expected 60%+) {} runs",
791+
ls.rtk_cmd, ls.avg_savings_pct, ls.runs
792+
);
793+
}
794+
println!();
795+
}
796+
797+
if pf_summary.total > 0 {
798+
println!(
799+
"{}",
800+
styled(
801+
"Parse Failures (filters that fell back to raw output)",
802+
true
803+
)
804+
);
805+
println!("{}", "─".repeat(60));
806+
for (cmd, count) in &pf_summary.top_commands {
807+
let cmd_display = if cmd.len() > 30 {
808+
format!("{}...", &cmd[..27])
809+
} else {
810+
cmd.clone()
811+
};
812+
println!(" {:<30} {} failures", cmd_display, count);
813+
}
814+
println!();
815+
}
816+
817+
let retry_overhead: i64 = retries.iter().map(|r| r.retry_count as i64 * 800).sum();
818+
let net = gross - retry_overhead;
819+
println!("{}", styled("Net Savings", true));
820+
println!("{}", "─".repeat(60));
821+
println!(
822+
" Gross savings: {} tokens saved",
823+
format_tokens(gross as usize)
824+
);
825+
if retry_overhead > 0 {
826+
println!(
827+
" Retry overhead: ~{} tokens (est. from {} retried commands)",
828+
format_tokens(retry_overhead as usize),
829+
retries.iter().map(|r| r.retry_count).sum::<usize>()
830+
);
831+
}
832+
println!(
833+
" Net savings: {} tokens",
834+
format_tokens(net.max(0) as usize)
835+
);
836+
if gross > 0 {
837+
let efficiency = (net.max(0) as f64 / gross as f64) * 100.0;
838+
println!(" Efficiency: {:.0}%", efficiency);
839+
}
840+
println!();
841+
842+
let has_issues = !retries.is_empty() || !low_savings.is_empty() || pf_summary.total > 0;
843+
if has_issues {
844+
println!(
845+
"{}",
846+
"Review the sections above for potential filter quality improvements.".yellow()
847+
);
848+
} else {
849+
println!("{}", "No quality issues detected. ✓".green());
850+
}
851+
852+
Ok(())
853+
}

src/core/tracking.rs

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,111 @@ impl Tracker {
478478
})
479479
}
480480

481+
/// Detect commands re-run within 60 seconds (possible retries).
482+
pub fn get_retry_commands(&self) -> Result<Vec<RetryRecord>> {
483+
let mut stmt = self.conn.prepare(
484+
"WITH base AS (
485+
SELECT
486+
CASE
487+
WHEN original_cmd LIKE 'git %' THEN
488+
CASE
489+
WHEN instr(substr(original_cmd, 5), ' ') > 0
490+
THEN substr(original_cmd, 1, instr(substr(original_cmd, 5), ' ') + 3)
491+
ELSE original_cmd
492+
END
493+
ELSE
494+
CASE
495+
WHEN instr(original_cmd, ' ') > 0
496+
THEN substr(original_cmd, 1, instr(original_cmd, ' ') - 1)
497+
ELSE original_cmd
498+
END
499+
END AS base_cmd,
500+
timestamp,
501+
LAG(timestamp) OVER (
502+
PARTITION BY
503+
CASE
504+
WHEN original_cmd LIKE 'git %' THEN
505+
CASE
506+
WHEN instr(substr(original_cmd, 5), ' ') > 0
507+
THEN substr(original_cmd, 1, instr(substr(original_cmd, 5), ' ') + 3)
508+
ELSE original_cmd
509+
END
510+
ELSE
511+
CASE
512+
WHEN instr(original_cmd, ' ') > 0
513+
THEN substr(original_cmd, 1, instr(original_cmd, ' ') - 1)
514+
ELSE original_cmd
515+
END
516+
END
517+
ORDER BY timestamp
518+
) AS prev_ts
519+
FROM commands
520+
)
521+
SELECT base_cmd,
522+
COUNT(*) AS total_runs,
523+
SUM(CASE
524+
WHEN prev_ts IS NOT NULL
525+
AND (strftime('%s', timestamp) - strftime('%s', prev_ts)) < 60
526+
THEN 1 ELSE 0
527+
END) AS retry_count
528+
FROM base
529+
GROUP BY base_cmd
530+
HAVING retry_count > 0
531+
ORDER BY retry_count DESC
532+
LIMIT 10",
533+
)?;
534+
535+
let rows = stmt
536+
.query_map([], |row| {
537+
Ok(RetryRecord {
538+
base_cmd: row.get(0)?,
539+
total_runs: row.get::<_, i64>(1)? as usize,
540+
retry_count: row.get::<_, i64>(2)? as usize,
541+
})
542+
})?
543+
.collect::<Result<Vec<_>, _>>()?;
544+
545+
Ok(rows)
546+
}
547+
548+
/// Find commands with consistently low token savings (<30%).
549+
pub fn get_low_savings_commands(&self) -> Result<Vec<LowSavingsRecord>> {
550+
let mut stmt = self.conn.prepare(
551+
"SELECT rtk_cmd, COUNT(*) AS runs, AVG(savings_pct) AS avg_savings
552+
FROM commands
553+
WHERE savings_pct < 30.0
554+
AND input_tokens > 50
555+
AND rtk_cmd NOT LIKE '%proxy%'
556+
AND rtk_cmd NOT LIKE '%fallback%'
557+
GROUP BY rtk_cmd
558+
HAVING runs >= 3
559+
ORDER BY avg_savings ASC
560+
LIMIT 10",
561+
)?;
562+
563+
let rows = stmt
564+
.query_map([], |row| {
565+
Ok(LowSavingsRecord {
566+
rtk_cmd: row.get(0)?,
567+
runs: row.get::<_, i64>(1)? as usize,
568+
avg_savings_pct: row.get(2)?,
569+
})
570+
})?
571+
.collect::<Result<Vec<_>, _>>()?;
572+
573+
Ok(rows)
574+
}
575+
576+
/// Get the total gross savings from all tracked commands.
577+
pub fn get_gross_savings(&self) -> Result<i64> {
578+
let total: i64 = self.conn.query_row(
579+
"SELECT COALESCE(SUM(saved_tokens), 0) FROM commands",
580+
[],
581+
|row| row.get(0),
582+
)?;
583+
Ok(total)
584+
}
585+
481586
/// Get overall summary statistics across all recorded commands.
482587
///
483588
/// Returns aggregated metrics including:
@@ -996,6 +1101,22 @@ pub struct ParseFailureSummary {
9961101
pub recent: Vec<ParseFailureRecord>,
9971102
}
9981103

1104+
/// A command detected as potentially retried (re-run within 60 seconds).
///
/// One record describes one group of runs that share the same base command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RetryRecord {
    /// Base command the runs were grouped under.
    pub base_cmd: String,
    /// Total number of recorded runs in the group.
    pub total_runs: usize,
    /// Number of those runs that followed the previous run of the same group
    /// by less than 60 seconds.
    pub retry_count: usize,
}
1111+
1112+
/// A command with consistently low token savings.
#[derive(Debug, Clone, PartialEq)]
pub struct LowSavingsRecord {
    /// The `rtk_cmd` value the runs were grouped under.
    pub rtk_cmd: String,
    /// Number of runs that contributed to the average.
    pub runs: usize,
    /// Mean `savings_pct` across those runs, as a percentage.
    pub avg_savings_pct: f64,
}
1119+
9991120
/// Record a parse failure without ever crashing.
10001121
/// Silently ignores all errors — used in the fallback path.
10011122
pub fn record_parse_failure_silent(raw_command: &str, error_message: &str, succeeded: bool) {

src/main.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,9 @@ enum Commands {
403403
/// Show parse failure log (commands that fell back to raw execution)
404404
#[arg(short = 'F', long)]
405405
failures: bool,
406+
/// Show filter quality analysis (retry detection, low-savings commands)
407+
#[arg(short = 'Q', long)]
408+
quality: bool,
406409
},
407410

408411
/// Claude Code economics: spending (ccusage) vs savings (rtk) analysis
@@ -1731,7 +1734,15 @@ fn main() -> Result<()> {
17311734
all,
17321735
format,
17331736
failures,
1737+
quality,
17341738
} => {
1739+
if quality {
1740+
let tracker = crate::core::tracking::Tracker::new()
1741+
.context("Failed to initialize tracking database")?;
1742+
analytics::gain::show_quality(&tracker)?;
1743+
return Ok(());
1744+
}
1745+
17351746
analytics::gain::run(
17361747
project, // added: pass project flag
17371748
graph,

0 commit comments

Comments
 (0)