feat(gain): add --quality flag for filter quality analysis

tmchow · claude · tmchow · commit 928a4a2e77e3 · 2026-03-28T10:10:06.000-07:00
Add `rtk gain --quality` / `rtk gain -Q`, which analyzes tracking data for filter quality signals:

- Retry detection: commands re-run within 60 seconds (possible retries caused by insufficient filtered output)
- Low savings detection: filters consistently delivering <30% savings (excludes proxy/passthrough commands)
- Parse failures summary: filters that fell back to raw output
- Net savings estimate: gross savings minus retry overhead

Uses the LAG window function for O(n log n) retry detection with base-command grouping (e.g., "git diff" groups all git diff variants).

Addresses #831 (AI retry loops from over-filtering) and #839 (empirical benchmarks for savings claims).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/src/analytics/gain.rs b/src/analytics/gain.rs
@@ -725,3 +725,129 @@ fn show_failures(tracker: &Tracker) -> Result<()> {
 
     Ok(())
 }
+
+/// Print the filter quality report behind `rtk gain --quality`.
+///
+/// Sections: retry detection, low savings, parse failures (printed only when any
+/// were recorded) and a net-savings estimate charging 800 tokens per retry.
+///
+/// # Errors
+/// Returns the contextualized error of the first tracker query that fails.
+pub fn show_quality(tracker: &Tracker) -> Result<()> {
+    let retries = tracker
+        .get_retry_commands()
+        .context("Failed to load retry data")?;
+    let low_savings = tracker
+        .get_low_savings_commands()
+        .context("Failed to load low-savings data")?;
+    let pf_summary = tracker
+        .get_parse_failure_summary()
+        .context("Failed to load parse failure data")?;
+    let gross = tracker
+        .get_gross_savings()
+        .context("Failed to load gross savings")?;
+
+    println!("{}", styled("RTK Filter Quality Report", true));
+    println!("{}", "═".repeat(60));
+    println!();
+
+    if retries.is_empty() {
+        println!("{}", styled("Retry Detection", true));
+        println!("{}", "─".repeat(60));
+        println!("  No retries detected (commands re-run within 60s).");
+        println!();
+    } else {
+        let retry_header = "Retry Detection (commands re-run within 60s)";
+        println!("{}", styled(retry_header, true));
+        println!("{}", "─".repeat(60));
+        for r in &retries {
+            let rate = if r.total_runs > 0 {
+                (r.retry_count as f64 / r.total_runs as f64) * 100.0
+            } else {
+                0.0
+            };
+            let retry_word = if r.retry_count == 1 {
+                "retry "
+            } else {
+                "retries"
+            };
+            println!(
+                "  {:<20} {} {} / {} runs  ({:.1}% retry rate)",
+                r.base_cmd, r.retry_count, retry_word, r.total_runs, rate
+            );
+        }
+        println!();
+    }
+
+    if low_savings.is_empty() {
+        println!("{}", styled("Low Savings", true));
+        println!("{}", "─".repeat(60));
+        println!("  All filters achieving 30%+ savings.");
+        println!();
+    } else {
+        let low_header = "Low Savings (below 30% — excludes proxy/passthrough)";
+        println!("{}", styled(low_header, true));
+        println!("{}", "─".repeat(60));
+        for ls in &low_savings {
+            println!(
+                "  {:<20} {:.0}% avg savings  (expected 60%+)    {} runs",
+                ls.rtk_cmd, ls.avg_savings_pct, ls.runs
+            );
+        }
+        println!();
+    }
+
+    if pf_summary.total > 0 {
+        let pf_header = "Parse Failures (filters that fell back to raw output)";
+        println!("{}", styled(pf_header, true));
+        println!("{}", "─".repeat(60));
+        for (cmd, count) in &pf_summary.top_commands {
+            // Truncate by characters: byte slicing panics inside a multi-byte char.
+            let cmd_display = if cmd.chars().count() > 30 {
+                format!("{}...", cmd.chars().take(27).collect::<String>())
+            } else {
+                cmd.clone()
+            };
+            println!("  {:<30} {} failures", cmd_display, count);
+        }
+        println!();
+    }
+
+    let retry_overhead: i64 = retries.iter().map(|r| r.retry_count as i64 * 800).sum();
+    let net = gross - retry_overhead;
+    println!("{}", styled("Net Savings", true));
+    println!("{}", "─".repeat(60));
+    // Clamp like `net` below: a negative i64 cast to usize would wrap to a huge count.
+    println!(
+        "  Gross savings:     {} tokens saved",
+        format_tokens(gross.max(0) as usize)
+    );
+    if retry_overhead > 0 {
+        println!(
+            "  Retry overhead:    ~{} tokens (est. from {} retried commands)",
+            format_tokens(retry_overhead as usize),
+            retries.iter().map(|r| r.retry_count).sum::<usize>()
+        );
+    }
+    println!(
+        "  Net savings:       {} tokens",
+        format_tokens(net.max(0) as usize)
+    );
+    if gross > 0 {
+        let efficiency = (net.max(0) as f64 / gross as f64) * 100.0;
+        println!("  Efficiency:        {:.0}%", efficiency);
+    }
+    println!();
+
+    let has_issues = !retries.is_empty() || !low_savings.is_empty() || pf_summary.total > 0;
+    if has_issues {
+        println!(
+            "{}",
+            "Review the sections above for potential filter quality improvements.".yellow()
+        );
+    } else {
+        println!("{}", "No quality issues detected. ✓".green());
+    }
+
+    Ok(())
+}
diff --git a/src/core/tracking.rs b/src/core/tracking.rs
@@ -478,6 +478,111 @@ impl Tracker {
         })
     }
 
+    /// Rank base commands by how often a run followed the previous one in under 60 seconds.
+    ///
+    /// The base command is `git <subcommand>` for git invocations, else the first word;
+    /// `LAG` pairs each run with its predecessor. At most 10 records, most retries first.
+    pub fn get_retry_commands(&self) -> Result<Vec<RetryRecord>> {
+        let sql = "WITH base AS (
+               SELECT
+                 CASE
+                   WHEN original_cmd LIKE 'git %' THEN
+                     CASE
+                       WHEN instr(substr(original_cmd, 5), ' ') > 0
+                       THEN substr(original_cmd, 1, instr(substr(original_cmd, 5), ' ') + 3)
+                       ELSE original_cmd
+                     END
+                   ELSE
+                     CASE
+                       WHEN instr(original_cmd, ' ') > 0
+                       THEN substr(original_cmd, 1, instr(original_cmd, ' ') - 1)
+                       ELSE original_cmd
+                     END
+                 END AS base_cmd,
+                 timestamp,
+                 LAG(timestamp) OVER (
+                   PARTITION BY
+                     CASE
+                       WHEN original_cmd LIKE 'git %' THEN
+                         CASE
+                           WHEN instr(substr(original_cmd, 5), ' ') > 0
+                           THEN substr(original_cmd, 1, instr(substr(original_cmd, 5), ' ') + 3)
+                           ELSE original_cmd
+                         END
+                       ELSE
+                         CASE
+                           WHEN instr(original_cmd, ' ') > 0
+                           THEN substr(original_cmd, 1, instr(original_cmd, ' ') - 1)
+                           ELSE original_cmd
+                         END
+                     END
+                   ORDER BY timestamp
+                 ) AS prev_ts
+               FROM commands
+             )
+             SELECT base_cmd,
+                    COUNT(*) AS total_runs,
+                    SUM(CASE
+                          WHEN prev_ts IS NOT NULL
+                           AND (strftime('%s', timestamp) - strftime('%s', prev_ts)) < 60
+                          THEN 1 ELSE 0
+                        END) AS retry_count
+             FROM base
+             GROUP BY base_cmd
+             HAVING retry_count > 0
+             ORDER BY retry_count DESC
+             LIMIT 10";
+
+        let mut stmt = self.conn.prepare(sql)?;
+        let found = stmt.query_map([], |row| {
+            Ok(RetryRecord {
+                base_cmd: row.get(0)?,
+                total_runs: row.get::<_, i64>(1)? as usize,
+                retry_count: row.get::<_, i64>(2)? as usize,
+            })
+        })?;
+        let records = found.collect::<Result<Vec<_>, _>>()?;
+        Ok(records)
+    }
+
+    /// Find commands whose average savings stay below 30% (up to 10, lowest average first).
+    ///
+    /// Skips runs with <= 50 input tokens and proxy/fallback commands; needs >= 3 runs.
+    pub fn get_low_savings_commands(&self) -> Result<Vec<LowSavingsRecord>> {
+        // The 30% threshold sits in HAVING so the average spans every run of a command;
+        // filtering rows in WHERE would average only the already-low runs.
+        let mut stmt = self.conn.prepare(
+            "SELECT rtk_cmd, COUNT(*) AS runs, AVG(savings_pct) AS avg_savings
+             FROM commands
+             WHERE input_tokens > 50
+               AND rtk_cmd NOT LIKE '%proxy%'
+               AND rtk_cmd NOT LIKE '%fallback%'
+             GROUP BY rtk_cmd
+             HAVING runs >= 3
+                AND avg_savings < 30.0
+             ORDER BY avg_savings ASC
+             LIMIT 10",
+        )?;
+        let rows = stmt.query_map([], |row| {
+            Ok(LowSavingsRecord {
+                rtk_cmd: row.get(0)?,
+                runs: row.get::<_, i64>(1)? as usize,
+                avg_savings_pct: row.get(2)?,
+            })
+        })?;
+        Ok(rows.collect::<Result<Vec<_>, _>>()?)
+    }
+
+    /// Sum `saved_tokens` across every row of the `commands` table.
+    ///
+    /// `COALESCE` maps the `NULL` that `SUM` yields when there is nothing to add to 0.
+    /// Propagates any error from running the query.
+    pub fn get_gross_savings(&self) -> Result<i64> {
+        let sql = "SELECT COALESCE(SUM(saved_tokens), 0) FROM commands";
+        let gross = self.conn.query_row(sql, [], |row| row.get::<_, i64>(0))?;
+        Ok(gross)
+    }
+
     /// Get overall summary statistics across all recorded commands.
     ///
     /// Returns aggregated metrics including:
@@ -996,6 +1101,22 @@ pub struct ParseFailureSummary {
     pub recent: Vec<ParseFailureRecord>,
 }
 
+/// One row of the retry report: a base command re-run within 60 seconds of its previous run.
+#[derive(Debug)]
+pub struct RetryRecord {
+    pub base_cmd: String, // grouping key: `git <subcommand>` or the command's first word
+    pub total_runs: usize, // every recorded run that shares `base_cmd`
+    pub retry_count: usize, // runs starting under 60s after the previous run of `base_cmd`
+}
+
+/// A command flagged for low token savings, as aggregated by `get_low_savings_commands`.
+#[derive(Debug)]
+pub struct LowSavingsRecord {
+    pub rtk_cmd: String, // the `rtk_cmd` value the rows were grouped by
+    pub runs: usize, // number of rows aggregated into this record (`COUNT(*)`)
+    pub avg_savings_pct: f64, // `AVG(savings_pct)` over those same rows
+}
+
 /// Record a parse failure without ever crashing.
 /// Silently ignores all errors — used in the fallback path.
 pub fn record_parse_failure_silent(raw_command: &str, error_message: &str, succeeded: bool) {
diff --git a/src/main.rs b/src/main.rs
@@ -403,6 +403,9 @@ enum Commands {
         /// Show parse failure log (commands that fell back to raw execution)
         #[arg(short = 'F', long)]
         failures: bool,
+        /// Show filter quality analysis (retry detection, low-savings commands)
+        #[arg(short = 'Q', long)]
+        quality: bool,
     },
 
     /// Claude Code economics: spending (ccusage) vs savings (rtk) analysis
@@ -1731,7 +1734,15 @@ fn main() -> Result<()> {
             all,
             format,
             failures,
+            quality,
         } => {
+            if quality {
+                let tracker = crate::core::tracking::Tracker::new()
+                    .context("Failed to initialize tracking database")?;
+                analytics::gain::show_quality(&tracker)?;
+                return Ok(());
+            }
+
             analytics::gain::run(
                 project, // added: pass project flag
                 graph,