bash-trim: only dedup when row trimming is needed (#7)

mgabor3141 · web-flow · commit ff4bf12cb8d4 · 2026-03-09T02:33:09.000+01:00
Co-authored-by: mgabor3141 <@mgabor3141>
diff --git a/.changeset/bash-trim-conditional-dedup.md b/.changeset/bash-trim-conditional-dedup.md
@@ -0,0 +1,5 @@
+---
+"pi-bash-trim": patch
+---
+
+Only apply line deduplication when row trimming is actually needed. Output that fits within the token budget is no longer deduped.
diff --git a/.changeset/safeguard-config-redesign.md b/.changeset/safeguard-config-redesign.md
@@ -2,8 +2,4 @@
 "pi-safeguard": minor
 ---
 
-Add user-configurable `commands`, `patterns`, and `instructions` fields to safeguard config. Commands support flat string (flag any invocation) and subcommand prefix arrays (`["gh", "repo", "delete"]`). Patterns are regexes matched against all tool input text. Instructions are natural language appended to the judge system prompt.
-
-Support project-level config at `.pi/extensions/pi-safeguard.json` (additive only — cannot weaken global settings). Global and project configs merge: commands and patterns concatenate, instructions are labeled by source.
-
-Add `\bsafeguard\b` to built-in string patterns to flag attempts to reference or modify the security guardrail.
+Add user-configurable `commands`, `patterns`, and `instructions` to safeguard config. Support project-level config at `.pi/extensions/pi-safeguard.json` (additive only — cannot weaken global settings). Add `\bsafeguard\b` to built-in string patterns.
diff --git a/.changeset/safeguard-signal-flagger.md b/.changeset/safeguard-signal-flagger.md
@@ -2,17 +2,8 @@
 "pi-safeguard": major
 ---
 
-Replace specific pattern matching with signal-based flagging architecture. The flagger is now a wide-net boolean gate (high recall, no reasoning) and the judge sees raw actions only — no flagger bias in evaluations.
+Replace pattern matching with signal-based flagging. The flagger is now a wide-net boolean gate (high recall, no reasoning); the judge sees raw actions only — no flagger bias.
 
-Broadened sensitive file detection beyond `.env*`:
-- Files outside the working directory
-- Dotfiles/dotdirs in `$HOME` (`.ssh`, `.aws`, `.gnupg`, etc.)
-- System paths (`/etc`, `/usr`, `/var`, `/dev`, `/proc`, etc.)
-- Paths containing secret keywords (`secret`, `credential`, `password`, `token`, `.pem`, `.key`, `id_rsa`, `authorized_keys`)
+Broadened sensitive file detection beyond `.env*`: files outside cwd, dotfiles in `$HOME`, system paths, paths with secret keywords.
 
-New signals:
-- `rm -r` and `rm -f` flagged independently (previously required both)
-- Interpreter with inline code (`eval`, `bash -c`, `python -c`, `node -e`)
-- `chmod u+s`/`g+s` (setuid/setgid)
-- `su`, `doas`, `pkexec` (previously only `sudo`)
-- Content scanning: private key material, known API key formats (GitHub PAT, OpenAI, AWS, Slack)
+New signals: `rm -r`/`rm -f` flagged independently, inline code interpreters (`eval`, `bash -c`, `python -c`, `node -e`), `chmod u+s`/`g+s`, `su`/`doas`/`pkexec`, private key material, known API key formats.
diff --git a/.changeset/safeguard-string-patterns.md b/.changeset/safeguard-string-patterns.md
@@ -2,4 +2,4 @@
 "pi-safeguard": minor
 ---
 
-Add string pattern matching in addition to AST-based detection. Dangerous keywords like `sudo` are now caught anywhere in tool input text (e.g. scripts being written or edited), not just when they appear as parsed command names. Also fix post-denial circumvention check cascade where a single denial could trigger repeated checks on every subsequent tool call.
+Add string pattern matching in addition to AST-based detection — dangerous keywords like `sudo` are now caught anywhere in tool input text, not just as parsed command names. Fix post-denial circumvention check cascade.
diff --git a/packages/bash-trim/src/trim.ts b/packages/bash-trim/src/trim.ts
@@ -285,26 +285,39 @@ export function trimOutput(fullOutput: string, options?: Partial<TrimOptions>):
 	const colTrimmed = tokenized.map((lt) => colTrimLine(lt, maxLineWidth, trimmedWidth, headRatio));
 	const anyColumnsTrimmed = colTrimmed.some((l) => l.trimmed);
 
-	// Phase 2: Dedup consecutive similar lines
-	const colTexts = colTrimmed.map((l) => l.text);
-	const dedupResult = dedup(colTexts, minDedupLines);
-
-	// Rebuild ColTrimmedLine[] from dedup output — summary lines need fresh token counts
+	// Phase 2: Dedup — only if row trimming is actually needed.
+	// If the output fits within the token budget, dedup is counter-productive:
+	// collapsing e.g. 10 similar `ls` rows when nothing is being cut from the
+	// middle just destroys information for no benefit.
+	const rowCheckWithoutDedup = trimRows(colTrimmed, maxTotalTokens);
+	const needsRowTrimming = rowCheckWithoutDedup.omittedLines > 0;
+
+	let dedupResult: { lines: string[]; dedupedLines: number; groupCount: number };
 	let dedupColTrimmed: ColTrimmedLine[];
-	if (dedupResult.dedupedLines > 0) {
-		dedupColTrimmed = dedupResult.lines.map((text) => {
-			// Try to find the original ColTrimmedLine for non-summary lines
-			const origIdx = colTexts.indexOf(text);
-			if (origIdx !== -1) return colTrimmed[origIdx];
-			// Summary line — encode fresh
-			return { text, tokenCount: encode(text).length, trimmed: false, omittedChars: 0 };
-		});
+
+	if (needsRowTrimming) {
+		// Dedup to reclaim space before row trimming
+		const colTexts = colTrimmed.map((l) => l.text);
+		dedupResult = dedup(colTexts, minDedupLines);
+
+		if (dedupResult.dedupedLines > 0) {
+			dedupColTrimmed = dedupResult.lines.map((text) => {
+				// Try to find the original ColTrimmedLine for non-summary lines
+				const origIdx = colTexts.indexOf(text);
+				if (origIdx !== -1) return colTrimmed[origIdx];
+				// Summary line — encode fresh
+				return { text, tokenCount: encode(text).length, trimmed: false, omittedChars: 0 };
+			});
+		} else {
+			dedupColTrimmed = colTrimmed;
+		}
 	} else {
+		dedupResult = { lines: [], dedupedLines: 0, groupCount: 0 };
 		dedupColTrimmed = colTrimmed;
 	}
 
 	// Phase 3: Row trimming (uses column-trimmed + deduped token counts)
-	const rowResult = trimRows(dedupColTrimmed, maxTotalTokens);
+	const rowResult = needsRowTrimming ? trimRows(dedupColTrimmed, maxTotalTokens) : rowCheckWithoutDedup;
 	const rowsTrimmed = rowResult.omittedLines > 0;
 
 	// Column-trim stats for visible lines only (after dedup + row trimming).
diff --git a/packages/bash-trim/test/trim.test.ts b/packages/bash-trim/test/trim.test.ts
@@ -188,6 +188,31 @@ describe("trimOutput pipeline", () => {
 		expect(r.columnsTrimmed).toBe(true);
 	});
 
+	it("skips dedup when output fits without row trimming", () => {
+		// 10 similar lines that would be deduped, but fit well within token budget.
+		// Dedup should NOT run since no rows need to be trimmed from the middle.
+		const lines = Array.from(
+			{ length: 10 },
+			(_, i) => `2026-03-06 19:29:37.${String(800 + i).padStart(3, "0")} E  kernel[0:af9] (IOSurface) SID: 0x0`,
+		);
+		const r = trimOutput(lines.join("\n"), { minTokensToTrim: 0 });
+		expect(r.rowsTrimmed).toBe(false);
+		expect(r.dedupedLines).toBe(0);
+		expect(r.dedupGroupCount).toBe(0);
+		// All 10 lines preserved verbatim
+		expect(r.text).toBe(lines.join("\n"));
+	});
+
+	it("applies dedup when output would need row trimming", () => {
+		// Many similar lines that exceed token budget — dedup should kick in
+		const lines = Array.from(
+			{ length: 500 },
+			(_, i) => `2026-03-06 19:29:37.${String(i).padStart(3, "0")} E  kernel[0:af9] (IOSurface) SID: 0x0`,
+		);
+		const r = trimOutput(lines.join("\n"), { maxTotalTokens: 500 });
+		expect(r.dedupedLines).toBeGreaterThan(0);
+	});
+
 	it("column-trimmed lines count at trimmed token cost for row budget", () => {
 		// 30 lines × 1000 chars = lots of raw tokens, but after column trimming
 		// each line's token count drops drastically → should fit in 2K budget
@@ -273,15 +298,12 @@ describe("fixtures", () => {
 		expect(r.dedupedLines).toBeGreaterThan(0);
 	});
 
-	it("npm-ls.txt — fits with higher budget", () => {
+	it("npm-ls.txt — fits with higher budget, dedup skipped", () => {
 		const input = fixture("npm-ls.txt");
 		const r = trimOutput(input, { maxTotalTokens: 10_000 });
 		expect(r.rowsTrimmed).toBe(false);
-		// Dedup may still collapse similar dependency lines
-		if (r.dedupedLines > 0) {
-			expect(r.text).not.toBe(input);
-		} else {
-			expect(r.text).toBe(input);
-		}
+		// With higher budget, no row trimming needed → dedup is skipped
+		expect(r.dedupedLines).toBe(0);
+		expect(r.text).toBe(input);
 	});
 });

-Original file line number
+Diff line change
@@ -0,0 +1,5 @@
 +---
 +"pi-bash-trim": patch
 +---
 +
 +Only apply line deduplication when row trimming is actually needed. Output that fits within the token budget is no longer deduped.