Skip to content

Commit 62882b3

Browse files
authored
Implement rg (#46)
* Implement rg
* Import tests and implement many features
* Align json output
* vercel-review
* ripgrep alignment
* binary
1 parent c7183b9 commit 62882b3

34 files changed

+8572
-267
lines changed

AGENTS.npm.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ const result = await bash.exec("cat input.txt | grep pattern");
7070

7171
## Available Commands
7272

73-
**Text processing**: `awk`, `cat`, `column`, `comm`, `cut`, `egrep`, `expand`, `fgrep`, `fold`, `grep`, `head`, `join`, `nl`, `paste`, `rev`, `sed`, `sort`, `strings`, `tac`, `tail`, `tr`, `unexpand`, `uniq`, `wc`, `xargs`
73+
**Text processing**: `awk`, `cat`, `column`, `comm`, `cut`, `egrep`, `expand`, `fgrep`, `fold`, `grep`, `head`, `join`, `nl`, `paste`, `rev`, `rg`, `sed`, `sort`, `strings`, `tac`, `tail`, `tr`, `unexpand`, `uniq`, `wc`, `xargs`
7474

7575
**Data processing**: `jq` (JSON), `sqlite3` (SQLite), `xan` (CSV), `yq` (YAML/XML/TOML/CSV)
7676

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ pnpm shell --no-network
279279

280280
### Text Processing
281281

282-
`awk`, `base64`, `column`, `comm`, `cut`, `diff`, `expand`, `fold`, `grep` (+ `egrep`, `fgrep`), `head`, `join`, `md5sum`, `nl`, `od`, `paste`, `printf`, `rev`, `sed`, `sha1sum`, `sha256sum`, `sort`, `strings`, `tac`, `tail`, `tr`, `unexpand`, `uniq`, `wc`, `xargs`
282+
`awk`, `base64`, `column`, `comm`, `cut`, `diff`, `expand`, `fold`, `grep` (+ `egrep`, `fgrep`), `head`, `join`, `md5sum`, `nl`, `od`, `paste`, `printf`, `rev`, `rg`, `sed`, `sha1sum`, `sha256sum`, `sort`, `strings`, `tac`, `tail`, `tr`, `unexpand`, `uniq`, `wc`, `xargs`
283283

284284
### Data Processing
285285

src/commands/grep/grep.advanced.test.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,8 @@ describe("grep advanced", () => {
150150
},
151151
});
152152
const result = await env.exec("grep -A1 match /test.txt");
153-
expect(result.stdout).toBe("match1\nb\nmatch2\nd\n");
153+
// Separator between non-contiguous groups (GNU grep behavior)
154+
expect(result.stdout).toBe("match1\nb\n--\nmatch2\nd\n");
154155
expect(result.exitCode).toBe(0);
155156
});
156157

src/commands/grep/grep.ts

Lines changed: 21 additions & 224 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import type { Command, CommandContext, ExecResult } from "../../types.js";
22
import { matchGlob } from "../../utils/glob.js";
33
import { hasHelpFlag, showHelp, unknownOption } from "../help.js";
4+
import { buildRegex, searchContent } from "../search-engine/index.js";
45

56
/** File entry with optional type info from glob expansion */
67
interface FileEntry {
@@ -193,27 +194,23 @@ export const grepCommand: Command = {
193194
};
194195
}
195196

196-
// Build regex
197-
let regexPattern: string;
198-
if (fixedStrings) {
199-
// -F: escape all regex special characters for literal match
200-
regexPattern = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
201-
} else if (extendedRegex || perlRegex) {
202-
// -E and -P: use pattern as-is (JavaScript regex is mostly PCRE-compatible)
203-
regexPattern = pattern;
204-
} else {
205-
regexPattern = escapeRegexForBasicGrep(pattern);
206-
}
207-
if (wholeWord) {
208-
regexPattern = `\\b${regexPattern}\\b`;
209-
}
210-
if (lineRegexp) {
211-
regexPattern = `^${regexPattern}$`;
212-
}
197+
// Build regex using shared search-engine
198+
const regexMode = fixedStrings
199+
? "fixed"
200+
: extendedRegex
201+
? "extended"
202+
: perlRegex
203+
? "perl"
204+
: "basic";
213205

214206
let regex: RegExp;
215207
try {
216-
regex = new RegExp(regexPattern, ignoreCase ? "gi" : "g");
208+
regex = buildRegex(pattern, {
209+
mode: regexMode,
210+
ignoreCase,
211+
wholeWord,
212+
lineRegexp,
213+
});
217214
} catch {
218215
return {
219216
stdout: "",
@@ -224,18 +221,16 @@ export const grepCommand: Command = {
224221

225222
// If no files were given and stdin is available, search stdin
226223
if (files.length === 0 && ctx.stdin) {
227-
const result = grepContent(
228-
ctx.stdin,
229-
regex,
224+
const result = searchContent(ctx.stdin, regex, {
230225
invertMatch,
231226
showLineNumbers,
232227
countOnly,
233-
"",
228+
filename: "",
234229
onlyMatching,
235230
beforeContext,
236231
afterContext,
237232
maxCount,
238-
);
233+
});
239234
if (quietMode) {
240235
return { stdout: "", stderr: "", exitCode: result.matched ? 0 : 1 };
241236
}
@@ -351,18 +346,16 @@ export const grepCommand: Command = {
351346
}
352347

353348
const content = await ctx.fs.readFile(filePath);
354-
const result = grepContent(
355-
content,
356-
regex,
349+
const result = searchContent(content, regex, {
357350
invertMatch,
358351
showLineNumbers,
359352
countOnly,
360-
showFilename ? file : "",
353+
filename: showFilename ? file : "",
361354
onlyMatching,
362355
beforeContext,
363356
afterContext,
364357
maxCount,
365-
);
358+
});
366359

367360
return { file, result };
368361
} catch {
@@ -431,202 +424,6 @@ export const grepCommand: Command = {
431424
},
432425
};
433426

434-
function escapeRegexForBasicGrep(str: string): string {
435-
// Basic grep (BRE) uses different escaping than JavaScript regex
436-
// In BRE: \| is alternation, \( \) are groups, \{ \} are quantifiers
437-
// We need to convert BRE to JavaScript regex
438-
439-
let result = "";
440-
let i = 0;
441-
442-
while (i < str.length) {
443-
const char = str[i];
444-
445-
if (char === "\\" && i + 1 < str.length) {
446-
const nextChar = str[i + 1];
447-
// BRE: \| becomes | (alternation)
448-
// BRE: \( \) become ( ) (grouping)
449-
// BRE: \{ \} become { } (quantifiers) - but we'll treat as literal for simplicity
450-
if (nextChar === "|" || nextChar === "(" || nextChar === ")") {
451-
result += nextChar;
452-
i += 2;
453-
continue;
454-
} else if (nextChar === "{" || nextChar === "}") {
455-
// Keep as escaped for now (literal)
456-
result += `\\${nextChar}`;
457-
i += 2;
458-
continue;
459-
}
460-
}
461-
462-
// Escape characters that are special in JavaScript regex but not in BRE
463-
if (
464-
char === "+" ||
465-
char === "?" ||
466-
char === "|" ||
467-
char === "(" ||
468-
char === ")" ||
469-
char === "{" ||
470-
char === "}"
471-
) {
472-
result += `\\${char}`;
473-
} else {
474-
result += char;
475-
}
476-
i++;
477-
}
478-
479-
return result;
480-
}
481-
482-
function grepContent(
483-
content: string,
484-
regex: RegExp,
485-
invertMatch: boolean,
486-
showLineNumbers: boolean,
487-
countOnly: boolean,
488-
filename: string,
489-
onlyMatching: boolean = false,
490-
beforeContext: number = 0,
491-
afterContext: number = 0,
492-
maxCount: number = 0, // 0 means unlimited
493-
): { output: string; matched: boolean } {
494-
const lines = content.split("\n");
495-
const lineCount = lines.length;
496-
// Handle trailing empty line from split if content ended with newline
497-
const lastIdx =
498-
lineCount > 0 && lines[lineCount - 1] === "" ? lineCount - 1 : lineCount;
499-
500-
// Fast path: count only mode
501-
if (countOnly) {
502-
let matchCount = 0;
503-
for (let i = 0; i < lastIdx; i++) {
504-
regex.lastIndex = 0;
505-
if (regex.test(lines[i]) !== invertMatch) {
506-
matchCount++;
507-
}
508-
}
509-
const countStr = filename
510-
? `${filename}:${matchCount}`
511-
: String(matchCount);
512-
return { output: `${countStr}\n`, matched: matchCount > 0 };
513-
}
514-
515-
// Fast path: no context needed (most common case)
516-
if (beforeContext === 0 && afterContext === 0) {
517-
const outputLines: string[] = [];
518-
let hasMatch = false;
519-
let matchCount = 0;
520-
521-
for (let i = 0; i < lastIdx; i++) {
522-
// Check if we've reached maxCount
523-
if (maxCount > 0 && matchCount >= maxCount) break;
524-
525-
const line = lines[i];
526-
regex.lastIndex = 0;
527-
const matches = regex.test(line);
528-
529-
if (matches !== invertMatch) {
530-
hasMatch = true;
531-
matchCount++;
532-
if (onlyMatching) {
533-
regex.lastIndex = 0;
534-
for (
535-
let match = regex.exec(line);
536-
match !== null;
537-
match = regex.exec(line)
538-
) {
539-
outputLines.push(filename ? `${filename}:${match[0]}` : match[0]);
540-
if (match[0].length === 0) regex.lastIndex++;
541-
}
542-
} else if (showLineNumbers) {
543-
outputLines.push(
544-
filename ? `${filename}:${i + 1}:${line}` : `${i + 1}:${line}`,
545-
);
546-
} else {
547-
outputLines.push(filename ? `${filename}:${line}` : line);
548-
}
549-
}
550-
}
551-
552-
return {
553-
output: outputLines.length > 0 ? `${outputLines.join("\n")}\n` : "",
554-
matched: hasMatch,
555-
};
556-
}
557-
558-
// Slow path: context lines needed
559-
const outputLines: string[] = [];
560-
let matchCount = 0;
561-
const printedLines = new Set<number>();
562-
563-
// First pass: find all matching lines (respecting maxCount)
564-
const matchingLineNumbers: number[] = [];
565-
for (let i = 0; i < lastIdx; i++) {
566-
// Check if we've reached maxCount
567-
if (maxCount > 0 && matchCount >= maxCount) break;
568-
regex.lastIndex = 0;
569-
if (regex.test(lines[i]) !== invertMatch) {
570-
matchingLineNumbers.push(i);
571-
matchCount++;
572-
}
573-
}
574-
575-
// Second pass: output with context
576-
for (const lineNum of matchingLineNumbers) {
577-
// Before context
578-
for (let i = Math.max(0, lineNum - beforeContext); i < lineNum; i++) {
579-
if (!printedLines.has(i)) {
580-
printedLines.add(i);
581-
let outputLine = lines[i];
582-
if (showLineNumbers) outputLine = `${i + 1}-${outputLine}`;
583-
if (filename) outputLine = `${filename}-${outputLine}`;
584-
outputLines.push(outputLine);
585-
}
586-
}
587-
588-
// The matching line
589-
if (!printedLines.has(lineNum)) {
590-
printedLines.add(lineNum);
591-
const line = lines[lineNum];
592-
593-
if (onlyMatching) {
594-
regex.lastIndex = 0;
595-
for (
596-
let match = regex.exec(line);
597-
match !== null;
598-
match = regex.exec(line)
599-
) {
600-
outputLines.push(filename ? `${filename}:${match[0]}` : match[0]);
601-
if (match[0].length === 0) regex.lastIndex++;
602-
}
603-
} else {
604-
let outputLine = line;
605-
if (showLineNumbers) outputLine = `${lineNum + 1}:${outputLine}`;
606-
if (filename) outputLine = `${filename}:${outputLine}`;
607-
outputLines.push(outputLine);
608-
}
609-
}
610-
611-
// After context
612-
const maxAfter = Math.min(lastIdx - 1, lineNum + afterContext);
613-
for (let i = lineNum + 1; i <= maxAfter; i++) {
614-
if (!printedLines.has(i)) {
615-
printedLines.add(i);
616-
let outputLine = lines[i];
617-
if (showLineNumbers) outputLine = `${i + 1}-${outputLine}`;
618-
if (filename) outputLine = `${filename}-${outputLine}`;
619-
outputLines.push(outputLine);
620-
}
621-
}
622-
}
623-
624-
return {
625-
output: outputLines.length > 0 ? `${outputLines.join("\n")}\n` : "",
626-
matched: matchCount > 0,
627-
};
628-
}
629-
630427
async function expandRecursiveGlob(
631428
baseDir: string,
632429
afterGlob: string,

src/commands/registry.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ export type CommandName =
3232
| "grep"
3333
| "fgrep"
3434
| "egrep"
35+
| "rg"
3536
| "sed"
3637
| "awk"
3738
| "sort"
@@ -189,6 +190,10 @@ const commandLoaders: LazyCommandDef<CommandName>[] = [
189190
name: "egrep",
190191
load: async () => (await import("./grep/grep.js")).egrepCommand,
191192
},
193+
{
194+
name: "rg",
195+
load: async () => (await import("./rg/rg.js")).rgCommand,
196+
},
192197
{
193198
name: "sed",
194199
load: async () => (await import("./sed/sed.js")).sedCommand,

0 commit comments

Comments
 (0)