vercel-labs
diff --git a/‎.github/workflows/comparison-tests.yml‎
Lines changed: 10 additions & 0 deletions b/‎.github/workflows/comparison-tests.yml‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎CLAUDE.md‎
Lines changed: 24 additions & 2 deletions b/‎CLAUDE.md‎
Lines changed: 24 additions & 2 deletions
diff --git a/‎package.json‎
Lines changed: 1 addition & 0 deletions b/‎package.json‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/commands/printf/printf.test.ts‎
Lines changed: 4 additions & 3 deletions b/‎src/commands/printf/printf.test.ts‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎src/commands/sed/sed.limits.test.ts‎
Lines changed: 2 additions & 6 deletions b/‎src/commands/sed/sed.limits.test.ts‎
Lines changed: 2 additions & 6 deletions
diff --git a/‎src/comparison-tests/README.md‎
Lines changed: 186 additions & 0 deletions b/‎src/comparison-tests/README.md‎
Lines changed: 186 additions & 0 deletions
diff --git a/‎src/comparison-tests/cat.comparison.test.ts‎
Lines changed: 2 additions & 3 deletions b/‎src/comparison-tests/cat.comparison.test.ts‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎src/comparison-tests/fixtures/alias.comparison.fixtures.json‎
Lines changed: 26 additions & 0 deletions b/‎src/comparison-tests/fixtures/alias.comparison.fixtures.json‎
Lines changed: 26 additions & 0 deletions
@@ -25,3 +25,13 @@ jobs:
 
       - name: Run comparison tests
         run: pnpm test:comparison
+
+      - name: Run comparison tests in record mode
+        run: pnpm test:comparison:record
+
+      - name: diff
+        run: git diff
+
+      # Re-recording must leave the committed fixtures unchanged; any diff means they are stale
+      - name: Fail if there are any diffs
+        run: if [ -n "$(git diff --name-only)" ]; then exit 1; fi
@@ -18,7 +18,8 @@ pnpm knip                  # Check for unused exports/dependencies
 # Testing
 pnpm test:run              # Run ALL tests (including spec tests)
 pnpm test:unit             # Run unit tests only (fast, no comparison/spec)
-pnpm test:comparison       # Run comparison tests only
+pnpm test:comparison       # Run comparison tests only (uses fixtures)
+pnpm test:comparison:record # Re-record comparison test fixtures
 
 # Excluding spec tests (spec tests have known failures)
 pnpm test:run --exclude src/spec-tests
@@ -149,11 +150,32 @@ Commands go in `src/commands/<name>/` with:
 ### Testing Strategy
 
 - **Unit tests**: Fast, isolated tests for specific functionality
-- **Comparison tests**: Run same script in just-bash and real bash, compare output
+- **Comparison tests**: Compare just-bash output against recorded bash fixtures (see `src/comparison-tests/README.md`)
 - **Spec tests** (`src/spec-tests/`): Bash specification conformance (may have known failures)
 
 Prefer comparison tests when uncertain about bash behavior. Keep test files under 300 lines.
 
+### Comparison Tests (Fixture System)
+
+Comparison tests use pre-recorded bash outputs stored in `src/comparison-tests/fixtures/`. This eliminates platform differences (macOS vs Linux). See `src/comparison-tests/README.md` for details.
+
+```bash
+# Run comparison tests (uses fixtures, no real bash needed)
+pnpm test:comparison
+
+# Re-record fixtures (skips locked fixtures)
+RECORD_FIXTURES=1 pnpm test:run src/comparison-tests/mytest.comparison.test.ts
+
+# Force re-record including locked fixtures
+RECORD_FIXTURES=force pnpm test:comparison
+```
+
+When adding comparison tests:
+1. Write the test using `setupFiles()` and `compareOutputs()`
+2. Run with `RECORD_FIXTURES=1` to generate fixtures
+3. Commit both the test file and the generated fixture JSON
+4. If you manually edit a fixture to match Linux behavior, add `"locked": true` to it so re-recording does not overwrite it
+
 ## Development Guidelines
 
 - Read AGENTS.md
 
@@ -76,6 +76,7 @@
     "test:dist": "vitest run src/cli/just-bash.bundle.test.ts",
     "test:unit": "vitest run --config vitest.unit.config.ts",
     "test:comparison": "vitest run --config vitest.comparison.config.ts",
+    "test:comparison:record": "RECORD_FIXTURES=1 vitest run --config vitest.comparison.config.ts",
     "shell": "npx tsx src/cli/shell.ts",
     "dev:exec": "npx tsx src/cli/exec.ts"
   },
 
@@ -129,12 +129,13 @@ describe("printf", () => {
       expect(result.exitCode).toBe(0);
     });
 
-    it.skip("should handle non-numeric for %d", async () => {
-      // TODO: Bash returns exit 0 with warning, our shell returns exit 1
+    it("should handle non-numeric for %d", async () => {
+      // Bash prints an "invalid number" warning to stderr, outputs 0, and exits with status 1
       const env = new Bash();
       const result = await env.exec('printf "%d" notanumber');
       expect(result.stdout).toBe("0");
-      expect(result.exitCode).toBe(0);
+      expect(result.stderr).toContain("invalid number");
+      expect(result.exitCode).toBe(1);
     });
   });
 
 
@@ -23,10 +23,7 @@ describe("SED Execution Limits", () => {
       expect(result.exitCode).toBe(ExecutionLimitError.EXIT_CODE);
     });
 
-    // TODO: t command with loop needs better substitution tracking
-    // The t command branches on successful substitution, but s/./&/ replaces
-    // a character with itself, which doesn't count as "successful" in our impl
-    it.skip("should protect against test loop (t command)", async () => {
+    it("should protect against test loop (t command)", async () => {
       const env = new Bash();
       // Substitution that always succeeds + t branch = infinite loop
       const result = await env.exec(
@@ -152,8 +149,7 @@ describe("SED Execution Limits", () => {
       expect(result.exitCode).toBeDefined();
     });
 
-    // TODO: Nested braces parsing not implemented in our sed
-    it.skip("should handle deeply nested braces", async () => {
+    it("should handle deeply nested braces", async () => {
       const env = new Bash();
       // Nested command blocks
       const result = await env.exec(`echo "test" | sed '{ { { p } } }'`);
 
@@ -0,0 +1,186 @@
+# Comparison Tests
+
+Comparison tests validate that just-bash produces the same output as real bash. They use a **fixture-based system** that records bash outputs once and replays them during tests, eliminating platform-specific differences.
+
+## How It Works
+
+1. **Fixtures** are JSON files containing recorded bash outputs (`src/comparison-tests/fixtures/*.fixtures.json`)
+2. **Tests** run commands in just-bash and compare against the recorded fixtures
+3. **Record mode** runs real bash and saves outputs to fixtures
+
+## Running Tests
+
+```bash
+# Run all comparison tests (uses fixtures, no real bash needed)
+pnpm test:comparison
+
+# Run a specific test file
+pnpm test:run src/comparison-tests/ls.comparison.test.ts
+
+# Re-record fixtures (runs real bash, skips locked fixtures)
+pnpm test:comparison:record
+# Or: RECORD_FIXTURES=1 pnpm test:comparison
+
+# Force re-record ALL fixtures including locked ones
+RECORD_FIXTURES=force pnpm test:comparison
+```
+
+## Adding New Tests
+
+### 1. Add the test case
+
+```typescript
+// src/comparison-tests/mycommand.comparison.test.ts
+import { afterEach, beforeEach, describe, it } from "vitest";
+import {
+  cleanupTestDir,
+  compareOutputs,
+  createTestDir,
+  setupFiles,
+} from "./test-helpers.js";
+
+describe("mycommand - Real Bash Comparison", () => {
+  let testDir: string;
+
+  beforeEach(async () => {
+    testDir = await createTestDir();
+  });
+
+  afterEach(async () => {
+    await cleanupTestDir(testDir);
+  });
+
+  it("should do something", async () => {
+    const env = await setupFiles(testDir, {
+      "input.txt": "hello world\n",
+    });
+    await compareOutputs(env, testDir, "mycommand input.txt");
+  });
+});
+```
+
+### 2. Record the fixture
+
+```bash
+RECORD_FIXTURES=1 pnpm test:run src/comparison-tests/mycommand.comparison.test.ts
+```
+
+This creates `src/comparison-tests/fixtures/mycommand.comparison.fixtures.json`.
+
+### 3. Commit both the test and fixture file
+
+## Updating Fixtures
+
+When bash behavior changes or you need to update expected outputs:
+
+```bash
+# Re-record specific test file
+RECORD_FIXTURES=1 pnpm test:run src/comparison-tests/ls.comparison.test.ts
+
+# Re-record all fixtures
+pnpm test:comparison:record
+```
+
+## Handling Platform Differences
+
+The fixture system solves platform differences (macOS vs Linux):
+
+1. **Record once** on any platform
+2. **Manually adjust** the fixture to match desired behavior (usually Linux)
+3. **Lock the fixture** to prevent accidental overwriting
+4. Tests then pass on all platforms
+
+Example: `ls -R` outputs differently on macOS vs Linux:
+- macOS: `dir\nfile.txt\n...`
+- Linux: `.:\ndir\nfile.txt\n...` (includes ".:" header)
+
+We record on macOS, then edit the fixture to use Linux behavior since our implementation follows Linux.
+
+## Locked Fixtures
+
+Fixtures that have been manually adjusted for platform-specific behavior should be marked as **locked** to prevent accidental overwriting when re-recording:
+
+```json
+{
+  "fixture_id": {
+    "command": "ls -R",
+    "files": { ... },
+    "stdout": ".:\ndir\nfile.txt\n...",
+    "stderr": "",
+    "exitCode": 0,
+    "locked": true
+  }
+}
+```
+
+When recording:
+- `RECORD_FIXTURES=1` skips locked fixtures and reports them
+- `RECORD_FIXTURES=force` overwrites all fixtures including locked ones
+
+Currently locked fixtures (non-exhaustive; search the fixture files for `"locked": true`):
+- `ls -R` - Uses Linux-style output with ".:" header
+- `cat -n` with multiple files - Uses continuous line numbering (Linux behavior)
+
+## API Reference
+
+### `setupFiles(testDir, files)`
+
+Sets up test files in both the real filesystem and the BashEnv.
+
+```typescript
+const env = await setupFiles(testDir, {
+  "file.txt": "content",
+  "dir/nested.txt": "nested content",
+});
+```
+
+### `compareOutputs(env, testDir, command, options?)`
+
+Compares just-bash output against the recorded fixture.
+
+```typescript
+// Basic usage
+await compareOutputs(env, testDir, "cat file.txt");
+
+// With options
+await compareOutputs(env, testDir, "wc -l file.txt", {
+  normalizeWhitespace: true,  // For BSD/GNU whitespace differences
+  compareExitCode: false,     // Skip exit code comparison
+});
+```
+
+### `runRealBash(command, cwd)`
+
+Runs a command in real bash (for tests that need direct bash access).
+
+```typescript
+const result = await runRealBash("echo hello", testDir);
+// result: { stdout, stderr, exitCode }
+```
+
+## Fixture File Format
+
+```json
+{
+  "fixture_id_hash": {
+    "command": "ls -la",
+    "files": {
+      "file.txt": "content"
+    },
+    "stdout": "file.txt\n",
+    "stderr": "",
+    "exitCode": 0
+  }
+}
+```
+
+The fixture ID is a hash of (command + files), ensuring each unique test case has its own fixture entry.
+
+## Best Practices
+
+1. **Keep tests focused** - One behavior per test
+2. **Use meaningful file content** - Makes debugging easier
+3. **Test edge cases** - Empty files, special characters, etc.
+4. **Use `normalizeWhitespace`** for commands with platform-specific formatting (wc, column widths)
+5. **Commit fixtures** - They're part of the test suite
+6. **Re-record when needed** - If you change test files/commands, re-record the fixtures
@@ -3,7 +3,6 @@ import {
   cleanupTestDir,
   compareOutputs,
   createTestDir,
-  isLinux,
   setupFiles,
 } from "./test-helpers.js";
 
@@ -62,8 +61,8 @@ describe("cat command - Real Bash Comparison", () => {
   });
 
   // Linux cat -n continues line numbers across files, macOS resets per file
-  // BashEnv follows Linux behavior, so skip on macOS
-  it.skipIf(!isLinux)("should match -n with multiple files", async () => {
+  // BashEnv follows Linux behavior - fixture uses Linux output
+  it("should match -n with multiple files", async () => {
     const env = await setupFiles(testDir, {
       "a.txt": "file a line 1\nfile a line 2\n",
       "b.txt": "file b line 1\n",
 
@@ -0,0 +1,26 @@
+{
+  "2cab91f70ea1cb84": {
+    "command": "alias notexists || echo failed",
+    "files": {},
+    "stdout": "failed\n",
+    "stderr": "/bin/bash: line 1: alias: notexists: not found\n",
+    "exitCode": 0,
+    "locked": true
+  },
+  "b7f201402670991c": {
+    "command": "alias greet='echo hi'; unalias greet; alias greet || echo removed",
+    "files": {},
+    "stdout": "removed\n",
+    "stderr": "/bin/bash: line 1: alias: greet: not found\n",
+    "exitCode": 0,
+    "locked": true
+  },
+  "c2151098e11aee6e": {
+    "command": "unalias nonexistent || echo not_found",
+    "files": {},
+    "stdout": "not_found\n",
+    "stderr": "/bin/bash: line 1: unalias: nonexistent: not found\n",
+    "exitCode": 0,
+    "locked": true
+  }
+}