Skip to content

Commit 06f4956

Browse files
pauldambra and claude committed
fix(git): cap and stream untracked-file line counting
`countFileLines` previously read each untracked file's full content into memory as UTF-8 via `fs.readFile(filePath, "utf-8")`. The caller in `getChangedFilesDetailed` runs this 16-way concurrent against every untracked path returned by `streamGitStatus` (up to 50k entries). On a monorepo with multi-MB build artifacts or lockfiles, peak heap was `16 * file_bytes * 2` (V8's UTF-16 cost), easily several GB.

That OOM'd the main process and froze the renderer waiting on a tRPC call that would never return — the symptom was the app appearing to freeze a few seconds after the sidebar painted, with no `[ipc-rate]` warnings, just a silent V8 `Scavenger: semi-space copy Allocation failed`.

Fix: bail out on files larger than 1 MB, and stream the rest with `createReadStream`, counting `\n` bytes byte-by-byte. Per-stream memory stays at ~64 KB regardless of file size, so peak across 16 concurrent reads is ~1 MB total. Preserves the original return semantics (trailing-newline-aware line count) for files within the cap.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 180b915 commit 06f4956

2 files changed

Lines changed: 70 additions & 3 deletions

File tree

packages/git/src/queries.test.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { createGitClient } from "./client";
66
import {
77
detectDefaultBranch,
88
getBranchDiffPatchesByPath,
9+
getChangedFilesDetailed,
910
splitUnifiedDiffByFile,
1011
} from "./queries";
1112

@@ -242,3 +243,46 @@ describe("getBranchDiffPatchesByPath", () => {
242243
}
243244
});
244245
});
246+
247+
describe("getChangedFilesDetailed", () => {
248+
let repoDir: string;
249+
250+
afterEach(async () => {
251+
if (repoDir) {
252+
await rm(repoDir, { recursive: true, force: true });
253+
repoDir = "";
254+
}
255+
});
256+
257+
it("reports line counts for small untracked files", async () => {
258+
repoDir = await setupRepo();
259+
await writeFile(path.join(repoDir, "small.txt"), "a\nb\nc\n");
260+
await writeFile(path.join(repoDir, "no-trailing.txt"), "a\nb\nc");
261+
262+
const files = await getChangedFilesDetailed(repoDir);
263+
const small = files.find((f) => f.path === "small.txt");
264+
const noTrailing = files.find((f) => f.path === "no-trailing.txt");
265+
266+
expect(small).toMatchObject({ status: "untracked", linesAdded: 3 });
267+
expect(noTrailing).toMatchObject({ status: "untracked", linesAdded: 3 });
268+
});
269+
270+
// Regression guard for the OOM in https://github.com/PostHog/code/issues/...
271+
// (introduced in c617988f). Before the fix `countFileLines` read each
272+
// untracked file's full content into memory, 16-way concurrent, with no
273+
// size cap. On a monorepo with multi-MB build artifacts / lockfiles this
274+
// exhausted the main-process V8 heap (~3GB+) and froze the renderer
275+
// waiting on the dead tRPC call. The fix bails on files larger than
276+
// COUNT_FILE_LINES_MAX_BYTES (1MB) and stream-counts the rest, so peak
277+
// memory stays ~16 * 64KB regardless of file size.
278+
it("skips line counting for files over the size cap", async () => {
279+
repoDir = await setupRepo();
280+
const oneAndAHalfMB = "a\n".repeat(800_000);
281+
await writeFile(path.join(repoDir, "huge.txt"), oneAndAHalfMB);
282+
283+
const files = await getChangedFilesDetailed(repoDir);
284+
const huge = files.find((f) => f.path === "huge.txt");
285+
286+
expect(huge).toMatchObject({ status: "untracked", linesAdded: 0 });
287+
});
288+
});

packages/git/src/queries.ts

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { createReadStream } from "node:fs";
12
import * as fs from "node:fs/promises";
23
import * as path from "node:path";
34
import type { CreateGitClientOptions } from "./client";
@@ -413,11 +414,33 @@ function matchesExcludePattern(filePath: string, patterns: string[]): boolean {
413414
});
414415
}
415416

417+
const COUNT_FILE_LINES_MAX_BYTES = 1 * 1024 * 1024;
418+
416419
async function countFileLines(filePath: string): Promise<number> {
417420
try {
418-
const content = await fs.readFile(filePath, "utf-8");
419-
if (!content) return 0;
420-
return content.split("\n").length - (content.endsWith("\n") ? 1 : 0);
421+
const stat = await fs.stat(filePath);
422+
if (!stat.isFile() || stat.size === 0) return 0;
423+
if (stat.size > COUNT_FILE_LINES_MAX_BYTES) return 0;
424+
return await new Promise<number>((resolve) => {
425+
let newlines = 0;
426+
let lastByte = -1;
427+
const stream = createReadStream(filePath);
428+
stream.on("data", (chunk) => {
429+
const buf = typeof chunk === "string" ? Buffer.from(chunk) : chunk;
430+
for (let i = 0; i < buf.length; i++) {
431+
if (buf[i] === 0x0a) newlines++;
432+
}
433+
if (buf.length > 0) lastByte = buf[buf.length - 1];
434+
});
435+
stream.on("end", () => {
436+
if (lastByte === -1) {
437+
resolve(0);
438+
return;
439+
}
440+
resolve(lastByte === 0x0a ? newlines : newlines + 1);
441+
});
442+
stream.on("error", () => resolve(0));
443+
});
421444
} catch {
422445
return 0;
423446
}

0 commit comments

Comments
 (0)