
Commit f7d359b

This PR introduces significant performance optimizations for filesystem operations and adds new features for file processing commands. Key improvements include parallel batch processing, shared utilities for multi-file operations, and new command options. (#44)
* fix
* Self-feedback and test coverage
1 parent 9aab3b8 commit f7d359b

23 files changed: +1938, -502 lines

src/cli/exec.ts

Lines changed: 34 additions & 2 deletions
@@ -9,8 +9,10 @@
  * cat script.sh | pnpm dev:exec
  *
  * Options:
- *   --no-ast      Disable AST output
+ *   --print-ast   Show the parsed AST
  *   --real-bash   Also run the script with real bash for comparison
+ *   --root <path> Use OverlayFS with specified root directory
+ *   --no-limit    Remove execution limits (for large scripts)
  *
  * Output:
  * - AST: The parsed Abstract Syntax Tree as JSON (unless --no-ast)
@@ -21,11 +23,21 @@
  */

 import { spawnSync } from "node:child_process";
+import { resolve } from "node:path";
 import { Bash } from "../Bash.js";
+import { OverlayFs } from "../fs/overlay-fs/index.js";
 import { parse } from "../parser/parser.js";

 const showAst = process.argv.includes("--print-ast");
 const runRealBash = process.argv.includes("--real-bash");
+const noLimit = process.argv.includes("--no-limit");
+
+// Parse --root option
+let rootPath: string | undefined;
+const rootIndex = process.argv.indexOf("--root");
+if (rootIndex !== -1 && rootIndex + 1 < process.argv.length) {
+  rootPath = resolve(process.argv[rootIndex + 1]);
+}

 async function readStdin(): Promise<string> {
   const chunks: Buffer[] = [];
@@ -66,7 +78,27 @@ if (showAst) {
   console.log("AST: Request with --print-ast");
 }

-const env = new Bash();
+// Create Bash environment with optional OverlayFS
+// Use high limits for dev:exec (typical use is exploration of large filesystems)
+const executionLimits = noLimit
+  ? {
+      maxCommandCount: Number.MAX_SAFE_INTEGER,
+      maxLoopIterations: Number.MAX_SAFE_INTEGER,
+    }
+  : {
+      maxCommandCount: 100000, // Higher default for dev:exec
+      maxLoopIterations: 100000,
+    };
+
+let env: Bash;
+if (rootPath) {
+  const fs = new OverlayFs({ root: rootPath });
+  const mountPoint = fs.getMountPoint();
+  env = new Bash({ fs, cwd: mountPoint, executionLimits });
+  console.log(`OverlayFS: ${rootPath} mounted at ${mountPoint}`);
+} else {
+  env = new Bash({ executionLimits });
+}
 const r = await env.exec(script);
 console.log("exitCode:", r.exitCode);
 console.log("stderr:", JSON.stringify(r.stderr));

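For reference, the sketch below is a minimal programmatic equivalent of running `cat script.sh | pnpm dev:exec --root <path>` with the higher default limits. It only uses the OverlayFs and Bash calls visible in the diff above; the root path, the script text, and the relative import locations are placeholders.

// Hedged sketch, not part of the commit: mirrors the dev:exec wiring shown above.
// The root path and script are placeholders; import paths assume a file under src/cli/.
import { Bash } from "../Bash.js";
import { OverlayFs } from "../fs/overlay-fs/index.js";

const fs = new OverlayFs({ root: "/path/to/project" }); // placeholder root directory
const mountPoint = fs.getMountPoint();
const env = new Bash({
  fs,
  cwd: mountPoint,
  executionLimits: { maxCommandCount: 100000, maxLoopIterations: 100000 },
});
const r = await env.exec("du -sh ."); // any script text, as read from stdin in dev:exec
console.log("exitCode:", r.exitCode);
console.log("stderr:", JSON.stringify(r.stderr));

Passing --no-limit on the CLI corresponds to setting both limits to Number.MAX_SAFE_INTEGER, as in the diff.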
src/commands/du/du.ts

Lines changed: 112 additions & 37 deletions
@@ -1,5 +1,6 @@
 import type { Command, CommandContext, ExecResult } from "../../types.js";
 import { parseArgs } from "../../utils/args.js";
+import { DEFAULT_BATCH_SIZE } from "../../utils/constants.js";
 import { hasHelpFlag, showHelp } from "../help.js";

 const duHelp = {
@@ -120,49 +121,123 @@ async function calculateSize(
     return result;
   }

-  // Directory
-  const entries = await ctx.fs.readdir(fullPath);
+  // Directory - use readdirWithFileTypes if available for better performance
   let dirSize = 0;

-  for (const entry of entries) {
-    const entryPath = fullPath === "/" ? `/${entry}` : `${fullPath}/${entry}`;
-    const entryDisplayPath =
-      displayPath === "." ? entry : `${displayPath}/${entry}`;
+  // Get entries with type info if possible
+  interface EntryInfo {
+    name: string;
+    isDirectory: boolean;
+    size?: number;
+  }
+  const entryInfos: EntryInfo[] = [];

-    try {
-      const entryStat = await ctx.fs.stat(entryPath);
-
-      if (entryStat.isDirectory) {
-        const subResult = await calculateSize(
-          ctx,
-          entryPath,
-          entryDisplayPath,
-          options,
-          depth + 1,
-        );
-        dirSize += subResult.totalSize;
-
-        // Only output subdirectories if not summarizing and within depth limit
-        if (!options.summarize) {
-          if (options.maxDepth === null || depth + 1 <= options.maxDepth) {
-            result.output += subResult.output;
-          } else {
-            // Still need to count the size even if not displaying
-            dirSize += 0; // Size already counted
+  if (ctx.fs.readdirWithFileTypes) {
+    const entriesWithTypes = await ctx.fs.readdirWithFileTypes(fullPath);
+    // For files, we still need stat to get size, but we know directories
+    const fileEntries = entriesWithTypes.filter((e) => e.isFile);
+    const dirEntries = entriesWithTypes.filter((e) => e.isDirectory);
+
+    // Parallel stat for files to get sizes
+    for (let i = 0; i < fileEntries.length; i += DEFAULT_BATCH_SIZE) {
+      const batch = fileEntries.slice(i, i + DEFAULT_BATCH_SIZE);
+      const stats = await Promise.all(
+        batch.map(async (e) => {
+          const entryPath =
+            fullPath === "/" ? `/${e.name}` : `${fullPath}/${e.name}`;
+          try {
+            const s = await ctx.fs.stat(entryPath);
+            return { name: e.name, isDirectory: false, size: s.size };
+          } catch {
+            return { name: e.name, isDirectory: false, size: 0 };
           }
-        }
-      } else {
-        dirSize += entryStat.size;
-        if (options.allFiles && !options.summarize) {
-          result.output +=
-            formatSize(entryStat.size, options.humanReadable) +
-            "\t" +
-            entryDisplayPath +
-            "\n";
+        }),
+      );
+      entryInfos.push(...stats);
+    }
+
+    // Add directory entries (size will be calculated recursively)
+    entryInfos.push(
+      ...dirEntries.map((e) => ({ name: e.name, isDirectory: true })),
+    );
+  } else {
+    // Fall back to readdir + parallel stat
+    const entries = await ctx.fs.readdir(fullPath);
+    for (let i = 0; i < entries.length; i += DEFAULT_BATCH_SIZE) {
+      const batch = entries.slice(i, i + DEFAULT_BATCH_SIZE);
+      const stats = await Promise.all(
+        batch.map(async (entry) => {
+          const entryPath =
+            fullPath === "/" ? `/${entry}` : `${fullPath}/${entry}`;
+          try {
+            const s = await ctx.fs.stat(entryPath);
+            return {
+              name: entry,
+              isDirectory: s.isDirectory,
+              size: s.isDirectory ? undefined : s.size,
+            };
+          } catch {
+            return { name: entry, isDirectory: false, size: 0 };
+          }
+        }),
+      );
+      entryInfos.push(...stats);
+    }
+  }
+
+  // Sort entries for consistent output
+  entryInfos.sort((a, b) => a.name.localeCompare(b.name));
+
+  // Process files first (simple size addition)
+  const fileInfos = entryInfos.filter((e) => !e.isDirectory);
+  for (const file of fileInfos) {
+    const size = file.size ?? 0;
+    dirSize += size;
+    if (options.allFiles && !options.summarize) {
+      const entryDisplayPath =
+        displayPath === "." ? file.name : `${displayPath}/${file.name}`;
+      result.output +=
+        formatSize(size, options.humanReadable) +
+        "\t" +
+        entryDisplayPath +
+        "\n";
+    }
+  }
+
+  // Process directories in parallel batches
+  const dirInfos = entryInfos.filter((e) => e.isDirectory);
+  for (let i = 0; i < dirInfos.length; i += DEFAULT_BATCH_SIZE) {
+    const batch = dirInfos.slice(i, i + DEFAULT_BATCH_SIZE);
+    const subResults = await Promise.all(
+      batch.map(async (dir) => {
+        const entryPath =
+          fullPath === "/" ? `/${dir.name}` : `${fullPath}/${dir.name}`;
+        const entryDisplayPath =
+          displayPath === "." ? dir.name : `${displayPath}/${dir.name}`;
+        return {
+          name: dir.name,
+          result: await calculateSize(
+            ctx,
+            entryPath,
+            entryDisplayPath,
+            options,
+            depth + 1,
+          ),
+        };
+      }),
+    );

+    // Sort results for consistent order
+    subResults.sort((a, b) => a.name.localeCompare(b.name));
+
+    for (const { result: subResult } of subResults) {
+      dirSize += subResult.totalSize;
+      // Only output subdirectories if not summarizing and within depth limit
+      if (!options.summarize) {
+        if (options.maxDepth === null || depth + 1 <= options.maxDepth) {
+          result.output += subResult.output;
         }
       }
-    } catch {
-      // Skip entries we can't read
     }
   }

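Both the readdirWithFileTypes branch and the readdir fallback in this diff use the same batching idiom: stat or recursive calculateSize calls run in parallel within a fixed-size chunk, and chunks run sequentially, so at most DEFAULT_BATCH_SIZE operations are in flight at once. The sketch below isolates that idiom; the helper name and the batch-size value are illustrative, not part of the codebase.

// Illustrative helper, not from the repository: runs `worker` over `items` in
// chunks so that at most `batchSize` promises are pending at any time.
const DEFAULT_BATCH_SIZE = 64; // assumed value; the real constant lives in utils/constants.js

async function processInBatches<T, R>(
  items: T[],
  worker: (item: T) => Promise<R>,
  batchSize = DEFAULT_BATCH_SIZE,
): Promise<R[]> {
  const results: R[] = [];
  for (let i = 0; i < items.length; i += batchSize) {
    const batch = items.slice(i, i + batchSize);
    // Each batch resolves in parallel; the next batch starts only after this one settles.
    results.push(...(await Promise.all(batch.map(worker))));
  }
  return results;
}

Within a batch the filesystem calls overlap; across batches the work is sequential, which bounds the number of in-flight operations, and the sort before and after each batch keeps du's output order deterministic.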
0 commit comments
