@@ -8,9 +8,9 @@ import { processFiles } from './file/fileProcess.js';
88import { searchFiles } from './file/fileSearch.js' ;
99import type { FilesByRoot } from './file/fileTreeGenerate.js' ;
1010import type { ProcessedFile } from './file/fileTypes.js' ;
11- import { getGitDiffs } from './git/gitDiffHandle.js' ;
12- import { getGitLogs } from './git/gitLogHandle.js' ;
13- import { calculateMetrics , createMetricsTaskRunner } from './metrics/calculateMetrics.js' ;
11+ import { getGitDiffs , type GitDiffResult } from './git/gitDiffHandle.js' ;
12+ import { getGitLogs , type GitLogResult } from './git/gitLogHandle.js' ;
13+ import { calculateMetrics , type CalculateMetricsResult , createMetricsTaskRunner } from './metrics/calculateMetrics.js' ;
1414import { prewarmGitSortCache } from './output/outputSort.js' ;
1515import { produceOutput } from './packager/produceOutput.js' ;
1616import type { SuspiciousFileResult } from './security/securityCheck.js' ;
@@ -56,6 +56,49 @@ export interface PackOptions {
5656 skillSourceUrl ?: string ;
5757}
5858
/**
 * Drop processed files whose path was flagged by the security check.
 *
 * @param allFiles - Processed files to screen.
 * @param suspiciousResults - Security findings; each `filePath` marks a file to exclude.
 * @returns `allFiles` itself (same reference, no copy) when nothing was flagged,
 *          otherwise a new array without the flagged files.
 */
const filterSuspiciousFiles = (
  allFiles: ProcessedFile[],
  suspiciousResults: SuspiciousFileResult[],
): ProcessedFile[] => {
  // Fast path: nothing flagged, so hand the caller's array back untouched.
  if (suspiciousResults.length === 0) {
    return allFiles;
  }

  // Build the lookup once so each membership test is a constant-time Set probe.
  const flaggedPaths = new Set(suspiciousResults.map(({ filePath }) => filePath));
  const isNotFlagged = (file: ProcessedFile): boolean => !flaggedPaths.has(file.path);

  return allFiles.filter(isNotFlagged);
};
67+
/**
 * Run output generation and metrics calculation concurrently.
 *
 * `deps.produceOutput` is started first. `deps.calculateMetrics` is then started
 * (wrapped in `withMemoryLogging` under the label 'Calculate Metrics') and is
 * handed a promise for the output's `outputForMetrics` value rather than the
 * resolved value, so it is invoked without first awaiting output generation.
 *
 * @param rootDirs - Forwarded to `produceOutput`.
 * @param config - Forwarded to both `produceOutput` and `calculateMetrics`.
 * @param processedFiles - Files used for both the output and the metrics.
 * @param allFilePaths - Forwarded to `produceOutput`.
 * @param gitDiffResult - Forwarded to both calls; may be undefined.
 * @param gitLogResult - Forwarded to both calls; may be undefined.
 * @param progressCallback - Forwarded to both calls.
 * @param filePathsByRoot - Forwarded to `produceOutput`.
 * @param emptyDirPaths - Forwarded to `produceOutput`; may be undefined.
 * @param metricsRunnerDeps - Task runner wrapper passed as the last argument of `calculateMetrics`.
 * @param deps - Injectable `produceOutput` / `calculateMetrics` implementations.
 * @returns `outputFiles` taken from the `produceOutput` result, plus the computed metrics.
 *          The returned promise rejects if either operation rejects.
 */
const runOutputAndMetrics = async (
  rootDirs: string[],
  config: RepomixConfigMerged,
  processedFiles: ProcessedFile[],
  allFilePaths: string[],
  gitDiffResult: GitDiffResult | undefined,
  gitLogResult: GitLogResult | undefined,
  progressCallback: RepomixProgressCallback,
  filePathsByRoot: FilesByRoot[],
  emptyDirPaths: string[] | undefined,
  metricsRunnerDeps: { taskRunner: ReturnType<typeof createMetricsTaskRunner>['taskRunner'] },
  deps: Pick<typeof defaultDeps, 'produceOutput' | 'calculateMetrics'>,
): Promise<{ outputFiles: string[] | undefined; metrics: CalculateMetricsResult }> => {
  // Kick off output generation first; it is not awaited yet.
  const outputPromise = deps.produceOutput(
    rootDirs,
    config,
    processedFiles,
    allFilePaths,
    gitDiffResult,
    gitLogResult,
    progressCallback,
    filePathsByRoot,
    emptyDirPaths,
  );

  // Metrics receives a promise of the generated output instead of the value itself.
  const outputForMetrics = outputPromise.then((produced) => produced.outputForMetrics);

  const metricsPromise = withMemoryLogging('Calculate Metrics', () =>
    deps.calculateMetrics(
      processedFiles,
      outputForMetrics,
      progressCallback,
      config,
      gitDiffResult,
      gitLogResult,
      metricsRunnerDeps,
    ),
  );

  // Join both branches; a rejection from either one rejects the combined await.
  const [produced, metrics] = await Promise.all([outputPromise, metricsPromise]);

  return { outputFiles: produced.outputFiles, metrics };
};
101+
59102export const pack = async (
60103 rootDirs : string [ ] ,
61104 config : RepomixConfigMerged ,
@@ -81,40 +124,29 @@ export const pack = async (
81124 ) ,
82125 ) ;
83126
84- // Deduplicate and sort empty directory paths for reuse during output generation,
85- // avoiding a redundant searchFiles call in buildOutputGeneratorContext.
127+ // Deduplicate and sort empty directory paths for reuse during output generation.
86128 const emptyDirPaths = config . output . includeEmptyDirectories
87129 ? [ ...new Set ( searchResultsByDir . flatMap ( ( r ) => r . emptyDirPaths ) ) ] . sort ( )
88130 : undefined ;
89131
90- // Sort file paths
91132 progressCallback ( 'Sorting files...' ) ;
92133 const allFilePaths = searchResultsByDir . flatMap ( ( { filePaths } ) => filePaths ) ;
93134 const sortedFilePaths = deps . sortPaths ( allFilePaths ) ;
94-
95- // Regroup sorted file paths by rootDir using Set for O(1) membership checks
96135 const filePathSetByDir = new Map ( searchResultsByDir . map ( ( { rootDir, filePaths } ) => [ rootDir , new Set ( filePaths ) ] ) ) ;
97136 const sortedFilePathsByDir = rootDirs . map ( ( rootDir ) => ( {
98137 rootDir,
99138 filePaths : sortedFilePaths . filter ( ( filePath ) => filePathSetByDir . get ( rootDir ) ?. has ( filePath ) ?? false ) ,
100139 } ) ) ;
101140
102- // Pre-initialize metrics worker pool to overlap gpt-tokenizer loading with subsequent pipeline stages
103- // (security check, file processing, output generation).
141+ // Pre-initialize metrics worker pool to overlap gpt-tokenizer loading with subsequent stages.
104142 const { taskRunner : metricsTaskRunner , warmupPromise : metricsWarmupPromise } = deps . createMetricsTaskRunner (
105143 allFilePaths . length ,
106144 config . tokenCount . encoding ,
107145 ) ;
108146
109147 try {
110148 // Run file collection, git operations, and git sort cache pre-warming in parallel
111- // since they are independent:
112- // - collectFiles reads file contents from disk
113- // - getGitDiffs/getGitLogs spawn git subprocesses
114- // - prewarmGitSortCache spawns `git log` for sort-by-changes, populating a module-level
115- // cache so that sortOutputFiles (called later during output generation) hits the cache
116- // instead of blocking the critical path with a subprocess.
117- // Neither depends on the other's results.
149+ // since they are independent.
118150 progressCallback ( 'Collecting files...' ) ;
119151 const [ collectResults , gitDiffResult , gitLogResult ] = await Promise . all ( [
120152 withMemoryLogging (
@@ -135,31 +167,22 @@ export const pack = async (
135167 const allSkippedFiles = collectResults . flatMap ( ( curr ) => curr . skippedFiles ) ;
136168
137169 // Run security check and file processing concurrently.
138- // Security check uses worker threads while file processing runs on the main thread
139- // (in the default non-compress/non-removeComments config), so they don't compete for CPU.
140- // After both complete, filter out any suspicious files from the processed results.
141- const [ validationResult , allProcessedFiles ] = await Promise . all ( [
142- withMemoryLogging ( 'Security Check' , ( ) =>
143- deps . validateFileSafety ( rawFiles , progressCallback , config , gitDiffResult , gitLogResult ) ,
144- ) ,
145- withMemoryLogging ( 'Process Files' , ( ) => {
146- progressCallback ( 'Processing files...' ) ;
147- return deps . processFiles ( rawFiles , config , progressCallback ) ;
148- } ) ,
149- ] ) ;
150-
151- const { safeFilePaths, suspiciousFilesResults, suspiciousGitDiffResults, suspiciousGitLogResults } =
152- validationResult ;
170+ const securityPromise = withMemoryLogging ( 'Security Check' , ( ) =>
171+ deps . validateFileSafety ( rawFiles , progressCallback , config , gitDiffResult , gitLogResult ) ,
172+ ) ;
153173
154- // Filter processed files to exclude suspicious ones
155- const suspiciousPathSet = new Set ( suspiciousFilesResults . map ( ( r ) => r . filePath ) ) ;
156- const processedFiles =
157- suspiciousPathSet . size > 0 ? allProcessedFiles . filter ( ( f ) => ! suspiciousPathSet . has ( f . path ) ) : allProcessedFiles ;
174+ const allProcessedFiles = await withMemoryLogging ( 'Process Files' , ( ) => {
175+ progressCallback ( 'Processing files...' ) ;
176+ return deps . processFiles ( rawFiles , config , progressCallback ) ;
177+ } ) ;
158178
159- progressCallback ( 'Generating output...' ) ;
160-
161- // Skill generation path — metrics not needed, return early (worker pool cleaned up by finally)
179+ // Skill generation path — wait for security, filter, and return early
162180 if ( config . skillGenerate !== undefined && options . skillDir ) {
181+ const validationResult = await securityPromise ;
182+ const { safeFilePaths, suspiciousFilesResults, suspiciousGitDiffResults, suspiciousGitLogResults } =
183+ validationResult ;
184+ const processedFiles = filterSuspiciousFiles ( allProcessedFiles , suspiciousFilesResults ) ;
185+
163186 const result = await deps . packSkill ( {
164187 rootDirs,
165188 config,
@@ -181,8 +204,6 @@ export const pack = async (
181204 }
182205
183206 // Build filePathsByRoot for multi-root tree generation
184- // Use directory basename as the label for each root
185- // Fallback to rootDir if basename is empty (e.g., filesystem root "/")
186207 const filePathsByRoot : FilesByRoot [ ] = sortedFilePathsByDir . map ( ( { rootDir, filePaths } ) => ( {
187208 rootLabel : path . basename ( rootDir ) || rootDir ,
188209 files : filePaths ,
@@ -191,48 +212,53 @@ export const pack = async (
191212 // Ensure warm-up task completes before metrics calculation
192213 await metricsWarmupPromise ;
193214
194- // Generate and write output, overlapping with metrics calculation.
195- // File and git metrics don't depend on the output, so they start immediately
196- // while output generation runs concurrently.
197- const outputPromise = deps . produceOutput (
198- rootDirs ,
199- config ,
200- processedFiles ,
201- allFilePaths ,
202- gitDiffResult ,
203- gitLogResult ,
204- progressCallback ,
205- filePathsByRoot ,
206- emptyDirPaths ,
215+ progressCallback ( 'Generating output...' ) ;
216+
217+ // Start output and metrics optimistically with ALL processed files, overlapping
218+ // with the still-running security check. If security finds suspicious files (rare),
219+ // fall back to regenerating with filtered files.
220+ const metricsRunnerDeps = { taskRunner : metricsTaskRunner } ;
221+ const outputAndMetrics = runOutputAndMetrics (
222+ rootDirs , config , allProcessedFiles , allFilePaths ,
223+ gitDiffResult , gitLogResult , progressCallback , filePathsByRoot , emptyDirPaths ,
224+ metricsRunnerDeps , deps ,
207225 ) ;
208226
209- const outputForMetricsPromise = outputPromise . then ( ( r ) => r . outputForMetrics ) ;
210-
211- const [ { outputFiles } , metrics ] = await Promise . all ( [
212- outputPromise ,
213- withMemoryLogging ( 'Calculate Metrics' , ( ) =>
214- deps . calculateMetrics (
215- processedFiles ,
216- outputForMetricsPromise ,
217- progressCallback ,
218- config ,
219- gitDiffResult ,
220- gitLogResult ,
221- {
222- taskRunner : metricsTaskRunner ,
223- } ,
224- ) ,
225- ) ,
226- ] ) ;
227+ // Prevent unhandled rejections if securityPromise rejects before
228+ // the optimistic pipeline settles.
229+ outputAndMetrics . catch ( ( ) => { } ) ;
230+
231+ // Wait for the optimistic pipeline and security check to complete
232+ const [ { outputFiles : optimisticOutputFiles , metrics : optimisticMetrics } , validationResult ] =
233+ await Promise . all ( [ outputAndMetrics , securityPromise ] ) ;
234+
235+ const { safeFilePaths, suspiciousFilesResults, suspiciousGitDiffResults, suspiciousGitLogResults } =
236+ validationResult ;
237+
238+ let finalOutputFiles = optimisticOutputFiles ;
239+ let finalMetrics = optimisticMetrics ;
240+ let finalProcessedFiles : ProcessedFile [ ] = allProcessedFiles ;
241+
242+ // If security found suspicious files, regenerate output and metrics with filtered files
243+ if ( suspiciousFilesResults . length > 0 ) {
244+ finalProcessedFiles = filterSuspiciousFiles ( allProcessedFiles , suspiciousFilesResults ) ;
245+
246+ const filtered = await runOutputAndMetrics (
247+ rootDirs , config , finalProcessedFiles , allFilePaths ,
248+ gitDiffResult , gitLogResult , progressCallback , filePathsByRoot , emptyDirPaths ,
249+ metricsRunnerDeps , deps ,
250+ ) ;
251+ finalOutputFiles = filtered . outputFiles ;
252+ finalMetrics = filtered . metrics ;
253+ }
227254
228- // Create a result object that includes metrics and security results
229255 const result = {
230- ...metrics ,
231- ...( outputFiles && { outputFiles } ) ,
256+ ...finalMetrics ,
257+ ...( finalOutputFiles && { outputFiles : finalOutputFiles } ) ,
232258 suspiciousFilesResults,
233259 suspiciousGitDiffResults,
234260 suspiciousGitLogResults,
235- processedFiles,
261+ processedFiles : finalProcessedFiles ,
236262 safeFilePaths,
237263 skippedFiles : allSkippedFiles ,
238264 } ;
0 commit comments