@@ -6,7 +6,10 @@ const CONFIG = {
66 issueFilePath : path . resolve ( __dirname , "../issue-pool.json" ) ,
77 regex : / h t t p s ? : \/ \/ g i t h u b \. c o m \/ ( [ ^ \/ ] + ) \/ ( [ ^ \/ ] + ) / ,
88 githubToken : process . env . GITHUB_TOKEN ,
9- requiredLabel : 'code-gov'
9+ requiredLabel : 'code-gov' ,
10+ concurrentRepos : 6 , // processing 6 repos at once but need to find the sweetspot because at this rate, it takes 18 minutes for the entire script to run through codegov.json. the "bathtub curve" is what we have here and what we need to experiment with and solve 👀
11+ rateLimitRemaining : 5000 ,
12+ rateLimitReset : Date . now
1013}
1114
1215// #region - Helper Functions
@@ -19,11 +22,29 @@ const getHeaders = () => {
1922 return HEADERS
2023}
2124
/**
 * Thin wrapper around fetch() that tracks GitHub's rate-limit response headers
 * in CONFIG and pauses before sending a request when the recorded quota is
 * nearly used up.
 *
 * Side effects: may overwrite CONFIG.rateLimitRemaining and
 * CONFIG.rateLimitReset (stored in milliseconds) after every response.
 *
 * @param {string} url - URL to request.
 * @param {object} [options={}] - Options handed straight through to fetch().
 * @returns {Promise<Response>} The fetch() response, returned untouched; the
 *   HTTP status is NOT checked here, callers inspect `response.ok` themselves.
 */
async function fetchWithRateLimit(url, options = {}) {
  // Only wait when the quota is nearly exhausted AND the reset moment is still in the future.
  if (CONFIG.rateLimitRemaining <= 10 && Date.now() < CONFIG.rateLimitReset) {
    const waitTime = CONFIG.rateLimitReset - Date.now() + 1000 // add 1 second buffer
    console.log(`Rate limit low (${CONFIG.rateLimitRemaining} remaining). Waiting ${Math.round(waitTime / 1000)}s...`)
    await new Promise(resolve => setTimeout(resolve, waitTime))
  }

  const response = await fetch(url, options)

  // Always parse as base 10. A missing header (null) or a malformed one yields NaN.
  const remaining = Number.parseInt(response.headers.get('X-RateLimit-Remaining'), 10)
  const resetSeconds = Number.parseInt(response.headers.get('X-RateLimit-Reset'), 10)

  // Never store NaN: `NaN <= 10` is false, which would silently switch the throttle off.
  if (Number.isFinite(remaining)) CONFIG.rateLimitRemaining = remaining
  // The header value is multiplied by 1000 so it can be compared against Date.now().
  if (Number.isFinite(resetSeconds)) CONFIG.rateLimitReset = resetSeconds * 1000

  return response
}
42+
2243async function getRepoInfo ( ) { // dont know how i feel about this double loop setup...
2344 let repoInfo = [ ]
2445
2546 try {
26- const content = await fs . readFile ( CONFIG . repoFilePath , "utf-8" ) // filter by tier 3 maturity to get the projects that truly want outside help
47+ const content = await fs . readFile ( CONFIG . repoFilePath , "utf-8" )
2748 const jsonData = JSON . parse ( content )
2849
2950 for ( const agencyKey in jsonData ) {
@@ -34,12 +55,17 @@ async function getRepoInfo() { // dont know how i feel about this double loop se
3455
3556 if ( organization . repositoryURL ) {
3657 const match = organization . repositoryURL . match ( CONFIG . regex )
37- const [ url , owner , repo ] = match
3858
39- repoInfo . push ( {
40- ownerName : owner ,
41- repoName : repo
42- } )
59+ if ( match ) {
60+ const [ url , owner , repo ] = match
61+
62+ repoInfo . push ( {
63+ ownerName : owner ,
64+ repoName : repo
65+ } )
66+ } else {
67+ console . warn ( `No match found for URL: ${ organization . repositoryURL } ` )
68+ }
4369 }
4470 }
4571 }
@@ -107,64 +133,83 @@ function transformIssue(issue, repo, repoLanguage) {
107133 }
108134}
109135
110- // #region - Main Function
111- async function updateIssuePool ( ) {
112- const issuePool = { }
113- const repoInfo = await getRepoInfo ( )
114- const headers = getHeaders ( )
115-
116- for ( let i = 0 ; i < repoInfo . length ; i ++ ) { // switch to a forOf loop here?
117- const repo = repoInfo [ i ]
/**
 * Gathers the open issues labelled with CONFIG.requiredLabel for one repository.
 *
 * Fetches the repository record first (to read its `language` field), then
 * pages through the issues endpoint 100 items at a time. Entries that carry a
 * `pull_request` field are skipped; every other entry is passed through
 * transformIssue() and stored under the transformed issue's `id`.
 *
 * Failures are logged, not thrown: a non-OK repo response yields an empty
 * object, a non-OK issues page stops paging, and any exception raised inside
 * the try block is caught and reported. Whatever was collected so far is returned.
 *
 * @param {{ownerName: string, repoName: string}} repo - Repository to process.
 * @param {object} headers - Request headers forwarded to every API call.
 * @returns {Promise<object>} Map of transformed issue id -> transformed issue.
 */
async function processSingleRepository(repo, headers) {
  const collected = {}
  const slug = `${repo.ownerName}/${repo.repoName}`
  const apiBase = `https://api.github.com/repos/${slug}`

  try {
    const infoResponse = await fetchWithRateLimit(apiBase, { headers })

    if (!infoResponse.ok) {
      console.error(`Failed to fetch repo info for ${slug}: ${infoResponse.status}`)
      return collected
    }

    const repoData = await infoResponse.json()
    const repoLanguage = repoData.language || ""

    // Keep requesting pages until one comes back short or a request fails.
    for (let page = 1; ; page++) {
      const pageUrl = `${apiBase}/issues?page=${page}&per_page=100&state=open&labels=${CONFIG.requiredLabel}`
      const pageResponse = await fetchWithRateLimit(pageUrl, { headers })

      if (!pageResponse.ok) {
        console.error(`Failed to fetch issues for ${slug}: ${pageResponse.status}`)
        break
      }

      const issues = await pageResponse.json()

      // endpoint always returns both issues and pull requests so we ignore the PRs
      issues.forEach((issue, index) => {
        if (issue.pull_request) return

        const transformed = transformIssue(issue, repo, repoLanguage)
        collected[transformed.id] = transformed // is having the ID is the best key name?
        console.log(`✅ Processed ${index + 1}/${issues.length}: ${slug}`)
      })

      // A page with fewer than 100 entries is the last one.
      if (issues.length < 100) break
    }
  } catch (error) {
    console.error(`❌ Error processing ${slug}:`, error)
  }

  return collected
}
188+
189+ // #region - Main Function
190+ async function updateIssuePool ( ) {
191+ const issuePool = { }
192+ const repoInfo = await getRepoInfo ( )
193+ const headers = getHeaders ( )
162194
163- console . log ( `✅ Processed ${ i + 1 } /${ repoInfo . length } : ${ repo . ownerName } /${ repo . repoName } ` )
195+ // process repositories in chunks of 3 for parallel processing
196+ for ( let i = 0 ; i < repoInfo . length ; i += CONFIG . concurrentRepos ) {
197+ const chunk = repoInfo . slice ( i , i + CONFIG . concurrentRepos )
198+ console . log ( `Processing chunk ${ Math . floor ( i / CONFIG . concurrentRepos ) + 1 } /${ Math . ceil ( repoInfo . length / CONFIG . concurrentRepos ) } (${ chunk . length } repos)` )
199+
200+ const chunkPromises = chunk . map ( repo => processSingleRepository ( repo , headers ) )
201+ const chunkResults = await Promise . allSettled ( chunkPromises )
202+
203+ chunkResults . forEach ( ( result , index ) => {
204+ if ( result . status === 'fulfilled' ) {
205+ Object . assign ( issuePool , result . value )
206+ } else {
207+ console . error ( `Failed ${ chunk [ index ] . ownerName } /${ chunk [ index ] . repoName } :` , result . reason )
208+ }
209+ } )
164210
165- } catch ( error ) {
166- console . error ( `❌ Error processing ${ repo . ownerName } /${ repo . repoName } :` , error )
167- continue
211+ if ( i + CONFIG . concurrentRepos < repoInfo . length ) {
212+ await new Promise ( resolve => setTimeout ( resolve , 1000 ) )
168213 }
169214 }
170215
0 commit comments