Skip to content

Commit 79bf2fd

Browse files
Merge pull request #41 from DSACMS/sachin/issuePoolChanges
Issue Pool Improvements
2 parents 3d2667a + 7654331 commit 79bf2fd

3 files changed

Lines changed: 314 additions & 58 deletions

File tree

config/updateCodeGov.js

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ const path = require('path')
33

44
// these will always stay constant
55
const CONFIG = {
6-
testAgencyDirectory: path.resolve(__dirname, "../test-agency-indexes"),
76
agencyDirectory: path.resolve(__dirname, "../agency-indexes"),
87
outputFile: path.resolve(__dirname, "../codegov.json"),
98
regex: /^(.*?)-.*\.json$/
@@ -14,17 +13,16 @@ i focused on the fact that the agencyDirectory will always contain jsons with th
1413
but that prove to be a disadvantage down the road */
1514

1615
// updates the codegov.json file with new data found from ./agency-indexes
17-
async function updateCodeGov(isTesting = false) {
16+
async function updateCodeGov() {
1817
try {
1918
const updatedJSON = {}
20-
directoryPath = isTesting === true ? CONFIG.testAgencyDirectory : CONFIG.agencyDirectory
2119

2220
// read all files in the directory
23-
const filenames = await fs.readdir(directoryPath)
21+
const filenames = await fs.readdir(CONFIG.agencyDirectory)
2422

2523
// we know that the directory will only contain JSON files, so we don't need to check for non-JSON files
2624
for (const file of filenames) {
27-
const filePath = path.join(directoryPath, file)
25+
const filePath = path.join(CONFIG.agencyDirectory, file)
2826

2927
try {
3028
const content = await fs.readFile(filePath, "utf-8")
@@ -51,4 +49,4 @@ async function updateCodeGov(isTesting = false) {
5149
}
5250
}
5351

54-
updateCodeGov(isTesting = true)
52+
updateCodeGov()

config/updateIssuePool.js

Lines changed: 97 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ const CONFIG = {
66
issueFilePath: path.resolve(__dirname, "../issue-pool.json"),
77
regex: /https?:\/\/github\.com\/([^\/]+)\/([^\/]+)/,
88
githubToken: process.env.GITHUB_TOKEN,
9-
requiredLabel: 'code-gov'
9+
requiredLabel: 'code-gov',
10+
concurrentRepos: 6, // processing 6 repos at once but need to find the sweetspot because at this rate, it takes 18 minutes for the entire script to run through codegov.json. the "bathtub curve" is what we have here and what we need to experiment with and solve 👀
11+
rateLimitRemaining: 5000,
12+
rateLimitReset: Date.now
1013
}
1114

1215
// #region - Helper Functions
@@ -19,11 +22,29 @@ const getHeaders = () => {
1922
return HEADERS
2023
}
2124

25+
/**
 * Wrapper around `fetch` that tracks the rate-limit budget reported by the server
 * in the shared CONFIG object and pauses before a request when that budget is nearly spent.
 *
 * Before the request: if CONFIG.rateLimitRemaining is 10 or less and the recorded
 * reset time (CONFIG.rateLimitReset, epoch milliseconds) is still in the future,
 * sleeps until the reset time plus a 1 second buffer.
 *
 * After the request: refreshes CONFIG.rateLimitRemaining / CONFIG.rateLimitReset from the
 * `X-RateLimit-Remaining` / `X-RateLimit-Reset` response headers. A missing or non-numeric
 * header leaves the previously recorded value untouched, so a bad header can never
 * overwrite the tracked state with NaN and silently switch the throttle off.
 *
 * @param {string} url - URL to request.
 * @param {object} [options={}] - Options forwarded unchanged to `fetch`.
 * @returns {Promise<Response>} The response exactly as returned by `fetch`.
 */
async function fetchWithRateLimit(url, options = {}) {
  // a non-numeric reset value coerces to NaN, which compares as "not in the future" (no wait)
  const resetAt = Number(CONFIG.rateLimitReset)

  if (CONFIG.rateLimitRemaining <= 10 && Date.now() < resetAt) {
    const waitTime = resetAt - Date.now() + 1000 // add 1 second buffer
    console.log(`Rate limit low (${CONFIG.rateLimitRemaining} remaining). Waiting ${Math.round(waitTime/1000)}s...`)
    await new Promise(resolve => setTimeout(resolve, waitTime))
  }

  const response = await fetch(url, options)

  // headers.get() yields null when the header is absent; parseInt(null, 10) is NaN
  const remaining = Number.parseInt(response.headers.get('X-RateLimit-Remaining'), 10)
  const resetSeconds = Number.parseInt(response.headers.get('X-RateLimit-Reset'), 10)

  if (!Number.isNaN(remaining)) CONFIG.rateLimitRemaining = remaining
  // the header value is multiplied by 1000 so it can be compared against Date.now() (milliseconds)
  if (!Number.isNaN(resetSeconds)) CONFIG.rateLimitReset = resetSeconds * 1000

  return response
}
42+
2243
async function getRepoInfo() { // dont know how i feel about this double loop setup...
2344
let repoInfo = []
2445

2546
try {
26-
const content = await fs.readFile(CONFIG.repoFilePath, "utf-8") // filter by tier 3 maturity to get the projects that truly want outside help
47+
const content = await fs.readFile(CONFIG.repoFilePath, "utf-8")
2748
const jsonData = JSON.parse(content)
2849

2950
for (const agencyKey in jsonData) {
@@ -34,12 +55,17 @@ async function getRepoInfo() { // dont know how i feel about this double loop se
3455

3556
if (organization.repositoryURL) {
3657
const match = organization.repositoryURL.match(CONFIG.regex)
37-
const [url, owner, repo] = match
3858

39-
repoInfo.push({
40-
ownerName: owner,
41-
repoName: repo
42-
})
59+
if (match) {
60+
const [url, owner, repo] = match
61+
62+
repoInfo.push({
63+
ownerName: owner,
64+
repoName: repo
65+
})
66+
} else {
67+
console.warn(`No match found for URL: ${organization.repositoryURL}`)
68+
}
4369
}
4470
}
4571
}
@@ -107,64 +133,83 @@ function transformIssue(issue, repo, repoLanguage) {
107133
}
108134
}
109135

110-
// #region - Main Function
111-
async function updateIssuePool() {
112-
const issuePool = {}
113-
const repoInfo = await getRepoInfo()
114-
const headers = getHeaders()
115-
116-
for (let i = 0; i < repoInfo.length; i++) { // switch to a forOf loop here?
117-
const repo = repoInfo[i]
136+
async function processSingleRepository(repo, headers) {
137+
const repoIssues = {}
138+
139+
try {
140+
const repoUrl = `https://api.github.com/repos/${repo.ownerName}/${repo.repoName}`
141+
const repoResponse = await fetchWithRateLimit(repoUrl, { headers })
118142

119-
try {
120-
const repoUrl = `https://api.github.com/repos/${repo.ownerName}/${repo.repoName}`
121-
const repoResponse = await fetch(repoUrl, { headers })
143+
if (!repoResponse.ok) {
144+
console.error(`Failed to fetch repo info for ${repo.ownerName}/${repo.repoName}: ${repoResponse.status}`)
145+
return repoIssues
146+
}
122147

123-
if (!repoResponse.ok) {
124-
console.error(`Failed to fetch repo info for ${repo.ownerName}/${repo.repoName}: ${repoResponse.status}`)
125-
continue
126-
}
148+
const repoData = await repoResponse.json()
149+
const repoLanguage = repoData.language || ""
127150

128-
const repoData = await repoResponse.json()
129-
const repoLanguage = repoData.language || ""
151+
let page = 1
152+
let hasMore = true
130153

131-
let page = 1
132-
let hasMore = true
154+
while (hasMore) {
155+
const issuesUrl = `https://api.github.com/repos/${repo.ownerName}/${repo.repoName}/issues?page=${page}&per_page=100&state=open&labels=${CONFIG.requiredLabel}`
156+
const issuesResponse = await fetchWithRateLimit(issuesUrl, { headers })
133157

134-
while (hasMore) {
135-
const issuesUrl = `https://api.github.com/repos/${repo.ownerName}/${repo.repoName}/issues?page=${page}&per_page=100&state=open&labels=${CONFIG.requiredLabel}`
136-
const issuesResponse = await fetch(issuesUrl, { headers })
158+
if (!issuesResponse.ok) {
159+
console.error(`Failed to fetch issues for ${repo.ownerName}/${repo.repoName}: ${issuesResponse.status}`)
160+
break
161+
}
137162

138-
if (!issuesResponse.ok) {
139-
console.error(`Failed to fetch issues for ${repo.ownerName}/${repo.repoName}: ${issuesResponse.status}`)
140-
break
163+
const issues = await issuesResponse.json()
164+
165+
// endpoint always returns both issues and pull requests so we ignore the PRs
166+
for (const [index, issue] of issues.entries()) {
167+
if (issue.pull_request) {
168+
continue
141169
}
142-
143-
const issues = await issuesResponse.json()
144170

145-
// endpoint always returns both issues and pull requests so we ignore the PRs
146-
for (const issue of issues) {
147-
if (issue.pull_request) {
148-
continue
149-
}
150-
171+
const transformedIssue = transformIssue(issue, repo, repoLanguage)
172+
repoIssues[transformedIssue.id] = transformedIssue // is having the ID is the best key name?
173+
console.log(`✅ Processed ${index + 1}/${issues.length}: ${repo.ownerName}/${repo.repoName}`)
174+
}
151175

152-
const transformedIssue = transformIssue(issue, repo, repoLanguage)
153-
issuePool[transformedIssue.id] = transformedIssue // is having the ID is the best key name?
154-
}
176+
if (issues.length < 100) {
177+
hasMore = false
178+
}
155179

156-
if (issues.length < 100) {
157-
hasMore = false
158-
}
180+
page++
181+
}
182+
} catch (error) {
183+
console.error(`❌ Error processing ${repo.ownerName}/${repo.repoName}:`, error)
184+
}
159185

160-
page++
161-
}
186+
return repoIssues
187+
}
188+
189+
// #region - Main Function
190+
async function updateIssuePool() {
191+
const issuePool = {}
192+
const repoInfo = await getRepoInfo()
193+
const headers = getHeaders()
162194

163-
console.log(`✅ Processed ${i + 1}/${repoInfo.length}: ${repo.ownerName}/${repo.repoName}`)
195+
// process repositories in chunks of CONFIG.concurrentRepos for parallel processing
196+
for (let i = 0; i < repoInfo.length; i += CONFIG.concurrentRepos) {
197+
const chunk = repoInfo.slice(i, i + CONFIG.concurrentRepos)
198+
console.log(`Processing chunk ${Math.floor(i/CONFIG.concurrentRepos) + 1}/${Math.ceil(repoInfo.length/CONFIG.concurrentRepos)} (${chunk.length} repos)`)
199+
200+
const chunkPromises = chunk.map(repo => processSingleRepository(repo, headers))
201+
const chunkResults = await Promise.allSettled(chunkPromises)
202+
203+
chunkResults.forEach((result, index) => {
204+
if (result.status === 'fulfilled') {
205+
Object.assign(issuePool, result.value)
206+
} else {
207+
console.error(`Failed ${chunk[index].ownerName}/${chunk[index].repoName}:`, result.reason)
208+
}
209+
})
164210

165-
} catch (error) {
166-
console.error(`❌ Error processing ${repo.ownerName}/${repo.repoName}:`, error)
167-
continue
211+
if (i + CONFIG.concurrentRepos < repoInfo.length) {
212+
await new Promise(resolve => setTimeout(resolve, 1000))
168213
}
169214
}
170215

0 commit comments

Comments
 (0)