diff --git a/providers/github/resources/github.go b/providers/github/resources/github.go index c57b2b5e3f..8a598bb389 100644 --- a/providers/github/resources/github.go +++ b/providers/github/resources/github.go @@ -77,5 +77,4 @@ func githubTimestamp(ts *github.Timestamp) *time.Time { const ( paginationPerPage = 100 - workers = 10 ) diff --git a/providers/github/resources/github_org.go b/providers/github/resources/github_org.go index a83b6aa0ce..753046c831 100644 --- a/providers/github/resources/github_org.go +++ b/providers/github/resources/github_org.go @@ -315,13 +315,15 @@ func (g *mqlGithubOrganization) repositories() ([]any, error) { } repoCount := g.TotalPrivateRepos.Data + g.TotalPublicRepos.Data - workerPool := workerpool.New[[]*github.Repository](workers) + expectedPages := int(repoCount)/paginationPerPage + 1 + workerCount := int(repoCount)/paginationPerPage + 1 + workerPool := workerpool.New[[]*github.Repository](workerCount) workerPool.Start() defer workerPool.Close() log.Debug(). - Int("workers", workers). - Int64("total_repos", repoCount). + Int("workers", workerCount). + Int64("total-repos", repoCount). Str("organization", g.Name.Data). Msg("list repositories") @@ -332,6 +334,19 @@ func (g *mqlGithubOrganization) repositories() ([]any, error) { break } + // failsafe: when total count is correct but some repos aren't returned from ListByOrg + // (e.g., due to permission issues), we stop after enough pages have been requested + // plus the number of pending workers to account for concurrency + if listOpts.Page > (int(workerPool.PendingRequests()) + expectedPages) { + log.Warn(). + Int("found-repos", reposLen). + Int64("total-repos", repoCount). + Int("page", listOpts.Page). + Int("per-page", listOpts.PerPage). + Msg("Failsafe triggered, no more repos are returned") + break + } + // send requests to workers opts := listOpts workerPool.Submit(func() ([]*github.Repository, error) {