Skip to content

Commit 10f4d02

Browse files
Allow gitlab to resume from encoded resume info (#611)
1 parent ffacaa1 commit 10f4d02

File tree

6 files changed

+406
-284
lines changed

6 files changed

+406
-284
lines changed

pkg/sources/github/github.go

+10-90
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,11 @@ import (
2424
"google.golang.org/protobuf/proto"
2525
"google.golang.org/protobuf/types/known/anypb"
2626

27+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
28+
"github.com/trufflesecurity/trufflehog/v3/pkg/giturl"
2729
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/credentialspb"
2830
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
2931
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
30-
31-
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
32-
"github.com/trufflesecurity/trufflehog/v3/pkg/giturl"
3332
"github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer"
3433
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
3534
"github.com/trufflesecurity/trufflehog/v3/pkg/sources/git"
@@ -355,7 +354,8 @@ func (s *Source) scan(ctx context.Context, installationClient *github.Client, ch
355354
}
356355

357356
// If there is resume information available, limit this scan to only the repos that still need scanning.
358-
progressIndexOffset := s.filterReposToResume(s.GetProgress().EncodedResumeInfo)
357+
reposToScan, progressIndexOffset := sources.FilterReposToResume(s.repos, s.GetProgress().EncodedResumeInfo)
358+
s.repos = reposToScan
359359

360360
for i, repoURL := range s.repos {
361361
if err := s.jobSem.Acquire(ctx, 1); err != nil {
@@ -372,7 +372,11 @@ func (s *Source) scan(ctx context.Context, installationClient *github.Client, ch
372372

373373
s.setProgressCompleteWithRepo(i+progressIndexOffset, repoURL)
374374
// Ensure the repo is removed from the resume info after being scanned.
375-
defer s.removeRepoFromResumeInfo(repoURL)
375+
defer func(s *Source) {
376+
s.resumeInfoMutex.Lock()
377+
defer s.resumeInfoMutex.Unlock()
378+
s.resumeInfoSlice = sources.RemoveRepoFromResumeInfo(s.resumeInfoSlice, repoURL)
379+
}(s)
376380

377381
if !strings.HasSuffix(repoURL, ".git") {
378382
return
@@ -799,91 +803,7 @@ func (s *Source) setProgressCompleteWithRepo(index int, repoURL string) {
799803
sort.Strings(s.resumeInfoSlice)
800804

801805
// Make the resume info string from the slice.
802-
encodedResumeInfo := s.encodeResumeInfo()
806+
encodedResumeInfo := sources.EncodeResumeInfo(s.resumeInfoSlice)
803807

804808
s.SetProgressComplete(index, len(s.repos), fmt.Sprintf("Repo: %s", repoURL), encodedResumeInfo)
805809
}
806-
807-
// removeRepoFromResumeInfo removes the repoURL from the resume info.
808-
func (s *Source) removeRepoFromResumeInfo(repoURL string) {
809-
s.resumeInfoMutex.Lock()
810-
defer s.resumeInfoMutex.Unlock()
811-
812-
index := -1
813-
for i, repo := range s.resumeInfoSlice {
814-
if repoURL == repo {
815-
index = i
816-
}
817-
}
818-
819-
if index == -1 {
820-
// We should never reach this point. If we do, it means the repo was never added to the resume info.
821-
// So log the error and do nothing.
822-
s.log.Errorf("repoURL (%q) not found in list of encode resume info: %q", repoURL, s.EncodedResumeInfo)
823-
return
824-
}
825-
826-
// This removes the element at the given index.
827-
s.resumeInfoSlice = append(s.resumeInfoSlice[:index], s.resumeInfoSlice[index+1:]...)
828-
}
829-
830-
func (s *Source) encodeResumeInfo() string {
831-
return strings.Join(s.resumeInfoSlice, "\t")
832-
}
833-
834-
func (s *Source) decodeResumeInfo(resumeInfo string) {
835-
// strings.Split will, for an empty string, return []string{""},
836-
// which is one element, whereas when there is no resume info we want an empty slice.
837-
if resumeInfo == "" {
838-
return
839-
}
840-
s.resumeInfoSlice = strings.Split(resumeInfo, "\t")
841-
}
842-
843-
// filterReposToResume filters the existing repos down to those that are included in the encoded resume info.
844-
// It also returns the difference between the original length of the repos and the new length to use for progress reporting.
845-
// It is required that both the resumeInfo repos and the existing repos in s.repos are sorted.
846-
func (s *Source) filterReposToResume(resumeInfo string) int {
847-
if resumeInfo == "" {
848-
return 0
849-
}
850-
851-
s.resumeInfoMutex.Lock()
852-
defer s.resumeInfoMutex.Unlock()
853-
854-
s.decodeResumeInfo(resumeInfo)
855-
856-
// Because this scanner is multithreaded, it is possible that we have scanned a range of repositories
857-
// with some gaps of unlisted but completed repositories in between the ones in resumeInfo.
858-
// So we know repositories that have not finished scanning are the ones included in the resumeInfo,
859-
// and those that come after the last repository in the resumeInfo.
860-
// However, it is possible that a resumed scan does not include all or even any of the repos within the resumeInfo.
861-
// In this case, we must ensure we still scan all repos that come after the last found repo in the list.
862-
reposToScan := []string{}
863-
lastFoundRepoIndex := -1
864-
resumeRepoIndex := 0
865-
for i, repoURL := range s.repos {
866-
// If the repoURL is bigger than what we're looking for, move to the next one.
867-
if repoURL > s.resumeInfoSlice[resumeRepoIndex] {
868-
resumeRepoIndex++
869-
}
870-
871-
// If we've found all of our repositories end the filter.
872-
if resumeRepoIndex == len(s.resumeInfoSlice) {
873-
break
874-
}
875-
876-
// If the repoURL is the one we're looking for, add it and update the lastFoundRepoIndex.
877-
if repoURL == s.resumeInfoSlice[resumeRepoIndex] {
878-
lastFoundRepoIndex = i
879-
reposToScan = append(reposToScan, repoURL)
880-
}
881-
}
882-
883-
// Append all repos after the last one we've found.
884-
reposToScan = append(reposToScan, s.repos[lastFoundRepoIndex+1:]...)
885-
886-
progressOffsetCount := len(s.repos) - len(reposToScan)
887-
s.repos = reposToScan
888-
return progressOffsetCount
889-
}

pkg/sources/github/github_test.go

-149
Original file line number | Diff line number | Diff line change
@@ -373,152 +373,3 @@ func Test_setProgressCompleteWithRepo(t *testing.T) {
373373
}
374374
}
375375
}
376-
377-
func Test_removeRepoFromResumeInfo(t *testing.T) {
378-
tests := []struct {
379-
startingResumeInfoSlice []string
380-
repoURL string
381-
wantResumeInfoSlice []string
382-
}{
383-
{
384-
startingResumeInfoSlice: []string{"a", "b", "c"},
385-
repoURL: "a",
386-
wantResumeInfoSlice: []string{"b", "c"},
387-
},
388-
{
389-
startingResumeInfoSlice: []string{"a", "b", "c"},
390-
repoURL: "b",
391-
wantResumeInfoSlice: []string{"a", "c"},
392-
},
393-
{ // This case probably can't happen: the repo is not in the list.
394-
startingResumeInfoSlice: []string{"a", "b", "c"},
395-
repoURL: "not in the list",
396-
wantResumeInfoSlice: []string{"a", "b", "c"},
397-
},
398-
}
399-
400-
logger := logrus.New()
401-
logger.Out = io.Discard
402-
s := &Source{
403-
repos: []string{},
404-
log: logger.WithField("no", "output"),
405-
}
406-
407-
for _, tt := range tests {
408-
s.resumeInfoSlice = tt.startingResumeInfoSlice
409-
s.removeRepoFromResumeInfo(tt.repoURL)
410-
if !reflect.DeepEqual(s.resumeInfoSlice, tt.wantResumeInfoSlice) {
411-
t.Errorf("s.removeRepoFromResumeInfo() got: %v, want: %v", s.resumeInfoSlice, tt.wantResumeInfoSlice)
412-
}
413-
}
414-
}
415-
416-
func Test_encodeResumeInfo(t *testing.T) {
417-
tests := []struct {
418-
startingResumeInfoSlice []string
419-
wantEncodedResumeInfo string
420-
}{
421-
{
422-
startingResumeInfoSlice: []string{"a", "b", "c"},
423-
wantEncodedResumeInfo: "a\tb\tc",
424-
},
425-
{
426-
startingResumeInfoSlice: []string{},
427-
wantEncodedResumeInfo: "",
428-
},
429-
}
430-
431-
logger := logrus.New()
432-
logger.Out = io.Discard
433-
s := &Source{
434-
repos: []string{},
435-
log: logger.WithField("no", "output"),
436-
}
437-
438-
for _, tt := range tests {
439-
s.resumeInfoSlice = tt.startingResumeInfoSlice
440-
gotEncodedResumeInfo := s.encodeResumeInfo()
441-
if gotEncodedResumeInfo != tt.wantEncodedResumeInfo {
442-
t.Errorf("s.encodeResumeInfo() got: %q, want: %q", gotEncodedResumeInfo, tt.wantEncodedResumeInfo)
443-
}
444-
}
445-
}
446-
447-
func Test_decodeResumeInfo(t *testing.T) {
448-
tests := []struct {
449-
resumeInfo string
450-
wantResumeInfoSlice []string
451-
}{
452-
{
453-
resumeInfo: "a\tb\tc",
454-
wantResumeInfoSlice: []string{"a", "b", "c"},
455-
},
456-
{
457-
resumeInfo: "",
458-
wantResumeInfoSlice: nil,
459-
},
460-
}
461-
462-
for _, tt := range tests {
463-
s := &Source{}
464-
s.decodeResumeInfo(tt.resumeInfo)
465-
if !reflect.DeepEqual(s.resumeInfoSlice, tt.wantResumeInfoSlice) {
466-
t.Errorf("s.decodeResumeInfo() got: %v, want: %v", s.resumeInfoSlice, tt.wantResumeInfoSlice)
467-
}
468-
}
469-
}
470-
471-
func Test_filterReposToResume(t *testing.T) {
472-
startingRepos := []string{"a", "b", "c", "d", "e", "f", "g"}
473-
474-
tests := map[string]struct {
475-
resumeInfo string
476-
wantProgressOffsetCount int
477-
wantReposToScan []string
478-
}{
479-
"blank resume info": {
480-
resumeInfo: "",
481-
wantProgressOffsetCount: 0,
482-
wantReposToScan: startingRepos,
483-
},
484-
"starting repos": {
485-
resumeInfo: "a\tb",
486-
wantProgressOffsetCount: 0,
487-
wantReposToScan: startingRepos,
488-
},
489-
"early contiguous repos": {
490-
resumeInfo: "b\tc",
491-
wantProgressOffsetCount: 1,
492-
wantReposToScan: []string{"b", "c", "d", "e", "f", "g"},
493-
},
494-
"non-contiguous repos": {
495-
resumeInfo: "b\te",
496-
wantProgressOffsetCount: 3,
497-
wantReposToScan: []string{"b", "e", "f", "g"},
498-
},
499-
"no repos found in the repo list": {
500-
resumeInfo: "not\tthere",
501-
wantProgressOffsetCount: 0,
502-
wantReposToScan: startingRepos,
503-
},
504-
"only some repos in the list": {
505-
resumeInfo: "c\tnot\tthere",
506-
wantProgressOffsetCount: 2,
507-
wantReposToScan: []string{"c", "d", "e", "f", "g"},
508-
},
509-
}
510-
511-
for name, tt := range tests {
512-
s := &Source{
513-
repos: startingRepos,
514-
}
515-
516-
gotProgressOffsetCount := s.filterReposToResume(tt.resumeInfo)
517-
if gotProgressOffsetCount != tt.wantProgressOffsetCount {
518-
t.Errorf("s.filterReposToResume() name: %q got: %d, want: %d", name, gotProgressOffsetCount, tt.wantProgressOffsetCount)
519-
}
520-
if !reflect.DeepEqual(s.repos, tt.wantReposToScan) {
521-
t.Errorf("s.filterReposToResume() name: %q got: %v, want: %v", name, s.repos, tt.wantReposToScan)
522-
}
523-
}
524-
}

0 commit comments

Comments
 (0)