Skip to content

Commit f8234ff

Browse files
committed
feat(detectors): add conditional interface
1 parent ef3a714 commit f8234ff

File tree

4 files changed

+73
-0
lines changed

4 files changed

+73
-0
lines changed

pkg/detectors/detectors.go

+22
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"errors"
77
"math/big"
88
"net/url"
9+
"regexp"
910
"strings"
1011
"unicode"
1112

@@ -44,6 +45,27 @@ type CustomResultsCleaner interface {
4445
ShouldCleanResultsIrrespectiveOfConfiguration() bool
4546
}
4647

48+
// ConditionalDetector is an optional interface that a detector can implement to
49+
// skip chunks based on specific criteria.
50+
type ConditionalDetector interface {
51+
// ShouldScanChunk determines whether the detector should run.
52+
ShouldScanChunk(chunk sources.Chunk) bool
53+
}
54+
55+
// lockFilePat matches paths whose final element is package.json,
// package-lock.json, or yarn.lock.
var lockFilePat = regexp.MustCompile(`(^|/)(package(-lock)?\.json|yarn\.lock)$`)

// Conditions groups common predicates for use by ConditionalDetector
// implementations. Go has no static members, so an anonymous struct serves as
// the namespace (see https://stackoverflow.com/a/55390104).
var Conditions = struct {
	// IsLockFile reports whether path matches lockFilePat. Such files are a
	// common source of false positives.
	// https://github.com/trufflesecurity/trufflehog/issues/1460
	IsLockFile func(path string) bool
}{
	// The bound method value already has the signature func(string) bool, so
	// no wrapping closure is needed.
	IsLockFile: lockFilePat.MatchString,
}
68+
4769
// Versioner is an optional interface that a detector can implement to
4870
// differentiate instances of the same detector type.
4971
type Versioner interface {

pkg/detectors/parseur/parseur.go

+10
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import (
77
"net/http"
88
"strings"
99

10+
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
11+
1012
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
1113
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
1214
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
@@ -18,6 +20,7 @@ type Scanner struct {
1820

1921
// Ensure the Scanner satisfies the interface at compile time
2022
var _ detectors.Detector = (*Scanner)(nil)
23+
var _ detectors.ConditionalDetector = (*Scanner)(nil)
2124

2225
var (
2326
defaultClient = common.SaneHttpClient()
@@ -32,6 +35,13 @@ func (s Scanner) Keywords() []string {
3235
return []string{"parseur"}
3336
}
3437

38+
func (s Scanner) ShouldScanChunk(chunk sources.Chunk) bool {
39+
if m, ok := sources.NewGitSourceMetadata(chunk.SourceType, chunk.SourceMetadata); ok {
40+
return !detectors.Conditions.IsLockFile(m.File)
41+
}
42+
return true
43+
}
44+
3545
// FromData will find and optionally verify Parseur secrets in a given set of bytes.
3646
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
3747
dataStr := string(data)

pkg/engine/engine.go

+5
Original file line numberDiff line numberDiff line change
@@ -802,6 +802,11 @@ func (e *Engine) scannerWorker(ctx context.Context) {
802802
}
803803

804804
for _, detector := range matchingDetectors {
805+
if d, ok := detector.Detector.(detectors.ConditionalDetector); ok && !d.ShouldScanChunk(*chunk) {
806+
ctx.Logger().V(4).Info("skipping detector for chunk", "detector", detector.Type().String(), "chunk", chunk)
807+
continue
808+
}
809+
805810
decoded.Chunk.Verify = e.shouldVerifyChunk(sourceVerify, detector, e.detectorVerificationOverrides)
806811
wgDetect.Add(1)
807812
e.detectableChunksChan <- detectableChunk{

pkg/sources/sources.go

+36
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,42 @@ type Chunk struct {
4545
Verify bool
4646
}
4747

48+
// GitSourceMetadata is a source-agnostic view of the metadata fields shared by
// Git-based sources.
type GitSourceMetadata struct {
	// Repository is the repository reported by the source metadata
	// (for Gerrit, the project).
	Repository string
	// Commit is the commit reported by the source metadata.
	Commit string
	// File is the file path reported by the source metadata.
	File string
}
54+
55+
func NewGitSourceMetadata(source sourcespb.SourceType, data *source_metadatapb.MetaData) (*GitSourceMetadata, bool) {
56+
if data == nil {
57+
return nil, false
58+
}
59+
60+
switch source {
61+
case sourcespb.SourceType_SOURCE_TYPE_GIT:
62+
md := data.GetGit()
63+
return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true
64+
case sourcespb.SourceType_SOURCE_TYPE_AZURE_REPOS:
65+
md := data.GetAzureRepos()
66+
return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true
67+
case sourcespb.SourceType_SOURCE_TYPE_BITBUCKET:
68+
md := data.GetBitbucket()
69+
return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true
70+
case sourcespb.SourceType_SOURCE_TYPE_GERRIT:
71+
md := data.GetGerrit()
72+
return &GitSourceMetadata{md.GetProject(), md.GetCommit(), md.GetFile()}, true
73+
case sourcespb.SourceType_SOURCE_TYPE_GITHUB:
74+
md := data.GetGithub()
75+
return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true
76+
case sourcespb.SourceType_SOURCE_TYPE_GITLAB:
77+
md := data.GetGitlab()
78+
return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true
79+
default:
80+
return nil, false
81+
}
82+
}
83+
4884
// ChunkingTarget specifies criteria for a targeted chunking process.
4985
// Instead of collecting data indiscriminately, this struct allows the caller
5086
// to specify particular subsets of data they're interested in. This becomes

0 commit comments

Comments
 (0)