Skip to content

Commit d1a81af

Browse files
Only include one result per AWS Key ID, preferably verified (#619)
Also ignore unverified results that match hashes, because they are probably just hashes.
1 parent e793f4a commit d1a81af

File tree

2 files changed

+100
-4
lines changed

2 files changed

+100
-4
lines changed

pkg/detectors/aws/aws.go

+37-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ var (
2828
// Key types are from this list https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_identifiers.html#identifiers-unique-ids
2929
idPat = regexp.MustCompile(`\b((?:AKIA|ABIA|ACCA|ASIA)[0-9A-Z]{16})\b`)
3030
secretPat = regexp.MustCompile(`\b([A-Za-z0-9+/]{40})\b`)
31+
// Hashes, like those for git, do technically match the secret pattern.
32+
// But they are extremely unlikely to be generated as an actual AWS secret.
33+
// So when we find them, if they're not verified, we should ignore the result.
34+
falsePositiveSecretCheck = regexp.MustCompile(`[a-f0-9]{40}`)
3135
)
3236

3337
// Keywords are used for efficiently pre-filtering chunks.
@@ -148,12 +152,44 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
148152
}
149153
}
150154

155+
// If the result is unverified and matches something like a git hash, don't include it in the results.
156+
if !s1.Verified && falsePositiveSecretCheck.MatchString(resSecretMatch) {
157+
continue
158+
}
159+
151160
results = append(results, s1)
152161
// If we've found a verified match with this ID, we don't need to look for any more. So move on to the next ID.
153162
if s1.Verified {
154163
break
155164
}
156165
}
157166
}
158-
return detectors.CleanResults(results), nil
167+
return awsCustomCleanResults(results), nil
168+
}
169+
170+
func awsCustomCleanResults(results []detectors.Result) []detectors.Result {
171+
if len(results) == 0 {
172+
return results
173+
}
174+
175+
// For every ID, we want at most one result, preferrably verified.
176+
idResults := map[string]detectors.Result{}
177+
for _, result := range results {
178+
// Always accept the verified result as the result for the given ID.
179+
if result.Verified {
180+
idResults[result.Redacted] = result
181+
continue
182+
}
183+
184+
// Only include an unverified result if we don't already have a result for a given ID.
185+
if _, exist := idResults[result.Redacted]; !exist {
186+
idResults[result.Redacted] = result
187+
}
188+
}
189+
190+
out := []detectors.Result{}
191+
for _, r := range idResults {
192+
out = append(out, r)
193+
}
194+
return out
159195
}

pkg/detectors/aws/aws_test.go

+63-3
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@ package aws
22

33
import (
44
"context"
5+
"crypto/sha256"
56
"fmt"
67
"testing"
78
"time"
89

9-
"github.com/kylelemons/godebug/pretty"
10-
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
11-
1210
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
11+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
1312
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
13+
14+
"github.com/kylelemons/godebug/pretty"
1415
)
1516

1617
func TestAWS_FromChunk(t *testing.T) {
@@ -23,6 +24,10 @@ func TestAWS_FromChunk(t *testing.T) {
2324
secret := testSecrets.MustGetField("AWS")
2425
id := testSecrets.MustGetField("AWS_ID")
2526
inactiveSecret := testSecrets.MustGetField("AWS_INACTIVE")
27+
inactiveID := id[:len(id)-3] + "XYZ"
28+
hasher := sha256.New()
29+
hasher.Write([]byte(inactiveSecret))
30+
hash := string(hasher.Sum(nil))
2631

2732
type args struct {
2833
ctx context.Context
@@ -81,6 +86,61 @@ func TestAWS_FromChunk(t *testing.T) {
8186
want: nil,
8287
wantErr: false,
8388
},
89+
{
90+
name: "found two, one included for every ID found",
91+
s: Scanner{},
92+
args: args{
93+
ctx: context.Background(),
94+
data: []byte(fmt.Sprintf("The verified ID is %s with a secret of %s, but the unverified ID is %s and this is the secret %s", id, secret, inactiveID, inactiveSecret)),
95+
verify: true,
96+
},
97+
want: []detectors.Result{
98+
{
99+
DetectorType: detectorspb.DetectorType_AWS,
100+
Verified: true,
101+
Redacted: id,
102+
},
103+
{
104+
DetectorType: detectorspb.DetectorType_AWS,
105+
Verified: false,
106+
Redacted: inactiveID,
107+
},
108+
},
109+
wantErr: false,
110+
},
111+
{
112+
name: "not found, because unverified secret was a hash",
113+
s: Scanner{},
114+
args: args{
115+
ctx: context.Background(),
116+
data: []byte(fmt.Sprintf("You can find a aws secret %s within aws %s but not valid", hash, id)), // The secret would satisfy the regex but be filtered out after not passing validation.
117+
verify: true,
118+
},
119+
want: nil,
120+
wantErr: false,
121+
},
122+
{
123+
name: "found two, returned both because the active secret for one paired with the inactive ID, despite the hash",
124+
s: Scanner{},
125+
args: args{
126+
ctx: context.Background(),
127+
data: []byte(fmt.Sprintf("The verified ID is %s with a secret of %s, but the unverified ID is %s and the secret is this hash %s", id, secret, inactiveID, hash)),
128+
verify: true,
129+
},
130+
want: []detectors.Result{
131+
{
132+
DetectorType: detectorspb.DetectorType_AWS,
133+
Verified: true,
134+
Redacted: id,
135+
},
136+
{
137+
DetectorType: detectorspb.DetectorType_AWS,
138+
Verified: false,
139+
Redacted: inactiveID,
140+
},
141+
},
142+
wantErr: false,
143+
},
84144
}
85145
for _, tt := range tests {
86146
t.Run(tt.name, func(t *testing.T) {

0 commit comments

Comments
 (0)