Skip to content

Commit 2baf759

Browse files
authored
NTLM token, Similarity filter update, Pattern for base64 filter update (#789)
* NTLM token, Similarity filter update, Pattern for base64 filter update * BM upd * rollback naming style * BM update for UUID markup * fix pattern check, test * BM fix, crc fix
1 parent c126863 commit 2baf759

19 files changed

+232
-46
lines changed

.ci/benchmark.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ FileType FileNumber ValidLines Positives Negatives
226226
.zsh 6 872 11
227227
.zsh-theme 1 97 1
228228
TOTAL: 11361 16995334 17245 53521
229-
credsweeper result_cnt : 16960, lost_cnt : 0, true_cnt : 16862, false_cnt : 98
229+
credsweeper result_cnt : 16975, lost_cnt : 0, true_cnt : 16877, false_cnt : 98
230230
Rules Positives Negatives Reported TP FP TN FN FPR FNR ACC PRC RCL F1
231231
------------------------------ ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- --------
232232
API 244 4000 239 239 0 4000 5 0.000000 0.020492 0.998822 1.000000 0.979508 0.989648
@@ -240,7 +240,7 @@ Azure Access Token 24 0 17 17
240240
BASE64 Private Key 22 4 22 22 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
241241
BASE64 encoded PEM Private Key 12 0 12 12 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
242242
Basic Authorization 688 554 688 688 0 554 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
243-
Bearer Authorization 182 0 178 178 0 0 4 0.021978 0.978022 1.000000 0.978022 0.988889
243+
Bearer Authorization 182 0 182 182 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
244244
CMD ConvertTo-SecureString 13 4 13 13 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
245245
CMD Password 33 137 33 33 0 137 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
246246
CMD Secret 1 17 1 1 0 17 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
@@ -264,12 +264,12 @@ Jira / Confluence PAT token 0 4 0
264264
Key 4288 20727 4283 4265 18 20709 23 0.000868 0.005364 0.998361 0.995797 0.994636 0.995216
265265
MailGun API Key 8 0 8 8 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
266266
NKEY Seed 60 0 59 59 0 0 1 0.016667 0.983333 1.000000 0.983333 0.991597
267-
NTLM Token 4 0 0 0 0 4 1.000000 0.000000 0.000000
267+
NTLM Token 4 0 4 4 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
268268
Nonce 130 110 129 128 1 109 2 0.009091 0.015385 0.987500 0.992248 0.984615 0.988417
269269
OTP / 2FA Secret 64 3 56 54 2 1 10 0.666667 0.156250 0.820896 0.964286 0.843750 0.900000
270270
Other 0 20 0 0 20 0 0.000000 1.000000
271271
PEM Private Key 1150 76 1154 1150 4 72 0 0.052632 0.000000 0.996737 0.996534 1.000000 0.998264
272-
Password 2578 11383 2503 2492 11 11372 86 0.000966 0.033359 0.993052 0.995605 0.966641 0.980909
272+
Password 2578 11383 2504 2493 11 11372 85 0.000966 0.032971 0.993124 0.995607 0.967029 0.981110
273273
Perplexity API Key 2 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
274274
Postman Credentials 2 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
275275
SQL Password 44 14 42 42 0 14 2 0.000000 0.045455 0.965517 1.000000 0.954545 0.976744
@@ -279,8 +279,8 @@ Secret 1527 2474 1519 1517
279279
Slack Token 15 1 15 15 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
280280
Stripe Credentials 2 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
281281
Tencent WeChat API App ID 47 0 47 47 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
282-
Token 1142 5267 1061 1058 3 5264 84 0.000570 0.073555 0.986425 0.997172 0.926445 0.960508
282+
Token 1142 5267 1067 1064 3 5264 78 0.000570 0.068301 0.987362 0.997188 0.931699 0.963332
283283
Twilio Credentials 30 39 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
284284
URL Credentials 225 401 221 221 0 401 4 0.000000 0.017778 0.993610 1.000000 0.982222 0.991031
285285
UUID 2562 3671 2559 2543 16 3655 19 0.004358 0.007416 0.994385 0.993748 0.992584 0.993165
286-
17245 53521 16963 16862 98 53423 383 0.001831 0.022209 0.993203 0.994222 0.977791 0.985938
286+
17245 53521 16978 16877 98 53423 368 0.001831 0.021340 0.993415 0.994227 0.978660 0.986382

.github/workflows/check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ jobs:
9292
run: |
9393
banner="$(python -m credsweeper --banner | head -1)"
9494
echo "banner = '${banner}'"
95-
if [ "CredSweeper 1.13.4 crc32:830d94c9" != "${banner}" ]; then
95+
if [ "CredSweeper 1.13.4 crc32:404811d6" != "${banner}" ]; then
9696
echo "Update the check for '${banner}'"
9797
exit 1
9898
fi

credsweeper/filters/value_allowlist_check.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class ValueAllowlistCheck(Filter):
2525
ALLOWED_PATTERN = re.compile(Util.get_regex_combine_or(ALLOWED), flags=re.IGNORECASE)
2626

2727
ALLOWED_QUOTED = [
28-
r"\$[a-z_]+[0-9a-z_]*([$\s]|$)", #
28+
r"\$[a-z_][0-9a-z_]+((::|->|\.)[a-z_]|\[|$)", #
2929
r"\$\([^)]+\)", #
3030
r".*\*\*\*", #
3131
]
@@ -34,7 +34,7 @@ class ValueAllowlistCheck(Filter):
3434

3535
ALLOWED_UNQUOTED = [
3636
r"[~a-z0-9_]+((\.|->)[a-z0-9_]+)+\(.*$", #
37-
r"\$[a-z_]+[0-9a-z_]*\b", #
37+
r"\$[a-z_][0-9a-z_]+((::|->|\.)[a-z_]|\[|$)", #
3838
r"\$\([.0-9a-z_-]+", #
3939
r".*\*\*\*\*\*", #
4040
]

credsweeper/filters/value_pattern_check.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import re
22
from typing import Optional
33

4-
from credsweeper.common.constants import DEFAULT_PATTERN_LEN, MAX_LINE_LENGTH
4+
from credsweeper.common.constants import DEFAULT_PATTERN_LEN, MAX_LINE_LENGTH, MIN_DATA_LEN
55
from credsweeper.config.config import Config
66
from credsweeper.credentials.line_data import LineData
77
from credsweeper.file_handler.analysis_target import AnalysisTarget
@@ -50,10 +50,13 @@ def __init__(self, config: Optional[Config] = None, pattern_len: Optional[int] =
5050
@staticmethod
5151
def get_pattern(pattern_len: int) -> re.Pattern:
5252
"""Creates regex pattern to find N or more identical characters in sequence"""
53-
if DEFAULT_PATTERN_LEN < pattern_len:
54-
pattern = fr"(\S)\1{{{str(pattern_len - 1)},}}"
53+
pattern_length = max(DEFAULT_PATTERN_LEN, pattern_len)
54+
if MIN_DATA_LEN <= pattern_length:
55+
# base64 long sequences may contain 0x00 or 0xFF inside
56+
pattern = fr"([^\sA/_])\1{{{str(pattern_length-1)},}}"
5557
else:
56-
pattern = r"(\S)\1{3,}"
58+
# up to 256 symbols length
59+
pattern = fr"(\S)\1{{{str(pattern_length-1)},}}"
5760
return re.compile(pattern)
5861

5962
def equal_pattern_check(self, value: str, bit_length: int) -> bool:
@@ -67,7 +70,7 @@ def equal_pattern_check(self, value: str, bit_length: int) -> bool:
6770
True if contain and False if not
6871
6972
"""
70-
if self.patterns[bit_length].findall(value):
73+
if self.patterns[bit_length].search(value):
7174
return True
7275
return False
7376

@@ -146,9 +149,10 @@ def duple_pattern_check(self, value: str, bit_length: int) -> bool:
146149
147150
"""
148151
even_value = value[0::2]
149-
odd_value = value[1::2]
150-
if self.check_val(even_value, bit_length) and self.check_val(odd_value, bit_length):
151-
return True
152+
if self.check_val(even_value, bit_length):
153+
odd_value = value[1::2]
154+
if self.check_val(odd_value, bit_length):
155+
return True
152156
return False
153157

154158
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1+
from difflib import SequenceMatcher
12
from typing import Optional
23

4+
from credsweeper.common.constants import MIN_VALUE_LENGTH
35
from credsweeper.config.config import Config
46
from credsweeper.credentials.line_data import LineData
57
from credsweeper.file_handler.analysis_target import AnalysisTarget
68
from credsweeper.filters.filter import Filter
79

810

911
class ValueSimilarityCheck(Filter):
10-
"""Check if candidate value is at least 70% same as candidate keyword. Like: `secret = "mysecret"`."""
12+
"""Check if the candidate value is more than 75% similar to the candidate variable. Like: `secret = "mysecret"` (0.8571)."""
1113

1214
def __init__(self, config: Optional[Config] = None) -> None:
1315
pass
@@ -23,12 +25,16 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
2325
True if the candidate should be filtered out, False if it should be kept
2426
2527
"""
26-
# Cannot evaluate if key is None
27-
if line_data.key is None:
28-
return False
29-
if line_data.key.lower() in line_data.value.lower() and \
30-
len(line_data.key) / len(line_data.value) >= 0.7:
31-
return True
32-
if line_data.variable is not None and line_data.value in line_data.variable:
33-
return True
28+
if line_data.variable and line_data.value:
29+
variable_lower = line_data.variable.lower()
30+
value_lower = line_data.value.lower()
31+
if len(value_lower) <= len(variable_lower):
32+
if value_lower in variable_lower:
33+
return True
34+
elif MIN_VALUE_LENGTH <= len(variable_lower):
35+
# `api` and `key` may be in the value
36+
if variable_lower in value_lower:
37+
return True
38+
if 0.75 < SequenceMatcher(None, variable_lower, value_lower).ratio():
39+
return True
3440
return False

credsweeper/ml_model/ml_validator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,11 @@ def __init__(
8686
self.common_feature_list = []
8787
self.unique_feature_list = []
8888
if logger.isEnabledFor(logging.INFO):
89-
config_dbg = str(model_config) if logger.isEnabledFor(logging.DEBUG) else ''
9089
config_md5 = hashlib.md5(__ml_config_data).hexdigest()
9190
model_md5 = hashlib.md5(self.__ml_model_data).hexdigest()
92-
logger.info("Init ML validator with providers: '%s' ; model:'%s' md5:%s ; config:'%s' md5:%s ; %s",
93-
self.providers, ml_config_path, config_md5, ml_model_path, model_md5, config_dbg)
91+
logger.info("Init ML validator with providers: '%s' ; model:'%s' md5:%s ; config:'%s' md5:%s",
92+
self.providers, ml_config_path, config_md5, ml_model_path, model_md5)
93+
logger.debug(str(model_config))
9494
for feature_definition in model_config["features"]:
9595
feature_class = feature_definition["type"]
9696
kwargs = feature_definition.get("kwargs", {})

credsweeper/rules/config.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,6 +1434,22 @@
14341434
- code
14351435
- doc
14361436

1437+
- name: NTLM Token
1438+
severity: medium
1439+
confidence: strong
1440+
type: pattern
1441+
values:
1442+
- (?P<value>TlRMTVNTUAADAAAA[=0-9A-Za-z_/+-]{8,8000})(?![0-9A-Za-z_/+-])
1443+
filter_type:
1444+
- ValueMorphemesCheck(2)
1445+
- ValuePatternCheck
1446+
min_line_len: 160
1447+
required_substrings:
1448+
- TlRMTVNTUAADAAAA
1449+
target:
1450+
- doc
1451+
- code
1452+
14371453
- name: Basic Authorization
14381454
severity: medium
14391455
confidence: strong

tests/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,22 @@
11
from pathlib import Path
22

33
# total number of files in test samples
4-
SAMPLES_FILES_COUNT = 166
4+
SAMPLES_FILES_COUNT = 167
55

66
# the lowest value of ML threshold is used to display possible lowest values
77
NEGLIGIBLE_ML_THRESHOLD = 0.0001
88

99
# with option --doc & NEGLIGIBLE_ML_THRESHOLD
10-
SAMPLES_IN_DOC = 878
10+
SAMPLES_IN_DOC = 879
1111

1212
# credentials count after scan without filters and ML validations
13-
SAMPLES_REGEX_COUNT = 648
13+
SAMPLES_REGEX_COUNT = 650
1414

1515
# credentials count after scan with filters and without ML validation
16-
SAMPLES_FILTERED_COUNT = 535
16+
SAMPLES_FILTERED_COUNT = 537
1717

1818
# credentials count after default post-processing
19-
SAMPLES_POST_CRED_COUNT = 463
19+
SAMPLES_POST_CRED_COUNT = 464
2020

2121
# archived credentials that are not found without --depth
2222
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 138

tests/data/depth_3_pedantic.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7573,6 +7573,27 @@
75737573
}
75747574
]
75757575
},
7576+
{
7577+
"rule": "NTLM Token",
7578+
"severity": "medium",
7579+
"confidence": "strong",
7580+
"ml_probability": null,
7581+
"line_data_list": [
7582+
{
7583+
"line": "Positive: TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
7584+
"line_num": 1,
7585+
"path": "./tests/samples/ntlm",
7586+
"info": "FILE:./tests/samples/ntlm|RAW",
7587+
"variable": null,
7588+
"variable_start": -2,
7589+
"variable_end": -2,
7590+
"value": "TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
7591+
"value_start": 10,
7592+
"value_end": 190,
7593+
"entropy": 3.79713
7594+
}
7595+
]
7596+
},
75767597
{
75777598
"rule": "NuGet API key",
75787599
"severity": "high",

tests/data/doc.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15661,6 +15661,27 @@
1566115661
}
1566215662
]
1566315663
},
15664+
{
15665+
"rule": "NTLM Token",
15666+
"severity": "medium",
15667+
"confidence": "strong",
15668+
"ml_probability": null,
15669+
"line_data_list": [
15670+
{
15671+
"line": "TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
15672+
"line_num": 1,
15673+
"path": "./tests/samples/ntlm",
15674+
"info": "FILE:./tests/samples/ntlm|RAW",
15675+
"variable": null,
15676+
"variable_start": -2,
15677+
"variable_end": -2,
15678+
"value": "TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
15679+
"value_start": 10,
15680+
"value_end": 190,
15681+
"entropy": 3.79713
15682+
}
15683+
]
15684+
},
1566415685
{
1566515686
"rule": "NuGet API key",
1566615687
"severity": "high",

0 commit comments

Comments
 (0)