Skip to content

Commit 30d6ef4

Browse files
authored
Merge branch 'main' into refuzz
2 parents ce8dbff + 0cdf032 commit 30d6ef4

File tree

92 files changed

+19295
-1270
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+19295
-1270
lines changed

.ci/benchmark.txt

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
META MD5 31e22b208a89f46baa65e1e81e65d231
2-
DATA MD5 a4708c2c86e361f1bdefc1c7313fa837
3-
DATA: 16703140 interested lines. MARKUP: 61264 items
1+
META MD5 df6b5b83b38ae0541be011988b72f4a5
2+
DATA MD5 f7ce1b683a2ab681177784b235f9069f
3+
DATA: 16720463 interested lines. MARKUP: 61376 items
44
FileType FileNumber ValidLines Positives Negatives
55
--------------- ------------ ------------ ----------- -----------
6-
676 69398 134 488
6+
677 69567 134 488
77
.04 2 149 4
88
.1 2 641 2 8
99
.admx 1 26 1
@@ -34,10 +34,10 @@ FileType FileNumber ValidLines Positives Negatives
3434
.conf 63 5019 66 111
3535
.config 20 492 7 41
3636
.cpp 14 5489 7 60
37-
.creds 1 10 1 1
37+
.creds 1 10 2 1
3838
.crlf 1 27 1
3939
.crt 2 4979 126
40-
.cs 268 82752 279 958
40+
.cs 268 82752 278 959
4141
.cshtml 5 180 12
4242
.csp 3 379 9
4343
.csproj 1 14 1
@@ -66,7 +66,7 @@ FileType FileNumber ValidLines Positives Negatives
6666
.gd 1 37 1
6767
.gml 3 3075 16
6868
.gni 3 5017 19
69-
.go 1277 718576 1450 4853
69+
.go 1278 718729 1474 4847
7070
.golden 5 1168 1 42
7171
.gradle 50 4295 8 189
7272
.graphql 8 454 2 13
@@ -77,7 +77,7 @@ FileType FileNumber ValidLines Positives Negatives
7777
.haml 9 191 17
7878
.hbs 2 54 3
7979
.hs 14 4140 30 65
80-
.html 60 19400 85 124
80+
.html 76 31895 114 133
8181
.idl 3 1625 37 5
8282
.iml 6 699 30
8383
.in 7 2242 10 50
@@ -90,7 +90,7 @@ FileType FileNumber ValidLines Positives Negatives
9090
.jenkinsfile 1 58 2 6
9191
.jinja2 1 64 2
9292
.js 655 531277 582 2702
93-
.json 886 13114472 1323 10123
93+
.json 885 13114471 1323 10117
9494
.jsp 13 3202 1 37
9595
.jsx 7 857 19
9696
.jwt 1 1 2
@@ -111,14 +111,14 @@ FileType FileNumber ValidLines Positives Negatives
111111
.list 2 15 2
112112
.lkml 1 43 1
113113
.lock 24 155844 158
114-
.log 2 199 38 52
114+
.log 2 199 76 52
115115
.lua 10 1924 40
116116
.m 16 13358 22 160
117117
.manifest 3 102 9 6
118118
.markdown 38 5862 69 4
119119
.markerb 3 12 3
120120
.marko 1 21 2
121-
.md 760 180503 983 2888
121+
.md 781 185010 1044 2888
122122
.mdx 3 549 7
123123
.mjml 1 18 1
124124
.mjs 22 4424 101 369
@@ -232,17 +232,17 @@ FileType FileNumber ValidLines Positives Negatives
232232
.yml 555 54516 1870 1223
233233
.zsh 6 872 12
234234
.zsh-theme 1 97 1
235-
TOTAL: 11478 16703140 16066 50313
236-
credsweeper result_cnt : 15687, lost_cnt : 0, true_cnt : 15431, false_cnt : 256
235+
TOTAL: 11516 16720463 16218 50311
236+
credsweeper result_cnt : 15925, lost_cnt : 0, true_cnt : 15634, false_cnt : 291
237237
Rules Positives Negatives Reported TP FP TN FN FPR FNR ACC PRC RCL F1
238238
------------------------------ ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- --------
239-
API 246 3361 235 234 1 3360 12 0.000298 0.048780 0.996396 0.995745 0.951220 0.972973
239+
API 244 3362 234 233 1 3361 11 0.000297 0.045082 0.996672 0.995726 0.954918 0.974895
240240
AWS Client ID 191 19 183 183 0 19 8 0.000000 0.041885 0.961905 1.000000 0.958115 0.978610
241241
AWS Multi 82 10 34 34 0 10 48 0.000000 0.585366 0.478261 1.000000 0.414634 0.586207
242242
AWS S3 Bucket 67 23 92 67 23 0 0 1.000000 0.000000 0.744444 0.744444 1.000000 0.853503
243243
Akamai Credentials 6 2 6 6 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
244244
Atlassian Old PAT token 5 8 11 5 6 2 0 0.750000 0.000000 0.538462 0.454545 1.000000 0.625000
245-
Auth 1094 2837 1064 1055 9 2828 39 0.003172 0.035649 0.987789 0.991541 0.964351 0.977757
245+
Auth 1094 2836 1081 1072 9 2827 22 0.003173 0.020110 0.992112 0.991674 0.979890 0.985747
246246
Azure Access Token 21 0 13 13 0 0 8 0.380952 0.619048 1.000000 0.619048 0.764706
247247
BASE64 Private Key 22 4 22 22 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
248248
BASE64 encoded PEM Private Key 12 0 12 12 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
@@ -251,7 +251,7 @@ Bearer Authorization 165 0 165 165
251251
Bitbucket Client ID 36 66 42 25 16 50 11 0.242424 0.305556 0.735294 0.609756 0.694444 0.649351
252252
Bitbucket Client Secret 38 105 86 27 58 47 11 0.552381 0.289474 0.517483 0.317647 0.710526 0.439024
253253
CMD ConvertTo-SecureString 13 4 13 13 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
254-
CMD Password 29 137 27 27 0 137 2 0.000000 0.068966 0.987952 1.000000 0.931034 0.964286
254+
CMD Password 33 137 33 33 0 137 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
255255
CMD Secret 1 17 1 1 0 17 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
256256
CMD Token 6 2 5 5 0 2 1 0.000000 0.166667 0.875000 1.000000 0.833333 0.909091
257257
Credential 99 498 100 99 1 497 0 0.002008 0.000000 0.998325 0.990000 1.000000 0.994975
@@ -266,25 +266,26 @@ Google API Key 12 0 12 12
266266
Google Multi 10 2 11 10 1 1 0 0.500000 0.000000 0.916667 0.909091 1.000000 0.952381
267267
Google OAuth Access Token 3 0 3 3 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
268268
Grafana Provisioned API Key 22 1 5 5 0 1 17 0.000000 0.772727 0.260870 1.000000 0.227273 0.370370
269-
JSON Web Token 148 61 141 141 0 61 7 0.000000 0.047297 0.966507 1.000000 0.952703 0.975779
269+
JSON Web Token 168 61 160 160 0 61 8 0.000000 0.047619 0.965066 1.000000 0.952381 0.975610
270270
JWK 55 0 55 55 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
271271
Jira / Confluence PAT token 0 4 0 0 4 0 0.000000 1.000000
272272
Jira 2FA 36 2 31 30 1 1 6 0.500000 0.166667 0.815789 0.967742 0.833333 0.895522
273-
Key 4195 16294 4217 4154 63 16231 41 0.003866 0.009774 0.994924 0.985060 0.990226 0.987637
274-
Nonce 115 50 111 111 0 50 4 0.000000 0.034783 0.975758 1.000000 0.965217 0.982301
273+
Key 4213 16295 4275 4186 89 16206 27 0.005462 0.006409 0.994344 0.979181 0.993591 0.986334
274+
NKEY Seed 58 0 57 57 0 0 1 0.017241 0.982759 1.000000 0.982759 0.991304
275+
Nonce 117 49 115 115 0 49 2 0.000000 0.017094 0.987952 1.000000 0.982906 0.991379
275276
Other 9 7444 0 0 7444 9 0.000000 1.000000 0.998792 0.000000
276277
PEM Private Key 1142 76 1146 1142 4 72 0 0.052632 0.000000 0.996716 0.996510 1.000000 0.998252
277-
Password 2513 9954 2456 2432 24 9930 81 0.002411 0.032232 0.991578 0.990228 0.967768 0.978869
278+
Password 2517 9959 2466 2437 29 9930 80 0.002912 0.031784 0.991263 0.988240 0.968216 0.978126
278279
SQL Password 44 14 41 41 0 14 3 0.000000 0.068182 0.948276 1.000000 0.931818 0.964706
279280
Salesforce Credentials 6 0 5 5 0 0 1 0.166667 0.833333 1.000000 0.833333 0.909091
280281
Salt 83 75 80 80 0 75 3 0.000000 0.036145 0.981013 1.000000 0.963855 0.981595
281-
Secret 1501 2378 1497 1488 9 2369 13 0.003785 0.008661 0.994328 0.993988 0.991339 0.992662
282-
Seed 1 6 0 0 6 1 0.000000 1.000000 0.857143 0.000000
282+
Secret 1501 2378 1500 1491 9 2369 10 0.003785 0.006662 0.995102 0.994000 0.993338 0.993669
283283
Slack Token 4 1 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
284284
Stripe Credentials 2 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
285-
Tencent WeChat API App ID 9 0 9 9 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
286-
Token 947 4640 862 859 3 4637 88 0.000647 0.092925 0.983712 0.996520 0.907075 0.949696
285+
Tencent WeChat API App ID 47 0 47 47 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
286+
Token 950 4639 881 874 7 4632 76 0.001509 0.080000 0.985149 0.992054 0.920000 0.954670
287287
Twilio Credentials 30 39 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
288-
URL Credentials 229 361 229 229 0 361 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
288+
URL Credentials 237 361 237 237 0 361 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
289289
UUID 1866 265 1849 1848 1 264 18 0.003774 0.009646 0.991084 0.999459 0.990354 0.994886
290-
16066 50313 15696 15431 256 50057 635 0.005088 0.039524 0.986577 0.983681 0.960476 0.971940
290+
16218 50311 15934 15634 291 50020 584 0.005784 0.036009 0.986848 0.981727 0.963991 0.972778
291+

.github/workflows/benchmark.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23
3232
with:
3333
repository: Samsung/CredData
34-
ref: 88e8732c681385f58eeef1a2f88b33758f3e94a5
34+
ref: 161939523732732bcb7ddfd830abd61aa6fa6b0f
3535

3636
- name: Markup hashing
3737
run: |
@@ -87,7 +87,7 @@ jobs:
8787
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23
8888
with:
8989
repository: Samsung/CredData
90-
ref: 88e8732c681385f58eeef1a2f88b33758f3e94a5
90+
ref: 161939523732732bcb7ddfd830abd61aa6fa6b0f
9191

9292
- name: Markup hashing
9393
run: |
@@ -190,7 +190,7 @@ jobs:
190190
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23
191191
with:
192192
repository: Samsung/CredData
193-
ref: 88e8732c681385f58eeef1a2f88b33758f3e94a5
193+
ref: 161939523732732bcb7ddfd830abd61aa6fa6b0f
194194

195195
- name: Markup hashing
196196
run: |
@@ -378,7 +378,7 @@ jobs:
378378
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23
379379
with:
380380
repository: Samsung/CredData
381-
ref: 88e8732c681385f58eeef1a2f88b33758f3e94a5
381+
ref: 161939523732732bcb7ddfd830abd61aa6fa6b0f
382382

383383
- name: Markup hashing
384384
run: |

credsweeper/common/keyword_pattern.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,22 @@
44
class KeywordPattern:
55
"""Pattern set of keyword types"""
66
directive = r"(?P<directive>(?:(?:[#%]define|%global)(?:\s|\\t)|\bset))?"
7-
key_left = r"(?:\\[nrt]|%[0-9a-f]{2}|\s)*" \
8-
r"(?P<variable>(([`'\"]{1,8}[^:='\"`}<>\\/&?]*|[^:='\"`}<>\s()\\/&?;,%]*)" \
7+
key_left = r"(?:\\[nrt]|(\\\\*u00|%)[0-9a-f]{2}|\s)*" \
8+
r"(?P<variable>(([\"'`]{1,8}[^:=\"'`}<>\\/&?]*|[^:=\"'`}<>\s()\\/&?;,%]*)" \
99
r"(?P<keyword>"
1010
# there will be inserted a keyword
1111
key_right = r")" \
12-
r"[^%:='\"`<>({?!&;\n]*" \
12+
r"[^%:=\"'`<>({?!&;\n]*" \
1313
r")" \
14-
r"(&(quot|apos);|%[0-9a-f]{2}|[`'\"])*" \
14+
r"(&(quot|apos|#3[49]);|(\\\\*u00|%)[0-9a-f]{2}|[\"'`])*" \
1515
r")" # <variable>
1616
separator = r"(?(directive)|(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*)" \
1717
r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=" \
1818
r"|(?(directive)(\\t|\s|\((?!\))){1,80}|%3d))" \
1919
r"(\s|\\{1,8}[tnr])*"
2020
# might be curly, square or parenthesis with words before
2121
wrap = r"(?P<wrap>(" \
22-
r"(new(\s|\\{1,8}[tnr]|byte|char|string|\[\]){1,8})?" \
22+
r"((\s|\\{1,8}[tnr]|new|byte|char|string|\[\]){1,8})?" \
2323
r"(?P<get>([_a-z][0-9a-z_.\[\]]*\.)get|(os\.)?getenv)?" \
2424
r"([0-9a-z_.]|::|-(>|&gt;))*" \
2525
r"\s*" \
@@ -28,22 +28,22 @@ class KeywordPattern:
2828
r"(?(get)('[^']+'|\"[^\"]+\")\s*,\s*|)" \
2929
r"([0-9a-z_]{1,32}\s*[:=]\s*)?" \
3030
r"){1,8})?"
31-
string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[`'\"])))?"
32-
left_quote = r"(?P<value_leftquote>((?P<esq>\\{1,8})?([`'\"]|&(quot|apos);)){1,4}))?"
31+
string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[\"'`])))?"
32+
left_quote = r"(?P<value_leftquote>((?P<esq>\\{1,8})?([\"'`]|&(quot|apos|#3[49]);)){1,4}))?"
3333
# Authentication scheme ( oauth | basic | bearer | apikey ) precedes to credential
3434
auth_keywords = r"(\s?(oauth|bot|basic|bearer|apikey|accesskey|ssws|ntlm)\s)?"
3535
value = r"(?P<value>" \
3636
r"(?(value_leftquote)" \
3737
r"(" \
3838
r"(?!(?P=value_leftquote))" \
39-
r"(?(esq)((?!(?P=esq)([`'\"]|&(quot|apos);)).)|((?!(?P=value_leftquote)).)))" \
39+
r"(?(esq)((?!(?P=esq)([\"'`]|&(quot|apos|#3[49]);)).)|((?!(?P=value_leftquote)).)))" \
4040
r"|" \
41-
r"(?!&(quot|apos);)" \
42-
r"(\\{1,8}([ tnr]|[^\s`'\"])" \
41+
r"(?!&(quot|apos|#3[49]);)" \
42+
r"(\\{1,8}([ tnr]|[^\s\"'`])" \
4343
r"|" \
4444
r"(?P<url_esc>%[0-9a-f]{2})" \
4545
r"|" \
46-
r"(?(url_esc)[^\s`'\",;\\&]|[^\s`'\",;\\])" \
46+
r"(?(url_esc)[^\s\"'`,;\\&]|[^\s\"'`,;\\])" \
4747
r")" \
4848
r"){4,8000}" \
4949
r"|" \

credsweeper/common/morpheme_checklist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,7 @@ kill
797797
kind
798798
kinesis
799799
kirk
800+
know
800801
kris
801802
lab
802803
lag

credsweeper/config/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ def __init__(self, config: Dict[str, Any]) -> None:
3939
self.doc: bool = config["doc"]
4040
self.severity: Severity = Severity.get(config.get("severity"))
4141

42-
self.min_keyword_value_length: int = int(config["min_keyword_value_length"])
43-
self.min_pattern_value_length: int = int(config["min_pattern_value_length"])
42+
self.max_url_cred_value_length: int = int(config["max_url_cred_value_length"])
43+
self.max_password_value_length: int = int(config["max_password_value_length"])
4444

4545
# Trim exclude patterns from space like characters
4646
self.exclude_lines = set(line.strip() for line in self.exclude_lines)

0 commit comments

Comments
 (0)