Skip to content

Commit 4785dbc

Browse files
authored
DEB archives support (#711)
* entropy extrapolation for base64 over 256 symbols * .deb format support for deep scan, free method refactored * style * fix * customBMref * style * avoid passion * value pattern check with duple cases * reform with pylint suggestions * style * mypi: check_untyped_defs * BM scores, test, fix * style * BMrefFix * enhancement * BM ref actualize * guide update * suppress fail in guide json example * Apply suggestions from code review [no ci] * optimization * upd. guide * linter fix
1 parent a779db6 commit 4785dbc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+2332
-1963
lines changed

.ci/benchmark.txt

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
META MD5 7b322f7e988d13437108406bf7796f01
2-
DATA MD5 a1d607f2222155019713285daed76013
3-
DATA: 16664944 interested lines. MARKUP: 62059 items
1+
META MD5 39966c3cbfe713b3d9c2e0ad68ac5416
2+
DATA MD5 bfd00203840d99eefc189ae75713eb0c
3+
DATA: 16664944 interested lines. MARKUP: 62071 items
44
FileType FileNumber ValidLines Positives Negatives Templates
55
--------------- ------------ ------------ ----------- ----------- -----------
66
676 69134 134 420 72
@@ -66,7 +66,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
6666
.gd 1 37 1
6767
.gml 3 3075 16
6868
.gni 3 5017 19
69-
.go 1240 682004 1227 4127 741
69+
.go 1240 682004 1228 4128 741
7070
.golden 5 1168 1 13 29
7171
.gradle 50 4295 7 90 100
7272
.graphql 8 454 2 13
@@ -88,8 +88,8 @@ FileType FileNumber ValidLines Positives Negatives Templat
8888
.java 670 144140 477 1325 170
8989
.jenkinsfile 1 58 2 6
9090
.jinja2 1 64 2
91-
.js 658 533148 568 2470 284
92-
.json 886 13114472 1291 10017 132
91+
.js 658 533148 569 2470 284
92+
.json 886 13114472 1304 10018 132
9393
.jsp 13 3202 1 37
9494
.jsx 7 857 19
9595
.jwt 1 1 2
@@ -109,7 +109,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
109109
.libsonnet 2 210 1 11
110110
.list 2 15 2
111111
.lkml 1 43 1
112-
.lock 24 155844 1 162
112+
.lock 24 155844 164
113113
.log 2 199 38 52
114114
.lua 10 1924 37 3
115115
.m 16 13358 19 158 3
@@ -165,7 +165,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
165165
.pyx 2 1094 23
166166
.r 4 62 5 2
167167
.rake 2 51 2
168-
.rb 853 130809 390 2446 594
168+
.rb 853 130809 391 2446 594
169169
.re 1 31 1
170170
.red 1 159 1
171171
.release 1 13 4
@@ -215,7 +215,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
215215
.toml 86 2471 53 103 156
216216
.tpl 1 43 1
217217
.travis 1 34 2 3 1
218-
.ts 609 109982 213 1773 197
218+
.ts 609 109982 213 1774 197
219219
.tsx 54 7914 1 114 5
220220
.ttar 1 452 1
221221
.txt 322 81679 5232 5096 42
@@ -231,17 +231,17 @@ FileType FileNumber ValidLines Positives Negatives Templat
231231
.yml 556 54557 1239 908 364
232232
.zsh 6 872 12
233233
.zsh-theme 1 97 1
234-
TOTAL: 11414 16664944 14821 46712 4910
235-
credsweeper result_cnt : 14435, lost_cnt : 0, true_cnt : 14230, false_cnt : 205
234+
TOTAL: 11414 16664944 14836 46717 4910
235+
credsweeper result_cnt : 14430, lost_cnt : 0, true_cnt : 14242, false_cnt : 188
236236
Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1
237237
------------------------------ ----------- ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- --------
238-
API 229 3172 187 221 221 0 3359 8 0.000000 0.034934 0.997770 1.000000 0.965066 0.982222
238+
API 235 3172 187 227 227 0 3359 8 0.000000 0.034043 0.997774 1.000000 0.965957 0.982684
239239
AWS Client ID 182 19 0 174 174 0 19 8 0.000000 0.043956 0.960199 1.000000 0.956044 0.977528
240240
AWS Multi 82 10 0 34 34 0 10 48 0.000000 0.585366 0.478261 1.000000 0.414634 0.586207
241241
AWS S3 Bucket 67 23 0 92 67 23 0 0 1.000000 0.000000 0.744444 0.744444 1.000000 0.853503
242242
Atlassian Old PAT token 5 8 0 11 5 6 2 0 0.750000 0.000000 0.538462 0.454545 1.000000 0.625000
243-
Auth 1073 2751 81 1057 1050 7 2825 23 0.002472 0.021435 0.992318 0.993377 0.978565 0.985915
244-
Azure Access Token 21 0 0 14 14 0 0 7 0.333333 0.666667 1.000000 0.666667 0.800000
243+
Auth 1074 2754 81 1058 1050 8 2827 24 0.002822 0.022346 0.991814 0.992439 0.977654 0.984991
244+
Azure Access Token 21 0 0 13 13 0 0 8 0.380952 0.619048 1.000000 0.619048 0.764706
245245
BASE64 Private Key 19 4 0 19 19 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
246246
BASE64 encoded PEM Private Key 12 0 0 10 10 0 0 2 0.166667 0.833333 1.000000 0.833333 0.909091
247247
Bitbucket Client ID 36 66 0 48 31 16 50 5 0.242424 0.138889 0.794118 0.659574 0.861111 0.746988
@@ -266,11 +266,11 @@ Grafana Provisioned API Key 22 1 0
266266
JSON Web Token 180 61 0 141 141 0 61 39 0.000000 0.216667 0.838174 1.000000 0.783333 0.878505
267267
Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000
268268
Jira 2FA 36 1 1 31 30 1 1 6 0.500000 0.166667 0.815789 0.967742 0.833333 0.895522
269-
Key 4187 15721 483 4192 4163 29 16175 24 0.001790 0.005732 0.997401 0.993082 0.994268 0.993675
269+
Key 4188 15722 483 4174 4163 11 16194 25 0.000679 0.005969 0.998235 0.997365 0.994031 0.995695
270270
Nonce 113 49 0 111 111 0 49 2 0.000000 0.017699 0.987654 1.000000 0.982301 0.991071
271271
Other 9 7446 5 0 0 7451 9 0.000000 1.000000 0.998794 0.000000
272272
PEM Private Key 1135 1483 0 1139 1135 4 1479 0 0.002697 0.000000 0.998472 0.996488 1.000000 0.998241
273-
Password 2201 7548 2539 2156 2132 24 10063 69 0.002379 0.031349 0.992432 0.988868 0.968651 0.978655
273+
Password 2202 7548 2539 2157 2133 24 10063 69 0.002379 0.031335 0.992432 0.988873 0.968665 0.978665
274274
SQL Password 44 13 0 41 41 0 13 3 0.000000 0.068182 0.947368 1.000000 0.931818 0.964706
275275
Salesforce Credentials 6 0 0 5 5 0 0 1 0.166667 0.833333 1.000000 0.833333 0.909091
276276
Salt 83 74 1 81 81 0 75 2 0.000000 0.024096 0.987342 1.000000 0.975904 0.987805
@@ -279,8 +279,8 @@ Seed 1 6 0
279279
Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
280280
Stripe Credentials 2 0 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
281281
Tencent WeChat API App ID 8 0 0 8 8 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
282-
Token 911 4171 455 839 836 3 4623 75 0.000649 0.082327 0.985913 0.996424 0.917673 0.955429
282+
Token 917 4172 455 845 842 3 4624 75 0.000648 0.081788 0.985931 0.996450 0.918212 0.955732
283283
Twilio Credentials 30 39 0 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
284284
URL Credentials 225 168 197 225 225 0 365 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
285285
UUID 1841 265 0 1824 1823 1 264 18 0.003774 0.009777 0.990978 0.999452 0.990223 0.994816
286-
14821 46712 4910 14444 14230 205 46507 591 0.004389 0.039876 0.987064 0.985798 0.960124 0.972792
286+
14836 46717 4910 14439 14242 188 46529 594 0.004024 0.040038 0.987296 0.986972 0.959962 0.973280

.github/workflows/benchmark.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23
3232
with:
3333
repository: Samsung/CredData
34-
ref: 22b3f3789b3c2078d7dec12a6d210e11c1cc697c
34+
ref: ea8910359f66c63a3ac27517e8715129c0e0bbf3
3535

3636
- name: Markup hashing
3737
run: |
@@ -87,7 +87,7 @@ jobs:
8787
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23
8888
with:
8989
repository: Samsung/CredData
90-
ref: 22b3f3789b3c2078d7dec12a6d210e11c1cc697c
90+
ref: ea8910359f66c63a3ac27517e8715129c0e0bbf3
9191

9292
- name: Markup hashing
9393
run: |
@@ -190,7 +190,7 @@ jobs:
190190
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23
191191
with:
192192
repository: Samsung/CredData
193-
ref: 22b3f3789b3c2078d7dec12a6d210e11c1cc697c
193+
ref: ea8910359f66c63a3ac27517e8715129c0e0bbf3
194194

195195
- name: Markup hashing
196196
run: |
@@ -378,7 +378,7 @@ jobs:
378378
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23
379379
with:
380380
repository: Samsung/CredData
381-
ref: 22b3f3789b3c2078d7dec12a6d210e11c1cc697c
381+
ref: ea8910359f66c63a3ac27517e8715129c0e0bbf3
382382

383383
- name: Markup hashing
384384
run: |

.github/workflows/check.yml

Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -89,19 +89,19 @@ jobs:
8989

9090
- name: Analysing the code with pylint and minimum Python version 3.9
9191
if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
92-
run: pylint --py-version=3.9 --errors-only credsweeper
92+
run: pylint --py-version=3.9 --verbose credsweeper
9393

9494
- name: Analysing the code with pylint and minimum Python version 3.10
9595
if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
96-
run: pylint --py-version=3.10 --errors-only credsweeper
96+
run: pylint --py-version=3.10 --verbose credsweeper
9797

9898
- name: Analysing the code with pylint and minimum Python version 3.11
9999
if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
100-
run: pylint --py-version=3.11 --errors-only credsweeper
100+
run: pylint --py-version=3.11 --verbose credsweeper
101101

102102
- name: Analysing the code with pylint and minimum Python version 3.12
103103
if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
104-
run: pylint --py-version=3.12 --errors-only credsweeper
104+
run: pylint --py-version=3.12 --verbose credsweeper
105105

106106
# # # mypy
107107

@@ -120,13 +120,6 @@ jobs:
120120
run: |
121121
mypy --config-file .mypy.ini --python-version=3.12 credsweeper
122122
123-
# # # documentation
124-
125-
- name: Analysing the code with pylint for NEW missed docstrings of classes or functions
126-
if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
127-
run: |
128-
pylint --disable=E,R,W1203,W0718,C0114,C0103,C0303,C0412,C0413,C0415,C0200,C0201,C0325 --verbose credsweeper
129-
130123
# # # Documentation check
131124

132125
- name: Test for creation sphinx documentations
@@ -138,20 +131,6 @@ jobs:
138131
cd source
139132
python -m sphinx -T -E -b html -d _build/doctrees -D language=en . ./_html
140133
141-
# # # yapf
142-
143-
- name: Check project style
144-
if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
145-
run: |
146-
for f in credsweeper tests docs experiment; do
147-
yapf --style .style.yapf --recursive --in-place --parallel $f
148-
done
149-
if [ 0 -ne $(git ls-files -m | wc -l) ]; then
150-
git diff
151-
echo "<- difference how to apply the style"
152-
exit 1
153-
fi
154-
155134
# # # flake8
156135

157136
- name: Analysing the code with flake8
@@ -198,6 +177,20 @@ jobs:
198177
exit 1
199178
fi
200179
180+
# # # yapf
181+
182+
- name: Check project style
183+
if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
184+
run: |
185+
for f in credsweeper tests docs experiment; do
186+
yapf --style .style.yapf --recursive --in-place --parallel $f
187+
done
188+
if [ 0 -ne $(git ls-files -m | wc -l) ]; then
189+
git diff
190+
echo "<- difference how to apply the style"
191+
exit 1
192+
fi
193+
201194
# # # SECURITY.md check
202195

203196
- name: SECURITY.md check

.mypy.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
warn_return_any = True
55
warn_unused_configs = True
66
strict_optional = False
7+
check_untyped_defs = True
78

89
# Per-module options:
910

.pylintrc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@
22
max-line-length=120
33

44
extension-pkg-allow-list=lxml
5+
6+
[MESSAGES CONTROL]
7+
disable=R,W0718,W1203,C0415,C0413,C0103,C0114

credsweeper/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def logger_levels(log_level: str) -> str:
6363
Returns log_level in UPPERCASE if it is one of the Logger.LEVELS keys
6464
"""
6565
val = log_level.upper()
66-
if any(val == i for i in Logger.LEVELS.keys()):
66+
if val in Logger.LEVELS:
6767
return val
6868
raise ArgumentTypeError(f"Log level provided: {log_level} -- must be one of: {' | '.join(Logger.LEVELS.keys())}")
6969

credsweeper/common/constants.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,6 @@ class Chars(Enum):
9696
ASCII_PRINTABLE = string.printable
9797

9898

99-
ENTROPY_LIMIT_BASE64 = 4.5
100-
ENTROPY_LIMIT_BASE3x = 3
101-
102-
10399
class GroupType(Enum):
104100
"""Group type - used in Group constructor for load predefined set of filters"""
105101
KEYWORD = "keyword"
@@ -148,7 +144,8 @@ class DiffRowType(Enum):
148144
CHUNK_STEP_SIZE = CHUNK_SIZE - OVERLAP_SIZE
149145
# ML hunk size used to limit the variable or value size and to get a substring near the value
150146
ML_HUNK = 80
151-
""" values according https://docs.python.org/3/library/codecs.html """
147+
148+
# values according to https://docs.python.org/3/library/codecs.html
152149
UTF_8 = "utf_8"
153150
UTF_16 = "utf_16"
154151
LATIN_1 = "latin_1"

credsweeper/credentials/candidate_key.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def __eq__(self, other):
2424
return self.key == other.key
2525

2626
def __ne__(self, other):
27-
return not (self == other)
27+
return not bool(self == other)
2828

2929
def __repr__(self) -> str:
3030
return f"{self.key}:{self.__line}"

credsweeper/credentials/credential_manager.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ def group_credentials(self) -> CandidateGroupGenerator:
110110
# Match by file path+line num+value. Value is required so two different credentials are still
111111
# processed independently
112112
candidate_key = CandidateKey(line_data)
113-
if candidate_key not in groups:
114-
groups[candidate_key] = list()
115-
groups[candidate_key].append(credential_candidate)
113+
if candidate_key in groups:
114+
groups[candidate_key].append(credential_candidate)
115+
else:
116+
groups[candidate_key] = [credential_candidate]
116117
return groups

credsweeper/credentials/line_data.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -327,11 +327,8 @@ def is_source_file_with_quotes(self) -> bool:
327327
True if file requires quotation, False otherwise
328328
329329
"""
330-
if not self.path:
331-
return False
332-
if Util.get_extension(self.path) in self.config.source_quote_ext:
333-
return True
334-
return False
330+
file_type = self.file_type or Util.get_extension(self.path)
331+
return bool(file_type) and file_type in self.config.source_quote_ext
335332

336333
@staticmethod
337334
def get_hash_or_subtext(

0 commit comments

Comments
 (0)