Skip to content

Commit 68504ad

Browse files
committed
add extension verification check
1 parent 78d0d08 commit 68504ad

File tree

2 files changed

+232
-14
lines changed

2 files changed

+232
-14
lines changed

config.json

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
{
2+
"exclude": {
3+
"pattern": [],
4+
"containers": [
5+
".aar",
6+
".apk",
7+
".bz2",
8+
".class",
9+
".gz",
10+
".jar",
11+
".lzma",
12+
".rpm",
13+
".tar",
14+
".war",
15+
".whl",
16+
".xz",
17+
".zip"
18+
],
19+
"documents": [
20+
".doc",
21+
".docx",
22+
".odp",
23+
".ods",
24+
".odt",
25+
".pdf",
26+
".ppt",
27+
".pptx",
28+
".xls",
29+
".xlsx"
30+
],
31+
"extension": [
32+
".7z",
33+
".a",
34+
".aac",
35+
".avi",
36+
".bin",
37+
".bmp",
38+
".css",
39+
".dmg",
40+
".ear",
41+
".eot",
42+
".elf",
43+
".exe",
44+
".gif",
45+
".gmo",
46+
".ico",
47+
".img",
48+
".info",
49+
".jpeg",
50+
".jpg",
51+
".lib",
52+
".map",
53+
".m4a",
54+
".mat",
55+
".mo",
56+
".mov",
57+
".mp3",
58+
".mp4",
59+
".mpg",
60+
".mkv",
61+
".npy",
62+
".npz",
63+
".obj",
64+
".oga",
65+
".ogg",
66+
".ogv",
67+
".ops",
68+
".pak",
69+
".png",
70+
".psd",
71+
".pyc",
72+
".pyd",
73+
".pyo",
74+
".rar",
75+
".rc",
76+
".rc2",
77+
".realm",
78+
".res",
79+
".s7z",
80+
".scss",
81+
".so",
82+
".sum",
83+
".svg",
84+
".swf",
85+
".tif",
86+
".tiff",
87+
".tlb",
88+
".ttf",
89+
".vcxproj",
90+
".vdproj",
91+
".wav",
92+
".webm",
93+
".webp",
94+
".wma",
95+
".woff",
96+
".woff2",
97+
".yuv"
98+
],
99+
"path": [
100+
"/.git/",
101+
"/.idea/",
102+
"/.svn/",
103+
"/__pycache__/",
104+
"/node_modules/",
105+
"/target/",
106+
"/.venv/",
107+
"/venv/"
108+
],
109+
"lines": [],
110+
"values": []
111+
},
112+
"source_ext": [
113+
".aspx",
114+
".cs",
115+
".cshtml",
116+
".ejs",
117+
".erb",
118+
".go",
119+
".html",
120+
".ipynb",
121+
".jsp",
122+
".jsx",
123+
".php",
124+
".phtml",
125+
".rb",
126+
".sh",
127+
".swift",
128+
".ts",
129+
".twig",
130+
".vue",
131+
".xhtml",
132+
".java",
133+
".js",
134+
".py",
135+
".cpp",
136+
".c",
137+
".h",
138+
".hpp",
139+
".mm",
140+
".cu",
141+
".y",
142+
".vb",
143+
".m",
144+
".cu"
145+
],
146+
"source_quote_ext": [
147+
".cs",
148+
".cc",
149+
".php",
150+
".tf",
151+
".kt",
152+
".go",
153+
".ipynb",
154+
".ts",
155+
".java",
156+
".js",
157+
".py",
158+
".cpp",
159+
".c",
160+
".h",
161+
".hpp"
162+
],
163+
"find_by_ext_list": [
164+
".pem",
165+
".cer",
166+
".csr",
167+
".der",
168+
".pfx",
169+
".p12",
170+
".key",
171+
".jks"
172+
],
173+
"bruteforce_list": [
174+
"",
175+
"changeit",
176+
"changeme",
177+
"tizen"
178+
],
179+
"check_for_literals": true,
180+
"max_password_value_length": 64,
181+
"max_url_cred_value_length": 80,
182+
"line_data_output": [
183+
"line",
184+
"line_num",
185+
"path",
186+
"info",
187+
"variable",
188+
"variable_start",
189+
"variable_end",
190+
"value",
191+
"value_start",
192+
"value_end",
193+
"entropy"
194+
],
195+
"candidate_output": [
196+
"rule",
197+
"severity",
198+
"confidence",
199+
"ml_probability",
200+
"line_data_list"
201+
]
202+
}

review_data.py

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@
77
MAGENTA - templates
88
When value start-end defined - the text is marked
99
"""
10-
10+
import functools
1111
import json
1212
import os
13+
import pathlib
1314
import subprocess
1415
import sys
1516
from argparse import ArgumentParser
16-
from functools import cache
1717
from typing import List, Optional, Tuple, Dict
1818

1919
from colorama import Fore, Back, Style
@@ -25,7 +25,15 @@
2525
EXIT_FAILURE = 1
2626

2727

28-
@cache
28+
@functools.cache
def get_excluding_extensions() -> set[str]:
    """Return the file extensions listed for exclusion in ``config.json``.

    The file ``config.json`` (a copy of CredSweeper/secret/config.json) is
    opened relative to the current working directory.  The ``containers``,
    ``documents`` and ``extension`` lists of its ``exclude`` section are
    merged into a single set.  The result is memoised by ``functools.cache``,
    so the file is read at most once per process.

    Raises:
        FileNotFoundError: ``config.json`` is not in the working directory.
        KeyError: the ``exclude`` section or one of its three lists is absent.
    """
    # JSON is UTF-8 by specification; an explicit encoding keeps the parsing
    # independent of the platform's default locale.
    with open("config.json", encoding="utf8") as f:
        exclude = json.load(f)["exclude"]
    return {*exclude["containers"], *exclude["documents"], *exclude["extension"]}
34+
35+
36+
@functools.cache
def read_cache(path) -> list[str]:
    """Read the UTF-8 text file at ``path`` and return its lines.

    Every ``\\r\\n`` and lone ``\\r`` is converted to ``\\n`` before the text is
    split on ``\\n``, so a trailing newline yields a final empty string.
    Results are memoised per ``path`` by ``functools.cache``.
    """
    with open(path, "r", encoding="utf8") as stream:
        text = stream.read()
    # collapse Windows and old-Mac line endings into plain line feeds
    normalized = text.replace("\r\n", '\n').replace('\r', '\n')
    return normalized.split('\n')
@@ -115,15 +123,15 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
115123
f"/.*{path.split('/')[-1]},{line_start},{line_end},.*/d",
116124
f"meta/{repo_id}.csv"])
117125

118-
print("\n\n")
126+
print("\n\n", flush=True)
119127

120128

121-
def main(meta_dir: str,
122-
data_dir: str,
123-
check_only: bool,
124-
data_filter: dict,
125-
load_json: Optional[str] = None,
126-
category: Optional[str] = None) -> int:
129+
def review(meta_dir: str,
130+
data_dir: str,
131+
check_only: bool,
132+
data_filter: dict,
133+
load_json: Optional[str] = None,
134+
category: Optional[str] = None) -> int:
127135
errors = 0
128136
duplicates = 0
129137
if not os.path.exists(meta_dir):
@@ -146,6 +154,11 @@ def main(meta_dir: str,
146154
if category and category not in row.Category.split(':'):
147155
continue
148156

157+
if pathlib.Path(row.FilePath).suffix in get_excluding_extensions():
158+
# the file extension will be excluded during default scan
159+
print(f"File {row.FilePath} is excluded by default config with extension filter!", flush=True)
160+
errors += 1
161+
149162
displayed_rows += 1
150163
if not check_only:
151164
print(str(row), flush=True)
@@ -221,7 +234,7 @@ def main(meta_dir: str,
221234
return result
222235

223236

224-
if __name__ == "__main__":
237+
def main(argv) -> int:
225238
parser = ArgumentParser(prog="python review_data.py",
226239
description="Console script for review markup with colorization")
227240

@@ -233,7 +246,7 @@ def main(meta_dir: str,
233246
parser.add_argument("-X", help="Show X markup", action="store_true")
234247
parser.add_argument("--load", help="Load json report from CredSweeper", nargs='?')
235248
parser.add_argument("--category", help="Filter only with the category", nargs='?')
236-
_args = parser.parse_args()
249+
_args = parser.parse_args(argv[1:])
237250

238251
_data_filter = {"Other": False}
239252
if not _args.T and not _args.F and not _args.X:
@@ -244,8 +257,11 @@ def main(meta_dir: str,
244257
_data_filter["T"] = _args.T
245258
_data_filter["F"] = _args.F
246259
_data_filter["X"] = _args.X
247-
exit_code = main(_args.meta_dir, _args.data_dir, bool(_args.check_only), _data_filter, _args.load, _args.category)
248-
sys.exit(exit_code)
260+
return review(_args.meta_dir, _args.data_dir, bool(_args.check_only), _data_filter, _args.load, _args.category)
261+
262+
263+
# script entry point: hand the raw argument vector to main() and exit with its result
if __name__ == "__main__":
    sys.exit(main(sys.argv))
249265

250266
# review generation command
251267
# .venv/bin/python review_data.py meta data >review.$(now).$(git rev-parse HEAD).txt

0 commit comments

Comments
 (0)