77MAGENTA - templates
88When the value start and end are defined, the text is marked
99"""
10-
10+ import functools
1111import json
1212import os
13+ import pathlib
1314import subprocess
1415import sys
1516from argparse import ArgumentParser
16- from functools import cache
1717from typing import List , Optional , Tuple , Dict
1818
1919from colorama import Fore , Back , Style
2525EXIT_FAILURE = 1
2626
2727
28- @cache
@functools.cache
def get_excluding_extensions() -> set[str]:
    """Return the file extensions listed in the exclusion config.

    Reads ``config.json`` from the current working directory (a copy of
    CredSweeper/secret/config.json) and merges the ``containers``,
    ``documents`` and ``extension`` lists found under its ``exclude`` key.
    The result is memoized by ``functools.cache``, so the file is parsed at
    most once per process and every caller receives the same set object.

    Returns:
        The union of the three exclusion lists.

    Raises:
        FileNotFoundError: ``config.json`` is absent from the working directory.
        KeyError: the ``exclude`` section or one of its three lists is missing.
    """
    # JSON is UTF-8 by specification; pin the encoding so parsing does not
    # depend on the platform's locale default.
    with open("config.json", encoding="utf8") as f:
        exclude = json.load(f)["exclude"]
    return set(exclude["containers"] + exclude["documents"] + exclude["extension"])
34+
35+
@functools.cache
def read_cache(path) -> list[str]:
    """Load the text file at *path* and return its lines, memoized per path.

    The file is decoded as UTF-8. CRLF and lone CR line endings are rewritten
    to LF before the text is split on LF, so a file that ends with a newline
    yields a trailing empty string. Repeated calls with the same *path*
    return the same cached list object.
    """
    with open(path, "r", encoding="utf8") as source:
        text = source.read()
    # Collapse CRLF first so the lone-CR pass cannot double the line breaks.
    for line_ending in ("\r\n", "\r"):
        text = text.replace(line_ending, "\n")
    return text.split("\n")
@@ -115,15 +123,15 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
115123 f"/.*{ path .split ('/' )[- 1 ]} ,{ line_start } ,{ line_end } ,.*/d" ,
116124 f"meta/{ repo_id } .csv" ])
117125
118- print ("\n \n " )
126+ print ("\n \n " , flush = True )
119127
120128
121- def main (meta_dir : str ,
122- data_dir : str ,
123- check_only : bool ,
124- data_filter : dict ,
125- load_json : Optional [str ] = None ,
126- category : Optional [str ] = None ) -> int :
129+ def review (meta_dir : str ,
130+ data_dir : str ,
131+ check_only : bool ,
132+ data_filter : dict ,
133+ load_json : Optional [str ] = None ,
134+ category : Optional [str ] = None ) -> int :
127135 errors = 0
128136 duplicates = 0
129137 if not os .path .exists (meta_dir ):
@@ -146,6 +154,11 @@ def main(meta_dir: str,
146154 if category and category not in row .Category .split (':' ):
147155 continue
148156
157+ if pathlib .Path (row .FilePath ).suffix in get_excluding_extensions ():
158+ # the file extension will be excluded during default scan
159+ print (f"File { row .FilePath } is excluded by default config with extension filter!" , flush = True )
160+ errors += 1
161+
149162 displayed_rows += 1
150163 if not check_only :
151164 print (str (row ), flush = True )
@@ -221,7 +234,7 @@ def main(meta_dir: str,
221234 return result
222235
223236
224- if __name__ == "__main__" :
237+ def main ( argv ) -> int :
225238 parser = ArgumentParser (prog = "python review_data.py" ,
226239 description = "Console script for review markup with colorization" )
227240
@@ -233,7 +246,7 @@ def main(meta_dir: str,
233246 parser .add_argument ("-X" , help = "Show X markup" , action = "store_true" )
234247 parser .add_argument ("--load" , help = "Load json report from CredSweeper" , nargs = '?' )
235248 parser .add_argument ("--category" , help = "Filter only with the category" , nargs = '?' )
236- _args = parser .parse_args ()
249+ _args = parser .parse_args (argv [ 1 :] )
237250
238251 _data_filter = {"Other" : False }
239252 if not _args .T and not _args .F and not _args .X :
@@ -244,8 +257,11 @@ def main(meta_dir: str,
244257 _data_filter ["T" ] = _args .T
245258 _data_filter ["F" ] = _args .F
246259 _data_filter ["X" ] = _args .X
247- exit_code = main (_args .meta_dir , _args .data_dir , bool (_args .check_only ), _data_filter , _args .load , _args .category )
248- sys .exit (exit_code )
260+ return review (_args .meta_dir , _args .data_dir , bool (_args .check_only ), _data_filter , _args .load , _args .category )
261+
262+
if __name__ == "__main__":
    # Run the command-line entry point and hand its status back to the shell.
    exit_status = main(sys.argv)
    sys.exit(exit_status)
249265
250266# review generation command
251267# .venv/bin/python review_data.py meta data >review.$(now).$(git rev-parse HEAD).txt
0 commit comments