11import contextlib
22import re
33import statistics
4+ from itertools import takewhile
45
56from credsweeper .common .constants import Chars
67from credsweeper .config import Config
@@ -16,8 +17,8 @@ class ValueBase64PartCheck(Filter):
1617 Check that candidate is NOT a part of base64 long line
1718 """
1819
19- base64_pattern = re .compile (r"^(\\{1,8}[0abfnrtv]|[0-9A-Za-z+/=]){1,4000}" )
20- base64_set = set (Chars .BASE64STDPAD_CHARS .value )
20+ base64_pattern = re .compile (r"^(\\{1,8}[0abfnrtv]|[0-9A-Za-z+/=]){1,4000}$ " )
21+ base64_char_set = set (Chars .BASE64STDPAD_CHARS .value + ' \\ ' )
2122
def __init__(self, config: Config = None) -> None:
    """Construct the filter.

    The ``config`` argument is accepted but not used by this initializer;
    no instance state is set up here.
    """
    return None
@@ -64,38 +65,46 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
6465 elif right_end - left_start >= 2 * len_value :
6566 # simple analysis for data too large to yield sensible insights
6667 part_set = set (line [left_start :right_end ])
67- if not part_set .difference (self . base64_set ):
68+ if not part_set .difference (ValueBase64PartCheck . base64_char_set ):
6869 # obvious case: all characters are base64 standard
6970 return True
7071
71- left_part = line [left_start :line_data .value_start ]
72- len_left = len (left_part )
73- right_part = line [line_data .value_end :right_end ]
74- len_right = len (right_part )
72+ left_part = '' .join (
73+ takewhile (lambda x : x in ValueBase64PartCheck .base64_char_set ,
74+ reversed (line [left_start :line_data .value_start ])))
75+
76+ right_part = '' .join (
77+ takewhile (lambda x : x in ValueBase64PartCheck .base64_char_set , line [line_data .value_end :right_end ]))
7578
7679 min_entropy_value = ValueEntropyBase64Check .get_min_data_entropy (len_value )
77- value_entropy = Util .get_shannon_entropy (value , Chars .BASE64STD_CHARS .value )
7880
79- if ValueEntropyBase64Check .min_length < len_left :
80- left_entropy = Util .get_shannon_entropy (left_part , Chars .BASE64STD_CHARS .value )
81- if len_left < len_value :
82- left_entropy *= len_value / len_left
83- else :
84- left_entropy = min_entropy_value
81+ left_entropy = Util .get_shannon_entropy (left_part )
82+ value_entropy = Util .get_shannon_entropy (value )
83+ right_entropy = Util .get_shannon_entropy (right_part )
84+ common = left_part + value + right_part
85+ common_entropy = Util .get_shannon_entropy (common )
86+ min_entropy_common = ValueEntropyBase64Check .get_min_data_entropy (len (common ))
87+ if min_entropy_common < common_entropy :
88+ return True
8589
86- if ValueEntropyBase64Check .min_length < len_right :
87- right_entropy = Util .get_shannon_entropy (right_part , Chars .BASE64STD_CHARS .value )
88- if len_right < len_value :
89- left_entropy *= len_right / len_left
90+ if left_entropy and right_entropy :
91+ data = [left_entropy , value_entropy , right_entropy , min_entropy_value , common_entropy ]
92+ elif left_entropy and not right_entropy :
93+ data = [left_entropy , value_entropy , min_entropy_value , min_entropy_value , common_entropy ]
94+ elif not left_entropy and right_entropy :
95+ data = [value_entropy , right_entropy , min_entropy_value , min_entropy_value , common_entropy ]
9096 else :
91- right_entropy = min_entropy_value
97+ return False
9298
93- data = [left_entropy , value_entropy , right_entropy , min_entropy_value ]
9499 avg = statistics .mean (data )
95100 stdev = statistics .stdev (data , avg )
96101 avg_min = avg - 1.1 * stdev
97- if avg_min <= left_entropy and avg_min <= right_entropy :
102+ if (0. == left_entropy or avg_min < left_entropy or left_entropy < value_entropy < right_entropy ) \
103+ and (
104+ 0. == right_entropy or avg_min < right_entropy or right_entropy < value_entropy < left_entropy ):
98105 # high entropy of bound parts looks like a part of base64 long line
99106 return True
107+ else :
108+ return False
100109
101110 return False
0 commit comments