Skip to content

Commit 1587992

Browse files
committed
even better matches ./
1 parent 85e68c9 commit 1587992

2 files changed

Lines changed: 62 additions & 3 deletions

File tree

src/__tests__/testParsing.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,11 @@
298298
'validateFileExists': True,
299299
'match': True,
300300
'file': 'inputs/.DS_KINDA_STORE',
301+
}, {
302+
'input': './inputs/.DS_KINDA_STORE',
303+
'validateFileExists': True,
304+
'match': True,
305+
'file': './inputs/.DS_KINDA_STORE',
301306
}, {
302307
'input': 'evilFile No Prepend.txt',
303308
'validateFileExists': True,

src/parse.py

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,49 @@
4747
'(\s|$|:)+'
4848
)))
4949

50+
MASTER_REGEX_WITH_SPACES_AND_WEIRD_FILES = re.compile(''.join((
51+
# begin the capture
52+
'(',
53+
# capture some pre-dir stuff like / and ./
54+
'(?:',
55+
'\.?\/'
56+
')?', # that's optional
57+
# now we look at directories. The 'character class ' allowed before the '/'
58+
# is either a real character or a character and a space. This allows
59+
# multiple spaces in a directory as long as each space is followed by
60+
# a normal character, but it does not allow multiple contiguous spaces
61+
# which would otherwise gobble up too much whitespace.
62+
#
63+
# Thus, these directories will match:
64+
# /something foo/
65+
# / a b c d e/
66+
# /normal/
67+
#
68+
# but these will not:
69+
# /two  spaces here/
70+
# /ending in a space /
71+
'(([a-z.A-Z0-9\-_]|\s[a-zA-Z0-9\-_])+\/)+',
72+
# Recognize files starting with a dot followed by at least 3 characters
73+
'((\/?([a-z.A-Z0-9\-_]+\/))?\.[a-zA-Z0-9\-_]{3,}[a-zA-Z0-9\-_\/]*)',
74+
# or
75+
'|',
76+
# Recognize files containing at least one slash
77+
'([a-z.A-Z0-9\-_\/]{1,}\/[a-zA-Z0-9\-_]{1,})',
78+
# or
79+
'|',
80+
# Recognize files starting with capital letter and ending in "file".
81+
# eg. Makefile
82+
'([A-Z][a-zA-Z]{2,}file)',
83+
')',
84+
)))
85+
5086
MASTER_REGEX_WITH_SPACES = re.compile(''.join((
5187
# begin the capture
5288
'(',
53-
# a leading / for absolute dirs if its there
54-
'\/?',
89+
# capture some pre-dir stuff like / and ./
90+
'(?:',
91+
'\.?\/'
92+
')?', # thats optional
5593
# now we look at directories. The 'character class ' allowed before the '/'
5694
# is either a real character or a character and a space. This allows
5795
# multiple spaces in a directory as long as each space is followed by
@@ -70,7 +108,7 @@
70108
# we do similar for the filename part. the 'character class' is
71109
# char or char with space following, with some added tokens like @
72110
# for retina files.
73-
'([\(\),@\.a-zA-Z0-9\-_+.]|\s[,\(\)@a-zA-Z0-9\-_+.])+',
111+
'([\(\),@a-zA-Z0-9\-_+.]|\s[,\(\)@a-zA-Z0-9\-_+.])+',
74112
# extensions dont allow spaces
75113
'\.[a-zA-Z0-9-]{1,30}',
76114
# end capture
@@ -83,10 +121,12 @@
83121
REGEX_WATERFALL = [{
84122
# Homedirs need a separate regex.
85123
'regex': HOMEDIR_REGEX,
124+
'name': 'HOMEDIR_REGEX',
86125
}, {
87126
# the master regex matches tbgs results with
88127
# line numbers, so we prefer that and test it first
89128
'regex': MASTER_REGEX,
129+
'name': 'MASTER_REGEX',
90130
# one real quick check -- did we find a better match
91131
# earlier in the regex?
92132
'preferred_regex': OTHER_BGS_RESULT_REGEX,
@@ -97,25 +137,35 @@
97137
# the line number match since otherwise this would be too lax
98138
# of a regex.
99139
'regex': OTHER_BGS_RESULT_REGEX,
140+
'name': 'OTHER_BGS_RESULT_REGEX',
100141
}, {
101142
'regex': MASTER_REGEX_MORE_EXTENSIONS,
143+
'name': 'MASTER_REGEX_MORE_EXTENSIONS',
102144
'onlyWithFileInspection': True,
103145
}, {
104146
# We would overmatch on wayyyyy too many things if we
105147
# allowed spaces everywhere, but with filesystem validation
106148
# and the final fallback we can include them.
107149
'regex': MASTER_REGEX_WITH_SPACES,
150+
'name': 'MASTER_REGEX_WITH_SPACES',
151+
'numIndex': 4,
152+
'onlyWithFileInspection': True,
153+
}, {
154+
'regex': MASTER_REGEX_WITH_SPACES_AND_WEIRD_FILES,
155+
'name': 'MASTER_REGEX_WITH_SPACES_AND_WEIRD_FILES',
108156
'numIndex': 4,
109157
'onlyWithFileInspection': True,
110158
}, {
111159
# ok maybe its just a normal file (with a dot)
112160
# so lets test for that if the above fails
113161
'regex': JUST_FILE,
162+
'name': 'JUST_FILE',
114163
'noNum': True
115164
}, {
116165
# ok if that's not there, try to do filesystem validation
117166
# for just files with spaces
118167
'regex': JUST_FILE_WITH_SPACES,
168+
'name': 'JUST_FILE_WITH_SPACES',
119169
'noNum': True,
120170
'onlyWithFileInspection': True,
121171
}, {
@@ -125,9 +175,11 @@
125175
# we require some minimum number of slashes and minimum
126176
# file name length
127177
'regex': FILE_NO_PERIODS,
178+
'name': 'FILE_NO_PERIODS',
128179
'noNum': True,
129180
}, {
130181
'regex': ENTIRE_TRIMMED_LINE_IF_NOT_WHITESPACE,
182+
'name': 'ENTIRE_TRIMMED_LINE_IF_NOT_WHITESPACE',
131183
'noNum': True,
132184
'withAllLinesMatched': True
133185
}]
@@ -207,9 +259,11 @@ def matchLineImpl(line, withFileInspection=False, withAllLinesMatched=False):
207259
matches = regex.search(line)
208260
if not matches:
209261
continue
262+
210263
unpackFunc = unpackMatchesNoNum if \
211264
regexConfig.get('noNum') else \
212265
lambda x: unpackMatches(x, numIndex=regexConfig.get('numIndex', 2))
266+
213267
if not regexConfig.get('preferred_regex'):
214268
results.append(unpackFunc(matches))
215269
continue

0 commit comments

Comments
 (0)