Skip to content

Commit 932a36a

Browse files
committed
Merge pull request #225 from facebook/tryParseDSStore
Resolves #223 allow matching .DS_STORE
2 parents e7e1066 + 1587992 commit 932a36a

3 files changed

Lines changed: 69 additions & 3 deletions

File tree

src/__tests__/inputs/.DS_KINDA_STORE

Whitespace-only changes.

src/__tests__/testParsing.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,16 @@
293293
'match': True,
294294
'num': 42,
295295
'file': './inputs/annoyingTildeExtension.txt~',
296+
}, {
297+
'input': 'inputs/.DS_KINDA_STORE',
298+
'validateFileExists': True,
299+
'match': True,
300+
'file': 'inputs/.DS_KINDA_STORE',
301+
}, {
302+
'input': './inputs/.DS_KINDA_STORE',
303+
'validateFileExists': True,
304+
'match': True,
305+
'file': './inputs/.DS_KINDA_STORE',
296306
}, {
297307
'input': 'evilFile No Prepend.txt',
298308
'validateFileExists': True,

src/parse.py

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,55 @@
4141
# Recognize files starting with capital letter and ending in "file".
4242
# eg. Makefile
4343
'([A-Z][a-zA-Z]{2,}file)',
44+
# end trying to capture
4445
')',
4546
# Regardless of the above case, here's how the file name should terminate
4647
'(\s|$|:)+'
4748
)))
49+
50+
MASTER_REGEX_WITH_SPACES_AND_WEIRD_FILES = re.compile(''.join((
51+
# begin the capture
52+
'(',
53+
# capture some pre-dir stuff like / and ./
54+
'(?:',
55+
'\.?\/'
56+
')?', # that's optional
57+
# now we look at directories. The 'character class ' allowed before the '/'
58+
# is either a real character or a space followed by a character. This allows
59+
# multiple spaces in a directory as long as each space is followed by
60+
# a normal character, but it does not allow multiple contiguous spaces
61+
# which would otherwise gobble up too much whitespace.
62+
#
63+
# Thus, these directories will match:
64+
# /something foo/
65+
# / a b c d e/
66+
# /normal/
67+
#
68+
# but these will not:
69+
# /two spaces here/
70+
# /ending in a space /
71+
'(([a-z.A-Z0-9\-_]|\s[a-zA-Z0-9\-_])+\/)+',
72+
# Recognize files starting with a dot followed by at least 3 characters
73+
'((\/?([a-z.A-Z0-9\-_]+\/))?\.[a-zA-Z0-9\-_]{3,}[a-zA-Z0-9\-_\/]*)',
74+
# or
75+
'|',
76+
# Recognize files containing at least one slash
77+
'([a-z.A-Z0-9\-_\/]{1,}\/[a-zA-Z0-9\-_]{1,})',
78+
# or
79+
'|',
80+
# Recognize files starting with capital letter and ending in "file".
81+
# eg. Makefile
82+
'([A-Z][a-zA-Z]{2,}file)',
83+
')',
84+
)))
85+
4886
MASTER_REGEX_WITH_SPACES = re.compile(''.join((
4987
# begin the capture
5088
'(',
51-
# a leading / for absolute dirs if its there
52-
'\/?',
89+
# capture some pre-dir stuff like / and ./
90+
'(?:',
91+
'\.?\/'
92+
')?', # that's optional
5393
# now we look at directories. The 'character class ' allowed before the '/'
5494
# is either a real character or a space followed by a character. This allows
5595
# multiple spaces in a directory as long as each space is followed by
@@ -70,7 +110,7 @@
70110
# for retina files.
71111
'([\(\),@a-zA-Z0-9\-_+.]|\s[,\(\)@a-zA-Z0-9\-_+.])+',
72112
# extensions don't allow spaces
73-
'\.[a-zA-Z0-9-]{1,30}'
113+
'\.[a-zA-Z0-9-]{1,30}',
74114
# end capture
75115
')',
76116
# optionally capture the line number
@@ -81,10 +121,12 @@
81121
REGEX_WATERFALL = [{
82122
# Homedirs need a separate regex.
83123
'regex': HOMEDIR_REGEX,
124+
'name': 'HOMEDIR_REGEX',
84125
}, {
85126
# the master regex matches tbgs results with
86127
# line numbers, so we prefer that and test it first
87128
'regex': MASTER_REGEX,
129+
'name': 'MASTER_REGEX',
88130
# one real quick check -- did we find a better match
89131
# earlier in the regex?
90132
'preferred_regex': OTHER_BGS_RESULT_REGEX,
@@ -95,25 +137,35 @@
95137
# the line number match since otherwise this would be too lax
96138
# of a regex.
97139
'regex': OTHER_BGS_RESULT_REGEX,
140+
'name': 'OTHER_BGS_RESULT_REGEX',
98141
}, {
99142
'regex': MASTER_REGEX_MORE_EXTENSIONS,
143+
'name': 'MASTER_REGEX_MORE_EXTENSIONS',
100144
'onlyWithFileInspection': True,
101145
}, {
102146
# We would overmatch on wayyyyy too many things if we
103147
# allowed spaces everywhere, but with filesystem validation
104148
# and the final fallback we can include them.
105149
'regex': MASTER_REGEX_WITH_SPACES,
150+
'name': 'MASTER_REGEX_WITH_SPACES',
151+
'numIndex': 4,
152+
'onlyWithFileInspection': True,
153+
}, {
154+
'regex': MASTER_REGEX_WITH_SPACES_AND_WEIRD_FILES,
155+
'name': 'MASTER_REGEX_WITH_SPACES_AND_WEIRD_FILES',
106156
'numIndex': 4,
107157
'onlyWithFileInspection': True,
108158
}, {
109159
# ok maybe it's just a normal file (with a dot)
110160
# so let's test for that if the above fails
111161
'regex': JUST_FILE,
162+
'name': 'JUST_FILE',
112163
'noNum': True
113164
}, {
114165
# ok if that's not there, try to do filesystem validation
115166
# for just files with spaces
116167
'regex': JUST_FILE_WITH_SPACES,
168+
'name': 'JUST_FILE_WITH_SPACES',
117169
'noNum': True,
118170
'onlyWithFileInspection': True,
119171
}, {
@@ -123,9 +175,11 @@
123175
# we require some minimum number of slashes and minimum
124176
# file name length
125177
'regex': FILE_NO_PERIODS,
178+
'name': 'FILE_NO_PERIODS',
126179
'noNum': True,
127180
}, {
128181
'regex': ENTIRE_TRIMMED_LINE_IF_NOT_WHITESPACE,
182+
'name': 'ENTIRE_TRIMMED_LINE_IF_NOT_WHITESPACE',
129183
'noNum': True,
130184
'withAllLinesMatched': True
131185
}]
@@ -205,9 +259,11 @@ def matchLineImpl(line, withFileInspection=False, withAllLinesMatched=False):
205259
matches = regex.search(line)
206260
if not matches:
207261
continue
262+
208263
unpackFunc = unpackMatchesNoNum if \
209264
regexConfig.get('noNum') else \
210265
lambda x: unpackMatches(x, numIndex=regexConfig.get('numIndex', 2))
266+
211267
if not regexConfig.get('preferred_regex'):
212268
results.append(unpackFunc(matches))
213269
continue

0 commit comments

Comments
 (0)