|
47 | 47 | '(\s|$|:)+' |
48 | 48 | ))) |
49 | 49 |
|
| 50 | +MASTER_REGEX_WITH_SPACES_AND_WEIRD_FILES = re.compile(''.join(( |
| 51 | + # begin the capture |
| 52 | + '(', |
| 53 | + # capture some pre-dir stuff like / and ./ |
| 54 | + '(?:', |
| 55 | + '\.?\/' |
| 56 | + ')?', # that's optional |
| 57 | + # now we look at directories. The 'character class ' allowed before the '/' |
| 58 | + # is either a real character or a character and a space. This allows |
| 59 | + # multiple spaces in a directory as long as each space is followed by |
| 60 | + # a normal character, but it does not allow multiple contiguous spaces |
| 61 | + # which would otherwise gobble up too much whitespace. |
| 62 | + # |
| 63 | + # Thus, these directories will match: |
| 64 | + # /something foo/ |
| 65 | + # / a b c d e/ |
| 66 | + # /normal/ |
| 67 | + # |
| 68 | + # but these will not: |
| 69 | + # /two spaces here/ |
| 70 | + # /ending in a space / |
| 71 | + '(([a-z.A-Z0-9\-_]|\s[a-zA-Z0-9\-_])+\/)+', |
| 72 | + # Recognize files starting with a dot followed by at least 3 characters |
| 73 | + '((\/?([a-z.A-Z0-9\-_]+\/))?\.[a-zA-Z0-9\-_]{3,}[a-zA-Z0-9\-_\/]*)', |
| 74 | + # or |
| 75 | + '|', |
| 76 | + # Recognize files containing at least one slash |
| 77 | + '([a-z.A-Z0-9\-_\/]{1,}\/[a-zA-Z0-9\-_]{1,})', |
| 78 | + # or |
| 79 | + '|', |
| 80 | + # Recognize files starting with capital letter and ending in "file". |
| 81 | + # e.g. Makefile |
| 82 | + '([A-Z][a-zA-Z]{2,}file)', |
| 83 | + ')', |
| 84 | +))) |
| 85 | + |
50 | 86 | MASTER_REGEX_WITH_SPACES = re.compile(''.join(( |
51 | 87 | # begin the capture |
52 | 88 | '(', |
53 | | - # a leading / for absolute dirs if its there |
54 | | - '\/?', |
| 89 | + # capture some pre-dir stuff like / and ./ |
| 90 | + '(?:', |
| 91 | + '\.?\/' |
| 92 | + ')?', # that's optional |
55 | 93 | # now we look at directories. The 'character class ' allowed before the '/' |
56 | 94 | # is either a real character or a character and a space. This allows |
57 | 95 | # multiple spaces in a directory as long as each space is followed by |
|
70 | 108 | # we do similar for the filename part. the 'character class' is |
71 | 109 | # char or char with space following, with some added tokens like @ |
72 | 110 | # for retina files. |
73 | | - '([\(\),@\.a-zA-Z0-9\-_+.]|\s[,\(\)@a-zA-Z0-9\-_+.])+', |
| 111 | + '([\(\),@a-zA-Z0-9\-_+.]|\s[,\(\)@a-zA-Z0-9\-_+.])+', |
74 | 112 | # extensions don't allow spaces
75 | 113 | '\.[a-zA-Z0-9-]{1,30}', |
76 | 114 | # end capture |
|
83 | 121 | REGEX_WATERFALL = [{ |
84 | 122 | # Homedirs need a separate regex. |
85 | 123 | 'regex': HOMEDIR_REGEX, |
| 124 | + 'name': 'HOMEDIR_REGEX', |
86 | 125 | }, { |
87 | 126 | # the master regex matches tbgs results with |
88 | 127 | # line numbers, so we prefer that and test it first |
89 | 128 | 'regex': MASTER_REGEX, |
| 129 | + 'name': 'MASTER_REGEX', |
90 | 130 | # one real quick check -- did we find a better match |
91 | 131 | # earlier in the regex? |
92 | 132 | 'preferred_regex': OTHER_BGS_RESULT_REGEX, |
|
97 | 137 | # the line number match since otherwise this would be too lax |
98 | 138 | # of a regex. |
99 | 139 | 'regex': OTHER_BGS_RESULT_REGEX, |
| 140 | + 'name': 'OTHER_BGS_RESULT_REGEX', |
100 | 141 | }, { |
101 | 142 | 'regex': MASTER_REGEX_MORE_EXTENSIONS, |
| 143 | + 'name': 'MASTER_REGEX_MORE_EXTENSIONS', |
102 | 144 | 'onlyWithFileInspection': True, |
103 | 145 | }, { |
104 | 146 | # We would overmatch on wayyyyy too many things if we |
105 | 147 | # allowed spaces everywhere, but with filesystem validation |
106 | 148 | # and the final fallback we can include them. |
107 | 149 | 'regex': MASTER_REGEX_WITH_SPACES, |
| 150 | + 'name': 'MASTER_REGEX_WITH_SPACES', |
| 151 | + 'numIndex': 4, |
| 152 | + 'onlyWithFileInspection': True, |
| 153 | +}, { |
| 154 | + 'regex': MASTER_REGEX_WITH_SPACES_AND_WEIRD_FILES, |
| 155 | + 'name': 'MASTER_REGEX_WITH_SPACES_AND_WEIRD_FILES', |
108 | 156 | 'numIndex': 4, |
109 | 157 | 'onlyWithFileInspection': True, |
110 | 158 | }, { |
111 | 159 | # ok maybe it's just a normal file (with a dot)
112 | 160 | # so lets test for that if the above fails |
113 | 161 | 'regex': JUST_FILE, |
| 162 | + 'name': 'JUST_FILE', |
114 | 163 | 'noNum': True |
115 | 164 | }, { |
116 | 165 | # ok if that's not there, try to do filesystem validation
117 | 166 | # for just files with spaces |
118 | 167 | 'regex': JUST_FILE_WITH_SPACES, |
| 168 | + 'name': 'JUST_FILE_WITH_SPACES', |
119 | 169 | 'noNum': True, |
120 | 170 | 'onlyWithFileInspection': True, |
121 | 171 | }, { |
|
125 | 175 | # we require some minimum number of slashes and minimum |
126 | 176 | # file name length |
127 | 177 | 'regex': FILE_NO_PERIODS, |
| 178 | + 'name': 'FILE_NO_PERIODS', |
128 | 179 | 'noNum': True, |
129 | 180 | }, { |
130 | 181 | 'regex': ENTIRE_TRIMMED_LINE_IF_NOT_WHITESPACE, |
| 182 | + 'name': 'ENTIRE_TRIMMED_LINE_IF_NOT_WHITESPACE', |
131 | 183 | 'noNum': True, |
132 | 184 | 'withAllLinesMatched': True |
133 | 185 | }] |
@@ -207,9 +259,11 @@ def matchLineImpl(line, withFileInspection=False, withAllLinesMatched=False): |
207 | 259 | matches = regex.search(line) |
208 | 260 | if not matches: |
209 | 261 | continue |
| 262 | + |
210 | 263 | unpackFunc = unpackMatchesNoNum if \ |
211 | 264 | regexConfig.get('noNum') else \ |
212 | 265 | lambda x: unpackMatches(x, numIndex=regexConfig.get('numIndex', 2)) |
| 266 | + |
213 | 267 | if not regexConfig.get('preferred_regex'): |
214 | 268 | results.append(unpackFunc(matches)) |
215 | 269 | continue |
|
0 commit comments