@@ -176,7 +176,7 @@ def _recombine(regex_to_recombine):
176
176
if regex_to_recombine [idx ] == "\\ x" and idx < len (regex_to_recombine ) - 2 \
177
177
and regex_to_recombine [idx + 1 ] in HEXASTRING \
178
178
and regex_to_recombine [idx + 2 ] in HEXASTRING :
179
- next_str = "" .join (regex_to_recombine [idx + 1 :idx + 3 ])
179
+ next_str = "" .join (regex_to_recombine [idx + 1 :idx + 3 ])
180
180
s_trans = chr (int (next_str , 16 ))
181
181
temp .append (TRANSFORMATIONS .get (s_trans , s_trans ))
182
182
idx += 3
@@ -197,12 +197,12 @@ def _recombine(regex_to_recombine):
197
197
temp .append (TRANSFORMATIONS .get (name , name ))
198
198
idx = idx_end + 1
199
199
elif regex_to_recombine [idx ] == "\\ u" :
200
- unicode_str = "" .join (regex_to_recombine [idx + 1 : idx + 5 ])
200
+ unicode_str = "" .join (regex_to_recombine [idx + 1 : idx + 5 ])
201
201
decoded = chr (int (unicode_str , 16 ))
202
202
temp .append (TRANSFORMATIONS .get (decoded , decoded ))
203
203
idx = idx + 5
204
204
elif regex_to_recombine [idx ] == "\\ U" :
205
- unicode_str = "" .join (regex_to_recombine [idx + 1 : idx + 9 ])
205
+ unicode_str = "" .join (regex_to_recombine [idx + 1 : idx + 9 ])
206
206
decoded = chr (int (unicode_str , 16 ))
207
207
temp .append (TRANSFORMATIONS .get (decoded , decoded ))
208
208
idx = idx + 9
@@ -291,8 +291,71 @@ def _preprocess_positive_closure(self):
291
291
for j in range (pos_opening , len (regex_temp )):
292
292
regex_temp .append (regex_temp [j ])
293
293
regex_temp .append ("*" )
294
+ regex_temp = self ._add_repetition (regex_temp )
294
295
self ._python_regex = "" .join (regex_temp )
295
296
297
+ @staticmethod
298
+ def _is_repetition (regex_list , idx ):
299
+ if regex_list [idx ] == "{" :
300
+ end = idx
301
+ for i in range (idx + 1 , len (regex_list )):
302
+ if regex_list [i ] == "}" :
303
+ end = i
304
+ break
305
+ inner = "" .join (regex_list [idx + 1 :end ])
306
+ if "," in inner :
307
+ split = inner .split ("," )
308
+ if len (split ) != 2 or not split [0 ].isdigit () or not split [1 ].isdigit ():
309
+ return None
310
+ return int (split [0 ]), int (split [1 ]), end
311
+ if inner .isdigit ():
312
+ return int (inner ), end
313
+ return None
314
+
315
+ @staticmethod
316
+ def _find_repeated_sequence (regex_list ):
317
+ if regex_list [- 1 ] != ")" :
318
+ return [regex_list [- 1 ]]
319
+ res = [")" ]
320
+ counter = - 1
321
+ for i in range (len (regex_list ) - 2 , - 1 , - 1 ):
322
+ if regex_list [i ] == "(" :
323
+ counter += 1
324
+ res .append ("(" )
325
+ if counter == 0 :
326
+ return res [::- 1 ]
327
+ elif regex_list [i ] == ")" :
328
+ counter -= 1
329
+ res .append (")" )
330
+ else :
331
+ res .append (regex_list [i ])
332
+ return []
333
+
334
def _add_repetition(self, regex_list):
    """Expand counted repetitions into explicit copies of the repeated atom.

    ``x{n}`` becomes ``n`` copies of ``x``; ``x{m,n}`` becomes ``m``
    copies of ``x`` followed by ``n - m`` copies of ``x?``. A lower bound
    of zero makes every copy optional, so ``x{0}`` expands to nothing and
    ``x{0,2}`` to ``x?x?``.

    Parameters
    ----------
    regex_list : list of str
        The regex split into symbols.

    Returns
    -------
    list of str
        The symbols with every repetition recognised by
        ``_is_repetition`` replaced by its expansion.
    """
    res = []
    idx = 0
    while idx < len(regex_list):
        rep = self._is_repetition(regex_list, idx)
        if rep is None:
            res.append(regex_list[idx])
            idx += 1
            continue
        if len(rep) == 2:
            # ``{n}`` is the same as ``{n,n}``.
            min_rep, end = rep
            max_rep = min_rep
        else:
            min_rep, max_rep, end = rep
        repeated = self._find_repeated_sequence(res)
        if min_rep == 0 and repeated:
            # The atom is already in ``res`` as one mandatory copy; with a
            # zero lower bound no copy is mandatory, so take it back out.
            del res[-len(repeated):]
        # One mandatory copy is already present, hence ``min_rep - 1``.
        for _ in range(min_rep - 1):
            res.extend(repeated)
        for _ in range(min_rep, max_rep):
            res.extend(repeated)
            res.append("?")
        idx = end + 1
    return res
358
+
296
359
def _preprocess_optional (self ):
297
360
regex_temp = []
298
361
for symbol in self ._python_regex :
0 commit comments