Skip to content

Commit 51d61ef

Browse files
committed
✨ Add the negation [^...] in PythonRegex
1 parent eca9a78 commit 51d61ef

File tree

3 files changed

+31
-6
lines changed

3 files changed

+31
-6
lines changed

Diff for: pyformlang/regular_expression/python_regex.py

+15-5
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
"$": "\\$",
2424
"\n": "",
2525
" ": "\\ ",
26-
'\\': '\\\\'
26+
'\\': '\\\\',
27+
"?": "\\?"
2728
}
2829

2930
RECOMBINE = {
@@ -218,13 +219,14 @@ def _preprocess_brackets_content(self, bracket_content):
218219
bracket_content_temp = []
219220
previous_is_valid_for_range = False
220221
for i, symbol in enumerate(bracket_content):
221-
if (symbol == "-" and not self._should_escape_next_symbol(
222-
bracket_content_temp)):
223-
if (not previous_is_valid_for_range
224-
or i == len(bracket_content) - 1):
222+
# We have a range
223+
if symbol == "-" and not self._should_escape_next_symbol(bracket_content_temp):
224+
if not previous_is_valid_for_range or i == len(bracket_content) - 1:
225+
# False alarm, no range
225226
bracket_content_temp.append("-")
226227
previous_is_valid_for_range = True
227228
else:
229+
# We insert all the characters in the range
228230
bracket_content[i - 1] = self._recombine(bracket_content[i - 1])
229231
for j in range(ord(bracket_content[i - 1][-1]) + 1,
230232
ord(bracket_content[i + 1][-1])):
@@ -244,10 +246,18 @@ def _preprocess_brackets_content(self, bracket_content):
244246
previous_is_valid_for_range = False
245247
else:
246248
previous_is_valid_for_range = True
249+
bracket_content_temp = self._preprocess_negation(bracket_content_temp)
247250
bracket_content_temp = self._insert_or(bracket_content_temp)
248251
bracket_content_temp = self._recombine(bracket_content_temp)
249252
return bracket_content_temp
250253

254+
@staticmethod
def _preprocess_negation(bracket_content):
    """Expand a negated bracket expression into its complement.

    Parameters
    ----------
    bracket_content : list
        The symbols found between ``[`` and ``]``. A leading ``"^"``
        marks the expression as negated.

    Returns
    -------
    list
        ``bracket_content`` itself when it is empty or does not start
        with ``"^"``. Otherwise, a new list holding every entry of the
        module-level ``ESCAPED_PRINTABLES`` that does not appear in
        ``bracket_content``, in ``ESCAPED_PRINTABLES`` order.
    """
    is_negated = bool(bracket_content) and bracket_content[0] == "^"
    if is_negated:
        # The class is inverted: keep only the printables that were
        # not listed inside the brackets.
        complement = []
        for printable in ESCAPED_PRINTABLES:
            if printable not in bracket_content:
                complement.append(printable)
        return complement
    return bracket_content
260+
251261
@staticmethod
252262
def _insert_or(l_to_modify):
253263
res = []

Diff for: pyformlang/regular_expression/tests/test_python_regex.py

+15
Original file line numberDiff line numberDiff line change
@@ -313,3 +313,18 @@ def test_error_backslash(self):
313313
self._test_compare(r"[a\\\\\\]]", "\\]")
314314
self._test_compare(r"\"([d\"\\\\]|\\\\.)*\"", '"d\\"')
315315
self._test_compare(r"[a\\\\]", "a")
316+
317+
def test_negation_brackets(self):
    """Exercise negated character classes such as ``[^abc]``.

    Each (pattern, word) pair is handed to ``self._test_compare``,
    which is defined elsewhere in the test class.
    """
    negated_abc = r"[^abc]*"
    # Empty word, excluded characters, allowed characters and mixes.
    for candidate in ("", "a", "b", "c", "d", "dga", "dgh"):
        self._test_compare(negated_abc, candidate)
    # Negating a character that is special outside brackets.
    self._test_compare(r"[^?]*", "dgh")
326+
327+
def test_question_mark(self):
    """Exercise ``?`` as a literal, as a quantifier, and escaped.

    Each (pattern, word) pair is handed to ``self._test_compare``,
    which is defined elsewhere in the test class.
    """
    cases = (
        (r".", "?"),            # "?" as the word tested against "."
        (r"a(a|b)?", "a"),      # "?" as the optional quantifier
        (r"a(a|b)\?", "ab?"),   # "\?" as an escaped literal
    )
    for pattern, word in cases:
        self._test_compare(pattern, word)

Diff for: setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
setuptools.setup(
1313
name='pyformlang',
14-
version='1.0.8',
14+
version='1.0.9',
1515
#scripts=['pyformlang'] ,
1616
author="Julien Romero",
1717
author_email="[email protected]",

0 commit comments

Comments
 (0)