Skip to content

Commit e7214f7

Browse files
committed
Problem with the processing of dot when preceded by an ambiguous escape. Resolves #19
1 parent 4dd02ef commit e7214f7

File tree

3 files changed

+20
-11
lines changed

3 files changed

+20
-11
lines changed

pyformlang/regular_expression/python_regex.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
OCTAL = "01234567"
5454
ESCAPED_OCTAL = ["\\0", "\\1", "\\2", "\\3", "\\4", "\\5", "\\6", "\\7"]
5555

56+
5657
class PythonRegex(regex.Regex):
5758
""" Represents a regular expression as used in Python.
5859
@@ -106,7 +107,6 @@ def __init__(self, python_regex):
106107
self._preprocess_brackets()
107108
self._preprocess_positive_closure()
108109
self._preprocess_optional()
109-
self._preprocess_dot()
110110
self._separate()
111111
self._python_regex = self._python_regex.lstrip('\b')
112112
super().__init__(self._python_regex)
@@ -119,7 +119,13 @@ def _separate(self):
119119
else:
120120
regex_temp.append(symbol)
121121
regex_temp = self._recombine(regex_temp)
122-
self._python_regex = " ".join(regex_temp)
122+
regex_temp_dot = []
123+
for symbol in regex_temp:
124+
if symbol == ".":
125+
regex_temp_dot.append(DOT_REPLACEMENT)
126+
else:
127+
regex_temp_dot.append(symbol)
128+
self._python_regex = " ".join(regex_temp_dot)
123129

124130
def _preprocess_brackets(self):
125131
regex_temp = []
@@ -287,13 +293,6 @@ def _preprocess_positive_closure(self):
287293
regex_temp.append("*")
288294
self._python_regex = "".join(regex_temp)
289295

290-
@staticmethod
291-
def _dot_replacer(dot):
292-
return DOT_REPLACEMENT
293-
294-
def _preprocess_dot(self):
295-
self._python_regex = re.sub(r'(?<!\\)\.', self._dot_replacer, self._python_regex)
296-
297296
def _preprocess_optional(self):
298297
regex_temp = []
299298
for symbol in self._python_regex:

pyformlang/regular_expression/tests/test_python_regex.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,8 @@ def test_backslash(self):
256256
def test_octal(self):
257257
self._test_compare(r"\x10", "\x10")
258258
self._test_compare(r"\110", "\110")
259+
self._test_compare(r"\\\\x10", "\x10")
260+
self._test_compare(r"\\\\x10", "\\x10")
259261

260262
def test_backspace(self):
261263
self._test_compare(r"a[b\b]", "ab")
@@ -272,4 +274,12 @@ def test_unicode_name(self):
272274

273275
def test_unicode(self):
274276
self._test_compare(r"\u1111", "\u1111")
275-
self._test_compare(r"\U00001111", "\U00001111")
277+
self._test_compare(r"\U00001111", "\U00001111")
278+
279+
def test_dot_harder(self):
280+
self._test_compare(r"\\.", "\\a")
281+
self._test_compare(r"\\.", "\\.")
282+
self._test_compare(r"\.", "a")
283+
self._test_compare(r"\.", ".")
284+
self._test_compare(r"\\\.", "\\a")
285+
self._test_compare(r"\\\.", "\\.")

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
setuptools.setup(
1313
name='pyformlang',
14-
version='1.0.6',
14+
version='1.0.7',
1515
#scripts=['pyformlang'] ,
1616
author="Julien Romero",
1717
author_email="[email protected]",

0 commit comments

Comments
 (0)