Skip to content

Commit 3eb02b7

Browse files
authored
Merge pull request #590 from gerlero/files
Improve parsing performance
2 parents 62fc6b3 + 0ebdb49 commit 3eb02b7

File tree

2 files changed

+49
-10
lines changed

2 files changed

+49
-10
lines changed

foamlib/_files/_parsing/_elements.py

Lines changed: 40 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -59,10 +59,20 @@ def parseImpl(
5959
self, instring: str, loc: int, doActions: bool = True
6060
) -> tuple[int, ParseResults]:
6161
spacing_pattern = "|".join(re.escape(c) for c in self.whiteChars)
62-
for ignore_expr in self.ignoreExprs:
63-
assert isinstance(ignore_expr, Suppress)
64-
assert isinstance(ignore_expr.expr, Regex)
65-
spacing_pattern += f"|(?:{ignore_expr.expr.re.pattern})"
62+
assert spacing_pattern
63+
64+
assert all(
65+
isinstance(ignore_expr, Suppress) for ignore_expr in self.ignoreExprs
66+
)
67+
assert all(
68+
isinstance(ignore_expr.expr, Regex) for ignore_expr in self.ignoreExprs
69+
)
70+
ignore_pattern = "|".join(
71+
ignore_expr.expr.re.pattern for ignore_expr in self.ignoreExprs
72+
)
73+
74+
if ignore_pattern:
75+
spacing_pattern = f"{spacing_pattern}|{ignore_pattern}"
6676

6777
if np.issubdtype(self._dtype, np.floating):
6878
base_pattern = self._FLOAT_PATTERN
@@ -86,7 +96,12 @@ def parseImpl(
8696
if match := regular_pattern.match(instring, pos=loc):
8797
count = int(c) if (c := match.group(1)) else None
8898
contents = match.group(2)
89-
contents = re.sub(spacing_pattern, " ", contents)
99+
100+
if not all(c.isspace() for c in self.whiteChars):
101+
contents = re.sub(spacing_pattern, " ", contents)
102+
elif ignore_pattern:
103+
contents = re.sub(ignore_pattern, " ", contents) # Faster
104+
90105
if self._elshape:
91106
contents = contents.replace("(", " ").replace(")", " ")
92107

@@ -206,10 +221,20 @@ def parseImpl(
206221
self, instring: str, loc: int, doActions: bool = True
207222
) -> tuple[int, ParseResults]:
208223
spacing_pattern = "|".join(re.escape(c) for c in self.whiteChars)
209-
for ignore_expr in self.ignoreExprs:
210-
assert isinstance(ignore_expr, Suppress)
211-
assert isinstance(ignore_expr.expr, Regex)
212-
spacing_pattern += f"|(?:{ignore_expr.expr.re.pattern})"
224+
assert spacing_pattern
225+
226+
assert all(
227+
isinstance(ignore_expr, Suppress) for ignore_expr in self.ignoreExprs
228+
)
229+
assert all(
230+
isinstance(ignore_expr.expr, Regex) for ignore_expr in self.ignoreExprs
231+
)
232+
ignore_pattern = "|".join(
233+
ignore_expr.expr.re.pattern for ignore_expr in self.ignoreExprs
234+
)
235+
236+
if ignore_pattern:
237+
spacing_pattern = f"{spacing_pattern}|{ignore_pattern}"
213238

214239
three_face_pattern = rf"3(?:{spacing_pattern})*\((?:{spacing_pattern})*(?:{self._INT_PATTERN}(?:{spacing_pattern})*){{3}}\)"
215240
four_face_pattern = rf"4(?:{spacing_pattern})*\((?:{spacing_pattern})*(?:{self._INT_PATTERN}(?:{spacing_pattern})*){{4}}\)"
@@ -224,7 +249,12 @@ def parseImpl(
224249
if match := face_list_pattern.match(instring, pos=loc):
225250
count = int(c) if (c := match.group(1)) else None
226251
contents = match.group(2)
227-
contents = re.sub(spacing_pattern, " ", contents)
252+
253+
if not all(c.isspace() for c in self.whiteChars):
254+
contents = re.sub(spacing_pattern, " ", contents)
255+
elif ignore_pattern:
256+
contents = re.sub(ignore_pattern, " ", contents) # Faster
257+
228258
contents = contents.replace("(", " ").replace(")", " ")
229259

230260
raw = np.fromstring(contents, sep=" ", dtype=int)

tests/test_files/test_parsing/test_basic.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,15 @@ def test_parse_value() -> None:
5252
[4, 5, 6],
5353
],
5454
)
55+
lst = Parsed(b"2((1\n2 3)\t(4 5 6))")[()]
56+
assert isinstance(lst, np.ndarray)
57+
assert np.array_equal(
58+
lst,
59+
[
60+
[1, 2, 3],
61+
[4, 5, 6],
62+
],
63+
)
5564
lst = Parsed(b"2(3(1 2 3) 4(4 5 6 7))")[()]
5665
assert isinstance(lst, list)
5766
assert len(lst) == 2

0 commit comments

Comments (0)