Skip to content

Commit 71c6cf3

Browse files
authored
compile static regex patterns once instead of in every function call (#138)
1 parent 0a7a4e4 commit 71c6cf3

File tree

1 file changed

+13
-7
lines changed

1 file changed

+13
-7
lines changed

src/migmose/parsing.py

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,12 @@ def find_file_to_format(
4646
raise click.Abort()
4747

4848

49+
_date_pattern = re.compile(r"(\d{8})\.docx$")
50+
51+
4952
def _extract_date(file_path: Path) -> tuple[datetime, Path]:
5053
# Regex to extract the date format YYYYMMDD from the filename as a string
51-
match = re.search(r"(\d{8})\.docx$", file_path.name)
54+
match = _date_pattern.search(file_path.name)
5255
if match:
5356
# Return the date as a datetime object for comparison and the path for use
5457
return datetime.strptime(match.group(1), "%Y%m%d"), file_path
@@ -140,7 +143,7 @@ def parse_raw_nachrichtenstrukturzeile(input_path: Path) -> list[str]:
140143
nachrichtenstruktur_header = "Status\tMaxWdh\n\tZähler\tNr\tBez\tSta\tBDEW\tSta\tBDEW\tEbene\tInhalt"
141144
for docx_object in docx_objects:
142145
for ind, line in enumerate(docx_object._cells):
143-
# marks the beginning of the complete nachrichtentruktur table
146+
# marks the beginning of the complete nachrichtenstruktur table
144147
if line.text == nachrichtenstruktur_header:
145148
mig_tables.extend([row.text for row in docx_object._cells[ind + 1 :]])
146149
break
@@ -150,13 +153,16 @@ def parse_raw_nachrichtenstrukturzeile(input_path: Path) -> list[str]:
150153
return mig_tables
151154

152155

156+
_pattern = re.compile(
157+
r"MIG(?:Strom|Gas)?-?informatorischeLesefassung?(.*?)"
158+
r"(?:_|KonsolidierteLesefassung|-AußerordentlicheVeröffentlichung)",
159+
re.IGNORECASE,
160+
)
161+
162+
153163
def _extract_document_version(path: Path) -> str:
154164
document_str = str(path)
155-
pattern = (
156-
r"MIG(?:Strom|Gas)?-?informatorischeLesefassung?(.*?)"
157-
r"(?:_|KonsolidierteLesefassung|-AußerordentlicheVeröffentlichung)"
158-
)
159-
matches = re.search(pattern, document_str, re.IGNORECASE)
165+
matches = _pattern.search(document_str)
160166
if matches:
161167
document_version = matches.group(1)
162168
if document_version == "":

0 commit comments

Comments
 (0)