@@ -46,9 +46,12 @@ def find_file_to_format(
4646 raise click .Abort ()
4747
4848
49+ _date_pattern = re .compile (r"(\d{8})\.docx$" )
50+
51+
4952def _extract_date (file_path : Path ) -> tuple [datetime , Path ]:
5053 # Regex to extract the date format YYYYMMDD from the filename as a string
51- match = re .search (r"(\d{8})\.docx$" , file_path .name )
54+ match = _date_pattern .search (file_path .name )
5255 if match :
5356 # Return the date as a datetime object for comparison and the path for use
5457 return datetime .strptime (match .group (1 ), "%Y%m%d" ), file_path
@@ -140,7 +143,7 @@ def parse_raw_nachrichtenstrukturzeile(input_path: Path) -> list[str]:
140143 nachrichtenstruktur_header = "Status\t MaxWdh\n \t Zähler\t Nr\t Bez\t Sta\t BDEW\t Sta\t BDEW\t Ebene\t Inhalt"
141144 for docx_object in docx_objects :
142145 for ind , line in enumerate (docx_object ._cells ):
143- # marks the beginning of the complete nachrichtentruktur table
146+ # marks the beginning of the complete nachrichtenstruktur table
144147 if line .text == nachrichtenstruktur_header :
145148 mig_tables .extend ([row .text for row in docx_object ._cells [ind + 1 :]])
146149 break
@@ -150,13 +153,16 @@ def parse_raw_nachrichtenstrukturzeile(input_path: Path) -> list[str]:
150153 return mig_tables
151154
152155
156+ _pattern = re .compile (
157+ r"MIG(?:Strom|Gas)?-?informatorischeLesefassung?(.*?)"
158+ r"(?:_|KonsolidierteLesefassung|-AußerordentlicheVeröffentlichung)" ,
159+ re .IGNORECASE ,
160+ )
161+
162+
153163def _extract_document_version (path : Path ) -> str :
154164 document_str = str (path )
155- pattern = (
156- r"MIG(?:Strom|Gas)?-?informatorischeLesefassung?(.*?)"
157- r"(?:_|KonsolidierteLesefassung|-AußerordentlicheVeröffentlichung)"
158- )
159- matches = re .search (pattern , document_str , re .IGNORECASE )
165+ matches = _pattern .search (document_str )
160166 if matches :
161167 document_version = matches .group (1 )
162168 if document_version == "" :
0 commit comments