Skip to content

Commit 19487ac

Browse files
authored
Fix Version Regex For "Sparten-Versions" (#287)
* update pre-commit
* fix version for utilmd with sparten
* add sparte property
* update docstring of document_version
* Update snapshot
1 parent ef78a07 commit 19487ac

File tree

4 files changed

+61
-48
lines changed

4 files changed

+61
-48
lines changed

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
repos:
22
- repo: https://github.com/pre-commit/pre-commit-hooks
3-
rev: v4.5.0
3+
rev: v5.0.0
44
hooks:
55
- id: check-yaml
66
- id: end-of-file-fixer
77
- id: trailing-whitespace
88
- repo: https://github.com/psf/black
9-
rev: 23.11.0 # Replace by any tag/version: https://github.com/psf/black/tags
9+
rev: 25.1.0 # Replace by any tag/version: https://github.com/psf/black/tags
1010
hooks:
1111
- id: black
1212
language_version: python3
1313
- repo: https://github.com/pycqa/isort
14-
rev: 5.12.0
14+
rev: 6.0.0
1515
hooks:
1616
- id: isort
1717
name: isort (python)

src/edi_energy_scraper/apidocument.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
_MigPattern = re.compile(r".*\b[A-Z]{6}\sMIG\b.*")
1818
_AhbPattern = re.compile(r".*\bAHB\b.*")
1919
_FormatPattern = re.compile(r".*\b(?P<format>[A-Z]{6})\b.*")
20-
_VersionPattern = re.compile(r"^.*?\b(?P<version>\d+\.\d+[a-z]?)\b.*$") # assumption: version is always before datum
20+
_VersionPattern = re.compile(
21+
r"^.*?\b(?P<version>[GS]?\d+\.\d+[a-z]?)\b.*$"
22+
) # assumption: version is always before datum
2123
_AlternativeKindPattern = re.compile(r"^(?P<name>\D+).*$")
2224
_StandPattern = re.compile(r".*Stand:\s*(?P<day>\d{1,2})\.(?P<month>\d{1,2})\.(?P<year>\d{4}).*")
2325

@@ -148,13 +150,24 @@ def edifact_format(self) -> EdifactFormat | None:
148150
@property
149151
def document_version(self) -> str | None:
150152
"""
151-
returns something like "1.4a" or "2.0" for MIGs and AHBs
153+
returns something like "1.4a", "2.0" or "S2.1" for MIGs and AHBs
152154
"""
153155
match = _VersionPattern.match(self.title)
154156
if match is None:
155157
return None
156158
return match.group("version")
157159

160+
@property
161+
def sparte(self) -> str | None:
162+
"""
163+
returns the sparte of a UTILMD document
164+
"""
165+
if "gas" in self.title.lower():
166+
return "Gas"
167+
if "strom" in self.title.lower():
168+
return "Strom"
169+
return None
170+
158171
@property
159172
def is_consolidated_reading_version(self) -> bool:
160173
"""true if this is a konsolidierte Lesefassung"""

src/edi_energy_scraper/documentmetadata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class DocumentMetadata(BaseModel):
2626
valid_from: date
2727
valid_until: date
2828
publication_date: date | None
29-
version: Annotated[str, StringConstraints(strip_whitespace=True, pattern=r"^\d+\.\d+[a-z]?$")] | None
29+
version: Annotated[str, StringConstraints(strip_whitespace=True, pattern=r"^[GS]?\d+\.\d+[a-z]?$")] | None
3030
is_consolidated_reading_version: bool
3131
is_extraordinary_publication: bool
3232
is_error_correction: bool

unittests/__snapshots__/test_models.ambr

Lines changed: 42 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -863,30 +863,30 @@
863863
'AHB_UTILMD_1.2a_20240403_20240404_20240311_xoxx_10543.docx',
864864
'AHB_UTILMD_1.2a_20240405_20240711_20240405_xoxo_10539.pdf',
865865
'AHB_UTILMD_1.2a_20240405_20240711_20240405_xoxx_10544.docx',
866-
'MIG_UTILMD_NV_20231001_20240725_20231001_oooo_10644.pdf',
867-
'MIG_UTILMD_NV_20231001_20240725_20231001_ooox_10646.docx',
868-
'MIG_UTILMD_12.12_20231212_20240725_20231212_xoxo_10596.pdf',
869-
'MIG_UTILMD_12.12_20231212_20240725_20231212_xoxx_10260.docx',
870-
'MIG_UTILMD_29.09_20231001_20231211_20230929_xoxo_10645.pdf',
871-
'MIG_UTILMD_29.09_20231001_20231211_20230929_xoxx_10647.docx',
872-
'MIG_UTILMD_NV_20241023_20241023_20241023_oooo_10267.pdf',
873-
'MIG_UTILMD_NV_20241023_20241023_20241023_ooox_10268.docx',
866+
'MIG_UTILMD_G1.0a_20231001_20240725_20231001_oooo_10644.pdf',
867+
'MIG_UTILMD_G1.0a_20231001_20240725_20231001_ooox_10646.docx',
868+
'MIG_UTILMD_G1.0a_20231212_20240725_20231212_xoxo_10596.pdf',
869+
'MIG_UTILMD_G1.0a_20231212_20240725_20231212_xoxx_10260.docx',
870+
'MIG_UTILMD_G1.0a_20231001_20231211_20230929_xoxo_10645.pdf',
871+
'MIG_UTILMD_G1.0a_20231001_20231211_20230929_xoxx_10647.docx',
872+
'MIG_UTILMD_S2.0_20241023_20241023_20241023_oooo_10267.pdf',
873+
'MIG_UTILMD_S2.0_20241023_20241023_20241023_ooox_10268.docx',
874874
'AHB_UTILMD_1.2a_20240712_20240922_20240712_xoxo_10540.pdf',
875875
'AHB_UTILMD_1.2a_20240712_20240922_20240712_xoxx_10545.docx',
876876
'AHB_UTILMD_2.0_20241023_20241023_20241023_oooo_10551.pdf',
877877
'AHB_UTILMD_2.0_20241023_20241023_20241023_ooox_10552.docx',
878-
'MIG_UTILMD_NV_20231001_20240402_20231001_oooo_10656.pdf',
879-
'MIG_UTILMD_NV_20231001_20240402_20231001_ooox_10658.docx',
880-
'MIG_UTILMD_23.10_20231023_20231211_20231023_xoxx_10277.docx',
881-
'MIG_UTILMD_23.10_20231023_20231211_20231023_xoxo_10275.pdf',
882-
'MIG_UTILMD_12.12_20231212_20240402_20231212_xoxo_10276.pdf',
883-
'MIG_UTILMD_12.12_20231212_20240402_20231212_xoxx_10278.docx',
884-
'MIG_UTILMD_12.12_20240403_20240310_20231212_xoxo_10269.pdf',
885-
'MIG_UTILMD_12.12_20240403_20240310_20231212_xoxx_10272.docx',
886-
'MIG_UTILMD_11.03_20240403_20240404_20240311_xoxo_10265.pdf',
887-
'MIG_UTILMD_11.03_20240403_20240404_20240311_xoxx_10266.docx',
888-
'MIG_UTILMD_05.04_20240405_20240711_20240405_xoxo_10270.pdf',
889-
'MIG_UTILMD_05.04_20240405_20240704_20240405_xoxx_10273.docx',
878+
'MIG_UTILMD_S1.1_20231001_20240402_20231001_oooo_10656.pdf',
879+
'MIG_UTILMD_S1.1_20231001_20240402_20231001_ooox_10658.docx',
880+
'MIG_UTILMD_S1.1_20231023_20231211_20231023_xoxx_10277.docx',
881+
'MIG_UTILMD_S1.1_20231023_20231211_20231023_xoxo_10275.pdf',
882+
'MIG_UTILMD_S1.1_20231212_20240402_20231212_xoxo_10276.pdf',
883+
'MIG_UTILMD_S1.1_20231212_20240402_20231212_xoxx_10278.docx',
884+
'MIG_UTILMD_S1.1a_20240403_20240310_20231212_xoxo_10269.pdf',
885+
'MIG_UTILMD_S1.1a_20240403_20240310_20231212_xoxx_10272.docx',
886+
'MIG_UTILMD_S1.1a_20240403_20240404_20240311_xoxo_10265.pdf',
887+
'MIG_UTILMD_S1.1a_20240403_20240404_20240311_xoxx_10266.docx',
888+
'MIG_UTILMD_S1.1a_20240405_20240711_20240405_xoxo_10270.pdf',
889+
'MIG_UTILMD_S1.1a_20240405_20240704_20240405_xoxx_10273.docx',
890890
'MIG_UTILTS_1.1c_20240403_20240725_20240403_oooo_10700.pdf',
891891
'MIG_UTILTS_1.1c_20240403_20240725_20240403_ooox_10702.docx',
892892
'MIG_UTILTS_1.1d_20241023_20241023_20241023_oooo_10704.pdf',
@@ -926,14 +926,14 @@
926926
'MIG_UTILMD_5.2e_20221025_20230930_20221025_xoxx_10621.docx',
927927
'MIG_UTILMD_5.2e_20230512_20230930_20230512_xoxo_10279.pdf',
928928
'MIG_UTILMD_5.2e_20230512_20230930_20230512_xoxx_10280.docx',
929-
'MIG_UTILMD_NV_20241023_20241023_20241023_oooo_10648.pdf',
930-
'MIG_UTILMD_NV_20241023_20241023_20241023_ooox_10649.docx',
931-
'MIG_UTILMD_29.06_20231001_20230928_20230629_xoxo_10258.pdf',
932-
'MIG_UTILMD_29.06_20231001_20230928_20230629_xoxx_10259.docx',
933-
'MIG_UTILMD_NV_20241023_20241023_20241023_oooo_10652.pdf',
934-
'MIG_UTILMD_NV_20241023_20241023_20241023_ooox_10653.docx',
935-
'MIG_UTILMD_NV_20231001_20231022_20231001_oxoo_10657.pdf',
936-
'MIG_UTILMD_NV_20231001_20231022_20231001_oxox_10659.docx',
929+
'MIG_UTILMD_G1.0_20241023_20241023_20241023_oooo_10648.pdf',
930+
'MIG_UTILMD_G1.0_20241023_20241023_20241023_ooox_10649.docx',
931+
'MIG_UTILMD_G1.0a_20231001_20230928_20230629_xoxo_10258.pdf',
932+
'MIG_UTILMD_G1.0a_20231001_20230928_20230629_xoxx_10259.docx',
933+
'MIG_UTILMD_S1.0_20241023_20241023_20241023_oooo_10652.pdf',
934+
'MIG_UTILMD_S1.0_20241023_20241023_20241023_ooox_10653.docx',
935+
'MIG_UTILMD_S1.1_20231001_20231022_20231001_oxoo_10657.pdf',
936+
'MIG_UTILMD_S1.1_20231001_20231022_20231001_oxox_10659.docx',
937937
'AHB_UTILMD_1.0_20241023_20241023_20241023_oooo_10334.pdf',
938938
'AHB_UTILMD_1.0_20241023_20241023_20241023_ooox_10335.docx',
939939
'AHB_UTILMD_1.0a_20231001_20230928_20230629_xoxx_10332.docx',
@@ -1160,12 +1160,12 @@
11601160
'MIG_PARTIN_1.0e_20250606_99991231_20250606_ooox_9836.docx',
11611161
'AHB_PARTIN_1.0e_20250606_99991231_20250606_oooo_9818.pdf',
11621162
'AHB_PARTIN_1.0e_20250606_99991231_20250606_ooox_9819.docx',
1163-
'MIG_UTILMD_NV_20231212_99991231_20231212_oxox_10264.docx',
1164-
'MIG_UTILMD_NV_20231212_99991231_20231212_oxoo_10262.pdf',
1163+
'MIG_UTILMD_G1.0a_20231212_99991231_20231212_oxox_10264.docx',
1164+
'MIG_UTILMD_G1.0a_20231212_99991231_20231212_oxoo_10262.pdf',
11651165
'AHB_UTILMD_1.0a_20231212_99991231_20231212_oxoo_10326.pdf',
11661166
'AHB_UTILMD_1.0a_20231212_99991231_20231212_oxox_10331.docx',
1167-
'MIG_UTILMD_NV_20250606_99991231_20250606_oooo_10651.pdf',
1168-
'MIG_UTILMD_NV_20250606_99991231_20250606_ooox_10660.docx',
1167+
'MIG_UTILMD_S2.1_20250606_99991231_20250606_oooo_10651.pdf',
1168+
'MIG_UTILMD_S2.1_20250606_99991231_20250606_ooox_10660.docx',
11691169
'AHB_UTILMD_2.1_20250606_99991231_20250606_oooo_10553.pdf',
11701170
'AHB_UTILMD_2.1_20250606_99991231_20250606_ooox_10554.docx',
11711171
'MIG_UTILTS_1.1e_20250606_99991231_20250606_oooo_10706.pdf',
@@ -1252,10 +1252,10 @@
12521252
'AHB_PARTIN_1.0d_20240403_20250605_20240403_ooox_9816.docx',
12531253
'AHB_PARTIN_1.0d_20240403_20250605_20240311_xoxo_9815.pdf',
12541254
'AHB_PARTIN_1.0d_20240403_20250605_20240311_xoxx_9817.docx',
1255-
'MIG_UTILMD_NV_20240403_20250605_20240403_oooo_10654.pdf',
1256-
'MIG_UTILMD_NV_20240403_20250605_20240403_ooox_10655.docx',
1257-
'MIG_UTILMD_12.07_20240712_20250605_20240712_xoxo_10271.pdf',
1258-
'MIG_UTILMD_12.07_20240712_20250605_20240712_xoxx_10274.docx',
1255+
'MIG_UTILMD_S1.1a_20240403_20250605_20240403_oooo_10654.pdf',
1256+
'MIG_UTILMD_S1.1a_20240403_20250605_20240403_ooox_10655.docx',
1257+
'MIG_UTILMD_S1.1a_20240712_20250605_20240712_xoxo_10271.pdf',
1258+
'MIG_UTILMD_S1.1a_20240712_20250605_20240712_xoxx_10274.docx',
12591259
'AHB_UTILMD_4.1a_20240403_20250605_20240403_oooo_10443.pdf',
12601260
'AHB_UTILMD_4.1a_20240403_20250605_20240403_ooox_10446.docx',
12611261
'AHB_UTILMD_4.1a_20240403_20250605_20240311_xoxo_10444.pdf',
@@ -1330,9 +1330,9 @@
13301330
'AHB_UTILMD_1.2a_20241018_20250605_20241018_xoxo_11003.xml',
13311331
'AHB_UTILMD_2.1_20250606_20241212_20250606_oooo_11004.xml',
13321332
'AHB_UTILMD_1.0a_20231212_99991231_20231212_oxoo_11005.xml',
1333-
'MIG_UTILMD_11.07_20240712_20250605_20240711_xoxo_11006.xml',
1334-
'MIG_UTILMD_NV_20250606_20241212_20250606_oooo_11007.xml',
1335-
'MIG_UTILMD_NV_20231212_99991231_20231212_oooo_11008.xml',
1333+
'MIG_UTILMD_S1.1a_20240712_20250605_20240711_xoxo_11006.xml',
1334+
'MIG_UTILMD_S2.1_20250606_20241212_20250606_oooo_11007.xml',
1335+
'MIG_UTILMD_G1.0a_20231212_99991231_20231212_oooo_11008.xml',
13361336
'MIG_UTILTS_1.1c_20240403_20241212_20240403_oxoo_11011.xml',
13371337
'MIG_UTILTS_1.1e_20250606_20241213_20241017_xoxo_11012.xml',
13381338
'AHB_REQOTE_2.2_20240923_20250606_20240922_xoxo_11013.xml',
@@ -1406,9 +1406,9 @@
14061406
'AHB_UTILMD_2.1_20250606_99991231_20241213_xoxx_11157.docx',
14071407
'AHB_UTILMD_2.1_20250606_99991231_20241213_xoxo_11158.xml',
14081408
'AHB_UTILMD_1.2a_20241018_20250605_20251018_xoxx_11159.docx',
1409-
'MIG_UTILMD_13.12_20250606_99991231_20241213_xoxo_11160.pdf',
1410-
'MIG_UTILMD_13.12_20250606_99991231_20241213_xoxx_11161.docx',
1411-
'MIG_UTILMD_13.12_20250606_99991231_20241213_xoxo_11162.xml',
1409+
'MIG_UTILMD_S2.1_20250606_99991231_20241213_xoxo_11160.pdf',
1410+
'MIG_UTILMD_S2.1_20250606_99991231_20241213_xoxx_11161.docx',
1411+
'MIG_UTILMD_S2.1_20250606_99991231_20241213_xoxo_11162.xml',
14121412
'AHB_UTILTS_1.0_20250606_99991231_20241213_xoxo_11163.pdf',
14131413
'AHB_UTILTS_1.0_20250606_99991231_20241213_xoxx_11164.docx',
14141414
'AHB_UTILTS_1.0_20250606_99991231_20241213_xoxo_11165.xml',

0 commit comments

Comments
 (0)