Skip to content

Commit 2680911

Browse files
hf-kkleinKonstantin
andauthored
feat: introduce function to parse Kommunikation_von (w/o using it yet) (#177)
* feat: introduce function to parse `Kommunikation_von` (w/o using it yet) based on #176 (no dependency - just to avoid merge conflicts) * fix some more edge cases --------- Co-authored-by: Konstantin <[email protected]>
1 parent bcd859b commit 2680911

File tree

3 files changed

+205
-2
lines changed

3 files changed

+205
-2
lines changed

src/fundamend/models/anwendungshandbuch.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,15 @@ class SegmentGroup(FundamendBaseModel):
121121
elements: tuple["Segment | SegmentGroup", ...]
122122

123123

124+
class Kommunikationsrichtung(FundamendBaseModel):
    """
    Strongly typed form of a single direction taken from the 'Kommunikation_von' attribute of an Anwendungsfall:
    one sending and one receiving Marktrolle.
    """

    #: the sending Marktrolle, e.g. "NB"
    sender: str
    #: the receiving Marktrolle, e.g. "MSB"
    empfaenger: str
131+
132+
124133
class Anwendungsfall(FundamendBaseModel):
125134
"""
126135
One 'Anwendungsfall', indicated by `<AWF>` tag, corresponds to one Prüfidentifikator or type of Message

src/fundamend/utils.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
Contains some utility functions that are used in the project.
33
"""
44

5+
import re
6+
from typing import Optional
7+
8+
from fundamend.models.anwendungshandbuch import Kommunikationsrichtung
9+
510

611
def lstrip(prefix: str, text: str) -> str:
712
"""Strip the given prefix from the given text. If the text does not start with the prefix, return the text as is.
@@ -73,4 +78,69 @@ def remove_linebreaks_and_hyphens(original: str) -> str:
7378
return " ".join(result.strip().split())
7479

7580

76-
__all__ = ["lstrip", "rstrip", "strip", "remove_linebreaks_and_hyphens"]
81+
# Canonical separator between several Marktrollen on one side of a kommunikation_von value.
_UNIFIED_SEPARATOR = "/"
# Other separators that are used in the wild; they get normalised to _UNIFIED_SEPARATOR.
_ALTERNATIVE_SEPARATORS = [","]

# Matches the stand-alone word 'an', also when it directly follows a non-word character such as ')'.
_an_at_word_boundary = re.compile(r"\ban\b")


def _add_whitespace_before_an(original: str) -> str:
    """
    Return ``original`` with one blank inserted in front of every stand-alone 'an'.

    The blank is inserted unconditionally, i.e. also where a blank is already present:
    'NB (VNB)an NB' becomes 'NB (VNB) an NB' and 'NB an LF' becomes 'NB  an LF'.
    """
    return _an_at_word_boundary.sub(r" \g<0>", original)
90+
91+
92+
def _parse_kommunikation_von_line(kommunikation_von_line: str) -> list[Kommunikationsrichtung]:
    """
    Parse a single line of a kommunikation_von value into a list of Kommunikationsrichtung objects.

    Lines are handled one by one because some AHBs have multiple lines in the kommunikation_von attribute
    and the Marktrollen of different lines must not be mixed.

    Args:
        kommunikation_von_line: one line, e.g. 'NB an LF/MSB' or 'NB an LF, MSB an NB (Gas)'.

    Returns:
        One Kommunikationsrichtung per sender/receiver combination, senders in the outer and receivers in the
        inner loop. An empty list for an empty or blank line.

    Raises:
        ValueError: if the line does not follow the pattern 'X an Y[/Z]' or if the sender or the receiver side
            does not contain any Marktrolle.
    """
    if not kommunikation_von_line or not kommunikation_von_line.strip():
        return []
    parts = _add_whitespace_before_an(kommunikation_von_line).split(" an ")
    if len(parts) != 2:
        # Maybe the line holds several directions, like 'NB an LF, MSB an NB (Gas)'.
        # Then it is split at the comma first and every part is treated like a line of its own.
        if "," in kommunikation_von_line:
            result: list[Kommunikationsrichtung] = []
            for subpart in kommunikation_von_line.split(","):
                result += _parse_kommunikation_von_line(subpart.strip())
            return result
        raise ValueError(
            f"Invalid kommunikation_von string: '{kommunikation_von_line}'. Expected format: 'X an Y[/Z]'"
        )
    sender_str, receiver_str = parts
    for alternative_separator in _ALTERNATIVE_SEPARATORS:
        # str.replace leaves the text untouched if the separator does not occur, so no membership test is needed
        sender_str = sender_str.replace(alternative_separator, _UNIFIED_SEPARATOR)
        receiver_str = receiver_str.replace(alternative_separator, _UNIFIED_SEPARATOR)
    # Trailing or doubled separators ('NB an LF,' or 'NB an LF//MSB') must not lead to empty Marktrollen.
    senders = [role for role in (raw.strip() for raw in sender_str.split(_UNIFIED_SEPARATOR)) if role]
    receivers = [role for role in (raw.strip() for raw in receiver_str.split(_UNIFIED_SEPARATOR)) if role]
    if not senders or not receivers:
        # e.g. 'an LF': there is an ' an ' but one of its sides is empty
        raise ValueError(
            f"Invalid kommunikation_von string: '{kommunikation_von_line}'. Expected format: 'X an Y[/Z]'"
        )
    return [Kommunikationsrichtung(sender=sender, empfaenger=receiver) for sender in senders for receiver in receivers]
122+
123+
124+
def parse_kommunikation_von(kommunikation_von: Optional[str]) -> list[Kommunikationsrichtung] | None:
    """Splits the kommunikation_von string into something strongly typed

    Args:
        kommunikation_von: The kommunikation_von string to split, e.g. 'NB an LF/MSB'.
            May span multiple lines; every non-blank line is parsed on its own.

    Returns:
        Properly typed list of Kommunikationsrichtung objects:
        [Kommunikationsrichtung(sender='NB', empfaenger='LF'),
        Kommunikationsrichtung(sender='NB', empfaenger='MSB')]
        An empty list if kommunikation_von is None, empty or blank.
        None in case there is no information given (directly), i.e. if the value (ignoring leading and trailing
        whitespace) is 'Beteiligte aus Ursprungs-nachricht'.

    Raises:
        ValueError: if a non-blank line does not follow the expected format 'X an Y[/Z]'.
    """
    if kommunikation_von is None:
        return []
    # Compare the stripped value: surrounding whitespace or a trailing line break must not turn the
    # "no direct information" marker into a parsing error.
    if kommunikation_von.strip() == "Beteiligte aus Ursprungs-nachricht":
        return None
    result: list[Kommunikationsrichtung] = []
    for line in kommunikation_von.splitlines():
        line = line.strip()
        if line:
            result += _parse_kommunikation_von_line(line)
    return result
144+
145+
146+
# Public API of this module; names with a leading underscore are internal helpers.
__all__ = [
    "lstrip",
    "rstrip",
    "strip",
    "remove_linebreaks_and_hyphens",
    "parse_kommunikation_von",
]

unittests/test_utils.py

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1+
from pathlib import Path
2+
from typing import Generator
3+
14
import pytest
25

3-
from fundamend.utils import remove_linebreaks_and_hyphens
6+
from fundamend import AhbReader
7+
from fundamend.models.anwendungshandbuch import Anwendungsfall, Kommunikationsrichtung
8+
from fundamend.utils import parse_kommunikation_von, remove_linebreaks_and_hyphens
9+
10+
from .conftest import is_private_submodule_checked_out
411

512

613
@pytest.mark.parametrize(
@@ -20,3 +27,120 @@
2027
def test_anwendungsfall_beschreibung_normalization(original: str, expected: str) -> None:
2128
actual = remove_linebreaks_and_hyphens(original)
2229
assert actual == expected
30+
31+
32+
@pytest.mark.parametrize(
    "original, expected",
    [
        pytest.param("", [], id="empty string = no directions"),
        pytest.param("LF an NB", [Kommunikationsrichtung(sender="LF", empfaenger="NB")], id="simple example"),
        pytest.param(
            "MSB an NB, LF",
            [
                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
            ],
            id="two receivers, comma separated",
        ),
        pytest.param(
            "MSB an NB / LF",
            [
                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
            ],
            id="two receivers, slash separated",
        ),
        pytest.param(
            "NB, LF an MSB",
            [
                Kommunikationsrichtung(sender="NB", empfaenger="MSB"),
                Kommunikationsrichtung(sender="LF", empfaenger="MSB"),
            ],
            id="two senders, comma separated",
        ),
        pytest.param(
            "NB / LF an MSB",
            [
                Kommunikationsrichtung(sender="NB", empfaenger="MSB"),
                Kommunikationsrichtung(sender="LF", empfaenger="MSB"),
            ],
            id="two senders, slash separated",
        ),
        pytest.param(
            "BIKO an NB / ÜNB",
            [
                Kommunikationsrichtung(sender="BIKO", empfaenger="NB"),
                Kommunikationsrichtung(sender="BIKO", empfaenger="ÜNB"),
            ],
            id="two receivers, slash separated but with Umlaut",
        ),
        pytest.param(
            "NB an LF\nMSB an LF, NB, ESA",
            [
                Kommunikationsrichtung(sender="NB", empfaenger="LF"),
                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="ESA"),
            ],
            id="two lines",
        ),
        pytest.param(
            "NB an LF / MSB\r\nLF an NB, MSB",
            [
                Kommunikationsrichtung(sender="NB", empfaenger="LF"),
                Kommunikationsrichtung(sender="NB", empfaenger="MSB"),
                Kommunikationsrichtung(sender="LF", empfaenger="NB"),
                Kommunikationsrichtung(sender="LF", empfaenger="MSB"),
            ],
            id="two lines with mixed separators",
            # not a made-up example: the separators really are mixed like this
        ),
        pytest.param(
            "MSB an NB/LF/ÜNB/MSB/ESA",
            [
                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
                Kommunikationsrichtung(sender="MSB", empfaenger="ÜNB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="MSB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="ESA"),
            ],
            id="many receivers",
        ),
        pytest.param(
            "NB an LF, MSB an NB (Gas)",
            [
                Kommunikationsrichtung(sender="NB", empfaenger="LF"),
                Kommunikationsrichtung(sender="MSB", empfaenger="NB (Gas)"),
            ],
            id="two directions in one line, comma separated",
        ),
        pytest.param(
            "NB (VNB)an NB (LPB)",
            [Kommunikationsrichtung(sender="NB (VNB)", empfaenger="NB (LPB)")],
            id="missing whitespace before 'an'",
        ),
        pytest.param("Beteiligte aus Ursprungs-nachricht", None, id="no direct information = None"),
    ],
)
def test_parsing_kommunikation_von(original: str, expected: list[Kommunikationsrichtung] | None) -> None:
    """parse_kommunikation_von must return exactly the expected directions (or None) for the given raw value"""
    actual = parse_kommunikation_von(original)
    assert actual == expected
123+
124+
125+
def _all_anwendungsfaelle() -> Generator[Anwendungsfall, None, None]:
    """
    Yield every Anwendungsfall that is not outdated from all AHB XML files below the private submodule.

    Skips the calling test if the private submodule is not checked out.
    """
    if not is_private_submodule_checked_out():
        pytest.skip("Skipping test because of missing private submodule")
    private_submodule_root = Path(__file__).parent.parent / "xml-migs-and-ahbs"
    # is_dir() is already False for a path that does not exist
    assert private_submodule_root.is_dir()
    # rglob searches recursively by itself, no leading '**/' required
    for ahb_file_path in private_submodule_root.rglob("*AHB*.xml"):
        ahb = AhbReader(ahb_file_path).read()
        for anwendungsfall in ahb.anwendungsfaelle:
            if not anwendungsfall.is_outdated:
                yield anwendungsfall
136+
137+
138+
def test_parsing_all_kommunikation_von_there_is() -> None:
    """Smoke test: parse the 'Kommunikation Von' attribute of every Anwendungsfall found in the AHB files."""
    if not is_private_submodule_checked_out():
        pytest.skip("Skipping test because of missing private submodule")
    for awf in _all_anwendungsfaelle():
        raw_value = awf.kommunikation_von
        if isinstance(raw_value, str):
            # the parsed result is irrelevant here, the call just must not crash
            parse_kommunikation_von(raw_value)
            continue
        # NOTE(review): pytest.skip aborts the whole test at the first non-string value,
        # so later Anwendungsfaelle are not checked - confirm that this is intended.
        pytest.skip("Skipping test because 'Kommunikation Von' is not a string (anymore)")

0 commit comments

Comments
 (0)