Skip to content

Commit f6e3783

Browse files
authored
feat: introduce function to parse Kommunikation_von attribute (w/o using it yet) (#180)
1 parent 20178c4 commit f6e3783

File tree

3 files changed

+209
-0
lines changed

3 files changed

+209
-0
lines changed

src/fundamend/models/anwendungshandbuch.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,15 @@ class SegmentGroup(FundamendBaseModel):
121121
elements: tuple["Segment | SegmentGroup", ...]
122122

123123

124+
class Kommunikationsrichtung(FundamendBaseModel):
    """
    Strongly typed form of a single direction (who sends to whom) taken from the
    'Kommunikation_von' attribute of an Anwendungsfall.
    """

    sender: str  #: the sending side, e.g. "NB"
    empfaenger: str  #: the receiving side, e.g. "MSB"
131+
132+
124133
class Anwendungsfall(FundamendBaseModel):
125134
"""
126135
One 'Anwendungsfall', indicated by `<AWF>` tag, corresponds to one Prüfidentifikator or type of Message

src/fundamend/utils.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
Contains some utility functions that are used in the project.
33
"""
44

5+
import re
6+
from typing import Optional
7+
8+
from fundamend.models.anwendungshandbuch import Kommunikationsrichtung
9+
510

611
def lstrip(prefix: str, text: str) -> str:
712
"""Strip the given prefix from the given text. If the text does not start with the prefix, return the text as is.
@@ -46,3 +51,71 @@ def strip(prefix: str, text: str, suffix: str) -> str:
4651
The text with the prefix and suffix stripped.
4752
"""
4853
return lstrip(prefix, rstrip(text, suffix))
54+
55+
56+
# Canonical separator between multiple Marktrollen on one side of the kommunikation_von attribute.
_UNIFIED_SEPARATOR = "/"
# Other separators that are used in the wild; they are normalised to _UNIFIED_SEPARATOR before splitting.
_ALTERNATIVE_SEPARATORS = [","]

# Matches the standalone word 'an', even if it is glued to a preceding non-word character like ')'.
_an_at_word_boundary = re.compile(r"\ban\b")

_INVALID_LINE_MESSAGE = "Invalid kommunikation_von string: '{}'. Expected format: 'X an Y[/Z]'"


def _add_whitespace_before_an(original: str) -> str:
    """Insert a blank in front of every standalone word 'an'.

    This repairs inputs such as 'NB (VNB)an NB (LPB)' so that they can be split at ' an '.
    The blank is inserted unconditionally; if one was already present the result contains two,
    which is harmless because the caller strips surplus whitespace from every token.
    """
    return _an_at_word_boundary.sub(" an", original)


def _split_marktrollen(raw: str) -> list[str]:
    """Split one side of 'X an Y' into its Marktrollen, dropping empty entries (e.g. from a trailing separator)."""
    for alternative_separator in _ALTERNATIVE_SEPARATORS:
        raw = raw.replace(alternative_separator, _UNIFIED_SEPARATOR)
    stripped_tokens = (token.strip() for token in raw.split(_UNIFIED_SEPARATOR))
    return [token for token in stripped_tokens if token]


def _parse_kommunikation_von_line(kommunikation_von_line: str) -> list[Kommunikationsrichtung]:
    """Parse a single line of kommunikation_von into a list of Kommunikationsrichtung objects.

    Lines are parsed one by one because some AHBs have multiple lines in the kommunikation_von
    attribute and the senders/receivers of different lines must not be mixed.

    Args:
        kommunikation_von_line: One line, e.g. 'NB an LF/MSB' or 'NB an LF, MSB an NB (Gas)'.

    Returns:
        One Kommunikationsrichtung per sender/receiver combination; an empty list for an empty or blank line.

    Raises:
        ValueError: If the line does not follow the 'X an Y[/Z]' pattern or if the sender or the
            receiver side contains no Marktrolle at all.
    """
    if not kommunikation_von_line or not kommunikation_von_line.strip():
        return []
    parts = _add_whitespace_before_an(kommunikation_von_line).split(" an ")
    if len(parts) != 2:
        # The line may hold several directions at once, like 'NB an LF, MSB an NB (Gas)'.
        # Then the comma separates complete directions, so each part is parsed like a line of its own.
        if "," in kommunikation_von_line:
            result: list[Kommunikationsrichtung] = []
            for subpart in kommunikation_von_line.split(","):
                result += _parse_kommunikation_von_line(subpart.strip())
            return result
        raise ValueError(_INVALID_LINE_MESSAGE.format(kommunikation_von_line))
    senders = _split_marktrollen(parts[0])
    receivers = _split_marktrollen(parts[1])
    if not senders or not receivers:
        # e.g. 'an NB' or 'NB an /': do not emit directions with an empty sender or receiver
        raise ValueError(_INVALID_LINE_MESSAGE.format(kommunikation_von_line))
    return [
        Kommunikationsrichtung(sender=sender, empfaenger=receiver) for sender in senders for receiver in receivers
    ]
97+
98+
99+
def parse_kommunikation_von(kommunikation_von: Optional[str]) -> list[Kommunikationsrichtung] | None:
    """Splits the kommunikation_von string into something strongly typed

    Args:
        kommunikation_von: The kommunikation_von string to split, e.g. 'NB an LF/MSB'.
            It may span multiple lines; each line is parsed independently.

    Returns:
        Properly typed list of Kommunikationsrichtung objects:
        [Kommunikationsrichtung(sender='NB', empfaenger='LF'),
        Kommunikationsrichtung(sender='NB', empfaenger='MSB')]
        An empty list if the input is None, empty or blank.
        None if the value is 'Beteiligte aus Ursprungs-nachricht', i.e. no information is given (directly).

    Raises:
        ValueError: If a non-blank line cannot be parsed.
    """
    if kommunikation_von is None:
        return []
    # Collapse whitespace before comparing so that a padded or line-wrapped variant of the
    # placeholder text is still recognised instead of being rejected by the line parser.
    if " ".join(kommunikation_von.split()) == "Beteiligte aus Ursprungs-nachricht":
        return None
    result: list[Kommunikationsrichtung] = []
    for line in kommunikation_von.splitlines():
        line = line.strip()
        if line:
            result += _parse_kommunikation_von_line(line)
    return result
119+
120+
121+
# Names exported by `from fundamend.utils import *`; the underscore-prefixed helpers are not listed here.
__all__ = ["lstrip", "rstrip", "strip", "parse_kommunikation_von"]

unittests/test_utils.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
from pathlib import Path
2+
from typing import Generator
3+
4+
import pytest
5+
6+
from fundamend import AhbReader
7+
from fundamend.models.anwendungshandbuch import Anwendungsfall, Kommunikationsrichtung
8+
from fundamend.utils import parse_kommunikation_von
9+
10+
from .conftest import is_private_submodule_checked_out
11+
12+
13+
@pytest.mark.parametrize(
    "original, expected",
    [
        pytest.param(None, [], id="None = no directions"),
        pytest.param("", [], id="empty string = no directions"),
        pytest.param(" \n ", [], id="whitespace only = no directions"),
        pytest.param("LF an NB", [Kommunikationsrichtung(sender="LF", empfaenger="NB")], id="simple example"),
        pytest.param(
            "MSB an NB, LF",
            [
                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
            ],
            id="two receivers, comma separated",
        ),
        pytest.param(
            "MSB an NB / LF",
            [
                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
            ],
            id="two receivers, slash separated",
        ),
        pytest.param(
            "NB, LF an MSB",
            [
                Kommunikationsrichtung(sender="NB", empfaenger="MSB"),
                Kommunikationsrichtung(sender="LF", empfaenger="MSB"),
            ],
            id="two senders, comma separated",
        ),
        pytest.param(
            "NB / LF an MSB",
            [
                Kommunikationsrichtung(sender="NB", empfaenger="MSB"),
                Kommunikationsrichtung(sender="LF", empfaenger="MSB"),
            ],
            id="two senders, slash separated",
        ),
        pytest.param(
            "BIKO an NB / ÜNB",
            [
                Kommunikationsrichtung(sender="BIKO", empfaenger="NB"),
                Kommunikationsrichtung(sender="BIKO", empfaenger="ÜNB"),
            ],
            id="two receivers, slash separated but with Umlaut",
        ),
        pytest.param(
            "NB an LF\nMSB an LF, NB, ESA",
            [
                Kommunikationsrichtung(sender="NB", empfaenger="LF"),
                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="ESA"),
            ],
            id="two lines",
        ),
        pytest.param(
            "NB an LF / MSB\r\nLF an NB, MSB",
            [
                Kommunikationsrichtung(sender="NB", empfaenger="LF"),
                Kommunikationsrichtung(sender="NB", empfaenger="MSB"),
                Kommunikationsrichtung(sender="LF", empfaenger="NB"),
                Kommunikationsrichtung(sender="LF", empfaenger="MSB"),
            ],
            id="two lines with mixed separators",
            # this combination of line break styles and separators really occurs in the source data
        ),
        pytest.param(
            "MSB an NB/LF/ÜNB/MSB/ESA",
            [
                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
                Kommunikationsrichtung(sender="MSB", empfaenger="ÜNB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="MSB"),
                Kommunikationsrichtung(sender="MSB", empfaenger="ESA"),
            ],
            id="many receivers",
        ),
        pytest.param(
            "NB an LF, MSB an NB (Gas)",
            [
                Kommunikationsrichtung(sender="NB", empfaenger="LF"),
                Kommunikationsrichtung(sender="MSB", empfaenger="NB (Gas)"),
            ],
            id="two complete directions, comma separated",
        ),
        pytest.param(
            "NB (VNB)an NB (LPB)",
            [Kommunikationsrichtung(sender="NB (VNB)", empfaenger="NB (LPB)")],
            id="missing whitespace before 'an'",
        ),
        pytest.param("Beteiligte aus Ursprungs-nachricht", None, id="placeholder text = None"),
    ],
)
def test_parsing_kommunikation_von(original: str | None, expected: list[Kommunikationsrichtung] | None) -> None:
    """parse_kommunikation_von must turn each raw attribute value into the expected typed directions"""
    actual = parse_kommunikation_von(original)
    assert actual == expected
104+
105+
106+
def _all_anwendungsfaelle() -> Generator[Anwendungsfall, None, None]:
    """
    Yield every Anwendungsfall that is not outdated from all AHB XML files below the
    'xml-migs-and-ahbs' directory next to the unittests folder.
    Skips the calling test if the private submodule is not checked out.
    """
    if not is_private_submodule_checked_out():
        pytest.skip("Skipping test because of missing private submodule")
    private_submodule_root = Path(__file__).parent.parent / "xml-migs-and-ahbs"
    assert private_submodule_root.is_dir()  # is_dir() is already False for a non-existing path
    # rglob searches recursively by itself, so no leading '**/' is needed in the pattern.
    # The paths are sorted because rglob does not promise any order; a fixed order makes
    # the first failing file the same on every run.
    for ahb_file_path in sorted(private_submodule_root.rglob("*AHB*.xml")):
        ahb = AhbReader(ahb_file_path).read()
        for anwendungsfall in ahb.anwendungsfaelle:
            if not anwendungsfall.is_outdated:
                yield anwendungsfall
117+
118+
119+
def test_parsing_all_kommunikation_von_there_is() -> None:
    """
    Smoke test: feed the 'Kommunikation Von' attribute of every Anwendungsfall found in the
    AHB files into the parser. Only the absence of exceptions is checked, not the result.
    """
    if not is_private_submodule_checked_out():
        pytest.skip("Skipping test because of missing private submodule")
    for awf in _all_anwendungsfaelle():
        raw_value = awf.kommunikation_von
        if isinstance(raw_value, str):
            parse_kommunikation_von(raw_value)  # must not crash
            continue
        # pytest.skip raises, so the whole test ends at the first non-string value
        pytest.skip("Skipping test because 'Kommunikation Von' is not a string (anymore)")

0 commit comments

Comments
 (0)