feat: introduce function to parse Kommunikation_von (w/o using it yet) (#177)

hf-kklein · Konstantin · web-flow · commit 26809119df19 · 2025-10-01T13:51:26.000+02:00
* feat: introduce function to parse `Kommunikation_von` (w/o using it yet) based on #176 (no dependency - just to avoid merge conflicts) * fix some more edge cases --------- Co-authored-by: Konstantin <konstantin.klein+github@hochfrequenz.de>
diff --git a/src/fundamend/models/anwendungshandbuch.py b/src/fundamend/models/anwendungshandbuch.py
@@ -121,6 +121,15 @@ class SegmentGroup(FundamendBaseModel):
     elements: tuple["Segment | SegmentGroup", ...]
 
 
+class Kommunikationsrichtung(FundamendBaseModel):
+    """
+    a strongly typed sender/receiver pair as found in the 'Kommunikation_von' attribute of an Anwendungsfall
+    """
+
+    sender: str  #: e.g. "NB"
+    empfaenger: str  #: e.g. "MSB"
+
+
 class Anwendungsfall(FundamendBaseModel):
     """
     One 'Anwendungsfall', indicated by `<AWF>` tag, corresponds to one Prüfidentifikator or type of Message
diff --git a/src/fundamend/utils.py b/src/fundamend/utils.py
@@ -2,6 +2,11 @@
 Contains some utility functions that are used in the project.
 """
 
+import re
+from typing import Optional
+
+from fundamend.models.anwendungshandbuch import Kommunikationsrichtung
+
 
 def lstrip(prefix: str, text: str) -> str:
     """Strip the given prefix from the given text. If the text does not start with the prefix, return the text as is.
@@ -73,4 +78,69 @@ def remove_linebreaks_and_hyphens(original: str) -> str:
     return " ".join(result.strip().split())
 
 
-__all__ = ["lstrip", "rstrip", "strip", "remove_linebreaks_and_hyphens"]
+_UNIFIED_SEPARATOR = "/"  # how multiple Marktrollen shall be split in the kommunikation_von attribute
+_ALTERNATIVE_SEPARATORS = [","]  # other separators that are used in the wild
+
+_an_at_word_boundary = re.compile(r"\ban\b")
+
+
+def _add_whitespace_before_an(original: str) -> str:
+    """prepends a space to every standalone word 'an', even if one is already there (e.g. ')an' -> ') an')"""
+    return _an_at_word_boundary.sub(" an", original)
+
+
+def _parse_kommunikation_von_line(kommunikation_von_line: str) -> list[Kommunikationsrichtung]:
+    """
+    parses a single line of kommunikation_von into a list of Kommunikationsrichtung objects
+    this is necessary because some AHBs have multiple lines in the kommunikation_von attribute which must not be mixed
+    """
+    if not kommunikation_von_line or not kommunikation_von_line.strip():
+        return []
+    result: list[Kommunikationsrichtung] = []
+    parts = _add_whitespace_before_an(kommunikation_von_line).split(" an ")
+    if len(parts) != 2:
+        # maybe this line looks different, more like 'NB an LF, MSB an NB (Gas)'
+        # then we have to split at the comma first and treat each part as if it were a single line of its own
+        if "," in kommunikation_von_line:
+            for subpart in kommunikation_von_line.split(","):
+                result += _parse_kommunikation_von_line(subpart.strip())
+            return result
+        raise ValueError(f"Invalid kommunikation_von string: '{kommunikation_von_line}'. Expected format: 'X an Y[/Z]'")
+    sender_str = parts[0]
+    receiver_str = parts[1]
+    for alternative_separator in _ALTERNATIVE_SEPARATORS:
+        if alternative_separator in receiver_str:
+            receiver_str = receiver_str.replace(alternative_separator, _UNIFIED_SEPARATOR)
+        if alternative_separator in sender_str:
+            sender_str = sender_str.replace(alternative_separator, _UNIFIED_SEPARATOR)
+    senders = [x.strip() for x in sender_str.split(_UNIFIED_SEPARATOR)]
+    receivers = [x.strip() for x in receiver_str.split(_UNIFIED_SEPARATOR)]
+    for sender in senders:
+        for receiver in receivers:
+            result.append(Kommunikationsrichtung(sender=sender, empfaenger=receiver))
+    return result
+
+
+def parse_kommunikation_von(kommunikation_von: Optional[str]) -> list[Kommunikationsrichtung] | None:
+    """Splits the kommunikation_von string into something strongly typed
+
+    Args:
+        kommunikation_von: The kommunikation_von string to split, e.g. 'NB an LF/MSB'.
+
+    Returns:
+        Properly typed list of Kommunikationsrichtung objects:
+        [Kommunikationsrichtung(sender='NB', empfaenger='LF'),
+        Kommunikationsrichtung(sender='NB', empfaenger='MSB')]
+        or None if no direction is given (directly), i.e. for 'Beteiligte aus Ursprungs-nachricht'.
+    """
+    if kommunikation_von == "Beteiligte aus Ursprungs-nachricht":
+        return None
+    result: list[Kommunikationsrichtung] = []
+    for line in (kommunikation_von or "").splitlines():
+        line = line.strip()
+        if line:
+            result += _parse_kommunikation_von_line(line)
+    return result
+
+
+__all__ = ["lstrip", "rstrip", "strip", "remove_linebreaks_and_hyphens", "parse_kommunikation_von"]
diff --git a/unittests/test_utils.py b/unittests/test_utils.py
@@ -1,6 +1,13 @@
+from pathlib import Path
+from typing import Generator
+
 import pytest
 
-from fundamend.utils import remove_linebreaks_and_hyphens
+from fundamend import AhbReader
+from fundamend.models.anwendungshandbuch import Anwendungsfall, Kommunikationsrichtung
+from fundamend.utils import parse_kommunikation_von, remove_linebreaks_and_hyphens
+
+from .conftest import is_private_submodule_checked_out
 
 
 @pytest.mark.parametrize(
@@ -20,3 +27,120 @@
 def test_anwendungsfall_beschreibung_normalization(original: str, expected: str) -> None:
     actual = remove_linebreaks_and_hyphens(original)
     assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "original, expected",
+    [
+        pytest.param("", [], id="empty string = no directions"),
+        pytest.param("LF an NB", [Kommunikationsrichtung(sender="LF", empfaenger="NB")], id="simple example"),
+        pytest.param(
+            "MSB an NB, LF",
+            [
+                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
+                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
+            ],
+            id="two receivers, comma separated",
+        ),
+        pytest.param(
+            "MSB an NB / LF",
+            [
+                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
+                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
+            ],
+            id="two receivers, slash separated",
+        ),
+        pytest.param(
+            "NB, LF an MSB",
+            [
+                Kommunikationsrichtung(sender="NB", empfaenger="MSB"),
+                Kommunikationsrichtung(sender="LF", empfaenger="MSB"),
+            ],
+            id="two senders, comma separated",
+        ),
+        pytest.param(
+            "NB / LF an MSB",
+            [
+                Kommunikationsrichtung(sender="NB", empfaenger="MSB"),
+                Kommunikationsrichtung(sender="LF", empfaenger="MSB"),
+            ],
+            id="two senders, slash separated",
+        ),
+        pytest.param(
+            "BIKO an NB / ÜNB",
+            [
+                Kommunikationsrichtung(sender="BIKO", empfaenger="NB"),
+                Kommunikationsrichtung(sender="BIKO", empfaenger="ÜNB"),
+            ],
+            id="two receivers, slash separated but with Umlaut",
+        ),
+        pytest.param(
+            "NB an LF\nMSB an LF, NB, ESA",
+            [
+                Kommunikationsrichtung(sender="NB", empfaenger="LF"),
+                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
+                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
+                Kommunikationsrichtung(sender="MSB", empfaenger="ESA"),
+            ],
+            id="two lines",
+        ),
+        pytest.param(
+            "NB an LF / MSB\r\nLF an NB, MSB",
+            [
+                Kommunikationsrichtung(sender="NB", empfaenger="LF"),
+                Kommunikationsrichtung(sender="NB", empfaenger="MSB"),
+                Kommunikationsrichtung(sender="LF", empfaenger="NB"),
+                Kommunikationsrichtung(sender="LF", empfaenger="MSB"),
+            ],
+            id="two lines with mixed separators",
+            # this case is real, I'm not making this up
+        ),
+        pytest.param(
+            "MSB an NB/LF/ÜNB/MSB/ESA",
+            [
+                Kommunikationsrichtung(sender="MSB", empfaenger="NB"),
+                Kommunikationsrichtung(sender="MSB", empfaenger="LF"),
+                Kommunikationsrichtung(sender="MSB", empfaenger="ÜNB"),
+                Kommunikationsrichtung(sender="MSB", empfaenger="MSB"),
+                Kommunikationsrichtung(sender="MSB", empfaenger="ESA"),
+            ],
+            id="many receivers",
+        ),
+        pytest.param(
+            "NB an LF, MSB an NB (Gas)",
+            [
+                Kommunikationsrichtung(sender="NB", empfaenger="LF"),
+                Kommunikationsrichtung(sender="MSB", empfaenger="NB (Gas)"),
+            ],
+        ),
+        pytest.param("NB (VNB)an NB (LPB)", [Kommunikationsrichtung(sender="NB (VNB)", empfaenger="NB (LPB)")]),
+        pytest.param("Beteiligte aus Ursprungs-nachricht", None),
+    ],
+)
+def test_parsing_kommunikation_von(original: str, expected: list[Kommunikationsrichtung] | None) -> None:
+    actual = parse_kommunikation_von(original)
+    assert actual == expected
+
+
+def _all_anwendungsfaelle() -> Generator[Anwendungsfall, None, None]:
+    if not is_private_submodule_checked_out():
+        pytest.skip("Skipping test because of missing private submodule")
+    private_submodule_root = Path(__file__).parent.parent / "xml-migs-and-ahbs"
+    assert private_submodule_root.exists() and private_submodule_root.is_dir()
+    for ahb_file_path in private_submodule_root.rglob("**/*AHB*.xml"):
+        ahb = AhbReader(ahb_file_path).read()
+        for anwendungsfall in ahb.anwendungsfaelle:
+            if anwendungsfall.is_outdated:
+                continue
+            yield anwendungsfall
+
+
+def test_parsing_all_kommunikation_von_there_is() -> None:
+    """loop over all AHB files and read the 'Kommunikation Von' Attribute of all the Anwendungsfälle"""
+    if not is_private_submodule_checked_out():
+        pytest.skip("Skipping test because of missing private submodule")
+    for anwendungsfall in _all_anwendungsfaelle():
+        kommunikation_von = anwendungsfall.kommunikation_von
+        if not isinstance(kommunikation_von, str):
+            pytest.skip("Skipping test because 'Kommunikation Von' is not a string (anymore)")
+        _ = parse_kommunikation_von(kommunikation_von)  # must not crash