Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/fundamend/reader/ahbreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
_is_segment_group,
_is_uebertragungsdatei,
)
from fundamend.utils import lstrip, strip
from fundamend.utils import lstrip, remove_linebreaks_and_hyphens, strip

# pylint:disable=duplicate-code
# yes, it's very similar to the MigReader
Expand Down Expand Up @@ -252,7 +252,7 @@ def _read_anwendungsfall(self, original_element: ET.Element) -> Anwendungsfall:
format_element = next((child for child in original_element[0] if child.tag.startswith("M_")))
return Anwendungsfall(
pruefidentifikator=original_element.attrib["Pruefidentifikator"],
beschreibung=original_element.attrib["Beschreibung"].strip(),
beschreibung=remove_linebreaks_and_hyphens(original_element.attrib["Beschreibung"]),
kommunikation_von=original_element.attrib["Kommunikation_von"].strip(),
format=EdifactFormat(lstrip("M_", format_element.tag)),
elements=tuple(segments_and_groups),
Expand Down
27 changes: 26 additions & 1 deletion src/fundamend/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,31 @@ def strip(prefix: str, text: str, suffix: str) -> str:
return lstrip(prefix, rstrip(text, suffix))


_replacements: dict[str, str] = {
"-\r\n": "",
"\r\n": " ",
"\r": "",
"\n": "",
}


def remove_linebreaks_and_hyphens(original: str) -> str:
"""
Normalize a multi line string by stripping leading and trailing whitespace and removing line breaks.

Args:
original: The string to normalize.

Returns:
The normalized string.
"""
result = original
for old, new in _replacements.items():
result = result.replace(old, new)
# if you add more replacement rules, please also add a unit test in test_utils.py
return " ".join(result.strip().split())


_UNIFIED_SEPARATOR = "/" # how multiple Marktrollen shall be split in the kommunikation_von attribute
_ALTERNATIVE_SEPARATORS = [","] # other separators that are used in the wild

Expand Down Expand Up @@ -118,4 +143,4 @@ def parse_kommunikation_von(kommunikation_von: Optional[str]) -> list[Kommunikat
return result


__all__ = ["lstrip", "rstrip", "strip", "parse_kommunikation_von"]
__all__ = ["lstrip", "rstrip", "strip", "parse_kommunikation_von", "remove_linebreaks_and_hyphens"]
446 changes: 446 additions & 0 deletions unittests/__snapshots__/test_ahbreader.ambr

Large diffs are not rendered by default.

27 changes: 27 additions & 0 deletions unittests/test_ahbreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from fundamend.models.anwendungshandbuch import Anwendungsfall, Anwendungshandbuch, Bedingung, Paket, UbBedingung
from fundamend.reader import AhbReader

from .conftest import is_private_submodule_checked_out


@pytest.mark.parametrize(
"ahb_xml_file_path, expected_date",
Expand Down Expand Up @@ -193,3 +195,28 @@ def test_anwendungshandbuch_hashable(ahb_xml_file_path: Path) -> None:
assert isinstance(hash_code, int)
hash_collection = set()
hash_collection.add(ahb)


# Directory (two levels above this test file, i.e. next to the unittests folder) that is searched
# recursively for AHB XML files by test_sanitizing_all_awf_beschreibungen.
# NOTE(review): presumably the checkout location of the private "xml-migs-and-ahbs" submodule - confirm.
_xml_submodule_root: Path = Path(__file__).parent.parent / "xml-migs-and-ahbs"


@pytest.mark.snapshot()
def test_sanitizing_all_awf_beschreibungen(snapshot: SnapshotAssertion) -> None:
    """
    Snapshot all distinct Anwendungsfall descriptions so that changes to the sanitation code become visible.

    Every AHB XML file below ``_xml_submodule_root`` is read; the ``beschreibung`` of each Anwendungsfall
    that is not outdated is collected, de-duplicated, sorted and compared against the stored snapshot.
    The test is skipped if the private submodule is not checked out.
    """
    if not is_private_submodule_checked_out():
        pytest.skip("Skipping test because of missing private submodule")
    distinct_beschreibungen: set[str] = set()
    for xml_path in _xml_submodule_root.rglob("**/*AHB*.xml"):
        anwendungshandbuch = AhbReader(xml_path).read()
        distinct_beschreibungen.update(
            awf.beschreibung for awf in anwendungshandbuch.anwendungsfaelle if not awf.is_outdated
        )
    # If you're unhappy with any specific entry in this list, better write a new unit test case in
    # test_utils.py / test_anwendungsfall_beschreibung_normalization (add more parametrization).
    # If the snapshot test fails because of updated data, just run
    # tox -e snapshots
    # and commit the updated .ambr file.
    # sorting makes the snapshot independent of the (arbitrary) iteration order of the set
    snapshot.assert_match(sorted(distinct_beschreibungen))
21 changes: 20 additions & 1 deletion unittests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from fundamend import AhbReader
from fundamend.models.anwendungshandbuch import Anwendungsfall
from fundamend.models.kommunikationsrichtung import Kommunikationsrichtung
from fundamend.utils import parse_kommunikation_von
from fundamend.utils import parse_kommunikation_von, remove_linebreaks_and_hyphens

from .conftest import is_private_submodule_checked_out

Expand Down Expand Up @@ -126,3 +126,22 @@ def test_parsing_all_kommunikation_von_there_is() -> None:
if not isinstance(kommunikation_von, str):
pytest.skip("Skipping test because 'Kommunikation Von' is not a string (anymore)")
_ = parse_kommunikation_von(kommunikation_von) # must not crash


@pytest.mark.parametrize(
    "original, expected",
    [
        pytest.param("foo", "foo", id="no change"),
        pytest.param("foo ", "foo", id="trailing whitespace"),
        pytest.param(" foo", "foo", id="leading whitespace"),
        pytest.param(" foo ", "foo", id="trailing and leading whitespaces"),
        pytest.param(" foo\r\n ", "foo", id="trailing and leading whitespaces and line break"),
        # hyphen requirements discussed here:
        # https://github.com/Hochfrequenz/xml-fundamend-python/issues/172#issue-3427724092
        pytest.param(" Foo-\r\nbar ", "Foobar", id="hyphen with line break"),
        pytest.param(" Foo\r\n and bar ", "Foo and bar", id="line break w/o hyphen"),
        # edge cases and whitespace handling that had no explicit coverage
        pytest.param("", "", id="empty string"),
        pytest.param("   ", "", id="only whitespace"),
        pytest.param("Foo-bar", "Foo-bar", id="hyphen w/o line break is kept"),
        pytest.param("Foo   \t bar", "Foo bar", id="inner whitespace is collapsed"),
        pytest.param("Foo\r\nbar\r\nbaz", "Foo bar baz", id="multiple line breaks"),
        pytest.param("Foo-\r\nbar-\r\nbaz", "Foobarbaz", id="multiple hyphens with line breaks"),
        # every replacement rule should have a test; these two pin the rules for bare "\n" and "\r".
        # NOTE(review): both rules join the words without a blank (unlike "\r\n") - confirm this is intended.
        pytest.param("Foo\nbar", "Foobar", id="bare line feed"),
        pytest.param("Foo\rbar", "Foobar", id="bare carriage return"),
    ],
)
def test_anwendungsfall_beschreibung_normalization(original: str, expected: str) -> None:
    """Check that remove_linebreaks_and_hyphens turns ``original`` into ``expected``."""
    actual = remove_linebreaks_and_hyphens(original)
    assert actual == expected
2 changes: 1 addition & 1 deletion xml-migs-and-ahbs