Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/fundamend/reader/ahbreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
_is_segment_group,
_is_uebertragungsdatei,
)
from fundamend.utils import lstrip, strip
from fundamend.utils import lstrip, remove_linebreaks_and_hyphens, strip

# pylint:disable=duplicate-code
# yes, it's very similar to the MigReader
Expand Down Expand Up @@ -252,7 +252,7 @@ def _read_anwendungsfall(self, original_element: ET.Element) -> Anwendungsfall:
format_element = next((child for child in original_element[0] if child.tag.startswith("M_")))
return Anwendungsfall(
pruefidentifikator=original_element.attrib["Pruefidentifikator"],
beschreibung=original_element.attrib["Beschreibung"].strip(),
beschreibung=remove_linebreaks_and_hyphens(original_element.attrib["Beschreibung"]),
kommunikation_von=original_element.attrib["Kommunikation_von"].strip(),
format=EdifactFormat(lstrip("M_", format_element.tag)),
elements=tuple(segments_and_groups),
Expand Down
28 changes: 28 additions & 0 deletions src/fundamend/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,31 @@ def strip(prefix: str, text: str, suffix: str) -> str:
The text with the prefix and suffix stripped.
"""
return lstrip(prefix, rstrip(text, suffix))
Comment on lines 11 to 53
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.



# Ordered replacement rules used by remove_linebreaks_and_hyphens.
# The order is significant (dicts preserve insertion order):
#   1. hyphen + line break first, so that a word split across lines is re-joined,
#   2. "\r\n" before the single-character "\r" / "\n" rules, so that a Windows
#      line break is treated as ONE break and not consumed piecewise.
# Every flavour of line break ("\r\n", "\n", "\r") is treated the same way.
_replacements: dict[str, str] = {
    "-\r\n": "",
    "-\n": "",
    "-\r": "",
    "\r\n": " ",
    "\r": " ",
    "\n": " ",
}


def remove_linebreaks_and_hyphens(original: str) -> str:
    """
    Normalize a (possibly multi line) string to a single line.

    The following steps are applied:
    * a hyphen directly followed by a line break is removed together with the
      line break, so that hyphenated words are joined again ("Foo-\\r\\nbar" -> "Foobar"),
    * every remaining line break is replaced by a space, so that the words around
      it are not merged ("Foo\\nbar" -> "Foo bar"),
    * leading and trailing whitespace is stripped and runs of whitespace are
      collapsed to a single space.

    Args:
        original: The string to normalize.

    Returns:
        The normalized single-line string.
    """
    result = original
    for old, new in _replacements.items():
        result = result.replace(old, new)
    # if you add more replacement rules, please also add a unit test in test_utils.py
    # str.split() without arguments already ignores leading/trailing whitespace
    return " ".join(result.split())


# Names exported by `from fundamend.utils import *`.
__all__ = ["lstrip", "rstrip", "strip", "remove_linebreaks_and_hyphens"]
22 changes: 22 additions & 0 deletions unittests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pytest

from fundamend.utils import remove_linebreaks_and_hyphens


@pytest.mark.parametrize(
    "original, expected",
    [
        # pure whitespace handling
        pytest.param("foo", "foo", id="no change"),
        pytest.param("foo ", "foo", id="trailing whitespace"),
        pytest.param(" foo", "foo", id="leading whitespace"),
        pytest.param(" foo ", "foo", id="trailing and leading whitespaces"),
        pytest.param(" foo\r\n ", "foo", id="trailing and leading whitespaces and line break"),
        # hyphen requirements discussed here:
        # https://github.com/Hochfrequenz/xml-fundamend-python/issues/172#issue-3427724092
        pytest.param(" Foo-\r\nbar ", "Foobar", id="hyphen with line break"),
        pytest.param(" Foo\r\n and bar ", "Foo and bar", id="line break w/o hyphen"),
    ],
)
def test_anwendungsfall_beschreibung_normalization(original: str, expected: str) -> None:
    """Check that normalizing ``original`` yields exactly ``expected``."""
    assert remove_linebreaks_and_hyphens(original) == expected