
Commit c916705

feat: Sanitization for the JSON file generation of the MIGs and AHBs (#148)
* Add sanitization to the json generation
* Update submodule
* Many fixes
* Don't wrap the command
* 🩹fix spell check
* 🩹fix linter
* 🩹Fix type check
* Fix tests
* 🩹Fix tests again (flaky)
* More coverage
* Add sub-elements to unused elements
* Add option to output AHB as directory of JSON files for each "Prüfi"
* Improve Readers + strip every field
* Fix test
* Update migreader snapshots
1 parent 1010355 commit c916705

16 files changed: +8257 −144 lines

domain-specific-terms.txt

Lines changed: 1 addition & 0 deletions
@@ -17,3 +17,4 @@ rekursion
 rekursive
 finde
 contrl
+Elemente

pyproject.toml

Lines changed: 2 additions & 3 deletions
@@ -79,10 +79,9 @@ max-line-length = 120
 markers = ["snapshot: mark a test as a snapshot test"]

 [project.scripts]
-xml2json = "fundamend.cli:cli"
+xml2json = "fundamend.__main__:main"
 # fundamend is the package in the src directory
-# .cli means the cli.py module inside the fundamend package
-# :cli means the def cli() function inside the cli.py module
+# With no further specification, the entry point is fundamend.__main__ which is then called as main script

 [mypy]
 truethy-bool = true
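
The new entry point simply hands pip's console-script machinery the function `fundamend.__main__:main`. As a rough, hedged sketch (the wrapper that pip actually generates looks slightly different), the installed `xml2json` command behaves roughly like this:

    # illustrative only: a simplified stand-in for the console-script wrapper
    # that pip generates from xml2json = "fundamend.__main__:main"
    import sys

    from fundamend.__main__ import main

    if __name__ == "__main__":
        sys.exit(main())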

src/fundamend/__main__.py

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+"""contains the entrypoint for the command line interface"""
+
+from fundamend.commands import app
+
+__all__ = ["app"]
+
+
+def main() -> None:
+    """entry point of the script defined in pyproject.toml"""
+    app()
+
+
+if __name__ == "__main__":
+    app()
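
Because the package now ships a `__main__` module, the CLI can also be started with `python -m fundamend`. A small, hedged illustration of that equivalence (the argument values below are made up):

    # illustrative only: running the package as a module, equivalent to
    # `python -m fundamend --help`; raises SystemExit(0) after printing help,
    # just like the real command
    import runpy
    import sys

    sys.argv = ["fundamend", "--help"]  # Typer/Click parse sys.argv[1:]
    runpy.run_module("fundamend", run_name="__main__")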

src/fundamend/cli.py

Lines changed: 0 additions & 21 deletions
This file was deleted.

src/fundamend/commands/__init__.py

Lines changed: 3 additions & 6 deletions
@@ -2,10 +2,7 @@
 Contains the commands for the CLI.
 """

-import typer
+import fundamend.commands.xml2json
+from fundamend.commands.app import app

-from fundamend.commands.xml2json import app as xml2json_app
-
-app = typer.Typer()
-
-app.add_typer(xml2json_app)
+__all__ = ["app"]

src/fundamend/commands/app.py

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+"""
+Contains the Typer app of the fundamend CLI tool.
+"""
+
+import typer
+from rich.console import Console
+
+app = typer.Typer(name="fundamend", help="CLI tool to work with XML files by BDEW", no_args_is_help=True)
+err_console = Console(stderr=True)  # https://typer.tiangolo.com/tutorial/printing/#printing-to-standard-error
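
The refactor moves the shared `typer.Typer()` instance into its own module; command modules such as `xml2json.py` import that `app` and register themselves with `@app.command()`, so `commands/__init__.py` only has to import them for their side effects. A minimal, self-contained sketch of that pattern (all names below are illustrative, not taken from fundamend):

    # minimal sketch of the "shared app + decorator registration" pattern;
    # the command and option names are made up
    import typer

    app = typer.Typer(name="demo", no_args_is_help=True)


    @app.command()
    def greet(name: str = "world") -> None:
        """Prints a greeting -- registered on the shared app via the decorator."""
        typer.echo(f"Hello, {name}!")


    if __name__ == "__main__":
        app()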

src/fundamend/commands/xml2json.py

Lines changed: 183 additions & 29 deletions
@@ -2,48 +2,165 @@
 Contains the command to convert XML files to JSON files.
 """

-import json
+import re
+from itertools import groupby
 from pathlib import Path
+from typing import Iterator, Literal

 import typer
-from pydantic import RootModel
 from typing_extensions import Annotated

 from fundamend import AhbReader, Anwendungshandbuch, MessageImplementationGuide, MigReader
+from fundamend.commands.app import app
+from fundamend.sanitize import sanitize_ahb

-app = typer.Typer()
+FORMAT_AND_TYPE_REGEX = re.compile(r"^([A-Z]+)_(AHB|MIG)_(?:(Gas|Strom)_)?")


-def _convert_to_json_file(xml_file_path: Path) -> Path:
+def _write_ahb_models_splitted(
+    model: Anwendungshandbuch,
+    ahb_dir: Path,
+    *,
+    compressed: bool = False,
+) -> None:
+    """Writes the given Anwendungshandbuch model to multiple JSON files, one for each Anwendungsfall."""
+    ahb_dir.mkdir(parents=True, exist_ok=True)
+    for anwendungsfall in model.anwendungsfaelle:
+        json_file_path = ahb_dir / f"{anwendungsfall.pruefidentifikator}.json"
+        with open(json_file_path, encoding="utf-8", mode="w") as outfile:
+            outfile.write(anwendungsfall.model_dump_json(indent=None if compressed else 2))
+
+    # Write meta file
+    ahb_meta_file_path = ahb_dir / "meta.json"
+    with open(ahb_meta_file_path, encoding="utf-8", mode="w") as outfile:
+        outfile.write(model.model_dump_json(exclude={"anwendungsfaelle"}, indent=None if compressed else 2))
+
+
+def _write_model_to_json_file(
+    model: Anwendungshandbuch | MessageImplementationGuide,
+    xml_file_path: Path,
+    *,
+    compressed: bool = False,
+    split_ahb: bool = False,
+) -> None:
+    """Writes the given model to a JSON file at the specified path."""
+    if split_ahb:
+        if not isinstance(model, Anwendungshandbuch):
+            raise ValueError("split_ahb can only be used with Anwendungshandbuch models")
+        ahb_dir = xml_file_path.with_suffix("")
+        _write_ahb_models_splitted(model, ahb_dir, compressed=compressed)
+        typer.echo(f"Successfully converted {xml_file_path} to multiple JSON files in {ahb_dir}")
+    else:
+        json_file_path = xml_file_path.with_suffix(".json")
+        with open(json_file_path, encoding="utf-8", mode="w") as outfile:
+            outfile.write(model.model_dump_json(indent=None if compressed else 2))
+        typer.echo(f"Successfully converted {xml_file_path} to JSON {json_file_path}")
+
+
+def _convert_to_json_files(
+    mig_xml_file_path: Path, ahb_xml_file_path: Path, sanitize: bool = False
+) -> tuple[MessageImplementationGuide, Anwendungshandbuch]:
     """converts the given XML file to a JSON file and returns the path of the latter"""
-    if not xml_file_path.is_file():
-        raise ValueError(f"The given path {xml_file_path.absolute()} is not a file")
-    is_ahb = "ahb" in xml_file_path.stem.lower()
-    is_mig = "mig" in xml_file_path.stem.lower()
-    if is_ahb and is_mig:
-        raise ValueError(f"Cannot detect if {xml_file_path} is an AHB or MIG")
-    root_model: RootModel[Anwendungshandbuch] | RootModel[MessageImplementationGuide]
-    if is_ahb:
-        ahb_model = AhbReader(xml_file_path).read()
-        root_model = RootModel[Anwendungshandbuch](ahb_model)
-    elif is_mig:
-        mig_model = MigReader(xml_file_path).read()
-        root_model = RootModel[MessageImplementationGuide](mig_model)
+    if not mig_xml_file_path.is_file():  # pragma: no cover
+        raise ValueError(f"The given path {mig_xml_file_path.absolute()} is not a file")
+    if not ahb_xml_file_path.is_file():  # pragma: no cover
+        raise ValueError(f"The given path {ahb_xml_file_path.absolute()} is not a file")
+
+    mig_model = MigReader(mig_xml_file_path).read()
+    ahb_model = AhbReader(ahb_xml_file_path).read()
+
+    # Do sanitization if requested
+    if sanitize:
+        sanitize_ahb(mig_model, ahb_model)
+
+    return mig_model, ahb_model
+
+
+def xml2json_dir_mode(
+    xml_path: Path, sanitize: bool = False, compressed: bool = False, split_ahb: bool = False
+) -> None:
+    """
+    Converts all XML files in the given directory to JSON files.
+    The function expects to find pairs of MIG and AHB XML files in the directory.
+    The XML file names must match the pattern `<FORMAT>_<AHB|MIG>_[<Gas|Strom>_]*.xml`.
+    """
+
+    def groupby_key(path_and_match: tuple[Path, re.Match[str] | None]) -> str:
+        assert path_and_match[1] is not None
+        return path_and_match[1].group(1) + (path_and_match[1].group(3) or "")
+
+    def sort_key(path_and_match: tuple[Path, re.Match[str] | None]) -> str:
+        assert path_and_match[1] is not None
+        return groupby_key(path_and_match) + path_and_match[1].group(2)
+
+    def xmls_and_matches() -> Iterator[tuple[Path, re.Match[str]]]:
+        for _xml_path in xml_path.rglob("*.xml"):
+            match = FORMAT_AND_TYPE_REGEX.match(_xml_path.name)
+            if match is None:  # pragma: no cover
+                raise ValueError("XML file name does not match expected format: " + str(_xml_path))
+            yield _xml_path, match
+
+    for _, _xmls_and_matches in groupby(sorted(xmls_and_matches(), key=sort_key), key=groupby_key):
+        _xmls_and_matches_list = list(_xmls_and_matches)
+        assert len(_xmls_and_matches_list) == 2, (
+            "Expected exactly two XML files (AHB + MIG) for each format and power type, but found: "
+            f"{_xmls_and_matches_list}"
+        )
+        assert (
+            _xmls_and_matches_list[0][1].group(2) == "AHB" and _xmls_and_matches_list[1][1].group(2) == "MIG"
+        ), f"Expected AHB on first and a MIG on second position, but found: {_xmls_and_matches_list}"
+        ahb_path = _xmls_and_matches_list[0][0]
+        mig_path = _xmls_and_matches_list[1][0]
+        mig, ahb = _convert_to_json_files(mig_path, ahb_path, sanitize=sanitize)
+        _write_model_to_json_file(mig, mig_path.with_suffix(".json"), compressed=compressed)
+        _write_model_to_json_file(ahb, ahb_path.with_suffix(".json"), compressed=compressed, split_ahb=split_ahb)
+
+
+def xml2json_file_mode(
+    xml_path: Path, sanitize: bool = False, compressed: bool = False, split_ahb: bool = False
+) -> None:
+    """
+    Converts a single XML file to JSON.
+    The function expects to find the corresponding AHB or MIG file in the same directory.
+
+    The XML file names must match the pattern `<FORMAT>_<AHB|MIG>_[<Gas|Strom>_]*.xml`.
+    """
+    match = FORMAT_AND_TYPE_REGEX.match(xml_path.name)
+    if match is None:  # pragma: no cover
+        raise ValueError("XML file name does not match expected format: " + str(xml_path))
+    match_type: Literal["MIG", "AHB"] = match.group(2)  # type: ignore[assignment]
+    match_type_other: Literal["MIG", "AHB"] = "AHB" if match_type == "MIG" else "MIG"
+    pattern_other = f"{match.group(1)}_{match_type_other}_"
+    if match.group(3) is not None:
+        pattern_other += f"{match.group(3)}_"
+    pattern_other += "*.xml"
+
+    other_matches = list(xml_path.parent.glob(pattern_other))
+    if len(other_matches) == 0:  # pragma: no cover
+        raise ValueError(
+            f"No other XML file found in the same directory as {xml_path} matching pattern {pattern_other}"
+        )
+    if len(other_matches) > 1:  # pragma: no cover
+        raise ValueError(
+            f"Multiple other XML files found in the same directory as {xml_path} matching pattern "
+            f"{pattern_other}: {other_matches}"
+        )
+    if match_type == "MIG":
+        mig, ahb = _convert_to_json_files(xml_path, other_matches[0], sanitize=sanitize)
+        _write_model_to_json_file(mig, xml_path.with_suffix(".json"), compressed=compressed)
     else:
-        raise ValueError(f"Seems like {xml_file_path} is neither an AHB nor a MIG")
-    out_dict = root_model.model_dump(mode="json")
-    json_file_path = xml_file_path.with_suffix(".json")
-    with open(json_file_path, encoding="utf-8", mode="w") as outfile:
-        json.dump(out_dict, outfile, indent=True, ensure_ascii=False)
-    typer.echo(f"Successfully converted {xml_file_path} file to JSON {json_file_path}")
-    return json_file_path
+        mig, ahb = _convert_to_json_files(other_matches[0], xml_path, sanitize=sanitize)
+        _write_model_to_json_file(ahb, xml_path.with_suffix(".json"), compressed=compressed, split_ahb=split_ahb)


 @app.command()
 def xml2json(
     xml_path: Annotated[
         Path,
         typer.Option(
+            ...,
+            "--xml-path",
+            "-p",
             exists=True,
             file_okay=True,
             dir_okay=True,
@@ -52,13 +169,50 @@ def xml2json(
             resolve_path=True,
         ),
     ],
+    sanitize: Annotated[
+        bool,
+        typer.Option(
+            ...,
+            "--sanitize",
+            "-s",
+            help="Sanitize the MIG or AHB before writing the resulting JSON. As of now, it does two things:\n"
+            '1) Data elements or groups which are stated as "unused" in the MIG are missing in the AHB. '
+            "The sanitization will add them to the AHB to enable easy parallel iteration over MIG and AHB. \n"
+            "2) The five data elements C_C080 D_3036 model names. But in AHB there is only one D_3036 with "
+            'description "Name". The sanitization will add four extra D_3036 data elements to prevent reading'
+            "raster errors.",
+        ),
+    ] = False,
+    compressed: Annotated[
+        bool,
+        typer.Option(
+            ...,
+            "--compressed",
+            "-c",
+            help="If set, the output JSON files will contain no whitespace outside of strings. If not set"
+            " (default), the output JSON files will be pretty-printed with an indentation of one space.",
+        ),
+    ] = False,
+    split_ahb: Annotated[
+        bool,
+        typer.Option(
+            ...,
+            "--split-ahb",
+            "-a",
+            help="If set, the AHB will be split into multiple files, one for each Anwendungsfall. "
+            "The files will be named `<Prüfidentifikator>.json` in a directory named after the AHB file's "
+            "name (without the extension). It will contain an additional `meta.json` file containing the fields of "
+            "`Anwendungshandbuch` except for `anwendungsfaelle`.",
+        ),
+    ] = False,
 ) -> None:
     """
-    converts the xml file from xml_in_path to a json file next to the .xml
+    Converts the xml file(s) from `xml_in_path` to a json file next to the `*.xml`.
+    If `xml_in_path` is a directory, it will search for all XML files in the directory and its subdirectories.
+
+    All xml files must follow the naming convention `/^(?P<FORMAT>[A-Z]+)_(AHB|MIG)_((Gas|Strom)_)?.*\\.xml$/`
     """
-    assert xml_path.exists()  # ensured by typer
     if xml_path.is_dir():
-        for _xml_path in xml_path.rglob("*.xml"):
-            _convert_to_json_file(_xml_path)
+        xml2json_dir_mode(xml_path, sanitize=sanitize, compressed=compressed, split_ahb=split_ahb)
     else:
-        _convert_to_json_file(xml_path)
+        xml2json_file_mode(xml_path, sanitize=sanitize, compressed=compressed, split_ahb=split_ahb)
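
To make the new directory mode easier to follow: `FORMAT_AND_TYPE_REGEX` only has to match the prefix of a file name, capturing the EDIFACT format, the document type, and the optional Gas/Strom qualifier; sorting by those captures and grouping with `itertools.groupby` is what pairs each AHB with its MIG. A small, hedged sketch of that behaviour (the file names below are invented examples, not files shipped with this commit):

    # illustrative only: how FORMAT_AND_TYPE_REGEX and the groupby pairing in
    # xml2json_dir_mode behave; the file names are invented examples
    import re
    from itertools import groupby

    FORMAT_AND_TYPE_REGEX = re.compile(r"^([A-Z]+)_(AHB|MIG)_(?:(Gas|Strom)_)?")

    names = [
        "UTILMD_MIG_Strom_1.1.xml",
        "UTILMD_AHB_Strom_2.1.xml",
        "MSCONS_AHB_3.1.xml",
        "MSCONS_MIG_3.1.xml",
    ]
    matches = [(name, FORMAT_AND_TYPE_REGEX.match(name)) for name in names]

    def groupby_key(item):  # format + optional Gas/Strom, e.g. "UTILMDStrom"
        return item[1].group(1) + (item[1].group(3) or "")

    def sort_key(item):  # "AHB" < "MIG", so the AHB always sorts first
        return groupby_key(item) + item[1].group(2)

    for key, pair in groupby(sorted(matches, key=sort_key), key=groupby_key):
        ahb, mig = list(pair)  # exactly one AHB and one MIG per key
        print(key, "->", ahb[0], "+", mig[0])
    # MSCONS -> MSCONS_AHB_3.1.xml + MSCONS_MIG_3.1.xml
    # UTILMDStrom -> UTILMD_AHB_Strom_2.1.xml + UTILMD_MIG_Strom_1.1.xml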

src/fundamend/reader/ahbreader.py

Lines changed: 11 additions & 20 deletions
@@ -93,13 +93,11 @@ def _to_data_element(element: ET.Element) -> DataElement:
             codes.append(_to_code(child))
         else:
             raise ValueError(f"unexpected element: {child.tag}")
-    ahb_status: str | None = None
-    if "AHB_Status" in element.attrib and element.attrib["AHB_Status"].strip():
-        ahb_status = element.attrib["AHB_Status"]
+
     return DataElement(
         id=element.tag,
-        name=element.attrib["Name"],
-        ahb_status=ahb_status,
+        name=element.attrib["Name"].strip(),
+        ahb_status=element.attrib.get("AHB_Status", "").strip() or None,
         codes=tuple(codes),
     )

@@ -114,7 +112,7 @@ def _to_data_element_group(element: ET.Element) -> DataElementGroup:
             raise ValueError(f"unexpected element: {child.tag}")
     return DataElementGroup(
         id=element.tag,
-        name=element.attrib["Name"],
+        name=element.attrib["Name"].strip(),
         data_elements=tuple(data_elements),
     )

@@ -129,14 +127,11 @@ def _to_segment(element: ET.Element, is_uebertragungsdatei_level: bool = False)
             data_elements.append(_to_data_element(child))
         else:
             raise ValueError(f"unexpected element: {child.tag}")
-    ahb_status: str | None = None
-    if "AHB_Status" in element.attrib and element.attrib["AHB_Status"].strip():
-        ahb_status = element.attrib["AHB_Status"]
     return Segment(
         id=lstrip("S_", element.tag),
-        name=element.attrib["Name"],
-        number=element.attrib["Number"],
-        ahb_status=ahb_status,
+        name=element.attrib["Name"].strip(),
+        number=element.attrib["Number"].strip(),
+        ahb_status=element.attrib.get("AHB_Status", "").strip() or None,
         data_elements=tuple(data_elements),
         is_on_uebertragungsdatei_level=is_uebertragungsdatei_level,
     )
@@ -154,12 +149,8 @@ def _to_segment_group(element: ET.Element) -> SegmentGroup:
             raise ValueError(f"unexpected element: {child.tag}")
     return SegmentGroup(
         id=lstrip("G_", element.tag),
-        name=element.attrib["Name"],
-        ahb_status=(
-            element.attrib["AHB_Status"].strip() or None
-            if "AHB_Status" in element.attrib and element.attrib["AHB_Status"] is not None
-            else None
-        ),
+        name=element.attrib["Name"].strip(),
+        ahb_status=element.attrib.get("AHB_Status", "").strip() or None,
         elements=tuple(list(segments_and_groups)),
     )

@@ -261,8 +252,8 @@ def _read_anwendungsfall(self, original_element: ET.Element) -> Anwendungsfall:
         format_element = next((child for child in original_element[0] if child.tag.startswith("M_")))
         return Anwendungsfall(
             pruefidentifikator=original_element.attrib["Pruefidentifikator"],
-            beschreibung=original_element.attrib["Beschreibung"],
-            kommunikation_von=original_element.attrib["Kommunikation_von"],
+            beschreibung=original_element.attrib["Beschreibung"].strip(),
+            kommunikation_von=original_element.attrib["Kommunikation_von"].strip(),
             format=EdifactFormat(lstrip("M_", format_element.tag)),
             elements=tuple(segments_and_groups),
         )
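
The repeated pattern `element.attrib.get("AHB_Status", "").strip() or None` collapses the old three-line checks: a missing attribute and a whitespace-only attribute both become `None`, and every name/number field is stripped. A tiny, hedged demonstration with a made-up XML snippet (not part of the fundamend test data):

    # illustrative only: the attrib.get(...).strip() or None idiom used in the reader
    import xml.etree.ElementTree as ET

    root = ET.fromstring(
        "<root>"
        '<D_3039 Name=" Beteiligter " AHB_Status=" Muss "/>'
        '<D_3055 Name="Code" AHB_Status="   "/>'
        '<D_1131 Name="Liste"/>'
        "</root>"
    )
    for element in root:
        name = element.attrib["Name"].strip()
        ahb_status = element.attrib.get("AHB_Status", "").strip() or None
        print(element.tag, repr(name), repr(ahb_status))
    # D_3039 'Beteiligter' 'Muss'
    # D_3055 'Code' None
    # D_1131 'Liste' None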
