Skip to content

Commit f7f1828

Browse files
authored
feat: compare all AHB props to handle row insertion at arb. positions (#37)
* Rename `test_[...]_multiple_entries_per_section_name` to `test_[...]_multiple_rows_per_section_name`
* Add test case covering row insertion at arbitrary positions
* Enhance comparison to check for matching rows considering all properties
1 parent 5895206 commit f7f1828

File tree

2 files changed

+187
-58
lines changed

2 files changed

+187
-58
lines changed

src/ahlbatross/core/ahb_comparison.py

Lines changed: 70 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ def _compare_ahb_rows(previous_ahb_row: AhbRow, subsequent_ahb_row: AhbRow) -> A
1818

1919
# consider all AHB properties except `section_name` (Segmentname) and `formatversion`
2020
for entry in AHB_PROPERTIES:
21-
previous_ahb_entry = getattr(previous_ahb_row, entry, "") or ""
22-
subsequent_ahb_entry = getattr(subsequent_ahb_row, entry, "") or ""
21+
previous_ahb_entry = normalize_entries(getattr(previous_ahb_row, entry, "") or "")
22+
subsequent_ahb_entry = normalize_entries(getattr(subsequent_ahb_row, entry, "") or "")
2323

24-
if (previous_ahb_entry.strip() or subsequent_ahb_entry.strip()) and previous_ahb_entry != subsequent_ahb_entry:
24+
if (previous_ahb_entry or subsequent_ahb_entry) and previous_ahb_entry != subsequent_ahb_entry:
2525
changed_entries.extend(
2626
[f"{entry}_{previous_ahb_row.formatversion}", f"{entry}_{subsequent_ahb_row.formatversion}"]
2727
)
@@ -50,16 +50,36 @@ def _add_empty_row(formatversion: str) -> AhbRow:
5050

5151

5252
def _find_matching_subsequent_row(
53-
current_ahb_row: AhbRow, subsequent_ahb_rows: List[AhbRow], start_idx: int
53+
current_ahb_row: AhbRow, subsequent_ahb_rows: List[AhbRow], start_idx: int, duplicate_indices: set[int]
5454
) -> Tuple[int, AhbRow | None]:
5555
"""
56-
Find matching row in subsequent version starting from given index.
56+
Find matching row in subsequent version starting from given index by considering all AHB properties
57+
within the same `section_name` group.
5758
"""
5859
normalized_current = normalize_entries(current_ahb_row.section_name)
60+
current_key = current_ahb_row.get_key()
5961

6062
for idx, row in enumerate(subsequent_ahb_rows[start_idx:], start_idx):
63+
if idx in duplicate_indices:
64+
continue
65+
66+
if (
67+
normalize_entries(row.section_name) == normalized_current
68+
and row.get_key() == current_key
69+
and row.segment_id == current_ahb_row.segment_id
70+
):
71+
return idx, row
72+
73+
# in case no match was found, continue by aligning `Segmentname` entries
74+
for idx, row in enumerate(subsequent_ahb_rows[start_idx:], start_idx):
75+
if idx in duplicate_indices:
76+
continue
77+
6178
if normalize_entries(row.section_name) == normalized_current:
79+
if row.ahb_expression is not None and current_ahb_row.ahb_expression is None:
80+
continue
6281
return idx, row
82+
6383
return -1, None
6484

6585

@@ -70,68 +90,74 @@ def align_ahb_rows(previous_ahb_rows: List[AhbRow], subsequent_ahb_rows: List[Ah
7090
result = []
7191
i = 0
7292
j = 0
93+
duplicate_indices: set[int] = set()
7394

7495
while i < len(previous_ahb_rows) or j < len(subsequent_ahb_rows):
7596
if i >= len(previous_ahb_rows):
76-
row = subsequent_ahb_rows[j]
77-
result.append(
78-
AhbRowComparison(
79-
previous_formatversion=_add_empty_row(row.formatversion),
80-
# label remaining rows of subsequent AHB as NEW
81-
diff=AhbRowDiff(diff_type=DiffType.ADDED),
82-
subsequent_formatversion=row,
97+
# add remaining rows as "new" if not already used
98+
if j not in duplicate_indices:
99+
result.append(
100+
AhbRowComparison(
101+
previous_formatversion=_add_empty_row(subsequent_ahb_rows[j].formatversion),
102+
diff=AhbRowDiff(diff_type=DiffType.ADDED),
103+
subsequent_formatversion=subsequent_ahb_rows[j],
104+
)
83105
)
84-
)
85106
j += 1
107+
continue
86108

87-
elif j >= len(subsequent_ahb_rows):
88-
row = previous_ahb_rows[i]
109+
if j >= len(subsequent_ahb_rows):
89110
result.append(
90111
AhbRowComparison(
91-
previous_formatversion=row,
112+
previous_formatversion=previous_ahb_rows[i],
92113
# label remaining rows of previous AHB as REMOVED
93114
diff=AhbRowDiff(diff_type=DiffType.REMOVED),
94-
subsequent_formatversion=_add_empty_row(row.formatversion),
115+
subsequent_formatversion=_add_empty_row(previous_ahb_rows[i].formatversion),
95116
)
96117
)
97118
i += 1
119+
continue
98120

99-
else:
100-
current_row = previous_ahb_rows[i]
101-
next_match_idx, matching_row = _find_matching_subsequent_row(current_row, subsequent_ahb_rows, j)
121+
current_row = previous_ahb_rows[i]
122+
next_match_idx, matching_row = _find_matching_subsequent_row(
123+
current_row, subsequent_ahb_rows, j, duplicate_indices
124+
)
102125

103-
if next_match_idx >= 0 and matching_row is not None:
104-
# add new rows until `section_name` (Segmentname) matches
105-
for k in range(j, next_match_idx):
106-
new_row = subsequent_ahb_rows[k]
126+
if next_match_idx >= 0 and matching_row is not None:
127+
# add new rows until `section_name` (Segmentname) matches
128+
while j < next_match_idx:
129+
if j not in duplicate_indices:
107130
result.append(
108131
AhbRowComparison(
109-
previous_formatversion=_add_empty_row(new_row.formatversion),
132+
previous_formatversion=_add_empty_row(subsequent_ahb_rows[j].formatversion),
110133
diff=AhbRowDiff(diff_type=DiffType.ADDED),
111-
subsequent_formatversion=new_row,
134+
subsequent_formatversion=subsequent_ahb_rows[j],
112135
)
113136
)
137+
j += 1
114138

115-
# add matching rows with comparison
116-
diff = _compare_ahb_rows(current_row, matching_row)
117-
result.append(
118-
AhbRowComparison(
119-
previous_formatversion=current_row, diff=diff, subsequent_formatversion=matching_row
120-
)
139+
# add matching rows with comparison
140+
diff = _compare_ahb_rows(current_row, matching_row)
141+
result.append(
142+
AhbRowComparison(
143+
previous_formatversion=current_row,
144+
diff=diff,
145+
subsequent_formatversion=matching_row,
121146
)
147+
)
148+
duplicate_indices.add(next_match_idx)
149+
i += 1
150+
j = next_match_idx + 1
122151

123-
i += 1
124-
j = next_match_idx + 1
125-
126-
else:
127-
# if no match found - label as REMOVED
128-
result.append(
129-
AhbRowComparison(
130-
previous_formatversion=current_row,
131-
diff=AhbRowDiff(diff_type=DiffType.REMOVED),
132-
subsequent_formatversion=_add_empty_row(current_row.formatversion),
133-
)
152+
else:
153+
# if no match found - label as REMOVED
154+
result.append(
155+
AhbRowComparison(
156+
previous_formatversion=current_row,
157+
diff=AhbRowDiff(diff_type=DiffType.REMOVED),
158+
subsequent_formatversion=_add_empty_row(current_row.formatversion),
134159
)
135-
i += 1
160+
)
161+
i += 1
136162

137163
return result

unittests/test_ahb_comparison.py

Lines changed: 117 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -871,7 +871,7 @@ def test_align_rows(self) -> None:
871871
assert "segment_group_key" in str(result[8].diff.changed_entries)
872872
assert "segment_group_key" in str(result[9].diff.changed_entries)
873873

874-
def test_align_rows_multiple_entries_per_section_name(self) -> None:
874+
def test_align_rows_multiple_rows_per_section_name(self) -> None:
875875
previous_ahb_rows = [
876876
AhbRow(
877877
formatversion=self.formatversions.previous_formatversion,
@@ -883,21 +883,21 @@ def test_align_rows_multiple_entries_per_section_name(self) -> None:
883883
AhbRow(
884884
formatversion=self.formatversions.previous_formatversion,
885885
section_name="1",
886-
segment_group_key="a",
886+
segment_group_key="b",
887887
value_pool_entry=None,
888888
name=None,
889889
),
890890
AhbRow(
891891
formatversion=self.formatversions.previous_formatversion,
892892
section_name="1",
893-
segment_group_key="a",
893+
segment_group_key="c",
894894
value_pool_entry=None,
895895
name=None,
896896
),
897897
AhbRow(
898898
formatversion=self.formatversions.previous_formatversion,
899899
section_name="2",
900-
segment_group_key="b",
900+
segment_group_key="a",
901901
value_pool_entry=None,
902902
name=None,
903903
),
@@ -911,7 +911,7 @@ def test_align_rows_multiple_entries_per_section_name(self) -> None:
911911
AhbRow(
912912
formatversion=self.formatversions.previous_formatversion,
913913
section_name="2",
914-
segment_group_key="b",
914+
segment_group_key="c",
915915
value_pool_entry=None,
916916
name=None,
917917
),
@@ -927,14 +927,14 @@ def test_align_rows_multiple_entries_per_section_name(self) -> None:
927927
AhbRow(
928928
formatversion=self.formatversions.subsequent_formatversion,
929929
section_name="1",
930-
segment_group_key="a",
930+
segment_group_key="b",
931931
value_pool_entry=None,
932932
name=None,
933933
),
934934
AhbRow(
935935
formatversion=self.formatversions.subsequent_formatversion,
936936
section_name="1",
937-
segment_group_key="x",
937+
segment_group_key="c",
938938
value_pool_entry=None,
939939
name=None,
940940
),
@@ -955,7 +955,7 @@ def test_align_rows_multiple_entries_per_section_name(self) -> None:
955955
AhbRow(
956956
formatversion=self.formatversions.subsequent_formatversion,
957957
section_name="2",
958-
segment_group_key="x",
958+
segment_group_key="c",
959959
value_pool_entry=None,
960960
name=None,
961961
),
@@ -966,17 +966,122 @@ def test_align_rows_multiple_entries_per_section_name(self) -> None:
966966
assert len(result) == 6
967967
assert result[0].diff.diff_type == DiffType.MODIFIED
968968
assert result[1].diff.diff_type == DiffType.UNCHANGED
969-
assert result[2].diff.diff_type == DiffType.MODIFIED
969+
assert result[2].diff.diff_type == DiffType.UNCHANGED
970970
assert result[3].diff.diff_type == DiffType.MODIFIED
971971
assert result[4].diff.diff_type == DiffType.UNCHANGED
972-
assert result[5].diff.diff_type == DiffType.MODIFIED
972+
assert result[5].diff.diff_type == DiffType.UNCHANGED
973973

974974
assert "segment_group_key" in str(result[0].diff.changed_entries)
975975
assert not result[1].diff.changed_entries
976-
assert "segment_group_key" in str(result[2].diff.changed_entries)
976+
assert not result[2].diff.changed_entries
977977
assert "segment_group_key" in str(result[3].diff.changed_entries)
978978
assert not result[4].diff.changed_entries
979-
assert "segment_group_key" in str(result[5].diff.changed_entries)
979+
assert not result[5].diff.changed_entries
980+
981+
def test_align_rows_all_ahb_properties_within_section_name(self) -> None:
982+
# to handle rows that have been inserted at arbitrary positions within the same `section_name` group
983+
# for example FV2504 UTILMD 55078 (SG3 Ansprechpartner):
984+
# an empty row (NEW) should be added at the top of `previous_ahb_rows` so that the AhbRows align properly
985+
previous_ahb_rows = [
986+
AhbRow( # equivalent to second AhbRow of `subsequent_ahb_rows`
987+
formatversion=self.formatversions.previous_formatversion,
988+
section_name="Ansprechpartner",
989+
segment_group_key="SG3",
990+
segment_code="CTA",
991+
data_element=None,
992+
segment_id="00009",
993+
value_pool_entry=None,
994+
name=None,
995+
ahb_expression="Muss",
996+
conditions=None,
997+
),
998+
AhbRow(
999+
formatversion=self.formatversions.previous_formatversion,
1000+
section_name="Ansprechpartner",
1001+
segment_group_key="SG3",
1002+
segment_code="CTA",
1003+
data_element="3139",
1004+
segment_id="00009",
1005+
value_pool_entry="IC",
1006+
name="Informationskontakt",
1007+
ahb_expression="X",
1008+
conditions=None,
1009+
),
1010+
AhbRow(
1011+
formatversion=self.formatversions.previous_formatversion,
1012+
section_name="Ansprechpartner",
1013+
segment_group_key="SG3",
1014+
segment_code="CTA",
1015+
data_element="3412",
1016+
segment_id="00009",
1017+
value_pool_entry=None,
1018+
name="Name vom Ansprechpartner",
1019+
ahb_expression="X",
1020+
conditions=None,
1021+
),
1022+
]
1023+
subsequent_ahb_rows = [
1024+
AhbRow( # NEW
1025+
formatversion=self.formatversions.subsequent_formatversion,
1026+
section_name="Ansprechpartner",
1027+
segment_group_key="SG3",
1028+
segment_code=None,
1029+
data_element=None,
1030+
segment_id=None,
1031+
value_pool_entry=None,
1032+
name=None,
1033+
ahb_expression="Kann",
1034+
conditions=None,
1035+
),
1036+
AhbRow( # equivalent to first AhbRow of `previous_ahb_rows`
1037+
formatversion=self.formatversions.subsequent_formatversion,
1038+
section_name="Ansprechpartner",
1039+
segment_group_key="SG3",
1040+
segment_code="CTA",
1041+
data_element=None,
1042+
segment_id="00009",
1043+
value_pool_entry=None,
1044+
name=None,
1045+
ahb_expression="Muss",
1046+
conditions=None,
1047+
),
1048+
AhbRow(
1049+
formatversion=self.formatversions.subsequent_formatversion,
1050+
section_name="Ansprechpartner",
1051+
segment_group_key="SG3",
1052+
segment_code="CTA",
1053+
data_element="3139",
1054+
segment_id="00009",
1055+
value_pool_entry="IC",
1056+
name="Informationskontakt",
1057+
ahb_expression="X",
1058+
conditions=None,
1059+
),
1060+
AhbRow(
1061+
formatversion=self.formatversions.subsequent_formatversion,
1062+
section_name="Ansprechpartner",
1063+
segment_group_key="SG3",
1064+
segment_code="CTA",
1065+
data_element="3412",
1066+
segment_id="00009",
1067+
value_pool_entry=None,
1068+
name="Name vom Ansprechpartner",
1069+
ahb_expression="X",
1070+
conditions=None,
1071+
),
1072+
]
1073+
1074+
result = align_ahb_rows(previous_ahb_rows, subsequent_ahb_rows)
1075+
1076+
assert len(result) == 4
1077+
assert result[0].diff.diff_type == DiffType.ADDED
1078+
assert result[1].diff.diff_type == DiffType.UNCHANGED
1079+
assert result[2].diff.diff_type == DiffType.UNCHANGED
1080+
assert result[3].diff.diff_type == DiffType.UNCHANGED
1081+
1082+
assert not result[1].diff.changed_entries
1083+
assert not result[2].diff.changed_entries
1084+
assert not result[3].diff.changed_entries
9801085

9811086
def test_align_rows_different_column_sets(self) -> None:
9821087
previous_ahb_rows = [
@@ -1019,12 +1124,10 @@ def test_align_rows_different_column_sets(self) -> None:
10191124
result = align_ahb_rows(previous_ahb_rows, subsequent_ahb_rows)
10201125

10211126
assert len(result) == 3
1022-
# Check sequence: removed, modified, added
10231127
assert result[0].diff.diff_type == DiffType.REMOVED
10241128
assert result[1].diff.diff_type == DiffType.MODIFIED
10251129
assert result[2].diff.diff_type == DiffType.ADDED
10261130

1027-
# Verify specific changes in the modified row
10281131
changed_entries = str(result[1].diff.changed_entries)
10291132
assert "data_element" in changed_entries
10301133
assert "value_pool_entry" in changed_entries

0 commit comments

Comments
 (0)