Skip to content

Commit 931aa99

Browse files
committed
Fix: Issue where speaker chains were not found in the test
1 parent ec10a4b commit 931aa99

1 file changed

Lines changed: 41 additions & 8 deletions

File tree

test/test-speaker-mapping-integrity.py

Lines changed: 41 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,11 @@ def find_element_by_xml_id(root, uuid):
3939

4040

4141
def process_row(r):
42+
"""
43+
Check gold-standard speaker annotations, including <note type="speaker"> introductions.
44+
- If UUID points to <u>: check its @who.
45+
- If UUID points to <note type="speaker">: find the first <u> after it and check the whole <u> chain.
46+
"""
4247
xml_path = r['protocol_id']
4348
uuid = r['uuid']
4449
folder_type = r['folder_type']
@@ -60,15 +65,43 @@ def process_row(r):
6065
return result
6166

6267
if folder_type == 'is-speaker':
63-
if el.tag.endswith('u') and el.get('who') != expected_person:
64-
result['fail_is_speaker'] = [xml_path, uuid, expected_person, el.get('who')]
65-
logger.error(f"Speaker drift: {uuid} expected {expected_person}, got {el.get('who')}")
66-
if el.tag.endswith('note') and el.get('type') != 'speaker':
67-
result['fail_is_speaker'] = [xml_path, uuid, 'type=speaker', el.get('type')]
68-
logger.error(f"Speaker note drift: {uuid}")
68+
if el.tag.endswith('u'):
69+
who_attr = el.get('who')
70+
if not who_attr or who_attr != expected_person:
71+
result['fail_is_speaker'] = [xml_path, uuid, expected_person, who_attr]
72+
logger.error(f"Speaker drift: {uuid} expected {expected_person}, got {who_attr}")
73+
74+
elif el.tag.endswith('note') and el.get('type') == 'speaker':
75+
first_u = None
76+
for sibling in el.itersiblings():
77+
if sibling.tag.endswith('u'):
78+
first_u = sibling
79+
break
80+
81+
if first_u is None:
82+
result['fail_is_speaker'] = [xml_path, uuid, "no following <u>", None]
83+
logger.error(f"Speaker note {uuid} has no following <u>")
84+
else:
85+
u = first_u
86+
while u is not None:
87+
u_who = u.get('who')
88+
if u_who != expected_person:
89+
result['fail_is_speaker'] = [xml_path, uuid, expected_person, u_who]
90+
logger.error(f"Speaker chain drift: {uuid} expected {expected_person}, got {u_who}")
91+
break
92+
next_id = u.get('next')
93+
if not next_id:
94+
break
95+
u = find_element_by_xml_id(root, next_id)
96+
97+
else:
98+
result['fail_is_speaker'] = [xml_path, uuid, "unexpected tag", el.tag]
99+
logger.error(f"Unexpected tag for is-speaker: {uuid} ({el.tag})")
100+
69101
else:
70-
if el.get('who') or (el.tag.endswith('note') and el.get('type') == 'speaker'):
71-
actual = el.get('who') if el.get('who') else 'type=speaker'
102+
who_attr = el.get('who')
103+
if (who_attr is not None and who_attr.strip() != '') or (el.tag.endswith('note') and el.get('type') == 'speaker'):
104+
actual = who_attr if who_attr else 'type=speaker'
72105
result['fail_non_speaker'] = [xml_path, uuid, actual]
73106
logger.error(f"Non-speaker drift: {uuid} ({actual})")
74107

0 commit comments

Comments
 (0)