Skip to content

Commit ea14c52

Browse files
Normalize cluster names by removing extra whitespace in dialog2trajectories function
1 parent 7d5660d commit ea14c52

1 file changed

Lines changed: 2 additions & 1 deletion

File tree

src/sdialog/evaluation/dialog2flow/extract_trajectories.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
1010
@author: Sergio Burdisso (sergio.burdisso@idiap.ch)
1111
"""
12+
import re
1213
import os
1314
import json
1415
import torch
@@ -368,7 +369,7 @@ def dialog2trajectories(
368369
cluster_name = cluster_topk_utts[tid]["utterances"][0]
369370
else:
370371
cluster_name = cluster_topk_utts[tid]["name"]
371-
normalized_turn_names[speaker][tid] = {"name": f"{tid}_" + cluster_name,
372+
normalized_turn_names[speaker][tid] = {"name": f"{tid}_" + re.sub(r"\s+", " ", cluster_name).strip(),
372373
"info": cluster_topk_utts[tid],
373374
"id": f"{speaker[0].lower()}{tid}"}
374375

0 commit comments

Comments
 (0)