@@ -152,11 +152,13 @@ def align_chunks(
152152 """
153153 tokenizer_case = _get_processor_case (processor ) # determine if processor is cased or uncased
154154 chunk_mappings = []
155- for speech in metadata .speeches :
155+ for speech_idx , speech in enumerate (metadata .speeches ):
156+ speech_id = speech .speech_id if speech .speech_id is not None else speech_idx
156157 emissions_filepath = Path (emissions_dir ) / speech .probs_path
157158 emissions = np .load (emissions_filepath )
158159
159160 for i , chunk in enumerate (speech .chunks ):
161+ chunk .id = f"{ speech_id } -{ i } "
160162 normalized_tokens , mapping = text_normalizer_fn (chunk .text )
161163 emissions_chunk = emissions [i ]
162164 emissions_chunk = emissions_chunk [: chunk .num_logits ]
@@ -177,6 +179,8 @@ def align_chunks(
177179 alignment_mapping = process_fallback_alignment (
178180 mapping , chunk .start , chunk .end , chunk .text , tokenizer , None , ndigits
179181 )
182+ for j , seg in enumerate (alignment_mapping ):
183+ seg .id = f"{ speech_id } -{ i } -{ j } "
180184 chunk_mappings .extend (alignment_mapping )
181185 speech .alignments .extend (alignment_mapping )
182186 continue
@@ -225,6 +229,8 @@ def align_chunks(
225229 )
226230
227231 alignment_mapping = encode_alignments (mapping , ndigits = ndigits )
232+ for j , seg in enumerate (alignment_mapping ):
233+ seg .id = f"{ speech_id } -{ i } -{ j } "
228234
229235 chunk_mappings .extend (alignment_mapping )
230236 speech .alignments .extend (alignment_mapping )
@@ -253,7 +259,8 @@ def align_speech(
253259) -> list :
254260 tokenizer_case = _get_processor_case (processor )
255261 speech_mappings = []
256- for speech in metadata .speeches :
262+ for speech_idx , speech in enumerate (metadata .speeches ):
263+ speech_id = speech .speech_id if speech .speech_id is not None else speech_idx
257264 emissions_filepath = Path (emissions_dir ) / speech .probs_path
258265 emissions = np .load (emissions_filepath )
259266 emissions = np .vstack (emissions )
@@ -296,6 +303,8 @@ def align_speech(
296303 speech .text_spans ,
297304 ndigits ,
298305 )
306+ for j , seg in enumerate (alignment_mapping ):
307+ seg .id = f"{ speech_id } -{ j } "
299308 speech .alignments .extend (alignment_mapping )
300309 speech_mappings .extend (alignment_mapping )
301310 if delete_emissions :
@@ -346,6 +355,8 @@ def align_speech(
346355 )
347356
348357 alignment_mapping = encode_alignments (mapping , ndigits = ndigits )
358+ for j , seg in enumerate (alignment_mapping ):
359+ seg .id = f"{ speech_id } -{ j } "
349360 speech .alignments .extend (alignment_mapping )
350361 speech_mappings .extend (alignment_mapping )
351362
0 commit comments