keras-team · fchollet · Oct 21, 2024 · Oct 17, 2024
diff --git a/examples/audio/transformer_asr.py b/examples/audio/transformer_asr.py
@@ -30,7 +30,7 @@
 - [LJSpeech Dataset](https://keithito.com/LJ-Speech-Dataset/)
 """
 
-
+import re
 import os
 
 os.environ["KERAS_BACKEND"] = "tensorflow"
@@ -289,6 +289,8 @@ def generate(self, source, target_start_token_idx):
 takes ~5 minutes for the extraction of files.
 """
 
+pattern_wav_name = re.compile(r'([^/\\\.]+)')
+
 keras.utils.get_file(
     os.path.join(os.getcwd(), "data.tar.gz"),
     "https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2",
@@ -313,7 +315,7 @@ def get_data(wavs, id_to_text, maxlen=50):
     """returns mapping of audio paths and transcription texts"""
     data = []
     for w in wavs:
-        id = w.split("/")[-1].split(".")[0]
+        id = pattern_wav_name.split(w)[-4]
         if len(id_to_text[id]) < maxlen:
             data.append({"audio": w, "text": id_to_text[id]})
     return data