diff --git a/python/vosk/transcriber/transcriber.py b/python/vosk/transcriber/transcriber.py index a15cd956..f8a592ef 100644 --- a/python/vosk/transcriber/transcriber.py +++ b/python/vosk/transcriber/transcriber.py @@ -6,7 +6,7 @@ import datetime import shlex import subprocess - +from tqdm.auto import tqdm from vosk import KaldiRecognizer, Model from queue import Queue from timeit import default_timer as timer @@ -22,15 +22,36 @@ def __init__(self, args): self.args = args self.queue = Queue() - def recognize_stream(self, rec, stream): + def recognize_stream(self, rec, stream, duration): + tot_samples = 0 result = [] + show_bar=self.args.show_progress_bar + + progress_bar = tqdm( + total=int(duration * SAMPLE_RATE), + unit="samples", + unit_scale=True, + unit_divisor=SAMPLE_RATE, + desc="Processing", + disable=not show_bar + ) + + + while True: data = stream.stdout.read(CHUNK_SIZE) + if len(data) == 0: + progress_bar.update(int(duration)) break + progress_bar.update((len(data) // 2) ) + + + + tot_samples += len(data) if rec.AcceptWaveform(data): @@ -42,9 +63,11 @@ def recognize_stream(self, rec, stream): if jres["partial"] != "": logging.info(jres) + progress_bar.close() jres = json.loads(rec.FinalResult()) result.append(jres) + return result, tot_samples async def recognize_stream_server(self, proc): @@ -99,7 +122,7 @@ def format_result(self, result, words_per_line=7): monologues = {"schemaVersion":"2.0", "monologues":[], "text":[]} for part in result: if part["text"] != "": - monologues["text"] += part["text"] + monologue["text"] += part["text"] for _, res in enumerate(result): if not "result" in res: continue @@ -133,12 +156,6 @@ async def server_worker(self): start_time = timer() proc = await self.resample_ffmpeg_async(input_file) result, tot_samples = await self.recognize_stream_server(proc) - await proc.wait() - - # Bad input, continue - if tot_samples == 0: - self.queue.task_done() - continue processed_result = self.format_result(result) if output_file != "": @@ -148,6 +165,8 @@ async def server_worker(self): else: print(processed_result) + await proc.wait() + elapsed = timer() - start_time logging.info("Execution time: {:.3f} sec; "\ "xRT {:.3f}".format(elapsed, float(elapsed) * (2 * SAMPLE_RATE) / tot_samples)) @@ -157,6 +176,15 @@ def pool_worker(self, inputdata): logging.info("Recognizing {}".format(inputdata[0])) start_time = timer() + # Get the duration of the input file in seconds + duration = float( + subprocess.check_output( + ["ffprobe", "-v", "quiet", "-print_format", "compact=print_section=0:nokey=1:escape=csv", + "-show_entries", "format=duration", inputdata[0]] + ).decode("utf-8").strip() + ) + total_samples = int(duration * SAMPLE_RATE) + try: stream = self.resample_ffmpeg(inputdata[0]) except FileNotFoundError as e: @@ -168,11 +196,9 @@ def pool_worker(self, inputdata): rec = KaldiRecognizer(self.model, SAMPLE_RATE) rec.SetWords(True) - result, tot_samples = self.recognize_stream(rec, stream) - if tot_samples == 0: - return - + result, tot_samples = self.recognize_stream(rec, stream,duration) processed_result = self.format_result(result) + if inputdata[1] != "": logging.info("File {} processing complete".format(inputdata[1])) with open(inputdata[1], "w", encoding="utf-8") as fh: