1
+ from os import fsdecode
2
+ from mlpm .solver import Solver
3
+ from deepspeech import Model
4
+ import wave
5
+ import shlex
6
+ import subprocess
7
+ import numpy as np
8
+ from textblob import TextBlob
9
+ import textblob
10
+ try :
11
+ from shhlex import quote
12
+ except ImportError :
13
+ from pipes import quote
14
+
15
def convert_sample_rate(audio_path, desired_sample_rate):
    """Resample an audio file to raw 16-bit mono PCM by shelling out to SoX.

    Args:
        audio_path: Path of the audio file handed to ``sox`` as its input.
        desired_sample_rate: Target sample rate in Hz, passed to ``--rate``.

    Returns:
        A ``(desired_sample_rate, samples)`` tuple where ``samples`` is a
        read-only ``numpy.int16`` array viewing the bytes SoX wrote to stdout.

    Raises:
        RuntimeError: If SoX exits with a non-zero status.
        OSError: If the ``sox`` executable could not be launched.
    """
    # Pass an argument list directly instead of formatting a shell-style
    # string and re-splitting it: no quoting helper is needed and paths with
    # spaces or shell metacharacters reach SoX unchanged.
    sox_cmd = [
        'sox', audio_path,
        '--type', 'raw',
        '--bits', '16',
        '--channels', '1',
        '--rate', str(desired_sample_rate),
        '--encoding', 'signed-integer',
        '--endian', 'little',
        '--compression', '0.0',
        '--no-dither',
        '-',  # write the converted audio to stdout
    ]
    try:
        output = subprocess.check_output(sox_cmd, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        # stderr is captured as bytes; decode so the message is readable
        # rather than a b'...' repr.
        stderr = e.stderr
        if isinstance(stderr, bytes):
            stderr = stderr.decode(errors='replace')
        raise RuntimeError(
            'SoX returned non-zero status: {}'.format(stderr)
        ) from e
    except OSError as e:
        raise OSError(
            e.errno,
            'SoX not found, use {}hz files or install it: {}'.format(
                desired_sample_rate, e.strerror
            ),
        ) from e

    return desired_sample_rate, np.frombuffer(output, np.int16)
25
+
26
class speechSolver(Solver):
    """Solver that transcribes a WAV file with DeepSpeech and scores sentiment.

    The transcript produced by the DeepSpeech model is split into sentences
    by TextBlob, and each sentence is reported with its polarity and
    subjectivity.
    """

    def __init__(self, toml_file=None):
        """Load the DeepSpeech model and its external scorer.

        Args:
            toml_file: Forwarded unchanged to ``Solver.__init__``.
        """
        super().__init__(toml_file)
        # Model and scorer are loaded from paths relative to the working
        # directory.
        self.ds = Model("pretrained/deepspeech-0.9.3-models.pbmm")
        self.scorepath = "pretrained/deepspeech-0.9.3-models.scorer"
        self.ds.enableExternalScorer(self.scorepath)
        # Sample rate the model reports; input audio is resampled to it.
        self.desired_sample_rate = self.ds.sampleRate()
        # NOTE(review): ready() comes from the Solver base class; presumably
        # it signals that initialisation is complete -- confirm against mlpm.
        self.ready()

    def infer(self, data):
        """Transcribe the uploaded audio file and analyse its sentiment.

        Args:
            data: Mapping whose ``'input_file_path'`` entry is the path of a
                WAV file readable by the ``wave`` module.

        Returns:
            A dict with the keys ``"transcript"`` (recognised text),
            ``"audio_length"`` (frame count divided by the original frame
            rate, i.e. seconds), ``"resampled"`` (whether SoX conversion was
            needed) and ``"sentiment"`` (one dict per sentence holding
            ``'sentence'``, ``'polarity'`` and ``'subjectivity'``).
        """
        audio_path = data['input_file_path']

        # The context manager guarantees the WAV handle is closed even when
        # reading or resampling fails.
        with wave.open(audio_path, 'rb') as fin:
            fs_orig = fin.getframerate()
            n_frames = fin.getnframes()
            resampled = fs_orig != self.desired_sample_rate
            if not resampled:
                # assumes 16-bit samples, since the frames are viewed as
                # int16 -- TODO confirm inputs are always 16-bit PCM
                audio = np.frombuffer(fin.readframes(n_frames), np.int16)

        if resampled:
            # SoX re-reads the file from disk, so the handle above is not
            # needed for the conversion.
            _, audio = convert_sample_rate(audio_path, self.desired_sample_rate)

        audio_length = n_frames * (1 / fs_orig)

        result = self.ds.stt(audio)
        sentiment = [
            {
                'sentence': str(sentence),
                'polarity': sentence.sentiment.polarity,
                'subjectivity': sentence.sentiment.subjectivity,
            }
            for sentence in TextBlob(result).sentences
        ]
        return {
            "transcript": result,
            "audio_length": audio_length,
            "resampled": resampled,
            "sentiment": sentiment,
        }
0 commit comments