forked from hackabit19/_destroyingRecursively_
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspeech_to_text.py
More file actions
70 lines (60 loc) · 2.38 KB
/
speech_to_text.py
File metadata and controls
70 lines (60 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pyaudio
from google.cloud import speech_v1
from google.cloud.speech_v1 import enums
from google.oauth2 import service_account
import os
import io
import wave
class STT:
    """Record audio from an input device and transcribe it with Google Cloud Speech.

    An instance holds a PyAudio handle, a ``SpeechClient`` and the recognition
    config, so ``voice_recognize`` can be called repeatedly. Call ``close()``
    once the instance is no longer needed to release the PyAudio handle.
    """

    def __init__(self, *args, **kwargs):
        """Load credentials and create the audio handle and speech client.

        Credentials are read from the service-account file ``stt.json``
        (relative path, so it is resolved against the current working
        directory). Extra positional and keyword arguments are accepted for
        call-site compatibility and ignored.
        """
        self.credentials = service_account.Credentials.from_service_account_file(
            "stt.json")
        # Capture settings: 16-bit samples, mono, 44.1 kHz, 4096 frames per read.
        self.form_1 = pyaudio.paInt16
        self.chans = 1
        self.samp_rate = 44100
        self.chunk = 4096
        # NOTE(review): input device index is hard-coded and machine-specific;
        # confirm it matches the microphone on the target host.
        self.dev_index = 2
        # Scratch file the recording is written to before upload.
        self.wav_output_filename = 'recording.wav'
        self.language_code = "en-US"
        self.audio = pyaudio.PyAudio()
        self.client = speech_v1.SpeechClient(credentials=self.credentials)
        encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16
        # The sample rate sent to the service is the same one used for capture.
        self.config = {
            "language_code": self.language_code,
            "sample_rate_hertz": self.samp_rate,
            "encoding": encoding,
        }

    def close(self):
        """Release the PyAudio handle; the instance must not record afterwards."""
        self.audio.terminate()

    def _record(self, record_secs):
        """Read ``record_secs`` seconds of audio and return the raw chunks as a list."""
        stream = self.audio.open(format=self.form_1, rate=self.samp_rate,
                                 channels=self.chans,
                                 input_device_index=self.dev_index, input=True,
                                 frames_per_buffer=self.chunk)
        print("recording")
        try:
            # Whole chunks only: a trailing partial chunk is dropped by int().
            n_chunks = int((self.samp_rate / self.chunk) * record_secs)
            frames = [
                stream.read(self.chunk, exception_on_overflow=False)
                for _ in range(n_chunks)
            ]
            print("finished recording")
        finally:
            # Always give the device back, even when a read fails.
            stream.stop_stream()
            stream.close()
        return frames

    def _write_wav(self, frames):
        """Write the recorded chunks to ``self.wav_output_filename`` as a WAV file."""
        wavefile = wave.open(self.wav_output_filename, 'wb')
        try:
            wavefile.setnchannels(self.chans)
            wavefile.setsampwidth(self.audio.get_sample_size(self.form_1))
            wavefile.setframerate(self.samp_rate)
            wavefile.writeframes(b''.join(frames))
        finally:
            wavefile.close()

    def voice_recognize(self, record_secs, *args, **kwargs):
        """Record from the input device and return the recognized text.

        Args:
            record_secs: Recording length in seconds. A value that yields less
                than one full chunk records nothing.
            *args, **kwargs: Accepted and ignored.

        Returns:
            The first alternative's transcript of every result, concatenated
            in order; an empty string when the response has no results.

        Raises:
            Whatever the audio stream, file I/O or ``SpeechClient.recognize``
            raise; the stream is closed and the scratch WAV file is removed
            before the exception propagates.
        """
        frames = self._record(record_secs)
        try:
            self._write_wav(frames)
            with io.open(self.wav_output_filename, "rb") as f:
                content = f.read()
            audio = {"content": content}
            response = self.client.recognize(self.config, audio)
        finally:
            # The scratch file must not outlive the call, success or failure.
            if os.path.exists(self.wav_output_filename):
                os.remove(self.wav_output_filename)
        # Results without any alternative are skipped instead of raising IndexError.
        return "".join(
            result.alternatives[0].transcript
            for result in response.results
            if result.alternatives
        )
if __name__ == "__main__":
    # Manual smoke test: take two consecutive 3-second recordings with the
    # same STT instance and print the text returned for each.
    recognizer = STT()
    for _ in range(2):
        print(recognizer.voice_recognize(3))