-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdonna_add_song.py
More file actions
72 lines (63 loc) · 2.94 KB
/
donna_add_song.py
File metadata and controls
72 lines (63 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *
from oauth2client.tools import argparser
from gtts import gTTS
import os
import pyaudio
# Directory holding the PocketSphinx en-us acoustic model; the custom
# language model and dictionary (7864.*) are stored in the same directory.
default_path = '/home/pi/projects/speech_recognition/pocketsphinx/model/en-us/en-us'

# Build the decoder configuration. Options are applied in this order:
#   -hmm   : acoustic model directory
#   -lm    : language model, i.e. which word sequences the decoder may recognize
#   -dict  : pronunciation dictionary mapping vocabulary words to phonemes
#   -logfn : decoder log destination (/dev/null discards the log output)
config = Decoder.default_config()
for _option, _value in (
    ('-hmm', default_path),
    ('-lm', default_path + '/7864.lm'),
    ('-dict', default_path + '/7864.dic'),
    ('-logfn', '/dev/null'),
):
    config.set_string(_option, _value)
decoder = Decoder(config)

# Command-line defaults for the YouTube search request; the parsed `args`
# object is what later gets handed to search_song.youtube_search().
argparser.add_argument("--q", default="Google", help="Search term")
argparser.add_argument("--max-results", default=1, help="Max results")
args = argparser.parse_args()

# Open the microphone as a mono, 16-bit, 16 kHz input stream that is read
# in buffers of 1024 frames.
p = pyaudio.PyAudio()
stream = p.open(
    rate=16000,                # samples captured per second
    channels=1,                # mono
    format=pyaudio.paInt16,    # 16-bit signed integer samples
    frames_per_buffer=1024,    # frames delivered per buffer
    input=True,                # capture stream (PyAudio defaults to False)
)
# Explicitly start capturing audio from the input device.
stream.start_stream()
def requested_donna_audio():
    """Speak Donna's acknowledgement through the ALSA audio output.

    The phrase is synthesized with Google text-to-speech into a temporary
    MP3 file in the current working directory, played with ``mpg321`` and
    then deleted. The temporary file is removed even when synthesis or
    playback fails, so a failed request does not leave a stale
    ``response_donna.mp3`` behind. Any exception raised by gTTS still
    propagates to the caller.
    """
    response_donna = "Hello Kevin, sure give me a moment!"
    mp3_path = 'response_donna.mp3'
    try:
        # gTTS can only write MP3, hence the intermediate file.
        tts = gTTS(text=response_donna, lang='en')
        tts.save(mp3_path)
        # Alsa is used as the output device to play the MP3.
        os.system('mpg321 -o alsa response_donna.mp3')
    finally:
        # save() can fail before (or after partially) creating the file,
        # so only remove it when it is actually there.
        if os.path.exists(mp3_path):
            os.remove(mp3_path)
# Main listening loop: feed microphone audio to the decoder and react to the
# voice command "DONNA ADD SONG".
#
# in_speech_bf remembers whether the previous buffer contained speech, so a
# True -> False transition marks the end of a spoken utterance.
in_speech_bf = False
# Start listening
decoder.start_utt()
try:
    while True:
        # Read the next chunk of captured audio; overflows are ignored
        # instead of raising so that a slow iteration does not abort the loop.
        sound_buffer = stream.read(1024, exception_on_overflow=False)
        if not sound_buffer:
            break

        # Pass the chunk to the decoder (not "no_search", not "full_utt").
        decoder.process_raw(sound_buffer, False, False)

        # Only act when the speech/silence state changed with this buffer.
        in_speech_now = decoder.get_in_speech()
        if in_speech_now == in_speech_bf:
            continue
        in_speech_bf = in_speech_now
        if in_speech_bf:
            # Silence -> speech: keep collecting audio for this utterance.
            continue

        # Speech -> silence: the utterance is complete, fetch the result.
        decoder.end_utt()
        hypothesis = decoder.hyp()
        # hyp() yields None when nothing was recognized; skip such
        # utterances instead of failing on the missing .hypstr attribute.
        if hypothesis is not None:
            result = hypothesis.hypstr
            # Parenthesized form prints the same text on Python 2 and 3.
            print(result)
            # we could also do something like if "DONNA ADD SONG" in result:
            if result == "DONNA ADD SONG":
                requested_donna_audio()
                # Imported lazily, only once the command was actually heard.
                import search_song
                search_song.youtube_search(args)
        # Begin the next utterance.
        decoder.start_utt()

    # The stream returned no data: close the utterance that is still open.
    decoder.end_utt()
finally:
    # Release the audio device however the loop ends (including Ctrl-C).
    stream.stop_stream()
    stream.close()
    p.terminate()