@@ -31,6 +31,10 @@ class VoiceControl {
3131 this . transcriptHistory = [ ] ;
3232 this . maxHistory = 50 ;
3333
34+ // Audio source: 'video' or 'microphone'
35+ this . audioSource = options . audioSource || 'video' ;
36+ this . videoElement = options . videoElement || null ;
37+
3438 this . _loadRules ( ) ;
3539 }
3640
@@ -107,6 +111,8 @@ class VoiceControl {
107111
108112 async startAudioCapture ( ) {
109113 try {
114+ // Mute video when using microphone to avoid echo
115+ if ( this . videoElement ) this . videoElement . muted = true ;
110116 this . mediaStream = await navigator . mediaDevices . getUserMedia ( {
111117 audio : {
112118 channelCount : 1 ,
@@ -170,6 +176,61 @@ class VoiceControl {
170176 this . audioChunks = [ ] ;
171177 }
172178
179+ async startVideoAudioCapture ( videoElement ) {
180+ try {
181+ // Unmute video so MediaElementSource receives audio
182+ videoElement . muted = false ;
183+ videoElement . volume = 0.3 ;
184+
185+ this . audioContext = new ( window . AudioContext || window . webkitAudioContext ) ( {
186+ sampleRate : this . sampleRate
187+ } ) ;
188+
189+ // MediaElementSource can only be created once per element
190+ if ( ! videoElement . _mediaElementSource ) {
191+ videoElement . _mediaElementSource = this . audioContext . createMediaElementSource ( videoElement ) ;
192+ }
193+ const source = videoElement . _mediaElementSource ;
194+
195+ // Must connect to destination so user can still hear the video
196+ source . connect ( this . audioContext . destination ) ;
197+
198+ this . analyser = this . audioContext . createAnalyser ( ) ;
199+ this . analyser . fftSize = 256 ;
200+ source . connect ( this . analyser ) ;
201+
202+ this . processor = this . audioContext . createScriptProcessor ( 4096 , 1 , 1 ) ;
203+ source . connect ( this . processor ) ;
204+ this . processor . connect ( this . audioContext . destination ) ;
205+
206+ this . audioChunks = [ ] ;
207+
208+ this . processor . onaudioprocess = ( event ) => {
209+ if ( ! this . isRunning ) return ;
210+ const inputData = event . inputBuffer . getChannelData ( 0 ) ;
211+ this . audioChunks . push ( new Float32Array ( inputData ) ) ;
212+ } ;
213+
214+ this . _monitorAudioLevel ( ) ;
215+
216+ console . log ( '[VoiceControl] Video audio capture started' ) ;
217+ return true ;
218+ } catch ( error ) {
219+ console . error ( '[VoiceControl] Video audio capture failed:' , error ) ;
220+ this . onStatusUpdate ( 'Video audio capture failed: ' + error . message ) ;
221+ return false ;
222+ }
223+ }
224+
225+ setAudioSource ( source , videoElement ) {
226+ this . audioSource = source ;
227+ if ( videoElement ) this . videoElement = videoElement ;
228+ if ( this . isRunning ) {
229+ this . stop ( ) ;
230+ this . start ( ) ;
231+ }
232+ }
233+
173234 _monitorAudioLevel ( ) {
174235 if ( ! this . analyser || ! this . isRunning ) return ;
175236
@@ -340,9 +401,13 @@ class VoiceControl {
340401 if ( ! loaded ) return false ;
341402 }
342403
343- const capturing = await this . startAudioCapture ( ) ;
404+ let capturing ;
405+ if ( this . audioSource === 'video' && this . videoElement ) {
406+ capturing = await this . startVideoAudioCapture ( this . videoElement ) ;
407+ } else {
408+ capturing = await this . startAudioCapture ( ) ;
409+ }
344410 if ( ! capturing ) return false ;
345-
346411 this . isRunning = true ;
347412
348413 this . processingInterval = setInterval ( ( ) => {
@@ -415,6 +480,8 @@ class VoiceTriggersApp {
415480 this . voiceControl = new VoiceControl ( {
416481 chunkDuration : parseInt ( document . getElementById ( 'chunkDuration' ) . value ) * 1000 ,
417482 cooldown : parseInt ( document . getElementById ( 'cooldown' ) . value ) * 1000 ,
483+ audioSource : 'video' ,
484+ videoElement : document . getElementById ( 'video' ) ,
418485
419486 onTranscript : ( text , entry ) => this . handleTranscript ( text , entry ) ,
420487 onRuleTriggered : ( info ) => this . handleRuleTrigger ( info ) ,
@@ -438,6 +505,16 @@ class VoiceTriggersApp {
438505 cameraSelect . disabled = source === 'sample' ;
439506 if ( refreshBtn ) refreshBtn . disabled = source === 'sample' ;
440507 this . reasoningConsole . logInfo ( `Switched to ${ source === 'camera' ? 'live camera' : 'sample video' } ` ) ;
508+
509+ // Auto-switch audio source based on video source
510+ const audioSourceSelect = document . getElementById ( 'audioSourceSelect' ) ;
511+ if ( source === 'sample' ) {
512+ if ( audioSourceSelect ) audioSourceSelect . value = 'video' ;
513+ this . voiceControl . setAudioSource ( 'video' , videoElement ) ;
514+ } else {
515+ if ( audioSourceSelect ) audioSourceSelect . value = 'microphone' ;
516+ this . voiceControl . setAudioSource ( 'microphone' , videoElement ) ;
517+ }
441518 }
442519 } ) ;
443520 VideoSourceAdapter . switchToSample ( ) . catch ( ( ) => {
@@ -449,6 +526,23 @@ class VoiceTriggersApp {
449526 initEventListeners ( ) {
450527 document . getElementById ( 'startBtn' ) . addEventListener ( 'click' , ( ) => this . toggleVoice ( ) ) ;
451528
529+ // Audio source toggle
530+ const audioSourceSelect = document . getElementById ( 'audioSourceSelect' ) ;
531+ if ( audioSourceSelect ) {
532+ audioSourceSelect . addEventListener ( 'change' , ( e ) => {
533+ const source = e . target . value ;
534+ const video = document . getElementById ( 'video' ) ;
535+ if ( source === 'video' ) {
536+ video . muted = false ;
537+ video . volume = 0.3 ;
538+ } else {
539+ video . muted = true ;
540+ }
541+ this . voiceControl . setAudioSource ( source , video ) ;
542+ this . reasoningConsole . logInfo ( `Audio source: ${ source === 'video' ? 'Video Audio' : 'Microphone' } ` ) ;
543+ } ) ;
544+ }
545+
452546 document . getElementById ( 'chunkDuration' ) . addEventListener ( 'input' , ( e ) => {
453547 const value = e . target . value ;
454548 document . getElementById ( 'chunkDurationValue' ) . textContent = `${ value } s` ;
0 commit comments