@@ -34,6 +34,7 @@ import SourcesPopup from './SourcesPopup';
3434import ToolsPopup from './ToolsPopup' ;
3535import { handleAbort } from '../conversation/conversationSlice' ;
3636import {
37+ AUDIO_FILE_ACCEPT_ATTR ,
3738 FILE_UPLOAD_ACCEPT ,
3839 FILE_UPLOAD_ACCEPT_ATTR ,
3940} from '../constants/fileUpload' ;
@@ -54,6 +55,24 @@ type AudioContextWindow = Window &
5455 webkitAudioContext ?: typeof AudioContext ;
5556 } ;
5657
/**
 * Callback-style `getUserMedia` signature used by the deprecated entry points
 * that live directly on `navigator` (optionally vendor-prefixed) rather than
 * on `navigator.mediaDevices`.
 */
type LegacyGetUserMedia = (
  constraints: MediaStreamConstraints,
  successCallback: (stream: MediaStream) => void,
  errorCallback: (error: DOMException) => void,
) => void;

/**
 * `Navigator` widened with the optional legacy `getUserMedia` variants.
 * Every member is optional because a given browser may expose none of them.
 */
type LegacyNavigator = Navigator & {
  getUserMedia?: LegacyGetUserMedia;
  webkitGetUserMedia?: LegacyGetUserMedia;
  mozGetUserMedia?: LegacyGetUserMedia;
};
75+
5776type LiveAudioSnapshot = {
5877 blob : Blob ;
5978 chunkIndex : number ;
@@ -69,6 +88,90 @@ const getAudioContextConstructor = (): typeof AudioContext | null => {
6988 return audioWindow . AudioContext || audioWindow . webkitAudioContext || null ;
7089} ;
7190
/**
 * Finds a deprecated, callback-style `getUserMedia` implementation on
 * `navigator`.
 *
 * Candidates are tried in the order unprefixed, `webkit`, `moz`. Only callable
 * values are accepted, so a stray non-function property can never be handed to
 * a caller that will invoke it.
 *
 * @returns The first callable legacy implementation, or `null` when
 *   `navigator` is not defined or exposes none.
 */
const getLegacyGetUserMedia = (): NonNullable<
  LegacyNavigator['getUserMedia']
> | null => {
  if (typeof navigator === 'undefined') {
    return null;
  }

  const legacyNavigator = navigator as LegacyNavigator;
  const candidates = [
    legacyNavigator.getUserMedia,
    legacyNavigator.webkitGetUserMedia,
    legacyNavigator.mozGetUserMedia,
  ];

  return (
    candidates.find((candidate) => typeof candidate === 'function') ?? null
  );
};
104+
/**
 * Checks whether live microphone capture can be attempted in the current
 * environment.
 *
 * Checks run in this order: browser globals present, secure context, some
 * `getUserMedia` implementation (modern or legacy), Web Audio constructor.
 *
 * @returns A user-facing explanation of the first failed check, or `null`
 *   when every check passes.
 */
const getVoiceInputSupportError = (): string | null => {
  if (typeof window === 'undefined' || typeof navigator === 'undefined') {
    return 'Voice input is unavailable right now.';
  }

  // Compare against `false` rather than using `!`: browsers that predate
  // `isSecureContext` report `undefined`, and those are the same browsers the
  // legacy getUserMedia lookup below is meant to support. Treating
  // `undefined` as insecure would make that lookup unreachable.
  if (window.isSecureContext === false) {
    return 'Voice input requires a secure connection (HTTPS or localhost).';
  }

  if (!navigator.mediaDevices?.getUserMedia && !getLegacyGetUserMedia()) {
    return 'Voice input is not available in this browser.';
  }

  if (!getAudioContextConstructor()) {
    return 'Voice input requires Web Audio support in this browser.';
  }

  return null;
};
124+
/**
 * Requests a media stream, preferring the promise-based
 * `navigator.mediaDevices.getUserMedia` and falling back to a legacy
 * callback-style implementation when one exists.
 *
 * Failures are always reported through the returned promise. The function
 * never throws synchronously, even when `navigator` itself is missing.
 *
 * @param constraints - Media constraints forwarded unchanged to the browser.
 * @returns A promise for the granted stream. It rejects with the browser's
 *   error, or with an `Error` when no `getUserMedia` implementation exists.
 */
const getUserMediaStream = (
  constraints: MediaStreamConstraints,
): Promise<MediaStream> => {
  const rejectUnavailable = (): Promise<never> =>
    Promise.reject(new Error('Voice input is not available in this browser.'));

  // Same guard the sibling helpers use. Without it the property access below
  // would throw synchronously instead of rejecting.
  if (typeof navigator === 'undefined') {
    return rejectUnavailable();
  }

  if (navigator.mediaDevices?.getUserMedia) {
    return navigator.mediaDevices.getUserMedia(constraints);
  }

  const legacyGetUserMedia = getLegacyGetUserMedia();
  if (!legacyGetUserMedia) {
    return rejectUnavailable();
  }

  // Adapt the callback API to a promise. The legacy function is invoked with
  // `navigator` as its receiver, exactly as if called as a method on it.
  return new Promise((resolve, reject) => {
    legacyGetUserMedia.call(navigator, constraints, resolve, reject);
  });
};
143+
/**
 * Translates a failure from a microphone request into a user-facing message.
 *
 * Resolution order:
 * 1. A context explicitly reported as insecure wins over everything else.
 * 2. Known error names (current and legacy spellings) map to fixed messages.
 * 3. Any other `Error` with a non-empty message is passed through.
 * 4. Otherwise a generic "denied" message is returned.
 *
 * @param error - Whatever the microphone request rejected or called back with.
 * @returns A message suitable for showing to the user.
 */
const getVoiceInputErrorMessage = (error: unknown): string => {
  // `=== false`, not `!`: browsers without `isSecureContext` report
  // `undefined`, which must not be mistaken for an insecure page.
  if (typeof window !== 'undefined' && window.isSecureContext === false) {
    return 'Voice input requires a secure connection (HTTPS or localhost).';
  }

  // Read `name` structurally instead of requiring `instanceof DOMException`.
  // The legacy spellings below come from callback-era error objects that are
  // not DOMException instances, so an instanceof gate would make those cases
  // unreachable.
  const rawName =
    typeof error === 'object' && error !== null
      ? (error as { name?: unknown }).name
      : undefined;

  switch (typeof rawName === 'string' ? rawName : '') {
    case 'NotAllowedError':
    case 'PermissionDeniedError':
    case 'SecurityError':
      return 'Microphone access was blocked. Allow microphone permission and try again.';
    case 'NotFoundError':
    case 'DevicesNotFoundError':
      return 'No microphone was found on this device.';
    case 'NotReadableError':
    case 'TrackStartError':
      return 'The microphone is unavailable or already in use.';
    case 'AbortError':
      return 'Microphone access was interrupted before recording started.';
    default:
      break;
  }

  if (error instanceof Error && error.message) {
    return error.message;
  }

  return 'Microphone access was denied.';
};
174+
72175const downsampleFloat32Buffer = (
73176 source : Float32Array ,
74177 inputSampleRate : number ,
@@ -197,6 +300,7 @@ export default function MessageInput({
197300 const { t } = useTranslation ( ) ;
198301 const [ value , setValue ] = useState ( '' ) ;
199302 const inputRef = useRef < HTMLTextAreaElement > ( null ) ;
303+ const voiceFileInputRef = useRef < HTMLInputElement > ( null ) ;
200304 const sourceButtonRef = useRef < HTMLButtonElement > ( null ) ;
201305 const toolButtonRef = useRef < HTMLButtonElement > ( null ) ;
202306 const [ isSourcesPopupOpen , setIsSourcesPopupOpen ] = useState ( false ) ;
@@ -808,6 +912,48 @@ export default function MessageInput({
808912 } , 0 ) ;
809913 } ;
810914
/**
 * Gives up on live microphone capture and points the user at the audio-file
 * picker instead.
 *
 * Resets the recording state to `'idle'`, shows `message` followed by a hint
 * to pick or record a file, then opens the hidden voice file input.
 *
 * @param message - Explanation of why live capture is not being used. It is
 *   shown verbatim, followed by the file-fallback hint.
 */
const promptVoiceFileFallback = (message: string): void => {
  setRecordingState('idle');
  setVoiceError(`${message} Choose or record an audio file instead.`);

  // The click is deferred to a later task rather than issued inline.
  // NOTE(review): browsers may ignore a programmatic file-input click that is
  // not tied to a user gesture. By the time this runs (after an async
  // permission prompt plus a timer) that gesture may have expired, so confirm
  // the picker actually opens on target browsers.
  setTimeout(() => {
    voiceFileInputRef.current?.click();
  }, 0);
};
922+
/**
 * Sends a user-selected audio file for transcription and applies the returned
 * text to the input.
 *
 * On success the transcript is passed to `applyLiveTranscript`, the recording
 * state returns to `'idle'`, and, when `autoFocus` is set, the text area is
 * focused. On any failure the state becomes `'error'` and a message is stored
 * via `setVoiceError`. Nothing is thrown to the caller.
 *
 * @param file - The audio file chosen through the voice file input.
 */
const transcribeUploadedAudioFile = async (file: File): Promise<void> => {
  // Only the fields this function reads; anything else in the payload is ignored.
  type TranscriptionPayload = {
    success?: boolean;
    message?: string;
    text?: unknown;
  };

  try {
    setVoiceError(null);
    setRecordingState('transcribing');
    // Snapshot the current text and clear any previous live transcript before
    // the new one is applied.
    voiceBaseValueRef.current = value;
    liveTranscriptRef.current = '';

    const response = await userService.transcribeAudio(file, token);

    // Parse defensively: an error response may carry a non-JSON body (for
    // example an HTML gateway page). A raw parser error must not become the
    // message shown to the user.
    let data: TranscriptionPayload | null = null;
    try {
      data = (await response.json()) as TranscriptionPayload | null;
    } catch {
      data = null;
    }

    if (!response.ok || !data?.success) {
      throw new Error(data?.message || 'Failed to transcribe audio.');
    }

    const transcript = data.text;
    if (typeof transcript !== 'string' || !transcript.trim()) {
      throw new Error('No transcript was returned for this audio file.');
    }

    applyLiveTranscript(transcript);
    setRecordingState('idle');
    if (autoFocus) {
      setTimeout(() => {
        inputRef.current?.focus();
      }, 0);
    }
  } catch (error) {
    console.error('Uploaded audio transcription failed', error);
    setRecordingState('error');
    setVoiceError(
      error instanceof Error ? error.message : 'Failed to transcribe audio.',
    );
  }
};
956+
811957 const trimLivePcmBuffer = ( ) => {
812958 const maxBufferedSamples =
813959 LIVE_CAPTURE_SAMPLE_RATE * LIVE_CAPTURE_MAX_BUFFER_SECONDS ;
@@ -1024,24 +1170,29 @@ export default function MessageInput({
10241170 return ;
10251171 }
10261172
1027- if ( ! navigator . mediaDevices ?. getUserMedia ) {
1028- setRecordingState ( 'error' ) ;
1029- setVoiceError ( 'Voice input is not supported in this browser.' ) ;
1173+ const voiceInputSupportError = getVoiceInputSupportError ( ) ;
1174+ if ( voiceInputSupportError ) {
1175+ promptVoiceFileFallback ( voiceInputSupportError ) ;
10301176 return ;
10311177 }
10321178
10331179 const AudioContextConstructor = getAudioContextConstructor ( ) ;
10341180 if ( ! AudioContextConstructor ) {
10351181 setRecordingState ( 'error' ) ;
1036- setVoiceError ( 'Voice input is not supported in this browser.' ) ;
1182+ setVoiceError ( 'Voice input requires Web Audio support in this browser.' ) ;
10371183 return ;
10381184 }
10391185
10401186 let stream : MediaStream | null = null ;
10411187 try {
10421188 setVoiceError ( null ) ;
1043- stream = await navigator . mediaDevices . getUserMedia ( { audio : true } ) ;
1189+ stream = await getUserMediaStream ( { audio : true } ) ;
1190+ } catch ( error ) {
1191+ promptVoiceFileFallback ( getVoiceInputErrorMessage ( error ) ) ;
1192+ return ;
1193+ }
10441194
1195+ try {
10451196 const liveStartResponse = await userService . startLiveTranscription ( token ) ;
10461197 const liveStartData = await liveStartResponse . json ( ) ;
10471198 if ( ! liveStartResponse . ok || ! liveStartData ?. success ) {
@@ -1121,7 +1272,7 @@ export default function MessageInput({
11211272
11221273 setRecordingState ( 'recording' ) ;
11231274 } catch ( error ) {
1124- console . error ( 'Microphone access failed' , error ) ;
1275+ console . error ( 'Live voice transcription failed' , error ) ;
11251276 stream ?. getTracks ( ) . forEach ( ( track ) => track . stop ( ) ) ;
11261277 stopAudioProcessing ( ) ;
11271278 await cleanupLiveSession ( ) ;
@@ -1130,7 +1281,7 @@ export default function MessageInput({
11301281 setVoiceError (
11311282 error instanceof Error
11321283 ? error . message
1133- : 'Microphone access was denied .' ,
1284+ : 'Failed to start live transcription .' ,
11341285 ) ;
11351286 }
11361287 } ;
@@ -1186,6 +1337,19 @@ export default function MessageInput({
11861337 }
11871338 } ;
11881339
/**
 * Change handler for the hidden voice file input.
 *
 * Takes the first selected file, clears the input's value, and starts
 * transcription without awaiting it. Does nothing further when no file was
 * selected.
 *
 * @param e - Change event from the voice file `<input type="file">`.
 */
const handleVoiceFileAttachment = (
  e: React.ChangeEvent<HTMLInputElement>,
): void => {
  const file = e.target.files?.[0];
  // Cleared after the file is read, presumably so choosing the same file
  // again still raises a change event.
  e.target.value = '';

  if (!file) {
    return;
  }

  // Fire-and-forget: the transcription routine reports its own failures.
  void transcribeUploadedAudioFile(file);
};
1352+
11891353 const handlePostDocumentSelect = ( _docs : Doc [ ] | null ) => {
11901354 // SourcesPopup updates Redux selection directly; this preserves the prop contract.
11911355 void _docs ;
@@ -1265,6 +1429,14 @@ export default function MessageInput({
12651429 < div { ...getRootProps ( ) } className = "flex w-full flex-col" >
12661430 { /* react-dropzone input (for drag/drop) */ }
12671431 < input { ...getInputProps ( ) } />
1432+ < input
1433+ ref = { voiceFileInputRef }
1434+ type = "file"
1435+ className = "hidden"
1436+ accept = { AUDIO_FILE_ACCEPT_ATTR }
1437+ capture = "user"
1438+ onChange = { handleVoiceFileAttachment }
1439+ />
12681440
12691441 < div className = "border-dark-gray bg-lotion dark:border-grey relative flex w-full flex-col rounded-[23px] border dark:bg-transparent" >
12701442 < div className = "flex flex-wrap gap-1.5 px-2 py-2 sm:gap-2 sm:px-3" >
0 commit comments