@@ -6,6 +6,12 @@ const aiService = require('../services/aiService');
66const multer = require ( 'multer' ) ;
77const fileUtils = require ( '../utils/fileUtils' ) ;
88const { MODELS_MULTIPLIER } = require ( '../utils/aiUtils' ) ;
9+ const mongoose = require ( 'mongoose' ) ;
10+ const uuidv4 = require ( 'uuid/v4' ) ;
11+ const FileGridFsService = require ( '../services/fileGridFsService' ) ;
12+
13+ const fileService = new FileGridFsService ( 'files' ) ;
// Default lifetime of stored chat files, in seconds (30 days).
const DEFAULT_CHAT_FILE_EXPIRATION_SECONDS = 2592000;

/**
 * Parses the chat-file expiration setting (expressed in seconds).
 *
 * Falls back to the default when the value is missing, empty, not numeric or
 * negative, so that callers never build an Invalid Date (NaN) or an
 * already-expired timestamp from a misconfigured environment variable.
 *
 * @param {string|undefined} rawValue - Raw value, e.g. process.env.CHAT_FILE_EXPIRATION_TIME.
 * @returns {number} Non-negative integer number of seconds.
 */
function parseChatFileExpiration(rawValue) {
  const seconds = Number.parseInt(rawValue || '', 10);
  if (Number.isFinite(seconds) && seconds >= 0) {
    return seconds;
  }
  return DEFAULT_CHAT_FILE_EXPIRATION_SECONDS;
}

// Lifetime (seconds) applied to generated speech files when computing their expireAt date.
const chatFileExpirationTime = parseChatFileExpiration(process.env.CHAT_FILE_EXPIRATION_TIME);
915
1016let MAX_UPLOAD_FILE_SIZE = process . env . MAX_UPLOAD_FILE_SIZE ;
1117let uploadlimits = undefined ;
@@ -18,6 +24,20 @@ if (MAX_UPLOAD_FILE_SIZE) {
1824}
1925var upload = multer ( { limits : uploadlimits } ) ;
2026
/**
 * Fallback options for POST /transcription, applied field by field when the
 * request body does not supply `provider`, `model`, `voice` or `language`.
 *
 * Frozen because the object is shared by every request handled by this
 * module: an accidental write would otherwise leak into later requests.
 */
const TRANSCRIPTION_DEFAULTS = Object.freeze({
  provider: 'openai',
  model: 'whisper-1',
  voice: 'alloy',
  language: 'en',
});
33+
/**
 * Fallback options for POST /speech, applied field by field when the request
 * body does not supply `provider`, `model`, `voice` or `language`.
 *
 * Frozen because the object is shared by every request handled by this
 * module: an accidental write would otherwise leak into later requests.
 */
const SPEECH_DEFAULTS = Object.freeze({
  provider: 'openai',
  model: 'tts-1',
  voice: 'coral',
  language: 'en',
});
40+
2141router . post ( '/preview' , async ( req , res ) => {
2242
2343 let id_project = req . projectid ;
@@ -122,32 +142,68 @@ router.post('/transcription', upload.single('uploadFile'), async (req, res) => {
122142
123143 let id_project = req . projectid ;
124144
145+ const provider = ( req . body . provider || TRANSCRIPTION_DEFAULTS . provider ) . toLowerCase ( ) ;
146+ const model = req . body . model || TRANSCRIPTION_DEFAULTS . model ;
147+ const voice = req . body . voice || TRANSCRIPTION_DEFAULTS . voice ;
148+ const language = req . body . language !== undefined && req . body . language !== null
149+ ? req . body . language
150+ : TRANSCRIPTION_DEFAULTS . language ;
151+
125152 let file ;
153+ let contentType = 'audio/mpeg' ;
154+ let filename = 'audiofile' ;
126155 if ( req . body . url ) {
127156 file = await fileUtils . downloadFromUrl ( req . body . url ) ;
128157 } else if ( req . file ) {
129158 file = req . file . buffer ;
159+ contentType = req . file . mimetype || contentType ;
160+ filename = req . file . originalname || filename ;
130161 } else {
131162 return res . status ( 400 ) . send ( { success : false , error : "No audio file or URL provided" } )
132163 }
133164
134165 let key ;
135166
136- let integration = await Integration . findOne ( { id_project : id_project , name : 'openai' } ) . catch ( ( err ) => {
137- winston . error ( "Error finding integration for openai" ) ;
138- return res . status ( 500 ) . send ( { success : false , error : "Error finding integration for openai" } ) ;
139- } )
140- if ( ! integration ) {
141- winston . verbose ( "Integration for openai not found." )
142- return res . status ( 404 ) . send ( { success : false , error : "Integration for openai not found." } )
167+ let integration ;
168+ try {
169+ integration = await Integration . findOne ( { id_project : id_project , name : provider } ) ;
170+ } catch ( err ) {
171+ winston . error ( "Error finding integration for " + provider ) ;
172+ return res . status ( 500 ) . send ( { success : false , error : "Error finding integration for " + provider } ) ;
143173 }
144- if ( ! integration ?. value ?. apikey ) {
145- return res . status ( 422 ) . send ( { success : false , error : "The key provided for openai is not valid or undefined." } )
174+ if ( ! integration ) {
175+ winston . verbose ( "Integration for " + provider + " not found." )
176+ if ( provider === 'openai' ) {
177+ winston . verbose ( "Try to retrieve shared OpenAI key for transcription" )
178+ if ( ! process . env . GPTKEY ) {
179+ winston . error ( "Shared key for OpenAI not configured." ) ;
180+ return res . status ( 404 ) . send ( { success : false , error : "No key found for " + provider } ) ;
181+ }
182+ key = process . env . GPTKEY ;
183+ winston . verbose ( "Using shared OpenAI key as fallback for transcription." ) ;
184+ } else {
185+ return res . status ( 404 ) . send ( { success : false , error : "Integration for " + provider + " not found." } )
186+ }
187+ } else if ( ! integration ?. value ?. apikey ) {
188+ if ( provider === 'openai' && process . env . GPTKEY ) {
189+ key = process . env . GPTKEY ;
190+ winston . verbose ( "Using shared OpenAI key (integration key missing) for transcription." ) ;
191+ } else {
192+ return res . status ( 422 ) . send ( { success : false , error : "The key provided for " + provider + " is not valid or undefined." } )
193+ }
194+ } else {
195+ key = integration . value . apikey ;
146196 }
147197
148- key = integration . value . apikey ;
149-
150- aiService . transcription ( file , key ) . then ( ( response ) => {
198+ aiService . transcription ( file , {
199+ key,
200+ provider,
201+ model,
202+ voice,
203+ language,
204+ filename,
205+ contentType
206+ } ) . then ( ( response ) => {
151207 winston . verbose ( "Transcript response: " , response . data ) ;
152208 res . status ( 200 ) . send ( { text : response . data . text } ) ;
153209 } ) . catch ( ( err ) => {
@@ -157,5 +213,95 @@ router.post('/transcription', upload.single('uploadFile'), async (req, res) => {
157213
158214} )
159215
/**
 * POST /speech
 *
 * Turns `req.body.text` into audio through `aiService.speech` and stores the
 * result with `fileService`, tagging it with an expiration date.
 *
 * Request body:
 *   - text            (required) text to synthesize
 *   - provider        (optional) integration name, lower-cased; defaults to SPEECH_DEFAULTS.provider
 *   - model, voice    (optional) default to SPEECH_DEFAULTS
 *   - language        (optional) defaults to SPEECH_DEFAULTS.language only when null/undefined
 *   - response_format (optional) forwarded untouched to aiService.speech
 *
 * API key resolution:
 *   1. the apikey stored in the project's integration for `provider`;
 *   2. for `openai` only, the shared key in process.env.GPTKEY.
 *
 * Responses:
 *   - 201 { message, filename, contentType } on success (`filename` is the URI-encoded storage path)
 *   - 400 missing text or non-string provider
 *   - 404 no integration / no shared key available
 *   - 422 integration present but without an apikey (and no shared-key fallback)
 *   - 500 integration lookup, synthesis or storage failure
 */
router.post('/speech', async (req, res) => {

  const id_project = req.projectid;
  const body = req.body || {};

  // A truthy non-string provider would make toLowerCase() throw outside of any
  // try/catch, rejecting the async handler without ever answering the client.
  if (body.provider && typeof body.provider !== 'string') {
    return res.status(400).send({ success: false, error: "Invalid provider" });
  }

  const provider = (body.provider || SPEECH_DEFAULTS.provider).toLowerCase();
  const model = body.model || SPEECH_DEFAULTS.model;
  const voice = body.voice || SPEECH_DEFAULTS.voice;
  // Only null/undefined fall back to the default; other values are forwarded as received.
  const language = body.language ?? SPEECH_DEFAULTS.language;

  const text = body.text;
  if (!text) {
    return res.status(400).send({ success: false, error: "No text provided" });
  }

  let integration;
  try {
    integration = await Integration.findOne({ id_project: id_project, name: provider });
  } catch (err) {
    winston.error(`Error finding integration for ${provider}`);
    return res.status(500).send({ success: false, error: `Error finding integration for ${provider}` });
  }

  let key;
  if (!integration) {
    winston.verbose(`Integration for ${provider} not found.`);
    // Only OpenAI has a shared fallback key. Any other provider must stop here,
    // otherwise the synthesis call below would run with an undefined key.
    if (provider !== 'openai') {
      return res.status(404).send({ success: false, error: `Integration for ${provider} not found.` });
    }
    winston.verbose("Try to retrieve shared OpenAI key for speech");
    if (!process.env.GPTKEY) {
      winston.error("Shared key for OpenAI not configured.");
      return res.status(404).send({ success: false, error: `No key found for ${provider}` });
    }
    key = process.env.GPTKEY;
    winston.verbose("Using shared OpenAI key as fallback for speech.");
  } else if (!integration.value?.apikey) {
    if (provider === 'openai' && process.env.GPTKEY) {
      key = process.env.GPTKEY;
      winston.verbose("Using shared OpenAI key (integration key missing) for speech.");
    } else {
      return res.status(422).send({ success: false, error: `The key provided for ${provider} is not valid or undefined.` });
    }
  } else {
    key = integration.value.apikey;
  }

  try {
    const response = await aiService.speech(text, {
      key,
      provider,
      model,
      voice,
      language,
      response_format: body.response_format
    });
    const audioBuffer = response.data;
    const contentType = response.contentType || 'audio/mpeg';
    // Drop a leading dot so that both ".mp3" and "mp3" produce "speech.mp3".
    const ext = (response.extension || 'mp3').replace(/^\./, '');

    const expireAt = new Date(Date.now() + chatFileExpirationTime * 1000);
    // Files are stored under the requesting user's folder when a user is
    // attached to the request, otherwise under the public folder.
    const subfolder = (req.user && req.user.id) ? `/users/${req.user.id}` : '/public';
    // Random folder name keeps every generated file on its own path.
    const folder = uuidv4();
    const filePath = `uploads${subfolder}/files/${folder}/speech.${ext}`;

    await fileService.createFile(filePath, audioBuffer, undefined, contentType, {
      metadata: { expireAt }
    });
    // Mirror the expiration date onto the file's chunk documents as well
    // (presumably so that chunks expire together with the file — confirm against the TTL index setup).
    const fileRecord = await fileService.find(filePath);
    await mongoose.connection.db.collection('files.chunks').updateMany(
      { files_id: fileRecord._id },
      { $set: { 'metadata.expireAt': expireAt } }
    );

    winston.verbose('Speech audio stored at:', filePath);
    return res.status(201).send({
      message: 'Speech audio saved successfully',
      filename: encodeURIComponent(filePath),
      contentType
    });
  } catch (err) {
    winston.error('Speech error: ', err.response?.data || err);
    return res.status(500).send({ success: false, error: err.response?.data || err.message || err });
  }
})
305+
160306
161307module . exports = router ;
0 commit comments