Skip to content

Commit 82cd4d1

Browse files
committed
Enhance audio processing capabilities in LLM routes
- Added support for transcription and speech generation using multiple providers (OpenAI, Google, ElevenLabs).
- Introduced default configurations for transcription and speech services.
- Updated the aiService to handle different providers for audio transcription and speech synthesis.
- Implemented file storage for generated audio files with expiration handling.
- Improved error handling for integration key retrieval and provider validation.
1 parent db9e9c5 commit 82cd4d1

File tree

2 files changed

+350
-30
lines changed

2 files changed

+350
-30
lines changed

routes/llm.js

Lines changed: 158 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ const aiService = require('../services/aiService');
66
const multer = require('multer');
77
const fileUtils = require('../utils/fileUtils');
88
const { MODELS_MULTIPLIER } = require('../utils/aiUtils');
9+
const mongoose = require('mongoose');
10+
const uuidv4 = require('uuid/v4');
11+
const FileGridFsService = require('../services/fileGridFsService');
12+
13+
const fileService = new FileGridFsService('files');
14+
const chatFileExpirationTime = parseInt(process.env.CHAT_FILE_EXPIRATION_TIME || '2592000', 10);
915

1016
let MAX_UPLOAD_FILE_SIZE = process.env.MAX_UPLOAD_FILE_SIZE;
1117
let uploadlimits = undefined;
@@ -18,6 +24,20 @@ if (MAX_UPLOAD_FILE_SIZE) {
1824
}
1925
var upload = multer({limits: uploadlimits});
2026

27+
// Defaults applied when the request body omits speech-to-text options.
// `voice` is included because the whole option object is forwarded to
// aiService.transcription(); STT providers may ignore it.
// Frozen so request handlers cannot accidentally mutate shared defaults.
const TRANSCRIPTION_DEFAULTS = Object.freeze({
  provider: 'openai',
  model: 'whisper-1',
  voice: 'alloy',
  language: 'en'
});

// Defaults applied when the request body omits text-to-speech options.
// Frozen for the same reason as TRANSCRIPTION_DEFAULTS.
const SPEECH_DEFAULTS = Object.freeze({
  provider: 'openai',
  model: 'tts-1',
  voice: 'coral',
  language: 'en'
});
40+
2141
router.post('/preview', async (req, res) => {
2242

2343
let id_project = req.projectid;
@@ -122,32 +142,68 @@ router.post('/transcription', upload.single('uploadFile'), async (req, res) => {
122142

123143
let id_project = req.projectid;
124144

145+
const provider = (req.body.provider || TRANSCRIPTION_DEFAULTS.provider).toLowerCase();
146+
const model = req.body.model || TRANSCRIPTION_DEFAULTS.model;
147+
const voice = req.body.voice || TRANSCRIPTION_DEFAULTS.voice;
148+
const language = req.body.language !== undefined && req.body.language !== null
149+
? req.body.language
150+
: TRANSCRIPTION_DEFAULTS.language;
151+
125152
let file;
153+
let contentType = 'audio/mpeg';
154+
let filename = 'audiofile';
126155
if (req.body.url) {
127156
file = await fileUtils.downloadFromUrl(req.body.url);
128157
} else if (req.file) {
129158
file = req.file.buffer;
159+
contentType = req.file.mimetype || contentType;
160+
filename = req.file.originalname || filename;
130161
} else {
131162
return res.status(400).send({ success: false, error: "No audio file or URL provided"})
132163
}
133164

134165
let key;
135166

136-
let integration = await Integration.findOne({ id_project: id_project, name: 'openai' }).catch((err) => {
137-
winston.error("Error finding integration for openai");
138-
return res.status(500).send({ success: false, error: "Error finding integration for openai"});
139-
})
140-
if (!integration) {
141-
winston.verbose("Integration for openai not found.")
142-
return res.status(404).send({ success: false, error: "Integration for openai not found."})
167+
let integration;
168+
try {
169+
integration = await Integration.findOne({ id_project: id_project, name: provider });
170+
} catch (err) {
171+
winston.error("Error finding integration for " + provider);
172+
return res.status(500).send({ success: false, error: "Error finding integration for " + provider});
143173
}
144-
if (!integration?.value?.apikey) {
145-
return res.status(422).send({ success: false, error: "The key provided for openai is not valid or undefined." })
174+
if (!integration) {
175+
winston.verbose("Integration for " + provider + " not found.")
176+
if (provider === 'openai') {
177+
winston.verbose("Try to retrieve shared OpenAI key for transcription")
178+
if (!process.env.GPTKEY) {
179+
winston.error("Shared key for OpenAI not configured.");
180+
return res.status(404).send({ success: false, error: "No key found for " + provider });
181+
}
182+
key = process.env.GPTKEY;
183+
winston.verbose("Using shared OpenAI key as fallback for transcription.");
184+
} else {
185+
return res.status(404).send({ success: false, error: "Integration for " + provider + " not found." })
186+
}
187+
} else if (!integration?.value?.apikey) {
188+
if (provider === 'openai' && process.env.GPTKEY) {
189+
key = process.env.GPTKEY;
190+
winston.verbose("Using shared OpenAI key (integration key missing) for transcription.");
191+
} else {
192+
return res.status(422).send({ success: false, error: "The key provided for " + provider + " is not valid or undefined." })
193+
}
194+
} else {
195+
key = integration.value.apikey;
146196
}
147197

148-
key = integration.value.apikey;
149-
150-
aiService.transcription(file, key).then((response) => {
198+
aiService.transcription(file, {
199+
key,
200+
provider,
201+
model,
202+
voice,
203+
language,
204+
filename,
205+
contentType
206+
}).then((response) => {
151207
winston.verbose("Transcript response: ", response.data);
152208
res.status(200).send({ text: response.data.text});
153209
}).catch((err) => {
@@ -157,5 +213,95 @@ router.post('/transcription', upload.single('uploadFile'), async (req, res) => {
157213

158214
})
159215

216+
/**
 * POST /speech — generate speech audio from text via the configured TTS
 * provider and store the result in GridFS with an expiration timestamp.
 *
 * Body: text (required); provider, model, voice, language, response_format
 * (optional, defaulted from SPEECH_DEFAULTS).
 * Responds 201 with { message, filename, contentType } on success.
 */
router.post('/speech', async (req, res) => {

  let id_project = req.projectid;

  const provider = (req.body.provider || SPEECH_DEFAULTS.provider).toLowerCase();
  const model = req.body.model || SPEECH_DEFAULTS.model;
  const voice = req.body.voice || SPEECH_DEFAULTS.voice;
  // ?? defaults only null/undefined, matching the original explicit checks.
  const language = req.body.language ?? SPEECH_DEFAULTS.language;

  let text = req.body.text;

  if (!text) {
    return res.status(400).send({ success: false, error: "No text provided" });
  }

  let key;

  // Resolve the provider API key: project integration first, then the
  // shared platform key (OpenAI only).
  let integration;
  try {
    integration = await Integration.findOne({ id_project: id_project, name: provider });
  } catch (err) {
    winston.error("Error finding integration for " + provider);
    return res.status(500).send({ success: false, error: "Error finding integration for " + provider });
  }
  if (!integration) {
    winston.verbose("Integration for " + provider + " not found.");
    if (provider === 'openai') {
      winston.verbose("Try to retrieve shared OpenAI key for speech");
      if (!process.env.GPTKEY) {
        winston.error("Shared key for OpenAI not configured.");
        return res.status(404).send({ success: false, error: "No key found for " + provider });
      }
      key = process.env.GPTKEY;
      winston.verbose("Using shared OpenAI key as fallback for speech.");
    } else {
      // BUGFIX: this branch previously fell through with `key` undefined and
      // still called aiService.speech; now it fails fast with 404, matching
      // the /transcription handler.
      return res.status(404).send({ success: false, error: "Integration for " + provider + " not found." });
    }
  } else if (!integration?.value?.apikey) {
    if (provider === 'openai' && process.env.GPTKEY) {
      key = process.env.GPTKEY;
      winston.verbose("Using shared OpenAI key (integration key missing) for speech.");
    } else {
      return res.status(422).send({ success: false, error: "The key provided for " + provider + " is not valid or undefined." });
    }
  } else {
    key = integration.value.apikey;
  }

  try {
    const response = await aiService.speech(text, {
      key,
      provider,
      model,
      voice,
      language,
      response_format: req.body.response_format
    });
    const audioBuffer = response.data;
    const contentType = response.contentType || 'audio/mpeg';
    // Normalize the extension: providers may return it with a leading dot.
    const ext = (response.extension || 'mp3').replace(/^\./, '');

    const expireAt = new Date(Date.now() + chatFileExpirationTime * 1000);
    let subfolder = '/public';
    if (req.user && req.user.id) {
      subfolder = '/users/' + req.user.id;
    }
    const folder = uuidv4();
    const filePath = `uploads${subfolder}/files/${folder}/speech.${ext}`;

    await fileService.createFile(filePath, audioBuffer, undefined, contentType, {
      metadata: { expireAt }
    });
    // Stamp expireAt on every GridFS chunk of the stored file.
    // NOTE(review): presumably a TTL index on files.chunks metadata.expireAt
    // handles cleanup — confirm against deployment configuration.
    const fileRecord = await fileService.find(filePath);
    await mongoose.connection.db.collection('files.chunks').updateMany(
      { files_id: fileRecord._id },
      { $set: { 'metadata.expireAt': expireAt } }
    );

    winston.verbose('Speech audio stored at:', filePath);
    return res.status(201).send({
      message: 'Speech audio saved successfully',
      filename: encodeURIComponent(filePath),
      contentType
    });
  } catch (err) {
    winston.error('Speech error: ', err.response?.data || err);
    return res.status(500).send({ success: false, error: err.response?.data || err.message || err });
  }
})
305+
160306

161307
module.exports = router;

0 commit comments

Comments
 (0)