@@ -6,86 +6,129 @@ import {AudioRequest} from "../types/audioTypes.js";
// Shared Gemini SDK client for this module. The API key is read from the
// GEMINI_API_KEY environment variable once, when the module is loaded. The cast
// only satisfies the compiler: nothing here verifies that the variable is set.
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY as string);
77
88/**
9- * Generates a response from the Gemini AI model based on the provided prompt text .
9+ * List of Gemini AI model versions to attempt in order of preference .
1010 *
11- * @param {string } promptText - The text prompt to send to the Gemini AI model.
12- * @returns {Promise<GeminiResponseType> } - A promise that resolves to an object containing the generated text and its summary.
11+ * The system will try each model sequentially until a successful response is received,
12+ * starting with the fastest and latest versions and falling back to older or alternative models if necessary.
13+ *
14+ * This ordering is designed to maximize reliability and performance.
1315 */
14- export const generateGeminiResponse = async ( promptText : string ) : Promise < GeminiResponseType > => {
15- try {
16- const model = genAI . getGenerativeModel ( { model : "gemini-2.0-flash" } ) ;
17-
18- const result = await model . generateContent ( {
19- contents : [
20- {
21- role : "user" ,
22- parts : [ { text : promptText } ] ,
23- } ,
24- ] ,
25- } ) ;
26-
27- const text = stripMarkdown ( result . response . text ( ) ) ;
28-
29- const summaryResult = await model . generateContent ( {
30- contents : [ {
31- role : "user" ,
32- parts : [ {
33- text : `
34- You are a smart assistant. Based on the following:
35-
36- You are a smart assistant. Based on the interaction below, summarize it in no more than 35 words.
37- Avoid using phrases like "the user" or "AI." Write a brief, neutral description of the request and response.
38-
39- Prompt:
40- ${ promptText }
41-
42- Response:
43- ${ text }
44- ` . trim ( )
45- } ] ,
46- } ] ,
47- } ) ;
48-
49- const summary = summaryResult . response . text ( ) . trim ( ) ;
50-
51- return { text, summary} ;
52- } catch ( error ) {
53- console . error ( "Gemini Error:" , error ) ;
54- throw error ;
/**
 * Gemini model identifiers in the order they are attempted.
 *
 * The list is walked front to back, so earlier entries are preferred and later
 * entries only serve as fallbacks.
 */
const MODEL_VERSIONS: string[] = [
  // 2.0 generation
  "gemini-2.0-flash",
  "gemini-2.0-flash-001",
  "gemini-2.0-flash-lite",
  // 1.5 generation
  "gemini-1.5-flash",
  "gemini-1.5-flash-8b",
  "gemini-1.5-pro",
];
24+
/**
 * Sends a single prompt to Gemini, falling back through `MODEL_VERSIONS` in order.
 *
 * For each model version the prompt is submitted as the only entry of `contents`.
 * The first successful response is passed through `stripMarkdown` and returned.
 * A failure whose `status` is 500 or 503 is logged as a warning and the next
 * model is tried; any other failure is logged and rethrown immediately.
 *
 * @param prompt - The content entry to send (role + parts). Typed loosely to
 *   keep the existing call sites unchanged.
 * @returns The model's response text after `stripMarkdown`.
 * @throws The original error when a non-retryable failure occurs.
 * @throws An `Error` with message "All Gemini models failed." when every model
 *   failed with 500/503; its `cause` property holds the last such failure.
 */
async function tryGenerateContent(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- signature kept as-is for callers
  prompt: any,
): Promise<string> {
  // Remembered so the final error can point at what actually went wrong.
  let lastRetryableError: unknown;

  for (const modelVersion of MODEL_VERSIONS) {
    try {
      const model = genAI.getGenerativeModel({ model: modelVersion });
      const result = await model.generateContent({ contents: [prompt] });
      return stripMarkdown(result.response.text());
    } catch (error: unknown) {
      // The thrown value may be anything; read `status` defensively.
      const status = (error as { status?: unknown } | null | undefined)?.status;

      if (status === 500 || status === 503) {
        console.warn(`Model ${modelVersion} failed with status ${status}. Trying next...`);
        lastRetryableError = error;
        continue;
      }

      console.error(`Gemini Fatal Error [${modelVersion}]:`, error);
      throw error;
    }
  }

  // Same message as before; the underlying failure is attached rather than dropped.
  throw Object.assign(new Error("All Gemini models failed."), {
    cause: lastRetryableError,
  });
}
56+
57+
/**
 * Produces a Gemini answer for `promptText` together with a short summary of
 * the exchange.
 *
 * Two requests are made one after the other through `tryGenerateContent`:
 * first the prompt itself, then an instruction asking for a neutral summary
 * (no more than 35 words) of the prompt and the answer just received.
 *
 * @param promptText - The text sent to the model as a user message.
 * @returns The generated answer (`text`) and its summary (`summary`).
 * @throws Whatever `tryGenerateContent` throws for either request.
 */
export const generateGeminiResponse = async (promptText: string): Promise<GeminiResponseType> => {
  // Wraps plain text as a single user-role content entry.
  const asUserTurn = (body: string) => ({
    role: "user",
    parts: [{ text: body }],
  });

  const text = await tryGenerateContent(asUserTurn(promptText));

  // The summary request embeds both the original prompt and the answer.
  const summaryInstruction = [
    "You are a smart assistant. Based on the interaction below, summarize it in no more than 35 words.",
    'Avoid using phrases like "the user" or "AI." Write a brief, neutral description of the request and response.',
    "",
    "Prompt:",
    promptText,
    "",
    "Response:",
    text,
  ]
    .join("\n")
    .trim();

  const summary = await tryGenerateContent(asUserTurn(summaryInstruction));

  return { text, summary };
};
57100
/**
 * Asks Gemini to transcribe an audio clip.
 *
 * The audio buffer is base64-encoded and sent as inline data alongside a short
 * text instruction, in a single user message, via `tryGenerateContent`.
 *
 * @param param0 - The audio buffer and its MIME type.
 * @returns The transcript text returned by `tryGenerateContent`.
 * @throws Whatever `tryGenerateContent` throws.
 */
export const generateGeminiAudioResponse = async (
  { audioBuffer, mimeType }: AudioRequest
): Promise<string> => {
  const instructionPart = { text: "Generate a transcript of this audio:" };

  // Inline payload: the raw audio as base64 plus its declared MIME type.
  const audioPart = {
    inlineData: {
      data: audioBuffer.toString('base64'),
      mimeType,
    },
  };

  const transcript = await tryGenerateContent({
    role: "user",
    parts: [instructionPart, audioPart],
  });

  return transcript;
};
89132
90133
91134/**
0 commit comments