@@ -6,121 +6,128 @@ import {AudioRequest} from "../types/audioTypes.js";
// Shared Gemini client, configured once at module load from the environment.
// NOTE(review): the non-null assertion assumes GEMINI_API_KEY is always set;
// the SDK constructor will not fail here if it is missing — failures would
// surface later on the first request. Confirm deployment always provides it.
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
77
88/**
9- * Generates a response from the Gemini AI model based on the provided prompt text .
9+ * List of Gemini AI model versions to attempt in order of preference .
1010 *
11- * @param {string } promptText - The text prompt to send to the Gemini AI model.
12- * @returns {Promise<GeminiResponseType> } - A promise that resolves to an object containing the generated text and its summary.
11+ * The system will try each model sequentially until a successful response is received,
12+ * starting with the fastest and latest versions and falling back to older or alternative models if necessary.
13+ *
14+ * This ordering is designed to maximize reliability and performance.
1315 */
14- export const generateGeminiResponse = async ( promptText : string ) : Promise < GeminiResponseType > => {
15- // try {
16-
17- const modelVersions = [
18- "gemini-2.0-flash" ,
19- "gemini-2.0-flash-001" ,
20- "gemini-2.0-flash-lite" ,
21- "gemini-1.5-flash" ,
22- "gemini-1.5-flash-8b" ,
23- "gemini-1.5-pro"
24- ] ;
25-
26- for ( const modelVersion of modelVersions ) {
27- try {
28- const model = genAI . getGenerativeModel ( { model : modelVersion } ) ;
29-
30- const result = await model . generateContent ( {
31- contents : [
32- {
33- role : "user" ,
34- parts : [ { text : promptText } ] ,
35- } ,
36- ] ,
37- } ) ;
38-
39- const text = stripMarkdown ( result . response . text ( ) ) ;
40-
41- const summaryResult = await model . generateContent ( {
42- contents : [ {
43- role : "user" ,
44- parts : [ {
45- text : `
46- You are a smart assistant. Based on the interaction below, summarize it in no more than 35 words.
47- Avoid using phrases like "the user" or "AI." Write a brief, neutral description of the request and response.
48-
49- Prompt:
50- ${ promptText }
51-
52- Response:
53- ${ text }
54- ` . trim ( )
55- } ] ,
56- } ] ,
57- } ) ;
58-
59- const summary = summaryResult . response . text ( ) . trim ( ) ;
60-
61- return { text, summary} ;
16+ const MODEL_VERSIONS = [
17+ "gemini-2.0-flash" ,
18+ "gemini-2.0-flash-001" ,
19+ "gemini-2.0-flash-lite" ,
20+ "gemini-1.5-flash" ,
21+ "gemini-1.5-flash-8b" ,
22+ "gemini-1.5-pro" ,
23+ ] ;
6224
25+ /**
26+ * Attempts to generate content from Gemini AI by trying multiple model versions in order.
27+ *
28+ * Iterates through the predefined list of model versions, sending the provided prompt to each model.
29+ * If a model responds successfully, returns the stripped (plain text) result.
30+ * If a model fails with a 500 or 503 error, automatically retries with the next model.
31+ * Throws a fatal error immediately for other types of failures, or if all models fail.
32+ *
33+ * @param {any } prompt - The prompt payload to send to the AI model.
34+ * @returns {Promise<string> } - The generated plain text content.
35+ * @throws {Error } - Throws if all models fail or a non-retryable error occurs.
36+ */
37+ async function tryGenerateContent (
38+ prompt : any ,
39+ ) : Promise < string > {
40+ for ( const modelVersion of MODEL_VERSIONS ) {
41+ try {
42+ const model = genAI . getGenerativeModel ( { model : modelVersion } ) ;
43+ const result = await model . generateContent ( { contents : [ prompt ] } ) ;
44+ return stripMarkdown ( result . response . text ( ) ) ;
6345 } catch ( error : any ) {
6446 if ( error ?. status === 500 || error ?. status === 503 ) {
6547 console . warn ( `Model ${ modelVersion } failed with status ${ error . status } . Trying next...` ) ;
66- continue ; // Try next model
48+ continue ;
6749 }
68-
69- console . error ( "Gemini Fatal Error:" , error ) ;
50+ console . error ( `Gemini Fatal Error [${ modelVersion } ]:` , error ) ;
7051 throw error ;
7152 }
72-
7353 }
74-
7554 throw new Error ( "All Gemini models failed." ) ;
55+ }
56+
57+
58+ /**
59+ * Generates a detailed AI response and a concise summary based on a given prompt text.
60+ *
61+ * Constructs two prompts:
62+ * - One to generate a full response from Gemini AI.
63+ * - Another to request a summarized version of the interaction in no more than 35 words,
64+ * avoiding references to "user" or "AI."
65+ *
66+ * Sends both prompts through the model retry mechanism to ensure robustness against failures.
67+ *
68+ * @param {string } promptText - The input text used to generate the AI response.
69+ * @returns {Promise<GeminiResponseType> } - An object containing both the generated text and its summary.
70+ * @throws {Error } - Throws if all model versions fail for either the full response or the summary.
71+ */
72+ export const generateGeminiResponse = async ( promptText : string ) : Promise < GeminiResponseType > => {
73+ const prompt = {
74+ role : "user" ,
75+ parts : [ { text : promptText } ] ,
76+ } ;
7677
78+ const text = await tryGenerateContent ( prompt ) ;
79+
80+ const summaryPrompt = {
81+ role : "user" ,
82+ parts : [ {
83+ text : `
84+ You are a smart assistant. Based on the interaction below, summarize it in no more than 35 words.
85+ Avoid using phrases like "the user" or "AI." Write a brief, neutral description of the request and response.
86+
87+ Prompt:
88+ ${ promptText }
89+
90+ Response:
91+ ${ text }
92+ ` . trim ( ) ,
93+ } ] ,
94+ } ;
95+
96+ const summary = await tryGenerateContent ( summaryPrompt ) ;
97+
98+ return { text, summary } ;
7799} ;
78100
101+ /**
102+ * Generates a transcript from the provided audio input using the Gemini AI model.
103+ *
104+ * Converts the audio buffer to a base64-encoded string, builds a prompt requesting a transcription,
105+ * and attempts to generate a transcript by sending the prompt to Gemini AI.
106+ * Automatically retries across multiple model versions if initial attempts fail.
107+ *
108+ * @param {AudioRequest } param0 - An object containing the audio buffer and its MIME type.
109+ * @returns {Promise<string> } - The generated transcript as plain text.
110+ * @throws {Error } - Throws an error if all model attempts fail.
111+ */
79112export const generateGeminiAudioResponse = async (
80113 { audioBuffer, mimeType} : AudioRequest
81114) : Promise < string > => {
82- const modelVersions = [
83- "gemini-2.0-flash" ,
84- "gemini-2.0-flash-001" ,
85- "gemini-2.0-flash-lite" ,
86- "gemini-1.5-flash" ,
87- "gemini-1.5-flash-8b" ,
88- "gemini-1.5-pro"
89- ] ;
90-
91115 const base64Audio = audioBuffer . toString ( 'base64' ) ;
92- for ( const modelVersion of modelVersions ) {
93- try {
94116
95- const model = genAI . getGenerativeModel ( { model : modelVersion } ) ;
96-
97- const result = await model . generateContent ( {
98- contents : [ {
99- role : "user" ,
100- parts : [
101- { text : "Generate a transcript of this audio:" } ,
102- {
103- inlineData : {
104- data : base64Audio ,
105- mimeType
106- }
107- }
108- ]
109- } ]
110- } ) ;
111-
112- return stripMarkdown ( result . response . text ( ) ) ;
113-
114- } catch ( error : any ) {
115- if ( error ?. status === 500 || error ?. status === 503 ) {
116- console . warn ( `Model ${ modelVersion } failed with status ${ error . status } . Trying next...` ) ;
117- continue ; // Try next model
118- }
119- console . error ( "Gemini Audio Fatal Error:" , error ) ;
120- throw error ;
121- }
122- }
123- throw new Error ( "All Gemini audio models failed." ) ;
117+ const audioPrompt = {
118+ role : "user" ,
119+ parts : [
120+ { text : "Generate a transcript of this audio:" } ,
121+ {
122+ inlineData : {
123+ data : base64Audio ,
124+ mimeType,
125+ } ,
126+ } ,
127+ ] ,
128+ } ;
129+
130+ return await tryGenerateContent ( audioPrompt ) ;
124131} ;
125132
126133
0 commit comments