Skip to content

Commit 772dca2

Browse files
committed
feat: add historyConfig support for live connection initial context history
1 parent 94642b6 commit 772dca2

File tree

8 files changed

+437
-0
lines changed

8 files changed

+437
-0
lines changed

api-report/genai-node.api.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1987,6 +1987,11 @@ export enum HarmSeverity {
19871987
HARM_SEVERITY_UNSPECIFIED = "HARM_SEVERITY_UNSPECIFIED"
19881988
}
19891989

1990+
// @public
1991+
export interface HistoryConfig {
1992+
initialHistoryInClientContent?: boolean;
1993+
}
1994+
19901995
// @public
19911996
export enum HttpElementLocation {
19921997
HTTP_IN_BODY = "HTTP_IN_BODY",
@@ -2398,6 +2403,7 @@ export interface LiveClientSetup {
23982403
contextWindowCompression?: ContextWindowCompressionConfig;
23992404
explicitVadSignal?: boolean;
24002405
generationConfig?: GenerationConfig;
2406+
historyConfig?: HistoryConfig;
24012407
inputAudioTranscription?: AudioTranscriptionConfig;
24022408
model?: string;
24032409
outputAudioTranscription?: AudioTranscriptionConfig;
@@ -2420,6 +2426,7 @@ export interface LiveConnectConfig {
24202426
enableAffectiveDialog?: boolean;
24212427
explicitVadSignal?: boolean;
24222428
generationConfig?: GenerationConfig;
2429+
historyConfig?: HistoryConfig;
24232430
httpOptions?: HttpOptions;
24242431
inputAudioTranscription?: AudioTranscriptionConfig;
24252432
maxOutputTokens?: number;

api-report/genai-web.api.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1987,6 +1987,11 @@ export enum HarmSeverity {
19871987
HARM_SEVERITY_UNSPECIFIED = "HARM_SEVERITY_UNSPECIFIED"
19881988
}
19891989

1990+
// @public
1991+
export interface HistoryConfig {
1992+
initialHistoryInClientContent?: boolean;
1993+
}
1994+
19901995
// @public
19911996
export enum HttpElementLocation {
19921997
HTTP_IN_BODY = "HTTP_IN_BODY",
@@ -2398,6 +2403,7 @@ export interface LiveClientSetup {
23982403
contextWindowCompression?: ContextWindowCompressionConfig;
23992404
explicitVadSignal?: boolean;
24002405
generationConfig?: GenerationConfig;
2406+
historyConfig?: HistoryConfig;
24012407
inputAudioTranscription?: AudioTranscriptionConfig;
24022408
model?: string;
24032409
outputAudioTranscription?: AudioTranscriptionConfig;
@@ -2420,6 +2426,7 @@ export interface LiveConnectConfig {
24202426
enableAffectiveDialog?: boolean;
24212427
explicitVadSignal?: boolean;
24222428
generationConfig?: GenerationConfig;
2429+
historyConfig?: HistoryConfig;
24232430
httpOptions?: HttpOptions;
24242431
inputAudioTranscription?: AudioTranscriptionConfig;
24252432
maxOutputTokens?: number;

api-report/genai.api.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1987,6 +1987,11 @@ export enum HarmSeverity {
19871987
HARM_SEVERITY_UNSPECIFIED = "HARM_SEVERITY_UNSPECIFIED"
19881988
}
19891989

1990+
// @public
1991+
export interface HistoryConfig {
1992+
initialHistoryInClientContent?: boolean;
1993+
}
1994+
19901995
// @public
19911996
export enum HttpElementLocation {
19921997
HTTP_IN_BODY = "HTTP_IN_BODY",
@@ -2398,6 +2403,7 @@ export interface LiveClientSetup {
23982403
contextWindowCompression?: ContextWindowCompressionConfig;
23992404
explicitVadSignal?: boolean;
24002405
generationConfig?: GenerationConfig;
2406+
historyConfig?: HistoryConfig;
24012407
inputAudioTranscription?: AudioTranscriptionConfig;
24022408
model?: string;
24032409
outputAudioTranscription?: AudioTranscriptionConfig;
@@ -2420,6 +2426,7 @@ export interface LiveConnectConfig {
24202426
enableAffectiveDialog?: boolean;
24212427
explicitVadSignal?: boolean;
24222428
generationConfig?: GenerationConfig;
2429+
historyConfig?: HistoryConfig;
24232430
httpOptions?: HttpOptions;
24242431
inputAudioTranscription?: AudioTranscriptionConfig;
24252432
maxOutputTokens?: number;

sdk-samples/live_history_config.ts

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
/**
2+
* @license
3+
* Copyright 2025 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
import {GoogleGenAI, LiveServerMessage, Modality} from '@google/genai';
7+
import {writeFile} from 'fs';
8+
9+
// Environment-driven configuration.
// Gemini Developer API key, used when not targeting Vertex AI.
const GEMINI_API_KEY = process.env.GEMINI_API_KEY;
// Vertex AI project and location, used when GOOGLE_GENAI_USE_VERTEXAI is set.
const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION;
// Backend selector; NOTE(review): checked for truthiness in main(), so any
// non-empty value (including "false") selects the Vertex AI path.
const GOOGLE_GENAI_USE_VERTEXAI = process.env.GOOGLE_GENAI_USE_VERTEXAI;
13+
14+
class AsyncQueue<T> {
15+
private queue: T[] = [];
16+
private waiting: ((value: T) => void)[] = [];
17+
18+
/**
19+
* Adds an item to the queue.
20+
* If there's a waiting consumer, it resolves immediately.
21+
* @param item The item to add to the queue.
22+
*/
23+
put(item: T): void {
24+
if (this.waiting.length > 0) {
25+
const resolve = this.waiting.shift();
26+
if (resolve) {
27+
resolve(item);
28+
}
29+
} else {
30+
this.queue.push(item);
31+
}
32+
}
33+
34+
/**
35+
* Gets the next item from the queue.
36+
* If the queue is empty, it waits for an item to be added.
37+
* @return A Promise that resolves with the next item.
38+
*/
39+
get(): Promise<T> {
40+
return new Promise<T>((resolve) => {
41+
if (this.queue.length > 0) {
42+
resolve(this.queue.shift()!);
43+
} else {
44+
this.waiting.push(resolve);
45+
}
46+
});
47+
}
48+
49+
/**
50+
* Clears the queue.
51+
*/
52+
clear(): void {
53+
this.queue = [];
54+
this.waiting = [];
55+
}
56+
}
57+
58+
// ---------------------------------------------------------------------------
59+
// Audio handling utilities (not specific to historyConfig)
60+
// ---------------------------------------------------------------------------
61+
62+
interface WavConversionOptions {
63+
numChannels: number;
64+
sampleRate: number;
65+
bitsPerSample: number;
66+
}
67+
68+
function parseMimeType(mimeType: string): WavConversionOptions {
69+
const [fileType, ...params] = mimeType.split(';').map((s) => s.trim());
70+
const [, format] = fileType.split('/');
71+
72+
const options: Partial<WavConversionOptions> = {
73+
numChannels: 1,
74+
bitsPerSample: 16,
75+
};
76+
77+
if (format && format.startsWith('L')) {
78+
const bits = parseInt(format.slice(1), 10);
79+
if (!isNaN(bits)) {
80+
options.bitsPerSample = bits;
81+
}
82+
}
83+
84+
for (const param of params) {
85+
const [key, value] = param.split('=').map((s) => s.trim());
86+
if (key === 'rate') {
87+
options.sampleRate = parseInt(value, 10);
88+
}
89+
}
90+
91+
return options as WavConversionOptions;
92+
}
93+
94+
function createWavHeader(
95+
dataLength: number,
96+
options: WavConversionOptions,
97+
): Buffer {
98+
const {numChannels, sampleRate, bitsPerSample} = options;
99+
100+
// http://soundfile.sapp.org/doc/WaveFormat
101+
const byteRate = (sampleRate * numChannels * bitsPerSample) / 8;
102+
const blockAlign = (numChannels * bitsPerSample) / 8;
103+
const buffer = Buffer.alloc(44);
104+
105+
buffer.write('RIFF', 0); // ChunkID
106+
buffer.writeUInt32LE(36 + dataLength, 4); // ChunkSize
107+
buffer.write('WAVE', 8); // Format
108+
buffer.write('fmt ', 12); // Subchunk1ID
109+
buffer.writeUInt32LE(16, 16); // Subchunk1Size (PCM)
110+
buffer.writeUInt16LE(1, 20); // AudioFormat (1 = PCM)
111+
buffer.writeUInt16LE(numChannels, 22); // NumChannels
112+
buffer.writeUInt32LE(sampleRate, 24); // SampleRate
113+
buffer.writeUInt32LE(byteRate, 28); // ByteRate
114+
buffer.writeUInt16LE(blockAlign, 32); // BlockAlign
115+
buffer.writeUInt16LE(bitsPerSample, 34); // BitsPerSample
116+
buffer.write('data', 36); // Subchunk2ID
117+
buffer.writeUInt32LE(dataLength, 40); // Subchunk2Size
118+
119+
return buffer;
120+
}
121+
122+
function convertToWav(rawData: string[], mimeType: string): Buffer {
123+
const options = parseMimeType(mimeType);
124+
const dataLength = rawData.reduce((a, b) => a + b.length, 0);
125+
const wavHeader = createWavHeader(dataLength, options);
126+
const buffer = Buffer.concat(
127+
// TODO: go/ts59upgrade - Remove this suppression after TS 5.9.2 upgrade
128+
// error TS2345: Argument of type 'Buffer[]' is not assignable to parameter of type 'readonly Uint8Array<ArrayBufferLike>[]'.
129+
// @ts-ignore
130+
rawData.map((data) => Buffer.from(data, 'base64')),
131+
);
132+
133+
// TODO: go/ts59upgrade - Remove this suppression after TS 5.9.2 upgrade
134+
// error TS2322: Type 'Buffer' is not assignable to type 'Uint8Array<ArrayBufferLike>'.
135+
// @ts-ignore
136+
return Buffer.concat([wavHeader, buffer]);
137+
}
138+
139+
function saveBinaryFile(fileName: string, content: Buffer) {
140+
// TODO: go/ts59upgrade - Remove this suppression after TS 5.9.2 upgrade
141+
// error TS2345: Argument of type 'Buffer' is not assignable to parameter of type 'string | ArrayBufferView'.
142+
// @ts-ignore
143+
writeFile(fileName, content, 'utf8', (err) => {
144+
if (err) {
145+
console.error(`Error writing file ${fileName}:`, err);
146+
return;
147+
}
148+
console.log(`Appending stream content to file ${fileName}.`);
149+
});
150+
}
151+
152+
// ---------------------------------------------------------------------------
153+
// Main example: historyConfig with initial conversation context seeding
154+
// ---------------------------------------------------------------------------
155+
156+
/**
 * Runs the historyConfig live demo: connects a realtime session with
 * `historyConfig.initialHistoryInClientContent` enabled, seeds prior
 * conversation turns via sendClientContent, then sends a follow-up
 * question over sendRealtimeInput that can only be answered from the
 * seeded history, and waits for the model's answer before closing.
 *
 * @param client Configured GoogleGenAI client (Vertex AI or Developer API).
 * @param model Live-capable model name to connect to.
 */
async function live(client: GoogleGenAI, model: string) {
  // Server messages arrive via the onmessage callback; this queue bridges
  // them to the sequential, await-based turn handling below.
  const responseQueue = new AsyncQueue<LiveServerMessage>();
  // Accumulates every base64 audio chunk of the answer; the WAV file is
  // re-written with the full accumulated audio on each new chunk.
  const audioParts: string[] = [];

  // Logs transcription/text output and persists streamed audio from a
  // single server message.
  function handleModelTurn(message: LiveServerMessage) {
    if (message.serverContent?.outputTranscription) {
      console.log('Transcription: ', message.serverContent.outputTranscription);
    }
    if (message.serverContent?.modelTurn?.parts) {
      // NOTE(review): only the first part is inspected — presumably each
      // live message carries a single part; confirm for multi-part turns.
      const part = message.serverContent.modelTurn.parts[0];

      if (part?.fileData) {
        console.log(`File: ${part.fileData.fileUri}`);
      }

      if (part?.inlineData) {
        const fileName = 'audio.wav';
        const inlineData = part.inlineData;

        audioParts.push(inlineData.data ?? '');

        const buffer = convertToWav(audioParts, inlineData.mimeType ?? '');
        saveBinaryFile(fileName, buffer);
      }

      if (part?.text) {
        console.log(part.text);
      }
    }
  }

  // Drains queued messages until the server signals turnComplete, returning
  // every message that made up the turn.
  async function handleTurn(): Promise<LiveServerMessage[]> {
    const turn: LiveServerMessage[] = [];
    // eslint-disable-next-line no-constant-condition
    while (true) {
      const message = await responseQueue.get();
      handleModelTurn(message);
      turn.push(message);
      if (message.serverContent?.turnComplete) {
        return turn;
      }
    }
  }

  // Connect with historyConfig to enable seeding initial context history
  // via clientContent before starting the realtime conversation.
  const session = await client.live.connect({
    model: model,
    callbacks: {
      onopen: () => {
        console.debug('Opened');
      },
      onmessage: (message: LiveServerMessage) => {
        responseQueue.put(message);
      },
      onerror: (e: ErrorEvent) => {
        console.debug('Error:', e.message);
      },
      onclose: (e: CloseEvent) => {
        console.debug('Close:', e.reason);
        // Abandon any consumer still awaiting a message once the socket
        // closes; no further messages can arrive.
        responseQueue.clear();
      },
    },
    config: {
      responseModalities: [Modality.AUDIO],
      systemInstruction: 'You are a helpful and friendly AI assistant.',
      speechConfig: {
        voiceConfig: {
          prebuiltVoiceConfig: {
            voiceName: 'Kore',
          },
        },
      },
      outputAudioTranscription: {},
      // The feature under demonstration: lets the first clientContent
      // message(s) be treated as context history rather than a user turn.
      historyConfig: {initialHistoryInClientContent: true},
    },
  });

  // Seed the session with initial conversation history.
  // With historyConfig.initialHistoryInClientContent set to true, the server
  // will process these clientContent messages as context history without
  // triggering a model response. The history can end with role MODEL.
  // After turnComplete is true, the client can start the realtime conversation.
  console.log('-'.repeat(80));
  console.log('Seeding initial conversation history...');
  session.sendClientContent({
    turns: [
      {
        role: 'user',
        parts: [
          {text: "My name is Jad and I'm building a live streaming app."},
        ],
      },
      {
        role: 'model',
        parts: [
          {
            text: 'Nice to meet you, Jad! That sounds like an exciting project. What kind of live streaming are you focusing on?',
          },
        ],
      },
      {
        role: 'user',
        parts: [
          {
            text: "I'm focusing on real-time audio conversations using the Gemini API.",
          },
        ],
      },
      {
        role: 'model',
        parts: [
          {
            text: 'Great use case! The Gemini Live API is well-suited for real-time audio interactions. How can I help?',
          },
        ],
      },
    ],
    turnComplete: true,
  });
  console.log('History seeded.');

  // Now send a follow-up question that relies on the seeded context.
  // The model should be able to recall details from the conversation history.
  // After history seeding, use sendRealtimeInput to continue the conversation.
  console.log('-'.repeat(80));
  const followUp = 'What was my name and what am I building?';
  console.log(`Sent: ${followUp}`);
  session.sendRealtimeInput({text: followUp});

  // Block until the model's full answer has been received and handled.
  await handleTurn();

  session.close();
}
290+
291+
async function main() {
292+
if (GOOGLE_GENAI_USE_VERTEXAI) {
293+
// Note: historyConfig support may not yet be available on all Vertex AI
294+
// models. Check model documentation for compatibility.
295+
const client = new GoogleGenAI({
296+
vertexai: true,
297+
project: GOOGLE_CLOUD_PROJECT,
298+
location: GOOGLE_CLOUD_LOCATION,
299+
});
300+
const model = 'gemini-2.0-flash-live-preview-04-09';
301+
await live(client, model).catch((e) => console.error('got error', e));
302+
return;
303+
}
304+
305+
const model = 'gemini-3.1-flash-live-preview';
306+
const client = new GoogleGenAI({
307+
vertexai: false,
308+
apiKey: GEMINI_API_KEY,
309+
});
310+
311+
await live(client, model).catch((e) => console.error('got error', e));
312+
}
313+
314+
main();

0 commit comments

Comments
 (0)