gemini-live-api-examples/command-line/node/main.mts at cabf60de1143dbd13016325c9e8a6f980230d64b · google-gemini/gemini-live-api-examples · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import { GoogleGenAI, Modality, type LiveServerMessage } from '@google/genai';
import mic from 'mic';
import Speaker from 'speaker';

// WARNING: Do not use API keys in client-side (browser based) applications.
// Consider using Ephemeral Tokens instead.
// More information at: https://ai.google.dev/gemini-api/docs/ephemeral-tokens
//
// Shared Gemini client, constructed with an empty options object.
const ai = new GoogleGenAI({});

// --- Live API config ---

// Model identifier passed to `ai.live.connect`.
const model = 'gemini-3.1-flash-live-preview';

// Session settings: the model answers with audio, and transcription is
// requested for both the model's output audio and the user's input audio.
const config = {
  responseModalities: [Modality.AUDIO],
  systemInstruction: 'You are a helpful and friendly AI assistant.',
  inputAudioTranscription: {},
  outputAudioTranscription: {},
};

/**
 * Runs an interactive voice session against the Gemini Live API.
 *
 * Server messages delivered to the `onmessage` callback are queued. One loop
 * drains that queue (buffering audio chunks and printing transcriptions to
 * stdout) and a second loop writes the buffered audio to a `Speaker`.
 * Microphone audio is forwarded to the session as base64-encoded PCM.
 *
 * Both loops sleep until new work is queued instead of polling, so an idle
 * session does not keep the event loop spinning.
 *
 * @returns A promise that resolves once the session is connected and the
 *   microphone has been started; it rejects if `ai.live.connect` rejects.
 */
async function live(): Promise<void> {
  const responseQueue: LiveServerMessage[] = [];
  const audioQueue: Buffer[] = [];
  let speaker: Speaker | null = null;

  // True while the most recently printed transcription was user input.
  let lastWasInput = false;
  // Set by `onclose`; microphone chunks arriving afterwards are dropped.
  let sessionClosed = false;
  // Assigned once the microphone exists so `onclose` can stop it.
  let stopMicrophone: (() => void) | null = null;

  // Matches text that ends a sentence (optionally followed by whitespace).
  const sentenceEnd = /[.!?]\s*$/;

  /**
   * Creates a single-waiter wake-up signal. `wait()` returns a promise that
   * the next `notify()` resolves; `notify()` with nobody waiting is a no-op.
   * Callers must re-check their queue before waiting — this is safe because
   * the check and the call to `wait()` run synchronously, back to back.
   */
  function createSignal(): { notify: () => void; wait: () => Promise<void> } {
    let wake: (() => void) | null = null;
    return {
      notify: () => {
        const pending = wake;
        wake = null;
        pending?.();
      },
      wait: () =>
        new Promise<void>((resolve) => {
          wake = resolve;
        }),
    };
  }

  const messageSignal = createSignal();
  const audioSignal = createSignal();

  /** Resolves with the next queued server message, sleeping until one arrives. */
  async function waitMessage(): Promise<LiveServerMessage> {
    while (true) {
      const next = responseQueue.shift();
      if (next !== undefined) return next;
      await messageSignal.wait();
    }
  }

  /** Detaches and ends the current speaker, if there is one. */
  function closeSpeaker(): void {
    if (!speaker) return;
    process.stdin.unpipe(speaker);
    speaker.end();
    speaker = null;
  }

  /** Replaces any existing speaker with a fresh 24 kHz, 16-bit, mono one. */
  function createSpeaker(): Speaker {
    closeSpeaker();
    const created = new Speaker({
      channels: 1,
      bitDepth: 16,
      sampleRate: 24000,
    });
    created.on('error', (err: Error) => console.error('Speaker error:', err));
    // NOTE(review): stdin is piped into the speaker exactly as the original
    // code did; the reason is not evident from this file — confirm before
    // changing or removing it.
    process.stdin.pipe(created);
    speaker = created;
    return created;
  }

  /** Drains server messages: buffers audio and prints transcriptions. */
  async function messageLoop(): Promise<void> {
    while (true) {
      const message = await waitMessage();
      const sc = message.serverContent;
      if (!sc) continue;
      if (sc.interrupted) {
        // Empty the queue on interruption to stop playback
        audioQueue.length = 0;
        continue;
      }
      if (sc.modelTurn?.parts) {
        for (const part of sc.modelTurn.parts) {
          if (part.inlineData?.data) {
            audioQueue.push(Buffer.from(part.inlineData.data, 'base64'));
          }
        }
        // Wake the playback loop if it is sleeping on an empty queue.
        if (audioQueue.length > 0) audioSignal.notify();
      }
      if (sc.outputTranscription?.text) {
        // Start a new line when switching from user text to model text.
        if (lastWasInput) { process.stdout.write('\n'); lastWasInput = false; }
        const t = sc.outputTranscription.text;
        process.stdout.write(t);
        if (sentenceEnd.test(t)) process.stdout.write('\n');
      }
      if (sc.inputTranscription?.text) {
        // Start a new line when switching from model text to user text.
        if (!lastWasInput) { process.stdout.write('\n'); lastWasInput = true; }
        const t = sc.inputTranscription.text;
        // ANSI escape \x1b[3m ... \x1b[0m wraps the user's words in italics.
        process.stdout.write(`\x1b[3m${t}\x1b[0m`);
        if (sentenceEnd.test(t)) process.stdout.write('\n');
      }
    }
  }

  /** Plays buffered audio chunks in order, one write at a time. */
  async function playbackLoop(): Promise<void> {
    while (true) {
      const chunk = audioQueue.shift();
      if (chunk === undefined) {
        // Destroy speaker if no more audio to avoid warnings from speaker library
        closeSpeaker();
        await audioSignal.wait();
        continue;
      }
      const output = speaker ?? createSpeaker();
      // Wait for the write callback so chunks are handed over sequentially.
      await new Promise<void>((resolve) => {
        output.write(chunk, () => resolve());
      });
    }
  }

  // Start loops. They run for the lifetime of the process; a failure in
  // either is logged with context instead of becoming an unhandled rejection.
  void messageLoop().catch((err: unknown) => console.error('Message loop error:', err));
  void playbackLoop().catch((err: unknown) => console.error('Playback loop error:', err));

  // Connect to Gemini Live API
  const session = await ai.live.connect({
    model: model,
    config: config,
    callbacks: {
      onopen: () => console.log('Connected to Gemini Live API'),
      onmessage: (message: LiveServerMessage) => {
        responseQueue.push(message);
        messageSignal.notify();
      },
      onerror: (e: ErrorEvent) => console.error('Error:', e.message),
      onclose: (e: CloseEvent) => {
        console.log('Closed:', e.reason);
        // Stop capturing audio: there is no session left to send it to.
        sessionClosed = true;
        stopMicrophone?.();
      },
    },
  });

  // Setup Microphone for input
  const micInstance = mic({
    rate: '16000',
    bitwidth: '16',
    channels: '1',
  });
  const micInputStream = micInstance.getAudioStream();

  micInputStream.on('data', (data: Buffer) => {
    // Drop chunks that arrive after the session has closed.
    if (sessionClosed) return;
    // API expects base64 encoded PCM data
    session.sendRealtimeInput({
      audio: {
        data: data.toString('base64'),
        mimeType: "audio/pcm;rate=16000"
      }
    });
  });

  micInputStream.on('error', (err: Error) => {
    console.error('Microphone error:', err);
  });

  stopMicrophone = () => micInstance.stop();
  micInstance.start();
  console.log('Microphone started. Speak now...');
}

// Entry point: start the live session and print any error it rejects with.
live().catch((err: unknown) => {
  console.error(err);
});