// main.mts — command-line voice chat with the Gemini Live API (microphone in, speaker out).
import { GoogleGenAI, Modality, type LiveServerMessage } from '@google/genai';
import mic from 'mic';
import Speaker from 'speaker';
// WARNING: Do not use API keys in client-side (browser based) applications
// Consider using Ephemeral Tokens instead
// More information at: https://ai.google.dev/gemini-api/docs/ephemeral-tokens
//
// The client is built with an empty options object, i.e. no credentials are
// passed explicitly here.
// NOTE(review): presumably the SDK picks the API key up from the environment —
// confirm against the @google/genai documentation.
const ai = new GoogleGenAI({});

// --- Live API config ---

/** Model identifier handed to `ai.live.connect`. */
const model = 'gemini-3.1-flash-live-preview';

/**
 * Session configuration handed to `ai.live.connect`: audio-only responses, a
 * fixed system instruction, and transcription requested for both the model's
 * output and the user's input (empty objects = no extra options set).
 */
const config = {
  responseModalities: [Modality.AUDIO],
  systemInstruction: 'You are a helpful and friendly AI assistant.',
  outputAudioTranscription: {},
  inputAudioTranscription: {},
};
/**
 * Minimal unbounded FIFO whose consumer can `await` the next item.
 *
 * Replaces "poll the array on every event-loop tick": a waiting consumer is
 * parked on a promise that `push` resolves, so an idle queue costs no CPU and
 * does not keep the Node.js event loop alive.
 *
 * Supports a single concurrent consumer (only one pending `next()` at a time).
 */
class AsyncQueue<T> {
  private readonly items: T[] = [];
  private wake: (() => void) | null = null;

  /** Number of items currently buffered. */
  get length(): number {
    return this.items.length;
  }

  /** Appends an item and wakes the consumer if it is waiting. */
  push(item: T): void {
    this.items.push(item);
    const wake = this.wake;
    this.wake = null;
    wake?.();
  }

  /** Drops every buffered item; a waiting consumer keeps waiting. */
  clear(): void {
    this.items.length = 0;
  }

  /** Resolves with the oldest item, waiting until one is pushed if empty. */
  async next(): Promise<T> {
    while (this.items.length === 0) {
      await new Promise<void>((resolve) => {
        this.wake = resolve;
      });
    }
    return this.items.shift() as T;
  }
}

/**
 * Runs an interactive voice session against the Gemini Live API.
 *
 * Connects with the module-level `model` and `config`, streams microphone
 * audio (16 kHz, 16-bit, mono PCM, base64 encoded) to the session, plays the
 * audio returned by the model (24 kHz, 16-bit, mono PCM) through the speaker,
 * and prints input/output transcriptions to stdout (input in italics).
 *
 * @returns A promise that resolves once the connection is established and the
 *   microphone has been started; the session then keeps running from event
 *   callbacks until the connection closes.
 * @throws Rejects with whatever `ai.live.connect` rejects with.
 */
async function live(): Promise<void> {
  const responseQueue = new AsyncQueue<LiveServerMessage>();
  const audioQueue = new AsyncQueue<Buffer>();
  // Hoisted so the pattern is not rebuilt for every transcription fragment.
  const sentenceEnd = /[.!?]\s*$/;
  let speaker: Speaker | null = null;
  let lastWasInput = false;
  let closed = false;

  /** Opens an output device matching the PCM format written by playbackLoop. */
  function createSpeaker(): Speaker {
    const created = new Speaker({
      channels: 1,
      bitDepth: 16,
      sampleRate: 24000,
    });
    created.on('error', (err: Error) => console.error('Speaker error:', err));
    // Deliberately NOT piping process.stdin into the speaker: that would play
    // keyboard/stdin bytes as audio and, once stdin ends, end the speaker
    // while playbackLoop is still writing to it.
    return created;
  }

  /** Ends the current speaker, if any, so the next chunk opens a fresh one. */
  function closeSpeaker(): void {
    if (speaker) {
      speaker.end();
      speaker = null;
    }
  }

  /** Consumes server messages: queues model audio and prints transcriptions. */
  async function messageLoop(): Promise<void> {
    while (true) {
      const message = await responseQueue.next();
      const sc = message.serverContent;
      if (!sc) continue;

      if (sc.interrupted) {
        // Empty the queue on interruption to stop playback
        audioQueue.clear();
        continue;
      }

      for (const part of sc.modelTurn?.parts ?? []) {
        if (part.inlineData?.data) {
          audioQueue.push(Buffer.from(part.inlineData.data, 'base64'));
        }
      }

      if (sc.outputTranscription?.text) {
        // Start a new line when switching from user text to model text.
        if (lastWasInput) {
          process.stdout.write('\n');
          lastWasInput = false;
        }
        const t = sc.outputTranscription.text;
        process.stdout.write(t);
        if (sentenceEnd.test(t)) process.stdout.write('\n');
      }

      if (sc.inputTranscription?.text) {
        // Start a new line when switching from model text to user text.
        if (!lastWasInput) {
          process.stdout.write('\n');
          lastWasInput = true;
        }
        const t = sc.inputTranscription.text;
        // ANSI italics distinguish what the user said from the model's reply.
        process.stdout.write(`\x1b[3m${t}\x1b[0m`);
        if (sentenceEnd.test(t)) process.stdout.write('\n');
      }
    }
  }

  /** Plays queued audio chunks in order, one write at a time. */
  async function playbackLoop(): Promise<void> {
    while (true) {
      if (audioQueue.length === 0) {
        // Destroy speaker if no more audio to avoid warnings from speaker library
        closeSpeaker();
      }
      const chunk = await audioQueue.next();
      if (!speaker) speaker = createSpeaker();
      const out = speaker;
      // Wait for the write callback before taking the next chunk, so an
      // interruption can still discard everything not yet handed to the device.
      await new Promise<void>((resolve) => {
        out.write(chunk, () => resolve());
      });
    }
  }

  // Setup Microphone for input. Built before connecting so the close callback
  // can stop it; capture only begins at start() below.
  const micInstance = mic({
    rate: '16000',
    bitwidth: '16',
    channels: '1',
  });

  // Connect to Gemini Live API
  const session = await ai.live.connect({
    model: model,
    config: config,
    callbacks: {
      onopen: () => console.log('Connected to Gemini Live API'),
      onmessage: (message: LiveServerMessage) => responseQueue.push(message),
      onerror: (e: ErrorEvent) => console.error('Error:', e.message),
      onclose: (e: CloseEvent) => {
        console.log('Closed:', e.reason);
        // Stop capturing once the session is gone; otherwise the microphone
        // keeps running and keeps feeding audio to a dead connection.
        closed = true;
        micInstance.stop();
      },
    },
  });

  // Start loops only after a successful connect, so a failed connect leaves
  // nothing running. Messages that arrived meanwhile are already queued.
  void messageLoop().catch((err: unknown) => console.error('Message loop failed:', err));
  void playbackLoop().catch((err: unknown) => console.error('Playback loop failed:', err));

  const micInputStream = micInstance.getAudioStream();
  micInputStream.on('data', (data: Buffer) => {
    if (closed) return;
    // API expects base64 encoded PCM data
    session.sendRealtimeInput({
      audio: {
        data: data.toString('base64'),
        mimeType: "audio/pcm;rate=16000"
      }
    });
  });
  micInputStream.on('error', (err: Error) => {
    console.error('Microphone error:', err);
  });

  micInstance.start();
  console.log('Microphone started. Speak now...');
}

live().catch((err: unknown) => {
  console.error(err);
  // Report startup failures to the calling shell instead of exiting with 0.
  process.exitCode = 1;
});