Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions command-line/node/main.mts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ const model = 'gemini-3.1-flash-live-preview';
// Settings object for the live session.
// NOTE(review): where this object is consumed is not visible in this hunk —
// presumably it is passed to the live connect call; confirm.
const config = {
// Request audio responses from the model.
responseModalities: [Modality.AUDIO],
systemInstruction: "You are a helpful and friendly AI assistant.",
// Empty option objects. Presumably their presence turns on text transcripts
// of the model's audio and of the user's audio respectively (messageLoop
// reads `outputTranscription` / `inputTranscription` from server messages)
// — TODO confirm against the Live API reference.
outputAudioTranscription: {},
inputAudioTranscription: {},
};

async function live() {
Expand Down Expand Up @@ -40,22 +42,38 @@ async function live() {
process.stdin.pipe(speaker);
}

let lastWasInput = false;

/**
 * Receive loop: awaits messages from `waitMessage()` one at a time, queues
 * decoded audio into `audioQueue`, and echoes transcripts to stdout.
 * The loop contains no `break` or `return`.
 *
 * Relies on `waitMessage`, `audioQueue` and `lastWasInput`, all of which are
 * defined outside this function.
 *
 * NOTE(review): this span comes from a rendered diff with no +/- markers, so
 * a few statements appear twice — once in their earlier long-hand form
 * (`message.serverContent && ...`) and once in the newer `sc` / optional
 * chaining form. As shown here the braces do not balance; the long-hand
 * lines appear to be the ones the change removes.
 */
async function messageLoop() {
// Puts incoming messages in the audio queue.
while (true) {
const message = await waitMessage();
// NOTE(review): looks like the earlier form of the interruption check below.
if (message.serverContent && message.serverContent.interrupted) {
const sc = message.serverContent;
// Nothing below applies to a message that has no serverContent.
if (!sc) continue;
if (sc.interrupted) {
// Empty the queue on interruption to stop playback
audioQueue.length = 0;
// The rest of an interrupted message (audio parts, transcripts) is not processed.
continue;
}
// NOTE(review): the next three lines look like the earlier form of the
// `sc.modelTurn?.parts` block that follows them.
if (message.serverContent && message.serverContent.modelTurn && message.serverContent.modelTurn.parts) {
for (const part of message.serverContent.modelTurn.parts) {
if (part.inlineData && part.inlineData.data) {
if (sc.modelTurn?.parts) {
for (const part of sc.modelTurn.parts) {
if (part.inlineData?.data) {
// Inline data is decoded from base64 into a Buffer before being queued.
audioQueue.push(Buffer.from(part.inlineData.data, 'base64'));
}
}
}
// Output transcript chunk: written to stdout as plain text.
if (sc.outputTranscription?.text) {
// Start a fresh line when the previous chunk printed was an input transcript.
if (lastWasInput) { process.stdout.write('\n'); lastWasInput = false; }
const t = sc.outputTranscription.text;
process.stdout.write(t);
// End the line when the chunk ends in '.', '!' or '?' (trailing whitespace allowed).
if (/[.!?]\s*$/.test(t)) process.stdout.write('\n');
}
// Input transcript chunk: written to stdout wrapped in ANSI italic on/reset codes.
if (sc.inputTranscription?.text) {
// Start a fresh line when the previous chunk printed was not an input transcript.
if (!lastWasInput) { process.stdout.write('\n'); lastWasInput = true; }
const t = sc.inputTranscription.text;
process.stdout.write(`\x1b[3m${t}\x1b[0m`);
// Same sentence-end rule as for the output transcript.
if (/[.!?]\s*$/.test(t)) process.stdout.write('\n');
}
}
}

Expand Down
26 changes: 24 additions & 2 deletions command-line/python/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
# Settings dict for the live session.
# NOTE(review): where this dict is consumed is not visible in this hunk —
# presumably it is passed when the session is opened; confirm.
CONFIG = {
# Request audio responses from the model.
"response_modalities": ["AUDIO"],
"system_instruction": "You are a helpful and friendly AI assistant.",
# Empty option dicts. Presumably their presence turns on text transcripts of
# the model's audio and of the user's audio respectively (receive_audio reads
# `output_transcription` / `input_transcription` from server content)
# — TODO confirm against the Live API reference.
"output_audio_transcription": {},
"input_audio_transcription": {},
}

audio_queue_output = asyncio.Queue()
Expand Down Expand Up @@ -50,13 +52,33 @@ async def send_realtime(session):

async def receive_audio(session):
"""Receives responses from GenAI and puts audio data into the speaker audio queue."""
last_was_input = False
while True:
turn = session.receive()
async for response in turn:
if (response.server_content and response.server_content.model_turn):
for part in response.server_content.model_turn.parts:
sc = response.server_content
if not sc:
continue
if sc.model_turn:
for part in sc.model_turn.parts:
if part.inline_data and isinstance(part.inline_data.data, bytes):
audio_queue_output.put_nowait(part.inline_data.data)
if sc.output_transcription:
if last_was_input:
print()
last_was_input = False
t = sc.output_transcription.text
print(t, end="", flush=True)
if t.rstrip()[-1:] in '.!?':
print()
if sc.input_transcription:
if not last_was_input:
print()
last_was_input = True
t = sc.input_transcription.text
print(f"\033[3m{t}\033[0m", end="", flush=True)
if t.rstrip()[-1:] in '.!?':
print()

# Empty the queue on interruption to stop playback
while not audio_queue_output.empty():
Expand Down