Skip to content

Commit c9476e0

Browse files
committed
stream recording, try local file dump
1 parent 2d37054 commit c9476e0

File tree

5 files changed

+129
-35
lines changed

5 files changed

+129
-35
lines changed

Diff for: packages/firebase_vertexai/firebase_vertexai/example/lib/pages/bidi_page.dart

+58-19
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import 'dart:io';
1515
import 'dart:typed_data';
1616
import 'dart:async';
17-
17+
import 'package:path_provider/path_provider.dart';
1818
import 'package:flutter/material.dart';
1919
import 'package:firebase_vertexai/firebase_vertexai.dart';
2020
import '../widgets/message_widget.dart';
@@ -55,6 +55,14 @@ class _BidiPageState extends State<BidiPage> {
5555
);
5656
}
5757

58+
@override
59+
void dispose() {
60+
if (_session_opening) {
61+
_session.close();
62+
}
63+
super.dispose();
64+
}
65+
5866
@override
5967
Widget build(BuildContext context) {
6068
return Scaffold(
@@ -135,7 +143,8 @@ class _BidiPageState extends State<BidiPage> {
135143
if (!_loading)
136144
IconButton(
137145
onPressed: () async {
138-
await _sendTextPrompt(textPrompt: _textController.text);
146+
await _sendPremadeAudioPayload();
147+
// await _sendTextPrompt(textPrompt: _textController.text);
139148
},
140149
icon: Icon(
141150
Icons.send,
@@ -144,6 +153,15 @@ class _BidiPageState extends State<BidiPage> {
144153
)
145154
else
146155
const CircularProgressIndicator(),
156+
IconButton(
157+
onPressed: () async {
158+
await _checkWsStatus();
159+
},
160+
icon: Icon(
161+
Icons.check,
162+
color: Theme.of(context).colorScheme.primary,
163+
),
164+
)
147165
],
148166
),
149167
),
@@ -178,6 +196,7 @@ class _BidiPageState extends State<BidiPage> {
178196
if (!_session_opening) {
179197
_session = await widget.model.connect(model: modelName, config: config);
180198
_session_opening = true;
199+
unawaited(_handle_response());
181200
} else {
182201
await _session!.close();
183202
await _audioManager.stop();
@@ -219,15 +238,14 @@ class _BidiPageState extends State<BidiPage> {
219238

220239
Future<void> _stopRecording() async {
221240
await _audioRecorder.stopRecording();
222-
var audioPrompt = await _audioRecorder.getAudioBytes(fromFile: true);
223-
//await _streamAudioChunks(audioPrompt, 'audio/pcm');
224-
//await _sendAudioPrompt(audioPrompt);
225-
await _sendAudioRealtime(audioPrompt);
241+
var audioPrompt = await _audioRecorder.getAudioBytes();
242+
await _streamAudioChunks(audioPrompt, 'audio/pcm');
243+
// await _sendAudioPrompt(audioPrompt);
244+
// await _sendAudioRealtime(audioPrompt);
226245
}
227246

228247
List<Uint8List> _splitIntoChunks(Uint8List audioData, int chunkSize) {
229248
final chunks = <Uint8List>[];
230-
const chunkSize = 1024;
231249

232250
for (var i = 0; i < audioData.length; i += chunkSize) {
233251
final end =
@@ -246,8 +264,7 @@ class _BidiPageState extends State<BidiPage> {
246264
final streamController = StreamController<InlineDataPart>();
247265
for (var chunk in chunks) {
248266
if (identical(chunk, chunks.last)) {
249-
final lastData =
250-
InlineDataPart('audio/pcm', chunk, willContinue: false);
267+
final lastData = InlineDataPart('audio/pcm', chunk, willContinue: true);
251268
streamController.add(lastData);
252269
} else {
253270
final data = InlineDataPart('audio/pcm', chunk, willContinue: true);
@@ -262,8 +279,8 @@ class _BidiPageState extends State<BidiPage> {
262279
// Process the message received from the server
263280
print('Received message: $message');
264281
}
265-
print('Stream all audio chunk to server');
266-
await _handle_response_audio();
282+
print('Send all audio chunk to server');
283+
_session.printWsStatus();
267284
setState(() {
268285
_loading = false;
269286
});
@@ -273,7 +290,7 @@ class _BidiPageState extends State<BidiPage> {
273290
setState(() {
274291
_loading = true;
275292
});
276-
final chunks = _splitIntoChunks(audio, 1024);
293+
final chunks = _splitIntoChunks(audio, 512);
277294

278295
final media_chunks = <InlineDataPart>[];
279296
for (var chunk in chunks) {
@@ -288,8 +305,7 @@ class _BidiPageState extends State<BidiPage> {
288305
}
289306
await _session!.stream(mediaChunks: media_chunks);
290307
print('Stream realtime audio chunk to server in one request');
291-
292-
await _handle_response_audio();
308+
_session.printWsStatus();
293309
setState(() {
294310
_loading = false;
295311
});
@@ -300,16 +316,36 @@ class _BidiPageState extends State<BidiPage> {
300316
_loading = true;
301317
});
302318
final prompt = Content.inlineData('audio/pcm', audio);
303-
await _session!.send(input: prompt, turnComplete: true);
319+
await _session!.send(input: prompt);
304320

305321
print('Sent audio chunk to server');
322+
_session.printWsStatus();
323+
setState(() {
324+
_loading = false;
325+
});
326+
}
327+
328+
Future<void> _sendPremadeAudioPayload() async {
329+
setState(() {
330+
_loading = true;
331+
});
332+
final dir = await getDownloadsDirectory();
333+
final path = '${dir!.path}/audio_payload.json';
334+
335+
final file = File(path!);
336+
final dataDump = await file.readAsString();
337+
338+
_session!.dumpData(dataDump);
306339

307-
await _handle_response_audio();
308340
setState(() {
309341
_loading = false;
310342
});
311343
}
312344

345+
Future<void> _checkWsStatus() async {
346+
_session!.printWsStatus();
347+
}
348+
313349
Future<void> _sendTextPrompt({String? textPrompt}) async {
314350
setState(() {
315351
_loading = true;
@@ -330,15 +366,16 @@ class _BidiPageState extends State<BidiPage> {
330366

331367
await _session!.send(input: prompt, turnComplete: true);
332368
print('Prompt sent to server');
333-
await _handle_response_audio();
369+
_session.printWsStatus();
370+
// await _handle_response();
334371
}
335372

336373
setState(() {
337374
_loading = false;
338375
});
339376
}
340377

341-
Future<void> _handle_response_audio() async {
378+
Future<void> _handle_response() async {
342379
final responseStream = _session!.receive();
343380
var chunkBuilder = BytesBuilder();
344381
var audioIndex = 0;
@@ -394,9 +431,11 @@ class _BidiPageState extends State<BidiPage> {
394431
24000,
395432
);
396433
_audioManager.addAudio(chunk);
434+
audioIndex = 0;
435+
chunkBuilder.clear();
397436
}
398437

399-
break; // Exit the loop if the turn is complete
438+
//break; // Exit the loop if the turn is complete
400439
}
401440
}
402441
}

Diff for: packages/firebase_vertexai/firebase_vertexai/example/lib/utils/audio_recorder.dart

+27-11
Original file line numberDiff line numberDiff line change
@@ -86,23 +86,26 @@ class InMemoryAudioRecorder {
8686

8787
_lastAudioPath = await _getPath();
8888

89-
await _recorder.start(recordConfig, path: _lastAudioPath!);
89+
//await _recorder.start(recordConfig, path: _lastAudioPath!);
9090

91-
// final stream = await _recorder.startStream(recordConfig);
92-
// _recordSubscription = stream.listen((data) {
93-
// _audioChunks.add(data);
94-
// print('captured data $data');
95-
// });
91+
final stream = await _recorder.startStream(recordConfig);
92+
_recordSubscription = stream.listen((data) {
93+
_audioChunks.add(data);
94+
//print('captured data $data');
95+
});
9696
}
9797

9898
Future<void> stopRecording() async {
99-
// await _recordSubscription?.cancel();
100-
// _recordSubscription = null;
99+
await _recordSubscription?.cancel();
100+
_recordSubscription = null;
101101

102102
await _recorder.stop();
103103
}
104104

105-
Future<Uint8List> getAudioBytes({bool fromFile = false}) async {
105+
Future<Uint8List> getAudioBytes({
106+
bool fromFile = false,
107+
bool removeHeader = false,
108+
}) async {
106109
if (fromFile) {
107110
return _getAudioBytesFromFile(_lastAudioPath!);
108111
} else {
@@ -114,14 +117,27 @@ class InMemoryAudioRecorder {
114117
}
115118
}
116119

117-
Future<Uint8List> _getAudioBytesFromFile(String filePath) async {
120+
Future<Uint8List> _removeWavHeader(Uint8List audio) async {
121+
// Assuming a standard WAV header size of 44 bytes
122+
const wavHeaderSize = 44;
123+
final audioData = audio.sublist(wavHeaderSize);
124+
return audioData;
125+
}
126+
127+
Future<Uint8List> _getAudioBytesFromFile(
128+
String filePath, {
129+
bool removeHeader = false,
130+
}) async {
118131
final file = File(_lastAudioPath!);
119132
if (!await file.exists()) {
120133
throw Exception('Audio file not found: ${file.path}');
121134
}
122135

123-
final pcmBytes = await file.readAsBytes();
136+
var pcmBytes = await file.readAsBytes();
124137
print('pcm file ${file.path} has byte size of ${pcmBytes.length}');
138+
if (removeHeader) {
139+
pcmBytes = await _removeWavHeader(pcmBytes);
140+
}
125141
return pcmBytes;
126142
}
127143
}

Diff for: packages/firebase_vertexai/firebase_vertexai/lib/src/content.dart

+21-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,13 @@ final class Content {
6060
/// Convert the [Content] to json format.
6161
Map<String, Object?> toJson() => {
6262
if (role case final role?) 'role': role,
63-
'parts': parts.map((p) => p.toJson()).toList()
63+
'parts': parts?.map((p) {
64+
if (p is InlineDataPart && p.mimeType.startsWith('audio/')) {
65+
return p.toJson();
66+
} else {
67+
return p.toJson();
68+
}
69+
}).toList(),
6470
};
6571
}
6672

@@ -142,6 +148,20 @@ final class InlineDataPart implements Part {
142148
if (willContinue != null) 'willContinue': willContinue,
143149
}
144150
};
151+
152+
Object toMediaChunkJson() => {
153+
'mimeType': mimeType,
154+
'data': base64Encode(bytes),
155+
if (willContinue != null) 'willContinue': willContinue,
156+
};
157+
158+
Object toAudioJson() => {
159+
'inlineData': {
160+
'data': bytes.toString(),
161+
'mimeType': mimeType,
162+
if (willContinue != null) 'willContinue': willContinue,
163+
}
164+
};
145165
}
146166

147167
/// A predicted `FunctionCall` returned from the model that contains

Diff for: packages/firebase_vertexai/firebase_vertexai/lib/src/live.dart

+21-3
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ class AsyncSession {
4646
Future<void> stream({
4747
required List<InlineDataPart> mediaChunks,
4848
}) async {
49-
// var clientMessage = _parseClientMessage(input, endOfTurn);
5049
var clientMessage = LiveClientRealtimeInput(mediaChunks: mediaChunks);
50+
5151
var clientJson = jsonEncode(clientMessage.toJson());
5252
print('Streaming $clientJson');
5353
_ws.sink.add(clientJson);
@@ -119,17 +119,26 @@ class AsyncSession {
119119
print('send audio data with size ${data.bytes.length}');
120120

121121
await stream(mediaChunks: [data]);
122+
123+
// await send(input: Content.inlineData(mimeType, data.bytes));
122124
// Give a chance for the receive loop to process responses.
123-
await Future.delayed(Duration.zero);
125+
await Future.delayed(const Duration(milliseconds: 1));
126+
printWsStatus();
124127
}
125128
} finally {
126129
print('client audio sent complete');
127-
// await send(turnComplete: true);
130+
await send(turnComplete: true);
128131
// Complete the completer to signal the end of the stream.
129132
completer.complete();
130133
}
131134
}
132135

136+
Future<void> dumpData(String data) async {
137+
print('dump data $data');
138+
_ws.sink.add(data);
139+
printWsStatus();
140+
}
141+
133142
Map<String, dynamic> _LiveServerContentFromMldev(dynamic fromObject) {
134143
var toObject = <String, dynamic>{};
135144
if (fromObject is Map && fromObject.containsKey('modelTurn')) {
@@ -279,4 +288,13 @@ class AsyncSession {
279288
Future<void> close() async {
280289
await _ws.sink.close();
281290
}
291+
292+
void printWsStatus() {
293+
if (_ws.closeCode != null) {
294+
print('WebSocket status: Closed, close code ${_ws.closeCode}');
295+
print('Closed reason ${_ws.closeReason}');
296+
} else {
297+
print('WebSocket status: Open');
298+
}
299+
}
282300
}

Diff for: packages/firebase_vertexai/firebase_vertexai/lib/src/live_api.dart

+2-1
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,8 @@ class LiveClientRealtimeInput {
171171

172172
Map<String, dynamic> toJson() => {
173173
'realtime_input': {
174-
'media_chunks': mediaChunks?.map((e) => e.toJson()).toList(),
174+
'media_chunks':
175+
mediaChunks?.map((e) => e.toMediaChunkJson()).toList(),
175176
},
176177
};
177178
}

0 commit comments

Comments
 (0)