@@ -93,7 +93,10 @@ impl ParaformerRealtimeV2Asr {
9393 Ok ( ( ) )
9494 }
9595
96- pub async fn start_pcm_recognition ( & mut self ) -> anyhow:: Result < ( ) > {
96+ pub async fn start_pcm_recognition (
97+ & mut self ,
98+ semantic_punctuation_enabled : bool ,
99+ ) -> anyhow:: Result < ( ) > {
97100 let task_id = Uuid :: new_v4 ( ) . to_string ( ) ;
98101 log:: info!( "Starting asr task with ID: {}" , task_id) ;
99102 self . task_id = task_id;
@@ -112,6 +115,7 @@ impl ParaformerRealtimeV2Asr {
112115 "parameters" : {
113116 "format" : "pcm" ,
114117 "sample_rate" : self . sample_rate,
118+ "semantic_punctuation_enabled" : semantic_punctuation_enabled,
115119 } ,
116120 "input" : { }
117121 } ,
@@ -163,6 +167,7 @@ impl ParaformerRealtimeV2Asr {
163167 "streaming" : "duplex"
164168 } ,
165169 "payload" : {
170+ "task_group" : "audio" ,
166171 "input" : { }
167172 }
168173 } ) ;
@@ -197,6 +202,7 @@ impl ParaformerRealtimeV2Asr {
197202 } else if let Some ( output) = response. payload . output {
198203 return Ok ( Some ( output. sentence ) ) ;
199204 } else {
205+ log:: error!( "ASR response has no output: {:?}" , text) ;
200206 return Err ( anyhow:: anyhow!( "ASR error: {:?}" , text) ) ;
201207 }
202208 }
@@ -226,31 +232,116 @@ async fn test_paraformer_asr() {
226232 let mut asr = ParaformerRealtimeV2Asr :: connect ( "" , token, head. sample_rate )
227233 . await
228234 . unwrap ( ) ;
229- asr. start_pcm_recognition ( ) . await . unwrap ( ) ;
235+ asr. start_pcm_recognition ( false ) . await . unwrap ( ) ;
230236
231237 asr. send_audio ( audio_data. clone ( ) ) . await . unwrap ( ) ;
232238 asr. finish_task ( ) . await . unwrap ( ) ;
233239
234240 loop {
235241 if let Ok ( Some ( sentence) ) = asr. next_result ( ) . await {
236- println ! ( "{:?}" , sentence) ;
242+ log :: info !( "{:?}" , sentence) ;
237243 if sentence. sentence_end {
238- println ! ( ) ;
244+ log :: info! ( "Final sentence received, ending recognition session." ) ;
239245 }
240246 } else {
241247 break ;
242248 }
243249 }
244250
245- asr. start_pcm_recognition ( ) . await . unwrap ( ) ;
251+ asr. start_pcm_recognition ( false ) . await . unwrap ( ) ;
246252 asr. send_audio ( audio_data) . await . unwrap ( ) ;
247253 asr. finish_task ( ) . await . unwrap ( ) ;
248254
249255 loop {
250256 if let Ok ( Some ( sentence) ) = asr. next_result ( ) . await {
251- println ! ( "{:?}" , sentence) ;
257+ log:: info!( "{:?}" , sentence) ;
258+ if sentence. sentence_end {
259+ log:: info!( "Final sentence received, ending recognition session." ) ;
260+ }
261+ } else {
262+ break ;
263+ }
264+ }
265+ }
266+
267+ // cargo test --package echokit_server --bin echokit_server -- ai::bailian::realtime_asr::test_paraformer_stream_asr --exact --show-output
268+ #[ tokio:: test]
269+ async fn test_paraformer_stream_asr ( ) {
270+ env_logger:: init ( ) ;
271+ let token = std:: env:: var ( "COSYVOICE_TOKEN" ) . unwrap ( ) ;
272+
273+ let data = std:: fs:: read ( "./resources/test/out.wav" ) . unwrap ( ) ;
274+ let mut reader = wav_io:: reader:: Reader :: from_vec ( data) . expect ( "Failed to create WAV reader" ) ;
275+ let header = reader. read_header ( ) . unwrap ( ) ;
276+ let mut samples = crate :: util:: get_samples_f32 ( & mut reader) . unwrap ( ) ;
277+
278+ // pad 10 seconds of silence
279+ samples. extend_from_slice ( & [ 0.0 ; 16000 * 10 ] ) ;
280+
281+ let samples = crate :: util:: convert_samples_f32_to_i16_bytes ( & samples) ;
282+ let audio_data = bytes:: Bytes :: from ( samples) ;
283+
284+ let mut asr = ParaformerRealtimeV2Asr :: connect ( "" , token, header. sample_rate )
285+ . await
286+ . unwrap ( ) ;
287+ asr. start_pcm_recognition ( true ) . await . unwrap ( ) ;
288+
289+ let mut ms = 0 ;
290+
291+ for chunk in audio_data. chunks ( 3200 ) {
292+ ms += 100 ;
293+ log:: info!( "Sending audio chunk at {} ms" , ms) ;
294+ asr. send_audio ( Bytes :: copy_from_slice ( chunk) ) . await . unwrap ( ) ;
295+ // tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
296+ let wait_asr_fut = asr. next_result ( ) ;
297+
298+ let ( sentence, has_result) = tokio:: select! {
299+ res = wait_asr_fut => {
300+ ( res. unwrap( ) , true )
301+ }
302+ _ = async { } => {
303+ ( None , false )
304+ }
305+ } ;
306+
307+ if has_result {
308+ log:: info!( "{:?} {ms}" , sentence) ;
309+ }
310+
311+ if let Some ( s) = sentence {
312+ if s. sentence_end {
313+ break ;
314+ }
315+ }
316+ }
317+
318+ asr. finish_task ( ) . await . unwrap ( ) ;
319+
320+ loop {
321+ if let Ok ( Some ( sentence) ) = asr. next_result ( ) . await {
322+ log:: info!( "{:?}" , sentence) ;
323+ if sentence. sentence_end {
324+ log:: info!( "End of sentence" ) ;
325+ }
326+ } else {
327+ break ;
328+ }
329+ }
330+
331+ asr. start_pcm_recognition ( true ) . await . unwrap ( ) ;
332+
333+ ms = 0 ;
334+ for chunk in audio_data. chunks ( 3200 ) {
335+ ms += 100 ;
336+ log:: info!( "Sending audio chunk at {} ms" , ms) ;
337+ asr. send_audio ( Bytes :: copy_from_slice ( chunk) ) . await . unwrap ( ) ;
338+ // tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
339+ }
340+ loop {
341+ if let Ok ( Some ( sentence) ) = asr. next_result ( ) . await {
342+ log:: info!( "{:?}" , sentence) ;
252343 if sentence. sentence_end {
253- println ! ( ) ;
344+ log :: info! ( "End of sentence" ) ;
254345 }
255346 } else {
256347 break ;
0 commit comments