-
Notifications
You must be signed in to change notification settings - Fork 69
Expand file tree
/
Copy pathzipformer_transducer.rs
More file actions
87 lines (72 loc) · 2.54 KB
/
Copy pathzipformer_transducer.rs
File metadata and controls
87 lines (72 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
use std::path::PathBuf;
use std::time::Instant;
use transcribe_rs::onnx::zipformer_transducer::ZipformerTransducerModel;
use transcribe_rs::onnx::Quantization;
use transcribe_rs::SpeechModel;
fn get_audio_duration(path: &PathBuf) -> Result<f64, Box<dyn std::error::Error>> {
let reader = hound::WavReader::open(path)?;
let spec = reader.spec();
let duration = reader.duration() as f64 / spec.sample_rate as f64;
Ok(duration)
}
/// Example entry point: loads a Zipformer transducer model, transcribes one
/// WAV file, and prints timing figures followed by the recognized text.
///
/// Command line: `[MODEL_DIR] [WAV_FILE] [--int8]`
///
/// * The first argument not starting with `--` is the model directory
///   (default `models/sherpa-onnx-zipformer-zh-en-2023-11-22`).
/// * The second such argument is the WAV file (default `samples/zh.wav`).
/// * `--int8` selects `Quantization::Int8`; otherwise `Quantization::FP32`
///   is used. Any other argument starting with `--` is ignored.
///
/// # Errors
///
/// Propagates any error from reading the WAV header, loading the model, or
/// running the transcription.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();

    let argv: Vec<String> = std::env::args().collect();
    let use_int8 = argv.iter().any(|arg| arg.as_str() == "--int8");

    // Positional arguments are everything after the program name that is not
    // a `--flag`; they are consumed in order: model directory, then WAV file.
    let mut positionals = argv
        .iter()
        .skip(1)
        .map(String::as_str)
        .filter(|arg| !arg.starts_with("--"));
    let model_dir = PathBuf::from(
        positionals
            .next()
            .unwrap_or("models/sherpa-onnx-zipformer-zh-en-2023-11-22"),
    );
    let wav_file = PathBuf::from(positionals.next().unwrap_or("samples/zh.wav"));

    // Read the clip length up front so a bad WAV path fails before the model
    // is loaded.
    let clip_seconds = get_audio_duration(&wav_file)?;
    println!("Audio duration: {:.2}s", clip_seconds);

    // Keep the enum value and its printable label together.
    let (quantization, precision_label) = if use_int8 {
        (Quantization::Int8, "int8")
    } else {
        (Quantization::FP32, "fp32")
    };

    println!("Using Zipformer Transducer engine");
    println!(
        "Loading model: {:?} (quantization: {})",
        model_dir, precision_label
    );

    let load_timer = Instant::now();
    let mut model = ZipformerTransducerModel::load(&model_dir, &quantization)?;
    println!("Model loaded in {:.2?}", load_timer.elapsed());

    println!("Transcribing file: {:?}", wav_file);
    let transcribe_timer = Instant::now();
    let output = model.transcribe_file(&wav_file, &transcribe_rs::TranscribeOptions::default())?;
    let transcribe_elapsed = transcribe_timer.elapsed();
    println!("Transcription completed in {:.2?}", transcribe_elapsed);

    // Ratio of audio length to wall-clock processing time.
    let realtime_ratio = clip_seconds / transcribe_elapsed.as_secs_f64();
    println!(
        "Real-time speedup: {:.2}x faster than real-time",
        realtime_ratio
    );

    println!("Transcription result:");
    println!("{}", output.text);

    // Segment boundaries are printed with an `s` suffix — presumably seconds;
    // confirm against the library's segment type.
    if let Some(pieces) = output.segments {
        println!("\nSegments:");
        for piece in pieces {
            println!(
                "[{:.2}s - {:.2}s]: {}",
                piece.start, piece.end, piece.text
            );
        }
    }

    Ok(())
}