Add OpenVINO example (and rework example structure)

tazz4843 · tazz4843 · commit c338931a275b · 2025-04-10T14:47:04.000-06:00
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [workspace]
 members = ["sys"]
-exclude = ["examples/full_usage"]
+exclude = ["examples/examples_common", "examples/full_usage", "examples/openvino_usage"]
 
 [package]
 name = "whisper-rs"
diff --git a/examples/examples_common/Cargo.toml b/examples/examples_common/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "examples-common"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+hound = "3"
diff --git a/examples/examples_common/src/lib.rs b/examples/examples_common/src/lib.rs
@@ -0,0 +1,23 @@
+use hound::{SampleFormat, WavReader};
+use std::path::Path;
+
+/// Loads the WAV file at `path` and returns its samples as `i16` values.
+///
+/// # Panics
+///
+/// Panics if the file cannot be opened by `WavReader::open`, if the audio is
+/// not mono, integer-format, 16000 Hz and 16 bits per sample, or if any
+/// individual sample fails to decode.
+pub fn parse_wav_file(path: &Path) -> Vec<i16> {
+    let reader = WavReader::open(path).expect("failed to read file");
+    let spec = reader.spec();
+
+    // Checked in this order so the first mismatch decides the panic message.
+    assert!(spec.channels == 1, "expected mono audio file");
+    assert!(spec.sample_format == SampleFormat::Int, "expected integer sample format");
+    assert!(spec.sample_rate == 16000, "expected 16KHz sample rate");
+    assert!(spec.bits_per_sample == 16, "expected 16 bits per sample");
+
+    let decoded = reader.into_samples::<i16>();
+    decoded.map(|sample| sample.expect("sample")).collect()
+}
diff --git a/examples/full_usage/2830-3980-0043.wav b/examples/full_usage/2830-3980-0043.wav
diff --git a/examples/full_usage/Cargo.toml b/examples/full_usage/Cargo.toml
@@ -1,10 +1,8 @@
 [package]
 name = "full_usage"
 version = "0.1.0"
-edition = "2021"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+edition = "2024"
 
 [dependencies]
-hound = "3"
 whisper-rs = { path = "../.." }
+examples-common = { path = "../examples_common" }
diff --git a/examples/full_usage/src/main.rs b/examples/full_usage/src/main.rs
@@ -1,31 +1,8 @@
 #![allow(clippy::uninlined_format_args)]
 
-use hound::{SampleFormat, WavReader};
 use std::path::Path;
 use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
 
-fn parse_wav_file(path: &Path) -> Vec<i16> {
-    let reader = WavReader::open(path).expect("failed to read file");
-
-    if reader.spec().channels != 1 {
-        panic!("expected mono audio file");
-    }
-    if reader.spec().sample_format != SampleFormat::Int {
-        panic!("expected integer sample format");
-    }
-    if reader.spec().sample_rate != 16000 {
-        panic!("expected 16KHz sample rate");
-    }
-    if reader.spec().bits_per_sample != 16 {
-        panic!("expected 16 bits per sample");
-    }
-
-    reader
-        .into_samples::<i16>()
-        .map(|x| x.expect("sample"))
-        .collect::<Vec<_>>()
-}
-
 fn main() {
     let arg1 = std::env::args()
         .nth(1)
@@ -42,7 +19,7 @@ fn main() {
         panic!("whisper file doesn't exist")
     }
 
-    let original_samples = parse_wav_file(audio_path);
+    let original_samples = examples_common::parse_wav_file(audio_path);
     let mut samples = vec![0.0f32; original_samples.len()];
     whisper_rs::convert_integer_to_float_audio(&original_samples, &mut samples)
         .expect("failed to convert samples");
diff --git a/examples/openvino_usage/Cargo.toml b/examples/openvino_usage/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "openvino_usage"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+whisper-rs = { path = "../..", features = ["openvino"] }
+examples-common = { path = "../examples_common" }
diff --git a/examples/openvino_usage/README.md b/examples/openvino_usage/README.md
@@ -0,0 +1,41 @@
+# OpenVINO Usage Example
+
+Run `cargo build --release` in this directory,
+then `./target/release/openvino_usage ../full_usage/2830-3980-0043.wav /path/to/ggml-model.bin`
+
+There should be an OpenVINO file associated with the model next to it,
+otherwise you will get an error at runtime.
+
+## Getting your paws on OpenVINO data
+
+Unfortunately, there are no downloads of OpenVINO state available. The only way to get it is to generate it yourself.
+
+Example for most Linux distros (run this from the current directory):
+
+```bash
+cd ../..
+
+# We need to pull in whisper.cpp.
+# This should've already been done when you cloned the repo, but let's be sure.
+git submodule update --init --recursive
+
+cd sys/whisper.cpp/models/
+
+# Generate a new venv and install the required things.
+# This might take a bit, grab a drink.
+# (yes this installs CUDA even if you don't have a Nvidia GPU, enjoy your 6GB venv setup)
+python3.12 -m venv venv
+source venv/bin/activate
+python3 -m pip install -U pip
+python3 -m pip install -r requirements-openvino.txt
+
+# This is the key line. Change base as necessary to the name of the model you want.
+python3 convert-whisper-to-openvino.py --model base
+```
+
+Note that if the conversion script prints a line stating
+`assert x.shape[1:] == self.positional_embedding.shape, "incorrect audio shape"`
+this is not fatal.
+The output file will still be generated normally.
+
+See upstream's README for more info: https://github.com/ggerganov/whisper.cpp/#openvino-support
diff --git a/examples/openvino_usage/src/main.rs b/examples/openvino_usage/src/main.rs
@@ -0,0 +1,74 @@
+#![allow(clippy::uninlined_format_args)]
+
+use std::path::Path;
+use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
+
+/// Transcribes a WAV file with a Whisper model, using the OpenVINO encoder.
+///
+/// Usage: `openvino_usage <audio.wav> <ggml-model.bin>`
+///
+/// Every failure (missing argument, nonexistent path, model or inference
+/// error) aborts the program with a panic message naming the failed step.
+fn main() {
+    let arg1 = std::env::args()
+        .nth(1)
+        .expect("first argument should be path to WAV file");
+    let audio_path = Path::new(&arg1);
+    if !audio_path.exists() {
+        panic!("audio file doesn't exist");
+    }
+    let arg2 = std::env::args()
+        .nth(2)
+        .expect("second argument should be path to Whisper model");
+    let whisper_path = Path::new(&arg2);
+    if !whisper_path.exists() {
+        panic!("whisper file doesn't exist");
+    }
+
+    // Decode the WAV into i16 samples, then convert to the f32 buffer passed to `full`.
+    let original_samples = examples_common::parse_wav_file(audio_path);
+    let mut samples = vec![0.0f32; original_samples.len()];
+    whisper_rs::convert_integer_to_float_audio(&original_samples, &mut samples)
+        .expect("failed to convert samples");
+
+    let ctx = WhisperContext::new_with_params(
+        &whisper_path.to_string_lossy(),
+        WhisperContextParameters::default(),
+    )
+    .expect("failed to open model");
+    let mut state = ctx.create_state().expect("failed to create a model state");
+
+    // Enable OpenVINO now
+    // We're expecting the OpenVINO file sitting right next to the model
+    state
+        .init_openvino_encoder(None, "GPU", None)
+        .expect("failed to enable openvino");
+
+    let mut params = FullParams::new(SamplingStrategy::default());
+    params.set_initial_prompt("experience");
+    params.set_progress_callback_safe(|progress| println!("Progress callback: {}%", progress));
+
+    // Time only the inference call itself.
+    let st = std::time::Instant::now();
+    state
+        .full(params, &samples)
+        .expect("failed to run model");
+    let elapsed = st.elapsed();
+
+    let num_segments = state
+        .full_n_segments()
+        .expect("failed to get number of segments");
+    for i in 0..num_segments {
+        let segment = state
+            .full_get_segment_text(i)
+            .expect("failed to get segment");
+        let start_timestamp = state
+            .full_get_segment_t0(i)
+            .expect("failed to get start timestamp");
+        let end_timestamp = state
+            .full_get_segment_t1(i)
+            .expect("failed to get end timestamp");
+        println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment);
+    }
+    println!("took {}ms", elapsed.as_millis());
+}