Skip to content
This repository was archived by the owner on Jul 30, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions BUILDING.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,36 @@ brew install cmake
```

CMake can also be installed from https://cmake.org/download/ but `cmake` binary needs to be in your PATH.

# OpenVINO support

## Development Tools
OpenVINO support requires the OpenVINO Development Tools to be installed. You can find
instructions for installing the OpenVINO Development Tools here:
https://docs.openvino.ai/2023.0/openvino_docs_install_guides_install_dev_tools.html#for-c-developers

On Arch Linux, you can install the OpenVINO Development Tools with the following command:
```
paru -S openvino
```
This build may take a significant amount of time, but can save massive headaches later on.

## Building
First, the `openvino` feature must be enabled in your Cargo.toml.

Next, you must set the `OpenVINO_DIR` environment variable to the path where CMake can find
`OpenVINOConfig.cmake`.
This is usually in the `cmake` directory of the OpenVINO installation.

If you used the AUR package to install OpenVINO, the location of this file is `/opt/intel/openvino/runtime/cmake`.

```
export OpenVINO_DIR=/opt/intel/openvino/runtime/cmake
```

Finally, you can build whisper-rs as normal.

## Tested platforms
- Arch Linux

If you have successfully built whisper-rs with OpenVINO on another platform, please open a PR to document it here!
7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[workspace]
members = ["sys"]
exclude = ["examples/full_usage"]
exclude = ["examples/examples_common", "examples/full_usage", "examples/openvino_usage"]

[package]
name = "whisper-rs"
Expand Down Expand Up @@ -34,6 +34,7 @@ metal = ["whisper-rs-sys/metal", "_gpu"]
vulkan = ["whisper-rs-sys/vulkan", "_gpu"]
openmp = ["whisper-rs-sys/openmp"]
_gpu = []
openvino = ["whisper-rs-sys/openvino"]
test-with-tiny-model = []

# Bring logs into Rust via the log crate. *Warning*: not mutually exclusive with tracing_backend,
Expand All @@ -43,3 +44,7 @@ log_backend = ["dep:log"]
# Bring logs into Rust via the tracing crate. *Warning*: not mutually exclusive with log_backend,
# will result in duplicate logs if both are enabled and one consumes logs from the other.
tracing_backend = ["dep:tracing"]

[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ All disabled by default unless otherwise specified.

## Building

See [BUILDING.md](BUILDING.md) for instructions for building whisper-rs on Windows and OSX M1. Linux builds should just
See [BUILDING.md](BUILDING.md) for instructions for building whisper-rs on Windows and OSX M1,
or with OpenVINO on any OS.
Besides OpenVINO, Linux builds should just
work out of the box.

## Troubleshooting
Expand Down
7 changes: 7 additions & 0 deletions examples/examples_common/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[package]
name = "examples-common"
version = "0.1.0"
edition = "2024"

[dependencies]
hound = "3"
24 changes: 24 additions & 0 deletions examples/examples_common/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use hound::{SampleFormat, WavReader};
use std::path::Path;

/// Read a WAV file from `path` and return its samples as signed 16-bit integers.
///
/// The file must be mono, use the integer sample format, have a 16000 Hz sample rate
/// and 16 bits per sample.
///
/// # Panics
/// Panics if the file cannot be opened, if any of the format requirements above
/// is not met, or if a sample cannot be decoded.
pub fn parse_wav_file(path: &Path) -> Vec<i16> {
    let wav = WavReader::open(path).expect("failed to read file");

    // Fetch the header once; the checks run in a fixed order so the first
    // violated requirement is the one reported.
    let spec = wav.spec();
    assert!(spec.channels == 1, "expected mono audio file");
    assert!(
        spec.sample_format == SampleFormat::Int,
        "expected integer sample format"
    );
    assert!(spec.sample_rate == 16000, "expected 16KHz sample rate");
    assert!(spec.bits_per_sample == 16, "expected 16 bits per sample");

    let mut decoded = Vec::new();
    for sample in wav.into_samples::<i16>() {
        decoded.push(sample.expect("sample"));
    }
    decoded
}
Binary file removed examples/full_usage/2830-3980-0043.wav
Binary file not shown.
6 changes: 2 additions & 4 deletions examples/full_usage/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
[package]
name = "full_usage"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
edition = "2024"

[dependencies]
hound = "3"
whisper-rs = { path = "../.." }
examples-common = { path = "../examples_common" }
25 changes: 1 addition & 24 deletions examples/full_usage/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,8 @@
#![allow(clippy::uninlined_format_args)]

use hound::{SampleFormat, WavReader};
use std::path::Path;
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};

/// Open the WAV file at `path`, verify it has the expected layout, and return
/// every sample as an `i16`.
///
/// Expected layout: one channel, integer samples, 16000 Hz, 16 bits per sample.
///
/// # Panics
/// Panics when the file cannot be opened, when the layout differs from the one
/// described above, or when decoding a sample fails.
fn parse_wav_file(path: &Path) -> Vec<i16> {
    let wav_reader = WavReader::open(path).expect("failed to read file");
    let header = wav_reader.spec();

    // Guard clauses, evaluated in the same order as the layout list above.
    if header.channels != 1 {
        panic!("expected mono audio file");
    }
    if header.sample_format != SampleFormat::Int {
        panic!("expected integer sample format");
    }
    if header.sample_rate != 16000 {
        panic!("expected 16KHz sample rate");
    }
    if header.bits_per_sample != 16 {
        panic!("expected 16 bits per sample");
    }

    let samples: Vec<i16> = wav_reader
        .into_samples::<i16>()
        .map(|sample| sample.expect("sample"))
        .collect();
    samples
}

fn main() {
let arg1 = std::env::args()
.nth(1)
Expand All @@ -42,7 +19,7 @@ fn main() {
panic!("whisper file doesn't exist")
}

let original_samples = parse_wav_file(audio_path);
let original_samples = examples_common::parse_wav_file(audio_path);
let mut samples = vec![0.0f32; original_samples.len()];
whisper_rs::convert_integer_to_float_audio(&original_samples, &mut samples)
.expect("failed to convert samples");
Expand Down
8 changes: 8 additions & 0 deletions examples/openvino_usage/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[package]
name = "openvino_usage"
version = "0.1.0"
edition = "2024"

[dependencies]
whisper-rs = { path = "../..", features = ["openvino"] }
examples-common = { path = "../examples_common" }
41 changes: 41 additions & 0 deletions examples/openvino_usage/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# OpenVINO Usage Example

Run `cargo build --release` in this directory,
then `./target/release/openvino_usage ../examples_common/2830-3980-0043.wav /path/to/ggml-model.bin`

There should be an OpenVINO file associated with the model next to it,
otherwise you will get an error at runtime.

## Getting your paws on OpenVINO data

Unfortunately, there are no pre-built downloads of OpenVINO state. The only way to get it is to generate it yourself.

Example for most Linux distros (run this from the current directory):

```bash
cd ../..

# We need to pull in whisper.cpp.
# This should've already been done when you cloned the repo, but let's be sure.
git submodule update --init --recursive

cd sys/whisper.cpp/models/

# Generate a new venv and install the required things.
# This might take a bit, grab a drink.
# (yes this installs CUDA even if you don't have a Nvidia GPU, enjoy your 6GB venv setup)
python3.12 -m venv venv
source venv/bin/activate
python3 -m pip install -U pip
python3 -m pip install -r requirements-openvino.txt

# This is the key line. Change base as necessary to the name of the model you want.
python3 convert-whisper-to-openvino.py --model base
```

Note that if you see a line stating
`assert x.shape[1:] == self.positional_embedding.shape, "incorrect audio shape"`
while the script runs, it is not fatal.
The output file will still be generated normally.

See upstream's README for more info: https://github.com/ggerganov/whisper.cpp/#openvino-support
66 changes: 66 additions & 0 deletions examples/openvino_usage/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#![allow(clippy::uninlined_format_args)]

use std::path::Path;
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};

/// Transcribe a WAV file with a Whisper model, using OpenVINO for the encoder.
///
/// Usage: `openvino_usage <path to WAV file> <path to Whisper model>`
///
/// Prints each transcribed segment with its start and end timestamps, followed by
/// the wall-clock time spent in the transcription call.
///
/// # Panics
/// Panics if either argument is missing, if either path does not exist, or if any
/// step (loading audio, opening the model, enabling OpenVINO, running the model,
/// reading results) fails.
fn main() {
    let arg1 = std::env::args()
        .nth(1)
        .expect("first argument should be path to WAV file");
    let audio_path = Path::new(&arg1);
    if !audio_path.exists() {
        panic!("audio file doesn't exist");
    }
    let arg2 = std::env::args()
        .nth(2)
        .expect("second argument should be path to Whisper model");
    let whisper_path = Path::new(&arg2);
    if !whisper_path.exists() {
        panic!("whisper file doesn't exist")
    }

    // Load the 16-bit integer samples and convert them to the f32 samples
    // that are passed to `state.full` below.
    let original_samples = examples_common::parse_wav_file(audio_path);
    let mut samples = vec![0.0f32; original_samples.len()];
    whisper_rs::convert_integer_to_float_audio(&original_samples, &mut samples)
        .expect("failed to convert samples");

    let ctx = WhisperContext::new_with_params(
        &whisper_path.to_string_lossy(),
        WhisperContextParameters::default(),
    )
    .expect("failed to open model");
    let mut state = ctx.create_state().expect("failed to create a model state");

    // Enable OpenVINO now
    // We're expecting the OpenVINO file sitting right next to the model
    state
        .init_openvino_encoder(None, "GPU", None)
        .expect("failed to enable openvino");

    let mut params = FullParams::new(SamplingStrategy::default());
    params.set_initial_prompt("experience");
    params.set_progress_callback_safe(|progress| println!("Progress callback: {}%", progress));

    // Time only the transcription call itself, not the printing of segments.
    let started = std::time::Instant::now();
    state.full(params, &samples).expect("failed to run model");
    let elapsed = started.elapsed();

    let num_segments = state
        .full_n_segments()
        .expect("failed to get number of segments");
    for i in 0..num_segments {
        let segment = state
            .full_get_segment_text(i)
            .expect("failed to get segment");
        let start_timestamp = state
            .full_get_segment_t0(i)
            .expect("failed to get start timestamp");
        let end_timestamp = state
            .full_get_segment_t1(i)
            .expect("failed to get end timestamp");
        println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment);
    }
    println!("took {}ms", elapsed.as_millis());
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![allow(clippy::uninlined_format_args)]
#![cfg_attr(test, feature(test))]
#![cfg_attr(docsrs, feature(doc_cfg))]

mod common_logging;
mod error;
Expand Down
2 changes: 2 additions & 0 deletions src/whisper_ctx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ impl WhisperInnerContext {
}
}

// we don't implement `whisper_init()` here since i have zero clue what `whisper_model_loader` does

/// Convert the provided text into tokens.
///
/// # Arguments
Expand Down
64 changes: 63 additions & 1 deletion src/whisper_state.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::ffi::{c_int, CStr};
use std::ffi::{c_int, CStr, CString};
use std::sync::Arc;

use crate::{FullParams, WhisperError, WhisperInnerContext, WhisperToken, WhisperTokenData};
Expand Down Expand Up @@ -30,6 +30,68 @@ impl WhisperState {
Self { ctx, ptr }
}

/// Using this context and state, enable use of OpenVINO for encoder inference.
///
/// # Arguments
/// * `model_path`: An optional path to the OpenVINO encoder IR model.
///   Setting this to [`None`] will default to a file next to the path
///   passed in to [`crate::WhisperContext::new_with_params`].
///   For example, if the model path was `/path/to/ggml-base.en.bin`,
///   then the OpenVINO IR model path will default to `/path/to/ggml-base.en-encoder-openvino.xml`.
///
/// * `device`: The OpenVINO device to use for inference (e.g. `CPU`, `GPU`).
///   This is a string, as OpenVINO can randomly add new devices,
///   and having a hardcoded enum would result in major issues if one tried
///   to use a new device with an outdated enum.
///   Chances are unless you're doing something special, you just want `GPU` for this string.
///
/// * `cache_dir`: Optional cache directory that can speed up init time,
///   especially for GPU, by caching compiled 'blobs' in it.
///   Setting this to [`None`] will default to placing this directory next to the model path.
///   For example, if the model path was `/path/to/ggml-base.en.bin`,
///   then the cache dir model path will default to `/path/to/ggml-base.en-encoder-openvino-cache`.
///
/// **Note**: if you called [`crate::WhisperContext::new_from_buffer_with_params`], and either
/// `model_path` or `cache_dir` is None, this function will fail to initialize,
/// as there's no path for it to initialize from.
///
/// # Returns
/// `Ok(())` if no error.
///
/// `Err(WhisperError::GenericError(ret))` on error,
/// where ret is the return value from `whisper.cpp` (as of writing, this will always be 1).
/// Checking output logs for the actual error is more productive
/// than looking at the contained value.
///
/// # Panics
/// Panics if `model_path`, `device`, or `cache_dir` contains an interior NUL byte,
/// since such a string cannot be converted to a C string.
///
/// # C++ equivalent
/// `int whisper_ctx_init_openvino_encoder_with_state(struct whisper_context * ctx, struct whisper_state * state, const char * model_path, const char * device, const char * cache_dir);`
#[cfg(feature = "openvino")]
#[cfg_attr(docsrs, doc(cfg(feature = "openvino")))]
pub fn init_openvino_encoder(
    &mut self,
    model_path: Option<&str>,
    device: &str,
    cache_dir: Option<&str>,
) -> Result<(), WhisperError> {
    // Owned, NUL-terminated copies of the arguments. They are kept in named
    // locals so they stay alive until after the FFI call returns.
    let model_path = model_path.map(|s| CString::new(s).unwrap());
    let device = CString::new(device).unwrap();
    let cache_dir = cache_dir.map(|s| CString::new(s).unwrap());

    // Borrow the C strings instead of consuming the `Option`s: taking the
    // `CString` by value inside the closure would drop it as soon as the
    // closure returns, leaving a dangling pointer to hand to C.
    let model_path_ptr = model_path
        .as_deref()
        .map_or(std::ptr::null(), |s| s.as_ptr());
    let cache_dir_ptr = cache_dir
        .as_deref()
        .map_or(std::ptr::null(), |s| s.as_ptr());

    // SAFETY: every string pointer is either null or points into a `CString`
    // local that outlives this call. `self.ctx.ctx` and `self.ptr` are assumed
    // to be the valid context/state handles this `WhisperState` was built from.
    let ret = unsafe {
        whisper_rs_sys::whisper_ctx_init_openvino_encoder_with_state(
            self.ctx.ctx,
            self.ptr,
            model_path_ptr,
            device.as_ptr(),
            cache_dir_ptr,
        )
    };
    if ret == 0 {
        Ok(())
    } else {
        Err(WhisperError::GenericError(ret))
    }
}

/// Convert raw PCM audio (floating point 32 bit) to log mel spectrogram.
/// The resulting spectrogram is stored in the context transparently.
///
Expand Down
3 changes: 3 additions & 0 deletions sys/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ metal = []
vulkan = []
force-debug = []
openmp = []
openvino = []

[dependencies]

[build-dependencies]
cmake = "0.1"
Expand Down
Loading
Loading