feat!: add Engine::load_from_bytes() (#59)

cm-ayf · web-flow · commit 13cf2f3fd77f · 2025-05-10T14:35:14.000+09:00
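BREAKING CHANGE: replace `jbonsai::model::load_htsvoice_file` (takes a path) with
`jbonsai::model::load_htsvoice_from_bytes` (takes the file content as `&[u8]`);
callers must now read the `.htsvoice` file themselves before calling it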
diff --git a/benches/bonsais.rs b/benches/bonsais.rs
@@ -22,7 +22,7 @@ fn bonsai(bencher: &mut Bencher) {
         "a^i-sil+xx=xx/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:4_4!0_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:1_4/I:xx-xx@xx+xx&xx-xx|xx+xx/J:xx_xx/K:1+1-4",
     ];
 
-    let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();
+    let engine = Engine::load([MODEL_NITECH_ATR503]).unwrap();
 
     bencher.iter(|| {
         engine.synthesize(&lines).unwrap();
@@ -60,7 +60,7 @@ fn is_bonsai(bencher: &mut Bencher) {
         "k^a-sil+xx=xx/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:7_5!1_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:2_10/I:xx-xx@xx+xx&xx-xx|xx+xx/J:xx_xx/K:1+2-10",
     ];
 
-    let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();
+    let engine = Engine::load([MODEL_NITECH_ATR503]).unwrap();
 
     bencher.iter(|| {
         engine.synthesize(&lines).unwrap();
@@ -132,7 +132,7 @@ fn bonsai_letter(bencher: &mut Bencher) {
         "t^a-sil+xx=xx/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:3_1!0_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:6_24/I:xx-xx@xx+xx&xx-xx|xx+xx/J:xx_xx/K:1+6-24",
     ];
 
-    let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();
+    let engine = Engine::load([MODEL_NITECH_ATR503]).unwrap();
 
     bencher.iter(|| {
         engine.synthesize(&lines).unwrap();
diff --git a/examples/genji/main.rs b/examples/genji/main.rs
@@ -4,7 +4,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let label_str = std::fs::read_to_string("examples/genji/genji.lab")?;
 
     let lines: Vec<_> = label_str.lines().collect();
-    let mut engine = Engine::load(&[
+    let mut engine = Engine::load([
         "models/tohoku-f01/tohoku-f01-sad.htsvoice",
         "models/tohoku-f01/tohoku-f01-happy.htsvoice",
     ])?;
diff --git a/examples/is-bonsai/main.rs b/examples/is-bonsai/main.rs
@@ -24,7 +24,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         "k^a-sil+xx=xx/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:7_5!1_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:2_10/I:xx-xx@xx+xx&xx-xx|xx+xx/J:xx_xx/K:1+2-10",
     ];
 
-    let engine = Engine::load(&[
+    let engine = Engine::load([
         "models/hts_voice_nitech_jp_atr503_m001-1.05/nitech_jp_atr503_m001.htsvoice",
     ])?;
     let speech = engine.synthesize(&lines)?;
diff --git a/src/engine.rs b/src/engine.rs
@@ -254,20 +254,37 @@ pub struct Engine {
 impl Engine {
     /// Load `.htsvoice` files and create a new [`Engine`].
     #[cfg(feature = "htsvoice")]
-    pub fn load<P: AsRef<Path>>(voices: &[P]) -> Result<Self, EngineError> {
-        use crate::model::load_htsvoice_file;
+    pub fn load<P: AsRef<Path>>(voices: impl IntoIterator<Item = P>) -> Result<Self, EngineError> {
+        Self::load_from_result_bytes(voices.into_iter().map(std::fs::read))
+    }
+
+    /// Load htsvoice file content and create a new [`Engine`].
+    #[cfg(feature = "htsvoice")]
+    pub fn load_from_bytes<B: AsRef<[u8]>>(
+        voices: impl IntoIterator<Item = B>,
+    ) -> Result<Self, EngineError> {
+        Self::load_from_result_bytes(voices.into_iter().map(Ok))
+    }
+
+    #[cfg(feature = "htsvoice")]
+    fn load_from_result_bytes<B: AsRef<[u8]>>(
+        voices: impl IntoIterator<Item = std::io::Result<B>>,
+    ) -> Result<Self, EngineError> {
+        use crate::model::load_htsvoice_from_bytes;
 
         let voices = voices
-            .iter()
-            .map(|path| Ok(Arc::new(load_htsvoice_file(path)?)))
+            .into_iter()
+            .map(|bytes| Ok(Arc::new(load_htsvoice_from_bytes(bytes?.as_ref())?)))
             .collect::<Result<Vec<_>, ModelError>>()?;
+
         let voiceset = VoiceSet::new(voices)?;
 
         let mut condition = Condition::default();
         condition.load_model(&voiceset)?;
 
         Ok(Self::new(voiceset, condition))
     }
+
     /// Create a new [`Engine`] with provided voices and condition.
     pub fn new(voices: VoiceSet, condition: Condition) -> Self {
         Engine { voices, condition }
diff --git a/src/lib.rs b/src/lib.rs
@@ -38,7 +38,19 @@ mod tests {
 
     #[test]
     fn bonsai() {
-        let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();
+        let engine = Engine::load([MODEL_NITECH_ATR503]).unwrap();
+
+        let speech = engine.synthesize(&SAMPLE_SENTENCE_1).unwrap();
+
+        assert_eq!(speech.len(), 66480);
+        approx::assert_abs_diff_eq!(speech[2000], 19.35141137623778, epsilon = 1.0e-10);
+        approx::assert_abs_diff_eq!(speech[30000], -980.6757547598129, epsilon = 1.0e-10);
+    }
+
+    #[test]
+    fn bonsai_load_from_bytes() {
+        let model_bytes = std::fs::read(MODEL_NITECH_ATR503).unwrap();
+        let engine = Engine::load_from_bytes(&[model_bytes]).unwrap();
 
         let speech = engine.synthesize(&SAMPLE_SENTENCE_1).unwrap();
 
@@ -54,7 +66,7 @@ mod tests {
             .map(|l| l.parse().unwrap())
             .collect();
 
-        let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();
+        let engine = Engine::load([MODEL_NITECH_ATR503]).unwrap();
 
         let speech = engine.synthesize(labels).unwrap();
 
@@ -65,7 +77,7 @@ mod tests {
 
     #[test]
     fn bonsai_multi() {
-        let mut engine = Engine::load(&[MODEL_TOHOKU_F01_NORMAL, MODEL_TOHOKU_F01_HAPPY]).unwrap();
+        let mut engine = Engine::load([MODEL_TOHOKU_F01_NORMAL, MODEL_TOHOKU_F01_HAPPY]).unwrap();
         let iw = engine.condition.get_interporation_weight_mut();
         iw.set_duration(&[0.7, 0.3]).unwrap();
         iw.set_parameter(0, &[0.7, 0.3]).unwrap();
@@ -110,7 +122,7 @@ mod tests {
 
     #[test]
     fn is_this_bonsai() {
-        let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();
+        let engine = Engine::load([MODEL_NITECH_ATR503]).unwrap();
 
         let speech = engine.synthesize(&SAMPLE_SENTENCE_2).unwrap();
 
@@ -123,7 +135,7 @@ mod tests {
 
     #[test]
     fn is_this_bonsai_fast() {
-        let mut engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();
+        let mut engine = Engine::load([MODEL_NITECH_ATR503]).unwrap();
         engine.condition.set_speed(1.4);
 
         let speech = engine.synthesize(&SAMPLE_SENTENCE_2).unwrap();
@@ -137,7 +149,7 @@ mod tests {
 
     #[test]
     fn empty() {
-        let mut engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();
+        let mut engine = Engine::load([MODEL_NITECH_ATR503]).unwrap();
         let labels: [&str; 0] = [];
 
         let speech = engine.synthesize(&labels[..]).unwrap();
diff --git a/src/model/mod.rs b/src/model/mod.rs
@@ -156,11 +156,10 @@ impl<'a> Models<'a> {
     }
 }
 
-/// Load `.htsvoice` file as [`Voice`].
+/// Load `.htsvoice` file content as [`Voice`].
 #[cfg(feature = "htsvoice")]
-pub fn load_htsvoice_file<P: AsRef<std::path::Path>>(path: &P) -> Result<Voice, ModelError> {
-    let f = std::fs::read(path)?;
-    Ok(parser::parse_htsvoice(&f)?)
+pub fn load_htsvoice_from_bytes(bytes: &[u8]) -> Result<Voice, ModelError> {
+    Ok(parser::parse_htsvoice(bytes)?)
 }
 
 #[cfg(all(test, feature = "htsvoice"))]
@@ -174,10 +173,11 @@ pub(crate) mod tests {
         },
     };
 
-    use super::{Models, Voice, VoiceSet, load_htsvoice_file};
+    use super::{Models, Voice, VoiceSet, load_htsvoice_from_bytes};
 
     fn load_voice() -> Voice {
-        load_htsvoice_file(&MODEL_NITECH_ATR503).unwrap()
+        let htsvoice = std::fs::read(MODEL_NITECH_ATR503).unwrap();
+        load_htsvoice_from_bytes(&htsvoice).unwrap()
     }
 
     #[test]
@@ -394,8 +394,10 @@ pub(crate) mod tests {
 
     #[test]
     fn multiple_models() {
-        let normal = load_htsvoice_file(&MODEL_TOHOKU_F01_NORMAL).unwrap();
-        let happy = load_htsvoice_file(&MODEL_TOHOKU_F01_HAPPY).unwrap();
+        let normal_htsvoice = std::fs::read(MODEL_TOHOKU_F01_NORMAL).unwrap();
+        let normal = load_htsvoice_from_bytes(&normal_htsvoice).unwrap();
+        let happy_htsvoice = std::fs::read(MODEL_TOHOKU_F01_HAPPY).unwrap();
+        let happy = load_htsvoice_from_bytes(&happy_htsvoice).unwrap();
         let voiceset = VoiceSet::new(vec![Arc::new(normal), Arc::new(happy)]).unwrap();
         let labels = vec![SAMPLE_SENTENCE_1[2].parse().unwrap()];