Merge pull request #146 from danielferr85/main

NotPunchnox · web-flow · commit f87dc3b4a280 · 2026-04-22T11:14:27.000+02:00
Fix #116: support multispeaker Piper voices
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # RKLLama: LLM Server and Client for Rockchip 3588/3576
 
-### [Version: 0.0.66](#New-Version)
+### [Version: 0.0.67](#New-Version)
 
 Video demo ( version 0.0.1 ):
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "rkllama"
-version = "0.0.66"
+version = "0.0.67"
 authors = [
     { name="NotPunchnox", email="punchnoxpro@gmail.com" },
     { name="TomJacobsUK", email="tom@tomjacobs.co.uk" },
diff --git a/src/rkllama/api/models/audio/piper.py b/src/rkllama/api/models/audio/piper.py
@@ -206,8 +206,9 @@ def phoneme_ids_to_audio(
         )
     
         # Get the encoder outputs
+        g = None  # Third encoder output; stays None unless the voice is multispeaker
         if speaker_id is not None:
-            z, y_mask, _ = encoder_output
+            z, y_mask, g = encoder_output
         else:
             z, y_mask = encoder_output
 
@@ -241,6 +242,10 @@ def phoneme_ids_to_audio(
             # Construct inputs for RKNN decoder model
             inputs_chunk = [zc.astype(np.float32), yc.astype(np.float32)]
 
+            # Multispeaker models: also pass the encoder's third output (g) to the decoder
+            if g is not None:
+                inputs_chunk.append(g)
+
             # Inference RKNN (decoder) of the chunk
             result = self.session_rknn.inference(inputs=inputs_chunk, data_format="nchw")