@@ -73,7 +73,6 @@ def load_speaker_encoder(model_path: str, device: str = "cpu") -> torch.nn.Module:
     config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
     # Dynamically import from the downloaded model files
     import importlib
-    import tempfile
 
     from huggingface_hub import snapshot_download
 
@@ -119,31 +118,24 @@ def _load_speaker_encoder_weights(encoder: torch.nn.Module, model_path: str) ->
     state_dict = {}
 
     # Try safetensors first, then pytorch bin
-    safetensor_files = sorted(
-        f for f in os.listdir(model_dir) if f.endswith(".safetensors")
-    )
+    safetensor_files = sorted(f for f in os.listdir(model_dir) if f.endswith(".safetensors"))
     if safetensor_files:
         for fname in safetensor_files:
             shard = load_file(os.path.join(model_dir, fname))
             for k, v in shard.items():
                 if k.startswith(prefix):
                     state_dict[k[len(prefix) :]] = v
     else:
-        bin_files = sorted(
-            f for f in os.listdir(model_dir) if f.endswith(".bin")
-        )
+        bin_files = sorted(f for f in os.listdir(model_dir) if f.endswith(".bin"))
         for fname in bin_files:
-            shard = torch.load(
-                os.path.join(model_dir, fname), map_location="cpu", weights_only=True
-            )
+            shard = torch.load(os.path.join(model_dir, fname), map_location="cpu", weights_only=True)
             for k, v in shard.items():
                 if k.startswith(prefix):
                     state_dict[k[len(prefix) :]] = v
 
     if not state_dict:
         raise RuntimeError(
-            f"No speaker_encoder weights found in {model_path}. "
-            "Make sure this is a Qwen3-TTS-*-Base checkpoint."
+            f"No speaker_encoder weights found in {model_path}. Make sure this is a Qwen3-TTS-*-Base checkpoint."
        )
 
     encoder.load_state_dict(state_dict)
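Note on the prefix filtering in this hunk: only keys under the speaker-encoder namespace are kept, and that namespace is stripped before `load_state_dict`. A minimal illustration, assuming `prefix` is `"speaker_encoder."` (its actual value is set earlier in the function, outside this hunk):

```python
# Illustration only; assumes prefix = "speaker_encoder." (set outside this hunk).
prefix = "speaker_encoder."
shard = {"speaker_encoder.blocks.0.attn.weight": 1, "lm_head.weight": 2}
state_dict = {k[len(prefix) :]: v for k, v in shard.items() if k.startswith(prefix)}
print(state_dict)  # {'blocks.0.attn.weight': 1}; non-matching keys are dropped
```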
@@ -161,9 +153,7 @@ def compute_mel_spectrogram(audio: np.ndarray, sr: int = 24000) -> torch.Tensor:
 
     from librosa.filters import mel as librosa_mel_fn
 
-    mel_basis = torch.from_numpy(
-        librosa_mel_fn(sr=24000, n_fft=1024, n_mels=128, fmin=0, fmax=12000)
-    ).float()
+    mel_basis = torch.from_numpy(librosa_mel_fn(sr=24000, n_fft=1024, n_mels=128, fmin=0, fmax=12000)).float()
 
     n_fft = 1024
     hop_size = 256
@@ -173,8 +163,13 @@ def compute_mel_spectrogram(audio: np.ndarray, sr: int = 24000) -> torch.Tensor:
 
     hann_window = torch.hann_window(win_size)
     spec = torch.stft(
-        y, n_fft, hop_length=hop_size, win_length=win_size,
-        window=hann_window, center=False, return_complex=True,
+        y,
+        n_fft,
+        hop_length=hop_size,
+        win_length=win_size,
+        window=hann_window,
+        center=False,
+        return_complex=True,
     )
     spec = torch.abs(spec)
     mel = torch.matmul(mel_basis, spec)
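For context, `compute_mel_spectrogram` builds a 128-band mel filterbank at 24 kHz, takes a magnitude STFT (1024-point FFT, hop 256, Hann window, no centering), and projects it through the filterbank. A usage sketch, assuming the function's unshown tail returns the (log-)mel tensor:

```python
import numpy as np

# Sketch; assumes the unshown end of the function returns the mel tensor.
audio = np.random.randn(24000).astype(np.float32)  # 1 s of 24 kHz audio
mel = compute_mel_spectrogram(audio, sr=24000)
print(mel.shape)  # (128, T): 128 mel bands; T depends on any padding
                  # applied in the lines between these hunks
```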
@@ -183,9 +178,7 @@ def compute_mel_spectrogram(audio: np.ndarray, sr: int = 24000) -> torch.Tensor:
 
 
 @torch.inference_mode()
-def extract_embedding(
-    encoder: torch.nn.Module, audio_path: str, device: str = "cpu"
-) -> np.ndarray:
+def extract_embedding(encoder: torch.nn.Module, audio_path: str, device: str = "cpu") -> np.ndarray:
     """Extract a 1024-dim speaker embedding from an audio file."""
     import librosa
 
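End to end, the helpers above compose as follows. A usage sketch; the audio path is a placeholder and the model name is the script's default:

```python
# Usage sketch; "voice_a.wav" is a placeholder path.
encoder = load_speaker_encoder("Qwen/Qwen3-TTS-12Hz-1.7B-Base", device="cpu")
emb = extract_embedding(encoder, "voice_a.wav", device="cpu")
assert emb.shape == (1024,)  # per the docstring: 1024-dim speaker embedding
```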
@@ -342,7 +335,8 @@ def main():
     parser.add_argument("--api-base", default=DEFAULT_API_BASE, help="TTS API base URL")
     parser.add_argument("--api-key", default=DEFAULT_API_KEY, help="API key")
     parser.add_argument(
-        "--model", default="Qwen/Qwen3-TTS-12Hz-1.7B-Base",
+        "--model",
+        default="Qwen/Qwen3-TTS-12Hz-1.7B-Base",
         help="Model name (used for both weight loading and API requests)",
     )
     parser.add_argument("--device", default="cpu", help="Device for embedding extraction (cpu/cuda)")
@@ -367,7 +361,10 @@ def main():
     p_pipe.add_argument("--audio-a", required=True, help="Audio file for voice A")
     p_pipe.add_argument("--audio-b", required=True, help="Audio file for voice B")
     p_pipe.add_argument(
-        "--ratios", nargs="+", type=float, default=[0.0, 0.25, 0.5, 0.75, 1.0],
+        "--ratios",
+        nargs="+",
+        type=float,
+        default=[0.0, 0.25, 0.5, 0.75, 1.0],
         help="SLERP ratios to generate (default: 0.0 0.25 0.5 0.75 1.0)",
     )
     p_pipe.add_argument("--text", required=True, help="Text to synthesize")
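The `--ratios` values drive SLERP blending between the embeddings of `--audio-a` and `--audio-b` (0.0 is pure A, 1.0 is pure B). The script's own interpolation helper isn't part of this diff; a standard SLERP over embedding vectors looks roughly like this sketch:

```python
import numpy as np

def slerp(a: np.ndarray, b: np.ndarray, t: float, eps: float = 1e-7) -> np.ndarray:
    """Spherical linear interpolation: t=0.0 returns a, t=1.0 returns b;
    intermediate t follows the great-circle arc between the two directions."""
    a_n = a / np.linalg.norm(a)
    b_n = b / np.linalg.norm(b)
    dot = np.clip(np.dot(a_n, b_n), -1.0, 1.0)
    omega = np.arccos(dot)  # angle between the (normalized) embeddings
    if omega < eps:  # nearly parallel: fall back to linear interpolation
        return (1.0 - t) * a + t * b
    so = np.sin(omega)
    return (np.sin((1.0 - t) * omega) / so) * a + (np.sin(t * omega) / so) * b
```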