Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions backend/services/llm_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,72 @@ def chat(self, *, system: str, user: str, timeout: Optional[float] = None) -> st
# ── Off — explicit no-LLM path ────────────────────────────────────────────


class MiniMaxBackend(LLMBackend):
    """MiniMax chat backend, reached through an OpenAI-compatible endpoint.

    Models:
      • MiniMax-M2.7            — Peak Performance. Ultimate Value.
      • MiniMax-M2.7-highspeed  — Same performance, faster and more agile.

    Config env vars:
      MINIMAX_API_KEY        — required
      MINIMAX_BASE_URL       — optional (default https://api.minimax.io/v1)
      MINIMAX_MODEL          — optional (default MiniMax-M2.7)
      OMNIVOICE_LLM_TIMEOUT  — optional request timeout in seconds (default 45)
    """

    id = "minimax"
    display_name = "MiniMax (MiniMax)"

    def __init__(self):
        # Built on first use by `_get_client()`, so constructing the backend
        # needs neither the `openai` package nor an API key.
        self._client = None

    @classmethod
    def is_available(cls) -> tuple[bool, str]:
        """Report whether this backend can be used right now.

        Returns:
            ``(True, "ready")`` when the `openai` package imports and
            MINIMAX_API_KEY is set; otherwise ``(False, reason)`` where
            *reason* tells the user how to fix the problem.
        """
        try:
            import openai  # noqa: F401
        except ImportError:
            return False, "openai package missing (install with `pip install openai`)."
        if not os.environ.get("MINIMAX_API_KEY"):
            return False, (
                "MINIMAX_API_KEY not set. Get one at https://platform.minimax.io "
                "and export MINIMAX_API_KEY=<key>."
            )
        return True, "ready"

    @property
    def model_name(self) -> str:
        """Model id sent with each request (MINIMAX_MODEL, else MiniMax-M2.7)."""
        return os.environ.get("MINIMAX_MODEL", "MiniMax-M2.7")

    def _get_client(self):
        """Return the cached OpenAI client, creating it on first call.

        Raises:
            RuntimeError: if MINIMAX_API_KEY is unset or empty.
        """
        if self._client is not None:
            return self._client
        from openai import OpenAI

        api_key = os.environ.get("MINIMAX_API_KEY")
        if not api_key:
            raise RuntimeError("MINIMAX_API_KEY not set. See `is_available()` for details.")
        base_url = os.environ.get("MINIMAX_BASE_URL", "https://api.minimax.io/v1")
        self._client = OpenAI(api_key=api_key, base_url=base_url)
        return self._client

    def chat(self, *, system: str, user: str, timeout: Optional[float] = None) -> str:
        """Send one system + user exchange and return the reply text.

        Args:
            system: Content of the system message.
            user: Content of the user message.
            timeout: Request timeout in seconds. When ``None`` it is read from
                OMNIVOICE_LLM_TIMEOUT, falling back to 45 seconds if that
                variable is unset or not a number.

        Returns:
            The first choice's message content, stripped of surrounding
            whitespace (an empty string when the content is missing).

        Raises:
            RuntimeError: if the API key is missing or the response carries
                no choices.
        """
        if timeout is None:
            try:
                timeout = float(os.environ.get("OMNIVOICE_LLM_TIMEOUT", "45"))
            except ValueError:
                timeout = 45.0
        res = self._get_client().chat.completions.create(
            model=self.model_name,
            timeout=timeout,
            temperature=1.0,  # MiniMax requires (0.0, 1.0], cannot be 0
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
        )
        # Guard against an empty or null `choices` list so the caller gets an
        # actionable error instead of a bare IndexError/TypeError.
        choices = getattr(res, "choices", None)
        if not choices:
            raise RuntimeError("MiniMax chat completion returned no choices.")
        return (choices[0].message.content or "").strip()


class OffBackend(LLMBackend):
id = "off"
display_name = "Off (no LLM)"
Expand All @@ -146,6 +212,7 @@ def chat(self, **kw) -> str:

# Backend id → backend class. The "minimax" and "off" keys equal the `id`
# attribute of their classes; presumably the same holds for "openai-compat",
# whose class is defined outside this view — confirm.
_REGISTRY: dict[str, type[LLMBackend]] = {
"openai-compat": OpenAICompatBackend,
"minimax": MiniMaxBackend,
"off": OffBackend,
}

Expand Down
120 changes: 120 additions & 0 deletions backend/services/tts_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -1109,6 +1109,124 @@ def generate(self, text: str, **kw) -> torch.Tensor:
return wav


# ── MiniMax TTS adapter (cloud API, hex-encoded audio) ────────────────────


class MiniMaxTTSBackend(TTSBackend):
    """MiniMax TTS — cloud-based text-to-speech via the MiniMax API.

    Models: speech-2.8-hd (default, highest quality), speech-2.8-turbo (fast).
    Endpoint: POST https://api.minimax.io/v1/t2a_v2
    Audio: hex-encoded MP3, decoded to PCM tensor.

    Config env vars:
      MINIMAX_API_KEY    — required (shared with MiniMax Chat)
      MINIMAX_BASE_URL   — optional (default https://api.minimax.io; a value
                           that already ends in /v1 is accepted as well)
      MINIMAX_TTS_MODEL  — optional (default speech-2.8-hd)
    """

    id = "minimax"
    display_name = "MiniMax TTS (cloud, speech-2.8-hd / turbo)"

    # MiniMax voices — a curated subset of the system voice list.
    VOICES = [
        "English_Graceful_Lady",
        "English_Insightful_Speaker",
        "English_radiant_girl",
        "English_Persuasive_Man",
        "English_Lucky_Robot",
        "English_expressive_narrator",
    ]

    @classmethod
    def is_available(cls) -> tuple[bool, str]:
        """Report whether this backend can be used right now.

        Returns:
            ``(True, "ready")`` when MINIMAX_API_KEY is set; otherwise
            ``(False, reason)`` with instructions for obtaining a key.
        """
        if not os.environ.get("MINIMAX_API_KEY"):
            return False, (
                "MINIMAX_API_KEY not set. Get one at https://platform.minimax.io "
                "and export MINIMAX_API_KEY=<key>."
            )
        return True, "ready"

    @property
    def sample_rate(self) -> int:
        """Sample rate in Hz, both requested from the API and returned by `generate`."""
        return 32000

    @property
    def supported_languages(self) -> list[str]:
        """Language tags advertised by this backend."""
        return ["multi"]

    @staticmethod
    def _endpoint(base_url: str) -> str:
        """Return the t2a_v2 URL for *base_url*, tolerating a trailing ``/v1``.

        MINIMAX_BASE_URL may be given either as the bare host
        (``https://api.minimax.io``) or with the version segment
        (``https://api.minimax.io/v1``). Both must resolve to
        ``…/v1/t2a_v2``, never ``…/v1/v1/t2a_v2``.
        """
        root = base_url.rstrip("/")
        if root.endswith("/v1"):
            root = root[: -len("/v1")]
        return f"{root}/v1/t2a_v2"

    def generate(self, text: str, **kw) -> torch.Tensor:
        """Synthesize *text* with the MiniMax t2a_v2 API.

        Args:
            text: Text to speak.
            **kw: Optional ``voice`` (a MiniMax voice id; defaults to the first
                entry of `VOICES`) and ``speed`` (converted to float; defaults
                to 1.0). Passing ``None`` for either selects the default.

        Returns:
            A mono waveform tensor with a leading channel dimension, resampled
            to `sample_rate` when the decoded audio uses a different rate.

        Raises:
            RuntimeError: if MINIMAX_API_KEY is unset, the HTTP call or JSON
                decoding fails, the API reports a non-zero status code, or the
                response contains no audio.
        """
        import io
        import json
        import urllib.request

        api_key = os.environ.get("MINIMAX_API_KEY")
        if not api_key:
            raise RuntimeError("MINIMAX_API_KEY not set. See `is_available()` for details.")

        endpoint = self._endpoint(
            os.environ.get("MINIMAX_BASE_URL", "https://api.minimax.io")
        )
        model = os.environ.get("MINIMAX_TTS_MODEL", "speech-2.8-hd")
        voice = kw.get("voice") or self.VOICES[0]
        requested_speed = kw.get("speed")
        speed = 1.0 if requested_speed is None else float(requested_speed)

        payload = json.dumps({
            "model": model,
            "text": text,
            "stream": False,
            "voice_setting": {
                "voice_id": voice,
                "speed": speed,
                "vol": 1,
                "pitch": 0,
            },
            "audio_setting": {
                "sample_rate": self.sample_rate,
                "bitrate": 128000,
                "format": "mp3",
                "channel": 1,
            },
        }).encode()

        req = urllib.request.Request(
            endpoint,
            data=payload,
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {api_key}",
            },
            method="POST",
        )

        try:
            with urllib.request.urlopen(req, timeout=120) as resp:
                result = json.loads(resp.read())
        except Exception as e:
            # Chain the cause so the underlying network/JSON error stays visible.
            raise RuntimeError(f"MiniMax TTS API call failed: {e}") from e

        # `or {}` also covers an explicit JSON null, not just a missing key.
        base_resp = result.get("base_resp") or {}
        status_code = base_resp.get("status_code", -1)
        if status_code != 0:
            msg = base_resp.get("status_msg", "unknown error")
            raise RuntimeError(f"MiniMax TTS error ({status_code}): {msg}")

        hex_audio = (result.get("data") or {}).get("audio", "")
        if not hex_audio:
            raise RuntimeError("MiniMax TTS returned empty audio data.")

        # MiniMax returns hex-encoded audio (not base64).
        audio_bytes = bytes.fromhex(hex_audio)

        import torchaudio
        wav, sr = torchaudio.load(io.BytesIO(audio_bytes))
        if sr != self.sample_rate:
            wav = torchaudio.functional.resample(wav, sr, self.sample_rate)
        if wav.ndim == 1:
            # Add the channel dimension.
            wav = wav.unsqueeze(0)
        elif wav.ndim == 2 and wav.shape[0] > 1:
            # Down-mix multi-channel audio to mono.
            wav = wav.mean(dim=0, keepdim=True)
        return wav


# ── Registry ────────────────────────────────────────────────────────────────


Expand All @@ -1122,6 +1240,7 @@ def generate(self, text: str, **kw) -> torch.Tensor:
"indextts2": IndexTTS2Backend,
"gpt-sovits": GPTSoVITSBackend,
"sherpa-onnx": SherpaOnnxBackend,
"minimax": MiniMaxTTSBackend,
}


Expand All @@ -1138,6 +1257,7 @@ def generate(self, text: str, **kw) -> torch.Tensor:
"indextts2": "git clone index-tts/index-tts && uv pip install -e . (NOT uv sync --all-extras)",
"gpt-sovits": "External API server — start api_v2.py on port 9880",
"sherpa-onnx": "pip install sherpa-onnx (universal ONNX runtime, WASM-ready)",
"minimax": "Cloud API — set MINIMAX_API_KEY (get key at https://platform.minimax.io)",
}


Expand Down
82 changes: 81 additions & 1 deletion tests/test_engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_asr_env_override(monkeypatch):
def test_llm_registry_includes_off():
    """The LLM registry lists at least the openai-compat, off and minimax backends.

    A subset check is used instead of strict equality so that registering
    further backends does not break this test.
    """
    ids = {row["id"] for row in llm_backend.list_backends()}
    assert {"openai-compat", "off", "minimax"}.issubset(ids)


def test_llm_off_chat_raises_actionable(monkeypatch):
Expand Down Expand Up @@ -131,3 +131,83 @@ def test_llm_auto_selects_openai_compat_when_configured(monkeypatch):
except ImportError:
pytest.skip("openai package not available in this environment")
assert llm_backend.active_backend_id() == "openai-compat"


# ── MiniMax (minimax) ──────────────────────────────────────────────────────


def test_llm_registry_includes_minimax():
    """`list_backends()` reports a row whose id is "minimax"."""
    listed_ids = {entry["id"] for entry in llm_backend.list_backends()}
    assert "minimax" in listed_ids


def test_llm_minimax_unavailable_without_key(monkeypatch):
    """Without MINIMAX_API_KEY the LLM backend reports itself unavailable."""
    monkeypatch.delenv("MINIMAX_API_KEY", raising=False)
    available, reason = llm_backend.MiniMaxBackend.is_available()
    assert not available
    # The reason may name either the missing key or a missing `openai` package.
    assert any(hint in reason for hint in ("MINIMAX_API_KEY", "openai"))


def test_llm_minimax_available_with_key(monkeypatch):
    """With a key set (and `openai` importable) the LLM backend is available."""
    try:
        import openai  # noqa: F401
    except ImportError:
        pytest.skip("openai package not available in this environment")
    monkeypatch.setenv("MINIMAX_API_KEY", "test-key")
    available, _reason = llm_backend.MiniMaxBackend.is_available()
    assert available


def test_llm_minimax_default_model(monkeypatch):
    """With MINIMAX_MODEL unset, `model_name` is the built-in default."""
    # Clear any ambient value so the test does not depend on the shell/CI env.
    monkeypatch.delenv("MINIMAX_MODEL", raising=False)
    be = llm_backend.MiniMaxBackend()
    assert be.model_name == "MiniMax-M2.7"

Comment on lines +162 to +165
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Isolate MINIMAX_MODEL in the default-model test.

This test depends on ambient environment state and can fail when MINIMAX_MODEL is already set in CI/dev shells.

Suggested fix
-def test_llm_minimax_default_model():
+def test_llm_minimax_default_model(monkeypatch):
+    monkeypatch.delenv("MINIMAX_MODEL", raising=False)
     be = llm_backend.MiniMaxBackend()
     assert be.model_name == "MiniMax-M2.7"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def test_llm_minimax_default_model():
be = llm_backend.MiniMaxBackend()
assert be.model_name == "MiniMax-M2.7"
def test_llm_minimax_default_model(monkeypatch):
monkeypatch.delenv("MINIMAX_MODEL", raising=False)
be = llm_backend.MiniMaxBackend()
assert be.model_name == "MiniMax-M2.7"
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tests/test_engines.py` around lines 162 - 165, The test
test_llm_minimax_default_model relies on ambient MINIMAX_MODEL env state; before
creating MiniMaxBackend() ensure the env var is isolated by removing or
overriding it (e.g., call os.environ.pop("MINIMAX_MODEL", None) or use pytest's
monkeypatch.delenv("MINIMAX_MODEL", raising=False)) so
MiniMaxBackend().model_name equals the hardcoded default "MiniMax-M2.7"; update
the test to remove/delenv the variable just prior to instantiating
MiniMaxBackend to guarantee deterministic behavior.


def test_llm_minimax_custom_model(monkeypatch):
    """MINIMAX_MODEL overrides the model name reported by the backend."""
    wanted = "MiniMax-M2.7-highspeed"
    monkeypatch.setenv("MINIMAX_MODEL", wanted)
    assert llm_backend.MiniMaxBackend().model_name == wanted


def test_llm_minimax_env_override_selects(monkeypatch):
    """OMNIVOICE_LLM_BACKEND=minimax makes minimax the active LLM backend."""
    monkeypatch.setenv("OMNIVOICE_LLM_BACKEND", "minimax")
    selected = llm_backend.active_backend_id()
    assert selected == "minimax"


def test_tts_minimax_unavailable_without_key(monkeypatch):
    """Without MINIMAX_API_KEY the TTS backend is unavailable and says why."""
    monkeypatch.delenv("MINIMAX_API_KEY", raising=False)
    available, reason = tts_backend.MiniMaxTTSBackend.is_available()
    assert not available
    assert "MINIMAX_API_KEY" in reason


def test_tts_minimax_available_with_key(monkeypatch):
    """Setting MINIMAX_API_KEY is enough for the TTS backend to be available."""
    monkeypatch.setenv("MINIMAX_API_KEY", "test-key")
    available, _reason = tts_backend.MiniMaxTTSBackend.is_available()
    assert available


def test_tts_minimax_sample_rate():
    """The MiniMax TTS backend reports a 32 kHz sample rate."""
    backend = tts_backend.MiniMaxTTSBackend()
    assert backend.sample_rate == 32000


def test_tts_minimax_voices():
    """The curated voice list has six entries, including English_Graceful_Lady."""
    voices = tts_backend.MiniMaxTTSBackend.VOICES
    assert len(voices) == 6
    assert "English_Graceful_Lady" in voices


def test_tts_minimax_languages():
    """The backend advertises the "multi" language tag."""
    assert "multi" in tts_backend.MiniMaxTTSBackend().supported_languages


def test_tts_minimax_env_override_selects(monkeypatch):
    """OMNIVOICE_TTS_BACKEND=minimax makes minimax the active TTS backend."""
    monkeypatch.setenv("OMNIVOICE_TTS_BACKEND", "minimax")
    selected = tts_backend.active_backend_id()
    assert selected == "minimax"


def test_tts_minimax_in_registry():
    """`tts_backend.list_backends()` reports a row whose id is "minimax"."""
    listed_ids = {entry["id"] for entry in tts_backend.list_backends()}
    assert "minimax" in listed_ids