;
};
-const openAiVoices: SelectOption[] = [
- { id: "alloy", label: "Alloy" },
- { id: "echo", label: "Echo" },
- { id: "fable", label: "Fable" },
- { id: "onyx", label: "Onyx" },
- { id: "nova", label: "Nova" },
- { id: "shimmer", label: "Shimmer" },
-];
-
-const openAiTtsModels: SelectOption[] = [
- { id: "tts-1", label: "tts-1" },
- { id: "tts-1-hd", label: "tts-1-hd" },
-];
-
const whisperModels: SelectOption[] = [
{ id: "whisper-1", label: "whisper-1" },
];
@@ -289,32 +275,6 @@ export const chatProviderOptions: ProviderOption[] = [
];
export const speechProviderOptions: ProviderOption[] = [
- {
- id: "openai-audio-speech",
- label: "OpenAI",
- icon: "i-lobe-icons:openai",
- description: "OpenAI speech models.",
- category: "speech",
- engineId: "openai-tts",
- defaultBaseUrl: "https://api.openai.com/v1/",
- requiresApiKey: true,
- requiresBaseUrl: true,
- supportsModels: true,
- supportsVoices: true,
- modelOptions: openAiTtsModels,
- voiceOptions: openAiVoices,
- defaultModel: "tts-1",
- defaultVoice: "alloy",
- },
- {
- id: "openai-compatible-audio-speech",
- label: "OpenAI Compatible",
- icon: "i-lobe-icons:openai",
- description: "OpenAI-compatible speech.",
- category: "speech",
- requiresApiKey: true,
- requiresBaseUrl: true,
- },
{
id: "volcengine-speech",
label: "Volcengine",
@@ -327,14 +287,15 @@ export const speechProviderOptions: ProviderOption[] = [
supportsVoices: true,
modelOptions: [{ id: "v1", label: "v1" }],
defaultModel: "v1",
- defaultBaseUrl: "https://unspeech.hyp3r.link/v1/",
+ defaultBaseUrl: "https://openspeech.bytedance.com/api/v1/tts",
},
{
- id: "alibaba-cloud-model-studio",
+ id: "alibaba-cloud-model-studio-speech",
label: "Alibaba Cloud Model Studio",
icon: "i-lobe-icons:alibabacloud",
description: "bailian.console.aliyun.com",
category: "speech",
+ engineId: "alibaba-cloud-model-studio-speech",
requiresApiKey: true,
requiresBaseUrl: true,
supportsModels: true,
@@ -344,65 +305,7 @@ export const speechProviderOptions: ProviderOption[] = [
{ id: "cosyvoice-v2", label: "cosyvoice-v2" },
],
defaultModel: "cosyvoice-v1",
- defaultBaseUrl: "https://unspeech.hyp3r.link/v1/",
- },
- {
- id: "volcengine",
- label: "Volcengine",
- icon: "i-lobe-icons:volcengine",
- description: "volcengine.com",
- category: "speech",
- requiresApiKey: true,
- requiresBaseUrl: true,
- supportsModels: true,
- supportsVoices: true,
- modelOptions: [{ id: "v1", label: "v1" }],
- defaultModel: "v1",
- defaultBaseUrl: "https://unspeech.hyp3r.link/v1/",
- },
- {
- id: "elevenlabs",
- label: "ElevenLabs",
- icon: "i-simple-icons:elevenlabs",
- description: "Voice synthesis & cloning.",
- category: "speech",
- defaultBaseUrl: "https://api.elevenlabs.io/v1/",
- requiresApiKey: true,
- requiresBaseUrl: true,
- },
- {
- id: "microsoft-speech",
- label: "Microsoft / Azure Speech",
- icon: "i-lobe-icons:microsoft",
- description: "Microsoft speech services.",
- category: "speech",
- requiresApiKey: true,
- },
- {
- id: "index-tts-vllm",
- label: "Bilibili Index TTS",
- icon: "i-lobe-icons:bilibiliindex",
- description: "index-tts.github.io",
- category: "speech",
- defaultBaseUrl: "http://localhost:8000/v1/",
- requiresBaseUrl: true,
- },
- {
- id: "comet-api-speech",
- label: "Comet API",
- icon: "i-lobe-icons:cometapi",
- description: "Comet API speech.",
- category: "speech",
- requiresApiKey: true,
- },
- {
- id: "player2-speech",
- label: "Player2 Speech",
- icon: "i-lobe-icons:player2",
- description: "Local gameplay assistant speech.",
- category: "speech",
- defaultBaseUrl: "http://localhost:4315/v1/",
- requiresBaseUrl: true,
+ defaultBaseUrl: "https://dashscope.aliyuncs.com",
},
{
id: "app-local-audio-speech",
@@ -446,11 +349,11 @@ export const transcriptionProviderOptions: ProviderOption[] = [
},
{
id: "aliyun-nls-transcription",
- label: "Aliyun NLS",
+ label: "Alibaba Cloud Model Studio",
icon: "i-lobe-icons:alibabacloud",
- description: "Aliyun transcription.",
+ description: "Alibaba Bailian ASR transcription.",
category: "transcription",
- requiresApiKey: true,
+ engineId: "aliyun-nls-asr",
},
{
id: "comet-api-transcription",
diff --git a/frontend/packages/app-core/src/services/audio-direct.test.ts b/frontend/packages/app-core/src/services/audio-direct.test.ts
new file mode 100644
index 0000000..963d195
--- /dev/null
+++ b/frontend/packages/app-core/src/services/audio-direct.test.ts
@@ -0,0 +1,164 @@
+import assert from "node:assert/strict";
+
+import {
+ buildLegacyTtsHttpRequest,
+ buildDirectTtsHttpRequest,
+ supportsDirectTts,
+} from "../utils/tts-direct-request.ts";
+
+function run(name: string, fn: () => void) {
+ try {
+ fn();
+ console.info(`PASS ${name}`);
+ } catch (error) {
+ console.error(`FAIL ${name}`);
+ throw error;
+ }
+}
+
+run("supports backend relay tts engines", () => {
+ assert.equal(supportsDirectTts("volcengine-speech"), true);
+ assert.equal(supportsDirectTts("alibaba-cloud-model-studio-speech"), true);
+ assert.equal(supportsDirectTts("openai-tts"), false);
+ assert.equal(supportsDirectTts("unknown-engine"), false);
+});
+
+run("builds backend relay request for volcengine speech engine", () => {
+ const request = buildDirectTtsHttpRequest({
+ text: "hello",
+ engineId: "volcengine-speech",
+ apiBaseUrl: "http://localhost:8090",
+ config: {
+ apiKey: "token-123",
+ model: "v1",
+ voice: "zh_female_test",
+ appId: "appid-xyz",
+ },
+ });
+
+ assert.ok(request);
+ assert.equal(request?.url, "http://localhost:8090/api/tts/engines");
+ assert.equal(request?.headers.Authorization, undefined);
+ assert.deepEqual(request?.body, {
+ engine: "volcengine-speech",
+ data: "hello",
+ config: {
+ apiKey: "token-123",
+ model: "v1",
+ voice: "zh_female_test",
+ appId: "appid-xyz",
+ },
+ });
+});
+
+run("builds backend relay request for alibaba speech engine", () => {
+ const request = buildDirectTtsHttpRequest({
+ text: "hello",
+ engineId: "alibaba-cloud-model-studio-speech",
+ apiBaseUrl: "http://localhost:8090/",
+ config: {
+ apiKey: "token-123",
+ model: "alibaba/cosyvoice-v1",
+ voice: "longxiaochun_v2",
+ rate: 1.2,
+ pitch: 0.9,
+ },
+ });
+
+ assert.ok(request);
+ assert.equal(request?.url, "http://localhost:8090/api/tts/engines");
+ assert.deepEqual(request?.body, {
+ engine: "alibaba-cloud-model-studio-speech",
+ data: "hello",
+ config: {
+ apiKey: "token-123",
+ model: "cosyvoice-v1",
+ voice: "longxiaochun_v2",
+ rate: 1.2,
+ pitch: 0.9,
+ },
+ });
+});
+
+run("does not forward base url for fixed direct providers", () => {
+ const volcRequest = buildDirectTtsHttpRequest({
+ text: "hello",
+ engineId: "volcengine-speech",
+ apiBaseUrl: "http://localhost:8090",
+ config: {
+ apiKey: "token-123",
+ baseUrl: "https://unspeech.hyp3r.link/v1/",
+ model: "v1",
+ voice: "zh_female_test",
+ appId: "appid-xyz",
+ },
+ });
+ assert.ok(volcRequest);
+ assert.equal((volcRequest?.body.config as { baseUrl?: string }).baseUrl, undefined);
+
+ const alibabaRequest = buildDirectTtsHttpRequest({
+ text: "hello",
+ engineId: "alibaba-cloud-model-studio-speech",
+ apiBaseUrl: "http://localhost:8090",
+ config: {
+ apiKey: "token-123",
+ baseUrl: "https://unspeech.hyp3r.link/v1/",
+ model: "cosyvoice-v1",
+ voice: "longwan",
+ },
+ });
+ assert.ok(alibabaRequest);
+ assert.equal((alibabaRequest?.body.config as { baseUrl?: string }).baseUrl, undefined);
+});
+
+run("builds legacy synthesize fallback request from backend relay request", () => {
+ const request = buildDirectTtsHttpRequest({
+ text: "fallback test",
+ engineId: "volcengine-speech",
+ apiBaseUrl: "http://localhost:8090",
+ config: {
+ apiKey: "token-123",
+ baseUrl: "https://unspeech.example/v1",
+ model: "v1",
+ voice: "zh_female_test",
+ appId: "appid-xyz",
+ },
+ });
+
+ assert.ok(request);
+ const legacy = buildLegacyTtsHttpRequest(request!);
+ assert.equal(legacy.url, "http://localhost:8090/api/tts/synthesize");
+ assert.deepEqual(legacy.body, {
+ text: "fallback test",
+ engine: "volcengine-speech",
+ providerId: "volcengine-speech",
+ provider_id: "volcengine-speech",
+ config: {
+ apiKey: "token-123",
+ api_key: "token-123",
+ model: "volcengine/v1",
+ voice: "zh_female_test",
+ appId: "appid-xyz",
+ appid: "appid-xyz",
+ app_id: "appid-xyz",
+ backend: "volcengine",
+ },
+ });
+});
+
+run("keeps alibaba model id without provider prefix in legacy fallback request", () => {
+ const request = buildDirectTtsHttpRequest({
+ text: "fallback alibaba",
+ engineId: "alibaba-cloud-model-studio-speech",
+ apiBaseUrl: "http://localhost:8090",
+ config: {
+ apiKey: "token-123",
+ model: "alibaba/cosyvoice-v1",
+ voice: "longxiaochun_v2",
+ },
+ });
+
+ assert.ok(request);
+ const legacy = buildLegacyTtsHttpRequest(request!);
+ assert.equal((legacy.body.config as { model?: string }).model, "cosyvoice-v1");
+});
diff --git a/frontend/packages/app-core/src/services/audio.ts b/frontend/packages/app-core/src/services/audio.ts
index 35049d4..9de5e7c 100644
--- a/frontend/packages/app-core/src/services/audio.ts
+++ b/frontend/packages/app-core/src/services/audio.ts
@@ -1,4 +1,9 @@
import { appConfig } from "../config";
+import {
+ buildLegacyTtsHttpRequest,
+ buildDirectTtsHttpRequest,
+ supportsDirectTts,
+} from "../utils/tts-direct-request";
type AudioRequestConfig = Record<string, unknown>;

@@ -34,6 +39,61 @@ export type AsrStreamConnection = {
close: () => void;
};
+export { buildDirectTtsHttpRequest, supportsDirectTts };
+
+function createTtsHttpError(status: number, detail: string) {
+ const error = new Error(detail || `Direct TTS request failed: ${status}`) as Error & {
+ status?: number;
+ detail?: string;
+ };
+ error.status = status;
+ error.detail = detail || undefined;
+ return error;
+}
+
+function decodeBase64Audio(base64: string, mimeType: string) {
+ const cleaned = base64.trim().replace(/^data:[^;]+;base64,/, "");
+ const binary = atob(cleaned);
+ const bytes = new Uint8Array(binary.length);
+ for (let index = 0; index < binary.length; index += 1) {
+ bytes[index] = binary.charCodeAt(index);
+ }
+ return new Blob([bytes], { type: mimeType || "audio/mpeg" });
+}
+
+async function resolveTtsBlob(response: Response) {
+ const contentType = (response.headers.get("Content-Type") || "").toLowerCase();
+ if (!contentType.includes("application/json")) {
+ return await response.blob();
+ }
+
+ const payload = (await response.json().catch(() => null)) as
+ | {
+ audioBase64?: unknown;
+ audio_base64?: unknown;
+ audio?: unknown;
+ mimeType?: unknown;
+ mime_type?: unknown;
+ format?: unknown;
+ }
+ | null;
+ const audioBase64 =
+ (typeof payload?.audioBase64 === "string" && payload.audioBase64) ||
+ (typeof payload?.audio_base64 === "string" && payload.audio_base64) ||
+ (typeof payload?.audio === "string" && payload.audio) ||
+ "";
+ if (audioBase64) {
+ const mimeType =
+ (typeof payload?.mimeType === "string" && payload.mimeType) ||
+ (typeof payload?.mime_type === "string" && payload.mime_type) ||
+ (typeof payload?.format === "string" && `audio/${payload.format}`) ||
+ "audio/mpeg";
+ return decodeBase64Audio(audioBase64, mimeType);
+ }
+
+ throw new Error("TTS response JSON does not contain audio payload.");
+}
+
export function resolveAudioApiBaseUrl() {
const proxyUrl = appConfig.providers.proxyUrl?.trim();
const apiBaseUrl = appConfig.providers.apiBaseUrl?.trim();
@@ -51,28 +111,55 @@ function resolveAudioWsBaseUrl(baseUrl?: string) {
}
export async function requestTts(request: TtsRequest): Promise<Blob> {
- const baseUrl = request.baseUrl?.trim() || resolveAudioApiBaseUrl();
- if (!baseUrl) {
+ return await requestTtsDirect(request);
+}
+
+export async function requestTtsDirect(request: TtsRequest): Promise<Blob> {
+ const apiBaseUrl = request.baseUrl?.trim() || resolveAudioApiBaseUrl();
+ if (!apiBaseUrl) {
throw new Error("Audio API base URL is not configured.");
}
- const response = await fetch(`${baseUrl}/api/tts/engines`, {
+ const directRequest = buildDirectTtsHttpRequest({
+ text: request.text,
+ engineId: request.engineId,
+ apiBaseUrl,
+ config: request.config,
+ });
+ if (!directRequest) {
+ throw new Error("Backend relay TTS request is not available for current config.");
+ }
+
+ const response = await fetch(directRequest.url, {
method: "POST",
- headers: { "Content-Type": "application/json" },
- body: JSON.stringify({
- engine: request.engineId || "default",
- data: { text: request.text },
- config: request.config ?? {},
- }),
+ headers: directRequest.headers,
+ body: JSON.stringify(directRequest.body),
signal: request.signal,
});
- if (!response.ok) {
- const detail = await response.text();
- throw new Error(detail || `TTS request failed: ${response.status}`);
+ if (response.ok) {
+ return await resolveTtsBlob(response);
+ }
+
+ if (response.status === 405) {
+ const legacyRequest = buildLegacyTtsHttpRequest(directRequest);
+ const legacyResponse = await fetch(legacyRequest.url, {
+ method: "POST",
+ headers: legacyRequest.headers,
+ body: JSON.stringify(legacyRequest.body),
+ signal: request.signal,
+ });
+
+ if (legacyResponse.ok) {
+ return await resolveTtsBlob(legacyResponse);
+ }
+
+ const detail = await legacyResponse.text();
+ throw createTtsHttpError(legacyResponse.status, detail);
}
- return await response.blob();
+ const detail = await response.text();
+ throw createTtsHttpError(response.status, detail);
}
export async function requestAsr(request: AsrRequest): Promise> {
diff --git a/frontend/packages/app-core/src/services/providers.ts b/frontend/packages/app-core/src/services/providers.ts
index fbbd870..f318f36 100644
--- a/frontend/packages/app-core/src/services/providers.ts
+++ b/frontend/packages/app-core/src/services/providers.ts
@@ -1,8 +1,5 @@
import type { ProviderConfig } from "../stores/providers";
import type { ProviderCatalogEntry, ProviderCategory, SelectOption } from "../data/provider-catalog";
-import type { UnAlibabaCloudOptions, VoiceProviderWithExtraOptions } from "unspeech";
-
-import { createUnAlibabaCloud, listVoices } from "unspeech";
import { appConfig } from "../config";
@@ -115,67 +112,45 @@ export async function listProviderModels(option: ProviderCatalogEntry, config: P
}
export async function listProviderVoices(option: ProviderCatalogEntry, config: ProviderConfig) {
+ const payload = {
+ providerId: option.id,
+ apiKey: config.apiKey ?? "",
+ baseUrl: normalizeBaseUrl(config.baseUrl, resolveDefaultBaseUrl(option)),
+ model: config.model ?? "",
+ extra: config.extra ?? {},
+ };
+
if (!proxyBaseUrl) {
- if (option.id === "alibaba-cloud-model-studio-speech") {
- const apiKey = config.apiKey?.trim();
- const baseUrl = normalizeBaseUrl(config.baseUrl, resolveDefaultBaseUrl(option));
- if (!apiKey || !baseUrl) {
- return [];
- }
- const provider = createUnAlibabaCloud(apiKey, baseUrl) as VoiceProviderWithExtraOptions<UnAlibabaCloudOptions>;
- const voices = await listVoices({
- ...provider.voice(),
- });
- const configuredModel = config.model?.trim();
- const modelCandidates = new Set<string>();
- if (configuredModel) {
- modelCandidates.add(configuredModel);
- if (configuredModel.includes("/")) {
- const shortModel = configuredModel.split("/").pop();
- if (shortModel) {
- modelCandidates.add(shortModel);
- }
- } else {
- modelCandidates.add(`alibaba/${configuredModel}`);
- }
- }
- const filtered = voices.filter((voice) => {
- const compatible = voice.compatible_models;
- if (!Array.isArray(compatible) || compatible.length === 0) {
- return true;
- }
- if (!modelCandidates.size) {
- return true;
- }
- return compatible.some((model) => modelCandidates.has(model));
- });
- const resolved = filtered.length ? filtered : voices;
- return resolved.map((voice) => {
- const descriptions: string[] = [];
- if (voice.languages?.length) {
- descriptions.push(voice.languages.map((lang) => lang.title).join(", "));
- }
- if (Array.isArray(voice.compatible_models) && voice.compatible_models.length) {
- descriptions.push(`Models: ${voice.compatible_models.join(", ")}`);
- }
- return {
- id: voice.id,
- label: voice.name,
- description: descriptions.join(" · "),
- };
- });
+ const baseUrl = resolveApiBaseUrl();
+ if (!baseUrl) {
+ return null;
}
- return null;
+
+ const response = await fetch(`${baseUrl}/api/providers/voices`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify(payload),
+ });
+
+ if (!response.ok) {
+ throw new Error(`Provider voices request failed: ${response.status}`);
+ }
+
+ const result = (await response.json()) as {
+ voices?: SelectOption[];
+ data?: { voices?: SelectOption[] };
+ };
+
+ if (result.data?.voices) return result.data.voices;
+ if (result.voices) return result.voices;
+ return [];
}
const result = await requestProxy<{ voices?: SelectOption[]; data?: { voices?: SelectOption[] } }>(
"/providers/voices",
- {
- providerId: option.id,
- apiKey: config.apiKey ?? "",
- baseUrl: normalizeBaseUrl(config.baseUrl, resolveDefaultBaseUrl(option)),
- extra: config.extra ?? {},
- }
+ payload
);
if (result.data?.voices) return result.data.voices;
diff --git a/frontend/packages/app-core/src/stores/providers.ts b/frontend/packages/app-core/src/stores/providers.ts
index d97b16a..da928d6 100644
--- a/frontend/packages/app-core/src/stores/providers.ts
+++ b/frontend/packages/app-core/src/stores/providers.ts
@@ -18,6 +18,8 @@ import {
} from "../services/providers";
import { useI18n } from "../composables/use-i18n";
import { formatHealthError, formatHealthMessage } from "../utils/health";
+import { filterProviderFields } from "../utils/provider-fields";
+import { isVisibleSpeechProviderId } from "../utils/provider-visibility";
import { useSettingsStore } from "./settings";
export type ProviderStatus = "online" | "offline";
@@ -40,6 +42,16 @@ export type ProviderRuntime = {
const configFieldIds = new Set(["apiKey", "baseUrl", "model", "voice"]);
const credentialFieldIds = new Set(["apiKey", "baseUrl"]);
+const ALIYUN_NLS_PROVIDER_ID = "aliyun-nls-transcription";
+const aliyunNlsNormalizedFields: ProviderField[] = [
+ {
+ id: "apiKey",
+ label: "API Key",
+ type: "secret",
+ required: true,
+ scope: "config",
+ },
+];
export const useProvidersStore = defineStore("providers", () => {
const settingsStore = useSettingsStore();
@@ -47,16 +59,46 @@ export const useProvidersStore = defineStore("providers", () => {
"whalewhisper/providers/configs",
{}
);
- const engineHealthSkipProviders = new Set(["alibaba-cloud-model-studio-speech"]);
+ const engineHealthSkipProviders = new Set([
+ "alibaba-cloud-model-studio-speech",
+ "volcengine-speech",
+ "aliyun-nls-transcription",
+ ]);
const providerRuntime = ref<Record<string, ProviderRuntime>>({});
const catalogProviders = ref<ProviderCatalogEntry[]>([]);
const catalogLoading = ref(false);
const catalogError = ref(null);
const { t } = useI18n();
+ function normalizeProviderEntry(option: ProviderCatalogEntry): ProviderCatalogEntry {
+ if (option.id !== ALIYUN_NLS_PROVIDER_ID) {
+ return option;
+ }
+
+ return {
+ ...option,
+ fields: aliyunNlsNormalizedFields.map((field) => ({ ...field })),
+ };
+ }
+
+ function filterRemovedSpeechProviders(providers: ProviderCatalogEntry[]) {
+ return providers.filter((provider) => {
+ if (provider.category !== "speech") return true;
+ return isVisibleSpeechProviderId(provider.id);
+ });
+ }
+
const effectiveProviders = computed(() => {
- if (catalogProviders.value.length) return catalogProviders.value;
- if (catalogError.value) return fallbackProviderCatalog;
+ if (catalogProviders.value.length) {
+ return filterRemovedSpeechProviders(
+ catalogProviders.value.map((option) => normalizeProviderEntry(option))
+ );
+ }
+ if (catalogError.value) {
+ return filterRemovedSpeechProviders(
+ fallbackProviderCatalog.map((option) => normalizeProviderEntry(option))
+ );
+ }
return [];
});
@@ -235,7 +277,7 @@ export const useProvidersStore = defineStore("providers", () => {
function getProviderFields(providerId: string) {
const option = getProviderMetadata(providerId);
- return option?.fields ?? [];
+ return filterProviderFields(option);
}
function getProviderFieldValue(providerId: string, field: ProviderField) {
@@ -536,6 +578,13 @@ export const useProvidersStore = defineStore("providers", () => {
}
const pendingRefreshIds = new Set<string>();
+ function queueProviderRefresh(providerId: string) {
+ if (!providerId) return;
+ ensureProvider(providerId);
+ ensureDefaultConfig(providerId);
+ pendingRefreshIds.add(providerId);
+ }
+
const flushRefreshQueue = useDebounceFn(() => {
pendingRefreshIds.forEach((providerId) => {
void refreshProvider(providerId);
@@ -543,6 +592,15 @@ export const useProvidersStore = defineStore("providers", () => {
pendingRefreshIds.clear();
}, 600);
+ watch(
+ () => [settingsStore.chatProviderId, settingsStore.speechProviderId, settingsStore.transcriptionProviderId],
+ (providerIds) => {
+ providerIds.forEach((providerId) => queueProviderRefresh(providerId));
+ flushRefreshQueue();
+ },
+ { immediate: true }
+ );
+
watch(
providerConfigs,
(next, prev) => {
@@ -553,7 +611,7 @@ export const useProvidersStore = defineStore("providers", () => {
const nextConfig = next?.[key];
const prevConfig = prev?.[key];
if (JSON.stringify(nextConfig) !== JSON.stringify(prevConfig)) {
- pendingRefreshIds.add(key);
+ queueProviderRefresh(key);
}
});
flushRefreshQueue();
diff --git a/frontend/packages/app-core/src/stores/settings.ts b/frontend/packages/app-core/src/stores/settings.ts
index ebbecbe..20a9a85 100644
--- a/frontend/packages/app-core/src/stores/settings.ts
+++ b/frontend/packages/app-core/src/stores/settings.ts
@@ -15,7 +15,7 @@ export const useSettingsStore = defineStore("settings", () => {
} = storeToRefs(stageSettings);
const themeColorsHueDynamic = ref(false);
const chatProviderId = useLocalStorage("whalewhisper/providers/chat", "openrouter-ai");
- const speechProviderId = useLocalStorage("whalewhisper/providers/speech", "openai-audio-speech");
+ const speechProviderId = useLocalStorage("whalewhisper/providers/speech", "browser-local-audio-speech");
const transcriptionProviderId = useLocalStorage(
"whalewhisper/providers/transcription",
"openai-audio-transcription"
diff --git a/frontend/packages/app-core/src/stores/speech-output.ts b/frontend/packages/app-core/src/stores/speech-output.ts
index 358928e..9f0f6b8 100644
--- a/frontend/packages/app-core/src/stores/speech-output.ts
+++ b/frontend/packages/app-core/src/stores/speech-output.ts
@@ -2,7 +2,13 @@ import { useLocalStorage } from "@vueuse/core";
import { defineStore, storeToRefs } from "pinia";
import { computed, onScopeDispose, ref, watch } from "vue";
-import { requestTts, resolveAudioApiBaseUrl } from "../services/audio";
+import {
+ requestTtsDirect,
+ supportsDirectTts,
+} from "../services/audio";
+import { toSpeakableTtsChunks } from "../utils/tts-chunker";
+import { TtsStreamSegmenter } from "../utils/tts-stream-segmenter";
+import { runTtsChunkQueue, TtsChunkQueueError } from "../utils/tts-streaming-runner";
import { useProvidersStore } from "./providers";
import { useSettingsStore } from "./settings";
@@ -14,7 +20,6 @@ type VoiceOption = {
export const useSpeechOutputStore = defineStore("speech-output", () => {
const enabled = useLocalStorage("whalewhisper/audio/tts/enabled", false);
- const voiceId = useLocalStorage("whalewhisper/audio/tts/voice", "");
const rate = useLocalStorage("whalewhisper/audio/tts/rate", 1);
const pitch = useLocalStorage("whalewhisper/audio/tts/pitch", 1);
const volume = useLocalStorage("whalewhisper/audio/tts/volume", 1);
@@ -24,7 +29,6 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
const settingsStore = useSettingsStore();
const { speechProviderId } = storeToRefs(settingsStore);
const localVoices = ref([]);
- const audioApiBaseUrl = computed(() => resolveAudioApiBaseUrl());
const useBrowserTts = computed(
() => speechProviderId.value === "browser-local-audio-speech"
);
@@ -33,7 +37,7 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
if (useBrowserTts.value) {
return "speechSynthesis" in window;
}
- return Boolean(audioApiBaseUrl.value);
+ return true;
});
const providerMetadata = computed(() =>
providersStore.getProviderMetadata(speechProviderId.value)
@@ -59,16 +63,149 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
}
return remoteVoices.value;
});
- const resolvedVoiceId = computed(() => voiceId.value || providerConfig.value?.voice || "");
+ const resolvedVoiceId = computed(() => providerConfig.value?.voice || "");
const audioElement = ref<HTMLAudioElement | null>(null);
const lastError = ref(null);
let remoteController: AbortController | null = null;
let streamController: AbortController | null = null;
+ let requestQueueTail: Promise<void> = Promise.resolve();
let activeObjectUrl: string | null = null;
let audioContext: AudioContext | null = null;
let gainNode: GainNode | null = null;
let activeSources: AudioBufferSourceNode[] = [];
let scheduledTime = 0;
+ const assistantStreamSegmenter = new TtsStreamSegmenter();
+ const incrementalStreamingEnabled = computed(
+ () => streaming.value && !useBrowserTts.value
+ );
+ let assistantStreamActive = false;
+ let assistantStreamTaskTail: Promise<void> = Promise.resolve();
+ let assistantStreamPlaybackTail: Promise<void> = Promise.resolve();
+ let assistantStreamStartedChunks = 0;
+ let assistantStreamQueueVersion = 0;
+ let assistantStreamChunks: string[] = [];
+ let assistantStreamFailedChunkIndex: number | null = null;
+
+ function resolveTtsEngineId() {
+ const metadataEngineId = providerMetadata.value?.engineId;
+ if (typeof metadataEngineId === "string" && metadataEngineId.trim()) {
+ return metadataEngineId.trim();
+ }
+ if (speechProviderId.value === "volcengine-speech" || speechProviderId.value === "volcengine") {
+ return "volcengine-speech";
+ }
+ if (
+ speechProviderId.value === "alibaba-cloud-model-studio-speech" ||
+ speechProviderId.value === "alibaba-cloud-model-studio"
+ ) {
+ return "alibaba-cloud-model-studio-speech";
+ }
+ return "";
+ }
+
+ async function requestTtsDirectSerial(params: Parameters<typeof requestTtsDirect>[0]) {
+ const previous = requestQueueTail;
+ let release: (() => void) | null = null;
+ requestQueueTail = new Promise<void>((resolve) => {
+ release = resolve;
+ });
+
+ await previous;
+ try {
+ if (params.signal?.aborted) {
+ throw new DOMException("Aborted", "AbortError");
+ }
+ return await requestTtsDirect(params);
+ } finally {
+ release?.();
+ }
+ }
+
+ function isRetriableTtsError(error: unknown) {
+ if (error instanceof DOMException && error.name === "AbortError") {
+ return false;
+ }
+ if (!(error instanceof Error)) {
+ return false;
+ }
+ const status = Number((error as Error & { status?: number }).status);
+ if (status === 502 || status === 503 || status === 504 || status === 429) {
+ return true;
+ }
+ const message = error.message.toLowerCase();
+ return (
+ message.includes("bad gateway") ||
+ message.includes("gateway timeout") ||
+ message.includes("failed to fetch") ||
+ message.includes("networkerror") ||
+ message.includes("502")
+ );
+ }
+
+ async function sleep(ms: number, signal?: AbortSignal) {
+ if (!ms) return;
+ await new Promise<void>((resolve, reject) => {
+ const timer = setTimeout(() => {
+ cleanup();
+ resolve();
+ }, ms);
+ const onAbort = () => {
+ cleanup();
+ reject(new DOMException("Aborted", "AbortError"));
+ };
+ const cleanup = () => {
+ clearTimeout(timer);
+ signal?.removeEventListener("abort", onAbort);
+ };
+ if (signal?.aborted) {
+ onAbort();
+ return;
+ }
+ signal?.addEventListener("abort", onAbort, { once: true });
+ });
+ }
+
+ async function requestTtsDirectWithRetry(
+ params: Parameters<typeof requestTtsDirect>[0],
+ options?: { maxAttempts?: number }
+ ) {
+ const maxAttempts = Math.max(1, options?.maxAttempts ?? 1);
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+ try {
+ return await requestTtsDirectSerial(params);
+ } catch (error) {
+ const shouldRetry = attempt < maxAttempts && isRetriableTtsError(error);
+ if (!shouldRetry) {
+ throw error;
+ }
+ await sleep(180 * attempt, params.signal);
+ }
+ }
+ throw new Error("Direct TTS request failed.");
+ }
+
+ async function requestRemoteTtsBlob(
+ text: string,
+ signal: AbortSignal,
+ options?: { maxAttempts?: number }
+ ) {
+ const engineId = resolveTtsEngineId();
+ const config = buildRemoteConfig();
+
+ if (!supportsDirectTts(engineId)) {
+ throw new Error(`Direct TTS is not supported for provider: ${speechProviderId.value}`);
+ }
+
+ return await requestTtsDirectWithRetry(
+ {
+ engineId,
+ text,
+ config,
+ signal,
+ },
+ options
+ );
+ }
function refreshVoices() {
if (!supported.value) return;
@@ -84,12 +221,13 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
return localVoices.value.find((voice) => voice.voiceURI === resolvedVoiceId.value);
}
- function stopRemotePlayback() {
+ function stopRemotePlayback(options?: { invalidateQueue?: boolean }) {
if (remoteController) {
remoteController.abort();
remoteController = null;
}
stopStreamingPlayback();
+ resetAssistantStreamState({ invalidateQueue: options?.invalidateQueue ?? true });
if (audioElement.value) {
audioElement.value.pause();
audioElement.value.src = "";
@@ -111,6 +249,23 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
return Math.min(Math.max(value, 0), 1);
}
+ function scheduleDecodedBuffer(ctx: AudioContext, buffer: AudioBuffer) {
+ const startAt = Math.max(ctx.currentTime, scheduledTime);
+ const source = ctx.createBufferSource();
+ source.buffer = buffer;
+ if (gainNode) {
+ source.connect(gainNode);
+ } else {
+ source.connect(ctx.destination);
+ }
+ source.start(startAt);
+ scheduledTime = startAt + buffer.duration;
+ activeSources.push(source);
+ source.onended = () => {
+ activeSources = activeSources.filter((item) => item !== source);
+ };
+ }
+
function ensureAudioContext() {
if (typeof window === "undefined") return null;
if (!audioContext) {
@@ -142,23 +297,202 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
scheduledTime = 0;
}
+ function resetAssistantStreamState(options?: { invalidateQueue?: boolean }) {
+ assistantStreamActive = false;
+ assistantStreamPlaybackTail = Promise.resolve();
+ assistantStreamStartedChunks = 0;
+ assistantStreamChunks = [];
+ assistantStreamFailedChunkIndex = null;
+ assistantStreamSegmenter.reset();
+ if (options?.invalidateQueue) {
+ assistantStreamQueueVersion += 1;
+ assistantStreamTaskTail = Promise.resolve();
+ }
+ }
+
+ function queueAssistantStreamTask(task: () => Promise<void>) {
+ const version = assistantStreamQueueVersion;
+ const runIfCurrent = async () => {
+ if (version !== assistantStreamQueueVersion) return;
+ await task();
+ };
+ assistantStreamTaskTail = assistantStreamTaskTail.then(runIfCurrent, runIfCurrent);
+ return assistantStreamTaskTail;
+ }
+
+ async function ensureAssistantStreamStarted() {
+ if (!incrementalStreamingEnabled.value) return false;
+ if (!supported.value || !enabled.value) return false;
+ if (assistantStreamActive && streamController && !streamController.signal.aborted) {
+ return true;
+ }
+
+ const ctx = ensureAudioContext();
+ if (!ctx) return false;
+
+ if (ctx.state === "suspended") {
+ await ctx.resume();
+ }
+ if (gainNode) {
+ gainNode.gain.value = clampVolume(volume.value);
+ }
+
+ stopRemotePlayback({ invalidateQueue: false });
+ assistantStreamSegmenter.reset();
+ assistantStreamPlaybackTail = Promise.resolve();
+ assistantStreamStartedChunks = 0;
+ assistantStreamChunks = [];
+ assistantStreamFailedChunkIndex = null;
+ streamController = new AbortController();
+ scheduledTime = ctx.currentTime;
+ assistantStreamActive = true;
+ return true;
+ }
+
+ function scheduleAssistantChunkPlayback(chunk: string, chunkIndex: number) {
+ assistantStreamPlaybackTail = assistantStreamPlaybackTail
+ .catch(() => undefined)
+ .then(async () => {
+ if (!assistantStreamActive || !streamController) return;
+ if (
+ assistantStreamFailedChunkIndex !== null &&
+ chunkIndex > assistantStreamFailedChunkIndex
+ ) {
+ return;
+ }
+ const ctx = ensureAudioContext();
+ if (!ctx || streamController.signal.aborted) return;
+
+ const buffer = await fetchTtsBuffer(chunk, streamController, ctx, {
+ maxAttempts: 1,
+ });
+ if (!buffer || streamController.signal.aborted) return;
+ scheduleDecodedBuffer(ctx, buffer);
+ assistantStreamStartedChunks += 1;
+ })
+ .catch((error) => {
+ if (error instanceof DOMException && error.name === "AbortError") return;
+ if (
+ assistantStreamFailedChunkIndex === null ||
+ chunkIndex < assistantStreamFailedChunkIndex
+ ) {
+ assistantStreamFailedChunkIndex = chunkIndex;
+ }
+ console.warn("[TTS] stream chunk failed, defer to merged fallback:", {
+ index: chunkIndex,
+ chunk,
+ error: error instanceof Error ? error.message : String(error),
+ });
+ });
+ }
+
+ function flushAssistantSegmenter(finalize: boolean) {
+ const chunks = assistantStreamSegmenter.drain(finalize);
+ if (chunks.length === 0) return;
+ const baseIndex = assistantStreamChunks.length;
+ chunks.forEach((chunk, offset) => {
+ const chunkIndex = baseIndex + offset;
+ assistantStreamChunks.push(chunk);
+ scheduleAssistantChunkPlayback(chunk, chunkIndex);
+ });
+ }
+
+ function pushAssistantLiteral(literal: string) {
+ if (!literal) return;
+ if (!incrementalStreamingEnabled.value) return;
+ if (!supported.value || !enabled.value) return;
+ void queueAssistantStreamTask(async () => {
+ const started = await ensureAssistantStreamStarted();
+ if (!started) return;
+ assistantStreamSegmenter.appendLiteral(literal);
+ flushAssistantSegmenter(false);
+ });
+ }
+
+ function pushAssistantSpecial(_special: string) {
+ if (!incrementalStreamingEnabled.value) return;
+ if (!supported.value || !enabled.value) return;
+ void queueAssistantStreamTask(async () => {
+ const started = await ensureAssistantStreamStarted();
+ if (!started) return;
+ assistantStreamSegmenter.appendSpecialMarker();
+ flushAssistantSegmenter(false);
+ });
+ }
+
+ async function endAssistantStream(finalText?: string) {
+ if (!incrementalStreamingEnabled.value) {
+ if (finalText?.trim()) {
+ await speak(finalText);
+ }
+ return;
+ }
+
+ await queueAssistantStreamTask(async () => {
+ if (!assistantStreamActive) {
+ if (finalText?.trim()) {
+ await speak(finalText);
+ }
+ return;
+ }
+ assistantStreamSegmenter.appendFlushMarker();
+ flushAssistantSegmenter(true);
+ try {
+ await assistantStreamPlaybackTail;
+ if (
+ assistantStreamFailedChunkIndex !== null &&
+ streamController &&
+ !streamController.signal.aborted
+ ) {
+ const ctx = ensureAudioContext();
+ const remainingText = assistantStreamChunks
+ .slice(assistantStreamFailedChunkIndex)
+ .join("");
+ if (ctx && remainingText.trim()) {
+ console.warn("[TTS] stream fallback to merged remainder:", {
+ failedIndex: assistantStreamFailedChunkIndex,
+ remainingChunks: assistantStreamChunks.length - assistantStreamFailedChunkIndex,
+ });
+ const fallbackBuffer = await fetchTtsBuffer(
+ remainingText,
+ streamController,
+ ctx,
+ { maxAttempts: 1 }
+ );
+ if (fallbackBuffer && !streamController.signal.aborted) {
+ scheduleDecodedBuffer(ctx, fallbackBuffer);
+ assistantStreamStartedChunks += 1;
+ }
+ }
+ }
+ } finally {
+ if (streamController && streamController.signal.aborted) {
+ // Keep current abort state from explicit stop/interrupt.
+ }
+ assistantStreamActive = false;
+ assistantStreamSegmenter.reset();
+ assistantStreamPlaybackTail = Promise.resolve();
+ assistantStreamStartedChunks = 0;
+ }
+ });
+ }
+
function buildRemoteConfig() {
const config: Record<string, unknown> = {
...(providerConfig.value?.extra ?? {}),
};
const isAlibaba = speechProviderId.value === "alibaba-cloud-model-studio-speech";
+ const isVolcengine = speechProviderId.value === "volcengine-speech";
if (providerConfig.value?.apiKey) {
config.apiKey = providerConfig.value.apiKey;
}
if (providerConfig.value?.baseUrl) {
config.baseUrl = providerConfig.value.baseUrl;
+ config.base_url = providerConfig.value.baseUrl;
}
let model = providerConfig.value?.model;
- const voice = resolvedVoiceId.value || providerConfig.value?.voice;
+ const voice = resolvedVoiceId.value;
if (model) {
- if (isAlibaba && !model.includes("/")) {
- model = `alibaba/${model}`;
- }
config.model = model;
}
if (voice) {
@@ -174,37 +508,23 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
if (isAlibaba && pitch.value && pitch.value !== 1) {
config.pitch = pitch.value;
}
- return config;
- }
-
- function splitTtsText(text: string) {
- const hardBreaks = new Set([".", "。", "!", "!", "?", "?", "…", "\n"]);
- const softBreaks = new Set([",", ",", ";", ";", ":", ":", "、"]);
- const minChars = 12;
- const maxChars = 80;
- const chunks: string[] = [];
- let buffer = "";
- for (const char of text) {
- buffer += char;
- const isBreak = hardBreaks.has(char) || softBreaks.has(char);
- if (buffer.length >= maxChars || (isBreak && buffer.length >= minChars)) {
- const trimmed = buffer.trim();
- if (trimmed) chunks.push(trimmed);
- buffer = "";
+ if (isVolcengine) {
+ const appId = String(providerConfig.value?.extra?.appId ?? providerConfig.value?.extra?.appid ?? "").trim();
+ if (appId) {
+ config.appId = appId;
}
}
- const trimmed = buffer.trim();
- if (trimmed) chunks.push(trimmed);
- return chunks;
+ return config;
}
- async function fetchTtsBuffer(text: string, controller: AbortController, ctx: AudioContext) {
- const blob = await requestTts({
- baseUrl: audioApiBaseUrl.value,
- engineId: providerMetadata.value?.engineId,
- text,
- config: buildRemoteConfig(),
- signal: controller.signal,
+ async function fetchTtsBuffer(
+ text: string,
+ controller: AbortController,
+ ctx: AudioContext,
+ options?: { maxAttempts?: number }
+ ) {
+ const blob = await requestRemoteTtsBlob(text, controller.signal, {
+ maxAttempts: Math.max(1, options?.maxAttempts ?? 1),
});
if (controller.signal.aborted) {
throw new DOMException("Aborted", "AbortError");
@@ -226,37 +546,56 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
const controller = new AbortController();
streamController = controller;
- const chunks = splitTtsText(text);
+ const chunks = toSpeakableTtsChunks(text);
if (chunks.length === 0) return;
- const pending: Array<Promise<AudioBuffer | null>> = [];
- let index = 0;
- const maxInFlight = 2;
scheduledTime = ctx.currentTime;
try {
- while (index < chunks.length || pending.length > 0) {
- while (index < chunks.length && pending.length < maxInFlight) {
- pending.push(fetchTtsBuffer(chunks[index], controller, ctx));
- index += 1;
+ await runTtsChunkQueue(
+ chunks,
+ async (chunk) => {
+ if (controller.signal.aborted) return;
+ const buffer = await fetchTtsBuffer(chunk, controller, ctx, {
+ maxAttempts: 1,
+ });
+ if (!buffer || controller.signal.aborted) return;
+ scheduleDecodedBuffer(ctx, buffer);
+ },
+ {
+ stopOnError: true,
+ onChunkError: (error, context) => {
+ console.warn("[TTS] chunk failed:", {
+ index: context.index,
+ total: context.total,
+ chunk: context.chunk,
+ error: error instanceof Error ? error.message : String(error),
+ });
+ },
}
- const buffer = await pending.shift();
- if (!buffer || controller.signal.aborted) return;
- const startAt = Math.max(ctx.currentTime, scheduledTime);
- const source = ctx.createBufferSource();
- source.buffer = buffer;
- if (gainNode) {
- source.connect(gainNode);
- } else {
- source.connect(ctx.destination);
+ );
+ } catch (error) {
+ if (error instanceof DOMException && error.name === "AbortError") {
+ throw error;
+ }
+ if (error instanceof TtsChunkQueueError) {
+ const remainingChunks = chunks.slice(error.context.index);
+ const remainingText = remainingChunks.join("");
+ if (remainingText.trim()) {
+ console.warn("[TTS] fallback to merged remainder after chunk failure:", {
+ failedIndex: error.context.index,
+ remainingChunks: remainingChunks.length,
+ });
+ const fallbackBuffer = await fetchTtsBuffer(remainingText, controller, ctx, {
+ maxAttempts: 1,
+ });
+ if (!controller.signal.aborted && fallbackBuffer) {
+ scheduleDecodedBuffer(ctx, fallbackBuffer);
+ return;
+ }
}
- source.start(startAt);
- scheduledTime = startAt + buffer.duration;
- activeSources.push(source);
- source.onended = () => {
- activeSources = activeSources.filter((item) => item !== source);
- };
}
+ throw error;
} finally {
if (streamController === controller) {
streamController = null;
@@ -313,13 +652,7 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
const controller = new AbortController();
remoteController = controller;
try {
- const blob = await requestTts({
- baseUrl: audioApiBaseUrl.value,
- engineId: providerMetadata.value?.engineId,
- text,
- config: buildRemoteConfig(),
- signal: controller.signal,
- });
+ const blob = await requestRemoteTtsBlob(text, controller.signal);
if (controller.signal.aborted) return;
const objectUrl = URL.createObjectURL(blob);
activeObjectUrl = objectUrl;
@@ -362,25 +695,6 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
}
);
- watch(
- () => voices.value,
- (next) => {
- if (useBrowserTts.value) return;
- const voiceIds = new Set(next.map((voice) => voice.voiceURI));
- if (voiceId.value && voiceIds.has(voiceId.value)) return;
- const configuredVoice = providerConfig.value?.voice;
- if (configuredVoice && voiceIds.has(configuredVoice)) {
- voiceId.value = configuredVoice;
- return;
- }
- if (next.length > 0) {
- voiceId.value = next[0].voiceURI;
- } else {
- voiceId.value = "";
- }
- }
- );
-
watch(
() => volume.value,
(next) => {
@@ -399,7 +713,6 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
return {
enabled,
- voiceId,
rate,
pitch,
volume,
@@ -407,7 +720,11 @@ export const useSpeechOutputStore = defineStore("speech-output", () => {
voices,
supported,
lastError,
+ incrementalStreamingEnabled,
refreshVoices,
+ pushAssistantLiteral,
+ pushAssistantSpecial,
+ endAssistantStream,
speak,
stop,
};
diff --git a/frontend/packages/app-core/src/stores/transcription.ts b/frontend/packages/app-core/src/stores/transcription.ts
index 625dfa7..e47950c 100644
--- a/frontend/packages/app-core/src/stores/transcription.ts
+++ b/frontend/packages/app-core/src/stores/transcription.ts
@@ -8,6 +8,13 @@ import {
createAudioCaptureSession,
pcm16ToWavBlob,
} from "../utils/audio-stream";
+import { shouldAutoRestartBrowserRecognition } from "../utils/browser-recognition-restart";
+import { decideCaptureFallback } from "../utils/capture-startup";
+import {
+ normalizeTranscriptionLanguage,
+ resolveInitialTranscriptionLanguage,
+} from "../utils/transcription-language";
+import { sanitizeTranscript } from "../utils/transcript-filter";
import { useChatStore } from "./chat";
import { useHearingStore } from "./hearing";
import { useProvidersStore } from "./providers";
@@ -21,6 +28,12 @@ type RecordingResult = {
};
type CaptureMode = "worklet" | "media";
+type ListeningSource = "settings-test" | "chat-input";
+type StartListeningOptions = {
+ autoSend?: boolean;
+ source?: ListeningSource;
+};
+const BROWSER_RECOGNITION_RESTART_DELAY_MS = 250;
export const useTranscriptionStore = defineStore("transcription", () => {
const chatStore = useChatStore();
@@ -31,7 +44,14 @@ export const useTranscriptionStore = defineStore("transcription", () => {
const enabled = useLocalStorage("whalewhisper/audio/transcription/enabled", false);
const autoSend = useLocalStorage("whalewhisper/audio/transcription/auto-send", true);
- const language = useLocalStorage("whalewhisper/audio/transcription/language", "en-US");
+ const initialLanguage =
+ typeof navigator !== "undefined"
+ ? resolveInitialTranscriptionLanguage(navigator.language)
+ : resolveInitialTranscriptionLanguage(undefined);
+ const language = useLocalStorage(
+ "whalewhisper/audio/transcription/language",
+ initialLanguage
+ );
const vadMinSpeechMs = useLocalStorage(
"whalewhisper/audio/transcription/vad-min-ms",
300
@@ -87,6 +107,8 @@ export const useTranscriptionStore = defineStore("transcription", () => {
const lastTranscript = ref("");
const error = ref<string | null>(null);
const vadActive = ref(false);
+ const activeAutoSend = ref(Boolean(autoSend.value));
+ const listeningSource = ref<ListeningSource | null>(null);
let recognition: any = null;
let recorder: MediaRecorder | null = null;
@@ -96,6 +118,10 @@ export const useTranscriptionStore = defineStore("transcription", () => {
let recorderStartedAt = 0;
let silenceTimer: number | null = null;
let restoreHearingEnabled: boolean | null = null;
+ let browserRecognitionSessionRequested = false;
+ let manualBrowserRecognitionStop = false;
+ let recognitionRestartTimer: number | null = null;
+ let lastBrowserRecognitionErrorCode: string | null = null;
let captureSession: Awaited<ReturnType<typeof createAudioCaptureSession>> | null = null;
let captureActive = false;
@@ -106,6 +132,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
let streamPending: ArrayBuffer[] = [];
let streamConnection: ReturnType | null = null;
let streamReady = false;
+ let workletCaptureDisabled = false;
const minSpeechMs = computed(() =>
Math.max(100, Number(vadMinSpeechMs.value) || 300)
@@ -114,6 +141,81 @@ export const useTranscriptionStore = defineStore("transcription", () => {
Math.max(200, Number(vadSilenceMs.value) || 700)
);
+ function applyTranscript(raw: string) {
+ const transcript = sanitizeTranscript(raw);
+ if (!transcript) {
+ return;
+ }
+ lastTranscript.value = transcript;
+ interimText.value = "";
+ if (activeAutoSend.value) {
+ chatStore.send(transcript);
+ }
+ }
+
+ function resolveStartAutoSend(options?: StartListeningOptions) {
+ if (typeof options?.autoSend === "boolean") {
+ return options.autoSend;
+ }
+ return Boolean(autoSend.value);
+ }
+
+ function resolveStartSource(options: StartListeningOptions | undefined, nextAutoSend: boolean) {
+ if (options?.source) {
+ return options.source;
+ }
+ return nextAutoSend ? "chat-input" : "settings-test";
+ }
+
+ function shouldRestartBrowserRecognition() {
+ return shouldAutoRestartBrowserRecognition({
+ userRequested: browserRecognitionSessionRequested,
+ manuallyStopped: manualBrowserRecognitionStop,
+ enabled: enabled.value,
+ supported: supported.value,
+ useBrowserRecognition: useBrowserRecognition.value,
+ lastErrorCode: lastBrowserRecognitionErrorCode,
+ });
+ }
+
+ function clearRecognitionRestartTimer() {
+ if (typeof window === "undefined") return;
+ if (recognitionRestartTimer) {
+ window.clearTimeout(recognitionRestartTimer);
+ recognitionRestartTimer = null;
+ }
+ }
+
+ function startBrowserRecognitionSession() {
+ const recognizer = ensureRecognition();
+ if (!recognizer) return;
+ recognizer.lang = normalizeTranscriptionLanguage(language.value);
+ try {
+ recognizer.start();
+ } catch (err) {
+ const name = err instanceof DOMException ? err.name : "";
+ if (name === "InvalidStateError") {
+ listening.value = true;
+ return;
+ }
+ error.value = err instanceof Error ? err.message : "Speech recognition error.";
+ listening.value = false;
+ }
+ }
+
+ function scheduleBrowserRecognitionRestart() {
+ if (typeof window === "undefined") return;
+ clearRecognitionRestartTimer();
+ recognitionRestartTimer = window.setTimeout(() => {
+ recognitionRestartTimer = null;
+ if (!shouldRestartBrowserRecognition()) {
+ listening.value = false;
+ return;
+ }
+ startBrowserRecognitionSession();
+ }, BROWSER_RECOGNITION_RESTART_DELAY_MS);
+ }
+
function getRecognitionCtor(): SpeechRecognitionCtor | null {
if (typeof window === "undefined") return null;
return (window.SpeechRecognition || window.webkitSpeechRecognition) as SpeechRecognitionCtor;
@@ -136,18 +238,26 @@ export const useTranscriptionStore = defineStore("transcription", () => {
recognition = new Ctor();
recognition.continuous = true;
recognition.interimResults = true;
- recognition.lang = language.value;
+ recognition.lang = normalizeTranscriptionLanguage(language.value);
recognition.onstart = () => {
listening.value = true;
error.value = null;
+ lastBrowserRecognitionErrorCode = null;
};
recognition.onend = () => {
+ if (shouldRestartBrowserRecognition()) {
+ listening.value = true;
+ scheduleBrowserRecognitionRestart();
+ return;
+ }
listening.value = false;
};
recognition.onerror = (event) => {
+ lastBrowserRecognitionErrorCode =
+ typeof event.error === "string" ? event.error : null;
error.value = event.error || "Speech recognition error.";
listening.value = false;
};
@@ -166,13 +276,9 @@ export const useTranscriptionStore = defineStore("transcription", () => {
}
}
- interimText.value = interim.trim();
+ interimText.value = sanitizeTranscript(interim);
if (finalText.trim()) {
- lastTranscript.value = finalText.trim();
- interimText.value = "";
- if (autoSend.value) {
- chatStore.send(lastTranscript.value);
- }
+ applyTranscript(finalText);
}
};
@@ -217,7 +323,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
}
}
- async function startListening() {
+ async function startListening(options?: StartListeningOptions) {
if (!canListen.value) {
error.value = useBrowserRecognition.value
? "Speech recognition is not supported in this environment."
@@ -225,6 +331,17 @@ export const useTranscriptionStore = defineStore("transcription", () => {
return;
}
+ if (!enabled.value) {
+ enabled.value = true;
+ }
+ const nextAutoSend = resolveStartAutoSend(options);
+ activeAutoSend.value = nextAutoSend;
+ listeningSource.value = resolveStartSource(options, nextAutoSend);
+
+ if (listening.value) {
+ return;
+ }
+
listening.value = true;
if (!hearingStore.enabled) {
@@ -233,15 +350,12 @@ export const useTranscriptionStore = defineStore("transcription", () => {
}
await hearingStore.start();
- if (!enabled.value) {
- return;
- }
-
if (useBrowserRecognition.value && supported.value) {
- const recognizer = ensureRecognition();
- if (!recognizer) return;
- recognizer.lang = language.value;
- recognizer.start();
+ browserRecognitionSessionRequested = true;
+ manualBrowserRecognitionStop = false;
+ lastBrowserRecognitionErrorCode = null;
+ clearRecognitionRestartTimer();
+ startBrowserRecognitionSession();
return;
}
@@ -249,18 +363,29 @@ export const useTranscriptionStore = defineStore("transcription", () => {
}
async function stopListening() {
+ const wasSettingsTest = listeningSource.value === "settings-test";
+ const wasChatInput = listeningSource.value === "chat-input";
listening.value = false;
if (useBrowserRecognition.value) {
+ browserRecognitionSessionRequested = false;
+ manualBrowserRecognitionStop = true;
+ clearRecognitionRestartTimer();
recognition?.stop();
} else {
await stopVad();
}
- if (restoreHearingEnabled === false) {
+ if (wasSettingsTest || wasChatInput) {
+ hearingStore.stopSpeechDetection();
+ hearingStore.stop();
+ hearingStore.enabled = false;
+ } else if (restoreHearingEnabled === false) {
hearingStore.enabled = false;
}
restoreHearingEnabled = null;
+ activeAutoSend.value = Boolean(autoSend.value);
+ listeningSource.value = null;
}
function resolveRecorderMimeType() {
@@ -279,9 +404,9 @@ export const useTranscriptionStore = defineStore("transcription", () => {
error.value = null;
if (!navigator.mediaDevices?.getUserMedia) {
error.value = "Microphone is not supported in this environment.";
- return;
+ return false;
}
- if (recorder) return;
+ if (recorder) return true;
const constraints: MediaStreamConstraints = {
audio: hearingStore.selectedDeviceId
@@ -292,7 +417,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
recorderStream = await navigator.mediaDevices.getUserMedia(constraints);
} catch (err) {
error.value = err instanceof Error ? err.message : "Failed to access microphone.";
- return;
+ return false;
}
const mimeType = resolveRecorderMimeType();
@@ -305,7 +430,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
} catch (err) {
error.value = err instanceof Error ? err.message : "Failed to start recorder.";
cleanupRecorder();
- return;
+ return false;
}
recorder.ondataavailable = (event) => {
@@ -323,6 +448,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
error.value = detail?.message || "Recorder error.";
};
recorder.start();
+ return true;
}
async function stopRecording() {
@@ -383,11 +509,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
});
const transcript = extractTranscript(payload);
if (transcript) {
- lastTranscript.value = transcript;
- interimText.value = "";
- if (autoSend.value) {
- chatStore.send(transcript);
- }
+ applyTranscript(transcript);
}
} catch (err) {
error.value = err instanceof Error ? err.message : "Transcription failed.";
@@ -402,9 +524,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
if (model) {
config.model = model;
}
- if (language.value) {
- config.language = language.value;
- }
+ config.language = normalizeTranscriptionLanguage(language.value);
const extension = resolveExtension(mimeType);
config.filename = extension ? `audio.${extension}` : "audio.wav";
config.content_type = mimeType || "application/octet-stream";
@@ -422,9 +542,9 @@ export const useTranscriptionStore = defineStore("transcription", () => {
}
function extractTranscript(payload: Record<string, any>) {
- if (typeof payload.text === "string") return payload.text.trim();
+ if (typeof payload.text === "string") return sanitizeTranscript(payload.text);
const data = payload.data;
- if (data && typeof data.text === "string") return data.text.trim();
+ if (data && typeof data.text === "string") return sanitizeTranscript(data.text);
return "";
}
@@ -444,16 +564,33 @@ export const useTranscriptionStore = defineStore("transcription", () => {
if (vadActive.value) return;
vadActive.value = true;
listening.value = true;
+ error.value = null;
try {
- await hearingStore.startSpeechDetection({
- minSpeechMs: minSpeechMs.value,
- });
- if (workletAvailable.value) {
- await ensureCaptureSession();
+ // Always use local volume-threshold detection to avoid external VAD runtime/CDN dependency.
+ hearingStore.stopSpeechDetection();
+ await hearingStore.start();
+ if (workletAvailable.value && !workletCaptureDisabled) {
+ try {
+ await ensureCaptureSession();
+ } catch (err) {
+ const fallback = decideCaptureFallback({
+ workletError: err,
+ mediaRecorderSupported: recordingAvailable.value,
+ });
+ if (fallback.mode === "none") {
+ throw new Error(fallback.error || "Failed to start microphone listening.");
+ }
+ workletCaptureDisabled = true;
+ }
}
} catch (err) {
- error.value = err instanceof Error ? err.message : "Failed to start VAD.";
+ hearingStore.stopSpeechDetection();
+ error.value =
+ err instanceof Error
+ ? err.message
+ : "Failed to start microphone listening.";
+ listening.value = false;
}
}
@@ -468,6 +605,17 @@ export const useTranscriptionStore = defineStore("transcription", () => {
await stopCapture();
}
+ async function startMediaCapture() {
+ const started = await startRecording();
+ if (!started) {
+ captureActive = false;
+ captureMode = null;
+ return false;
+ }
+ captureMode = "media";
+ return true;
+ }
+
async function startCapture() {
if (captureActive) return;
captureStartedAt = Date.now();
@@ -477,8 +625,24 @@ export const useTranscriptionStore = defineStore("transcription", () => {
streamReady = false;
captureActive = true;
- if (workletAvailable.value) {
- await ensureCaptureSession();
+ if (workletAvailable.value && !workletCaptureDisabled) {
+ try {
+ await ensureCaptureSession();
+ } catch (err) {
+ const fallback = decideCaptureFallback({
+ workletError: err,
+ mediaRecorderSupported: recordingAvailable.value,
+ });
+ if (fallback.mode === "none") {
+ error.value = fallback.error || "Recording is not supported in this environment.";
+ captureActive = false;
+ captureMode = null;
+ return;
+ }
+ workletCaptureDisabled = true;
+ await startMediaCapture();
+ return;
+ }
captureMode = "worklet";
if (useStreamingTransport.value) {
try {
@@ -506,15 +670,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
}
return;
}
-
- if (!recordingAvailable.value) {
- error.value = "Recording is not supported in this environment.";
- captureActive = false;
- return;
- }
-
- captureMode = "media";
- await startRecording();
+ await startMediaCapture();
}
async function stopCapture() {
@@ -538,11 +694,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
const payload = await streamConnection.result;
const transcript = extractTranscript(payload);
if (transcript) {
- lastTranscript.value = transcript;
- interimText.value = "";
- if (autoSend.value) {
- chatStore.send(transcript);
- }
+ applyTranscript(transcript);
}
streamSucceeded = true;
} catch (err) {
@@ -566,11 +718,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
});
const transcript = extractTranscript(payload);
if (transcript) {
- lastTranscript.value = transcript;
- interimText.value = "";
- if (autoSend.value) {
- chatStore.send(transcript);
- }
+ applyTranscript(transcript);
}
} catch (err) {
error.value = err instanceof Error ? err.message : "Transcription failed.";
@@ -607,7 +755,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
watch(language, (next) => {
if (recognition) {
- recognition.lang = next;
+ recognition.lang = normalizeTranscriptionLanguage(next);
}
});
@@ -634,6 +782,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
);
onScopeDispose(() => {
+ clearRecognitionRestartTimer();
void stopListening();
recognition = null;
cleanupRecorder();
@@ -677,6 +826,7 @@ export const useTranscriptionStore = defineStore("transcription", () => {
listening,
supported,
canListen,
+ listeningSource,
interimText,
lastTranscript,
error,
diff --git a/frontend/packages/app-core/src/utils/browser-recognition-restart.test.ts b/frontend/packages/app-core/src/utils/browser-recognition-restart.test.ts
new file mode 100644
index 0000000..ec2b29b
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/browser-recognition-restart.test.ts
@@ -0,0 +1,56 @@
+import assert from "node:assert/strict";
+
+import { shouldAutoRestartBrowserRecognition } from "./browser-recognition-restart.ts";
+
+function run(name: string, fn: () => void) {
+ try {
+ fn();
+ console.info(`PASS ${name}`);
+ } catch (error) {
+ console.error(`FAIL ${name}`);
+ throw error;
+ }
+}
+
+run("restarts when user session is active and no fatal error", () => {
+ assert.equal(
+ shouldAutoRestartBrowserRecognition({
+ userRequested: true,
+ manuallyStopped: false,
+ enabled: true,
+ supported: true,
+ useBrowserRecognition: true,
+ lastErrorCode: null,
+ }),
+ true
+ );
+});
+
+run("does not restart after manual stop", () => {
+ assert.equal(
+ shouldAutoRestartBrowserRecognition({
+ userRequested: true,
+ manuallyStopped: true,
+ enabled: true,
+ supported: true,
+ useBrowserRecognition: true,
+ lastErrorCode: null,
+ }),
+ false
+ );
+});
+
+run("does not restart on microphone permission denial", () => {
+ assert.equal(
+ shouldAutoRestartBrowserRecognition({
+ userRequested: true,
+ manuallyStopped: false,
+ enabled: true,
+ supported: true,
+ useBrowserRecognition: true,
+ lastErrorCode: "not-allowed",
+ }),
+ false
+ );
+});
+
diff --git a/frontend/packages/app-core/src/utils/browser-recognition-restart.ts b/frontend/packages/app-core/src/utils/browser-recognition-restart.ts
new file mode 100644
index 0000000..644f176
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/browser-recognition-restart.ts
@@ -0,0 +1,23 @@
+const NON_RESTARTABLE_ERRORS = new Set([
+ "not-allowed",
+ "service-not-allowed",
+ "audio-capture",
+]);
+
+type AutoRestartDecision = {
+ userRequested: boolean;
+ manuallyStopped: boolean;
+ enabled: boolean;
+ supported: boolean;
+ useBrowserRecognition: boolean;
+ lastErrorCode?: string | null;
+};
+
+export function shouldAutoRestartBrowserRecognition(options: AutoRestartDecision) {
+ if (!options.userRequested) return false;
+ if (options.manuallyStopped) return false;
+ if (!options.enabled || !options.supported || !options.useBrowserRecognition) return false;
+ if (!options.lastErrorCode) return true;
+ return !NON_RESTARTABLE_ERRORS.has(options.lastErrorCode);
+}
+
diff --git a/frontend/packages/app-core/src/utils/capture-startup.test.ts b/frontend/packages/app-core/src/utils/capture-startup.test.ts
new file mode 100644
index 0000000..5ccea7a
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/capture-startup.test.ts
@@ -0,0 +1,63 @@
+import {
+ decideCaptureFallback,
+ type CaptureFallbackDecision,
+} from "./capture-startup.ts";
+
+function run(name: string, fn: () => void) {
+ try {
+ fn();
+ console.info(`PASS ${name}`);
+ } catch (error) {
+ console.error(`FAIL ${name}`);
+ throw error;
+ }
+}
+
+function expectDecision(
+ actual: CaptureFallbackDecision,
+ expected: CaptureFallbackDecision
+) {
+ const actualText = JSON.stringify(actual);
+ const expectedText = JSON.stringify(expected);
+ if (actualText !== expectedText) {
+ throw new Error(`Expected ${expectedText} but received ${actualText}`);
+ }
+}
+
+function expectEqual<T>(actual: T, expected: T) {
+ if (actual !== expected) {
+ throw new Error(`Expected ${String(expected)} but received ${String(actual)}`);
+ }
+}
+
+run("falls back to media recorder when worklet init fails and media is supported", () => {
+ const decision = decideCaptureFallback({
+ workletError: new Error("worklet addModule failed"),
+ mediaRecorderSupported: true,
+ });
+
+ expectDecision(decision, {
+ mode: "media",
+ error: null,
+ });
+});
+
+run("returns actionable error when no fallback transport is available", () => {
+ const decision = decideCaptureFallback({
+ workletError: new Error("worklet addModule failed"),
+ mediaRecorderSupported: false,
+ });
+
+ expectEqual(decision.mode, "none");
+ expectEqual(decision.error, "worklet addModule failed");
+});
+
+run("normalizes non-error throw values", () => {
+ const decision = decideCaptureFallback({
+ workletError: "AudioWorklet is unavailable",
+ mediaRecorderSupported: false,
+ });
+
+ expectEqual(decision.mode, "none");
+ expectEqual(decision.error, "AudioWorklet is unavailable");
+});
diff --git a/frontend/packages/app-core/src/utils/capture-startup.ts b/frontend/packages/app-core/src/utils/capture-startup.ts
new file mode 100644
index 0000000..add6ff2
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/capture-startup.ts
@@ -0,0 +1,35 @@
+export type CaptureFallbackDecision = {
+ mode: "media" | "none";
+ error: string | null;
+};
+
+type CaptureFallbackInput = {
+ workletError: unknown;
+ mediaRecorderSupported: boolean;
+};
+
+function normalizeError(error: unknown) {
+ if (error instanceof Error && error.message.trim()) {
+ return error.message.trim();
+ }
+ if (typeof error === "string" && error.trim()) {
+ return error.trim();
+ }
+ return "Audio capture initialization failed.";
+}
+
+export function decideCaptureFallback(
+ input: CaptureFallbackInput
+): CaptureFallbackDecision {
+ if (input.mediaRecorderSupported) {
+ return {
+ mode: "media",
+ error: null,
+ };
+ }
+
+ return {
+ mode: "none",
+ error: normalizeError(input.workletError),
+ };
+}
diff --git a/frontend/packages/app-core/src/utils/provider-fields.test.ts b/frontend/packages/app-core/src/utils/provider-fields.test.ts
new file mode 100644
index 0000000..49c71db
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/provider-fields.test.ts
@@ -0,0 +1,61 @@
+import assert from "node:assert/strict";
+
+import type { ProviderCatalogEntry } from "../data/provider-catalog.ts";
+import { filterProviderFields } from "./provider-fields.ts";
+
+function run(name: string, fn: () => void) {
+ try {
+ fn();
+ console.info(`PASS ${name}`);
+ } catch (error) {
+ console.error(`FAIL ${name}`);
+ throw error;
+ }
+}
+
+function fieldIds(option: ProviderCatalogEntry) {
+ return filterProviderFields(option).map((field) => field.id);
+}
+
+run("hides baseUrl when provider has default baseUrl in defaults", () => {
+ const option: ProviderCatalogEntry = {
+ id: "openai-compatible",
+ label: "OpenAI Compatible",
+ category: "chat",
+ defaults: {
+ baseUrl: "https://api.example.com/v1/",
+ },
+ fields: [
+ { id: "apiKey", label: "API Key", type: "secret" },
+ { id: "baseUrl", label: "Base URL", type: "text" },
+ { id: "model", label: "Model", type: "select" },
+ ],
+ };
+ assert.deepEqual(fieldIds(option), ["apiKey", "model"]);
+});
+
+run("hides baseUrl when baseUrl field itself has default", () => {
+ const option: ProviderCatalogEntry = {
+ id: "custom-provider",
+ label: "Custom",
+ category: "speech",
+ fields: [
+ { id: "apiKey", label: "API Key", type: "secret" },
+ { id: "baseUrl", label: "Base URL", type: "text", default: "https://tts.example.com/" },
+ ],
+ };
+ assert.deepEqual(fieldIds(option), ["apiKey"]);
+});
+
+run("keeps baseUrl when provider has no default baseUrl", () => {
+ const option: ProviderCatalogEntry = {
+ id: "manual-base-url",
+ label: "Manual Base URL",
+ category: "transcription",
+ fields: [
+ { id: "apiKey", label: "API Key", type: "secret" },
+ { id: "baseUrl", label: "Base URL", type: "text" },
+ ],
+ };
+ assert.deepEqual(fieldIds(option), ["apiKey", "baseUrl"]);
+});
diff --git a/frontend/packages/app-core/src/utils/provider-fields.ts b/frontend/packages/app-core/src/utils/provider-fields.ts
new file mode 100644
index 0000000..0ebf0db
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/provider-fields.ts
@@ -0,0 +1,23 @@
+import type { ProviderCatalogEntry, ProviderField } from "../data/provider-catalog";
+
+function resolveFieldDefault(field?: ProviderField) {
+ if (!field || field.default === undefined || field.default === null) return "";
+ return String(field.default).trim();
+}
+
+function hasDefaultBaseUrl(option?: ProviderCatalogEntry) {
+ const defaultBaseUrl = option?.defaults?.baseUrl?.trim();
+ if (defaultBaseUrl) {
+ return true;
+ }
+ const baseUrlField = option?.fields?.find((field) => field.id === "baseUrl");
+ return Boolean(resolveFieldDefault(baseUrlField));
+}
+
+export function filterProviderFields(option?: ProviderCatalogEntry): ProviderField[] {
+ const fields = option?.fields ?? [];
+ if (!hasDefaultBaseUrl(option)) {
+ return fields;
+ }
+ return fields.filter((field) => field.id !== "baseUrl");
+}
diff --git a/frontend/packages/app-core/src/utils/provider-visibility.test.ts b/frontend/packages/app-core/src/utils/provider-visibility.test.ts
new file mode 100644
index 0000000..e110419
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/provider-visibility.test.ts
@@ -0,0 +1,42 @@
+import assert from "node:assert/strict";
+
+import {
+ filterVisibleSpeechProviders,
+ isVisibleSpeechProviderId,
+} from "./provider-visibility.ts";
+
+function run(name: string, fn: () => void) {
+ try {
+ fn();
+ console.info(`PASS ${name}`);
+ } catch (error) {
+ console.error(`FAIL ${name}`);
+ throw error;
+ }
+}
+
+run("only configured speech providers are visible", () => {
+ assert.equal(isVisibleSpeechProviderId("volcengine-speech"), true);
+ assert.equal(isVisibleSpeechProviderId("alibaba-cloud-model-studio-speech"), true);
+ assert.equal(isVisibleSpeechProviderId("browser-local-audio-speech"), true);
+ assert.equal(isVisibleSpeechProviderId("app-local-audio-speech"), true);
+ assert.equal(isVisibleSpeechProviderId("openai-audio-speech"), false);
+ assert.equal(isVisibleSpeechProviderId("elevenlabs"), false);
+});
+
+run("filters unsupported speech provider ids", () => {
+ assert.deepEqual(
+ filterVisibleSpeechProviders([
+ "openai-audio-speech",
+ "volcengine-speech",
+ "alibaba-cloud-model-studio-speech",
+ "elevenlabs",
+ "browser-local-audio-speech",
+ ]),
+ [
+ "volcengine-speech",
+ "alibaba-cloud-model-studio-speech",
+ "browser-local-audio-speech",
+ ]
+ );
+});
diff --git a/frontend/packages/app-core/src/utils/provider-visibility.ts b/frontend/packages/app-core/src/utils/provider-visibility.ts
new file mode 100644
index 0000000..9399749
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/provider-visibility.ts
@@ -0,0 +1,14 @@
+const visibleSpeechProviderIds = new Set([
+ "volcengine-speech",
+ "alibaba-cloud-model-studio-speech",
+ "browser-local-audio-speech",
+ "app-local-audio-speech",
+]);
+
+export function isVisibleSpeechProviderId(providerId: string) {
+ return visibleSpeechProviderIds.has(providerId);
+}
+
+export function filterVisibleSpeechProviders(providerIds: string[]) {
+ return providerIds.filter((providerId) => isVisibleSpeechProviderId(providerId));
+}
diff --git a/frontend/packages/app-core/src/utils/transcript-filter.test.ts b/frontend/packages/app-core/src/utils/transcript-filter.test.ts
new file mode 100644
index 0000000..0fc07e6
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/transcript-filter.test.ts
@@ -0,0 +1,23 @@
+import assert from "node:assert/strict";
+
+import { sanitizeTranscript } from "./transcript-filter.ts";
+
+function run(name: string, fn: () => void) {
+ try {
+ fn();
+ console.info(`PASS ${name}`);
+ } catch (error) {
+ console.error(`FAIL ${name}`);
+ throw error;
+ }
+}
+
+run("drops windows absolute image path transcript", () => {
+ const transcript = String.raw`C:\Users\ADMIN\Documents\WeChat Files\wxid_b2orpigekka622\FileStorage\Temp\1772262785183.jpg`;
+ assert.equal(sanitizeTranscript(transcript), "");
+});
+
+run("keeps normal natural language transcript", () => {
+ assert.equal(sanitizeTranscript("你好,这是语音测试。"), "你好,这是语音测试。");
+});
+
diff --git a/frontend/packages/app-core/src/utils/transcript-filter.ts b/frontend/packages/app-core/src/utils/transcript-filter.ts
new file mode 100644
index 0000000..8080c64
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/transcript-filter.ts
@@ -0,0 +1,17 @@
+const WINDOWS_ABSOLUTE_PATH_RE =
+ /^[a-zA-Z]:\\(?:[^\\/:*?"<>|\r\n]+\\)*[^\\/:*?"<>|\r\n]+$/;
+
+export function sanitizeTranscript(value: string) {
+ const trimmed = value.trim();
+ if (!trimmed) {
+ return "";
+ }
+
+ const normalizedPathCandidate = trimmed.replace(/\//g, "\\");
+ if (WINDOWS_ABSOLUTE_PATH_RE.test(normalizedPathCandidate)) {
+ return "";
+ }
+
+ return trimmed;
+}
+
diff --git a/frontend/packages/app-core/src/utils/transcription-language.test.ts b/frontend/packages/app-core/src/utils/transcription-language.test.ts
new file mode 100644
index 0000000..fd7e4ab
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/transcription-language.test.ts
@@ -0,0 +1,40 @@
+import {
+ normalizeTranscriptionLanguage,
+ resolveInitialTranscriptionLanguage,
+} from "./transcription-language.ts";
+
+function run(name: string, fn: () => void) {
+ try {
+ fn();
+ console.info(`PASS ${name}`);
+ } catch (error) {
+ console.error(`FAIL ${name}`);
+ throw error;
+ }
+}
+
+function expectEqual<T>(actual: T, expected: T) {
+ if (actual !== expected) {
+ throw new Error(`Expected ${String(expected)} but received ${String(actual)}`);
+ }
+}
+
+run("normalizes short zh language token", () => {
+ expectEqual(normalizeTranscriptionLanguage("zh"), "zh-CN");
+});
+
+run("normalizes short en language token", () => {
+ expectEqual(normalizeTranscriptionLanguage("en"), "en-US");
+});
+
+run("keeps specific locale value", () => {
+ expectEqual(normalizeTranscriptionLanguage("ja-JP"), "ja-JP");
+});
+
+run("falls back to english when language is missing", () => {
+ expectEqual(resolveInitialTranscriptionLanguage(undefined), "en-US");
+});
+
+run("uses navigator language when available", () => {
+ expectEqual(resolveInitialTranscriptionLanguage("zh-CN"), "zh-CN");
+});
diff --git a/frontend/packages/app-core/src/utils/transcription-language.ts b/frontend/packages/app-core/src/utils/transcription-language.ts
new file mode 100644
index 0000000..3acd1ae
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/transcription-language.ts
@@ -0,0 +1,25 @@
+const DEFAULT_TRANSCRIPTION_LANGUAGE = "en-US";
+
+function normalizeLocaleToken(value: string) {
+ return value.trim().replace("_", "-");
+}
+
+export function normalizeTranscriptionLanguage(language: unknown) {
+ if (typeof language !== "string") {
+ return DEFAULT_TRANSCRIPTION_LANGUAGE;
+ }
+ const normalized = normalizeLocaleToken(language);
+ if (!normalized) {
+ return DEFAULT_TRANSCRIPTION_LANGUAGE;
+ }
+ const lower = normalized.toLowerCase();
+ if (lower === "zh") return "zh-CN";
+ if (lower === "en") return "en-US";
+ return normalized;
+}
+
+export function resolveInitialTranscriptionLanguage(
+ navigatorLanguage?: string | null
+) {
+ return normalizeTranscriptionLanguage(navigatorLanguage);
+}
diff --git a/frontend/packages/app-core/src/utils/tts-chunker.test.ts b/frontend/packages/app-core/src/utils/tts-chunker.test.ts
new file mode 100644
index 0000000..64f610a
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/tts-chunker.test.ts
@@ -0,0 +1,51 @@
+import assert from "node:assert/strict";
+
+import {
+ chunkTtsInput,
+ toSpeakableTtsChunks,
+ TTS_FLUSH_INSTRUCTION,
+ TTS_SPECIAL_TOKEN,
+} from "./tts-chunker.ts";
+
+function run(name: string, fn: () => void) {
+ try {
+ fn();
+ console.info(`PASS ${name}`);
+ } catch (error) {
+ console.error(`FAIL ${name}`);
+ throw error;
+ }
+}
+
+run("splits on hard punctuation", () => {
+ const chunks = toSpeakableTtsChunks("你好。世界。");
+ assert.deepEqual(chunks, ["你好。", "世界。"]);
+});
+
+run("keeps decimal punctuation in numbers", () => {
+ const chunks = toSpeakableTtsChunks("价格是2.5,不是25。");
+ assert.equal(chunks.join(""), "价格是2.5,不是25。");
+});
+
+run("normalizes three dots into ellipsis", () => {
+ const chunks = toSpeakableTtsChunks("等等...快点。");
+ assert.ok(chunks.join("").includes("…"));
+ assert.equal(chunks.join("").includes("..."), false);
+});
+
+run("emits special reason when special token appears", () => {
+ const chunks = chunkTtsInput(`前缀${TTS_SPECIAL_TOKEN}后缀。`);
+ assert.equal(chunks[0]?.reason, "special");
+ assert.equal(chunks[0]?.text, "前缀");
+});
+
+run("emits standalone special chunk when buffer is empty", () => {
+ const chunks = chunkTtsInput(`${TTS_SPECIAL_TOKEN}你好。`);
+ assert.equal(chunks[0]?.reason, "special");
+ assert.equal(chunks[0]?.text, "");
+});
+
+run("flush instruction forces chunk boundary and is stripped for TTS text", () => {
+ const chunks = toSpeakableTtsChunks(`第一句${TTS_FLUSH_INSTRUCTION}第二句。`);
+ assert.deepEqual(chunks, ["第一句", "第二句。"]);
+});
diff --git a/frontend/packages/app-core/src/utils/tts-chunker.ts b/frontend/packages/app-core/src/utils/tts-chunker.ts
new file mode 100644
index 0000000..f1b793b
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/tts-chunker.ts
@@ -0,0 +1,243 @@
+export const TTS_FLUSH_INSTRUCTION = "\u200B";
+export const TTS_SPECIAL_TOKEN = "\u2063";
+
+const keptPunctuations = new Set(["?", "?", "!", "!"]);
+const hardPunctuations = new Set([
+ ".",
+ "。",
+ "?",
+ "?",
+ "!",
+ "!",
+ "…",
+ "⋯",
+ "~",
+ "~",
+ "\n",
+ "\t",
+ "\r",
+]);
+const softPunctuations = new Set([
+ ",",
+ ",",
+ "、",
+ "–",
+ "—",
+ ":",
+ ":",
+ ";",
+ ";",
+ "《",
+ "》",
+ "「",
+ "」",
+]);
+
+export type TtsChunkReason = "boost" | "limit" | "hard" | "flush" | "special";
+
+export interface TtsInputChunk {
+ text: string;
+ words: number;
+ reason: TtsChunkReason;
+}
+
+export interface TtsInputChunkOptions {
+ boost?: number;
+ minimumWords?: number;
+ maximumWords?: number;
+}
+
+type SegmentLike = { segment?: string; isWordLike?: boolean };
+type SegmenterLike = { segment: (input: string) => Iterable<SegmentLike> };
+
+function createSegmenter(granularity: "word" | "grapheme"): SegmenterLike | null {
+ const SegmenterCtor = (Intl as any)?.Segmenter as
+ | (new (locales?: string | string[], options?: { granularity: string }) => SegmenterLike)
+ | undefined;
+ if (!SegmenterCtor) return null;
+ try {
+ return new SegmenterCtor(undefined, { granularity });
+ } catch {
+ return null;
+ }
+}
+
+function splitGraphemes(text: string, segmenter: SegmenterLike | null) {
+ if (!text) return [];
+ if (!segmenter) {
+ return Array.from(text);
+ }
+ const units: string[] = [];
+ for (const token of segmenter.segment(text)) {
+ if (typeof token?.segment === "string" && token.segment.length > 0) {
+ units.push(token.segment);
+ }
+ }
+ return units.length > 0 ? units : Array.from(text);
+}
+
+function countWordLike(text: string, segmenter: SegmenterLike | null) {
+ if (!text) return 0;
+ if (!segmenter) {
+ const matched = text.match(/[A-Za-z0-9\u4e00-\u9fff]+/g);
+ return matched?.length ?? 0;
+ }
+ let count = 0;
+ for (const token of segmenter.segment(text)) {
+ if (token?.isWordLike) {
+ count += 1;
+ }
+ }
+ return count;
+}
+
+export function sanitizeTtsChunk(text: string) {
+ return text
+ .replaceAll(TTS_SPECIAL_TOKEN, "")
+ .replaceAll(TTS_FLUSH_INSTRUCTION, "")
+ .trim();
+}
+
+export function chunkTtsInput(
+ inputText: string,
+ options?: TtsInputChunkOptions
+): TtsInputChunk[] {
+ const { boost = 2, minimumWords = 4, maximumWords = 12 } = options ?? {};
+ const source = inputText.trim();
+ if (!source) return [];
+
+ const graphemeSegmenter = createSegmenter("grapheme");
+ const wordSegmenter = createSegmenter("word");
+ const input = splitGraphemes(source, graphemeSegmenter);
+
+ const chunks: TtsInputChunk[] = [];
+ let yieldCount = 0;
+ let buffer = "";
+ let chunk = "";
+ let chunkWordsCount = 0;
+ let previousValue: string | undefined;
+ let index = 0;
+
+ while (index < input.length) {
+ let value = input[index];
+
+ if (value.length > 1) {
+ previousValue = value;
+ index += 1;
+ continue;
+ }
+
+ const flush = value === TTS_FLUSH_INSTRUCTION;
+ const special = value === TTS_SPECIAL_TOKEN;
+ const hard = hardPunctuations.has(value);
+ const soft = softPunctuations.has(value);
+ const kept = keptPunctuations.has(value);
+ let consumed = 1;
+
+ if (flush || special || hard || soft) {
+ switch (value) {
+ case ".":
+ case ",": {
+ if (previousValue !== undefined && /\d/.test(previousValue)) {
+ const nextValue = input[index + 1];
+ if (nextValue && /\d/.test(nextValue)) {
+ buffer += value;
+ previousValue = value;
+ index += consumed;
+ continue;
+ }
+ } else if (value === ".") {
+ const nextValue = input[index + 1];
+ const afterNextValue = input[index + 2];
+ if (nextValue === "." && afterNextValue === ".") {
+ value = "…";
+ consumed = 3;
+ }
+ }
+ break;
+ }
+ }
+
+ if (buffer.length === 0) {
+ if (special) {
+ chunks.push({
+ text: "",
+ words: 0,
+ reason: "special",
+ });
+ yieldCount += 1;
+ chunkWordsCount = 0;
+ }
+
+ previousValue = value;
+ index += consumed;
+ continue;
+ }
+
+ const words = countWordLike(buffer, wordSegmenter);
+
+ if (chunkWordsCount > minimumWords && chunkWordsCount + words > maximumWords) {
+ const text = kept ? `${chunk.trim()}${value}` : chunk.trim();
+ chunks.push({
+ text,
+ words: chunkWordsCount,
+ reason: "limit",
+ });
+ yieldCount += 1;
+ chunk = "";
+ chunkWordsCount = 0;
+ }
+
+ chunk += buffer + value;
+ chunkWordsCount += words;
+ buffer = "";
+
+ if (special) {
+ chunks.push({
+ text: chunk.slice(0, -1).trim(),
+ words: chunkWordsCount,
+ reason: "special",
+ });
+ yieldCount += 1;
+ chunk = "";
+ chunkWordsCount = 0;
+ } else if (flush || hard || chunkWordsCount > maximumWords || yieldCount < boost) {
+ chunks.push({
+ text: chunk.trim(),
+ words: chunkWordsCount,
+ reason: flush ? "flush" : hard ? "hard" : chunkWordsCount > maximumWords ? "limit" : "boost",
+ });
+ yieldCount += 1;
+ chunk = "";
+ chunkWordsCount = 0;
+ }
+
+ previousValue = value;
+ index += consumed;
+ continue;
+ }
+
+ buffer += value;
+ previousValue = value;
+ index += 1;
+ }
+
+ if (chunk.length > 0 || buffer.length > 0) {
+ chunks.push({
+ text: (chunk + buffer).trim(),
+ words: chunkWordsCount + countWordLike(buffer, wordSegmenter),
+ reason: "flush",
+ });
+ }
+
+ return chunks;
+}
+
+export function toSpeakableTtsChunks(
+ inputText: string,
+ options?: TtsInputChunkOptions
+) {
+ return chunkTtsInput(inputText, options)
+ .map((item) => sanitizeTtsChunk(item.text))
+ .filter((text) => text.length > 0);
+}
diff --git a/frontend/packages/app-core/src/utils/tts-direct-request.ts b/frontend/packages/app-core/src/utils/tts-direct-request.ts
new file mode 100644
index 0000000..0c476b6
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/tts-direct-request.ts
@@ -0,0 +1,231 @@
+type AudioRequestConfig = Record<string, unknown>;
+
+type BackendTtsPayload = {
+ engine: string;
+ data: string;
+ config: Record<string, unknown>;
+};
+
+type LegacyTtsPayload = {
+ text: string;
+ engine: string;
+ providerId?: string;
+ provider_id?: string;
+ config: Record<string, unknown>;
+};
+
+export type DirectTtsHttpRequest = {
+ url: string;
+ headers: Record<string, string>;
+ body: BackendTtsPayload;
+};
+
+export type LegacyTtsHttpRequest = {
+ url: string;
+ headers: Record<string, string>;
+ body: LegacyTtsPayload;
+};
+
+const allowedBackendTtsEngineIds = new Set([
+ "volcengine-speech",
+ "alibaba-cloud-model-studio-speech",
+]);
+
+function asRecord(value: unknown): Record<string, unknown> {
+ return typeof value === "object" && value !== null && !Array.isArray(value)
+ ? (value as Record<string, unknown>)
+ : {};
+}
+
+function readString(config: Record<string, unknown>, keys: string[]) {
+ for (const key of keys) {
+ const value = config[key];
+ if (typeof value === "string" && value.trim()) {
+ return value.trim();
+ }
+ }
+ return "";
+}
+
+function normalizeBackendTtsUrl(apiBaseUrl: string) {
+ const trimmed = apiBaseUrl.trim().replace(/\/+$/, "");
+ return `${trimmed}/api/tts/engines`;
+}
+
+function normalizeLegacyTtsUrl(url: string) {
+ const trimmed = url.trim().replace(/\/+$/, "");
+ if (trimmed.endsWith("/api/tts/engines")) {
+ return `${trimmed.slice(0, -"/api/tts/engines".length)}/api/tts/synthesize`;
+ }
+ return `${trimmed}/api/tts/synthesize`;
+}
+
+function resolveLegacyBackend(engineId: string) {
+ if (engineId === "volcengine-speech") return "volcengine";
+ if (engineId === "alibaba-cloud-model-studio-speech") return "alibaba";
+ return "";
+}
+
+function normalizeAlibabaModelId(model: string, engineId: string) {
+ if (engineId !== "alibaba-cloud-model-studio-speech") {
+ return model;
+ }
+ return model.replace(/^alibaba\//i, "").trim();
+}
+
+function resolveVolcengineAppId(config: Record<string, unknown>) {
+ const topLevel = readString(config, ["appId", "appid", "app_id"]);
+ if (topLevel) return topLevel;
+ const app = asRecord(config.app);
+ return readString(app, ["appId", "appid", "app_id"]);
+}
+
+function copyKnownExtras(
+ source: Record<string, unknown>,
+ target: Record<string, unknown>,
+ keys: string[]
+) {
+ keys.forEach((key) => {
+ if (Object.prototype.hasOwnProperty.call(source, key) && source[key] !== undefined) {
+ target[key] = source[key];
+ }
+ });
+}
+
+export function supportsDirectTts(engineId: string | null | undefined) {
+ if (!engineId) return false;
+ return allowedBackendTtsEngineIds.has(engineId);
+}
+
+export function buildDirectTtsHttpRequest(input: {
+ text: string;
+ engineId?: string;
+ apiBaseUrl?: string;
+ config?: AudioRequestConfig;
+}): DirectTtsHttpRequest | null {
+ const engineId = (input.engineId || "").trim();
+ if (!supportsDirectTts(engineId)) return null;
+
+ const config = asRecord(input.config);
+ const apiBaseUrl = (input.apiBaseUrl || "").trim();
+ const apiKey = readString(config, ["apiKey", "api_key"]);
+ const model = normalizeAlibabaModelId(readString(config, ["model"]), engineId);
+ const voice = readString(config, ["voice"]);
+ const text = (input.text || "").trim();
+
+ if (!apiBaseUrl || !apiKey || !model || !voice || !text) {
+ return null;
+ }
+
+ const backendConfig: Record<string, unknown> = {
+ apiKey,
+ model,
+ voice,
+ };
+
+ const responseFormat = readString(config, ["response_format", "responseFormat", "format"]);
+ if (responseFormat) {
+ backendConfig.response_format = responseFormat;
+ }
+
+ if (typeof config.speed === "number") {
+ backendConfig.speed = config.speed;
+ }
+
+ if (engineId === "volcengine-speech") {
+ const appId = resolveVolcengineAppId(config);
+ if (!appId) return null;
+ backendConfig.appId = appId;
+ copyKnownExtras(config, backendConfig, [
+ "app",
+ "audio",
+ "request",
+ "user",
+ "extra_body",
+ "extraBody",
+ ]);
+ }
+
+ if (engineId === "alibaba-cloud-model-studio-speech") {
+ copyKnownExtras(config, backendConfig, [
+ "rate",
+ "pitch",
+ "volume",
+ "sample_rate",
+ "sampleRate",
+ "extra_body",
+ "extraBody",
+ ]);
+ }
+
+ return {
+ url: normalizeBackendTtsUrl(apiBaseUrl),
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: {
+ engine: engineId,
+ data: text,
+ config: backendConfig,
+ },
+ };
+}
+
+export function buildLegacyTtsHttpRequest(input: DirectTtsHttpRequest): LegacyTtsHttpRequest {
+ const config: Record<string, unknown> = {
+ ...input.body.config,
+ };
+ const apiKey = readString(config, ["apiKey", "api_key"]);
+ if (apiKey && !readString(config, ["api_key"])) {
+ config.api_key = apiKey;
+ }
+
+ const baseUrl = readString(config, ["baseUrl", "base_url"]);
+ if (baseUrl) {
+ if (!readString(config, ["baseUrl"])) {
+ config.baseUrl = baseUrl;
+ }
+ if (!readString(config, ["base_url"])) {
+ config.base_url = baseUrl;
+ }
+ }
+
+ const backend = resolveLegacyBackend(input.body.engine);
+ if (backend && !readString(config, ["backend"])) {
+ config.backend = backend;
+ }
+
+ const model = readString(config, ["model"]);
+ if (model && !model.includes("/")) {
+ if (backend === "volcengine") {
+ config.model = `volcengine/${model}`;
+ }
+ }
+ if (backend === "alibaba") {
+ config.model = normalizeAlibabaModelId(readString(config, ["model"]), input.body.engine);
+ }
+
+ const appId = readString(config, ["appId", "appid", "app_id"]);
+ if (appId) {
+ if (!readString(config, ["appid"])) {
+ config.appid = appId;
+ }
+ if (!readString(config, ["app_id"])) {
+ config.app_id = appId;
+ }
+ }
+
+ return {
+ url: normalizeLegacyTtsUrl(input.url),
+ headers: {
+ ...input.headers,
+ },
+ body: {
+ text: input.body.data,
+ engine: input.body.engine,
+ providerId: input.body.engine,
+ provider_id: input.body.engine,
+ config,
+ },
+ };
+}
diff --git a/frontend/packages/app-core/src/utils/tts-stream-segmenter.test.ts b/frontend/packages/app-core/src/utils/tts-stream-segmenter.test.ts
new file mode 100644
index 0000000..07fbe3e
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/tts-stream-segmenter.test.ts
@@ -0,0 +1,39 @@
+import assert from "node:assert/strict";
+
+import { TtsStreamSegmenter } from "./tts-stream-segmenter.ts";
+
+function run(name: string, fn: () => void) {
+ try {
+ fn();
+ console.info(`PASS ${name}`);
+ } catch (error) {
+ console.error(`FAIL ${name}`);
+ throw error;
+ }
+}
+
+run("emits finished sentence while keeping trailing tail", () => {
+ const segmenter = new TtsStreamSegmenter();
+ segmenter.appendLiteral("你好");
+ assert.deepEqual(segmenter.drain(false), []);
+
+ segmenter.appendLiteral("。世界");
+ assert.deepEqual(segmenter.drain(false), ["你好。"]);
+
+ segmenter.appendLiteral("。");
+ assert.deepEqual(segmenter.drain(false), ["世界。"]);
+});
+
+run("special marker flushes previous literal chunk", () => {
+ const segmenter = new TtsStreamSegmenter();
+ segmenter.appendLiteral("前缀");
+ segmenter.appendSpecialMarker();
+ assert.deepEqual(segmenter.drain(false), ["前缀"]);
+});
+
+run("final drain emits tail chunk", () => {
+ const segmenter = new TtsStreamSegmenter();
+ segmenter.appendLiteral("还没结束");
+ assert.deepEqual(segmenter.drain(false), []);
+ assert.deepEqual(segmenter.drain(true), ["还没结束"]);
+});
diff --git a/frontend/packages/app-core/src/utils/tts-stream-segmenter.ts b/frontend/packages/app-core/src/utils/tts-stream-segmenter.ts
new file mode 100644
index 0000000..2c34f20
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/tts-stream-segmenter.ts
@@ -0,0 +1,64 @@
+import {
+ chunkTtsInput,
+ sanitizeTtsChunk,
+ TTS_FLUSH_INSTRUCTION,
+ TTS_SPECIAL_TOKEN,
+} from "./tts-chunker.ts";
+
+function endsWithControlMarker(text: string) {
+ return (
+ text.endsWith(TTS_FLUSH_INSTRUCTION) ||
+ text.endsWith(TTS_SPECIAL_TOKEN)
+ );
+}
+
+export class TtsStreamSegmenter {
+ private input = "";
+ private emittedCount = 0;
+
+ appendLiteral(text: string) {
+ if (!text) return;
+ this.input += text;
+ }
+
+ appendSpecialMarker() {
+ this.input += TTS_SPECIAL_TOKEN;
+ }
+
+ appendFlushMarker() {
+ this.input += TTS_FLUSH_INSTRUCTION;
+ }
+
+ reset() {
+ this.input = "";
+ this.emittedCount = 0;
+ }
+
+ drain(finalize: boolean) {
+ const chunks = chunkTtsInput(this.input);
+ if (chunks.length === 0) return [];
+
+ let emitUntil = chunks.length;
+ if (!finalize) {
+ const last = chunks[chunks.length - 1];
+ if (last?.reason === "flush" && !endsWithControlMarker(this.input)) {
+ emitUntil -= 1;
+ }
+ }
+
+ if (emitUntil <= this.emittedCount) {
+ return [];
+ }
+
+ const emitted: string[] = [];
+ for (let index = this.emittedCount; index < emitUntil; index++) {
+ const text = sanitizeTtsChunk(chunks[index]?.text ?? "");
+ if (text) {
+ emitted.push(text);
+ }
+ }
+
+ this.emittedCount = emitUntil;
+ return emitted;
+ }
+}
diff --git a/frontend/packages/app-core/src/utils/tts-streaming-runner.test.ts b/frontend/packages/app-core/src/utils/tts-streaming-runner.test.ts
new file mode 100644
index 0000000..4d65cd7
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/tts-streaming-runner.test.ts
@@ -0,0 +1,79 @@
+import assert from "node:assert/strict";
+
+import { runTtsChunkQueue, TtsChunkQueueError } from "./tts-streaming-runner.ts";
+
+async function run(name: string, fn: () => Promise<void> | void) {
+ try {
+ await fn();
+ console.info(`PASS ${name}`);
+ } catch (error) {
+ console.error(`FAIL ${name}`);
+ throw error;
+ }
+}
+
+await run("continues when one chunk fails", async () => {
+ const processed: string[] = [];
+ const result = await runTtsChunkQueue(["A", "B", "C"], async (chunk) => {
+ if (chunk === "B") {
+ const error = new Error("Bad gateway") as Error & { status?: number };
+ error.status = 502;
+ throw error;
+ }
+ processed.push(chunk);
+ });
+
+ assert.deepEqual(processed, ["A", "C"]);
+ assert.equal(result.succeeded, 2);
+ assert.equal(result.failed, 1);
+});
+
+await run("throws when every chunk fails", async () => {
+ await assert.rejects(
+ async () => {
+ await runTtsChunkQueue(["A", "B"], async () => {
+ const error = new Error("Always fail");
+ throw error;
+ });
+ },
+ /Always fail/
+ );
+});
+
+await run("does not swallow AbortError", async () => {
+ await assert.rejects(
+ async () => {
+ await runTtsChunkQueue(["A"], async () => {
+ throw new DOMException("Aborted", "AbortError");
+ });
+ },
+ (error: unknown) =>
+ error instanceof DOMException &&
+ error.name === "AbortError"
+ );
+});
+
+await run("stops on first chunk failure when stopOnError is enabled", async () => {
+ const processed: string[] = [];
+ await assert.rejects(
+ async () => {
+ await runTtsChunkQueue(
+ ["A", "B", "C"],
+ async (chunk) => {
+ if (chunk === "B") {
+ throw new Error("B failed");
+ }
+ processed.push(chunk);
+ },
+ { stopOnError: true }
+ );
+ },
+ (error: unknown) => {
+ if (!(error instanceof TtsChunkQueueError)) return false;
+ assert.equal(error.context.index, 1);
+ assert.equal(error.context.chunk, "B");
+ return true;
+ }
+ );
+ assert.deepEqual(processed, ["A"]);
+});
diff --git a/frontend/packages/app-core/src/utils/tts-streaming-runner.ts b/frontend/packages/app-core/src/utils/tts-streaming-runner.ts
new file mode 100644
index 0000000..f1b933a
--- /dev/null
+++ b/frontend/packages/app-core/src/utils/tts-streaming-runner.ts
@@ -0,0 +1,72 @@
+export interface TtsChunkQueueResult {
+ succeeded: number;
+ failed: number;
+ lastError: unknown | null;
+}
+
+export interface TtsChunkQueueContext {
+ chunk: string;
+ index: number;
+ total: number;
+}
+
+export class TtsChunkQueueError extends Error {
+ context: TtsChunkQueueContext;
+ originalError: unknown;
+
+ constructor(error: unknown, context: TtsChunkQueueContext) {
+ const message = error instanceof Error ? error.message : String(error);
+ super(`TTS chunk failed at ${context.index + 1}/${context.total}: ${message}`);
+ this.name = "TtsChunkQueueError";
+ this.context = context;
+ this.originalError = error;
+ }
+}
+
+export interface RunTtsChunkQueueOptions {
+ onChunkError?: (error: unknown, context: TtsChunkQueueContext) => void;
+ stopOnError?: boolean;
+}
+
+function isAbortError(error: unknown) {
+ return error instanceof DOMException && error.name === "AbortError";
+}
+
+export async function runTtsChunkQueue(
+ chunks: string[],
+ runChunk: (chunk: string, index: number, total: number) => Promise<void>,
+ options?: RunTtsChunkQueueOptions
+): Promise<TtsChunkQueueResult> {
+ let succeeded = 0;
+ let failed = 0;
+ let lastError: unknown | null = null;
+
+ for (let index = 0; index < chunks.length; index++) {
+ const chunk = chunks[index];
+ try {
+ await runChunk(chunk, index, chunks.length);
+ succeeded += 1;
+ } catch (error) {
+ if (isAbortError(error)) {
+ throw error;
+ }
+ failed += 1;
+ lastError = error;
+ const context = {
+ chunk,
+ index,
+ total: chunks.length,
+ };
+ options?.onChunkError?.(error, context);
+ if (options?.stopOnError) {
+ throw new TtsChunkQueueError(error, context);
+ }
+ }
+ }
+
+ if (succeeded === 0 && lastError) {
+ throw lastError;
+ }
+
+ return { succeeded, failed, lastError };
+}
diff --git a/frontend/packages/app-settings/src/sections/AudioSection.vue b/frontend/packages/app-settings/src/sections/AudioSection.vue
index bb2299d..ea47a3a 100644
--- a/frontend/packages/app-settings/src/sections/AudioSection.vue
+++ b/frontend/packages/app-settings/src/sections/AudioSection.vue
@@ -1,5 +1,5 @@
diff --git a/frontend/packages/app-settings/src/sections/ModelSection.vue b/frontend/packages/app-settings/src/sections/ModelSection.vue
index 85e458f..b22b291 100644
--- a/frontend/packages/app-settings/src/sections/ModelSection.vue
+++ b/frontend/packages/app-settings/src/sections/ModelSection.vue
@@ -122,11 +122,17 @@ function buildPanel(category: "chat" | "speech" | "transcription", providerId: s
: category === "speech"
? speechProviderId.value
: transcriptionProviderId.value;
- const targetField = providersStore
- .getProviderFields(updatedProviderId)
- .find((field) => field.id === fieldId);
+ const providerFields = providersStore.getProviderFields(updatedProviderId);
+ const targetField = providerFields.find((field) => field.id === fieldId);
if (!targetField) return;
providersStore.setProviderFieldValue(updatedProviderId, targetField, value);
+ if (category === "speech" && fieldId === "model") {
+ const voiceField = providerFields.find((field) => field.id === "voice");
+ if (voiceField) {
+ providersStore.setProviderFieldValue(updatedProviderId, voiceField, "");
+ }
+ void providersStore.refreshProvider(updatedProviderId);
+ }
},
};
}
diff --git a/frontend/packages/stage-settings-ui/src/components/AudioSection.vue b/frontend/packages/stage-settings-ui/src/components/AudioSection.vue
index 2024237..4137805 100644
--- a/frontend/packages/stage-settings-ui/src/components/AudioSection.vue
+++ b/frontend/packages/stage-settings-ui/src/components/AudioSection.vue
@@ -1,4 +1,4 @@
-