From b729f7602b54733f0d75afd862cb67c751cf108e Mon Sep 17 00:00:00 2001
From: lishunyang <lishunyang12@163.com>
Date: Fri, 6 Feb 2026 03:57:57 +0800
Subject: [PATCH 1/3] feat: add gradio demo for qwen tts

Signed-off-by: lishunyang <lishunyang12@163.com>
---
 PR_DESCRIPTION.md                             |  26 ++
 examples/online_serving/qwen3_tts/README.md   |  16 +
 .../online_serving/qwen3_tts/gradio_demo.py   | 379 ++++++++++++++++++
 .../qwen3_tts/run_gradio_demo.sh              | 203 ++++++++++
 4 files changed, 624 insertions(+)
 create mode 100644 PR_DESCRIPTION.md
 create mode 100644 examples/online_serving/qwen3_tts/gradio_demo.py
 create mode 100644 examples/online_serving/qwen3_tts/run_gradio_demo.sh

diff --git a/PR_DESCRIPTION.md b/PR_DESCRIPTION.md
new file mode 100644
index 0000000000..3975045951
--- /dev/null
+++ b/PR_DESCRIPTION.md
@@ -0,0 +1,26 @@
+## feat(qwen3-tts): Add Gradio demo for online serving
+
+Closes part of #938 (item 1.8 - Gradio Demo)
+
+### Summary
+- Add interactive Gradio web UI for Qwen3-TTS at `examples/online_serving/qwen3_tts/`
+- Support all 3 task types: CustomVoice, VoiceDesign, Base (voice cloning)
+- Dynamic UI that shows/hides fields based on selected task type
+- Fetches available speakers from `/v1/audio/voices` endpoint
+- Add `run_gradio_demo.sh` to launch server + demo together
+
+### Files Changed
+- `examples/online_serving/qwen3_tts/gradio_demo.py` (new)
+- `examples/online_serving/qwen3_tts/run_gradio_demo.sh` (new)
+- `examples/online_serving/qwen3_tts/README.md` (updated)
+
+### Test plan
+- [ ] Start server with `./run_server.sh CustomVoice`, run `python gradio_demo.py`, generate speech with Vivian/Ryan speakers
+- [ ] Start server with VoiceDesign model, verify instructions field is required
+- [ ] Start server with Base model, upload reference audio and verify voice cloning
+- [ ] Test `run_gradio_demo.sh` launches both server and Gradio
+- [ ] Verify error messages when server is down or inputs are invalid
+
+### Notes
+- Streaming audio playback will be added as a follow-up once #1189 is merged
+- References `examples/online_serving/qwen3_omni/gradio_demo.py` as design pattern
diff --git a/examples/online_serving/qwen3_tts/README.md b/examples/online_serving/qwen3_tts/README.md
index 1c9bd48203..a335021edb 100644
--- a/examples/online_serving/qwen3_tts/README.md
+++ b/examples/online_serving/qwen3_tts/README.md
@@ -10,6 +10,22 @@ This directory contains examples for running Qwen3-TTS models with vLLM-Omni's o
 | `Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign` | VoiceDesign | Natural language voice style description |
 | `Qwen/Qwen3-TTS-12Hz-1.7B-Base` | Base | Voice cloning from reference audio |
 
+## Gradio Demo
+
+The interactive Gradio demo supports all 3 task types with a web UI.
+
+```bash
+# Option 1: Launch server + Gradio together
+./run_gradio_demo.sh                                # CustomVoice (default)
+./run_gradio_demo.sh --task-type VoiceDesign        # VoiceDesign
+./run_gradio_demo.sh --task-type Base               # Voice cloning
+
+# Option 2: If server is already running
+python gradio_demo.py --api-base http://localhost:8000
+```
+
+Then open http://127.0.0.1:7860 in your browser.
+
 ## Quick Start
 
 ### 1. Start the Server
diff --git a/examples/online_serving/qwen3_tts/gradio_demo.py b/examples/online_serving/qwen3_tts/gradio_demo.py
new file mode 100644
index 0000000000..9c41beaa3c
--- /dev/null
+++ b/examples/online_serving/qwen3_tts/gradio_demo.py
@@ -0,0 +1,379 @@
+"""Gradio demo for Qwen3-TTS online serving via /v1/audio/speech API.
+
+Supports all 3 task types:
+  - CustomVoice: Predefined speaker with optional style instructions
+  - VoiceDesign: Natural language voice description
+  - Base: Voice cloning from reference audio
+
+Usage:
+    # Start the server first (see run_server.sh), then:
+    python gradio_demo.py --api-base http://localhost:8000
+
+    # Or use run_gradio_demo.sh to start both server and demo together.
+"""
+
+import argparse
+import base64
+import io
+
+import gradio as gr
+import httpx
+import numpy as np
+import soundfile as sf
+
+SUPPORTED_LANGUAGES = [  # Language dropdown choices; the selection is sent as the request's "language"
+    "Auto",
+    "Chinese",
+    "English",
+    "Japanese",
+    "Korean",
+    "German",
+    "French",
+    "Russian",
+    "Portuguese",
+    "Spanish",
+    "Italian",
+]
+
+TASK_TYPES = ["CustomVoice", "VoiceDesign", "Base"]  # Task Type radio choices; generate_speech branches on these
+
+
+def fetch_voices(api_base: str) -> list[str]:
+    """Return the "voices" list from GET /v1/audio/voices, or ["Vivian", "Ryan"] if the lookup fails."""
+    try:
+        with httpx.Client(timeout=10.0) as client:
+            resp = client.get(
+                f"{api_base}/v1/audio/voices",
+                headers={"Authorization": "Bearer EMPTY"},
+            )
+        resp.raise_for_status()  # error statuses are reported by the warning below instead of being ignored
+        data = resp.json()
+        return data.get("voices", ["Vivian", "Ryan"])
+    except Exception as exc:  # best-effort on purpose: the UI must still build when the server is unreachable
+        print(f"Warning: could not fetch voices from {api_base} ({exc}); using default speakers.")
+    return ["Vivian", "Ryan"]
+
+
+def encode_audio_to_base64(audio_data: tuple) -> str:
+    """Encode Gradio audio input (sample_rate, numpy_array) to a base64 WAV data URL with int16 samples."""
+    sample_rate, audio_np = audio_data
+
+    if audio_np.dtype != np.int16:
+        if np.issubdtype(audio_np.dtype, np.integer):
+            # Rescale other PCM widths (e.g. int32) to [-1, 1); a raw int16 cast would wrap around.
+            info = np.iinfo(audio_np.dtype)
+            audio_np = (audio_np.astype(np.float64) - info.min) / ((info.max - info.min + 1) / 2) - 1.0
+        audio_np = (np.clip(audio_np, -1.0, 1.0) * 32767).astype(np.int16)
+
+    buf = io.BytesIO()
+    sf.write(buf, audio_np, sample_rate, format="WAV")
+    wav_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+    return f"data:audio/wav;base64,{wav_b64}"
+
+
+def generate_speech(
+    api_base: str,
+    text: str,
+    task_type: str,
+    voice: str,
+    language: str,
+    instructions: str,
+    ref_audio: tuple | None,
+    ref_text: str,
+    response_format: str,
+    speed: float,
+) -> tuple[int, np.ndarray]:
+    """POST the inputs to /v1/audio/speech and return (sample_rate, float32 samples) for Gradio."""
+    if not text or not text.strip():
+        raise gr.Error("Please enter text to synthesize.")
+
+    # Build request payload; "input", "response_format" and "speed" are always sent
+    payload = {
+        "input": text.strip(),
+        "response_format": response_format,
+        "speed": speed,
+    }
+
+    if task_type:
+        payload["task_type"] = task_type
+    if language:
+        payload["language"] = language
+
+    # Task-specific parameters; fields belonging to the other task types are left out of the payload
+    if task_type == "CustomVoice":
+        if voice:
+            payload["voice"] = voice
+        if instructions and instructions.strip():
+            payload["instructions"] = instructions.strip()
+
+    elif task_type == "VoiceDesign":
+        if not instructions or not instructions.strip():
+            raise gr.Error(
+                "VoiceDesign task requires voice style instructions."
+            )
+        payload["instructions"] = instructions.strip()
+
+    elif task_type == "Base":
+        if ref_audio is None:
+            raise gr.Error(
+                "Base (voice clone) task requires reference audio."
+            )
+        payload["ref_audio"] = encode_audio_to_base64(ref_audio)
+        if ref_text and ref_text.strip():
+            payload["ref_text"] = ref_text.strip()
+
+    # Call the API; only timeouts and connection failures are translated into gr.Error here
+    try:
+        with httpx.Client(timeout=300.0) as client:
+            resp = client.post(
+                f"{api_base}/v1/audio/speech",
+                json=payload,
+                headers={
+                    "Content-Type": "application/json",
+                    "Authorization": "Bearer EMPTY",
+                },
+            )
+    except httpx.TimeoutException:
+        raise gr.Error("Request timed out. The server may be busy.")
+    except httpx.ConnectError:
+        raise gr.Error(
+            f"Cannot connect to server at {api_base}. "
+            "Make sure the vLLM server is running."
+        )
+
+    if resp.status_code != 200:
+        raise gr.Error(f"Server error ({resp.status_code}): {resp.text}")
+
+    # A 200 reply whose content-type is JSON is reported as a server error instead of being decoded as audio
+    content_type = resp.headers.get("content-type", "")
+    if "application/json" in content_type:
+        try:
+            error_data = resp.json()
+            raise gr.Error(f"Server error: {error_data}")
+        except ValueError:
+            pass
+
+    # Decode with soundfile, keep the first channel. NOTE(review): headerless "pcm" may not decode - confirm
+    try:
+        audio_np, sample_rate = sf.read(io.BytesIO(resp.content))
+        if audio_np.ndim > 1:
+            audio_np = audio_np[:, 0]
+        return (sample_rate, audio_np.astype(np.float32))
+    except Exception as e:
+        raise gr.Error(f"Failed to decode audio response: {e}")
+
+
+def on_task_type_change(task_type: str):  # returns updates for (voice, instructions, ref_audio, ref_text)
+    """Update UI visibility based on selected task type."""
+    if task_type == "CustomVoice":
+        return (
+            gr.update(visible=True),   # voice dropdown
+            gr.update(visible=True, info="Optional style/emotion instructions"),
+            gr.update(visible=False),  # ref_audio
+            gr.update(visible=False),  # ref_text
+        )
+    elif task_type == "VoiceDesign":
+        return (
+            gr.update(visible=False),  # voice dropdown
+            gr.update(visible=True, info="Required: describe the voice style"),  # instructions
+            gr.update(visible=False),  # ref_audio
+            gr.update(visible=False),  # ref_text
+        )
+    elif task_type == "Base":
+        return (
+            gr.update(visible=False),  # voice dropdown
+            gr.update(visible=False),  # instructions
+            gr.update(visible=True),   # ref_audio
+            gr.update(visible=True),   # ref_text
+        )
+    return (
+        gr.update(visible=True),
+        gr.update(visible=True),  # instructions (the entry above is the voice dropdown)
+        gr.update(visible=False),  # ref_audio
+        gr.update(visible=False),  # ref_text
+    )
+
+
+def build_interface(api_base: str) -> gr.Blocks:
+    """Build the Gradio Blocks UI; the Generate button calls generate_speech against api_base."""
+    voices = fetch_voices(api_base)  # fetched once at build time; the Speaker dropdown is not refreshed later
+
+    css = """
+    #generate-btn button { width: 100%; }
+    .task-info { padding: 8px 12px; border-radius: 6px;
+                 background: #f0f4ff; margin-bottom: 8px; }
+    """
+
+    with gr.Blocks(css=css, title="Qwen3-TTS Demo") as demo:
+        gr.Markdown("# Qwen3-TTS Online Serving Demo")
+        gr.Markdown(f"**Server:** `{api_base}`")
+
+        with gr.Row():
+            # Left column: inputs
+            with gr.Column(scale=3):
+                text_input = gr.Textbox(
+                    label="Text to Synthesize",
+                    placeholder="Enter text here, e.g., Hello, how are you?",
+                    lines=4,
+                )
+
+                with gr.Row():
+                    task_type = gr.Radio(
+                        choices=TASK_TYPES,
+                        value="CustomVoice",
+                        label="Task Type",
+                        scale=2,
+                    )
+                    language = gr.Dropdown(
+                        choices=SUPPORTED_LANGUAGES,
+                        value="Auto",
+                        label="Language",
+                        scale=1,
+                    )
+
+                # CustomVoice controls
+                voice = gr.Dropdown(
+                    choices=voices,
+                    value=voices[0] if voices else None,
+                    label="Speaker",
+                    visible=True,
+                )
+
+                # Instructions (CustomVoice optional, VoiceDesign required)
+                instructions = gr.Textbox(
+                    label="Instructions",
+                    placeholder=(
+                        "e.g., Speak with excitement / "
+                        "A warm, friendly female voice"
+                    ),
+                    lines=2,
+                    visible=True,
+                    info="Optional style/emotion instructions",
+                )
+
+                # Base (voice clone) controls
+                ref_audio = gr.Audio(
+                    label="Reference Audio (for voice cloning)",
+                    type="numpy",
+                    sources=["upload", "microphone"],
+                    visible=False,
+                )
+                ref_text = gr.Textbox(
+                    label="Reference Audio Transcript",
+                    placeholder="Transcript of the reference audio (optional, improves quality)",
+                    lines=2,
+                    visible=False,
+                )
+
+                with gr.Row():
+                    response_format = gr.Dropdown(
+                        choices=["wav", "mp3", "flac", "pcm", "aac", "opus"],
+                        value="wav",
+                        label="Audio Format",
+                        scale=1,
+                    )
+                    speed = gr.Slider(
+                        minimum=0.25,
+                        maximum=4.0,
+                        value=1.0,
+                        step=0.05,
+                        label="Speed",
+                        scale=1,
+                    )
+
+                generate_btn = gr.Button(
+                    "Generate Speech",
+                    variant="primary",
+                    size="lg",
+                    elem_id="generate-btn",
+                )
+
+            # Right column: output
+            with gr.Column(scale=2):
+                audio_output = gr.Audio(
+                    label="Generated Audio",
+                    interactive=False,
+                )
+                gr.Markdown(
+                    "### Task Types\n"
+                    "- **CustomVoice**: Use a predefined speaker "
+                    "(Vivian, Ryan, etc.) with optional style instructions\n"
+                    "- **VoiceDesign**: Describe the desired voice in natural "
+                    "language (instructions required)\n"
+                    "- **Base**: Clone a voice from reference audio"
+                )
+
+        # Dynamic UI updates: show/hide the task-specific inputs whenever the task type changes
+        task_type.change(
+            fn=on_task_type_change,
+            inputs=[task_type],
+            outputs=[voice, instructions, ref_audio, ref_text],
+        )
+
+        # Generate button: api_base is bound by the lambda; inputs follow generate_speech's parameter order
+        generate_btn.click(
+            fn=lambda *args: generate_speech(api_base, *args),
+            inputs=[
+                text_input,
+                task_type,
+                voice,
+                language,
+                instructions,
+                ref_audio,
+                ref_text,
+                response_format,
+                speed,
+            ],
+            outputs=[audio_output],
+        )
+
+        demo.queue()
+    return demo
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Gradio demo for Qwen3-TTS online serving."
+    )
+    parser.add_argument(
+        "--api-base",
+        default="http://localhost:8000",
+        help="Base URL for the vLLM API server (default: http://localhost:8000).",
+    )
+    parser.add_argument(
+        "--ip",  # passed to demo.launch(server_name=...) in main()
+        default="127.0.0.1",
+        help="Host/IP for Gradio server (default: 127.0.0.1).",
+    )
+    parser.add_argument(
+        "--port",  # passed to demo.launch(server_port=...) in main()
+        type=int,
+        default=7860,
+        help="Port for Gradio server (default: 7860).",
+    )
+    parser.add_argument(
+        "--share",  # passed to demo.launch(share=...) in main()
+        action="store_true",
+        help="Share the Gradio demo publicly.",
+    )
+    return parser.parse_args()
+
+
+def main():
+    """Parse the CLI options, build the demo UI and launch it, exiting quietly on Ctrl+C."""
+    cli = parse_args()
+    print(f"Connecting to vLLM server at: {cli.api_base}")
+    app = build_interface(cli.api_base)
+    launch_options = {
+        "server_name": cli.ip,
+        "server_port": cli.port,
+        "share": cli.share,
+    }
+    try:
+        app.launch(**launch_options)
+    except KeyboardInterrupt:
+        print("\nShutting down...")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/online_serving/qwen3_tts/run_gradio_demo.sh b/examples/online_serving/qwen3_tts/run_gradio_demo.sh
new file mode 100644
index 0000000000..1c7fe9e374
--- /dev/null
+++ b/examples/online_serving/qwen3_tts/run_gradio_demo.sh
@@ -0,0 +1,203 @@
+#!/bin/bash
+# Launch both vLLM server and Gradio demo for Qwen3-TTS
+#
+# Usage:
+#   ./run_gradio_demo.sh                                    # Default: CustomVoice
+#   ./run_gradio_demo.sh --task-type VoiceDesign            # VoiceDesign model
+#   ./run_gradio_demo.sh --task-type Base --gradio-port 7861
+#
+# Options:
+#   --task-type TYPE        Task type: CustomVoice, VoiceDesign, Base (default: CustomVoice)
+#   --server-port PORT      Port for vLLM server (default: 8000)
+#   --gradio-port PORT      Port for Gradio demo (default: 7860)
+#   --server-host HOST      Host for vLLM server (default: 0.0.0.0)
+#   --gradio-ip IP          IP for Gradio demo (default: 127.0.0.1)
+#   --share                 Share Gradio demo publicly
+
+set -e
+
+# Default values (each can be overridden by the matching command line option below)
+TASK_TYPE="CustomVoice"
+SERVER_PORT=8000
+GRADIO_PORT=7860
+SERVER_HOST="0.0.0.0"
+GRADIO_IP="127.0.0.1"
+GRADIO_SHARE=false
+
+# Parse command line arguments; a value-taking option given without a value is reported, not a silent exit
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --task-type)
+            TASK_TYPE="$2"
+            shift 2 || { echo "Error: $1 requires a value"; exit 1; }
+            ;;
+        --server-port)
+            SERVER_PORT="$2"
+            shift 2 || { echo "Error: $1 requires a value"; exit 1; }
+            ;;
+        --gradio-port)
+            GRADIO_PORT="$2"
+            shift 2 || { echo "Error: $1 requires a value"; exit 1; }
+            ;;
+        --server-host)
+            SERVER_HOST="$2"
+            shift 2 || { echo "Error: $1 requires a value"; exit 1; }
+            ;;
+        --gradio-ip)
+            GRADIO_IP="$2"
+            shift 2 || { echo "Error: $1 requires a value"; exit 1; }
+            ;;
+        --share)
+            GRADIO_SHARE=true
+            shift
+            ;;
+        --help)
+            echo "Usage: $0 [OPTIONS]"
+            echo ""
+            echo "Options:"
+            echo "  --task-type TYPE        Task type: CustomVoice, VoiceDesign, Base (default: CustomVoice)"
+            echo "  --server-port PORT      Port for vLLM server (default: 8000)"
+            echo "  --gradio-port PORT      Port for Gradio demo (default: 7860)"
+            echo "  --server-host HOST      Host for vLLM server (default: 0.0.0.0)"
+            echo "  --gradio-ip IP          IP for Gradio demo (default: 127.0.0.1)"
+            echo "  --share                 Share Gradio demo publicly"
+            echo ""
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1"
+            echo "Use --help for usage information"
+            exit 1
+            ;;
+    esac
+done
+
+# Map task type to the model name passed to "vllm-omni serve"; unknown task types abort the script
+case "$TASK_TYPE" in
+    CustomVoice)
+        MODEL="Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
+        ;;
+    VoiceDesign)
+        MODEL="Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign"
+        ;;
+    Base)
+        MODEL="Qwen/Qwen3-TTS-12Hz-1.7B-Base"
+        ;;
+    *)
+        echo "Unknown task type: $TASK_TYPE"
+        echo "Supported: CustomVoice, VoiceDesign, Base"
+        exit 1
+        ;;
+esac
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+API_BASE="http://localhost:${SERVER_PORT}"  # the demo always reaches the server via localhost, whatever --server-host is
+
+echo "=========================================="
+echo "Qwen3-TTS Gradio Demo"
+echo "=========================================="
+echo "Task Type : $TASK_TYPE"
+echo "Model     : $MODEL"
+echo "Server    : http://${SERVER_HOST}:${SERVER_PORT}"
+echo "Gradio    : http://${GRADIO_IP}:${GRADIO_PORT}"
+echo "=========================================="
+
+# Stop both child processes. Runs on SIGINT/SIGTERM and at the end of the script, not on the "exit 1" error paths
+cleanup() {
+    echo ""
+    echo "Shutting down..."
+    if [ -n "$SERVER_PID" ]; then
+        echo "Stopping vLLM server (PID: $SERVER_PID)..."
+        kill "$SERVER_PID" 2>/dev/null || true
+        wait "$SERVER_PID" 2>/dev/null || true
+    fi
+    if [ -n "$GRADIO_PID" ]; then
+        echo "Stopping Gradio demo (PID: $GRADIO_PID)..."
+        kill "$GRADIO_PID" 2>/dev/null || true
+        wait "$GRADIO_PID" 2>/dev/null || true
+    fi
+    echo "Cleanup complete"
+    exit 0
+}
+trap cleanup SIGINT SIGTERM
+
+# Start vLLM server (logged via process substitution rather than a pipe, so $! below is the server's PID)
+echo ""
+echo "Starting vLLM server..."
+LOG_FILE="/tmp/vllm_tts_server_${SERVER_PORT}.log"
+
+vllm-omni serve "$MODEL" \
+    --stage-configs-path vllm_omni/model_executor/stage_configs/qwen3_tts.yaml \
+    --host "$SERVER_HOST" \
+    --port "$SERVER_PORT" \
+    --gpu-memory-utilization 0.9 \
+    --trust-remote-code \
+    --enforce-eager \
+    --omni > >(tee "$LOG_FILE") 2>&1 &
+SERVER_PID=$!  # with "serve ... | tee ... &" this would be tee's PID, so kill/kill -0 would miss the server
+
+# Wait up to MAX_WAIT seconds for "Application startup complete" to show up in the server log
+echo ""
+echo "Waiting for vLLM server to be ready..."
+STARTUP_FLAG="/tmp/vllm_tts_startup_flag_${SERVER_PORT}.tmp"
+rm -f "$STARTUP_FLAG"
+
+(
+    tail -f "$LOG_FILE" 2>/dev/null | grep -m 1 "Application startup complete" > /dev/null && touch "$STARTUP_FLAG"
+) &
+TAIL_PID=$!  # PID of the watcher subshell, not of the tail/grep processes inside it
+
+MAX_WAIT=300
+ELAPSED=0
+while [ $ELAPSED -lt $MAX_WAIT ]; do
+    if [ -f "$STARTUP_FLAG" ]; then
+        kill "$TAIL_PID" 2>/dev/null || true
+        wait "$TAIL_PID" 2>/dev/null || true
+        echo ""
+        echo "vLLM server is ready!"
+        break
+    fi
+    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
+        kill "$TAIL_PID" 2>/dev/null || true
+        echo ""
+        echo "Error: vLLM server failed to start"
+        exit 1
+    fi
+    sleep 1
+    ELAPSED=$((ELAPSED + 1))
+done
+
+rm -f "$STARTUP_FLAG"
+
+if [ $ELAPSED -ge $MAX_WAIT ]; then
+    kill "$TAIL_PID" 2>/dev/null || true
+    echo "Error: Server startup timed out after ${MAX_WAIT}s"
+    kill "$SERVER_PID" 2>/dev/null || true
+    exit 1
+fi
+
+# Start Gradio demo from the script's own directory, where gradio_demo.py lives
+echo ""
+echo "Starting Gradio demo..."
+cd "$SCRIPT_DIR"
+GRADIO_CMD=("python" "gradio_demo.py" "--api-base" "$API_BASE" "--ip" "$GRADIO_IP" "--port" "$GRADIO_PORT")
+if [ "$GRADIO_SHARE" = true ]; then
+    GRADIO_CMD+=("--share")
+fi
+
+"${GRADIO_CMD[@]}" &
+GRADIO_PID=$!
+
+echo ""
+echo "=========================================="
+echo "Both services are running!"
+echo "=========================================="
+echo "vLLM Server : http://${SERVER_HOST}:${SERVER_PORT}"
+echo "Gradio Demo : http://${GRADIO_IP}:${GRADIO_PORT}"
+echo ""
+echo "Press Ctrl+C to stop both services"
+echo "=========================================="
+echo ""
+
+wait $SERVER_PID $GRADIO_PID || true  # blocks until both children have exited; Ctrl+C is handled by the trap
+cleanup

From 8c99aed3bcf9808c325378dd12f94d72fcdc0625 Mon Sep 17 00:00:00 2001
From: lishunyang <lishunyang12@163.com>
Date: Fri, 6 Feb 2026 04:02:22 +0800
Subject: [PATCH 2/3] chore: delete description

Signed-off-by: lishunyang <lishunyang12@163.com>
---
 PR_DESCRIPTION.md | 26 --------------------------
 1 file changed, 26 deletions(-)
 delete mode 100644 PR_DESCRIPTION.md

diff --git a/PR_DESCRIPTION.md b/PR_DESCRIPTION.md
deleted file mode 100644
index 3975045951..0000000000
--- a/PR_DESCRIPTION.md
+++ /dev/null
@@ -1,26 +0,0 @@
-## feat(qwen3-tts): Add Gradio demo for online serving
-
-Closes part of #938 (item 1.8 - Gradio Demo)
-
-### Summary
-- Add interactive Gradio web UI for Qwen3-TTS at `examples/online_serving/qwen3_tts/`
-- Support all 3 task types: CustomVoice, VoiceDesign, Base (voice cloning)
-- Dynamic UI that shows/hides fields based on selected task type
-- Fetches available speakers from `/v1/audio/voices` endpoint
-- Add `run_gradio_demo.sh` to launch server + demo together
-
-### Files Changed
-- `examples/online_serving/qwen3_tts/gradio_demo.py` (new)
-- `examples/online_serving/qwen3_tts/run_gradio_demo.sh` (new)
-- `examples/online_serving/qwen3_tts/README.md` (updated)
-
-### Test plan
-- [ ] Start server with `./run_server.sh CustomVoice`, run `python gradio_demo.py`, generate speech with Vivian/Ryan speakers
-- [ ] Start server with VoiceDesign model, verify instructions field is required
-- [ ] Start server with Base model, upload reference audio and verify voice cloning
-- [ ] Test `run_gradio_demo.sh` launches both server and Gradio
-- [ ] Verify error messages when server is down or inputs are invalid
-
-### Notes
-- Streaming audio playback will be added as a follow-up once #1189 is merged
-- References `examples/online_serving/qwen3_omni/gradio_demo.py` as design pattern

From 05278a2a63738c1f8f48b55b39373ad400d5fd45 Mon Sep 17 00:00:00 2001
From: lishunyang <lishunyang12@163.com>
Date: Fri, 6 Feb 2026 04:04:57 +0800
Subject: [PATCH 3/3] fix: pre-commit

Signed-off-by: lishunyang <lishunyang12@163.com>
---
 .../online_serving/qwen3_tts/gradio_demo.py   | 28 ++++++-------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/examples/online_serving/qwen3_tts/gradio_demo.py b/examples/online_serving/qwen3_tts/gradio_demo.py
index 9c41beaa3c..1d64f127f6 100644
--- a/examples/online_serving/qwen3_tts/gradio_demo.py
+++ b/examples/online_serving/qwen3_tts/gradio_demo.py
@@ -108,16 +108,12 @@ def generate_speech(
 
     elif task_type == "VoiceDesign":
         if not instructions or not instructions.strip():
-            raise gr.Error(
-                "VoiceDesign task requires voice style instructions."
-            )
+            raise gr.Error("VoiceDesign task requires voice style instructions.")
         payload["instructions"] = instructions.strip()
 
     elif task_type == "Base":
         if ref_audio is None:
-            raise gr.Error(
-                "Base (voice clone) task requires reference audio."
-            )
+            raise gr.Error("Base (voice clone) task requires reference audio.")
         payload["ref_audio"] = encode_audio_to_base64(ref_audio)
         if ref_text and ref_text.strip():
             payload["ref_text"] = ref_text.strip()
@@ -136,10 +132,7 @@ def generate_speech(
     except httpx.TimeoutException:
         raise gr.Error("Request timed out. The server may be busy.")
     except httpx.ConnectError:
-        raise gr.Error(
-            f"Cannot connect to server at {api_base}. "
-            "Make sure the vLLM server is running."
-        )
+        raise gr.Error(f"Cannot connect to server at {api_base}. Make sure the vLLM server is running.")
 
     if resp.status_code != 200:
         raise gr.Error(f"Server error ({resp.status_code}): {resp.text}")
@@ -167,7 +160,7 @@ def on_task_type_change(task_type: str):
     """Update UI visibility based on selected task type."""
     if task_type == "CustomVoice":
         return (
-            gr.update(visible=True),   # voice dropdown
+            gr.update(visible=True),  # voice dropdown
             gr.update(visible=True, info="Optional style/emotion instructions"),
             gr.update(visible=False),  # ref_audio
             gr.update(visible=False),  # ref_text
@@ -183,8 +176,8 @@ def on_task_type_change(task_type: str):
         return (
             gr.update(visible=False),  # voice dropdown
             gr.update(visible=False),  # instructions
-            gr.update(visible=True),   # ref_audio
-            gr.update(visible=True),   # ref_text
+            gr.update(visible=True),  # ref_audio
+            gr.update(visible=True),  # ref_text
         )
     return (
         gr.update(visible=True),
@@ -242,10 +235,7 @@ def build_interface(api_base: str):
                 # Instructions (CustomVoice optional, VoiceDesign required)
                 instructions = gr.Textbox(
                     label="Instructions",
-                    placeholder=(
-                        "e.g., Speak with excitement / "
-                        "A warm, friendly female voice"
-                    ),
+                    placeholder=("e.g., Speak with excitement / A warm, friendly female voice"),
                     lines=2,
                     visible=True,
                     info="Optional style/emotion instructions",
@@ -332,9 +322,7 @@ def build_interface(api_base: str):
 
 
 def parse_args():
-    parser = argparse.ArgumentParser(
-        description="Gradio demo for Qwen3-TTS online serving."
-    )
+    parser = argparse.ArgumentParser(description="Gradio demo for Qwen3-TTS online serving.")
     parser.add_argument(
         "--api-base",
         default="http://localhost:8000",