Merge pull request #986 from Light-Heart-Labs/fix/stt-bugs-and-recovery-tooling

Lightheartdevs · web-flow · commit 342e08cb5779 · 2026-04-18T12:03:23.000-04:00
fix(whisper): bug fixes + recovery tooling (dream stt, doctor check, offline warn)
diff --git a/dream-server/dream-cli b/dream-server/dream-cli
@@ -2207,6 +2207,81 @@ cmd_model() {
     esac
 }
 
+# dream stt [current|status|download] [MODEL] - show, probe, or pre-fetch the
+# Whisper STT model through the models API. MODEL defaults to AUDIO_STT_MODEL
+# from .env. Returns non-zero if the service is unreachable or a step fails.
+cmd_stt() {
+    check_install; cd "$INSTALL_DIR" || return 1
+    local subcmd="${1:-status}"
+    # Resolve model and port from .env (with fallback for older installs).
+    local model port url target_model target_encoded attempt ready
+    model=$(_env_get_raw AUDIO_STT_MODEL)
+    [[ -z "$model" ]] && model="Systran/faster-whisper-base"
+    port=$(_env_get_raw WHISPER_PORT)
+    [[ -z "$port" ]] && port="9000"
+    url="http://localhost:${port}"
+    # An explicit MODEL argument wins; "/" is percent-encoded for the URL path.
+    target_model="${2:-$model}"
+    target_encoded="${target_model//\//%2F}"
+
+    case "$subcmd" in
+        current)
+            echo "STT model: ${model}"
+            echo "Whisper URL: ${url}"
+            return 0
+            ;;
+        status)
+            if ! curl -sf --max-time 3 "${url}/v1/models" >/dev/null 2>&1; then
+                warn "Whisper service not reachable at ${url}"
+                echo "  Is voice enabled and the stack running?"
+                return 1
+            fi
+            if curl -sf --max-time 5 "${url}/v1/models/${target_encoded}" >/dev/null 2>&1; then
+                success "Cached: ${target_model}"
+            else
+                warn "Not cached: ${target_model}"
+                echo "  Run: dream stt download${2:+ $2}"
+            fi
+            ;;
+        download)
+            # Wait briefly for the models API to be ready (max 15s).
+            ready=false
+            for (( attempt = 0; attempt < 15; attempt++ )); do
+                if curl -sf --max-time 2 "${url}/v1/models" >/dev/null 2>&1; then
+                    ready=true
+                    break
+                fi
+                sleep 1
+            done
+            if [[ "$ready" != true ]]; then
+                error "Whisper models API not reachable at ${url}. Is voice enabled and the stack running?"
+                return 1  # in case error() logs without exiting
+            fi
+            # Skip if already cached.
+            if curl -sf --max-time 5 "${url}/v1/models/${target_encoded}" >/dev/null 2>&1; then
+                success "Already cached: ${target_model}"
+                return 0
+            fi
+            echo "Downloading ${target_model}..."
+            if ! curl -s --max-time 3600 -X POST "${url}/v1/models/${target_encoded}"; then
+                error "Download request failed. Check Whisper logs: dream logs whisper"
+                return 1
+            fi
+            # Verify the download actually cached.
+            if curl -sf --max-time 10 "${url}/v1/models/${target_encoded}" >/dev/null 2>&1; then
+                success "Downloaded and cached: ${target_model}"
+            else
+                error "Download returned but model is not cached. Check Whisper logs: dream logs whisper"
+                return 1
+            fi
+            ;;
+        *)
+            error "Usage: dream stt <current|status|download> [MODEL]"
+            return 1
+            ;;
+    esac
+}
+
 cmd_backup() {
     check_install
     cd "$INSTALL_DIR"
@@ -3196,6 +3271,8 @@ ${CYAN}Commands:${NC}
                       Switch between local/cloud/hybrid modes
   model [current|list|swap]
                       View or change the local LLM model tier
+  stt [current|status|download] [MODEL]
+                      View Whisper STT model, check cache, or trigger download
   backup [options]    Create a backup of user data and config
   backup verify <id>   Verify checksum integrity for a backup
   restore [backup_id] Restore from a backup
@@ -3317,6 +3394,7 @@ case "${1:-help}" in
     preset|p)    shift; cmd_preset "$@" ;;
     mode|m)      shift; cmd_mode "$@" ;;
     model)       shift; cmd_model "$@" ;;
+    stt)         shift; cmd_stt "$@" ;;
     backup)      shift; cmd_backup "$@" ;;
     restore)     shift; cmd_restore "$@" ;;
     rollback)    cmd_rollback ;;
diff --git a/dream-server/installers/phases/09-offline.sh b/dream-server/installers/phases/09-offline.sh
@@ -87,6 +87,21 @@ M1_EOF
         fi
     fi
 
+    # Whisper STT model: Phase 12 pre-downloads it by POSTing to the running
+    # Speaches API, but offline-mode users often disconnect BEFORE Phase 12
+    # completes, or they run 'dream stop' before the network becomes unavailable.
+    # We can't pre-download from HuggingFace directly in Phase 9 without a
+    # huggingface_hub Python dep, so surface the requirement loudly here and
+    # point users at the 'dream stt download' command provided by dream-cli.
+    if [[ "$ENABLE_VOICE" == "true" ]]; then
+        ai_warn "Offline mode + voice enabled: Whisper STT model is NOT pre-downloaded by Phase 9"
+        log "  The installer's Phase 12 will still attempt the download while online,"
+        log "  but if you go offline before it completes, STT will 404 on first use."
+        log "  To ensure the model is cached before disconnecting, run after install:"
+        log "    dream stt download"
+        log "  Or use 'scripts/pre-download.sh --with-voice' to pre-cache before install."
+    fi
+
     # Offline docs already copied by rsync/cp block above
     ai_ok "Offline mode configured"
     log "After installation, disconnect from internet for fully air-gapped operation"
diff --git a/dream-server/installers/phases/12-health.sh b/dream-server/installers/phases/12-health.sh
@@ -179,7 +179,7 @@ if [[ "$ENABLE_VOICE" == "true" ]]; then
     STT_MODEL_ENCODED="${STT_MODEL//\//%2F}"
     WHISPER_PORT_RESOLVED="${SERVICE_PORTS[whisper]:-9000}"
     WHISPER_URL="http://localhost:${WHISPER_PORT_RESOLVED}"
-    STT_RECOVERY_CMD="curl -X POST ${WHISPER_URL}/v1/models/${STT_MODEL_ENCODED}"
+    STT_RECOVERY_CMD="curl --max-time 3600 -X POST ${WHISPER_URL}/v1/models/${STT_MODEL_ENCODED}"
 
     # Step 1: wait briefly for the models API to be ready. Whisper's /health
     # endpoint can pass before the models endpoint responds, so we probe
diff --git a/dream-server/installers/windows/install-windows.ps1 b/dream-server/installers/windows/install-windows.ps1
@@ -662,17 +662,35 @@ foreach ($check in $healthChecks) {
 # Trigger the download explicitly, verify it completed, surface recovery
 # instructions on failure. Mirrors Linux Phase 12 and macOS install-macos.sh.
 if ($enableVoice) {
-    # Read AUDIO_STT_MODEL from .env (written by env-generator.ps1).
+    # Read AUDIO_STT_MODEL and WHISPER_PORT from .env (written by env-generator.ps1).
+    # Use ReadAllText with explicit UTF8NoBom encoding so legacy BOM-prefixed
+    # .env files (written by old Set-Content -Encoding UTF8) don't break the
+    # regex on the first line.
     $sttModel = "Systran/faster-whisper-base"  # safe fallback
+    $whisperPort = "9000"  # safe fallback
     $envPath = Join-Path $installDir ".env"
     if (Test-Path $envPath) {
-        $envLine = Get-Content $envPath -ErrorAction SilentlyContinue | Where-Object { $_ -match "^AUDIO_STT_MODEL=(.+)$" } | Select-Object -First 1
-        if ($envLine -match "^AUDIO_STT_MODEL=(.+)$") {
-            $sttModel = $Matches[1].Trim('"').Trim()
+        try {
+            $envText = [System.IO.File]::ReadAllText($envPath, (New-Object System.Text.UTF8Encoding($false)))
+            # Strip any leading BOM defensively in case the file was written
+            # with a different encoding.
+            if ($envText.Length -gt 0 -and [int]$envText[0] -eq 0xFEFF) {
+                $envText = $envText.Substring(1)
+            }
+            foreach ($line in ($envText -split "`r?`n")) {
+                if ($line -match "^AUDIO_STT_MODEL=(.*)$") {
+                    $val = $Matches[1].Trim('"').Trim()
+                    if ($val) { $sttModel = $val }
+                } elseif ($line -match "^WHISPER_PORT=(.*)$") {
+                    $val = $Matches[1].Trim('"').Trim()
+                    if ($val) { $whisperPort = $val }
+                }
+            }
+        } catch {
+            # Fall through to defaults on any read failure.
         }
     }
     $sttModelEncoded = $sttModel -replace "/", "%2F"
-    $whisperPort = 9000  # Windows doesn't reassign this port
     $whisperUrl = "http://localhost:$whisperPort"
     $sttRecoveryCmd = "Invoke-WebRequest -Method POST -Uri '$whisperUrl/v1/models/$sttModelEncoded' -TimeoutSec 3600"
 
diff --git a/dream-server/scripts/dream-doctor.sh b/dream-server/scripts/dream-doctor.sh
@@ -115,6 +115,31 @@ if command -v curl >/dev/null 2>&1; then
     fi
 fi
 
+# STT model cache probe. When the installer's pre-download fails, Whisper's
+# /health still passes (service up) while the model is absent from the cache,
+# so transcription returns 404 with no other symptom. Detect that state here
+# and record the exact command that recovers from it.
+STT_MODEL_NAME=""
+STT_RECOVERY_HINT=""
+STT_MODEL_CACHED="unknown"
+if command -v curl >/dev/null 2>&1 && [[ "${ENABLE_VOICE:-false}" == "true" ]]; then
+    STT_MODEL_NAME="${AUDIO_STT_MODEL:-Systran/faster-whisper-base}"
+    _stt_model_encoded="${STT_MODEL_NAME//\//%2F}"
+    _stt_whisper_port="${SERVICE_PORTS[whisper]:-9000}"
+    _stt_whisper_url="http://127.0.0.1:${_stt_whisper_port}"
+    if curl -sf --max-time 5 "${_stt_whisper_url}/v1/models/${_stt_model_encoded}" >/dev/null 2>&1; then
+        STT_MODEL_CACHED="true"
+    elif curl -sf --max-time 5 "${_stt_whisper_url}/health" >/dev/null 2>&1; then
+        # Service is up but the model lookup failed: the model is missing, so
+        # offer the download request as the recovery hint.
+        STT_MODEL_CACHED="false"
+        STT_RECOVERY_HINT="curl --max-time 3600 -X POST ${_stt_whisper_url}/v1/models/${_stt_model_encoded}"
+    else
+        # Neither probe succeeded; report the service itself as unavailable.
+        STT_MODEL_CACHED="service_down"
+    fi
+fi
+
 # Collect extension diagnostics (wrapped in function to allow local variables)
 collect_extension_diagnostics() {
     # Use outer GPU_BACKEND or default to nvidia (don't make local to avoid set -u issues)
@@ -214,13 +239,13 @@ elif command -v python >/dev/null 2>&1; then
     PYTHON_CMD="python"
 fi
 
-"$PYTHON_CMD" - "$CAP_FILE" "$PREFLIGHT_FILE" "$REPORT_FILE" "$DOCKER_CLI" "$DOCKER_DAEMON" "$COMPOSE_CLI" "$DASHBOARD_HTTP" "$WEBUI_HTTP" "$_DASHBOARD_PORT" "$_WEBUI_PORT" "$EXT_DIAGNOSTICS" <<'PY'
+"$PYTHON_CMD" - "$CAP_FILE" "$PREFLIGHT_FILE" "$REPORT_FILE" "$DOCKER_CLI" "$DOCKER_DAEMON" "$COMPOSE_CLI" "$DASHBOARD_HTTP" "$WEBUI_HTTP" "$_DASHBOARD_PORT" "$_WEBUI_PORT" "$EXT_DIAGNOSTICS" "$STT_MODEL_CACHED" "$STT_MODEL_NAME" "$STT_RECOVERY_HINT" <<'PY'
 import json
 import pathlib
 import sys
 from datetime import datetime, timezone
 
-cap_file, preflight_file, report_file, docker_cli, docker_daemon, compose_cli, dashboard_http, webui_http, dashboard_port, webui_port, ext_diagnostics_json = sys.argv[1:]
+cap_file, preflight_file, report_file, docker_cli, docker_daemon, compose_cli, dashboard_http, webui_http, dashboard_port, webui_port, ext_diagnostics_json, stt_cached, stt_model_name, stt_recovery = sys.argv[1:]
 
 cap = json.load(open(cap_file, "r", encoding="utf-8"))
 pre = json.load(open(preflight_file, "r", encoding="utf-8"))
@@ -238,6 +263,8 @@ report = {
         "compose_cli": compose_cli == "true",
         "dashboard_http": dashboard_http == "true",
         "webui_http": webui_http == "true",
+        "stt_model_cached": stt_cached,
+        "stt_model_name": stt_model_name,
     },
     "extensions": ext_diagnostics,
     "summary": {
@@ -269,6 +296,13 @@ if runtime["docker_daemon"] and not runtime["dashboard_http"]:
 if runtime["docker_daemon"] and not runtime["webui_http"]:
     fix_hints.append(f"Verify Open WebUI container and port {webui_port} mapping.")
 
+# STT model cache: service up but model missing is a common silent failure
+if stt_cached == "false" and stt_recovery:
+    fix_hints.append(
+        f"Whisper STT model '{stt_model_name}' not cached — transcription will 404. "
+        f"Run: {stt_recovery}"
+    )
+
 # Extension-specific hints
 for ext in ext_diagnostics:
     ext_id = ext.get("id", "unknown")
diff --git a/dream-server/tests/bats-tests/stt-download.bats b/dream-server/tests/bats-tests/stt-download.bats