Skip to content

Commit 342e08c

Browse files
Merge pull request #986 from Light-Heart-Labs/fix/stt-bugs-and-recovery-tooling
fix(whisper): bug fixes + recovery tooling (dream stt, doctor check, offline warn)
2 parents a7d98d5 + 743f4e8 commit 342e08c

File tree

6 files changed

+357
-8
lines changed

6 files changed

+357
-8
lines changed

dream-server/dream-cli

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2207,6 +2207,81 @@ cmd_model() {
22072207
esac
22082208
}
22092209

2210+
#######################################
# Inspect or pre-download the Whisper speech-to-text (STT) model.
#
# Subcommands ($1, defaults to "status"):
#   current            Print the configured model and the Whisper base URL.
#   status [MODEL]     Report whether MODEL (default: the configured model)
#                      is cached by the Whisper service.
#   download [MODEL]   Ask the service to download MODEL, then verify that
#                      it is actually cached afterwards.
#
# Globals:
#   INSTALL_DIR (read) - directory the command switches into before running.
# Arguments:
#   $1 - subcommand (optional)
#   $2 - model id such as "org/name" (optional)
# Outputs:
#   Human-readable status lines; the body of the download POST response is
#   passed through to stdout.
# Returns:
#   0 on success (including "status" when the model is simply not cached),
#   1 when "status" cannot reach the service or INSTALL_DIR cannot be entered.
# Notes:
#   check_install, _env_get_raw, warn, success and error are defined elsewhere
#   in this script. The error call sites below have no follow-up return, so
#   they presumably rely on error terminating the script - TODO confirm.
#######################################
cmd_stt() {
    check_install
    # Fail fast rather than carrying on from the caller's working directory.
    cd "$INSTALL_DIR" || return 1

    local subcmd="${1:-status}"

    # Resolve model and port from .env (with fallback for older installs).
    local model port url
    model=$(_env_get_raw AUDIO_STT_MODEL)
    model="${model:-Systran/faster-whisper-base}"
    port=$(_env_get_raw WHISPER_PORT)
    port="${port:-9000}"
    url="http://localhost:${port}"

    # An explicit MODEL argument overrides the configured one for both
    # "status" and "download". Only "/" is percent-encoded, so the model id
    # fits in a single URL path segment.
    local target_model="${2:-$model}"
    local target_encoded="${target_model//\//%2F}"

    case "$subcmd" in
        current)
            echo "STT model: ${model}"
            echo "Whisper URL: ${url}"
            return 0
            ;;
        status)
            # Probe the models listing first so "service down" is reported
            # separately from "model missing".
            if ! curl -sf --max-time 3 "${url}/v1/models" >/dev/null 2>&1; then
                warn "Whisper service not reachable at ${url}"
                echo " Is voice enabled and the stack running?"
                return 1
            fi
            if curl -sf --max-time 5 "${url}/v1/models/${target_encoded}" >/dev/null 2>&1; then
                success "Cached: ${target_model}"
            else
                warn "Not cached: ${target_model}"
                # Echo the model back in the hint only when the user named one.
                echo " Run: dream stt download${2:+ $2}"
            fi
            ;;
        download)
            # Wait briefly for the models API to be ready (max 15 attempts,
            # one second apart). The counter is local so it does not leak
            # into the rest of the CLI.
            local ready="false"
            local attempt
            for (( attempt = 1; attempt <= 15; attempt++ )); do
                if curl -sf --max-time 2 "${url}/v1/models" >/dev/null 2>&1; then
                    ready="true"
                    break
                fi
                sleep 1
            done
            if [[ "$ready" != "true" ]]; then
                error "Whisper models API not reachable at ${url}. Is voice enabled and the stack running?"
            fi

            # Skip if already cached.
            if curl -sf --max-time 5 "${url}/v1/models/${target_encoded}" >/dev/null 2>&1; then
                success "Already cached: ${target_model}"
                return 0
            fi

            echo "Downloading ${target_model}..."
            # The long timeout gives a large model download up to an hour.
            if ! curl -s --max-time 3600 -X POST "${url}/v1/models/${target_encoded}"; then
                error "Download request failed. Check Whisper logs: dream logs whisper"
            fi

            # Verify the download actually cached; the POST above runs without
            # -f, so an HTTP-level failure is only caught by this check.
            if curl -sf --max-time 10 "${url}/v1/models/${target_encoded}" >/dev/null 2>&1; then
                success "Downloaded and cached: ${target_model}"
            else
                error "Download returned but model is not cached. Check Whisper logs: dream logs whisper"
            fi
            ;;
        *)
            error "Usage: dream stt <current|status|download> [MODEL]"
            ;;
    esac
}
2284+
22102285
cmd_backup() {
22112286
check_install
22122287
cd "$INSTALL_DIR"
@@ -3196,6 +3271,8 @@ ${CYAN}Commands:${NC}
31963271
Switch between local/cloud/hybrid modes
31973272
model [current|list|swap]
31983273
View or change the local LLM model tier
3274+
stt [current|status|download] [MODEL]
3275+
View Whisper STT model, check cache, or trigger download
31993276
backup [options] Create a backup of user data and config
32003277
backup verify <id> Verify checksum integrity for a backup
32013278
restore [backup_id] Restore from a backup
@@ -3317,6 +3394,7 @@ case "${1:-help}" in
33173394
preset|p) shift; cmd_preset "$@" ;;
33183395
mode|m) shift; cmd_mode "$@" ;;
33193396
model) shift; cmd_model "$@" ;;
3397+
stt) shift; cmd_stt "$@" ;;
33203398
backup) shift; cmd_backup "$@" ;;
33213399
restore) shift; cmd_restore "$@" ;;
33223400
rollback) cmd_rollback ;;

dream-server/installers/phases/09-offline.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,21 @@ M1_EOF
8787
fi
8888
fi
8989

90+
# Whisper STT and offline mode.
# Phase 12 pre-downloads the STT model by POSTing to the running Speaches
# API. Offline-mode users often disconnect before Phase 12 finishes, or run
# 'dream stop' before the network goes away. Phase 9 cannot fetch the model
# from HuggingFace itself without a huggingface_hub Python dependency, so
# instead it warns loudly and points at the 'dream stt download' CLI command.
case "$ENABLE_VOICE" in
    true)
        ai_warn "Offline mode + voice enabled: Whisper STT model is NOT pre-downloaded by Phase 9"
        log " The installer's Phase 12 will still attempt the download while online,"
        log " but if you go offline before it completes, STT will 404 on first use."
        log " To ensure the model is cached before disconnecting, run after install:"
        log " dream stt download"
        log " Or use 'scripts/pre-download.sh --with-voice' to pre-cache before install."
        ;;
esac
104+
90105
# Offline docs already copied by rsync/cp block above
91106
ai_ok "Offline mode configured"
92107
log "After installation, disconnect from internet for fully air-gapped operation"

dream-server/installers/phases/12-health.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ if [[ "$ENABLE_VOICE" == "true" ]]; then
179179
STT_MODEL_ENCODED="${STT_MODEL//\//%2F}"
180180
WHISPER_PORT_RESOLVED="${SERVICE_PORTS[whisper]:-9000}"
181181
WHISPER_URL="http://localhost:${WHISPER_PORT_RESOLVED}"
182-
STT_RECOVERY_CMD="curl -X POST ${WHISPER_URL}/v1/models/${STT_MODEL_ENCODED}"
182+
STT_RECOVERY_CMD="curl --max-time 3600 -X POST ${WHISPER_URL}/v1/models/${STT_MODEL_ENCODED}"
183183

184184
# Step 1: wait briefly for the models API to be ready. Whisper's /health
185185
# endpoint can pass before the models endpoint responds, so we probe

dream-server/installers/windows/install-windows.ps1

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -662,17 +662,35 @@ foreach ($check in $healthChecks) {
662662
# Trigger the download explicitly, verify it completed, surface recovery
663663
# instructions on failure. Mirrors Linux Phase 12 and macOS install-macos.sh.
664664
if ($enableVoice) {
665-
# Read AUDIO_STT_MODEL from .env (written by env-generator.ps1).
665+
# Read AUDIO_STT_MODEL and WHISPER_PORT from .env (written by env-generator.ps1).
666+
# Use ReadAllText with explicit UTF8NoBom encoding so legacy BOM-prefixed
667+
# .env files (written by old Set-Content -Encoding UTF8) don't break the
668+
# regex on the first line.
666669
$sttModel = "Systran/faster-whisper-base" # safe fallback
670+
$whisperPort = "9000" # safe fallback
667671
$envPath = Join-Path $installDir ".env"
668672
if (Test-Path $envPath) {
669-
$envLine = Get-Content $envPath -ErrorAction SilentlyContinue | Where-Object { $_ -match "^AUDIO_STT_MODEL=(.+)$" } | Select-Object -First 1
670-
if ($envLine -match "^AUDIO_STT_MODEL=(.+)$") {
671-
$sttModel = $Matches[1].Trim('"').Trim()
673+
try {
674+
$envText = [System.IO.File]::ReadAllText($envPath, (New-Object System.Text.UTF8Encoding($false)))
675+
# Strip any leading BOM defensively in case the file was written
676+
# with a different encoding.
677+
if ($envText.Length -gt 0 -and [int]$envText[0] -eq 0xFEFF) {
678+
$envText = $envText.Substring(1)
679+
}
680+
foreach ($line in ($envText -split "`r?`n")) {
681+
if ($line -match "^AUDIO_STT_MODEL=(.*)$") {
682+
$val = $Matches[1].Trim('"').Trim()
683+
if ($val) { $sttModel = $val }
684+
} elseif ($line -match "^WHISPER_PORT=(.*)$") {
685+
$val = $Matches[1].Trim('"').Trim()
686+
if ($val) { $whisperPort = $val }
687+
}
688+
}
689+
} catch {
690+
# Fall through to defaults on any read failure.
672691
}
673692
}
674693
$sttModelEncoded = $sttModel -replace "/", "%2F"
675-
$whisperPort = 9000 # Windows doesn't reassign this port
676694
$whisperUrl = "http://localhost:$whisperPort"
677695
$sttRecoveryCmd = "Invoke-WebRequest -Method POST -Uri '$whisperUrl/v1/models/$sttModelEncoded' -TimeoutSec 3600"
678696

dream-server/scripts/dream-doctor.sh

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,31 @@ if command -v curl >/dev/null 2>&1; then
115115
fi
116116
fi
117117

118+
# STT model cache probe.
# When the installer's pre-download fails, Whisper's /health still passes
# (the service is up) but the model is not cached, so transcription returns
# 404. This probe records which of those states applies and, for the
# "up but not cached" state, the exact command that recovers from it.
#
# Results (consumed later in this script):
#   STT_MODEL_CACHED  - "unknown" (probe skipped), "true", "false",
#                       or "service_down"
#   STT_MODEL_NAME    - model id that was probed; empty when skipped
#   STT_RECOVERY_HINT - recovery command; set only when the value is "false"
STT_MODEL_CACHED="unknown"
STT_MODEL_NAME=""
STT_RECOVERY_HINT=""
if [[ "${ENABLE_VOICE:-false}" == "true" ]] && command -v curl >/dev/null 2>&1; then
    STT_MODEL_NAME="${AUDIO_STT_MODEL:-Systran/faster-whisper-base}"
    _stt_whisper_port="${SERVICE_PORTS[whisper]:-9000}"
    # Only "/" is percent-encoded so the model id fits in one path segment.
    _stt_model_encoded="${STT_MODEL_NAME//\//%2F}"
    _stt_whisper_url="http://127.0.0.1:${_stt_whisper_port}"
    if curl -sf --max-time 5 "${_stt_whisper_url}/v1/models/${_stt_model_encoded}" >/dev/null 2>&1; then
        STT_MODEL_CACHED="true"
    elif curl -sf --max-time 5 "${_stt_whisper_url}/health" >/dev/null 2>&1; then
        # Service answers but the model lookup failed: model is missing.
        STT_MODEL_CACHED="false"
        STT_RECOVERY_HINT="curl --max-time 3600 -X POST ${_stt_whisper_url}/v1/models/${_stt_model_encoded}"
    else
        # Neither endpoint answered: treat it as the service being down.
        STT_MODEL_CACHED="service_down"
    fi
fi
142+
118143
# Collect extension diagnostics (wrapped in function to allow local variables)
119144
collect_extension_diagnostics() {
120145
# Use outer GPU_BACKEND or default to nvidia (don't make local to avoid set -u issues)
@@ -214,13 +239,13 @@ elif command -v python >/dev/null 2>&1; then
214239
PYTHON_CMD="python"
215240
fi
216241

217-
"$PYTHON_CMD" - "$CAP_FILE" "$PREFLIGHT_FILE" "$REPORT_FILE" "$DOCKER_CLI" "$DOCKER_DAEMON" "$COMPOSE_CLI" "$DASHBOARD_HTTP" "$WEBUI_HTTP" "$_DASHBOARD_PORT" "$_WEBUI_PORT" "$EXT_DIAGNOSTICS" <<'PY'
242+
"$PYTHON_CMD" - "$CAP_FILE" "$PREFLIGHT_FILE" "$REPORT_FILE" "$DOCKER_CLI" "$DOCKER_DAEMON" "$COMPOSE_CLI" "$DASHBOARD_HTTP" "$WEBUI_HTTP" "$_DASHBOARD_PORT" "$_WEBUI_PORT" "$EXT_DIAGNOSTICS" "$STT_MODEL_CACHED" "$STT_MODEL_NAME" "$STT_RECOVERY_HINT" <<'PY'
218243
import json
219244
import pathlib
220245
import sys
221246
from datetime import datetime, timezone
222247

223-
cap_file, preflight_file, report_file, docker_cli, docker_daemon, compose_cli, dashboard_http, webui_http, dashboard_port, webui_port, ext_diagnostics_json = sys.argv[1:]
248+
cap_file, preflight_file, report_file, docker_cli, docker_daemon, compose_cli, dashboard_http, webui_http, dashboard_port, webui_port, ext_diagnostics_json, stt_cached, stt_model_name, stt_recovery = sys.argv[1:]
224249

225250
cap = json.load(open(cap_file, "r", encoding="utf-8"))
226251
pre = json.load(open(preflight_file, "r", encoding="utf-8"))
@@ -238,6 +263,8 @@ report = {
238263
"compose_cli": compose_cli == "true",
239264
"dashboard_http": dashboard_http == "true",
240265
"webui_http": webui_http == "true",
266+
"stt_model_cached": stt_cached,
267+
"stt_model_name": stt_model_name,
241268
},
242269
"extensions": ext_diagnostics,
243270
"summary": {
@@ -269,6 +296,13 @@ if runtime["docker_daemon"] and not runtime["dashboard_http"]:
269296
if runtime["docker_daemon"] and not runtime["webui_http"]:
270297
fix_hints.append(f"Verify Open WebUI container and port {webui_port} mapping.")
271298

299+
# STT model cache: service up but model missing is a common silent failure
300+
if stt_cached == "false" and stt_recovery:
301+
fix_hints.append(
302+
f"Whisper STT model '{stt_model_name}' not cached — transcription will 404. "
303+
f"Run: {stt_recovery}"
304+
)
305+
272306
# Extension-specific hints
273307
for ext in ext_diagnostics:
274308
ext_id = ext.get("id", "unknown")

0 commit comments

Comments
 (0)