Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions dream-server/.env.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,11 @@
"TTS_VOICE": {
"type": "string",
"description": "Text-to-speech voice"
},
"DIFY_SECRET_KEY": {
"type": "string",
"description": "Dify secret key for API access",
"secret": true
}
}
}
2 changes: 2 additions & 0 deletions dream-server/docker-compose.base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,5 @@ services:
networks:
default:
name: dream-network
dream-network:
name: dream-network
60 changes: 12 additions & 48 deletions dream-server/extensions/services/whisper/docker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,58 +1,22 @@
#!/bin/sh
# ============================================================================
# DreamServer Whisper VAD Patch
# DreamServer Whisper Entrypoint
# ============================================================================
# Injects Voice Activity Detection parameters into the speaches STT router.
# This runs at container startup and is IDEMPOTENT — safe across restarts.
#
# Why: The upstream speaches image doesn't include VAD tuning parameters
# that work well for conversational AI. This patch adds them at runtime
# so we don't need to maintain a custom Docker image.
#
# The DREAM_PATCHED marker prevents duplicate insertion when Docker
# restarts the container (which preserves the writable filesystem layer).
# VAD patch disabled — the sed/perl replacement can break Python syntax
# in newer versions of speaches where the transcribe() call spans multiple
# lines. Using upstream defaults until patch can handle multi-line safely.
# ============================================================================

STT_FILE="/home/ubuntu/speaches/src/speaches/routers/stt.py"

apply_patch() {
# Already patched? Skip to prevent duplicate insertion on restart
if grep -q "DREAM_PATCHED" "$STT_FILE" 2>/dev/null; then
echo "[dream-whisper] VAD patch already applied, skipping"
return 0
fi

# Check if target pattern exists
if ! grep -qE '^[[:space:]]*vad_filter[[:space:]]*=[[:space:]]*effective_vad_filter[[:space:]]*,?[[:space:]]*$' "$STT_FILE" 2>/dev/null; then
echo "[dream-whisper] WARNING: Target pattern not found in $STT_FILE" >&2
echo "[dream-whisper] Upstream may have changed - patch skipped" >&2
return 0
fi

# Apply patch - match the FULL LINE to prevent partial matches
# Replaces the vad_filter line with vad_filter + vad_parameters + marker
if command -v perl >/dev/null 2>&1; then
perl -i -pe 's/^[[:space:]]*vad_filter\s*=\s*effective_vad_filter\s*,?\s*$/ vad_filter=effective_vad_filter,\n vad_parameters={"threshold": 0.3, "min_silence_duration_ms": 400, "min_speech_duration_ms": 50, "speech_pad_ms": 200}, # DREAM_PATCHED/' "$STT_FILE"
else
sed -i -E '/^[[:space:]]*vad_filter[[:space:]]*=[[:space:]]*effective_vad_filter[[:space:]]*,?[[:space:]]*$/c\ vad_filter=effective_vad_filter,\n vad_parameters={"threshold": 0.3, "min_silence_duration_ms": 400, "min_speech_duration_ms": 50, "speech_pad_ms": 200}, # DREAM_PATCHED' "$STT_FILE"
fi

# Verify
if grep -q "DREAM_PATCHED" "$STT_FILE" 2>/dev/null; then
echo "[dream-whisper] VAD patch applied successfully"
else
echo "[dream-whisper] WARNING: Patch verification failed" >&2
fi
local stt_file="$1"
echo "[dream-whisper] apply_patch called for $stt_file (no-op)"
}

# Apply patch if file exists and is writable (non-fatal if it fails)
if [ -f "$STT_FILE" ] && [ -w "$STT_FILE" ]; then
apply_patch
elif [ ! -f "$STT_FILE" ]; then
echo "[dream-whisper] WARNING: $STT_FILE not found, skipping patch" >&2
else
echo "[dream-whisper] WARNING: $STT_FILE not writable, skipping patch" >&2
fi
STT_FILE=$(python3 -c "import speaches.routers.stt as m; print(m.__file__)" 2>/dev/null || true)

# VAD patch disabled — upstream compatibility, using defaults
# TODO: Fix patch to handle multi-line function calls safely.
echo "[dream-whisper] VAD patch disabled (upstream compatibility), using defaults"

# Always start the server (patch failure should not block startup)
# Always start uvicorn (patch failure is non-fatal but logged)
exec uvicorn --factory speaches.main:create_app --host 0.0.0.0 --port 8000
2 changes: 2 additions & 0 deletions dream-server/installers/phases/06-directories.sh
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ MODELS_EOF
LITELLM_KEY=$(_env_get LITELLM_KEY "sk-dream-$(openssl rand -hex 16 2>/dev/null || head -c 16 /dev/urandom | xxd -p)")
LIVEKIT_SECRET=$(_env_get LIVEKIT_API_SECRET "$(openssl rand -base64 32 2>/dev/null || head -c 32 /dev/urandom | base64)")
DASHBOARD_API_KEY=$(_env_get DASHBOARD_API_KEY "$(openssl rand -hex 32 2>/dev/null || head -c 32 /dev/urandom | xxd -p)")
DIFY_SECRET_KEY=$(_env_get DIFY_SECRET_KEY "$(openssl rand -hex 32 2>/dev/null || head -c 32 /dev/urandom | xxd -p)")
OPENCODE_SERVER_PASSWORD=$(_env_get OPENCODE_SERVER_PASSWORD "")

# Preserve user-supplied cloud API keys
Expand Down Expand Up @@ -305,6 +306,7 @@ LIVEKIT_API_SECRET=${LIVEKIT_SECRET}
OPENCLAW_TOKEN=${OPENCLAW_TOKEN:-$(openssl rand -hex 24 2>/dev/null || head -c 24 /dev/urandom | xxd -p)}
OPENCODE_SERVER_PASSWORD=${OPENCODE_SERVER_PASSWORD}
OPENCODE_PORT=3003
DIFY_SECRET_KEY=${DIFY_SECRET_KEY}

#=== Voice Settings ===
WHISPER_MODEL=base
Expand Down
2 changes: 2 additions & 0 deletions dream-server/installers/windows/lib/env-generator.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ function New-DreamEnv {
$dashboardApiKey = Get-EnvOrNew "DASHBOARD_API_KEY" (New-SecureHex -Bytes 32)
$openclawToken = Get-EnvOrNew "OPENCLAW_TOKEN" (New-SecureHex -Bytes 24)
$searxngSecret = Get-EnvOrNew "SEARXNG_SECRET" (New-SecureHex -Bytes 32)
$difySecretKey = Get-EnvOrNew "DIFY_SECRET_KEY" (New-SecureHex -Bytes 32)

# Determine LLM API URL based on backend
# AMD on Windows: llama-server runs natively, containers reach it via host.docker.internal
Expand Down Expand Up @@ -208,6 +209,7 @@ LIVEKIT_API_SECRET=$livekitSecret
OPENCLAW_TOKEN=$openclawToken
OPENCODE_SERVER_PASSWORD=
OPENCODE_PORT=3003
DIFY_SECRET_KEY=$difySecretKey

#=== Voice Settings ===
WHISPER_MODEL=base
Expand Down
5 changes: 3 additions & 2 deletions dream-server/scripts/resolve-compose-stack.sh
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,12 @@ if ext_dir.exists():
service = manifest.get("service", {})
# Check GPU backend compatibility
backends = service.get("gpu_backends", ["amd", "nvidia"])
if gpu_backend not in backends and "all" not in backends:
# "none" means CPU-only — compatible with any GPU backend
if gpu_backend not in backends and "all" not in backends and "none" not in backends:
continue
# Get compose file from manifest
compose_rel = service.get("compose_file", "")
if compose_rel:
if compose_rel and not compose_rel.endswith(".disabled"):
compose_path = service_dir / compose_rel
if compose_path.exists():
resolved.append(str(compose_path.relative_to(script_dir)))
Expand Down
Loading