dream-server/bin/dream-host-agent.py (31 changes: 20 additions & 11 deletions)
@@ -1599,31 +1599,40 @@ def _write_lemonade_config(install_dir: Path, gguf_file: str):
 def _compose_restart_llama_server(env: dict):
     """Restart llama-server via docker compose (host-native path).
 
-    This is the primary restart strategy for Linux (systemd) and macOS
-    (launchd) where the agent runs natively on the host. It mirrors the
-    proven pattern from bootstrap-upgrade.sh lines 289-304.
+    Primary restart strategy for Linux (systemd) and macOS (launchd) where the
+    agent runs natively on the host. Mirrors bootstrap-upgrade.sh lines 289-304.
+
+    Raises RuntimeError on any docker-layer failure so _do_model_activate can
+    surface the error immediately instead of waiting for the health-check loop.
     """
     gpu_backend = env.get("GPU_BACKEND", "nvidia")
     compose_flags = []
     flags_file = INSTALL_DIR / ".compose-flags"
     if flags_file.exists():
         compose_flags = flags_file.read_text(encoding="utf-8").strip().split()
 
+    def _run(argv, timeout):
+        result = subprocess.run(
+            argv, cwd=str(INSTALL_DIR),
+            capture_output=True, text=True, timeout=timeout,
+        )
+        if result.returncode != 0:
+            raise RuntimeError(
+                f"{' '.join(argv[:3])} failed (exit {result.returncode}): "
+                f"{(result.stderr or '').strip()[:300]}"
+            )
+
     if gpu_backend == "amd":
         # Lemonade: restart preserves cached binary, reads models.ini on boot
         if compose_flags:
-            subprocess.run(["docker", "compose"] + compose_flags + ["restart", "llama-server"],
-                           cwd=str(INSTALL_DIR), capture_output=True, timeout=300)
+            _run(["docker", "compose"] + compose_flags + ["restart", "llama-server"], 300)
         else:
-            subprocess.run(["docker", "restart", "dream-llama-server"],
-                           capture_output=True, timeout=300)
+            _run(["docker", "restart", "dream-llama-server"], 300)
     else:
         # llama.cpp: recreate to pick up new GGUF_FILE from .env
         if compose_flags:
-            subprocess.run(["docker", "compose"] + compose_flags + ["stop", "llama-server"],
-                           cwd=str(INSTALL_DIR), capture_output=True, timeout=120)
-            subprocess.run(["docker", "compose"] + compose_flags + ["up", "-d", "llama-server"],
-                           cwd=str(INSTALL_DIR), capture_output=True, timeout=300)
+            _run(["docker", "compose"] + compose_flags + ["stop", "llama-server"], 120)
+            _run(["docker", "compose"] + compose_flags + ["up", "-d", "llama-server"], 300)
         else:
             # No compose flags — cannot use compose. Fall back to
             # inspect-and-recreate, which picks up GGUF_FILE from .env.
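
The new docstring states that the function now raises RuntimeError so that _do_model_activate can surface docker-layer failures immediately instead of waiting out the health-check loop. The caller itself is outside this hunk, so the sketch below is only an illustration of how such a caller might consume the new exception; load_env() and report_status() are hypothetical stand-ins, not names from this PR, and _compose_restart_llama_server is assumed to be in scope from the diff above.

```python
import subprocess


def load_env() -> dict:
    """Hypothetical placeholder for however the agent reads its .env settings."""
    return {"GPU_BACKEND": "nvidia"}


def report_status(state: str, detail: str = "") -> None:
    """Hypothetical placeholder for the agent's status-reporting channel."""
    print(f"[model-activate] {state}: {detail}")


def _do_model_activate_sketch() -> bool:
    env = load_env()
    try:
        # With this change, docker-layer failures raise RuntimeError right away
        # instead of leaving the caller to time out in a health-check loop.
        _compose_restart_llama_server(env)  # assumed in scope (defined in the diff above)
    except RuntimeError as exc:
        report_status("error", str(exc))
        return False
    except subprocess.TimeoutExpired as exc:
        # _run() forwards timeout= to subprocess.run(), which raises
        # TimeoutExpired on a hung command rather than returning, so a caller
        # wanting uniform handling would catch it alongside RuntimeError.
        report_status("error", f"restart timed out: {exc}")
        return False
    report_status("restarting", "waiting for llama-server health check")
    return True
```

One side effect of the helper worth noting: _run() only converts non-zero exit codes into RuntimeError, so a docker command that hangs past its timeout still escapes as subprocess.TimeoutExpired, which the caller has to handle separately unless _run() wraps subprocess.run in its own try/except.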