Commit d695fe5

yasinBursali and claude committed
fix(host-agent): surface docker failures in _compose_restart_llama_server
_compose_restart_llama_server called subprocess.run four+ times for docker commands (compose stop/up, docker restart/stop/start) without inspecting returncode. Docker-layer failures (permission denied, missing compose file, daemon errors) were silently swallowed: _do_model_activate proceeded into the 5-minute health-check polling loop and only reported a generic "Health check failed — rolled back" with no indication of the real cause.

Route all docker calls through a nested _run helper that captures stderr, checks returncode, and raises RuntimeError with the failing command + stderr tail on non-zero. The caller at _do_model_activate already wraps the path in `except Exception` and will now surface the docker error immediately.

Native-host path only — Windows/WSL2 uses _recreate_llama_server, which has its own returncode handling.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent ac881c3 commit d695fe5

File tree

1 file changed (+20, -11 lines)

dream-server/bin/dream-host-agent.py

Lines changed: 20 additions & 11 deletions
@@ -1599,31 +1599,40 @@ def _write_lemonade_config(install_dir: Path, gguf_file: str):
 def _compose_restart_llama_server(env: dict):
     """Restart llama-server via docker compose (host-native path).
 
-    This is the primary restart strategy for Linux (systemd) and macOS
-    (launchd) where the agent runs natively on the host. It mirrors the
-    proven pattern from bootstrap-upgrade.sh lines 289-304.
+    Primary restart strategy for Linux (systemd) and macOS (launchd) where the
+    agent runs natively on the host. Mirrors bootstrap-upgrade.sh lines 289-304.
+
+    Raises RuntimeError on any docker-layer failure so _do_model_activate can
+    surface the error immediately instead of waiting for the health-check loop.
     """
     gpu_backend = env.get("GPU_BACKEND", "nvidia")
     compose_flags = []
     flags_file = INSTALL_DIR / ".compose-flags"
     if flags_file.exists():
         compose_flags = flags_file.read_text(encoding="utf-8").strip().split()
 
+    def _run(argv, timeout):
+        result = subprocess.run(
+            argv, cwd=str(INSTALL_DIR),
+            capture_output=True, text=True, timeout=timeout,
+        )
+        if result.returncode != 0:
+            raise RuntimeError(
+                f"{' '.join(argv[:3])} failed (exit {result.returncode}): "
+                f"{(result.stderr or '').strip()[:300]}"
+            )
+
     if gpu_backend == "amd":
         # Lemonade: restart preserves cached binary, reads models.ini on boot
         if compose_flags:
-            subprocess.run(["docker", "compose"] + compose_flags + ["restart", "llama-server"],
-                           cwd=str(INSTALL_DIR), capture_output=True, timeout=300)
+            _run(["docker", "compose"] + compose_flags + ["restart", "llama-server"], 300)
         else:
-            subprocess.run(["docker", "restart", "dream-llama-server"],
-                           capture_output=True, timeout=300)
+            _run(["docker", "restart", "dream-llama-server"], 300)
     else:
         # llama.cpp: recreate to pick up new GGUF_FILE from .env
         if compose_flags:
-            subprocess.run(["docker", "compose"] + compose_flags + ["stop", "llama-server"],
-                           cwd=str(INSTALL_DIR), capture_output=True, timeout=120)
-            subprocess.run(["docker", "compose"] + compose_flags + ["up", "-d", "llama-server"],
-                           cwd=str(INSTALL_DIR), capture_output=True, timeout=300)
+            _run(["docker", "compose"] + compose_flags + ["stop", "llama-server"], 120)
+            _run(["docker", "compose"] + compose_flags + ["up", "-d", "llama-server"], 300)
         else:
             # No compose flags — cannot use compose. Fall back to
             # inspect-and-recreate, which picks up GGUF_FILE from .env.

0 commit comments
