Skip to content

Commit 85cf303

Browse files
committed
fix: resolve 9 infrastructure bugs from stability audit
Shell Scripts:
- dream-restore.sh: Replace hardcoded config list with dynamic glob for compose overlays (.base.yml, .nvidia.yml, etc.) (Light-Heart-Labs#4)
- dream-update.sh: Use resolve-compose-stack.sh for container restart instead of hardcoded docker-compose.yml (Light-Heart-Labs#7)
- dream-update.sh: Enable dotglob in rollback to restore hidden files like .env and .version (Light-Heart-Labs#6)
- dream-backup.sh: Include .tar.gz archives in retention/list queries to prevent infinite disk accumulation (Light-Heart-Labs#9)
- dream-backup.sh: Dynamic glob for config backup (compose overlays)

Token Spy:
- compose.yaml: Add persistent volume mount for data/ directory to prevent SQLite data loss on container restart (#2)
- main.py: Offload blocking SSH subprocess to thread pool via asyncio.to_thread() to prevent event loop deadlock (#3)
- main.py: Add finally blocks in streaming generators to guarantee billing metrics are logged on asyncio.CancelledError (Light-Heart-Labs#8)
- db_postgres.py: Add conn.rollback() on failed SQL to prevent connection pool poisoning (Light-Heart-Labs#10)

Installer:
- detection.sh: Run systemd auto-resume service as root instead of $USER to prevent sudo password hang and permission errors (Light-Heart-Labs#11)
1 parent 91464d1 commit 85cf303

7 files changed

Lines changed: 70 additions & 41 deletions

File tree

dream-server/dream-backup.sh

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ list_backups() {
6868
local backups=()
6969
while IFS= read -r -d '' backup; do
7070
backups+=("$backup")
71-
done < <(find "$BACKUP_ROOT" -maxdepth 1 -type d -name "*-*-*" -print0 2>/dev/null | sort -z -r)
71+
done < <(find "$BACKUP_ROOT" -maxdepth 1 \( -type d -o -name "*.tar.gz" \) -name "*-*-*" -print0 2>/dev/null | sort -z -r)
7272

7373
if [[ ${#backups[@]} -eq 0 ]]; then
7474
log_info "No backups found"
@@ -199,21 +199,11 @@ backup_config() {
199199
local backup_dir="$1"
200200
log_info "Backing up configuration..."
201201

202-
# Essential config files
203-
local config_files=(
204-
".env"
205-
"docker-compose.yml"
206-
".version"
207-
"dream-preflight.sh"
208-
"dream-update.sh"
209-
)
210-
211-
for file in "${config_files[@]}"; do
212-
if [[ -f "$DREAM_DIR/$file" ]]; then
213-
cp "$DREAM_DIR/$file" "$backup_dir/"
214-
log_success "Backed up: $file"
215-
else
216-
log_warn "Skipped (not found): $file"
202+
# Essential config files: discover compose overlays + dotfiles dynamically
203+
for file in "$DREAM_DIR"/.env "$DREAM_DIR"/.version "$DREAM_DIR"/docker-compose*.y*ml "$DREAM_DIR"/dream-preflight.sh "$DREAM_DIR"/dream-update.sh; do
204+
if [[ -f "$file" ]]; then
205+
cp "$file" "$backup_dir/"
206+
log_success "Backed up: $(basename "$file")"
217207
fi
218208
done
219209

@@ -257,7 +247,7 @@ apply_retention() {
257247
local backups=()
258248
while IFS= read -r -d '' backup; do
259249
backups+=("$backup")
260-
done < <(find "$BACKUP_ROOT" -maxdepth 1 -type d -name "*-*-*" -print0 2>/dev/null | sort -z -r)
250+
done < <(find "$BACKUP_ROOT" -maxdepth 1 \( -type d -o -name "*.tar.gz" \) -name "*-*-*" -print0 2>/dev/null | sort -z -r)
261251

262252
local count=${#backups[@]}
263253
if [[ $count -gt $RETENTION_COUNT ]]; then
@@ -409,7 +399,11 @@ main() {
409399
fi
410400

411401
# Check if running in Dream Server directory
412-
if [[ ! -f "$DREAM_DIR/docker-compose.yml" && ! -d "$DREAM_DIR/data" ]]; then
402+
local has_compose=false
403+
for f in "$DREAM_DIR"/docker-compose*.y*ml; do
404+
[[ -f "$f" ]] && has_compose=true && break
405+
done
406+
if [[ "$has_compose" == "false" && ! -d "$DREAM_DIR/data" ]]; then
413407
log_warn "This doesn't appear to be a Dream Server directory"
414408
log_warn "Expected: docker-compose.yml or data/ directory"
415409
read -rp "Continue anyway? [y/N] " confirm

dream-server/dream-restore.sh

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -229,10 +229,10 @@ dry_run_preview() {
229229
if [[ "$restore_config" == "true" ]]; then
230230
echo "Config Files to Restore:"
231231
echo "───────────────────────────────────────────────────────────────────"
232-
local config_files=(".env" "docker-compose.yml" ".version")
233-
for file in "${config_files[@]}"; do
234-
if [[ -f "$backup_dir/$file" ]]; then
235-
echo "$file"
232+
# Dynamically discover config files (dotfiles + compose overlays + scripts)
233+
for file in "$backup_dir"/.env "$backup_dir"/.version "$backup_dir"/docker-compose*.y*ml "$backup_dir"/dream-*.sh; do
234+
if [[ -f "$file" ]]; then
235+
echo "$(basename "$file")"
236236
fi
237237
done
238238
if [[ -d "$backup_dir/config" ]]; then
@@ -284,12 +284,11 @@ restore_config() {
284284
local backup_dir="$1"
285285
log_step "Restoring configuration..."
286286

287-
local config_files=(".env" "docker-compose.yml" ".version" "dream-preflight.sh" "dream-update.sh")
288-
289-
for file in "${config_files[@]}"; do
290-
if [[ -f "$backup_dir/$file" ]]; then
291-
cp "$backup_dir/$file" "$DREAM_DIR/"
292-
log_success "Restored: $file"
287+
# Dynamically discover config files (dotfiles + compose overlays + scripts)
288+
for file in "$backup_dir"/.env "$backup_dir"/.version "$backup_dir"/docker-compose*.y*ml "$backup_dir"/dream-*.sh; do
289+
if [[ -f "$file" ]]; then
290+
cp "$file" "$DREAM_DIR/"
291+
log_success "Restored: $(basename "$file")"
293292
fi
294293
done
295294

@@ -309,13 +308,19 @@ verify_restore() {
309308
local all_good=true
310309

311310
# Check critical paths
312-
local critical_paths=("data/open-webui" "docker-compose.yml")
313-
for path in "${critical_paths[@]}"; do
314-
if [[ ! -e "$DREAM_DIR/$path" ]]; then
315-
log_warn "Missing after restore: $path"
316-
all_good=false
317-
fi
311+
# Check that at least one compose file exists (base or standalone)
312+
local has_compose=false
313+
for f in "$DREAM_DIR"/docker-compose*.y*ml; do
314+
[[ -f "$f" ]] && has_compose=true && break
318315
done
316+
if [[ "$has_compose" == "false" ]]; then
317+
log_warn "Missing after restore: no docker-compose*.yml files found"
318+
all_good=false
319+
fi
320+
if [[ ! -d "$DREAM_DIR/data/open-webui" ]]; then
321+
log_warn "Missing after restore: data/open-webui"
322+
all_good=false
323+
fi
319324

320325
if [[ "$all_good" == "true" ]]; then
321326
log_success "Restore verification passed"
@@ -457,7 +462,12 @@ main() {
457462
fi
458463

459464
# Check if running in Dream Server directory
460-
if [[ ! -f "$DREAM_DIR/docker-compose.yml" && ! -d "$DREAM_DIR/data" ]]; then
465+
# Check for any compose file (standalone or overlay) or data directory
466+
local has_compose=false
467+
for f in "$DREAM_DIR"/docker-compose*.y*ml; do
468+
[[ -f "$f" ]] && has_compose=true && break
469+
done
470+
if [[ "$has_compose" == "false" && ! -d "$DREAM_DIR/data" ]]; then
461471
log_warn "This doesn't appear to be a Dream Server directory"
462472
log_warn "Expected: docker-compose.yml or data/ directory"
463473
read -rp "Continue anyway? [y/N] " confirm

dream-server/dream-update.sh

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -294,11 +294,20 @@ cmd_update() {
294294

295295
# Restart services
296296
log_info "Restarting services..."
297-
local compose_file="${INSTALL_DIR}/docker-compose.yml"
298-
if [[ -f "$compose_file" ]]; then
297+
local compose_flags
298+
if [[ -x "${INSTALL_DIR}/scripts/resolve-compose-stack.sh" ]]; then
299+
compose_flags=$(bash "${INSTALL_DIR}/scripts/resolve-compose-stack.sh" --script-dir "$INSTALL_DIR" 2>/dev/null | tail -1)
300+
fi
301+
if [[ -n "${compose_flags:-}" ]]; then
302+
cd "$INSTALL_DIR"
303+
docker compose $compose_flags down --remove-orphans 2>/dev/null || docker-compose $compose_flags down --remove-orphans
304+
docker compose $compose_flags up -d 2>/dev/null || docker-compose $compose_flags up -d
305+
elif [[ -f "${INSTALL_DIR}/docker-compose.yml" ]]; then
299306
cd "$INSTALL_DIR"
300307
docker compose down --remove-orphans 2>/dev/null || docker-compose down --remove-orphans
301308
docker compose up -d 2>/dev/null || docker-compose up -d
309+
else
310+
log_warn "No compose files found. Skipping container restart."
302311
fi
303312

304313
# Run health checks
@@ -372,14 +381,16 @@ cmd_rollback() {
372381
cd "$INSTALL_DIR"
373382
docker compose down 2>/dev/null || docker-compose down 2>/dev/null || true
374383

375-
# Restore files
384+
# Restore files (enable dotglob to include .env, .version, etc.)
376385
log_info "Restoring configuration files..."
386+
shopt -s dotglob
377387
for file in "$backup_path"/*; do
378388
if [[ -f "$file" && "$(basename "$file")" != "metadata.json" ]]; then
379389
cp "$file" "$INSTALL_DIR/"
380390
log_info " Restored: $(basename "$file")"
381391
fi
382392
done
393+
shopt -u dotglob
383394

384395
# Restart services
385396
log_info "Restarting services..."

dream-server/extensions/services/token-spy/compose.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ services:
1010
- no-new-privileges:true
1111
ports:
1212
- "${TOKEN_SPY_PORT:-3005}:8080"
13+
volumes:
14+
- ./data/token-spy:/app/data
1315
environment:
1416
- OLLAMA_URL=${LLM_API_URL:-http://llama-server:8080}
1517
deploy:

dream-server/extensions/services/token-spy/db_postgres.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ def _get_or_create_agent(agent_name: str) -> UUID:
135135
conn.commit()
136136
_agent_cache[agent_name] = agent_id
137137
return agent_id
138+
except Exception:
139+
conn.rollback()
140+
raise
138141
finally:
139142
_put_conn(conn)
140143

@@ -205,6 +208,9 @@ def log_usage(entry: dict):
205208
)
206209
)
207210
conn.commit()
211+
except Exception:
212+
conn.rollback()
213+
raise
208214
finally:
209215
_put_conn(conn)
210216

dream-server/extensions/services/token-spy/main.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ async def _poll_remote_agents():
321321
try:
322322
# Poll remote agents (SSH-based)
323323
for agent in REMOTE_AGENTS:
324-
status = _get_remote_session_status(agent)
324+
status = await asyncio.to_thread(_get_remote_session_status, agent)
325325
chars = status.get("current_history_chars", 0)
326326
limit = get_agent_setting(agent, "session_char_limit")
327327
if limit is None or limit <= 0:
@@ -659,7 +659,9 @@ async def stream_and_capture():
659659
yield f"data: {json.dumps({'type': 'error', 'error': {'type': 'proxy_error', 'message': 'Upstream request failed'}})}\n\n"
660660
except Exception as e:
661661
log.error(f"Proxy stream error: {e}")
662-
# Still try to log what we have
662+
finally:
663+
# Guarantee billing metrics are logged even on CancelledError
664+
# (which is a BaseException and bypasses 'except Exception')
663665
if usage["input_tokens"] > 0:
664666
_log_entry(
665667
model, sys_analysis, msg_analysis, tools,
@@ -890,6 +892,8 @@ async def stream_and_capture():
890892
yield f"data: {json.dumps({'error': {'message': 'Upstream request failed', 'type': 'proxy_error'}})}\n\n"
891893
except Exception as e:
892894
log.error(f"Proxy stream error: {e}")
895+
finally:
896+
# Guarantee billing metrics are logged even on CancelledError
893897
if usage["input_tokens"] > 0:
894898
_log_entry(model, sys_analysis, msg_analysis, tools, raw_body, usage, start_time, provider_name="openai", filter_result=filter_result)
895899

dream-server/installers/lib/detection.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,10 +323,12 @@ Wants=network-online.target
323323
324324
[Service]
325325
Type=oneshot
326-
User=$USER
327326
ExecStart=/bin/bash ${SCRIPT_DIR}/install.sh ${resume_args}
328327
ExecStartPost=/bin/rm -f /etc/systemd/system/${svc_name}.service
329328
ExecStartPost=/bin/systemctl daemon-reload
329+
WorkingDirectory=${SCRIPT_DIR}
330+
Environment="HOME=${HOME}"
331+
Environment="USER=${USER}"
330332
StandardOutput=journal+console
331333
StandardError=journal+console
332334

0 commit comments

Comments
 (0)