Skip to content

Commit 5256992

Browse files
baijum and claude committed
feat: implement Phase 5.5 observability and operations
Add 6 infrastructure scripts and update bootstrap/verify for the full observability stack: credential isolation, backups, centralized logging (Loki + Promtail), monitoring (Grafana + cron alerts), automated security updates, and cost visibility reporting. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 461ab43 commit 5256992

10 files changed

Lines changed: 1049 additions & 9 deletions

docs/roadmap.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ Add the operational foundation required before opening the platform to self-host
8080

8181
**Done when:** Platform operator can view health of all running apps, restore from a backup, and receive alerts when a container is unhealthy or disk is >80% full. Each app has isolated credentials.
8282

83-
**Status:** Not started. No monitoring, backup, or alerting exists. Credential isolation deferred from Phase 5.
83+
**Status:** In progress. Infrastructure scripts and bootstrap changes implemented. Pending server deployment and verification.
8484

8585
## Phase 6 — Self-Hosting Ecosystem
8686

infrastructure/backup-postgres.sh

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/usr/bin/env bash
#
# PostgreSQL backup script.
#
# Asks the "postgres" service of the compose project in COMPOSE_FILE for every
# database that is neither a template nor the built-in "postgres" database,
# writes one custom-format (pg_dump -Fc) dump per database into BACKUP_DIR,
# removes *.dump files matched by `find -mtime +7`, and prints a summary.
# Exits 1 when at least one dump failed.
2+
# Strict mode: stop on an unhandled non-zero status (-e), treat unset
# variables as errors (-u), and fail a pipeline if any stage fails (pipefail).
set -euo pipefail
3+
4+
# ANSI color sequences for the log prefixes.
# They are stored as literal backslash text and only become real escape
# characters when a log helper prints them with escape interpretation.
NC="\033[0m" # No Color (reset)
RED="\033[0;31m"
YELLOW="\033[1;33m"
GREEN="\033[0;32m"
9+
10+
# Helper functions
11+
#######################################
# Print an informational line on stdout with a green "[INFO]" tag.
# Globals:   GREEN, NC (read)
# Arguments: message words; joined into a single string via "$*"
# Outputs:   the tagged message, with backslash escapes expanded
#######################################
info() {
  local message="$*"
  printf '%b\n' "${GREEN}[INFO]${NC} ${message}"
}
14+
15+
#######################################
# Print a warning line on stdout with a yellow "[WARN]" tag.
# Globals:   YELLOW, NC (read)
# Arguments: message words; joined into a single string via "$*"
# Outputs:   the tagged message, with backslash escapes expanded
#######################################
warn() {
  local message="$*"
  printf '%b\n' "${YELLOW}[WARN]${NC} ${message}"
}
18+
19+
#######################################
# Print an error line on stdout with a red "[ERROR]" tag.
# Globals:   RED, NC (read)
# Arguments: message words; joined into a single string via "$*"
# Outputs:   the tagged message, with backslash escapes expanded
#######################################
error() {
  local message="$*"
  printf '%b\n' "${RED}[ERROR]${NC} ${message}"
}
22+
23+
# Configuration
# Both paths can be overridden from the environment (e.g. for a staging host
# or a dry run); when unset or empty they fall back to the platform defaults.
BACKUP_DIR="${BACKUP_DIR:-/data/backups/postgres}"
COMPOSE_FILE="${COMPOSE_FILE:-/opt/platform/docker-compose.yml}"

# Create backup directory if it doesn't exist
mkdir -p -- "$BACKUP_DIR"

info "Starting PostgreSQL backup..."
info "Backup directory: $BACKUP_DIR"
32+
33+
# Discover what to back up: every database that is not a template and is not
# the built-in "postgres" database. psql -t suppresses headers; sed trims the
# leading/trailing whitespace psql leaves around each name.
info "Fetching database list..."
list_query="SELECT datname FROM pg_database WHERE datistemplate = false AND datname != 'postgres'"
databases=$(
  docker compose -f "$COMPOSE_FILE" exec -T postgres psql -U postgres -tc "$list_query" \
    | sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
)

# Nothing to dump is not an error: report it and stop successfully.
if [[ -z "$databases" ]]; then
  warn "No databases found to backup"
  exit 0
fi
43+
44+
# Track failures
failed_dumps=0
total_size=0
backup_count=0

#######################################
# Dump a single database to a timestamped custom-format file.
# Globals:   BACKUP_DIR, COMPOSE_FILE (read)
#            total_size, backup_count, failed_dumps (updated)
# Arguments: $1 - database name
# Outputs:   progress lines via info/error
# Returns:   0 in both outcomes; a failed dump is recorded in failed_dumps
#            so the remaining databases are still processed.
#######################################
backup_database() {
  local db=$1
  local filename filepath file_size human_size

  filename="${db}_$(date +%Y%m%d_%H%M%S).dump"
  filepath="$BACKUP_DIR/$filename"

  info "Backing up database: $db"

  if docker compose -f "$COMPOSE_FILE" exec -T postgres pg_dump -U postgres -Fc "$db" > "$filepath"; then
    # wc -c is portable across GNU and BSD userlands; the arithmetic
    # expansion strips the padding some wc implementations emit.
    file_size=$(wc -c < "$filepath") || file_size=0
    file_size=$((file_size))
    human_size=$(numfmt --to=iec-i --suffix=B "$file_size" 2>/dev/null || echo "${file_size}B")
    info "  ✓ Backed up $db to $filename ($human_size)"
    # Plain assignments instead of ((var++)): an arithmetic command whose
    # value is 0 returns status 1, which under `set -e` would abort the
    # script right after the first dump.
    total_size=$((total_size + file_size))
    backup_count=$((backup_count + 1))
  else
    error "  ✗ Failed to backup $db"
    failed_dumps=$((failed_dumps + 1))
    rm -f -- "$filepath" # Clean up partial dump
  fi
}

# Backup each database
# Names are read one per line on fd 3, so they are neither word-split nor
# glob-expanded, and stdin is left untouched for docker.
while IFS= read -r db <&3; do
  [[ -n "$db" ]] || continue
  backup_database "$db"
done 3<<< "$databases"
68+
69+
# Retention: delete backups older than 7 days
info "Cleaning up old backups (older than 7 days)..."
# find prints one line per file it deletes; wc counts those lines and tr
# removes the padding some wc implementations add around the number.
deleted_count=$(
  find "$BACKUP_DIR" -name "*.dump" -mtime +7 -delete -print \
    | wc -l \
    | tr -d ' '
)
if (( deleted_count == 0 )); then
  info "  No old backups to remove"
else
  info "  Removed $deleted_count old backup(s)"
fi
77+
78+
# Print summary
printf '\n'
info "=== Backup Summary ==="
info "Databases backed up: $backup_count"
# The total is only reported when something was actually written; numfmt is
# optional, so fall back to a raw byte count when it is unavailable.
if (( total_size > 0 )); then
  info "Total backup size: $(numfmt --to=iec-i --suffix=B "$total_size" 2>/dev/null || echo "${total_size}B")"
fi
(( failed_dumps == 0 )) || error "Failed backups: $failed_dumps"
info "Backup location: $BACKUP_DIR"

# Exit with error if any dumps failed
(( failed_dumps == 0 )) || exit 1

0 commit comments

Comments
 (0)