# docker-compose.prod.yml
---
# ================================================================
# Production stack — STANDALONE, registry-only, always-latest.
#
# This is the production-VM compose file. It runs the full stack:
# postgres (app DB), postgres-tfstate (Terraform state), keycloak
# (+ its postgres), rabbitmq, redis, backend, worker, frontend,
# nginx (TLS reverse proxy).
#
# docker compose -f docker-compose.prod.yml up -d --pull always
#
# Key differences vs ``docker-compose.staging.yml`` (the staging /
# version-pinnable variant):
# - Image tags are HARDCODED to ``:latest`` — no ${VAR:-latest}
# override. ``pull_policy: always`` on every service from this
# repo's images so each ``compose up`` re-pulls. Combined with
# the CI workflow tagging ``main`` as ``:latest``, this gives a
# continuous-delivery pipeline: merge to main → image rebuilt
# and pushed → ``compose up`` pulls and rolls.
# - The staging.yml variant keeps ``${BACKEND_VERSION:-latest}`` so a
# staging operator can pin a specific commit SHA / tag for a
# reproducible rollout before promoting to prod. Prod intentionally
# cannot pin — if you need a pin, use staging.yml on a separate VM.
#
# Everything else (resource limits, log rotation, nginx TLS,
# required-env-vars enforcement) is identical to staging.yml so the
# operational behaviour is consistent.
#
# DB migrations are NOT part of this compose file — run them after
# compose-up:
#
# docker exec backend-prod python -m alembic upgrade head
#
# The Ansible playbook in infrastructure/ansible automates this and
# bundles the seed step on first boot.
# ================================================================
services:
# ----------------------------------------------------------------
# PostgreSQL Database (application)
# ----------------------------------------------------------------
# Application database used by the backend. It is attached to
# backend-network ONLY: the postgres-tfstate and worker sections of
# this file state that the worker must never reach the application
# DB, so this service must not join worker-network.
  postgres:
    image: postgres:16-alpine
    container_name: postgres-prod
    environment:
      POSTGRES_USER: ${DB_USER:?DB_USER is required}
      POSTGRES_PASSWORD: ${DB_PASSWORD:?DB_PASSWORD is required}
      POSTGRES_DB: ${DB_NAME:?DB_NAME is required}
    volumes:
      - postgres_prod_data:/var/lib/postgresql/data
    # NOTE(review): this publishes the database port on every host
    # interface. Confirm that is intended on the production VM; prefix
    # the mapping with ``127.0.0.1:`` if only local access is needed.
    ports:
      - "${DB_PORT:-5432}:5432"
    healthcheck:
      # ``-d`` names the database explicitly. Without it the probe
      # targets a database named after the user, and the server logs a
      # failed connection attempt on every check if none exists.
      test: ["CMD-SHELL", "pg_isready -U ${DB_USER} -d ${DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - backend-network
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 2G
        reservations:
          cpus: '0.5'
          memory: 512M
# ----------------------------------------------------------------
# PostgreSQL Database (Terraform Remote State)
# ----------------------------------------------------------------
# Worker-only Postgres for Terraform's ``pg`` backend. Deliberately
# isolated from the application DB: the worker must never reach the
# backend's data, only its own state store. One schema per deployment
# (``deployment_<uuid>``) keeps the locking/state surface tight. Not
# exposed on the host — internal to worker-network.
  postgres-tfstate:
    image: postgres:16-alpine
    container_name: postgres-tfstate-prod
    environment:
      POSTGRES_USER: ${TFSTATE_DB_USER:?TFSTATE_DB_USER is required}
      POSTGRES_PASSWORD: ${TFSTATE_DB_PASSWORD:?TFSTATE_DB_PASSWORD is required}
      POSTGRES_DB: ${TFSTATE_DB_NAME:?TFSTATE_DB_NAME is required}
    volumes:
      - postgres_tfstate_prod_data:/var/lib/postgresql/data
    healthcheck:
      # ``-d`` names the database explicitly. Without it the probe
      # targets a database named after the user, and the server logs a
      # failed connection attempt on every check if none exists.
      test: ["CMD-SHELL", "pg_isready -U ${TFSTATE_DB_USER} -d ${TFSTATE_DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - worker-network
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 1G
        reservations:
          cpus: '0.25'
          memory: 256M
# ----------------------------------------------------------------
# Keycloak database
# ----------------------------------------------------------------
# Dedicated Postgres instance for Keycloak. Attached to
# keycloak-network only and not published on the host.
  keycloak-postgres:
    image: postgres:16-alpine
    container_name: keycloak-postgres-prod
    environment:
      POSTGRES_USER: ${KEYCLOAK_DB_USER:?KEYCLOAK_DB_USER is required}
      POSTGRES_PASSWORD: ${KEYCLOAK_DB_PASSWORD:?KEYCLOAK_DB_PASSWORD is required}
      POSTGRES_DB: ${KEYCLOAK_DB_NAME:?KEYCLOAK_DB_NAME is required}
    volumes:
      - keycloak_postgres_prod_data:/var/lib/postgresql/data
    healthcheck:
      # ``-d`` names the database explicitly. Without it the probe
      # targets a database named after the user, and the server logs a
      # failed connection attempt on every check if none exists.
      test: ["CMD-SHELL", "pg_isready -U ${KEYCLOAK_DB_USER} -d ${KEYCLOAK_DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - keycloak-network
    restart: unless-stopped
# ----------------------------------------------------------------
# Keycloak (Identity Provider)
# ----------------------------------------------------------------
# On first boot the ``dhbw`` realm is imported from
# keycloak/realm-export.json; once the realm exists in the persistent
# DB volume the import is skipped.
#
# Keycloak runs in ``start-dev`` mode (plain HTTP, no hostname
# enforcement) because the nginx service in this file terminates TLS
# in front of it. ``KC_PROXY=edge`` makes Keycloak trust the
# X-Forwarded-Proto header set by nginx so that the redirect URIs it
# builds use https://.
  keycloak:
    container_name: keycloak-prod
    image: quay.io/keycloak/keycloak:23.0
    restart: unless-stopped
    command:
      - "start-dev"
      - "--import-realm"
    depends_on:
      # Wait until the Keycloak database passes its healthcheck.
      keycloak-postgres:
        condition: service_healthy
    environment:
      # Database connection (the keycloak-postgres service).
      KC_DB: postgres
      KC_DB_URL: jdbc:postgresql://keycloak-postgres:5432/${KEYCLOAK_DB_NAME}
      KC_DB_USERNAME: ${KEYCLOAK_DB_USER}
      KC_DB_PASSWORD: ${KEYCLOAK_DB_PASSWORD}
      # Bootstrap admin account; both values are mandatory.
      KEYCLOAK_ADMIN: ${KEYCLOAK_ADMIN_USER:?KEYCLOAK_ADMIN_USER is required}
      KEYCLOAK_ADMIN_PASSWORD: ${KEYCLOAK_ADMIN_PASSWORD:?KEYCLOAK_ADMIN_PASSWORD is required}
      # Reverse-proxy related settings.
      KC_HOSTNAME_STRICT: 'false'
      KC_HTTP_ENABLED: 'true'
      KC_PROXY: 'edge'
    volumes:
      # Realm definition, mounted read-only into the import directory.
      - ./keycloak/realm-export.json:/opt/keycloak/data/import/realm-export.json:ro
    networks:
      - keycloak-network
      - backend-network
# ----------------------------------------------------------------
# RabbitMQ (Celery broker)
# ----------------------------------------------------------------
# Message broker for Celery. Reachable by the backend and the worker
# through backend-network and worker-network respectively.
  rabbitmq:
    container_name: rabbitmq-prod
    hostname: rabbitmq-prod
    image: rabbitmq:3.13-management-alpine
    restart: unless-stopped
    environment:
      # Credentials are mandatory; the vhost falls back to "/".
      RABBITMQ_DEFAULT_USER: ${RABBITMQ_USER:?RABBITMQ_USER is required}
      RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASSWORD:?RABBITMQ_PASSWORD is required}
      RABBITMQ_DEFAULT_VHOST: ${RABBITMQ_VHOST:-/}
    # NOTE(review): both the AMQP port and the management port are
    # published on the host. Confirm this is intended on the
    # production VM.
    ports:
      - "${RABBITMQ_PORT:-5672}:5672"
      - "${RABBITMQ_MANAGEMENT_PORT:-15672}:15672"
    volumes:
      - rabbitmq_prod_data:/var/lib/rabbitmq
    networks:
      - backend-network
      - worker-network
    healthcheck:
      test:
        - "CMD"
        - "rabbitmq-diagnostics"
        - "ping"
      interval: 10s
      timeout: 5s
      retries: 5
    deploy:
      resources:
        reservations:
          cpus: '0.25'
          memory: 512M
        limits:
          cpus: '1.0'
          memory: 1G
# ----------------------------------------------------------------
# Redis — Celery RESULT backend (the broker is the rabbitmq service).
# ----------------------------------------------------------------
# Not published on the host; reachable only via backend-network and
# worker-network.
  redis:
    container_name: redis-prod
    hostname: redis
    image: redis:7-alpine
    restart: unless-stopped
    volumes:
      - redis_prod_data:/data
    networks:
      - backend-network
      - worker-network
    healthcheck:
      test:
        - "CMD"
        - "redis-cli"
        - "ping"
      interval: 10s
      timeout: 5s
      retries: 5
# ----------------------------------------------------------------
# Backend API (FastAPI)
# ----------------------------------------------------------------
# Always-latest from GHCR. ``pull_policy: always`` ensures every
# ``compose up`` pulls the freshest manifest; the docker daemon
# caches layers, so only a real upstream change triggers a layer
# download.
#
# Migrations are applied AFTER compose-up by the Ansible playbook:
#   docker exec backend-prod python -m alembic upgrade head
# Do not re-introduce a migrate init-container here — that pattern
# hides migration failures inside the compose output.
  backend:
    image: ghcr.io/six7-click-n-deploy/backend:latest
    pull_policy: always
    container_name: backend-prod
    # The backend image's venv is built at /build/.venv and copied to
    # /app/.venv, so the console scripts (``uvicorn``, ``alembic``)
    # carry a stale build-time shebang (#!/build/.venv/bin/python) that
    # doesn't resolve at runtime. Invoke via ``python -m`` to bypass
    # the broken script.
    #
    # ``--workers`` is driven by BACKEND_WORKERS (default 4), the same
    # variable that feeds the WORKERS environment entry below, so the
    # override reaches uvicorn instead of being shadowed by a literal.
    command:
      - "python"
      - "-m"
      - "uvicorn"
      - "app.main:app"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8000"
      - "--workers"
      - "${BACKEND_WORKERS:-4}"
      - "--proxy-headers"
      - "--forwarded-allow-ips"
      - "*"
    environment:
      # Database
      DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME}
      # Security
      SECRET_KEY: ${SECRET_KEY:?SECRET_KEY is required}
      ALGORITHM: ${ALGORITHM:-HS256}
      ACCESS_TOKEN_EXPIRE_MINUTES: ${ACCESS_TOKEN_EXPIRE_MINUTES:-30}
      CREDENTIAL_ENCRYPTION_KEY: ${CREDENTIAL_ENCRYPTION_KEY:?CREDENTIAL_ENCRYPTION_KEY is required}
      # Celery (broker = RabbitMQ, result backend = Redis)
      CELERY_BROKER_URL: amqp://${RABBITMQ_USER}:${RABBITMQ_PASSWORD}@rabbitmq:5672/${RABBITMQ_VHOST:-/}
      CELERY_RESULT_BACKEND: redis://redis:6379/0
      # Keycloak
      KEYCLOAK_SERVER_URL: ${KEYCLOAK_SERVER_URL:-http://keycloak:8080}
      KEYCLOAK_REALM: ${KEYCLOAK_REALM:-dhbw}
      KEYCLOAK_CLIENT_ID: ${KEYCLOAK_CLIENT_ID:-appstore-backend}
      KEYCLOAK_CLIENT_SECRET: ${KEYCLOAK_CLIENT_SECRET:?KEYCLOAK_CLIENT_SECRET is required}
      KEYCLOAK_ENABLED: ${KEYCLOAK_ENABLED:-true}
      # Git
      GIT_ACCESS_TOKEN: ${GIT_ACCESS_TOKEN:?GIT_ACCESS_TOKEN is required}
      # CORS
      CORS_ORIGINS: ${CORS_ORIGINS:?CORS_ORIGINS is required}
      # Email (Gmail SMTP) — used by the post-deploy notify hook in
      # celery_event_listener.py. SMTP_ENABLED is the explicit
      # kill-switch (default false): even with SMTP_USER/SMTP_PASSWORD
      # populated, mail delivery only happens when SMTP_ENABLED=true.
      SMTP_ENABLED: ${SMTP_ENABLED:-false}
      SMTP_HOST: ${SMTP_HOST:-smtp.gmail.com}
      SMTP_PORT: ${SMTP_PORT:-465}
      SMTP_USER: ${SMTP_USER:-}
      SMTP_PASSWORD: ${SMTP_PASSWORD:-}
      SMTP_FROM_EMAIL: ${SMTP_FROM_EMAIL:-}
      SMTP_FROM_NAME: ${SMTP_FROM_NAME:-Click-n-Deploy}
      # Public base URL of the frontend, used in owner-summary mails
      # to deep-link back to the deployment detail page.
      APP_BASE_URL: ${APP_BASE_URL:?APP_BASE_URL is required}
      # Application
      DEBUG: "False"
      APP_NAME: ${APP_NAME:-Backend API}
      # Server. Values are quoted so they stay strings rather than
      # being typed as numbers by the YAML parser.
      HOST: "0.0.0.0"
      PORT: "8000"
      WORKERS: ${BACKEND_WORKERS:-4}
      LOG_LEVEL: ${LOG_LEVEL:-info}
    depends_on:
      postgres:
        condition: service_healthy
      rabbitmq:
        condition: service_healthy
      redis:
        condition: service_healthy
      # keycloak defines no healthcheck in this file, so only wait for
      # it to be started.
      keycloak:
        condition: service_started
    volumes:
      - backend_git_repos:/tmp/repos
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
    networks:
      - frontend-network
      - backend-network
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 2G
        reservations:
          cpus: '0.5'
          memory: 512M
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
# ----------------------------------------------------------------
# Worker Service (Celery)
# ----------------------------------------------------------------
# The worker is given no application-DB connection: its only
# dependencies are RabbitMQ and Redis (Celery) and postgres-tfstate
# (Terraform state).
  worker:
    container_name: worker-prod
    image: ghcr.io/six7-click-n-deploy/worker:latest
    pull_policy: always
    restart: unless-stopped
    depends_on:
      postgres-tfstate:
        condition: service_healthy
      rabbitmq:
        condition: service_healthy
      redis:
        condition: service_healthy
    environment:
      # Celery: RabbitMQ is the broker, Redis stores the results.
      CELERY_BROKER_URL: amqp://${RABBITMQ_USER}:${RABBITMQ_PASSWORD}@rabbitmq:5672/${RABBITMQ_VHOST:-/}
      CELERY_RESULT_BACKEND: redis://redis:6379/0
      # Key for decrypting OpenStack credentials; has to be the same
      # value the backend uses.
      CREDENTIAL_ENCRYPTION_KEY: ${CREDENTIAL_ENCRYPTION_KEY:?CREDENTIAL_ENCRYPTION_KEY is required}
      # Terraform ``pg`` backend. Points at the isolated state
      # database, not the application DB; one schema per deployment
      # (deployment_<uuid>).
      TFSTATE_DATABASE_URL: postgresql://${TFSTATE_DB_USER}:${TFSTATE_DB_PASSWORD}@postgres-tfstate:5432/${TFSTATE_DB_NAME}?sslmode=disable
      # Git access
      GIT_ACCESS_TOKEN: ${GIT_ACCESS_TOKEN:?GIT_ACCESS_TOKEN is required}
      # Application settings
      DEBUG: 'False'
      LOG_LEVEL: ${LOG_LEVEL:-info}
    volumes:
      - worker_git_repos:/tmp/repos
    networks:
      - worker-network
    deploy:
      resources:
        reservations:
          cpus: '0.5'
          memory: 512M
        limits:
          cpus: '2.0'
          memory: 2G
    # Rotate container logs: at most three json-file logs of 10 MB.
    logging:
      driver: 'json-file'
      options:
        max-file: '3'
        max-size: '10m'
# ----------------------------------------------------------------
# nginx (TLS reverse proxy)
# HTTPS is terminated on :443 and :80 is redirected to :443.
# Routing:
#   /      → frontend:8080
#   /api/  → backend:8000   (the /api/ prefix is stripped)
#   /auth/ → keycloak:8080  (the /auth/ prefix is stripped)
# Ansible provisions the TLS certificate into ./nginx/certs/ on the
# VM; the directory is bind-mounted so it survives redeploys.
# ----------------------------------------------------------------
  nginx:
    container_name: nginx-prod
    image: nginx:1.27-alpine
    restart: unless-stopped
    # Ansible computes ``config_hash``, a content fingerprint of
    # nginx.conf, before running ``docker compose up``. Because
    # nginx.conf is bind-mounted, editing it does not by itself make
    # Compose recreate the container, whereas a changed label does.
    # Every edit to nginx.conf therefore changes this label, Compose
    # detects the spec drift, and the container is recreated.
    labels:
      config_hash: "${NGINX_CONFIG_HASH:-empty}"
    # Long-form equivalent of listing the three services: wait until
    # each one has been started.
    depends_on:
      frontend:
        condition: service_started
      backend:
        condition: service_started
      keycloak:
        condition: service_started
    ports:
      - '80:80'
      - '443:443'
    volumes:
      # Both mounts are read-only.
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/certs:/etc/nginx/certs:ro
    networks:
      - frontend-network
      - backend-network
      - keycloak-network
    # Rotate container logs: at most three json-file logs of 10 MB.
    logging:
      driver: 'json-file'
      options:
        max-file: '3'
        max-size: '10m'
# ----------------------------------------------------------------
# Frontend (Vue 3 SPA, served by nginx in the image)
# ----------------------------------------------------------------
# Pulled from GHCR as ``:latest`` with ``pull_policy: always``, for
# the same reason as the backend and worker services.
  frontend:
    container_name: frontend-prod
    image: ghcr.io/six7-click-n-deploy/frontend:latest
    pull_policy: always
    restart: unless-stopped
    depends_on:
      # Start only once the backend passes its healthcheck.
      backend:
        condition: service_healthy
    environment:
      NODE_ENV: production
      # Keycloak settings; the URL is mandatory, realm and client id
      # have defaults.
      VITE_KEYCLOAK_URL: ${VITE_KEYCLOAK_URL:?VITE_KEYCLOAK_URL is required}
      VITE_KEYCLOAK_REALM: ${VITE_KEYCLOAK_REALM:-dhbw}
      VITE_KEYCLOAK_CLIENT_ID: ${VITE_KEYCLOAK_CLIENT_ID:-appstore-frontend}
      # Public URLs of the app and the API; both are mandatory.
      VITE_APP_URL: ${VITE_APP_URL:?VITE_APP_URL is required}
      VITE_API_URL: ${VITE_API_URL:?VITE_API_URL is required}
    networks:
      - frontend-network
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8080"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
    deploy:
      resources:
        reservations:
          cpus: '0.25'
          memory: 256M
        limits:
          cpus: '1.0'
          memory: 1G
    # Rotate container logs: at most three json-file logs of 10 MB.
    logging:
      driver: 'json-file'
      options:
        max-file: '3'
        max-size: '10m'
# ----------------------------------------------------------------
# Volumes
# ----------------------------------------------------------------
# Named volumes, all on the ``local`` driver.
volumes:
  # Data directories of the three PostgreSQL instances.
  postgres_prod_data:
    driver: local
  postgres_tfstate_prod_data:
    driver: local
  keycloak_postgres_prod_data:
    driver: local

  # Broker and result-backend state.
  rabbitmq_prod_data:
    driver: local
  redis_prod_data:
    driver: local

  # Mounted at /tmp/repos in the backend and worker containers.
  backend_git_repos:
    driver: local
  worker_git_repos:
    driver: local
# ----------------------------------------------------------------
# Networks
# ----------------------------------------------------------------
# Four bridge networks. Each gets an explicit ``-prod`` name instead
# of a Compose-generated one.
networks:
  # frontend, backend and nginx.
  frontend-network:
    name: frontend-network-prod
    driver: bridge

  # Backend and the services it talks to, plus nginx.
  backend-network:
    name: backend-network-prod
    driver: bridge

  # Worker, its Terraform state DB, and the Celery broker / result
  # store.
  worker-network:
    name: worker-network-prod
    driver: bridge

  # Keycloak, its database, and nginx.
  keycloak-network:
    name: keycloak-network-prod
    driver: bridge