-
Notifications
You must be signed in to change notification settings - Fork 41
Expand file tree
/
Copy pathdocker-compose.staging.yml
More file actions
299 lines (264 loc) · 8.74 KB
/
docker-compose.staging.yml
File metadata and controls
299 lines (264 loc) · 8.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
version: "3.7"

networks:
  # Declared external: this network must already exist; this file does not
  # create it. The django-wsgi, django-asgi and nginx services attach to it.
  proxy-shared:
    external: true
  # Overlay network created by this stack; every service in this file joins it.
  ubyssey-internal:
    driver: overlay
    # Lets standalone (non-service) containers attach to the overlay network.
    attachable: true
services:
  # MySQL database. The database user and the root user both read their
  # password from the same SQL_PASSWORD secret.
  mysql:
    image: mysql:8.0
    environment:
      MYSQL_DATABASE: ubyssey
      MYSQL_USER: ubyssey
      MYSQL_PASSWORD_FILE: /run/secrets/SQL_PASSWORD
      MYSQL_ROOT_PASSWORD_FILE: /run/secrets/SQL_PASSWORD
    secrets:
      - SQL_PASSWORD
    networks:
      - ubyssey-internal
    volumes:
      # Bind mount so database files survive container replacement
      - ./mysql-data:/var/lib/mysql

  # Memcached instance
  cache:
    image: memcached:1.6.10
    networks:
      - ubyssey-internal
    # NOTE(review): this publishes memcached on the host. Services on
    # ubyssey-internal can reach it as cache:11211 without a published port;
    # confirm nothing outside the stack needs it before keeping this open.
    ports:
      - "11211:11211"
    # Healthy when a TCP connection to port 11211 can be opened within 5 seconds
    healthcheck:
      test: ["CMD", "bash", "-c", "timeout 5 bash -c '</dev/tcp/localhost/11211'"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 10s

  # Redis instance
  redis:
    # NOTE(review): the Compose v3 reference documents `restart` as ignored by
    # `docker stack deploy`; it only takes effect with plain `docker compose`.
    restart: always
    # NOTE(review): `latest` is unpinned, so redeploys can silently change the
    # Redis version. Consider pinning a specific tag.
    image: redis:latest
    networks:
      - ubyssey-internal
    volumes:
      # The official redis image writes its data to /data, so the named volume
      # must be mounted there to actually persist anything.
      - redis-data:/data
    # NOTE(review): this publishes Redis on the host and no password is
    # configured in this file. Confirm external access is intended.
    ports:
      - "6379:6379"

  # WSGI worker for regular HTTP traffic
  django-wsgi:
    image: ghcr.io/ubyssey/ubyssey.ca:${TAG}
    # The number of gunicorn workers below is derived from the number of CPU cores
    # on the VM using this formula: (2 x $num_cores) + 1, where $num_cores is 8.
    #
    #   Total workers = (2 x 8) + 1 = 17
    #
    # We then further divide by the number of container replicas (4):
    #
    #   Workers per replica = floor(17 / 4) = 4
    #
    # Ref: https://medium.com/@jleonro/finetunne-number-of-workers-in-gunicorn-ab1907b06cae
    #
    # NOTE(review): every replica runs `migrate`, `crontab add` and starts cron,
    # so with 4 replicas migrations start concurrently and each cron job is
    # scheduled in all 4 containers. Confirm this is intended.
    command: >
      bash -c "python manage.py migrate && python manage.py crontab add && service cron start
      && gunicorn ubyssey.wsgi:application --workers=4 --bind 0.0.0.0:8000 --access-logfile - --error-logfile -"
    environment:
      SECRET_KEY_FILE: /run/secrets/DJANGO_SECRET_KEY
      SQL_HOST: mysql
      # NOTE(review): the app connects as MySQL root although the mysql service
      # also creates a dedicated `ubyssey` user. Confirm root is required.
      SQL_USER: root
      SQL_PASSWORD_FILE: /run/secrets/SQL_PASSWORD
      SQL_DATABASE: ubyssey
      GS_ACCESS_KEY_ID_FILE: /run/secrets/GS_ACCESS_KEY_ID
      GS_SECRET_ACCESS_KEY_FILE: /run/secrets/GS_SECRET_ACCESS_KEY
      STATIC_URL: https://storage.googleapis.com/ubyssey-staging/static/
      GOOGLE_APPLICATION_CREDENTIALS: /run/secrets/GOOGLE_APPLICATION_CREDENTIALS
      EMAIL_HOST_PASSWORD_FILE: /run/secrets/EMAIL_HOST_PASSWORD
    secrets:
      - DJANGO_SECRET_KEY
      - SQL_PASSWORD
      - GS_ACCESS_KEY_ID
      - GS_SECRET_ACCESS_KEY
      - GOOGLE_APPLICATION_CREDENTIALS
      - EMAIL_HOST_PASSWORD
    networks:
      - proxy-shared
      - ubyssey-internal
    expose:
      - 8000
    # Send container logs to Google Cloud Logging
    # Ref: https://docs.docker.com/engine/logging/drivers/gcplogs/
    logging:
      driver: gcplogs
    deploy:
      # Deploy 4 replicas of the Django app. This makes us more resilient to errors
      # that might cause a single container to crash momentarily.
      #
      # Ref: https://docs.docker.com/reference/compose-file/deploy/#replicas
      mode: replicated
      replicas: 4
      # Restart containers on failure up to a maximum of 10 times.
      # Ref: https://docs.docker.com/reference/compose-file/deploy/#restart_policy
      restart_policy:
        condition: on-failure
        max_attempts: 10
        window: 120s
      # Update 2 containers at a time. This prevents downtime when releasing an update.
      # Ref: https://docs.docker.com/reference/compose-file/deploy/#update_config
      # NOTE: No rollback for staging - we want to see what breaks
      update_config:
        parallelism: 2
        delay: 10s
        order: stop-first
        monitor: 30s
        max_failure_ratio: 0
        failure_action: pause
    # NOTE(review): the Compose v3 reference documents `depends_on` as ignored
    # by `docker stack deploy`, so start order is not guaranteed in swarm mode.
    depends_on:
      - mysql
      - cache
      - redis

  # ASGI worker for WebSocket traffic at /ws/
  django-asgi:
    image: ghcr.io/ubyssey/ubyssey.ca:${TAG}
    # Use Gunicorn with Uvicorn workers for ASGI/WebSocket support
    #
    # Worker configuration: 2 workers per replica × 2 replicas = 4 total ASGI workers
    #
    # Why 4 ASGI workers?
    # - Total CPU budget: (2 × 8 cores) + 1 = 17 workers for BOTH WSGI + ASGI combined
    # - Traffic split: Majority is HTTP (WSGI), minimal WebSocket traffic (ASGI)
    # - Resource allocation: WSGI handles bulk of requests, ASGI dedicated to WebSocket-only
    # - Each ASGI worker is async and multiplexes many concurrent WebSocket connections
    #
    # NOTE(review): django-wsgi runs 4 replicas × 4 workers = 16, so WSGI + ASGI
    # totals 20 workers, above the 17-worker budget stated above.
    #
    # Why 2 replicas?
    # - Provides fault tolerance (if 1 replica fails, 50% capacity remains)
    # - Enables zero-downtime updates (update 1 replica at a time)
    #
    # Ref: https://medium.com/@jleonro/finetunne-number-of-workers-in-gunicorn-ab1907b06cae
    command: >
      bash -c "gunicorn ubyssey.asgi:application --workers=2 -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:8001 --access-logfile - --error-logfile -"
    environment:
      SECRET_KEY_FILE: /run/secrets/DJANGO_SECRET_KEY
      SQL_HOST: mysql
      SQL_USER: root
      SQL_PASSWORD_FILE: /run/secrets/SQL_PASSWORD
      SQL_DATABASE: ubyssey
      GS_ACCESS_KEY_ID_FILE: /run/secrets/GS_ACCESS_KEY_ID
      GS_SECRET_ACCESS_KEY_FILE: /run/secrets/GS_SECRET_ACCESS_KEY
      STATIC_URL: https://storage.googleapis.com/ubyssey-staging/static/
      GOOGLE_APPLICATION_CREDENTIALS: /run/secrets/GOOGLE_APPLICATION_CREDENTIALS
      EMAIL_HOST_PASSWORD_FILE: /run/secrets/EMAIL_HOST_PASSWORD
    secrets:
      - DJANGO_SECRET_KEY
      - SQL_PASSWORD
      - GS_ACCESS_KEY_ID
      - GS_SECRET_ACCESS_KEY
      - GOOGLE_APPLICATION_CREDENTIALS
      - EMAIL_HOST_PASSWORD
    networks:
      - proxy-shared
      - ubyssey-internal
    expose:
      - 8001
    # Send container logs to Google Cloud Logging
    # Ref: https://docs.docker.com/engine/logging/drivers/gcplogs/
    logging:
      driver: gcplogs
    deploy:
      # Deploy 2 replicas for WebSocket handling
      mode: replicated
      replicas: 2
      # Restart containers on failure up to a maximum of 10 times.
      # Ref: https://docs.docker.com/reference/compose-file/deploy/#restart_policy
      restart_policy:
        condition: on-failure
        max_attempts: 10
        window: 120s
      # Update 1 container at a time for WebSocket connections
      # Ref: https://docs.docker.com/reference/compose-file/deploy/#update_config
      # NOTE: No rollback for staging - we want to see what breaks
      update_config:
        parallelism: 1
        delay: 10s
        order: stop-first
        monitor: 30s
        max_failure_ratio: 0
        failure_action: pause
    # NOTE(review): the Compose v3 reference documents `depends_on` as ignored
    # by `docker stack deploy`, so start order is not guaranteed in swarm mode.
    depends_on:
      - mysql
      - cache
      - redis

  # Nginx front end publishing ports 80 and 443
  nginx:
    image: nginx:1.27
    # NOTE(review): the Compose v3 reference documents `restart` as ignored by
    # `docker stack deploy`; deploy.restart_policy below applies in swarm mode.
    restart: always
    volumes:
      - ./nginx/:/etc/nginx/:ro
      - ./certbot/www/:/var/www/certbot/:ro
      - ./certbot/letsencrypt-etc/:/etc/letsencrypt/:ro
      # Volume shared with the certbot service
      - certbot-signals:/tmp/certbot-signals
      - ./scripts/nginx-reload-watcher.sh:/nginx-reload-watcher.sh:ro
    networks:
      - proxy-shared
      - ubyssey-internal
    # Quoted so HOST:CONTAINER pairs are always parsed as strings
    ports:
      - "80:80"
      - "443:443"
    logging:
      driver: gcplogs
    # Run nginx and the reload watcher in parallel
    command: >
      sh -c "sh /nginx-reload-watcher.sh & nginx -g 'daemon off;'"
    # Health check that validates the nginx configuration with `nginx -t`.
    # This prevents bad configs from being deployed silently. Note that it
    # checks the config only; it does not probe whether nginx is serving requests.
    healthcheck:
      test: ["CMD", "nginx", "-t"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
    deploy:
      # Restart containers on failure up to a maximum of 5 times
      restart_policy:
        condition: on-failure
        max_attempts: 5
        window: 120s
      # Rolling update strategy - only update if health check passes
      # NOTE: No rollback for staging - we want to see what breaks
      update_config:
        parallelism: 1
        delay: 10s
        order: stop-first
        monitor: 30s
        max_failure_ratio: 0
        failure_action: pause
    # NOTE(review): the Compose v3 reference documents `depends_on` as ignored
    # by `docker stack deploy`, so start order is not guaranteed in swarm mode.
    depends_on:
      - django-wsgi
      - django-asgi

  # Certbot container running the certificate renewal script
  certbot:
    image: certbot/certbot:v4.0.0
    volumes:
      - ./certbot/www/:/var/www/certbot/:rw
      - ./certbot/letsencrypt-etc/:/etc/letsencrypt/:rw
      - ./certbot/letsencrypt-lib/:/var/lib/letsencrypt/:rw
      # Volume shared with the nginx service
      - certbot-signals:/tmp/certbot-signals
      - ./scripts/certbot-renewal-with-nginx-reload.sh:/certbot-renewal-with-nginx-reload.sh:ro
    networks:
      - ubyssey-internal
    # Run the certbot service script that signals nginx to reload via shared volume
    entrypoint: ["/bin/sh", "/certbot-renewal-with-nginx-reload.sh"]
    deploy:
      # Restart certbot on failure - background service should always run
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 5
volumes:
  # Mounted at /tmp/certbot-signals in both the nginx and certbot services
  certbot-signals:
    driver: local
  # Named volume mounted by the redis service; default driver and options
  redis-data: {}
secrets:
  # Secrets marked `external: true` must already exist; this file only
  # references them and does not create them.
  DJANGO_SECRET_KEY:
    external: true
  SQL_PASSWORD:
    external: true
  GS_ACCESS_KEY_ID:
    external: true
  GS_SECRET_ACCESS_KEY:
    external: true
  # Unlike the others, this secret is created from a local file, so
  # ./service-account.json must be present next to this file at deploy time.
  GOOGLE_APPLICATION_CREDENTIALS:
    file: ./service-account.json
  EMAIL_HOST_PASSWORD:
    external: true