-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
177 lines (169 loc) · 6.71 KB
/
docker-compose.yml
File metadata and controls
177 lines (169 loc) · 6.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
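# docker-compose.yml — Phase 2.6: static infrastructure only
#
# Defines the statically managed services: bootstrap, orchestrator, dashboard,
# and the observability stack (otel-collector, Loki, Prometheus, Grafana).
# The orchestrator manages all pub-sub components (brokers, publishers,
# subscribers) dynamically via the Docker SDK, so none of them appear here.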
#
# Start the full system:
#   make demo    # start, wait for orchestrator, seed 3+2+3 topology
#   make clean   # tear down everything (compose + dynamic containers)
#
# Networking note:
# All containers join aether-net. The orchestrator creates dynamic containers
# on this same network so they can resolve each other by hostname.
services:
# ── bootstrap ─────────────────────────────────────────────────────────────
  # Peer-discovery entry point. This is the only service that builds
  # aether:latest (from the repository root); the orchestrator service reuses
  # the resulting image tag.
  bootstrap:
    image: aether:latest
    build: .
    container_name: aether-bootstrap
    hostname: bootstrap
    restart: "no"
    networks:
      - aether-net
    ports:
      - "7100:7000"    # host 7100 → container 7000: TCP peer-discovery
      - "17100:17000"  # host 17100 → container 17000: HTTP /status
    environment:
      - AETHER_CONFIG=/app/config.docker.yaml
    volumes:
      # Read-only mount of the file that AETHER_CONFIG points at.
      - ./config.docker.yaml:/app/config.docker.yaml:ro
    # Folded scalar: the lines below are joined with spaces into one command.
    command: >
      aether-bootstrap
      --host bootstrap
      --port 7000
      --status-port 17000
      --log-level INFO
    # Healthy once the status endpoint answers; the orchestrator's depends_on
    # waits for this.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-sf"
        - "http://localhost:17000/status"
      start_period: 5s
      interval: 5s
      timeout: 3s
      retries: 5
# ── orchestrator ──────────────────────────────────────────────────────────
  # FastAPI control plane served by uvicorn on port 9000. Started only after
  # bootstrap reports healthy.
  orchestrator:
    image: aether:latest
    container_name: aether-orchestrator
    hostname: orchestrator
    restart: "no"
    networks:
      - aether-net
    depends_on:
      bootstrap:
        condition: service_healthy
    # Runs as root because that is required to access the Docker socket.
    user: root
    volumes:
      # Docker-out-of-Docker: the host daemon's socket is shared with the
      # container.
      # NOTE(review): this gives the container control over the host's Docker
      # daemon — keep port 9000 off untrusted networks.
      - /var/run/docker.sock:/var/run/docker.sock
    ports:
      - "9000:9000"  # FastAPI control plane
    environment:
      BOOTSTRAP_HOST: "bootstrap"
      BOOTSTRAP_PORT: "7000"
      AETHER_IMAGE: "aether:latest"
      DOCKER_NETWORK: "aether-net"
      OTEL_ENDPOINT: "http://otel-collector:4318"
    # Folded and stripped: yields a single-line command string.
    command: >-
      uvicorn aether.orchestrator.main:app
      --host 0.0.0.0
      --port 9000
      --log-level info
    # Probes the /docs page; the dashboard's depends_on waits for this.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-sf"
        - "http://localhost:9000/docs"
      start_period: 15s
      interval: 5s
      timeout: 3s
      retries: 10
# ── dashboard ──────────────────────────────────────────────────────────────
  # React dashboard served by nginx, built from ./dashboard. Started only
  # after the orchestrator reports healthy.
  dashboard:
    image: aether-dashboard:latest
    build:
      context: ./dashboard
      dockerfile: Dockerfile
    container_name: aether-dashboard
    hostname: dashboard
    restart: "no"
    networks:
      - aether-net
    depends_on:
      orchestrator:
        condition: service_healthy
    ports:
      - "3000:80"  # host 3000 → nginx 80
    # Healthy once the site root can be fetched from inside the container.
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "-qO-"
        - "http://127.0.0.1:80/"
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 3
# ── otel-collector ────────────────────────────────────────────────────────
  # Accepts OTLP logs from every Aether process and forwards them to Loki.
  # The image tag is pinned on purpose: the Loki exporter config schema
  # changes between otelcol-contrib releases, and tracking latest can break
  # the config silently.
  otel-collector:
    image: otel/opentelemetry-collector-contrib:0.95.0
    container_name: aether-otel-collector
    hostname: otel-collector
    restart: unless-stopped
    networks:
      - aether-net
    # Short-form dependency: orders startup after loki without waiting on a
    # health condition.
    depends_on:
      - loki
    volumes:
      - ./observability/otel-collector.yaml:/etc/otel-collector.yaml:ro
    command:
      - "--config=/etc/otel-collector.yaml"
    ports:
      - "4317:4317"  # OTLP gRPC (future use)
      - "4318:4318"  # OTLP HTTP (Python opentelemetry-exporter-otlp-proto-http)
# ── loki ──────────────────────────────────────────────────────────────────
  # Log store and query engine: fed by otel-collector, queried by Grafana.
  loki:
    image: grafana/loki:2.9.4
    container_name: aether-loki
    hostname: loki
    restart: unless-stopped
    networks:
      - aether-net
    volumes:
      - ./observability/loki.yaml:/etc/loki/loki.yaml:ro
    # Quoted because the value starts with "-"; the string itself is unchanged.
    command: '-config.file=/etc/loki/loki.yaml'
    ports:
      - "3100:3100"
# ── prometheus ────────────────────────────────────────────────────────────
  # Scrapes /metrics from the orchestrator every 15s. Deliberately has no
  # depends_on: Prometheus keeps retrying scrape targets that are not ready
  # yet.
  prometheus:
    image: prom/prometheus:v2.50.1
    container_name: aether-prometheus
    hostname: prometheus
    restart: unless-stopped
    networks:
      - aether-net
    volumes:
      # The host file is named .yaml; it is mounted under the container's
      # .yml path.
      - ./observability/prometheus.yaml:/etc/prometheus/prometheus.yml:ro
    ports:
      - "9090:9090"
# ── grafana ───────────────────────────────────────────────────────────────
  # Dashboards plus log/metric exploration at http://localhost:3001 (port 3000
  # already belongs to the React dashboard). Anonymous access is switched on
  # so the demo needs no login step.
  # The Prometheus and Loki datasources and the dashboard provider are
  # provisioned automatically from observability/grafana/provisioning/ on
  # first boot.
  grafana:
    image: grafana/grafana:10.3.3
    container_name: aether-grafana
    hostname: grafana
    restart: unless-stopped
    networks:
      - aether-net
    depends_on:
      - loki
      - prometheus
    environment:
      # Grafana listens on 3001 inside the container, matching the published
      # port below.
      GF_SERVER_HTTP_PORT: "3001"
      GF_AUTH_ANONYMOUS_ENABLED: "true"
      GF_AUTH_ANONYMOUS_ORG_ROLE: "Viewer"
    volumes:
      - ./observability/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./observability/grafana/dashboards:/var/lib/grafana/dashboards:ro
    ports:
      - "3001:3001"
# ── network ───────────────────────────────────────────────────────────────
# Single bridge network joined by every service in this file. The explicit
# name is the same value handed to the orchestrator as DOCKER_NETWORK.
networks:
  aether-net:
    driver: bridge
    name: aether-net