Skip to content

Commit f8ad79e

Browse files
authored
Merge pull request #32 from nearai/feat/standardize-container-names
Standardize container/service names with functional prefixes
2 parents bc92d67 + dd24080 commit f8ad79e

3 files changed

Lines changed: 124 additions & 124 deletions

File tree

GLM-5.1.yaml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ services:
4848
environment:
4949
- HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
5050

51-
nginx:
51+
proxy-nginx:
5252
image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
53-
container_name: nginx
53+
container_name: proxy-nginx
5454
command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
5555
ports:
5656
- "8000:80"
@@ -92,20 +92,20 @@ services:
9292
- MODEL_NAME=zai-org/GLM-5.1-FP8
9393
- OHTTP_ENABLED=true
9494
- TOKEN=${PROXY_TOKEN}
95-
- VLLM_BASE_URL=http://glm51:8000
95+
- VLLM_BASE_URL=http://model-sg-glm51:8000
9696
- TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
9797
- USE_NV_ATTESTATION_SDK=true
9898
- WEB_CONTEXT_SEARCH_URL=${WEB_CONTEXT_SEARCH_URL}
9999
- WEB_CONTEXT_SEARCH_API_KEY=${WEB_CONTEXT_SEARCH_API_KEY}
100100

101-
glm51:
101+
model-sg-glm51:
102102
<<: *nvidia
103103
init: true
104104
depends_on:
105105
model-downloader:
106106
condition: service_completed_successfully
107107
image: lmsysorg/sglang:dev-cu12@sha256:aac6b242680daeb74d2ab1d85f70575357552d7d165d2e5d30eb362797db54a1
108-
container_name: glm51
108+
container_name: model-sg-glm51
109109
command: >
110110
sglang serve
111111
--model-path zai-org/GLM-5.1-FP8
@@ -156,7 +156,7 @@ services:
156156
com.datadoghq.ad.check_names: '["openmetrics"]'
157157
com.datadoghq.ad.init_configs: "[{}]"
158158
com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
159-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://glm51:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5.1", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
159+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-glm51:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5.1", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
160160

161161
dcgm-exporter:
162162
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -241,13 +241,13 @@ configs:
241241
# 1-token completion health check directly to model container (no auth needed)
242242
check_inference() {
243243
echo "Performing health check on model endpoint..."
244-
curl -sSf --max-time 45 -X POST "http://glm51:8000/v1/chat/completions" \
244+
curl -sSf --max-time 45 -X POST "http://model-sg-glm51:8000/v1/chat/completions" \
245245
-H "Content-Type: application/json" \
246246
-d '{"model":"zai-org/GLM-5.1-FP8","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
247247
}
248248
249249
echo "Waiting for model to be ready..."
250-
until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
250+
until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
251251
echo "Model ready, starting registration loop"
252252
253253
while true; do

Qwen3.5-122B.yaml

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ x-vllm-proxy-common: &vllm-proxy-common
2727
restart: unless-stopped
2828
logging: *logging-conf
2929

30-
x-qwen35-122b-common: &qwen35-122b-common
30+
x-sg-qwen35-122b-common: &sg-qwen35-122b-common
3131
<<: *nvidia
3232
init: true
3333
# SGLang v0.5.12 (cu129) — migrated off vLLM 2026-05-22.
@@ -92,9 +92,9 @@ services:
9292
- HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
9393
logging: *logging-conf
9494

95-
nginx:
95+
proxy-nginx:
9696
image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
97-
container_name: nginx
97+
container_name: proxy-nginx
9898
command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
9999
ports:
100100
- "8000:80"
@@ -126,9 +126,9 @@ services:
126126

127127
# --- Single proxy for both backends ---
128128

129-
vllm-proxy-qwen35:
129+
proxy-qwen35:
130130
<<: *vllm-proxy-common
131-
container_name: vllm-proxy-qwen35
131+
container_name: proxy-qwen35
132132
environment:
133133
- NVIDIA_VISIBLE_DEVICES=all
134134
- CLOUD_API_URL=https://cloud-api.near.ai
@@ -138,7 +138,7 @@ services:
138138
- MODEL_NAME=Qwen/Qwen3.5-122B-A10B
139139
- OHTTP_ENABLED=true
140140
- TOKEN=${PROXY_TOKEN}
141-
- VLLM_BACKEND_URLS=http://qwen35-1:8000,http://qwen35-2:8000
141+
- VLLM_BACKEND_URLS=http://model-sg-qwen35-122b-1:8000,http://model-sg-qwen35-122b-2:8000
142142
- VLLM_PROXY_MAX_REQUEST_SIZE=104857600
143143
- TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
144144
- USE_NV_ATTESTATION_SDK=true
@@ -147,9 +147,9 @@ services:
147147

148148
# --- Qwen3.5-122B-A10B instance 1 (GPUs 0-3) ---
149149

150-
qwen35-1:
151-
<<: *qwen35-122b-common
152-
container_name: qwen35-1
150+
model-sg-qwen35-122b-1:
151+
<<: *sg-qwen35-122b-common
152+
container_name: model-sg-qwen35-122b-1
153153
depends_on:
154154
model-downloader:
155155
condition: service_completed_successfully
@@ -164,13 +164,13 @@ services:
164164
com.datadoghq.ad.check_names: '["openmetrics"]'
165165
com.datadoghq.ad.init_configs: "[{}]"
166166
com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000","instance:1"]}]'
167-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://qwen35-1:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
167+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-qwen35-122b-1:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
168168

169169
# --- Qwen3.5-122B-A10B instance 2 (GPUs 4-7) ---
170170

171-
qwen35-2:
172-
<<: *qwen35-122b-common
173-
container_name: qwen35-2
171+
model-sg-qwen35-122b-2:
172+
<<: *sg-qwen35-122b-common
173+
container_name: model-sg-qwen35-122b-2
174174
depends_on:
175175
model-downloader:
176176
condition: service_completed_successfully
@@ -185,7 +185,7 @@ services:
185185
com.datadoghq.ad.check_names: '["openmetrics"]'
186186
com.datadoghq.ad.init_configs: "[{}]"
187187
com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001","instance:2"]}]'
188-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://qwen35-2:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
188+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-qwen35-122b-2:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
189189

190190
dcgm-exporter:
191191
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -269,13 +269,13 @@ configs:
269269
# Health check directly on backend (no auth needed on raw sglang container)
270270
check_inference() {
271271
echo "Performing health check on backend..."
272-
curl -sSf --max-time 45 -X POST "http://qwen35-1:8000/v1/chat/completions" \
272+
curl -sSf --max-time 45 -X POST "http://model-sg-qwen35-122b-1:8000/v1/chat/completions" \
273273
-H "Content-Type: application/json" \
274274
-d '{"model":"Qwen/Qwen3.5-122B-A10B","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
275275
}
276276
277277
echo "Waiting for model to be ready..."
278-
until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
278+
until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
279279
echo "Model ready, starting registration loop"
280280
281281
while true; do
@@ -334,7 +334,7 @@ configs:
334334
# :80 — single proxy handles both backends
335335
server {
336336
listen 80 default_server;
337-
location / { proxy_pass http://vllm-proxy-qwen35:8000; }
337+
location / { proxy_pass http://proxy-qwen35:8000; }
338338
}
339339
340340
ssl_certificate /etc/letsencrypt/live/completions.near.ai/fullchain.pem;
@@ -352,5 +352,5 @@ configs:
352352
# PINGs (http2_keep_alive_while_idle).
353353
keepalive_timeout 1h;
354354
keepalive_requests 1000000;
355-
location / { proxy_pass http://vllm-proxy-qwen35:8000; }
355+
location / { proxy_pass http://proxy-qwen35:8000; }
356356
}

0 commit comments

Comments
 (0)