Skip to content

Commit 5c66217

Browse files
Lloyd and claude
authored and committed
Standardize container/service names with functional prefixes
- model-vllm-*: vLLM inference engines (DeepSeek, Qwen3.5, Qwen3-30B, GPT-OSS, Qwen3-VL, Qwen3-Embedding, Qwen3-Reranker, Whisper)
- model-sg-*: SGLang inference engines (GLM-5, GLM-5.1, FLUX x4)
- model-privacy-filter: custom HF inference service
- proxy-*: inference-proxy (vllm-proxy-rs) and nginx reverse proxy

(also fixes flux containers which were incorrectly prefixed vllm-)

All internal hostname references (VLLM_BASE_URL, VLLM_BACKEND_URLS, openmetrics endpoints, registrar health checks, nginx proxy_pass) updated to match.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent fa176ef commit 5c66217

6 files changed

Lines changed: 140 additions & 140 deletions

File tree

DeepSeek-V3.1.yaml

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,9 @@ services:
6262
- HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
6363
logging: *logging-conf
6464

65-
nginx:
65+
proxy-nginx:
6666
image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
67-
container_name: nginx
67+
container_name: proxy-nginx
6868
command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
6969
ports:
7070
- "8000:80"
@@ -107,9 +107,9 @@ services:
107107
- /var/run/docker.sock:/var/run/docker.sock
108108
logging: *logging-conf
109109

110-
vllm-proxy-deepseek:
110+
proxy-deepseek:
111111
<<: *vllm-proxy-common
112-
container_name: vllm-proxy-deepseek
112+
container_name: proxy-deepseek
113113
environment:
114114
- NVIDIA_VISIBLE_DEVICES=all
115115
- CLOUD_API_URL=https://cloud-api.near.ai
@@ -118,15 +118,15 @@ services:
118118
- MODEL_NAME=deepseek-ai/DeepSeek-V3.1
119119
- OHTTP_ENABLED=true
120120
- TOKEN=${PROXY_TOKEN}
121-
- VLLM_BASE_URL=http://vllm-deepseek:8000
121+
- VLLM_BASE_URL=http://model-vllm-deepseek:8000
122122
- TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
123123
labels:
124-
com.datadoghq.ad.logs: '[{"source": "vllm-proxy", "service": "vllm-proxy", "tags": ["model:deepseek-ai/DeepSeek-V3.1", "ip:${HOST_IP}", "port:8000"]}]'
124+
com.datadoghq.ad.logs: '[{"source": "vllm-proxy", "service": "proxy-deepseek", "tags": ["model:deepseek-ai/DeepSeek-V3.1", "ip:${HOST_IP}", "port:8000"]}]'
125125

126-
vllm-deepseek:
126+
model-vllm-deepseek:
127127
<<: *vllm-common
128128
image: vllm/vllm-openai@sha256:0dc46f74eb0e630675d83101dc66c6441c4475cceedcf9235ee42b87c3affd23 # v0.17.1
129-
container_name: vllm-deepseek
129+
container_name: model-vllm-deepseek
130130
depends_on:
131131
model-downloader:
132132
condition: service_completed_successfully
@@ -166,7 +166,7 @@ services:
166166
com.datadoghq.ad.check_names: '["vllm"]'
167167
com.datadoghq.ad.init_configs: "[{}]"
168168
com.datadoghq.ad.logs: '[{"source": "vllm", "service": "vllm", "tags":["model:deepseek-ai/DeepSeek-V3.1","ip:${HOST_IP}", "port:8000"]}]'
169-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://vllm-deepseek:8000/metrics", "histogram_buckets_as_distributions": true, "service": "vllm-deepseek", "tags":["model:deepseek-ai/DeepSeek-V3.1","ip:${HOST_IP}", "port:8000"]}]'
169+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-vllm-deepseek:8000/metrics", "histogram_buckets_as_distributions": true, "service": "model-vllm-deepseek", "tags":["model:deepseek-ai/DeepSeek-V3.1","ip:${HOST_IP}", "port:8000"]}]'
170170

171171
dcgm-exporter:
172172
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -215,7 +215,7 @@ configs:
215215
client_body_buffer_size 1m;
216216
217217
location / {
218-
proxy_pass http://vllm-proxy-deepseek:8000;
218+
proxy_pass http://proxy-deepseek:8000;
219219
proxy_http_version 1.1;
220220
proxy_set_header Host $$host;
221221
proxy_set_header X-Real-IP $$remote_addr;
@@ -240,7 +240,7 @@ configs:
240240
client_body_buffer_size 1m;
241241
242242
location / {
243-
proxy_pass http://vllm-proxy-deepseek:8000;
243+
proxy_pass http://proxy-deepseek:8000;
244244
proxy_http_version 1.1;
245245
proxy_set_header Host $$host;
246246
proxy_set_header X-Real-IP $$remote_addr;
@@ -290,14 +290,14 @@ configs:
290290
trap cleanup TERM INT
291291
292292
check_inference() {
293-
curl -sf --max-time 10 -X POST "http://vllm-deepseek:8000/v1/chat/completions" \
293+
curl -sf --max-time 10 -X POST "http://model-vllm-deepseek:8000/v1/chat/completions" \
294294
-H "Content-Type: application/json" \
295295
-d '{"model":"deepseek-ai/DeepSeek-V3.1","messages":[{"role":"user","content":"hi"}],"max_tokens":1}' \
296296
> /dev/null 2>&1
297297
}
298298
299299
echo "Waiting for model to be ready..."
300-
until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
300+
until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
301301
echo "Model ready, starting registration loop"
302302
303303
while true; do

GLM-5.1.yaml

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -48,9 +48,9 @@ services:
4848
environment:
4949
- HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
5050

51-
nginx:
51+
proxy-nginx:
5252
image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
53-
container_name: nginx
53+
container_name: proxy-nginx
5454
command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
5555
ports:
5656
- "8000:80"
@@ -91,17 +91,17 @@ services:
9191
- MODEL_NAME=zai-org/GLM-5.1-FP8
9292
- OHTTP_ENABLED=true
9393
- TOKEN=${PROXY_TOKEN}
94-
- VLLM_BASE_URL=http://glm51:8000
94+
- VLLM_BASE_URL=http://model-sg-glm51:8000
9595
- TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
9696

97-
glm51:
97+
model-sg-glm51:
9898
<<: *nvidia
9999
init: true
100100
depends_on:
101101
model-downloader:
102102
condition: service_completed_successfully
103103
image: lmsysorg/sglang:dev@sha256:e1eee3f75e62827dbfa29994a260934c2bc7e5adfb047170576f1676b436b926
104-
container_name: glm51
104+
container_name: model-sg-glm51
105105
command: >
106106
sglang serve
107107
--model-path zai-org/GLM-5.1-FP8
@@ -144,7 +144,7 @@ services:
144144
com.datadoghq.ad.check_names: '["openmetrics"]'
145145
com.datadoghq.ad.init_configs: "[{}]"
146146
com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
147-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://glm51:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5.1", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
147+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-glm51:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5.1", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
148148

149149
dcgm-exporter:
150150
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -229,13 +229,13 @@ configs:
229229
# 1-token completion health check directly to model container (no auth needed)
230230
check_inference() {
231231
echo "Performing health check on model endpoint..."
232-
curl -sSf --max-time 45 -X POST "http://glm51:8000/v1/chat/completions" \
232+
curl -sSf --max-time 45 -X POST "http://model-sg-glm51:8000/v1/chat/completions" \
233233
-H "Content-Type: application/json" \
234234
-d '{"model":"zai-org/GLM-5.1-FP8","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
235235
}
236236
237237
echo "Waiting for model to be ready..."
238-
until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
238+
until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
239239
echo "Model ready, starting registration loop"
240240
241241
while true; do

GLM-5.yaml

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -48,9 +48,9 @@ services:
4848
environment:
4949
- HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
5050

51-
nginx:
51+
proxy-nginx:
5252
image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
53-
container_name: nginx
53+
container_name: proxy-nginx
5454
command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
5555
ports:
5656
- "8000:80"
@@ -91,21 +91,21 @@ services:
9191
- MODEL_NAME=zai-org/GLM-5-FP8
9292
- OHTTP_ENABLED=true
9393
- TOKEN=${PROXY_TOKEN}
94-
- VLLM_BASE_URL=http://glm:8000
94+
- VLLM_BASE_URL=http://model-sg-glm:8000
9595
- TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
9696
# Default off; set to "true" via compose/up env to canary one host
9797
# at a time. Switches GPU evidence collection from the Python
9898
# subprocess (cc_admin) to the libnvat-based Rust SDK.
9999
- USE_NV_ATTESTATION_SDK=${USE_NV_ATTESTATION_SDK:-false}
100100

101-
glm:
101+
model-sg-glm:
102102
<<: *nvidia
103103
init: true
104104
depends_on:
105105
model-downloader:
106106
condition: service_completed_successfully
107107
image: lmsysorg/sglang:glm5-hopper@sha256:4680bebe1b0bdfa5a16bcb8ae410e8fc6e21c64f2b6e446e5b6d01f52347d3e9
108-
container_name: glm
108+
container_name: model-sg-glm
109109
command: >
110110
sglang serve
111111
--model-path zai-org/GLM-5-FP8
@@ -146,7 +146,7 @@ services:
146146
com.datadoghq.ad.check_names: '["openmetrics"]'
147147
com.datadoghq.ad.init_configs: "[{}]"
148148
com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:zai-org/GLM-5-FP8","ip:${HOST_IP}","port:8000"]}]'
149-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://glm:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5", "tags":["model:zai-org/GLM-5-FP8","ip:${HOST_IP}","port:8000"]}]'
149+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-glm:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5", "tags":["model:zai-org/GLM-5-FP8","ip:${HOST_IP}","port:8000"]}]'
150150

151151
dcgm-exporter:
152152
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -230,11 +230,11 @@ configs:
230230
231231
#GET Health check for inference service
232232
check_inference() {
233-
curl -sSf --max-time 10 "http://glm:8000/v1/models"
233+
curl -sSf --max-time 10 "http://model-sg-glm:8000/v1/models"
234234
}
235235
236236
echo "Waiting for model to be ready..."
237-
until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
237+
until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
238238
echo "Model ready, starting registration loop"
239239
240240
while true; do

Qwen3.5-122B.yaml

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -87,9 +87,9 @@ services:
8787
- HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
8888
logging: *logging-conf
8989

90-
nginx:
90+
proxy-nginx:
9191
image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
92-
container_name: nginx
92+
container_name: proxy-nginx
9393
command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
9494
ports:
9595
- "8000:80"
@@ -121,9 +121,9 @@ services:
121121

122122
# --- Single proxy for both backends ---
123123

124-
vllm-proxy-qwen35:
124+
proxy-qwen35:
125125
<<: *vllm-proxy-common
126-
container_name: vllm-proxy-qwen35
126+
container_name: proxy-qwen35
127127
environment:
128128
- NVIDIA_VISIBLE_DEVICES=all
129129
- CLOUD_API_URL=https://cloud-api.near.ai
@@ -132,16 +132,16 @@ services:
132132
- MODEL_NAME=Qwen/Qwen3.5-122B-A10B
133133
- OHTTP_ENABLED=true
134134
- TOKEN=${PROXY_TOKEN}
135-
- VLLM_BACKEND_URLS=http://vllm-qwen35-122b-1:8000,http://vllm-qwen35-122b-2:8000
135+
- VLLM_BACKEND_URLS=http://model-vllm-qwen35-122b-1:8000,http://model-vllm-qwen35-122b-2:8000
136136
- TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
137137
labels:
138138
com.datadoghq.ad.logs: '[{"source": "vllm-proxy", "service": "vllm-proxy", "tags": ["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
139139

140140
# --- Qwen3.5-122B-A10B instance 1 (GPUs 0-3) ---
141141

142-
vllm-qwen35-122b-1:
142+
model-vllm-qwen35-122b-1:
143143
<<: *vllm-qwen35-122b-common
144-
container_name: vllm-qwen35-122b-1
144+
container_name: model-vllm-qwen35-122b-1
145145
depends_on:
146146
model-downloader:
147147
condition: service_completed_successfully
@@ -156,13 +156,13 @@ services:
156156
com.datadoghq.ad.check_names: '["vllm"]'
157157
com.datadoghq.ad.init_configs: "[{}]"
158158
com.datadoghq.ad.logs: '[{"source": "vllm", "service": "vllm", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000","instance:1"]}]'
159-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://vllm-qwen35-122b-1:8000/metrics", "histogram_buckets_as_distributions": true, "service": "vllm-qwen35-122b-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
159+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-vllm-qwen35-122b-1:8000/metrics", "histogram_buckets_as_distributions": true, "service": "model-vllm-qwen35-122b-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
160160

161161
# --- Qwen3.5-122B-A10B instance 2 (GPUs 4-7) ---
162162

163-
vllm-qwen35-122b-2:
163+
model-vllm-qwen35-122b-2:
164164
<<: *vllm-qwen35-122b-common
165-
container_name: vllm-qwen35-122b-2
165+
container_name: model-vllm-qwen35-122b-2
166166
depends_on:
167167
model-downloader:
168168
condition: service_completed_successfully
@@ -177,7 +177,7 @@ services:
177177
com.datadoghq.ad.check_names: '["vllm"]'
178178
com.datadoghq.ad.init_configs: "[{}]"
179179
com.datadoghq.ad.logs: '[{"source": "vllm", "service": "vllm", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001","instance:2"]}]'
180-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://vllm-qwen35-122b-2:8000/metrics", "histogram_buckets_as_distributions": true, "service": "vllm-qwen35-122b-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
180+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-vllm-qwen35-122b-2:8000/metrics", "histogram_buckets_as_distributions": true, "service": "model-vllm-qwen35-122b-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
181181

182182
dcgm-exporter:
183183
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -260,13 +260,13 @@ configs:
260260
# Health check directly on backend (no auth needed on raw vLLM container)
261261
check_inference() {
262262
echo "Performing health check on backend..."
263-
curl -sSf --max-time 45 -X POST "http://vllm-qwen35-122b-1:8000/v1/chat/completions" \
263+
curl -sSf --max-time 45 -X POST "http://model-vllm-qwen35-122b-1:8000/v1/chat/completions" \
264264
-H "Content-Type: application/json" \
265265
-d '{"model":"Qwen/Qwen3.5-122B-A10B","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
266266
}
267267
268268
echo "Waiting for model to be ready..."
269-
until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
269+
until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
270270
echo "Model ready, starting registration loop"
271271
272272
while true; do
@@ -308,7 +308,7 @@ configs:
308308
# :80 — single proxy handles both backends
309309
server {
310310
listen 80 default_server;
311-
location / { proxy_pass http://vllm-proxy-qwen35:8000; }
311+
location / { proxy_pass http://proxy-qwen35:8000; }
312312
}
313313
314314
ssl_certificate /etc/letsencrypt/live/completions.near.ai/fullchain.pem;
@@ -326,5 +326,5 @@ configs:
326326
# PINGs (http2_keep_alive_while_idle).
327327
keepalive_timeout 1h;
328328
keepalive_requests 1000000;
329-
location / { proxy_pass http://vllm-proxy-qwen35:8000; }
329+
location / { proxy_pass http://proxy-qwen35:8000; }
330330
}

gpt-oss-single.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
services:
2-
vllm-gpt-oss:
2+
model-vllm-gpt-oss:
33
image: vllm/vllm-openai@sha256:6766ce0c459e24b76f3e9ba14ffc0442131ef4248c904efdcbf0d89e38be01fe
44
runtime: nvidia
55
ipc: host

0 commit comments

Comments
 (0)