Skip to content

Commit abb3c49

Browse files
Lloyd
authored and committed
Standardize container/service names with functional prefixes
- model-vllm-*: vLLM inference engines (DeepSeek, Qwen3.5, Qwen3-30B, GPT-OSS, gpt-oss-single, Qwen3-VL, Qwen3-Embedding, Qwen3-Reranker, Whisper, Gemma-4-31B). GLM-5 and GLM-5.1 are not in this group: they run SGLang, so they use the model-sg-* prefix.
- model-sg-*: SGLang inference engines (GLM-5, GLM-5.1, FLUX, Qwen3.6-35B).
- model-privacy-filter: custom HF inference service.
- proxy-*: inference-proxy (vllm-proxy-rs) and the nginx reverse proxy.

All internal hostname references (VLLM_BASE_URL, VLLM_BACKEND_URLS, openmetrics endpoints, registrar health checks, nginx proxy_pass) are updated to match.

FLUX Datadog labels are switched from vllm to sglang/openmetrics, since FLUX runs SGLang despite its previous vllm- prefix.
1 parent 53e367c commit abb3c49

6 files changed

Lines changed: 145 additions & 145 deletions

File tree

DeepSeek-V3.1.yaml

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,9 @@ services:
6262
- HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
6363
logging: *logging-conf
6464

65-
nginx:
65+
proxy-nginx:
6666
image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
67-
container_name: nginx
67+
container_name: proxy-nginx
6868
command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
6969
ports:
7070
- "8000:80"
@@ -107,9 +107,9 @@ services:
107107
- /var/run/docker.sock:/var/run/docker.sock
108108
logging: *logging-conf
109109

110-
vllm-proxy-deepseek:
110+
proxy-deepseek:
111111
<<: *vllm-proxy-common
112-
container_name: vllm-proxy-deepseek
112+
container_name: proxy-deepseek
113113
environment:
114114
- NVIDIA_VISIBLE_DEVICES=all
115115
- CLOUD_API_URL=https://cloud-api.near.ai
@@ -118,16 +118,16 @@ services:
118118
- MODEL_NAME=deepseek-ai/DeepSeek-V3.1
119119
- OHTTP_ENABLED=true
120120
- TOKEN=${PROXY_TOKEN}
121-
- VLLM_BASE_URL=http://vllm-deepseek:8000
121+
- VLLM_BASE_URL=http://model-vllm-deepseek:8000
122122
- TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
123123
- USE_NV_ATTESTATION_SDK=true
124124
labels:
125125
com.datadoghq.ad.logs: '[{"source": "vllm-proxy", "service": "vllm-proxy", "tags": ["model:deepseek-ai/DeepSeek-V3.1", "ip:${HOST_IP}", "port:8000"]}]'
126126

127-
vllm-deepseek:
127+
model-vllm-deepseek:
128128
<<: *vllm-common
129129
image: vllm/vllm-openai@sha256:0dc46f74eb0e630675d83101dc66c6441c4475cceedcf9235ee42b87c3affd23 # v0.17.1
130-
container_name: vllm-deepseek
130+
container_name: model-vllm-deepseek
131131
depends_on:
132132
model-downloader:
133133
condition: service_completed_successfully
@@ -167,7 +167,7 @@ services:
167167
com.datadoghq.ad.check_names: '["vllm"]'
168168
com.datadoghq.ad.init_configs: "[{}]"
169169
com.datadoghq.ad.logs: '[{"source": "vllm", "service": "vllm", "tags":["model:deepseek-ai/DeepSeek-V3.1","ip:${HOST_IP}", "port:8000"]}]'
170-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://vllm-deepseek:8000/metrics", "histogram_buckets_as_distributions": true, "service": "vllm-deepseek", "tags":["model:deepseek-ai/DeepSeek-V3.1","ip:${HOST_IP}", "port:8000"]}]'
170+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-vllm-deepseek:8000/metrics", "histogram_buckets_as_distributions": true, "service": "model-vllm-deepseek", "tags":["model:deepseek-ai/DeepSeek-V3.1","ip:${HOST_IP}", "port:8000"]}]'
171171

172172
dcgm-exporter:
173173
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -216,7 +216,7 @@ configs:
216216
client_body_buffer_size 1m;
217217
218218
location / {
219-
proxy_pass http://vllm-proxy-deepseek:8000;
219+
proxy_pass http://proxy-deepseek:8000;
220220
proxy_http_version 1.1;
221221
proxy_set_header Host $$host;
222222
proxy_set_header X-Real-IP $$remote_addr;
@@ -241,7 +241,7 @@ configs:
241241
client_body_buffer_size 1m;
242242
243243
location / {
244-
proxy_pass http://vllm-proxy-deepseek:8000;
244+
proxy_pass http://proxy-deepseek:8000;
245245
proxy_http_version 1.1;
246246
proxy_set_header Host $$host;
247247
proxy_set_header X-Real-IP $$remote_addr;
@@ -291,14 +291,14 @@ configs:
291291
trap cleanup TERM INT
292292
293293
check_inference() {
294-
curl -sf --max-time 10 -X POST "http://vllm-deepseek:8000/v1/chat/completions" \
294+
curl -sf --max-time 10 -X POST "http://model-vllm-deepseek:8000/v1/chat/completions" \
295295
-H "Content-Type: application/json" \
296296
-d '{"model":"deepseek-ai/DeepSeek-V3.1","messages":[{"role":"user","content":"hi"}],"max_tokens":1}' \
297297
> /dev/null 2>&1
298298
}
299299
300300
echo "Waiting for model to be ready..."
301-
until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
301+
until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
302302
echo "Model ready, starting registration loop"
303303
304304
while true; do

GLM-5.1.yaml

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -48,9 +48,9 @@ services:
4848
environment:
4949
- HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
5050

51-
nginx:
51+
proxy-nginx:
5252
image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
53-
container_name: nginx
53+
container_name: proxy-nginx
5454
command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
5555
ports:
5656
- "8000:80"
@@ -91,18 +91,18 @@ services:
9191
- MODEL_NAME=zai-org/GLM-5.1-FP8
9292
- OHTTP_ENABLED=true
9393
- TOKEN=${PROXY_TOKEN}
94-
- VLLM_BASE_URL=http://glm51:8000
94+
- VLLM_BASE_URL=http://model-sg-glm51:8000
9595
- TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
9696
- USE_NV_ATTESTATION_SDK=true
9797

98-
glm51:
98+
model-sg-glm51:
9999
<<: *nvidia
100100
init: true
101101
depends_on:
102102
model-downloader:
103103
condition: service_completed_successfully
104104
image: lmsysorg/sglang:dev@sha256:e1eee3f75e62827dbfa29994a260934c2bc7e5adfb047170576f1676b436b926
105-
container_name: glm51
105+
container_name: model-sg-glm51
106106
command: >
107107
sglang serve
108108
--model-path zai-org/GLM-5.1-FP8
@@ -146,7 +146,7 @@ services:
146146
com.datadoghq.ad.check_names: '["openmetrics"]'
147147
com.datadoghq.ad.init_configs: "[{}]"
148148
com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
149-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://glm51:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5.1", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
149+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-glm51:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5.1", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
150150

151151
dcgm-exporter:
152152
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -231,13 +231,13 @@ configs:
231231
# 1-token completion health check directly to model container (no auth needed)
232232
check_inference() {
233233
echo "Performing health check on model endpoint..."
234-
curl -sSf --max-time 45 -X POST "http://glm51:8000/v1/chat/completions" \
234+
curl -sSf --max-time 45 -X POST "http://model-sg-glm51:8000/v1/chat/completions" \
235235
-H "Content-Type: application/json" \
236236
-d '{"model":"zai-org/GLM-5.1-FP8","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
237237
}
238238
239239
echo "Waiting for model to be ready..."
240-
until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
240+
until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
241241
echo "Model ready, starting registration loop"
242242
243243
while true; do

GLM-5.yaml

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -48,9 +48,9 @@ services:
4848
environment:
4949
- HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
5050

51-
nginx:
51+
proxy-nginx:
5252
image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
53-
container_name: nginx
53+
container_name: proxy-nginx
5454
command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
5555
ports:
5656
- "8000:80"
@@ -91,22 +91,22 @@ services:
9191
- MODEL_NAME=zai-org/GLM-5-FP8
9292
- OHTTP_ENABLED=true
9393
- TOKEN=${PROXY_TOKEN}
94-
- VLLM_BASE_URL=http://glm:8000
94+
- VLLM_BASE_URL=http://model-sg-glm:8000
9595
- TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
9696
# Switches GPU evidence collection from the Python cc_admin
9797
# subprocess to the libnvat-based Rust SDK. Fleet-wide flip
9898
# 2026-05-08 after Phase 4 canary on gpu07 + qwen3-vl ran
9999
# clean for ~24h.
100100
- USE_NV_ATTESTATION_SDK=true
101101

102-
glm:
102+
model-sg-glm:
103103
<<: *nvidia
104104
init: true
105105
depends_on:
106106
model-downloader:
107107
condition: service_completed_successfully
108108
image: lmsysorg/sglang:glm5-hopper@sha256:4680bebe1b0bdfa5a16bcb8ae410e8fc6e21c64f2b6e446e5b6d01f52347d3e9
109-
container_name: glm
109+
container_name: model-sg-glm
110110
command: >
111111
sglang serve
112112
--model-path zai-org/GLM-5-FP8
@@ -147,7 +147,7 @@ services:
147147
com.datadoghq.ad.check_names: '["openmetrics"]'
148148
com.datadoghq.ad.init_configs: "[{}]"
149149
com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:zai-org/GLM-5-FP8","ip:${HOST_IP}","port:8000"]}]'
150-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://glm:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5", "tags":["model:zai-org/GLM-5-FP8","ip:${HOST_IP}","port:8000"]}]'
150+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-glm:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5", "tags":["model:zai-org/GLM-5-FP8","ip:${HOST_IP}","port:8000"]}]'
151151

152152
dcgm-exporter:
153153
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -231,11 +231,11 @@ configs:
231231
232232
#GET Health check for inference service
233233
check_inference() {
234-
curl -sSf --max-time 10 "http://glm:8000/v1/models"
234+
curl -sSf --max-time 10 "http://model-sg-glm:8000/v1/models"
235235
}
236236
237237
echo "Waiting for model to be ready..."
238-
until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
238+
until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
239239
echo "Model ready, starting registration loop"
240240
241241
while true; do

Qwen3.5-122B.yaml

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -87,9 +87,9 @@ services:
8787
- HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
8888
logging: *logging-conf
8989

90-
nginx:
90+
proxy-nginx:
9191
image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
92-
container_name: nginx
92+
container_name: proxy-nginx
9393
command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
9494
ports:
9595
- "8000:80"
@@ -121,9 +121,9 @@ services:
121121

122122
# --- Single proxy for both backends ---
123123

124-
vllm-proxy-qwen35:
124+
proxy-qwen35:
125125
<<: *vllm-proxy-common
126-
container_name: vllm-proxy-qwen35
126+
container_name: proxy-qwen35
127127
environment:
128128
- NVIDIA_VISIBLE_DEVICES=all
129129
- CLOUD_API_URL=https://cloud-api.near.ai
@@ -132,7 +132,7 @@ services:
132132
- MODEL_NAME=Qwen/Qwen3.5-122B-A10B
133133
- OHTTP_ENABLED=true
134134
- TOKEN=${PROXY_TOKEN}
135-
- VLLM_BACKEND_URLS=http://vllm-qwen35-122b-1:8000,http://vllm-qwen35-122b-2:8000
135+
- VLLM_BACKEND_URLS=http://model-vllm-qwen35-122b-1:8000,http://model-vllm-qwen35-122b-2:8000
136136
- VLLM_PROXY_MAX_REQUEST_SIZE=104857600
137137
- TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
138138
- USE_NV_ATTESTATION_SDK=true
@@ -141,9 +141,9 @@ services:
141141

142142
# --- Qwen3.5-122B-A10B instance 1 (GPUs 0-3) ---
143143

144-
vllm-qwen35-122b-1:
144+
model-vllm-qwen35-122b-1:
145145
<<: *vllm-qwen35-122b-common
146-
container_name: vllm-qwen35-122b-1
146+
container_name: model-vllm-qwen35-122b-1
147147
depends_on:
148148
model-downloader:
149149
condition: service_completed_successfully
@@ -158,13 +158,13 @@ services:
158158
com.datadoghq.ad.check_names: '["openmetrics"]'
159159
com.datadoghq.ad.init_configs: "[{}]"
160160
com.datadoghq.ad.logs: '[{"source": "vllm", "service": "vllm", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000","instance:1"]}]'
161-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://vllm-qwen35-122b-1:8000/metrics", "metrics":["vllm:.*"], "histogram_buckets_as_distributions": true, "service": "vllm-qwen35-122b-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
161+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-vllm-qwen35-122b-1:8000/metrics", "metrics":["vllm:.*"], "histogram_buckets_as_distributions": true, "service": "model-vllm-qwen35-122b-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
162162

163163
# --- Qwen3.5-122B-A10B instance 2 (GPUs 4-7) ---
164164

165-
vllm-qwen35-122b-2:
165+
model-vllm-qwen35-122b-2:
166166
<<: *vllm-qwen35-122b-common
167-
container_name: vllm-qwen35-122b-2
167+
container_name: model-vllm-qwen35-122b-2
168168
depends_on:
169169
model-downloader:
170170
condition: service_completed_successfully
@@ -179,7 +179,7 @@ services:
179179
com.datadoghq.ad.check_names: '["openmetrics"]'
180180
com.datadoghq.ad.init_configs: "[{}]"
181181
com.datadoghq.ad.logs: '[{"source": "vllm", "service": "vllm", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001","instance:2"]}]'
182-
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://vllm-qwen35-122b-2:8000/metrics", "metrics":["vllm:.*"], "histogram_buckets_as_distributions": true, "service": "vllm-qwen35-122b-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
182+
com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-vllm-qwen35-122b-2:8000/metrics", "metrics":["vllm:.*"], "histogram_buckets_as_distributions": true, "service": "model-vllm-qwen35-122b-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
183183

184184
dcgm-exporter:
185185
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -262,13 +262,13 @@ configs:
262262
# Health check directly on backend (no auth needed on raw vLLM container)
263263
check_inference() {
264264
echo "Performing health check on backend..."
265-
curl -sSf --max-time 45 -X POST "http://vllm-qwen35-122b-1:8000/v1/chat/completions" \
265+
curl -sSf --max-time 45 -X POST "http://model-vllm-qwen35-122b-1:8000/v1/chat/completions" \
266266
-H "Content-Type: application/json" \
267267
-d '{"model":"Qwen/Qwen3.5-122B-A10B","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
268268
}
269269
270270
echo "Waiting for model to be ready..."
271-
until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
271+
until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
272272
echo "Model ready, starting registration loop"
273273
274274
while true; do
@@ -310,7 +310,7 @@ configs:
310310
# :80 — single proxy handles both backends
311311
server {
312312
listen 80 default_server;
313-
location / { proxy_pass http://vllm-proxy-qwen35:8000; }
313+
location / { proxy_pass http://proxy-qwen35:8000; }
314314
}
315315
316316
ssl_certificate /etc/letsencrypt/live/completions.near.ai/fullchain.pem;
@@ -328,5 +328,5 @@ configs:
328328
# PINGs (http2_keep_alive_while_idle).
329329
keepalive_timeout 1h;
330330
keepalive_requests 1000000;
331-
location / { proxy_pass http://vllm-proxy-qwen35:8000; }
331+
location / { proxy_pass http://proxy-qwen35:8000; }
332332
}

gpt-oss-single.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
services:
2-
vllm-gpt-oss:
2+
model-vllm-gpt-oss:
33
image: vllm/vllm-openai@sha256:6766ce0c459e24b76f3e9ba14ffc0442131ef4248c904efdcbf0d89e38be01fe
44
runtime: nvidia
55
ipc: host

0 commit comments

Comments
 (0)