@@ -87,9 +87,9 @@ services:
8787 - HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
8888 logging : *logging-conf
8989
90- nginx :
90+ proxy-nginx :
9191 image : nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
92- container_name : nginx
92+ container_name : proxy-nginx
9393 command : /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
9494 ports :
9595 - " 8000:80"
@@ -121,9 +121,9 @@ services:
121121
122122 # --- Single proxy for both backends ---
123123
124- vllm-proxy-qwen35 :
124+ proxy-qwen35 :
125125 << : *vllm-proxy-common
126- container_name : vllm-proxy-qwen35
126+ container_name : proxy-qwen35
127127 environment :
128128 - NVIDIA_VISIBLE_DEVICES=all
129129 - CLOUD_API_URL=https://cloud-api.near.ai
@@ -132,16 +132,16 @@ services:
132132 - MODEL_NAME=Qwen/Qwen3.5-122B-A10B
133133 - OHTTP_ENABLED=true
134134 - TOKEN=${PROXY_TOKEN}
135- - VLLM_BACKEND_URLS=http://vllm-qwen35-122b-1:8000,http://vllm-qwen35-122b-2:8000
135+ - VLLM_BACKEND_URLS=http://model-vllm-qwen35-122b-1:8000,http://model-vllm-qwen35-122b-2:8000
136136 - TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
137137 labels :
138138 com.datadoghq.ad.logs : ' [{"source": "vllm-proxy", "service": "vllm-proxy", "tags": ["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
139139
140140 # --- Qwen3.5-122B-A10B instance 1 (GPUs 0-3) ---
141141
142- vllm-qwen35-122b-1 :
142+ model-vllm-qwen35-122b-1 :
143143 << : *vllm-qwen35-122b-common
144- container_name : vllm-qwen35-122b-1
144+ container_name : model-vllm-qwen35-122b-1
145145 depends_on :
146146 model-downloader :
147147 condition : service_completed_successfully
@@ -156,13 +156,13 @@ services:
156156 com.datadoghq.ad.check_names : ' ["vllm"]'
157157 com.datadoghq.ad.init_configs : " [{}]"
158158 com.datadoghq.ad.logs : ' [{"source": "vllm", "service": "vllm", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000","instance:1"]}]'
159- com.datadoghq.ad.instances : ' [{"openmetrics_endpoint":"http://vllm-qwen35-122b-1:8000/metrics", "histogram_buckets_as_distributions": true, "service": "vllm-qwen35-122b-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
159+ com.datadoghq.ad.instances : ' [{"openmetrics_endpoint":"http://model-vllm-qwen35-122b-1:8000/metrics", "histogram_buckets_as_distributions": true, "service": "model-vllm-qwen35-122b-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
160160
161161 # --- Qwen3.5-122B-A10B instance 2 (GPUs 4-7) ---
162162
163- vllm-qwen35-122b-2 :
163+ model-vllm-qwen35-122b-2 :
164164 << : *vllm-qwen35-122b-common
165- container_name : vllm-qwen35-122b-2
165+ container_name : model-vllm-qwen35-122b-2
166166 depends_on :
167167 model-downloader :
168168 condition : service_completed_successfully
@@ -177,7 +177,7 @@ services:
177177 com.datadoghq.ad.check_names : ' ["vllm"]'
178178 com.datadoghq.ad.init_configs : " [{}]"
179179 com.datadoghq.ad.logs : ' [{"source": "vllm", "service": "vllm", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001","instance:2"]}]'
180- com.datadoghq.ad.instances : ' [{"openmetrics_endpoint":"http://vllm-qwen35-122b-2:8000/metrics", "histogram_buckets_as_distributions": true, "service": "vllm-qwen35-122b-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
180+ com.datadoghq.ad.instances : ' [{"openmetrics_endpoint":"http://model-vllm-qwen35-122b-2:8000/metrics", "histogram_buckets_as_distributions": true, "service": "model-vllm-qwen35-122b-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
181181
182182 dcgm-exporter :
183183 image : nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -260,13 +260,13 @@ configs:
260260 # Health check directly on backend (no auth needed on raw vLLM container)
261261 check_inference() {
262262 echo "Performing health check on backend..."
263- curl -sSf --max-time 45 -X POST "http://vllm-qwen35-122b-1:8000/v1/chat/completions" \
263+ curl -sSf --max-time 45 -X POST "http://model-vllm-qwen35-122b-1:8000/v1/chat/completions" \
264264 -H "Content-Type: application/json" \
265265 -d '{"model":"Qwen/Qwen3.5-122B-A10B","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
266266 }
267267
268268 echo "Waiting for model to be ready..."
269- until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
269+ until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
270270 echo "Model ready, starting registration loop"
271271
272272 while true; do
@@ -308,7 +308,7 @@ configs:
308308 # :80 — single proxy handles both backends
309309 server {
310310 listen 80 default_server;
311- location / { proxy_pass http://vllm-proxy-qwen35:8000; }
311+ location / { proxy_pass http://proxy-qwen35:8000; }
312312 }
313313
314314 ssl_certificate /etc/letsencrypt/live/completions.near.ai/fullchain.pem;
@@ -326,5 +326,5 @@ configs:
326326 # PINGs (http2_keep_alive_while_idle).
327327 keepalive_timeout 1h;
328328 keepalive_requests 1000000;
329- location / { proxy_pass http://vllm-proxy-qwen35:8000; }
329+ location / { proxy_pass http://proxy-qwen35:8000; }
330330 }
0 commit comments