@@ -87,9 +87,9 @@ services:
8787 - HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
8888 logging : *logging-conf
8989
90- nginx :
90+ proxy-nginx :
9191 image : nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
92- container_name : nginx
92+ container_name : proxy-nginx
9393 command : /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
9494 ports :
9595 - " 8000:80"
@@ -121,9 +121,9 @@ services:
121121
122122 # --- Single proxy for both backends ---
123123
124- vllm-proxy-qwen35 :
124+ proxy-qwen35 :
125125 << : *vllm-proxy-common
126- container_name : vllm-proxy-qwen35
126+ container_name : proxy-qwen35
127127 environment :
128128 - NVIDIA_VISIBLE_DEVICES=all
129129 - CLOUD_API_URL=https://cloud-api.near.ai
@@ -132,7 +132,7 @@ services:
132132 - MODEL_NAME=Qwen/Qwen3.5-122B-A10B
133133 - OHTTP_ENABLED=true
134134 - TOKEN=${PROXY_TOKEN}
135- - VLLM_BACKEND_URLS=http://vllm-qwen35-122b-1:8000,http://vllm-qwen35-122b-2:8000
135+ - VLLM_BACKEND_URLS=http://model-vllm-qwen35-122b-1:8000,http://model-vllm-qwen35-122b-2:8000
136136 - VLLM_PROXY_MAX_REQUEST_SIZE=104857600
137137 - TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
138138 - USE_NV_ATTESTATION_SDK=true
@@ -141,9 +141,9 @@ services:
141141
142142 # --- Qwen3.5-122B-A10B instance 1 (GPUs 0-3) ---
143143
144- vllm-qwen35-122b-1 :
144+ model-vllm-qwen35-122b-1 :
145145 << : *vllm-qwen35-122b-common
146- container_name : vllm-qwen35-122b-1
146+ container_name : model-vllm-qwen35-122b-1
147147 depends_on :
148148 model-downloader :
149149 condition : service_completed_successfully
@@ -158,13 +158,13 @@ services:
158158 com.datadoghq.ad.check_names : ' ["openmetrics"]'
159159 com.datadoghq.ad.init_configs : " [{}]"
160160 com.datadoghq.ad.logs : ' [{"source": "vllm", "service": "vllm", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000","instance:1"]}]'
161- com.datadoghq.ad.instances : ' [{"openmetrics_endpoint":"http://vllm-qwen35-122b-1:8000/metrics", "metrics":["vllm:.*"], "histogram_buckets_as_distributions": true, "service": "vllm-qwen35-122b-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
161+ com.datadoghq.ad.instances : ' [{"openmetrics_endpoint":"http://model-vllm-qwen35-122b-1:8000/metrics", "metrics":["vllm:.*"], "histogram_buckets_as_distributions": true, "service": "model-vllm-qwen35-122b-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
162162
163163 # --- Qwen3.5-122B-A10B instance 2 (GPUs 4-7) ---
164164
165- vllm-qwen35-122b-2 :
165+ model-vllm-qwen35-122b-2 :
166166 << : *vllm-qwen35-122b-common
167- container_name : vllm-qwen35-122b-2
167+ container_name : model-vllm-qwen35-122b-2
168168 depends_on :
169169 model-downloader :
170170 condition : service_completed_successfully
@@ -179,7 +179,7 @@ services:
179179 com.datadoghq.ad.check_names : ' ["openmetrics"]'
180180 com.datadoghq.ad.init_configs : " [{}]"
181181 com.datadoghq.ad.logs : ' [{"source": "vllm", "service": "vllm", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001","instance:2"]}]'
182- com.datadoghq.ad.instances : ' [{"openmetrics_endpoint":"http://vllm-qwen35-122b-2:8000/metrics", "metrics":["vllm:.*"], "histogram_buckets_as_distributions": true, "service": "vllm-qwen35-122b-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
182+ com.datadoghq.ad.instances : ' [{"openmetrics_endpoint":"http://model-vllm-qwen35-122b-2:8000/metrics", "metrics":["vllm:.*"], "histogram_buckets_as_distributions": true, "service": "model-vllm-qwen35-122b-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
183183
184184 dcgm-exporter :
185185 image : nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -262,13 +262,13 @@ configs:
262262 # Health check directly on backend (no auth needed on raw vLLM container)
263263 check_inference() {
264264 echo "Performing health check on backend..."
265- curl -sSf --max-time 45 -X POST "http://vllm-qwen35-122b-1:8000/v1/chat/completions" \
265+ curl -sSf --max-time 45 -X POST "http://model-vllm-qwen35-122b-1:8000/v1/chat/completions" \
266266 -H "Content-Type: application/json" \
267267 -d '{"model":"Qwen/Qwen3.5-122B-A10B","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
268268 }
269269
270270 echo "Waiting for model to be ready..."
271- until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
271+ until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
272272 echo "Model ready, starting registration loop"
273273
274274 while true; do
@@ -310,7 +310,7 @@ configs:
310310 # :80 — single proxy handles both backends
311311 server {
312312 listen 80 default_server;
313- location / { proxy_pass http://vllm-proxy-qwen35:8000; }
313+ location / { proxy_pass http://proxy-qwen35:8000; }
314314 }
315315
316316 ssl_certificate /etc/letsencrypt/live/completions.near.ai/fullchain.pem;
@@ -328,5 +328,5 @@ configs:
328328 # PINGs (http2_keep_alive_while_idle).
329329 keepalive_timeout 1h;
330330 keepalive_requests 1000000;
331- location / { proxy_pass http://vllm-proxy-qwen35:8000; }
331+ location / { proxy_pass http://proxy-qwen35:8000; }
332332 }
0 commit comments