nearai · lloydmak99 · May 27, 2026 · May 21, 2026 · May 27, 2026 · May 27, 2026
diff --git a/GLM-5.1.yaml b/GLM-5.1.yaml
@@ -48,9 +48,9 @@ services:
     environment:
       - HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
 
-  nginx:
+  proxy-nginx:
     image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
-    container_name: nginx
+    container_name: proxy-nginx
     command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
     ports:
       - "8000:80"
@@ -91,20 +91,20 @@ services:
       - MODEL_NAME=zai-org/GLM-5.1-FP8
       - OHTTP_ENABLED=true
       - TOKEN=${PROXY_TOKEN}
-      - VLLM_BASE_URL=http://glm51:8000
+      - VLLM_BASE_URL=http://model-sg-glm51:8000
       - TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
       - USE_NV_ATTESTATION_SDK=true
       - WEB_CONTEXT_SEARCH_URL=${WEB_CONTEXT_SEARCH_URL}
       - WEB_CONTEXT_SEARCH_API_KEY=${WEB_CONTEXT_SEARCH_API_KEY}
 
-  glm51:
+  model-sg-glm51:
     <<: *nvidia
     init: true
     depends_on:
       model-downloader:
         condition: service_completed_successfully
     image: lmsysorg/sglang:dev-cu12@sha256:aac6b242680daeb74d2ab1d85f70575357552d7d165d2e5d30eb362797db54a1
-    container_name: glm51
+    container_name: model-sg-glm51
     command: >
       sglang serve
       --model-path zai-org/GLM-5.1-FP8
@@ -155,7 +155,7 @@ services:
       com.datadoghq.ad.check_names: '["openmetrics"]'
       com.datadoghq.ad.init_configs: "[{}]"
       com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
-      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://glm51:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5.1", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
+      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-glm51:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5.1", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
 
   dcgm-exporter:
     image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -240,13 +240,13 @@ configs:
       # 1-token completion health check directly to model container (no auth needed)
       check_inference() {
         echo "Performing health check on model endpoint..."
-        curl -sSf --max-time 45 -X POST "http://glm51:8000/v1/chat/completions" \
+        curl -sSf --max-time 45 -X POST "http://model-sg-glm51:8000/v1/chat/completions" \
           -H "Content-Type: application/json" \
           -d '{"model":"zai-org/GLM-5.1-FP8","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
       }
 
       echo "Waiting for model to be ready..."
-      until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
+      until curl -sf --max-time 45 http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done  # bound each probe so a stalled connection retries instead of blocking the loop
       echo "Model ready, starting registration loop"
 
       while true; do

diff --git a/Qwen3.5-122B.yaml b/Qwen3.5-122B.yaml
@@ -27,7 +27,7 @@ x-vllm-proxy-common: &vllm-proxy-common
   restart: unless-stopped
   logging: *logging-conf
 
-x-qwen35-122b-common: &qwen35-122b-common
+x-sg-qwen35-122b-common: &sg-qwen35-122b-common
   <<: *nvidia
   init: true
   # SGLang v0.5.12 (cu129) — migrated off vLLM 2026-05-22.
@@ -92,9 +92,9 @@ services:
       - HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
     logging: *logging-conf
 
-  nginx:
+  proxy-nginx:
     image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
-    container_name: nginx
+    container_name: proxy-nginx
     command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
     ports:
       - "8000:80"
@@ -126,9 +126,9 @@ services:
 
   # --- Single proxy for both backends ---
 
-  vllm-proxy-qwen35:
+  proxy-qwen35:
     <<: *vllm-proxy-common
-    container_name: vllm-proxy-qwen35
+    container_name: proxy-qwen35
     environment:
       - NVIDIA_VISIBLE_DEVICES=all
       - CLOUD_API_URL=https://cloud-api.near.ai
@@ -137,7 +137,7 @@ services:
       - MODEL_NAME=Qwen/Qwen3.5-122B-A10B
       - OHTTP_ENABLED=true
       - TOKEN=${PROXY_TOKEN}
-      - VLLM_BACKEND_URLS=http://qwen35-1:8000,http://qwen35-2:8000
+      - VLLM_BACKEND_URLS=http://model-sg-qwen35-122b-1:8000,http://model-sg-qwen35-122b-2:8000
       - VLLM_PROXY_MAX_REQUEST_SIZE=104857600
       - TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
       - USE_NV_ATTESTATION_SDK=true
@@ -146,9 +146,9 @@ services:
 
   # --- Qwen3.5-122B-A10B instance 1 (GPUs 0-3) ---
 
-  qwen35-1:
-    <<: *qwen35-122b-common
-    container_name: qwen35-1
+  model-sg-qwen35-122b-1:
+    <<: *sg-qwen35-122b-common
+    container_name: model-sg-qwen35-122b-1
     depends_on:
       model-downloader:
         condition: service_completed_successfully
@@ -163,13 +163,13 @@ services:
       com.datadoghq.ad.check_names: '["openmetrics"]'
       com.datadoghq.ad.init_configs: "[{}]"
       com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000","instance:1"]}]'
-      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://qwen35-1:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
+      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-qwen35-122b-1:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
 
   # --- Qwen3.5-122B-A10B instance 2 (GPUs 4-7) ---
 
-  qwen35-2:
-    <<: *qwen35-122b-common
-    container_name: qwen35-2
+  model-sg-qwen35-122b-2:
+    <<: *sg-qwen35-122b-common
+    container_name: model-sg-qwen35-122b-2
     depends_on:
       model-downloader:
         condition: service_completed_successfully
@@ -184,7 +184,7 @@ services:
       com.datadoghq.ad.check_names: '["openmetrics"]'
       com.datadoghq.ad.init_configs: "[{}]"
       com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001","instance:2"]}]'
-      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://qwen35-2:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
+      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-qwen35-122b-2:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
 
   dcgm-exporter:
     image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -268,13 +268,13 @@ configs:
       # Health check directly on backend (no auth needed on raw sglang container)
       check_inference() {
         echo "Performing health check on backend..."
-        curl -sSf --max-time 45 -X POST "http://qwen35-1:8000/v1/chat/completions" \
+        curl -sSf --max-time 45 -X POST "http://model-sg-qwen35-122b-1:8000/v1/chat/completions" \
           -H "Content-Type: application/json" \
           -d '{"model":"Qwen/Qwen3.5-122B-A10B","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
       }
 
       echo "Waiting for model to be ready..."
-      until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
+      until curl -sf --max-time 45 http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done  # bound each probe so a stalled connection retries instead of blocking the loop
       echo "Model ready, starting registration loop"
 
       while true; do
@@ -333,7 +333,7 @@ configs:
       # :80 — single proxy handles both backends
       server {
         listen 80 default_server;
-        location / { proxy_pass http://vllm-proxy-qwen35:8000; }
+        location / { proxy_pass http://proxy-qwen35:8000; }
       }
 
       ssl_certificate /etc/letsencrypt/live/completions.near.ai/fullchain.pem;
@@ -351,5 +351,5 @@ configs:
         # PINGs (http2_keep_alive_while_idle).
         keepalive_timeout 1h;
         keepalive_requests 1000000;
-        location / { proxy_pass http://vllm-proxy-qwen35:8000; }
+        location / { proxy_pass http://proxy-qwen35:8000; }
       }