Merge pull request #32 from nearai/feat/standardize-container-names

lloydmak99 · web-flow · commit f8ad79e4ea57 · 2026-05-27T13:51:15.000-07:00
Standardize container/service names with functional prefixes
diff --git a/GLM-5.1.yaml b/GLM-5.1.yaml
@@ -48,9 +48,9 @@ services:
     environment:
       - HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
 
-  nginx:
+  proxy-nginx:
     image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
-    container_name: nginx
+    container_name: proxy-nginx
     command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
     ports:
       - "8000:80"
@@ -92,20 +92,20 @@ services:
       - MODEL_NAME=zai-org/GLM-5.1-FP8
       - OHTTP_ENABLED=true
       - TOKEN=${PROXY_TOKEN}
-      - VLLM_BASE_URL=http://glm51:8000
+      - VLLM_BASE_URL=http://model-sg-glm51:8000
       - TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
       - USE_NV_ATTESTATION_SDK=true
       - WEB_CONTEXT_SEARCH_URL=${WEB_CONTEXT_SEARCH_URL}
       - WEB_CONTEXT_SEARCH_API_KEY=${WEB_CONTEXT_SEARCH_API_KEY}
 
-  glm51:
+  model-sg-glm51:
     <<: *nvidia
     init: true
     depends_on:
       model-downloader:
         condition: service_completed_successfully
     image: lmsysorg/sglang:dev-cu12@sha256:aac6b242680daeb74d2ab1d85f70575357552d7d165d2e5d30eb362797db54a1
-    container_name: glm51
+    container_name: model-sg-glm51
     command: >
       sglang serve
       --model-path zai-org/GLM-5.1-FP8
@@ -156,7 +156,7 @@ services:
       com.datadoghq.ad.check_names: '["openmetrics"]'
       com.datadoghq.ad.init_configs: "[{}]"
       com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
-      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://glm51:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5.1", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
+      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-glm51:8000/metrics", "histogram_buckets_as_distributions": true, "metrics":["sglang:*"], "service": "glm-5.1", "tags":["model:zai-org/GLM-5.1-FP8","ip:${HOST_IP}","port:8000"]}]'
 
   dcgm-exporter:
     image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -241,13 +241,13 @@ configs:
       # 1-token completion health check directly to model container (no auth needed)
       check_inference() {
         echo "Performing health check on model endpoint..."
-        curl -sSf --max-time 45 -X POST "http://glm51:8000/v1/chat/completions" \
+        curl -sSf --max-time 45 -X POST "http://model-sg-glm51:8000/v1/chat/completions" \
           -H "Content-Type: application/json" \
           -d '{"model":"zai-org/GLM-5.1-FP8","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
       }
 
       echo "Waiting for model to be ready..."
-      until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
+      until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
       echo "Model ready, starting registration loop"
 
       while true; do
diff --git a/Qwen3.5-122B.yaml b/Qwen3.5-122B.yaml
@@ -27,7 +27,7 @@ x-vllm-proxy-common: &vllm-proxy-common
   restart: unless-stopped
   logging: *logging-conf
 
-x-qwen35-122b-common: &qwen35-122b-common
+x-sg-qwen35-122b-common: &sg-qwen35-122b-common
   <<: *nvidia
   init: true
   # SGLang v0.5.12 (cu129) — migrated off vLLM 2026-05-22.
@@ -92,9 +92,9 @@ services:
       - HF_TOKEN=${HUGGING_FACE_HUB_TOKEN}
     logging: *logging-conf
 
-  nginx:
+  proxy-nginx:
     image: nginx@sha256:1d13701a5f9f3fb01aaa88cef2344d65b6b5bf6b7d9fa4cf0dca557a8d7702ba
-    container_name: nginx
+    container_name: proxy-nginx
     command: /bin/sh -c 'while :; do sleep 6h; nginx -s reload; done & nginx -g "daemon off;"'
     ports:
       - "8000:80"
@@ -126,9 +126,9 @@ services:
 
   # --- Single proxy for both backends ---
 
-  vllm-proxy-qwen35:
+  proxy-qwen35:
     <<: *vllm-proxy-common
-    container_name: vllm-proxy-qwen35
+    container_name: proxy-qwen35
     environment:
       - NVIDIA_VISIBLE_DEVICES=all
       - CLOUD_API_URL=https://cloud-api.near.ai
@@ -138,7 +138,7 @@ services:
       - MODEL_NAME=Qwen/Qwen3.5-122B-A10B
       - OHTTP_ENABLED=true
       - TOKEN=${PROXY_TOKEN}
-      - VLLM_BACKEND_URLS=http://qwen35-1:8000,http://qwen35-2:8000
+      - VLLM_BACKEND_URLS=http://model-sg-qwen35-122b-1:8000,http://model-sg-qwen35-122b-2:8000
       - VLLM_PROXY_MAX_REQUEST_SIZE=104857600
       - TLS_CERT_PATH=/etc/letsencrypt/live/completions.near.ai/fullchain.pem
       - USE_NV_ATTESTATION_SDK=true
@@ -147,9 +147,9 @@ services:
 
   # --- Qwen3.5-122B-A10B instance 1 (GPUs 0-3) ---
 
-  qwen35-1:
-    <<: *qwen35-122b-common
-    container_name: qwen35-1
+  model-sg-qwen35-122b-1:
+    <<: *sg-qwen35-122b-common
+    container_name: model-sg-qwen35-122b-1
     depends_on:
       model-downloader:
         condition: service_completed_successfully
@@ -164,13 +164,13 @@ services:
       com.datadoghq.ad.check_names: '["openmetrics"]'
       com.datadoghq.ad.init_configs: "[{}]"
       com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000","instance:1"]}]'
-      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://qwen35-1:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
+      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-qwen35-122b-1:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-1", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8000"]}]'
 
   # --- Qwen3.5-122B-A10B instance 2 (GPUs 4-7) ---
 
-  qwen35-2:
-    <<: *qwen35-122b-common
-    container_name: qwen35-2
+  model-sg-qwen35-122b-2:
+    <<: *sg-qwen35-122b-common
+    container_name: model-sg-qwen35-122b-2
     depends_on:
       model-downloader:
         condition: service_completed_successfully
@@ -185,7 +185,7 @@ services:
       com.datadoghq.ad.check_names: '["openmetrics"]'
       com.datadoghq.ad.init_configs: "[{}]"
       com.datadoghq.ad.logs: '[{"source": "sglang", "service": "sglang", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001","instance:2"]}]'
-      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://qwen35-2:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
+      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://model-sg-qwen35-122b-2:8000/metrics", "metrics":["sglang:*"], "histogram_buckets_as_distributions": true, "service": "qwen35-2", "tags":["model:Qwen/Qwen3.5-122B-A10B","ip:${HOST_IP}","port:8001"]}]'
 
   dcgm-exporter:
     image: nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless
@@ -269,13 +269,13 @@ configs:
       # Health check directly on backend (no auth needed on raw sglang container)
       check_inference() {
         echo "Performing health check on backend..."
-        curl -sSf --max-time 45 -X POST "http://qwen35-1:8000/v1/chat/completions" \
+        curl -sSf --max-time 45 -X POST "http://model-sg-qwen35-122b-1:8000/v1/chat/completions" \
           -H "Content-Type: application/json" \
           -d '{"model":"Qwen/Qwen3.5-122B-A10B","messages":[{"role":"user","content":"hi"}],"max_tokens":1}'
       }
 
       echo "Waiting for model to be ready..."
-      until curl -sf http://nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
+      until curl -sf http://proxy-nginx:80/v1/models > /dev/null 2>&1; do sleep 30; done
       echo "Model ready, starting registration loop"
 
       while true; do
@@ -334,7 +334,7 @@ configs:
       # :80 — single proxy handles both backends
       server {
         listen 80 default_server;
-        location / { proxy_pass http://vllm-proxy-qwen35:8000; }
+        location / { proxy_pass http://proxy-qwen35:8000; }
       }
 
       ssl_certificate /etc/letsencrypt/live/completions.near.ai/fullchain.pem;
@@ -352,5 +352,5 @@ configs:
         # PINGs (http2_keep_alive_while_idle).
         keepalive_timeout 1h;
         keepalive_requests 1000000;
-        location / { proxy_pass http://vllm-proxy-qwen35:8000; }
+        location / { proxy_pass http://proxy-qwen35:8000; }
       }
diff --git a/small-models.yaml b/small-models.yaml