diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..2292e31
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,39 @@
+# Copy this to .env (the Makefile does this automatically on first run).
+# Fill in the REPLACE_WITH_* placeholders with values from
+# rob-poc/serving-api/dev/secrets.yaml. Never put prod values here —
+# the Makefile's _guard-local-db will refuse non-local DATABASE_URL.
+
+# ── Local Postgres (matches what `make db-up` spins up) ──────────────────────
+DATABASE_URL=postgresql://serving:serving@localhost:5433/serving
+
+# ── Auth0 (research.computer tenant) ─────────────────────────────────────────
+AUTH0_DOMAIN=researchcomputer.eu.auth0.com
+AUTH0_API_AUDIENCE=https://researchcomputer.eu.auth0.com/
+AUTH0_ISSUER=https://researchcomputer.eu.auth0.com/
+AUTH0_ALGORITHMS=RS256
+AUTH0_CLIENT_ID=REPLACE_WITH_AUTH0_CLIENT_ID
+AUTH0_CLIENT_SECRET=REPLACE_WITH_AUTH0_CLIENT_SECRET
+AUTH_SECRET=REPLACE_WITH_RANDOM_STRING
+AUTH_TRUST_HOST=true
+
+VITE_AUTH0_CLIENT_ID=REPLACE_WITH_VITE_AUTH0_CLIENT_ID
+VITE_AUTH0_DOMAIN=researchcomputer.eu.auth0.com
+
+# ── OpenTela / OCF (peer discovery + LLM routing) ────────────────────────────
+# Point at the dev OpenTela head for live model discovery, or use
+# OTELA_FIXTURE_PATH (set by `make dummy-run`) to read a static snapshot.
+OCF_HEAD_ADDR=http://148.187.108.177:8092
+
+# ── Langfuse (observability — optional; leave blank to disable) ──────────────
+LANGFUSE_HOST=https://cloud.langfuse.com
+LANGFUSE_PUBLIC_KEY=
+LANGFUSE_SECRET_KEY=
+
+# ── CSCS L1 passthrough (optional; leave blank to disable) ───────────────────
+# When both are set, requests for L1-hosted Apertus models forward here
+# instead of OpenTela. See backend/services/cscs_l1_service.py.
+CSCS_L1_BASE_URL=
+CSCS_L1_API_KEY=
+
+# ── Logfire (observability — optional) ───────────────────────────────────────
+LOGFIRE_TOKEN=
diff --git a/.gitignore b/.gitignore
index 4686944..c82f305 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 *.pyc
 *.env
 *.env*
+!.env.example
 details.json
 secrets/*.json
 .venv/*
diff --git a/Makefile b/Makefile
index 9151fda..df5f012 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: install install-dev format check test run dummy-run db-up db-down migrate _ensure-env _ensure-frontend-env
+.PHONY: install install-dev format check test run dummy-run db-up db-down migrate _ensure-env _ensure-frontend-env _guard-local-db _guard-local-api
 
 UV_EXTRA ?=
 
@@ -66,10 +66,38 @@ db-down:
 	-docker stop $(PG_CONTAINER) > /dev/null 2>&1
 	-docker rm $(PG_CONTAINER) > /dev/null 2>&1
 
-migrate: _ensure-env db-up
+# Refuse to run any DB-touching target if .env points at a non-local host, so
+# `make run` / `make migrate` can never hit a remote (prod/staging) database.
+# Host = URL authority minus userinfo/port; an '@' after the authority is
+# treated as ambiguous and refused. An absent DATABASE_URL line passes.
+_guard-local-db: _ensure-env
+	@url=$$(grep -E '^DATABASE_URL=' .env | head -1 | cut -d= -f2- | tr -d '"' | tr -d "'"); \
+	rest=$${url#*://}; auth=$${rest%%[/?#]*}; host=$${auth##*@}; \
+	case "$$host" in \[*) host=$${host%%\]*}];; *) host=$${host%%:*};; esac; \
+	case $${rest#"$$auth"} in *@*) host=$$url;; esac; [ -n "$$host" ] || host=$$url; \
+	case "$$host" in localhost|127.0.0.1|'[::1]'|"") ;; \
+		*) echo "REFUSING: .env DATABASE_URL host '$$host' is not local."; \
+		   echo "Local dev must not run against prod/staging. Set DATABASE_URL=$(DATABASE_URL) in .env."; exit 1;; \
+	esac
+
+# Same guard for the frontend — VITE_API_URL is what `npm run dev` reads, so a
+# prod URL there silently makes the local UI hit prod even when the local
+# backend is fine (this tripped up dummy-run). Empty / unset passes (frontend
+# defaults apply). Host = URL authority minus userinfo/port; stray '@' refuses.
+_guard-local-api: _ensure-frontend-env
+	@url=$$(grep -E '^VITE_API_URL=' frontend/.env | head -1 | cut -d= -f2- | tr -d '"' | tr -d "'"); \
+	rest=$${url#*://}; auth=$${rest%%[/?#]*}; host=$${auth##*@}; \
+	case "$$host" in \[*) host=$${host%%\]*}];; *) host=$${host%%:*};; esac; \
+	case $${rest#"$$auth"} in *@*) host=$$url;; esac; [ -n "$$host" ] || host=$$url; \
+	case "$$host" in localhost|127.0.0.1|'[::1]'|"") ;; \
+		*) echo "REFUSING: frontend/.env VITE_API_URL host '$$host' is not local."; \
+		   echo "Local dev must not call prod/staging API. Set VITE_API_URL=http://localhost:8080 in frontend/.env."; exit 1;; \
+	esac
+
+migrate: _ensure-env _guard-local-db db-up
 	alembic upgrade head
 
-run: _ensure-env _ensure-frontend-env db-up migrate
+run: _ensure-env _ensure-frontend-env _guard-local-api db-up migrate
 	uvicorn backend.main:app --reload --host 0.0.0.0 --port 8080 & \
 	cd frontend && npm run dev & \
 	wait
@@ -77,7 +105,7 @@ run: _ensure-env _ensure-frontend-env db-up migrate
 # Same as `run` but forces the model list to come from the synthesised
 # upgraded fixture instead of the live OpenTela endpoint. Useful for
 # iterating on the model-card UI without depending on prod state.
-dummy-run: _ensure-env _ensure-frontend-env db-up migrate
+dummy-run: _ensure-env _ensure-frontend-env _guard-local-api db-up migrate
 	OTELA_FIXTURE_PATH=$(PWD)/backend/tests/fixtures/dnt_table_dev_live.json \
 	uvicorn backend.main:app --reload --host 0.0.0.0 --port 8080 & \
 	cd frontend && npm run dev & \
diff --git a/backend/services/model_service.py b/backend/services/model_service.py
index ec43bb6..3d2a7f3 100644
--- a/backend/services/model_service.py
+++ b/backend/services/model_service.py
@@ -73,6 +73,7 @@ def get_all_models(endpoint: str, with_details: bool = False):
                 "object": "model",
                 "created": "0x",
                 "owner": "0x",
+                "has_service": False,
                 **meta,
             }
             if with_details:
@@ -93,6 +94,7 @@ def get_all_models(endpoint: str, with_details: bool = False):
                     "object": "model",
                     "created": "0x",
                     "owner": "0x",
+                    "has_service": True,
                     **meta,
                 }
                 if with_details:
diff --git a/backend/tests/fixtures/dnt_table_dev_live.json b/backend/tests/fixtures/dnt_table_dev_live.json
index b6ccb64..8394bc6 100644
--- a/backend/tests/fixtures/dnt_table_dev_live.json
+++ b/backend/tests/fixtures/dnt_table_dev_live.json
@@ -770,5 +770,352 @@
     ],
     "status": "ready",
     "version": "v0.1.11"
+  },
+  "/QmNWFQvMQSimNCYzRYpZb7yhwfER53nYo3pJhQJ9HAdM7U": {
+    "id": "QmNWFQvMQSimNCYzRYpZb7yhwfER53nYo3pJhQJ9HAdM7U",
+    "latency": 0,
+    "privileged": false,
+    "owner": "",
+    "current_offering": null,
+    "role": null,
+    "status": "ready",
+    "available_offering": null,
+    "service": [
+      {
+        "name": "llm",
+        "hardware": {
+          "gpus": null,
+          "host_memory": 0,
+          "host_memory_bandwidth": 0,
+          "host_memory_used": 0
+        },
+        "status": "connected",
+        "host": "localhost",
+        "port": "8080",
+        "identity_group": [
+          "model=meta-llama/Llama-3.3-70B-Instruct"
+        ]
+      }
+    ],
+    "last_seen": 1779267166,
+    "version": "sai-v0.0.6",
+    "hostname": "sglang-llama-70b-bb9cbb4b6-wn9x2",
+    "labels": {
+      "framework": "sglang",
+      "launched_by": "k8s",
+      "served_model_name": "meta-llama/Llama-3.3-70B-Instruct",
+      "started_at": "2026-05-19T15:58:39Z",
+      "worker_group_id": "sglang-llama-70b-bb9cbb4b6-wn9x2"
+    },
+    "public_address": "",
+    "hardware": {
+      "gpus": [
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 14
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 9
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 8
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 7
+        }
+      ],
+      "host_memory": 0,
+      "host_memory_bandwidth": 0,
+      "host_memory_used": 0
+    },
+    "connected": true,
+    "load": null
+  },
+  "/QmNpziFtdsDmFFPSdgL3XMwz6cf3jDaZxNVvb4e9Be39f3": {
+    "id": "QmNpziFtdsDmFFPSdgL3XMwz6cf3jDaZxNVvb4e9Be39f3",
+    "latency": 0,
+    "privileged": false,
+    "owner": "",
+    "current_offering": null,
+    "role": null,
+    "status": "ready",
+    "available_offering": null,
+    "service": [
+      {
+        "name": "llm",
+        "hardware": {
+          "gpus": null,
+          "host_memory": 0,
+          "host_memory_bandwidth": 0,
+          "host_memory_used": 0
+        },
+        "status": "connected",
+        "host": "localhost",
+        "port": "8080",
+        "identity_group": [
+          "model=Snowflake/snowflake-arctic-embed-l-v2.0"
+        ]
+      }
+    ],
+    "last_seen": 1779267169,
+    "version": "sai-v0.0.6",
+    "hostname": "vllm-snowflake-7468ff7dc8-4hv68",
+    "labels": {
+      "framework": "vllm",
+      "launched_by": "k8s",
+      "served_model_name": "Snowflake/snowflake-arctic-embed-l-v2.0",
+      "started_at": "2026-05-19T15:57:44Z",
+      "worker_group_id": "vllm-snowflake-7468ff7dc8-4hv68"
+    },
+    "public_address": "",
+    "hardware": {
+      "gpus": [
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 1
+        }
+      ],
+      "host_memory": 0,
+      "host_memory_bandwidth": 0,
+      "host_memory_used": 0
+    },
+    "connected": true,
+    "load": null
+  },
+  "/QmUsx1HD2WPJ9hjVGXcpA9RTcn5YmN6bDuxFF2mUburLRq": {
+    "id": "QmUsx1HD2WPJ9hjVGXcpA9RTcn5YmN6bDuxFF2mUburLRq",
+    "latency": 0,
+    "privileged": false,
+    "owner": "",
+    "current_offering": null,
+    "role": null,
+    "status": "ready",
+    "available_offering": null,
+    "service": [
+      {
+        "name": "llm",
+        "hardware": {
+          "gpus": null,
+          "host_memory": 0,
+          "host_memory_bandwidth": 0,
+          "host_memory_used": 0
+        },
+        "status": "connected",
+        "host": "localhost",
+        "port": "8080",
+        "identity_group": [
+          "model=Qwen/Qwen3.5-27B"
+        ]
+      }
+    ],
+    "last_seen": 1779267152,
+    "version": "sai-v0.0.6",
+    "hostname": "vllm-qwen35-27b-7fc7bb7ffc-4bgkf",
+    "labels": {
+      "framework": "vllm",
+      "launched_by": "k8s",
+      "served_model_name": "Qwen/Qwen3.5-27B",
+      "started_at": "2026-05-19T15:59:26Z",
+      "worker_group_id": "vllm-qwen35-27b-7fc7bb7ffc-4bgkf"
+    },
+    "public_address": "",
+    "hardware": {
+      "gpus": [
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 0
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 1
+        }
+      ],
+      "host_memory": 0,
+      "host_memory_bandwidth": 0,
+      "host_memory_used": 0
+    },
+    "connected": true,
+    "load": null
+  },
+  "/QmWZBsbbkn93QHp4wVDaQ92u9LEBZsgAUn9jbj1iBKdDhj": {
+    "id": "QmWZBsbbkn93QHp4wVDaQ92u9LEBZsgAUn9jbj1iBKdDhj",
+    "latency": 0,
+    "privileged": false,
+    "owner": "",
+    "current_offering": null,
+    "role": null,
+    "status": "ready",
+    "available_offering": null,
+    "service": [
+      {
+        "name": "llm",
+        "hardware": {
+          "gpus": null,
+          "host_memory": 0,
+          "host_memory_bandwidth": 0,
+          "host_memory_used": 0
+        },
+        "status": "connected",
+        "host": "localhost",
+        "port": "8080",
+        "identity_group": [
+          "model=swiss-ai/Apertus-70B-Instruct-2509"
+        ]
+      }
+    ],
+    "last_seen": 1779267164,
+    "version": "sai-v0.0.6",
+    "hostname": "vllm-apertus-70b-84d688fd7d-mbhbd",
+    "labels": {
+      "framework": "vllm",
+      "launched_by": "k8s",
+      "served_model_name": "swiss-ai/Apertus-70B-Instruct-2509",
+      "started_at": "2026-05-20T08:27:04Z",
+      "worker_group_id": "vllm-apertus-70b-84d688fd7d-mbhbd"
+    },
+    "public_address": "",
+    "hardware": {
+      "gpus": [
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 2
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 1
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 3
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 1
+        }
+      ],
+      "host_memory": 0,
+      "host_memory_bandwidth": 0,
+      "host_memory_used": 0
+    },
+    "connected": true,
+    "load": null
+  },
+  "/QmYTeswtJmGEzYnPEG64GFyQ6USnfCdER6czXkr2VAPE5u": {
+    "id": "QmYTeswtJmGEzYnPEG64GFyQ6USnfCdER6czXkr2VAPE5u",
+    "latency": 0,
+    "privileged": false,
+    "owner": "",
+    "current_offering": null,
+    "role": null,
+    "status": "ready",
+    "available_offering": null,
+    "service": [
+      {
+        "name": "llm",
+        "hardware": {
+          "gpus": null,
+          "host_memory": 0,
+          "host_memory_bandwidth": 0,
+          "host_memory_used": 0
+        },
+        "status": "connected",
+        "host": "localhost",
+        "port": "8080",
+        "identity_group": [
+          "model=swiss-ai/Apertus-8B-Instruct-2509"
+        ]
+      }
+    ],
+    "last_seen": 1779267169,
+    "version": "sai-v0.0.6",
+    "hostname": "sglang-apertus-8b-78b4f9d77-v2pbt",
+    "labels": {
+      "framework": "sglang",
+      "launched_by": "k8s",
+      "served_model_name": "swiss-ai/Apertus-8B-Instruct-2509",
+      "started_at": "2026-05-19T16:11:00Z",
+      "worker_group_id": "sglang-apertus-8b-78b4f9d77-v2pbt"
+    },
+    "public_address": "",
+    "hardware": {
+      "gpus": [
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 3
+        }
+      ],
+      "host_memory": 0,
+      "host_memory_bandwidth": 0,
+      "host_memory_used": 0
+    },
+    "connected": true,
+    "load": null
+  },
+  "/QmbVVJVM1VwTCHEo8FCFZinWT5zEBoccDJmbZtj7zRK6Cx": {
+    "id": "QmbVVJVM1VwTCHEo8FCFZinWT5zEBoccDJmbZtj7zRK6Cx",
+    "latency": 0,
+    "privileged": false,
+    "owner": "",
+    "current_offering": null,
+    "role": null,
+    "status": "ready",
+    "available_offering": null,
+    "service": [
+      {
+        "name": "llm",
+        "hardware": {
+          "gpus": null,
+          "host_memory": 0,
+          "host_memory_bandwidth": 0,
+          "host_memory_used": 0
+        },
+        "status": "connected",
+        "host": "localhost",
+        "port": "8080",
+        "identity_group": [
+          "model=zai-org/GLM-4.7-Flash"
+        ]
+      }
+    ],
+    "last_seen": 1779267154,
+    "version": "sai-v0.0.6",
+    "hostname": "sglang-glm-47-flash-759d7dd467-fx9s4",
+    "labels": {
+      "framework": "sglang",
+      "launched_by": "k8s",
+      "served_model_name": "zai-org/GLM-4.7-Flash",
+      "started_at": "2026-05-19T15:58:03Z",
+      "worker_group_id": "sglang-glm-47-flash-759d7dd467-fx9s4"
+    },
+    "public_address": "",
+    "hardware": {
+      "gpus": [
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 1
+        }
+      ],
+      "host_memory": 0,
+      "host_memory_bandwidth": 0,
+      "host_memory_used": 0
+    },
+    "connected": true,
+    "load": null
   }
-}
\ No newline at end of file
+}
diff --git a/backend/tests/test_model_service.py b/backend/tests/test_model_service.py
index dceb6bc..f56a3b1 100644
--- a/backend/tests/test_model_service.py
+++ b/backend/tests/test_model_service.py
@@ -98,6 +98,7 @@ def test_new_binary_head_carries_labels():
     assert entry["worker_group_id"] == "12345"
     assert entry["framework"] == "sglang"
     assert entry["status"] == "ready"
+    assert entry["has_service"] is True
 
 
 def test_metrics_only_follower_groups_with_head_via_worker_group_id():
@@ -126,6 +127,12 @@ def test_metrics_only_follower_groups_with_head_via_worker_group_id():
         == by_id["QmFollower"]["worker_group_id"]
         == "12345"
     )
+    # has_service distinguishes the actually-serving peer from TP-worker /
+    # metrics-only peers — the frontend uses this to pick the head when
+    # aggregating replicas (otherwise a multi-node TP replica shows pending
+    # forever because rank-1..N never register an llm service).
+    assert by_id["QmHead"]["has_service"] is True
+    assert by_id["QmFollower"]["has_service"] is False
 
 
 def test_pending_peer_without_served_model_name_label_falls_back_to_empty_id():
diff --git a/frontend/.env.example b/frontend/.env.example
new file mode 100644
index 0000000..4908f82
--- /dev/null
+++ b/frontend/.env.example
@@ -0,0 +1,16 @@
+# Copy this to frontend/.env. The Makefile does it automatically.
+#
+# Only VITE_-prefixed vars are visible to the Astro/Vite frontend; do
+# NOT paste backend secrets (DATABASE_URL, AUTH0_CLIENT_SECRET, Neon
+# URLs, etc.) here — those go in the project-root .env or
+# rob-poc/serving-api/{dev,prod}/secrets.yaml only.
+
+# API the frontend talks to. Must be localhost during local dev so
+# `make run` / `make dummy-run` hit your own backend instead of prod.
+# The Makefile's _guard-local-api refuses non-local values.
+VITE_API_URL=http://localhost:8080
+
+# Auth0 SPA client (research.computer tenant). Safe to commit values
+# since they're public client identifiers.
+VITE_AUTH0_CLIENT_ID=REPLACE_WITH_VITE_AUTH0_CLIENT_ID
+VITE_AUTH0_DOMAIN=researchcomputer.eu.auth0.com
diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte
index 1688e5b..66ed76e 100644
--- a/frontend/src/components/ui/ModelCard.svelte
+++ b/frontend/src/components/ui/ModelCard.svelte
@@ -1,6 +1,6 @@
 <script lang="ts">
   import { getModelLogo } from '../../lib/modelLogos';
-  import { getModelMetricsUrl, getModelTier } from '../../lib/modelMetrics';
+  import { getModelMetricsUrl, getTierFromLaunchedBy } from '../../lib/modelMetrics';
 
   interface Peer {
     peer_id?: string;
@@ -44,11 +44,11 @@
 
   const logoUrl = getModelLogo(entry.data.title);
   const metricsUrl = getModelMetricsUrl(entry.data.title);
-  // L1-hosted models are 24/7 by nature (CSCS L1 service), independent
-  // of the modelMetrics.ts config — let launched_by drive the badge so
-  // newly-discovered L1 models don't need a code change to look right.
-  const isL1Model = entry.data.replicas[0]?.head?.launched_by === "cscs_L1";
-  const tier = isL1Model ? "L2" : getModelTier(entry.data.title);
+  // Tier follows the peer's launched_by label: "k8s" or "cscs_L1" → 24/7,
+  // anything else (a username from model-launch, or no label) → Slurm.
+  const headLaunchedBy = entry.data.replicas[0]?.head?.launched_by;
+  const isL1Model = headLaunchedBy === "cscs_L1";
+  const tier = getTierFromLaunchedBy(headLaunchedBy);
   const chatUrl = `${chatAppUrl.replace(/\/$/, "")}/?models=${encodeURIComponent(entry.data.title)}`;
 
   let expanded = false;
@@ -60,6 +60,21 @@
   $: firstHead = entry.data.replicas[0]?.head ?? {};
   $: framework = firstHead.framework || "";
 
+  // Roll the per-replica head statuses up into a single value for the card:
+  //   "pending" — at least one replica is still booting
+  //   "ready"   — every replica that reports a status is ready
+  //   "unknown" — no replica reports a status (legacy binary), or some
+  //               status other than ready/pending is present
+  // Feeds both the traffic-light dot and the greyed-tile styling.
+  $: aggregateStatus = (() => {
+    // Heads without a status are dropped before aggregating.
+    const reported = entry.data.replicas.map(r => r.head?.status).filter(Boolean);
+    if (reported.some(s => s === "pending")) return "pending";
+    const allReady = reported.length > 0 && reported.every(s => s === "ready");
+    return allReady ? "ready" : "unknown";
+  })();
+  $: isPending = aggregateStatus === "pending";
+
   // "2026-05-17T07:00:00Z" → "2026-05-17T07:00:00Z (11 hours ago)".
   // Returns the iso untouched if it doesn't parse — keeps the row useful even
   // if OpenTela emits something we don't understand.
@@ -134,12 +149,22 @@
   aria-expanded={expanded}
   on:click={toggleExpand}
   on:keydown={onKeyDown}
+  class:tile-pending={isPending}
   class="relative group flex flex-col py-3 px-4 rounded-lg border border-black/15 dark:border-white/20 hover:bg-black/5 dark:hover:bg-white/5 hover:text-black dark:hover:text-white transition-colors duration-300 ease-in-out cursor-pointer"
 >
   <div class="flex items-center gap-3 min-w-0">
     <img src={logoUrl} alt="Model logo" class="w-8 h-8 object-contain" />
     <div class="flex flex-col flex-1 min-w-0">
       <div class="font-semibold flex items-center gap-2 min-w-0">
+        <!-- Traffic-light dot reflects aggregated replica status.
+             Visible whether the tile is expanded or not. -->
+        <span
+          class="status-dot status-dot-{aggregateStatus}"
+          title={aggregateStatus === "ready" ? "All replicas ready"
+               : aggregateStatus === "pending" ? "At least one replica is still starting up"
+               : "Status unknown"}
+          aria-label="status: {aggregateStatus}"
+        ></span>
         <span
           on:click={copyModelName}
           on:keydown={(e) => { if (e.key === "Enter") copyModelName(e); }}
@@ -316,6 +341,50 @@
     border-radius: 4px;
   }
 
+  /* Traffic-light dot — small filled circle in front of the model name. */
+  .status-dot {
+    display: inline-block;
+    width: 0.6rem;
+    height: 0.6rem;
+    border-radius: 9999px;
+    flex-shrink: 0;
+    box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.1) inset;
+  }
+  .status-dot-ready { background-color: #10b981; /* emerald-500 */ }
+  .status-dot-unknown { background-color: #9ca3af; /* gray-400 */ }
+  .status-dot-pending {
+    background-color: #f59e0b; /* amber-500 */
+    animation: status-pulse 1.5s ease-in-out infinite;
+  }
+  @keyframes status-pulse {
+    0%, 100% { opacity: 1; }
+    50% { opacity: 0.35; }
+  }
+  /* Honour the OS "reduce motion" setting: keep the amber colour, drop the pulse. */
+  @media (prefers-reduced-motion: reduce) {
+    .status-dot-pending { animation: none; }
+  }
+
+  /* Greyed-tile treatment — applied to the outer card when any replica
+     is pending. Mutes text/icons via `color` (inherited by SVGs using
+     currentColor) and grayscales the logo + colored badges. The
+     status-dot opts out by using its own background-color, so the
+     amber pending signal stays vivid against the otherwise-grey card. */
+  .tile-pending {
+    background-color: rgba(0, 0, 0, 0.04);
+    color: #6b7280; /* gray-500 */
+  }
+  :global(.dark) .tile-pending {
+    background-color: rgba(255, 255, 255, 0.04);
+    color: #9ca3af; /* gray-400 */
+  }
+  .tile-pending img,
+  .tile-pending .uptime-badge,
+  .tile-pending .slurm-badge,
+  .tile-pending .instance-count {
+    filter: grayscale(1);
+  }
+
   .uptime-badge {
     background-color: #2563eb;
     color: white;
diff --git a/frontend/src/components/ui/ModelList.svelte b/frontend/src/components/ui/ModelList.svelte
index 997b808..5f9a487 100644
--- a/frontend/src/components/ui/ModelList.svelte
+++ b/frontend/src/components/ui/ModelList.svelte
@@ -2,7 +2,7 @@
     import { onMount } from "svelte";
     import ModelCard from "./ModelCard.svelte";
     import { getApiUrl } from "../../lib/config";
-    import { getModelTier } from "../../lib/modelMetrics";
+    import { getTierFromLaunchedBy } from "../../lib/modelMetrics";
 
     export let chatAppUrl;
 
@@ -57,8 +57,15 @@
             replicaCount = 0;
             models = Array.from(modelsMap.values()).map(grouped => {
                 const replicas = Array.from(grouped.replicas.values()).map(r => {
-                    // The head is the peer that owns the serving entry.
-                    const head = r.peers.find(p => p.id === grouped.id) || r.peers[0];
+                    // The head is the peer that owns the serving entry. In a
+                    // multi-node TP replica only rank-0 registers `llm`, so
+                    // has_service uniquely identifies it; the other peers all
+                    // share the same id (from served_model_name) and would
+                    // otherwise be indistinguishable.
+                    const head =
+                        r.peers.find(p => p.has_service) ||
+                        r.peers.find(p => p.id === grouped.id) ||
+                        r.peers[0];
                     const followers = r.peers.filter(p => p !== head);
                     return {
                         worker_group_id: r.worker_group_id,
@@ -101,8 +108,9 @@
             if (!haystack.includes(q)) return false;
         }
 
-        if (activeFilter === "24/7") return getModelTier(title) === "L2";
-        if (activeFilter === "slurm") return getModelTier(title) === "slurm";
+        const tier = getTierFromLaunchedBy(m.data.replicas[0]?.head?.launched_by);
+        if (activeFilter === "24/7") return tier === "L2";
+        if (activeFilter === "slurm") return tier === "slurm";
         return true;
     });
 </script>
diff --git a/frontend/src/lib/modelMetrics.ts b/frontend/src/lib/modelMetrics.ts
index 4764bbd..6139610 100644
--- a/frontend/src/lib/modelMetrics.ts
+++ b/frontend/src/lib/modelMetrics.ts
@@ -4,25 +4,24 @@ export type HostingTier = "L2" | "slurm";
 
 type ModelConfig = {
   metrics?: boolean;
-  tier?: HostingTier;
 };
 
-const models: Record<string, ModelConfig> = {
-  "swiss-ai/Apertus-8B-Instruct-2509": { tier: "L2" },
-  "zai-org/GLM-4.7-Flash": { tier: "L2" },
-  "Snowflake/snowflake-arctic-embed-l-v2.0": { tier: "L2" },
-  "cais/HarmBench-Llama-2-13b-cls": { tier: "L2" },
-  "meta-llama/Llama-3.3-70B-Instruct": { tier: "L2" },
-  "meta-llama/Llama-Guard-4-12B": { tier: "L2" },
-  "swiss-ai/Apertus-70B-Instruct-2509": { tier: "L2" },
-  "Qwen/Qwen3.5-27B": { tier: "L2" },
-};
+// Per-model overrides for the Grafana metrics dashboard URL. Add an entry
+// with `metrics: false` for models that have no panel — clicking through
+// to a blank dashboard is worse than hiding the button.
+const models: Record<string, ModelConfig> = {};
 
 export function getModelMetricsUrl(modelName: string): string | null {
   if (models[modelName]?.metrics === false) return null;
   return `${METRICS_BASE}${encodeURIComponent(modelName)}`;
 }
 
-export function getModelTier(modelName: string): HostingTier {
-  return models[modelName]?.tier ?? "slurm";
+// Hosting tier is derived from the peer's `launched_by` label rather than a
+// hardcoded model list. Persistent infra launchers ("k8s", "cscs_L1") map
+// to the 24/7 badge ("L2"); anything else — a username from model-launch, or
+// a missing/null label from an older OpenTela binary — is a Slurm job.
+const PERSISTENT_LAUNCHERS: ReadonlySet<string> = new Set(["k8s", "cscs_L1"]);
+
+export function getTierFromLaunchedBy(launched_by: string | null | undefined): HostingTier {
+  return launched_by != null && PERSISTENT_LAUNCHERS.has(launched_by) ? "L2" : "slurm";
+}