swiss-ai · robmsmt · May 18, 2026 · May 18, 2026 · May 18, 2026 · May 18, 2026
diff --git a/backend/services/model_service.py b/backend/services/model_service.py
@@ -63,8 +63,13 @@ def get_all_models(endpoint: str, with_details: bool = False):
             # worker_group_id and show it as part of a launching/follower set.
             if not meta["worker_group_id"]:
                 continue
+            # Fall back to the served_model_name label so the frontend can
+            # group PENDING peers under their eventual model card during boot.
+            # Without this, the brief PENDING window is invisible because the
+            # peer has no advertised service yet and nothing else maps its
+            # worker_group_id back to a model id.
             entry = {
-                "id": "",  # no model yet
+                "id": meta["labels"].get("served_model_name", ""),
                 "object": "model",
                 "created": "0x",
                 "owner": "0x",

diff --git a/backend/tests/fixtures/dnt_table_dev_live.json b/backend/tests/fixtures/dnt_table_dev_live.json
@@ -415,6 +415,61 @@
     "status": "ready",
     "version": "v0.1.11"
   },
+  "/QmY7FvKB3i6N1yvpkgAZXQCnFmpKR5WJ4MqqGNcLb3tWC5": {
+    "available_offering": null,
+    "connected": true,
+    "current_offering": null,
+    "hardware": {
+      "gpus": [
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 6
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 5
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 5
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 13
+        }
+      ],
+      "host_memory": 0,
+      "host_memory_bandwidth": 0,
+      "host_memory_used": 0
+    },
+    "hostname": "nid007456",
+    "id": "QmY7FvKB3i6N1yvpkgAZXQCnFmpKR5WJ4MqqGNcLb3tWC5",
+    "labels": {
+      "expires_at": "2026-05-19T00:09:35Z",
+      "framework": "sglang",
+      "framework_args": "--port 8080 --model-path /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-8B-Instruct-2509 --served-model-name swiss-ai/Apertus-8B-Instruct-2509-rob-dev3 --host 0.0.0.0 --enable-metrics",
+      "launched_by": "rosmith",
+      "served_model_name": "swiss-ai/Apertus-8B-Instruct-2509-rob-dev3",
+      "slurm_job_id": "2297439",
+      "slurm_partition": "normal",
+      "started_at": "2026-05-18T18:09:35Z",
+      "worker_group_id": "2297439"
+    },
+    "last_seen": 1779127775,
+    "latency": 0,
+    "load": null,
+    "owner": "",
+    "privileged": false,
+    "public_address": "",
+    "role": null,
+    "service": null,
+    "status": "pending",
+    "version": "dev-9ff5ec9"
+  },
   "/QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9": {
     "available_offering": null,
     "connected": true,

diff --git a/backend/tests/test_model_service.py b/backend/tests/test_model_service.py
@@ -53,6 +53,7 @@ def json(self):
         "slurm_job_id": "12345",
         "worker_group_id": "12345",
         "framework": "sglang",
+        "served_model_name": "swiss-ai/Apertus-8B",
         "started_at": "2026-05-15T18:00:00Z",
     },
     "hardware": {"gpus": [{"name": "GH200"}] * 4},
@@ -100,9 +101,11 @@ def test_new_binary_head_carries_labels():
 
 
 def test_metrics_only_follower_groups_with_head_via_worker_group_id():
-    """A multi-node replica's follower has no `service` but does carry
-    worker_group_id. It should appear in the output with id='' so the
-    frontend can attribute it to the same replica as the head."""
+    """A peer with no advertised `service` (multi-node follower, or a head
+    still in PENDING during boot) should fall back to its served_model_name
+    label so the frontend can render the model card during the brief window
+    before the service is published. Without the fallback, the peer's id
+    stays empty and the frontend silently drops it."""
     with patch("backend.services.model_service.requests.get") as mock_get:
         mock_get.return_value = _dnt_response(
             {
@@ -114,15 +117,38 @@ def test_metrics_only_follower_groups_with_head_via_worker_group_id():
     assert len(out) == 2
     by_id = {e["peer_id"]: e for e in out}
     assert by_id["QmHead"]["id"] == "swiss-ai/Apertus-8B"
-    assert by_id["QmFollower"]["id"] == ""
-    # Shared worker_group_id lets the frontend group them.
+    # Follower inherits id from the served_model_name label — same model card.
+    assert by_id["QmFollower"]["id"] == "swiss-ai/Apertus-8B"
+    assert by_id["QmFollower"]["status"] == "pending"
+    # Shared worker_group_id lets the frontend group them within the model.
     assert (
         by_id["QmHead"]["worker_group_id"]
         == by_id["QmFollower"]["worker_group_id"]
         == "12345"
     )
 
 
+def test_pending_peer_without_served_model_name_label_falls_back_to_empty_id():
+    """Defensive: a peer mid-boot from an older binary emits no
+    served_model_name label. It must still be surfaced via worker_group_id,
+    just with id=''; the frontend then needs a same-group peer that has an
+    id to attribute it, otherwise it's dropped."""
+    # Same follower fixture, minus the served_model_name label.
+    labels = dict(PEER_NEW_BINARY_FOLLOWER["labels"])
+    labels.pop("served_model_name", None)
+    pending_peer = {**PEER_NEW_BINARY_FOLLOWER, "labels": labels}
+    table = {"/QmPending": pending_peer}
+
+    with patch("backend.services.model_service.requests.get") as fake_get:
+        fake_get.return_value = _dnt_response(table)
+        models = get_all_models("http://x/v1/dnt/table", with_details=True)
+
+    assert len(models) == 1
+    (entry,) = models
+    assert entry["id"] == ""
+    assert entry["worker_group_id"] == "12345"
+
+
 def test_follower_without_worker_group_id_skipped():
     """Older binary follower with no labels and no service is uninformative —
     drop it so the model list stays clean."""
@@ -196,9 +222,10 @@ def test_real_prod_payload_returns_models():
 
 def test_upgraded_payload_groups_multinode_replica():
     """Simulated v0.0.6 deployment: the gemma 'multi-node demo' pair share a
-    worker_group_id. One has a service, the other is metrics-only with id=''.
-    Backend returns both entries with the shared worker_group_id so the
-    frontend can aggregate them into one logical replica."""
+    worker_group_id. Both peers carry the served_model_name label, so both
+    resolve to the same model id even though only one advertises a service.
+    Backend returns both entries with the shared worker_group_id + model id
+    so the frontend can aggregate them into one logical replica."""
     with patch("backend.services.model_service.requests.get") as mock_get:
         mock_get.return_value = type(
             "R",
@@ -212,7 +239,8 @@ def test_upgraded_payload_groups_multinode_replica():
         by_wg.setdefault(e["worker_group_id"], []).append(e)
     multi = [v for v in by_wg.values() if len(v) > 1]
     assert multi, "fixture should contain at least one multi-peer worker group"
-    # At least one peer in the multi-peer group should be metrics-only (id='').
     pair = multi[0]
-    assert any(e["id"] == "" for e in pair), pair
-    assert any(e["id"] != "" for e in pair), pair
+    # Both peers in the group share the same non-empty model id.
+    ids = {e["id"] for e in pair}
+    assert ids != {""}, pair
+    assert len(ids) == 1, f"peers in one worker group should share one model id: {ids}"
diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte
@@ -83,13 +83,21 @@
 
   // Header summary across all replicas of this model. If every replica has
   // the same per-replica topology (almost always true: a model is launched
-  // with one shape), show it once. Otherwise admit ambiguity rather than
-  // pick one to display.
+  // with one shape), show it with the replica multiplier prefixed when
+  // there's more than one. Otherwise admit ambiguity rather than pick one
+  // to display.
+  //
+  //   1 replica, 1 node              → "4x NVIDIA GH200 120GB"
+  //   1 replica, 4 nodes             → "4 nodes × 4x NVIDIA GH200 120GB"
+  //   2 replicas, 4 nodes each       → "2 replicas × 4 nodes × 4x NVIDIA GH200 120GB"
+  //   replicas with differing shapes → "Various"
   function topologySummary(replicas: Replica[]): string {
     if (replicas.length === 0) return "unknown";
     const distinct = new Set(replicas.map(topologyString));
-    if (distinct.size === 1) return [...distinct][0];
-    return "Various";
+    if (distinct.size !== 1) return "Various";
+    const [shape] = distinct;
+    const count = replicas.length;
+    return count === 1 ? shape : `${count} replicas × ${shape}`;
   }
 
   async function copyModelName(e: Event) {
@@ -187,7 +195,7 @@
       on:keydown|stopPropagation
       role="region"
     >
-      <!-- Action buttons (what clicking the card used to do, plus metrics) -->
+      <!-- Action buttons: Chat (primary) + Metrics, left-aligned. -->
       <div class="flex flex-wrap gap-2">
         <a
           href={chatUrl}
@@ -200,7 +208,7 @@
             <polyline points="15 3 21 3 21 9"></polyline>
             <line x1="10" y1="14" x2="21" y2="3"></line>
           </svg>
-          Open in OpenWebUI
+          Chat
         </a>
         {#if metricsUrl}
           <a
@@ -213,7 +221,7 @@
               <path d="M3 3v18h18"></path>
               <path d="M7 15l4-4 4 4 5-5"></path>
             </svg>
-            Metrics Dashboard
+            Metrics
           </a>
         {/if}
       </div>
@@ -272,8 +280,9 @@
               !["launched_by","slurm_job_id","worker_group_id","framework","started_at","expires_at","slurm_partition","served_model_name"].includes(k)
             )}
             {#if extra.length > 0}
+              {@const pad = Math.max(...extra.map(([k]) => k.length)) + 1}
               <div class="text-xs text-slate-500 dark:text-slate-400 mt-2 mb-1">Extra labels</div>
-              <pre class="code-block">{extra.map(([k, v]) => `${k.padEnd(18)} ${v}`).join("\n")}</pre>
+              <pre class="code-block">{extra.map(([k, v]) => `${k.padEnd(pad)} ${v}`).join("\n")}</pre>
             {/if}
           {/if}
         </div>