From 416327fe287e1d770e60b938b3e8495443a75fba Mon Sep 17 00:00:00 2001
From: robmsmt <rob@robmsmt.com>
Date: Mon, 18 May 2026 19:30:51 +0200
Subject: [PATCH 1/9] fix: surface PENDING peers under their served_model_name
 during boot
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before: when a launching peer is still in PENDING (no service advertised
yet), get_all_models surfaced it with id="" and worker_group_id set.
The frontend (ModelList.svelte) builds wgToModel from peers that already
carry an id, then drops any remaining id="" peer whose worker_group_id
doesn't appear in that map. During the brief PENDING window every peer
in the worker group is service-less, so wgToModel is empty for that
group and the replica is silently filtered out. By the time we COULD
render it, registrar.go flips status from PENDING to READY and advertises
the service in the same step — so PENDING is never actually visible on
the dashboard.

After: fall back to labels.served_model_name (already emitted by
model-launch's _ocf_labels on every peer) when synthesising the
no-service entry. The peer now has a real model id during boot, the
frontend's grouping succeeds, and the status pill renders "pending"
until the health check passes.

Tests updated: the multi-node-replica grouping test previously asserted
the follower kept id="". With served_model_name on every peer, both
peers in the group now resolve to the same id; we still verify the
shared worker_group_id keeps them in one replica. Added a defensive
test for the older-binary case (no served_model_name label) where the
id stays empty as before.
---
 backend/services/model_service.py   |  7 ++++-
 backend/tests/test_model_service.py | 46 ++++++++++++++++++++++-------
 2 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/backend/services/model_service.py b/backend/services/model_service.py
index f1eec4f..ec43bb6 100644
--- a/backend/services/model_service.py
+++ b/backend/services/model_service.py
@@ -63,8 +63,13 @@ def get_all_models(endpoint: str, with_details: bool = False):
             # worker_group_id and show it as part of a launching/follower set.
             if not meta["worker_group_id"]:
                 continue
+            # Fall back to the served_model_name label so the frontend can
+            # group PENDING peers under their eventual model card during boot.
+            # Without this, the brief PENDING window is invisible because the
+            # peer has no advertised service yet and nothing else maps its
+            # worker_group_id back to a model id.
             entry = {
-                "id": "",  # no model yet
+                "id": meta["labels"].get("served_model_name", ""),
                 "object": "model",
                 "created": "0x",
                 "owner": "0x",
diff --git a/backend/tests/test_model_service.py b/backend/tests/test_model_service.py
index e8c7727..82bc1fe 100644
--- a/backend/tests/test_model_service.py
+++ b/backend/tests/test_model_service.py
@@ -53,6 +53,7 @@ def json(self):
         "slurm_job_id": "12345",
         "worker_group_id": "12345",
         "framework": "sglang",
+        "served_model_name": "swiss-ai/Apertus-8B",
         "started_at": "2026-05-15T18:00:00Z",
     },
     "hardware": {"gpus": [{"name": "GH200"}] * 4},
@@ -100,9 +101,11 @@ def test_new_binary_head_carries_labels():
 
 
 def test_metrics_only_follower_groups_with_head_via_worker_group_id():
-    """A multi-node replica's follower has no `service` but does carry
-    worker_group_id. It should appear in the output with id='' so the
-    frontend can attribute it to the same replica as the head."""
+    """A peer with no advertised `service` (multi-node follower, or a head
+    still in PENDING during boot) should fall back to its served_model_name
+    label so the frontend can render the model card during the brief window
+    before the service is published. Without the fallback, the peer's id
+    stays empty and the frontend silently drops it."""
     with patch("backend.services.model_service.requests.get") as mock_get:
         mock_get.return_value = _dnt_response(
             {
@@ -114,8 +117,10 @@ def test_metrics_only_follower_groups_with_head_via_worker_group_id():
     assert len(out) == 2
     by_id = {e["peer_id"]: e for e in out}
     assert by_id["QmHead"]["id"] == "swiss-ai/Apertus-8B"
-    assert by_id["QmFollower"]["id"] == ""
-    # Shared worker_group_id lets the frontend group them.
+    # Follower inherits id from the served_model_name label — same model card.
+    assert by_id["QmFollower"]["id"] == "swiss-ai/Apertus-8B"
+    assert by_id["QmFollower"]["status"] == "pending"
+    # Shared worker_group_id lets the frontend group them within the model.
     assert (
         by_id["QmHead"]["worker_group_id"]
         == by_id["QmFollower"]["worker_group_id"]
@@ -123,6 +128,23 @@ def test_metrics_only_follower_groups_with_head_via_worker_group_id():
     )
 
 
+def test_pending_peer_without_served_model_name_label_falls_back_to_empty_id():
+    """Defensive: if a peer is mid-boot from an older binary that doesn't
+    emit served_model_name, we still surface it via worker_group_id with
+    id=''. The frontend then needs another peer in the same group with an
+    id to attribute it; otherwise it's dropped."""
+    peer = {
+        **PEER_NEW_BINARY_FOLLOWER,
+        "labels": {k: v for k, v in PEER_NEW_BINARY_FOLLOWER["labels"].items() if k != "served_model_name"},
+    }
+    with patch("backend.services.model_service.requests.get") as mock_get:
+        mock_get.return_value = _dnt_response({"/QmPending": peer})
+        out = get_all_models("http://x/v1/dnt/table", with_details=True)
+    assert len(out) == 1
+    assert out[0]["id"] == ""
+    assert out[0]["worker_group_id"] == "12345"
+
+
 def test_follower_without_worker_group_id_skipped():
     """Older binary follower with no labels and no service is uninformative —
     drop it so the model list stays clean."""
@@ -196,9 +218,10 @@ def test_real_prod_payload_returns_models():
 
 def test_upgraded_payload_groups_multinode_replica():
     """Simulated v0.0.6 deployment: the gemma 'multi-node demo' pair share a
-    worker_group_id. One has a service, the other is metrics-only with id=''.
-    Backend returns both entries with the shared worker_group_id so the
-    frontend can aggregate them into one logical replica."""
+    worker_group_id. Both peers carry the served_model_name label, so both
+    resolve to the same model id even though only one advertises a service.
+    Backend returns both entries with the shared worker_group_id + model id
+    so the frontend can aggregate them into one logical replica."""
     with patch("backend.services.model_service.requests.get") as mock_get:
         mock_get.return_value = type(
             "R",
@@ -212,7 +235,8 @@ def test_upgraded_payload_groups_multinode_replica():
         by_wg.setdefault(e["worker_group_id"], []).append(e)
     multi = [v for v in by_wg.values() if len(v) > 1]
     assert multi, "fixture should contain at least one multi-peer worker group"
-    # At least one peer in the multi-peer group should be metrics-only (id='').
     pair = multi[0]
-    assert any(e["id"] == "" for e in pair), pair
-    assert any(e["id"] != "" for e in pair), pair
+    # Both peers in the group share the same non-empty model id.
+    ids = {e["id"] for e in pair}
+    assert ids != {""}, pair
+    assert len(ids) == 1, f"peers in one worker group should share one model id: {ids}"

From 99ca1498e35cc8130b708e6215b556e9e0eb3655 Mon Sep 17 00:00:00 2001
From: robmsmt <rob@robmsmt.com>
Date: Mon, 18 May 2026 20:08:29 +0200
Subject: [PATCH 2/9] ui: right-align action buttons + pending peer example
 fixture
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Right-align the OpenWebUI + Metrics Dashboard buttons inside the
expanded model card (`justify-end` on the flex row). Matches the
in-card details which are right-aligned by design.

Add a synthetic PENDING peer to the dummy-run fixture so
`make dummy-run` shows what a booting model looks like — status:
"pending", service: [], served_model_name carried in labels. Hostname +
peer id are synthetic but realistic; framework_args resembles a 70B vLLM
launch.
---
 .../tests/fixtures/dnt_table_dev_live.json    | 55 +++++++++++++++++++
 frontend/src/components/ui/ModelCard.svelte   |  2 +-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/backend/tests/fixtures/dnt_table_dev_live.json b/backend/tests/fixtures/dnt_table_dev_live.json
index c3fa15f..4e8fc09 100644
--- a/backend/tests/fixtures/dnt_table_dev_live.json
+++ b/backend/tests/fixtures/dnt_table_dev_live.json
@@ -139,6 +139,61 @@
     "status": "ready",
     "version": "v0.1.11"
   },
+  "/QmPendingExamplePeerForDummyRunZZZZZZZZZZZZZZ": {
+    "available_offering": null,
+    "connected": true,
+    "current_offering": null,
+    "hardware": {
+      "gpus": [
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 0
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 0
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 0
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 0
+        }
+      ],
+      "host_memory": 0,
+      "host_memory_bandwidth": 0,
+      "host_memory_used": 0
+    },
+    "hostname": "nid007777",
+    "id": "QmPendingExamplePeerForDummyRunZZZZZZZZZZZZZZ",
+    "labels": {
+      "expires_at": "2026-05-19T01:30:00Z",
+      "framework": "vllm",
+      "framework_args": "--port 8080 --model /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-70B-Instruct-2509 --served-model-name swiss-ai/Apertus-70B-Instruct-2509-rob-pending --tensor-parallel-size 4 --max-model-len 8192 --enable-metrics",
+      "launched_by": "rosmith",
+      "served_model_name": "swiss-ai/Apertus-70B-Instruct-2509-rob-pending",
+      "slurm_job_id": "2299999",
+      "slurm_partition": "normal",
+      "started_at": "2026-05-18T19:30:00Z",
+      "worker_group_id": "2299999"
+    },
+    "last_seen": 1779139800,
+    "latency": 0,
+    "load": null,
+    "owner": "",
+    "privileged": false,
+    "public_address": "",
+    "role": null,
+    "service": [],
+    "status": "pending",
+    "version": "dev-9ff5ec9"
+  },
   "/QmPqzJvbFVdoXXTq2rKW1pZBUmKLhZEWWHa4RFVrnUMf7K": {
     "available_offering": null,
     "connected": true,
diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte
index a71e37c..23720ce 100644
--- a/frontend/src/components/ui/ModelCard.svelte
+++ b/frontend/src/components/ui/ModelCard.svelte
@@ -188,7 +188,7 @@
       role="region"
     >
       <!-- Action buttons (what clicking the card used to do, plus metrics) -->
-      <div class="flex flex-wrap gap-2">
+      <div class="flex flex-wrap justify-end gap-2">
         <a
           href={chatUrl}
           target="_blank"

From 4ebdec09c9bfec3c8c3b934acc50889ee2672bcb Mon Sep 17 00:00:00 2001
From: robmsmt <rob@robmsmt.com>
Date: Mon, 18 May 2026 20:10:13 +0200
Subject: [PATCH 3/9] fixture: swap synthetic pending peer for a real one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Caught the dev mesh while a fresh sml launch (job 2297439, --dev3) was
still in OCF-PENDING — service: null (the live payload reports null, not
the [] used in the synthetic entry), status: "pending", labels carry
served_model_name. Real shape, real hostname, real peer id; nothing
hand-rolled.
---
 .../tests/fixtures/dnt_table_dev_live.json    | 110 +++++++++---------
 1 file changed, 55 insertions(+), 55 deletions(-)

diff --git a/backend/tests/fixtures/dnt_table_dev_live.json b/backend/tests/fixtures/dnt_table_dev_live.json
index 4e8fc09..b6ccb64 100644
--- a/backend/tests/fixtures/dnt_table_dev_live.json
+++ b/backend/tests/fixtures/dnt_table_dev_live.json
@@ -139,61 +139,6 @@
     "status": "ready",
     "version": "v0.1.11"
   },
-  "/QmPendingExamplePeerForDummyRunZZZZZZZZZZZZZZ": {
-    "available_offering": null,
-    "connected": true,
-    "current_offering": null,
-    "hardware": {
-      "gpus": [
-        {
-          "name": "NVIDIA GH200 120GB",
-          "total_memory": 97871,
-          "used_memory": 0
-        },
-        {
-          "name": "NVIDIA GH200 120GB",
-          "total_memory": 97871,
-          "used_memory": 0
-        },
-        {
-          "name": "NVIDIA GH200 120GB",
-          "total_memory": 97871,
-          "used_memory": 0
-        },
-        {
-          "name": "NVIDIA GH200 120GB",
-          "total_memory": 97871,
-          "used_memory": 0
-        }
-      ],
-      "host_memory": 0,
-      "host_memory_bandwidth": 0,
-      "host_memory_used": 0
-    },
-    "hostname": "nid007777",
-    "id": "QmPendingExamplePeerForDummyRunZZZZZZZZZZZZZZ",
-    "labels": {
-      "expires_at": "2026-05-19T01:30:00Z",
-      "framework": "vllm",
-      "framework_args": "--port 8080 --model /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-70B-Instruct-2509 --served-model-name swiss-ai/Apertus-70B-Instruct-2509-rob-pending --tensor-parallel-size 4 --max-model-len 8192 --enable-metrics",
-      "launched_by": "rosmith",
-      "served_model_name": "swiss-ai/Apertus-70B-Instruct-2509-rob-pending",
-      "slurm_job_id": "2299999",
-      "slurm_partition": "normal",
-      "started_at": "2026-05-18T19:30:00Z",
-      "worker_group_id": "2299999"
-    },
-    "last_seen": 1779139800,
-    "latency": 0,
-    "load": null,
-    "owner": "",
-    "privileged": false,
-    "public_address": "",
-    "role": null,
-    "service": [],
-    "status": "pending",
-    "version": "dev-9ff5ec9"
-  },
   "/QmPqzJvbFVdoXXTq2rKW1pZBUmKLhZEWWHa4RFVrnUMf7K": {
     "available_offering": null,
     "connected": true,
@@ -470,6 +415,61 @@
     "status": "ready",
     "version": "v0.1.11"
   },
+  "/QmY7FvKB3i6N1yvpkgAZXQCnFmpKR5WJ4MqqGNcLb3tWC5": {
+    "available_offering": null,
+    "connected": true,
+    "current_offering": null,
+    "hardware": {
+      "gpus": [
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 6
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 5
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 5
+        },
+        {
+          "name": "NVIDIA GH200 120GB",
+          "total_memory": 97871,
+          "used_memory": 13
+        }
+      ],
+      "host_memory": 0,
+      "host_memory_bandwidth": 0,
+      "host_memory_used": 0
+    },
+    "hostname": "nid007456",
+    "id": "QmY7FvKB3i6N1yvpkgAZXQCnFmpKR5WJ4MqqGNcLb3tWC5",
+    "labels": {
+      "expires_at": "2026-05-19T00:09:35Z",
+      "framework": "sglang",
+      "framework_args": "--port 8080 --model-path /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-8B-Instruct-2509 --served-model-name swiss-ai/Apertus-8B-Instruct-2509-rob-dev3 --host 0.0.0.0 --enable-metrics",
+      "launched_by": "rosmith",
+      "served_model_name": "swiss-ai/Apertus-8B-Instruct-2509-rob-dev3",
+      "slurm_job_id": "2297439",
+      "slurm_partition": "normal",
+      "started_at": "2026-05-18T18:09:35Z",
+      "worker_group_id": "2297439"
+    },
+    "last_seen": 1779127775,
+    "latency": 0,
+    "load": null,
+    "owner": "",
+    "privileged": false,
+    "public_address": "",
+    "role": null,
+    "service": null,
+    "status": "pending",
+    "version": "dev-9ff5ec9"
+  },
   "/QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9": {
     "available_offering": null,
     "connected": true,

From a83d4e242ea78e5f07cf74fa1ae1735f94e53f19 Mon Sep 17 00:00:00 2001
From: robmsmt <rob@robmsmt.com>
Date: Mon, 18 May 2026 20:16:44 +0200
Subject: [PATCH 4/9] ui: tighten extra-labels padding to fit the actual key
 length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Extra labels pre-block used padEnd(18) — a fixed width copied from
the main-labels block where it makes columns line up with header /
follower entries. In the extras block there's typically just one entry
(framework_args, 14 chars), so the fixed pad inserts 4 extra spaces
between key and value with nothing to align them to. Reads like a
formatting bug.

Compute the pad from the longest key present + 1. With framework_args
alone, that's padEnd(15) — one pad space plus the template's literal
separator space between key and value, instead of four plus one. If
more labels show up later, they self-align to the longest key.
---
 frontend/src/components/ui/ModelCard.svelte | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte
index 23720ce..eae8260 100644
--- a/frontend/src/components/ui/ModelCard.svelte
+++ b/frontend/src/components/ui/ModelCard.svelte
@@ -272,8 +272,9 @@
               !["launched_by","slurm_job_id","worker_group_id","framework","started_at","expires_at","slurm_partition","served_model_name"].includes(k)
             )}
             {#if extra.length > 0}
+              {@const pad = Math.max(...extra.map(([k]) => k.length)) + 1}
               <div class="text-xs text-slate-500 dark:text-slate-400 mt-2 mb-1">Extra labels</div>
-              <pre class="code-block">{extra.map(([k, v]) => `${k.padEnd(18)} ${v}`).join("\n")}</pre>
+              <pre class="code-block">{extra.map(([k, v]) => `${k.padEnd(pad)} ${v}`).join("\n")}</pre>
             {/if}
           {/if}
         </div>

From d1c37032a879fa518396faadfc598fd10e5e1b30 Mon Sep 17 00:00:00 2001
From: robmsmt <rob@robmsmt.com>
Date: Mon, 18 May 2026 20:21:36 +0200
Subject: [PATCH 5/9] ui: rename "Open in OpenWebUI" to "Swiss AI Chat",
 reorder buttons
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Button text "Open in OpenWebUI" → "Swiss AI Chat". The underlying URL
  still points at the OpenWebUI deployment, but users see "Swiss AI Chat"
  which matches the surface-level brand they actually interact with.
- Reorder so Metrics Dashboard (secondary, emerald) sits left of Swiss
  AI Chat (primary, black). Both still right-aligned as a group; the
  primary action lands at the right edge where the eye finishes scanning
  the card.
---
 frontend/src/components/ui/ModelCard.svelte | 29 +++++++++++----------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte
index eae8260..b8d3ed9 100644
--- a/frontend/src/components/ui/ModelCard.svelte
+++ b/frontend/src/components/ui/ModelCard.svelte
@@ -187,21 +187,9 @@
       on:keydown|stopPropagation
       role="region"
     >
-      <!-- Action buttons (what clicking the card used to do, plus metrics) -->
+      <!-- Action buttons: Metrics (secondary, left) then Swiss AI Chat
+           (primary, right). Right-aligned as a group. -->
       <div class="flex flex-wrap justify-end gap-2">
-        <a
-          href={chatUrl}
-          target="_blank"
-          rel="noopener noreferrer"
-          class="inline-flex items-center gap-2 px-4 py-2 rounded-md bg-black hover:bg-neutral-800 text-white text-sm font-medium transition-colors"
-        >
-          <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
-            <path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"></path>
-            <polyline points="15 3 21 3 21 9"></polyline>
-            <line x1="10" y1="14" x2="21" y2="3"></line>
-          </svg>
-          Open in OpenWebUI
-        </a>
         {#if metricsUrl}
           <a
             href={metricsUrl}
@@ -216,6 +204,19 @@
             Metrics Dashboard
           </a>
         {/if}
+        <a
+          href={chatUrl}
+          target="_blank"
+          rel="noopener noreferrer"
+          class="inline-flex items-center gap-2 px-4 py-2 rounded-md bg-black hover:bg-neutral-800 text-white text-sm font-medium transition-colors"
+        >
+          <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+            <path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"></path>
+            <polyline points="15 3 21 3 21 9"></polyline>
+            <line x1="10" y1="14" x2="21" y2="3"></line>
+          </svg>
+          Swiss AI Chat
+        </a>
       </div>
 
       <!-- Per-replica detail blocks -->

From 6325f312068c342efefd2c302ca4ae8f06cce744 Mon Sep 17 00:00:00 2001
From: robmsmt <rob@robmsmt.com>
Date: Mon, 18 May 2026 20:22:51 +0200
Subject: [PATCH 6/9] ui: shorten button labels to "Chat" and "Metrics",
 restore Chat-first order
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- "Swiss AI Chat" → "Chat" and "Metrics Dashboard" → "Metrics". The
  expanded card sits below the model title that already says
  "swiss-ai/..." — repeating "Swiss AI" in the button label is noise,
  and "Dashboard" doesn't carry meaning past the icon.
- Restore Chat-first order so Metrics ends up at the right edge, where
  it lived before the brief mid-iteration swap.
---
 frontend/src/components/ui/ModelCard.svelte | 31 ++++++++++-----------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte
index b8d3ed9..feaeb85 100644
--- a/frontend/src/components/ui/ModelCard.svelte
+++ b/frontend/src/components/ui/ModelCard.svelte
@@ -187,9 +187,21 @@
       on:keydown|stopPropagation
       role="region"
     >
-      <!-- Action buttons: Metrics (secondary, left) then Swiss AI Chat
-           (primary, right). Right-aligned as a group. -->
+      <!-- Action buttons: Chat (primary) + Metrics, right-aligned. -->
       <div class="flex flex-wrap justify-end gap-2">
+        <a
+          href={chatUrl}
+          target="_blank"
+          rel="noopener noreferrer"
+          class="inline-flex items-center gap-2 px-4 py-2 rounded-md bg-black hover:bg-neutral-800 text-white text-sm font-medium transition-colors"
+        >
+          <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+            <path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"></path>
+            <polyline points="15 3 21 3 21 9"></polyline>
+            <line x1="10" y1="14" x2="21" y2="3"></line>
+          </svg>
+          Chat
+        </a>
         {#if metricsUrl}
           <a
             href={metricsUrl}
@@ -201,22 +213,9 @@
               <path d="M3 3v18h18"></path>
               <path d="M7 15l4-4 4 4 5-5"></path>
             </svg>
-            Metrics Dashboard
+            Metrics
           </a>
         {/if}
-        <a
-          href={chatUrl}
-          target="_blank"
-          rel="noopener noreferrer"
-          class="inline-flex items-center gap-2 px-4 py-2 rounded-md bg-black hover:bg-neutral-800 text-white text-sm font-medium transition-colors"
-        >
-          <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
-            <path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"></path>
-            <polyline points="15 3 21 3 21 9"></polyline>
-            <line x1="10" y1="14" x2="21" y2="3"></line>
-          </svg>
-          Swiss AI Chat
-        </a>
       </div>
 
       <!-- Per-replica detail blocks -->

From 6a7379f7dcb0488ef657992fa5ba5dcc9e850f03 Mon Sep 17 00:00:00 2001
From: robmsmt <rob@robmsmt.com>
Date: Mon, 18 May 2026 20:24:35 +0200
Subject: [PATCH 7/9] ui: left-align action buttons (drop justify-end)

---
 frontend/src/components/ui/ModelCard.svelte | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte
index feaeb85..8067fd9 100644
--- a/frontend/src/components/ui/ModelCard.svelte
+++ b/frontend/src/components/ui/ModelCard.svelte
@@ -187,8 +187,8 @@
       on:keydown|stopPropagation
       role="region"
     >
-      <!-- Action buttons: Chat (primary) + Metrics, right-aligned. -->
-      <div class="flex flex-wrap justify-end gap-2">
+      <!-- Action buttons: Chat (primary) + Metrics, left-aligned. -->
+      <div class="flex flex-wrap gap-2">
         <a
           href={chatUrl}
           target="_blank"

From f4269baf24ce50da390a49270dab6f2aea1cf6ff Mon Sep 17 00:00:00 2001
From: robmsmt <rob@robmsmt.com>
Date: Mon, 18 May 2026 20:26:12 +0200
Subject: [PATCH 8/9] ui: prefix topology summary with replica count when > 1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The header line said "on 4 nodes × 4x GH200" for a model with 2
replicas of 4 nodes each — undercounting the actual resources by half.
Prepend the replica multiplier so the line describes total commitment:

  1 replica, 4 nodes      → "4 nodes × 4x GH200"
  2 replicas, 4 nodes each → "2 replicas × 4 nodes × 4x GH200"
  10 replicas, 1 node each → "10 replicas × 4x GH200"

The red ×N chip next to the title still shows the replica count
on its own; the topology line now expresses it in resource terms.
---
 frontend/src/components/ui/ModelCard.svelte | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte
index 8067fd9..f556640 100644
--- a/frontend/src/components/ui/ModelCard.svelte
+++ b/frontend/src/components/ui/ModelCard.svelte
@@ -83,13 +83,21 @@
 
   // Header summary across all replicas of this model. If every replica has
   // the same per-replica topology (almost always true: a model is launched
-  // with one shape), show it once. Otherwise admit ambiguity rather than
-  // pick one to display.
+  // with one shape), show it with the replica multiplier prefixed when
+  // there's more than one. Otherwise admit ambiguity rather than pick one
+  // to display.
+  //
+  //   1 replica, 1 node              → "4x NVIDIA GH200 120GB"
+  //   1 replica, 4 nodes             → "4 nodes × 4x NVIDIA GH200 120GB"
+  //   2 replicas, 4 nodes each       → "2 replicas × 4 nodes × 4x NVIDIA GH200 120GB"
+  //   replicas with differing shapes → "Various"
   function topologySummary(replicas: Replica[]): string {
     if (replicas.length === 0) return "unknown";
     const distinct = new Set(replicas.map(topologyString));
-    if (distinct.size === 1) return [...distinct][0];
-    return "Various";
+    if (distinct.size !== 1) return "Various";
+    const perReplica = [...distinct][0];
+    if (replicas.length === 1) return perReplica;
+    return `${replicas.length} replicas × ${perReplica}`;
   }
 
   async function copyModelName(e: Event) {

From bb349df4c24e82d81a779c05eefa08e244ec5cc1 Mon Sep 17 00:00:00 2001
From: robmsmt <rob@robmsmt.com>
Date: Mon, 18 May 2026 20:30:27 +0200
Subject: [PATCH 9/9] format: ruff format on test_model_service.py

---
 backend/tests/test_model_service.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/backend/tests/test_model_service.py b/backend/tests/test_model_service.py
index 82bc1fe..464271d 100644
--- a/backend/tests/test_model_service.py
+++ b/backend/tests/test_model_service.py
@@ -135,7 +135,11 @@ def test_pending_peer_without_served_model_name_label_falls_back_to_empty_id():
     id to attribute it; otherwise it's dropped."""
     peer = {
         **PEER_NEW_BINARY_FOLLOWER,
-        "labels": {k: v for k, v in PEER_NEW_BINARY_FOLLOWER["labels"].items() if k != "served_model_name"},
+        "labels": {
+            k: v
+            for k, v in PEER_NEW_BINARY_FOLLOWER["labels"].items()
+            if k != "served_model_name"
+        },
     }
     with patch("backend.services.model_service.requests.get") as mock_get:
         mock_get.return_value = _dnt_response({"/QmPending": peer})