Skip to content
Merged
7 changes: 6 additions & 1 deletion backend/services/model_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,13 @@ def get_all_models(endpoint: str, with_details: bool = False):
# worker_group_id and show it as part of a launching/follower set.
if not meta["worker_group_id"]:
continue
# Fall back to the served_model_name label so the frontend can
# group PENDING peers under their eventual model card during boot.
# Without this, the brief PENDING window is invisible because the
# peer has no advertised service yet and nothing else maps its
# worker_group_id back to a model id.
entry = {
"id": "", # no model yet
"id": meta["labels"].get("served_model_name", ""),
"object": "model",
"created": "0x",
"owner": "0x",
Expand Down
55 changes: 55 additions & 0 deletions backend/tests/fixtures/dnt_table_dev_live.json
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,61 @@
"status": "ready",
"version": "v0.1.11"
},
"/QmY7FvKB3i6N1yvpkgAZXQCnFmpKR5WJ4MqqGNcLb3tWC5": {
"available_offering": null,
"connected": true,
"current_offering": null,
"hardware": {
"gpus": [
{
"name": "NVIDIA GH200 120GB",
"total_memory": 97871,
"used_memory": 6
},
{
"name": "NVIDIA GH200 120GB",
"total_memory": 97871,
"used_memory": 5
},
{
"name": "NVIDIA GH200 120GB",
"total_memory": 97871,
"used_memory": 5
},
{
"name": "NVIDIA GH200 120GB",
"total_memory": 97871,
"used_memory": 13
}
],
"host_memory": 0,
"host_memory_bandwidth": 0,
"host_memory_used": 0
},
"hostname": "nid007456",
"id": "QmY7FvKB3i6N1yvpkgAZXQCnFmpKR5WJ4MqqGNcLb3tWC5",
"labels": {
"expires_at": "2026-05-19T00:09:35Z",
"framework": "sglang",
"framework_args": "--port 8080 --model-path /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-8B-Instruct-2509 --served-model-name swiss-ai/Apertus-8B-Instruct-2509-rob-dev3 --host 0.0.0.0 --enable-metrics",
"launched_by": "rosmith",
"served_model_name": "swiss-ai/Apertus-8B-Instruct-2509-rob-dev3",
"slurm_job_id": "2297439",
"slurm_partition": "normal",
"started_at": "2026-05-18T18:09:35Z",
"worker_group_id": "2297439"
},
"last_seen": 1779127775,
"latency": 0,
"load": null,
"owner": "",
"privileged": false,
"public_address": "",
"role": null,
"service": null,
"status": "pending",
"version": "dev-9ff5ec9"
},
"/QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9": {
"available_offering": null,
"connected": true,
Expand Down
50 changes: 39 additions & 11 deletions backend/tests/test_model_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def json(self):
"slurm_job_id": "12345",
"worker_group_id": "12345",
"framework": "sglang",
"served_model_name": "swiss-ai/Apertus-8B",
"started_at": "2026-05-15T18:00:00Z",
},
"hardware": {"gpus": [{"name": "GH200"}] * 4},
Expand Down Expand Up @@ -100,9 +101,11 @@ def test_new_binary_head_carries_labels():


def test_metrics_only_follower_groups_with_head_via_worker_group_id():
"""A multi-node replica's follower has no `service` but does carry
worker_group_id. It should appear in the output with id='' so the
frontend can attribute it to the same replica as the head."""
"""A peer with no advertised `service` (multi-node follower, or a head
still in PENDING during boot) should fall back to its served_model_name
label so the frontend can render the model card during the brief window
before the service is published. Without the fallback, the peer's id
stays empty and the frontend silently drops it."""
with patch("backend.services.model_service.requests.get") as mock_get:
mock_get.return_value = _dnt_response(
{
Expand All @@ -114,15 +117,38 @@ def test_metrics_only_follower_groups_with_head_via_worker_group_id():
assert len(out) == 2
by_id = {e["peer_id"]: e for e in out}
assert by_id["QmHead"]["id"] == "swiss-ai/Apertus-8B"
assert by_id["QmFollower"]["id"] == ""
# Shared worker_group_id lets the frontend group them.
# Follower inherits id from the served_model_name label — same model card.
assert by_id["QmFollower"]["id"] == "swiss-ai/Apertus-8B"
assert by_id["QmFollower"]["status"] == "pending"
# Shared worker_group_id lets the frontend group them within the model.
assert (
by_id["QmHead"]["worker_group_id"]
== by_id["QmFollower"]["worker_group_id"]
== "12345"
)


def test_pending_peer_without_served_model_name_label_falls_back_to_empty_id():
    """A service-less peer lacking ``served_model_name`` keeps ``id == ''``.

    Defensive case: a peer booting from an older binary may not emit the
    ``served_model_name`` label. It must still be listed on the strength of
    its ``worker_group_id``, with an empty model id. The frontend then has
    to find another peer in the same group that does carry an id in order
    to attribute it; otherwise the entry is dropped.
    """
    # Copy the follower fixture's labels and remove only the model-name label,
    # leaving worker_group_id and the rest untouched.
    legacy_labels = dict(PEER_NEW_BINARY_FOLLOWER["labels"])
    legacy_labels.pop("served_model_name", None)
    legacy_peer = dict(PEER_NEW_BINARY_FOLLOWER, labels=legacy_labels)

    with patch("backend.services.model_service.requests.get") as fake_get:
        fake_get.return_value = _dnt_response({"/QmPending": legacy_peer})
        models = get_all_models("http://x/v1/dnt/table", with_details=True)

    assert len(models) == 1
    only_entry = models[0]
    assert only_entry["id"] == ""
    assert only_entry["worker_group_id"] == "12345"


def test_follower_without_worker_group_id_skipped():
"""Older binary follower with no labels and no service is uninformative —
drop it so the model list stays clean."""
Expand Down Expand Up @@ -196,9 +222,10 @@ def test_real_prod_payload_returns_models():

def test_upgraded_payload_groups_multinode_replica():
"""Simulated v0.0.6 deployment: the gemma 'multi-node demo' pair share a
worker_group_id. One has a service, the other is metrics-only with id=''.
Backend returns both entries with the shared worker_group_id so the
frontend can aggregate them into one logical replica."""
worker_group_id. Both peers carry the served_model_name label, so both
resolve to the same model id even though only one advertises a service.
Backend returns both entries with the shared worker_group_id + model id
so the frontend can aggregate them into one logical replica."""
with patch("backend.services.model_service.requests.get") as mock_get:
mock_get.return_value = type(
"R",
Expand All @@ -212,7 +239,8 @@ def test_upgraded_payload_groups_multinode_replica():
by_wg.setdefault(e["worker_group_id"], []).append(e)
multi = [v for v in by_wg.values() if len(v) > 1]
assert multi, "fixture should contain at least one multi-peer worker group"
# At least one peer in the multi-peer group should be metrics-only (id='').
pair = multi[0]
assert any(e["id"] == "" for e in pair), pair
assert any(e["id"] != "" for e in pair), pair
# Both peers in the group share the same non-empty model id.
ids = {e["id"] for e in pair}
assert ids != {""}, pair
assert len(ids) == 1, f"peers in one worker group should share one model id: {ids}"
25 changes: 17 additions & 8 deletions frontend/src/components/ui/ModelCard.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,21 @@

// Header summary across all replicas of this model. If every replica has
// the same per-replica topology (almost always true: a model is launched
// with one shape), show it with the replica multiplier prefixed when
// there's more than one. Otherwise admit ambiguity rather than pick one
// to display.
//
// 1 replica, 1 node → "4x NVIDIA GH200 120GB"
// 1 replica, 4 nodes → "4 nodes × 4x NVIDIA GH200 120GB"
// 2 replicas, 4 nodes each → "2 replicas × 4 nodes × 4x NVIDIA GH200 120GB"
// replicas with differing shapes → "Various"
// no replicas → "unknown"
function topologySummary(replicas: Replica[]): string {
  if (replicas.length === 0) return "unknown";
  // Collapse replicas to their distinct per-replica topology strings.
  const distinct = new Set(replicas.map(topologyString));
  // More than one shape: do not pick an arbitrary one to show.
  if (distinct.size !== 1) return "Various";
  const perReplica = [...distinct][0];
  // A single replica needs no multiplier prefix.
  if (replicas.length === 1) return perReplica;
  return `${replicas.length} replicas × ${perReplica}`;
}

async function copyModelName(e: Event) {
Expand Down Expand Up @@ -187,7 +195,7 @@
on:keydown|stopPropagation
role="region"
>
<!-- Action buttons (what clicking the card used to do, plus metrics) -->
<!-- Action buttons: Chat (primary) + Metrics, left-aligned. -->
<div class="flex flex-wrap gap-2">
<a
href={chatUrl}
Expand All @@ -200,7 +208,7 @@
<polyline points="15 3 21 3 21 9"></polyline>
<line x1="10" y1="14" x2="21" y2="3"></line>
</svg>
Open in OpenWebUI
Chat
</a>
{#if metricsUrl}
<a
Expand All @@ -213,7 +221,7 @@
<path d="M3 3v18h18"></path>
<path d="M7 15l4-4 4 4 5-5"></path>
</svg>
Metrics Dashboard
Metrics
</a>
{/if}
</div>
Expand Down Expand Up @@ -272,8 +280,9 @@
!["launched_by","slurm_job_id","worker_group_id","framework","started_at","expires_at","slurm_partition","served_model_name"].includes(k)
)}
{#if extra.length > 0}
{@const pad = Math.max(...extra.map(([k]) => k.length)) + 1}
<div class="text-xs text-slate-500 dark:text-slate-400 mt-2 mb-1">Extra labels</div>
<pre class="code-block">{extra.map(([k, v]) => `${k.padEnd(18)} ${v}`).join("\n")}</pre>
<pre class="code-block">{extra.map(([k, v]) => `${k.padEnd(pad)} ${v}`).join("\n")}</pre>
{/if}
{/if}
</div>
Expand Down
Loading