From 6484072037538aa1d67df99520b3aec7c0852f6e Mon Sep 17 00:00:00 2001 From: robmsmt Date: Sun, 17 May 2026 16:12:43 +0200 Subject: [PATCH 1/8] feat: surface peer metadata in model card expansion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backend (model_service): pass through hostname, version, status, labels, and convenience pulls (launched_by, slurm_job_id, worker_group_id, framework, started_at) for each DNT peer. Also surface metrics-only follower peers (no service, but with worker_group_id) so multi-node replicas can be reconstructed in aggregation. Frontend (ModelList): group raw peers by worker_group_id to count replicas distinctly from peers/nodes. Headline now reads "Available Models X, Replicas Y" when the two diverge. Frontend (ModelCard): clicking the card now expands inline instead of opening OpenWebUI. The expansion shows: - Open in OpenWebUI button (the prior click behaviour) - Per-replica monospace block with model, launched_by, slurm_job_id, started_at, framework, version, head + follower hostnames - Topology header (e.g. "2 nodes × 4x GH200") - Per-replica extra-labels block for anything else OCF carries Fixtures: snapshot of live prod /dnt/table + a script that synthesises the post-v0.0.6 shape (hostname/version/status/labels) by adding a multi-node replica demo (shared worker_group_id, one head + one metrics-only follower) so the new code paths have a realistic test. 
--- backend/services/model_service.py | 87 +- backend/tests/fixtures/build_upgraded.py | 113 + backend/tests/fixtures/dnt_table_prod.json | 2844 ++++++++++++++ .../tests/fixtures/dnt_table_upgraded.json | 3324 +++++++++++++++++ backend/tests/test_model_service.py | 186 + frontend/src/components/ui/ModelCard.svelte | 342 +- frontend/src/components/ui/ModelList.svelte | 81 +- 7 files changed, 6824 insertions(+), 153 deletions(-) create mode 100644 backend/tests/fixtures/build_upgraded.py create mode 100644 backend/tests/fixtures/dnt_table_prod.json create mode 100644 backend/tests/fixtures/dnt_table_upgraded.json create mode 100644 backend/tests/test_model_service.py diff --git a/backend/services/model_service.py b/backend/services/model_service.py index ebcba51..3f4ce65 100644 --- a/backend/services/model_service.py +++ b/backend/services/model_service.py @@ -2,17 +2,66 @@ from backend.config import parse_hardware_info +def _peer_metadata(node_info: dict) -> dict: + """Pull the surfaced launch-time fields off a DNT peer entry. + + Older OCF binaries ( str: + for fw, hints in FRAMEWORK_HINTS.items(): + if any(h in model for h in hints): + return fw + return "sglang" + + +def main() -> None: + src = json.loads(SRC.read_text()) + upgraded: dict = {} + next_job_id = 2256000 + multi_node_assigned = False + + # Synthesize a stable ordering so re-runs produce stable diffs. + for i, (pid_key, peer) in enumerate(sorted(src.items())): + peer = dict(peer) # shallow copy + + # Hostname based on peer ID, padded — looks like a real nidXXXXXX. 
+ peer["hostname"] = f"nid{(0x6000 + i):06d}"[-9:] + peer["version"] = "v0.0.6" + + services = peer.get("service") or [] + model_name = "" + for svc in services: + for ig in svc.get("identity_group") or []: + if ig.startswith("model="): + model_name = ig[6:] + break + if model_name: + break + + peer["status"] = "ready" if model_name else "pending" + job_id = next_job_id + next_job_id += 1 + + peer["labels"] = { + "launched_by": USERS[i % len(USERS)], + "slurm_job_id": str(job_id), + "slurm_partition": "normal", + "worker_group_id": str(job_id), + "framework": guess_framework(model_name) if model_name else "", + "served_model_name": model_name, + "started_at": "2026-05-15T18:00:00Z", + } + # Drop empty entries so the JSON looks closer to what OCF emits. + peer["labels"] = {k: v for k, v in peer["labels"].items() if v} + + upgraded[pid_key] = peer + + # Demo a multi-node replica: pick a model with multiple peers and + # collapse two of them into a single worker_group_id, with the second + # becoming metrics-only (no service, no service entries). + by_model: dict[str, list[str]] = {} + for pid_key, peer in upgraded.items(): + for svc in peer.get("service") or []: + for ig in svc.get("identity_group") or []: + if ig.startswith("model="): + by_model.setdefault(ig[6:], []).append(pid_key) + for model, peers in by_model.items(): + if len(peers) >= 2: + head_key, follower_key = peers[0], peers[1] + shared = upgraded[head_key]["labels"]["worker_group_id"] + upgraded[follower_key]["labels"]["worker_group_id"] = shared + upgraded[follower_key]["labels"]["slurm_job_id"] = shared + upgraded[follower_key]["labels"]["launched_by"] = upgraded[head_key]["labels"]["launched_by"] + # Metrics-only: drop the service advertisement. 
+ upgraded[follower_key]["service"] = [] + upgraded[follower_key]["status"] = "ready" + multi_node_assigned = True + print(f"multi-node demo: {model} → head={head_key}, follower={follower_key}, wg={shared}") + break + assert multi_node_assigned, "No model has >=2 peers; cannot demo multi-node" + + DST.write_text(json.dumps(upgraded, indent=2)) + print(f"wrote {DST} ({len(upgraded)} peers)") + + +if __name__ == "__main__": + main() diff --git a/backend/tests/fixtures/dnt_table_prod.json b/backend/tests/fixtures/dnt_table_prod.json new file mode 100644 index 0000000..d72b887 --- /dev/null +++ b/backend/tests/fixtures/dnt_table_prod.json @@ -0,0 +1,2844 @@ +{ + "/QmPBTq7XuewwbkrdU3c9hdCuZ4XCJMm7wMKeU91xu2hcvG": { + "id": "QmPBTq7XuewwbkrdU3c9hdCuZ4XCJMm7wMKeU91xu2hcvG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=swiss-ai/Apertus-70B-Instruct-2509" + ] + } + ], + "last_seen": 1779027016, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89753 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmPZuZMWhcX2f8EbqRi9HAfrx4ZCiXVfT3c1jifNabwkRL": { + "id": "QmPZuZMWhcX2f8EbqRi9HAfrx4ZCiXVfT3c1jifNabwkRL", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": 
null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmQ1JHLDJe7KhnD48S9TgaEPG3p2az1kq4dMoKUdBssRDq": { + "id": "QmQ1JHLDJe7KhnD48S9TgaEPG3p2az1kq4dMoKUdBssRDq", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027011, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82827 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82153 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82153 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82025 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + 
"connected": true, + "load": null + }, + "/QmQFqgsty9RjE8DCMMH7QY4pCPwEhCdief1gjku44ALbNN": { + "id": "QmQFqgsty9RjE8DCMMH7QY4pCPwEhCdief1gjku44ALbNN", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027011, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84657 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmQxjzdBtpjRWwXP991ow1LezbSBSAtJP4cnBW7635YEyW": { + "id": "QmQxjzdBtpjRWwXP991ow1LezbSBSAtJP4cnBW7635YEyW", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + 
"total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmRPfyCPgcL2n9Tob6vKeNoQMX39e3fBS4JWNw7Cpz9Wa5": { + "id": "QmRPfyCPgcL2n9Tob6vKeNoQMX39e3fBS4JWNw7Cpz9Wa5", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmRYSXoAfVk1ZsqmduBrsDd1Zcx541KMA9wtT3Mnsau9tn": { + "id": "QmRYSXoAfVk1ZsqmduBrsDd1Zcx541KMA9wtT3Mnsau9tn", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027021, + "version": "", + "public_address": "", + "hardware": { 
+ "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84576 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 11 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 8 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmRgcwJAjsYRLTnBMJvk8ARYa9tNmBjHvPZ9dsLfrq1MTP": { + "id": "QmRgcwJAjsYRLTnBMJvk8ARYa9tNmBjHvPZ9dsLfrq1MTP", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86394 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86392 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmRzLCghn5BSCAmoqzTmBopB2Nh9jyApsibMBc7VU1pREG": { + "id": "QmRzLCghn5BSCAmoqzTmBopB2Nh9jyApsibMBc7VU1pREG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + 
"status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027011, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86383 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86594 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmSpTkK6cemTaVWugFjThfr4P17m5ZeQjso2adT8FWkD7G": { + "id": "QmSpTkK6cemTaVWugFjThfr4P17m5ZeQjso2adT8FWkD7G", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86590 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86592 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmSssGsuXMcN61G6ejHCwhJjbbLLukqNNs4KGiAcYZ4vPT": { + "id": "QmSssGsuXMcN61G6ejHCwhJjbbLLukqNNs4KGiAcYZ4vPT", + "latency": 0, + "privileged": false, + "owner": "", + 
"current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84563 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmSy3ee3TMXRE2PPc1VTRQyWWDj94ieahAWsJn3PF2c6em": { + "id": "QmSy3ee3TMXRE2PPc1VTRQyWWDj94ieahAWsJn3PF2c6em", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84586 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 11 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, 
+ "connected": true, + "load": null + }, + "/QmTUY7a8RYRFnhxMgQ2pNm4V9tKQDzQBpMc6dWQ9isysDZ": { + "id": "QmTUY7a8RYRFnhxMgQ2pNm4V9tKQDzQBpMc6dWQ9isysDZ", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84579 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmTVFAoyUwxkBgUJtydmUNkA1sYF2j29St5Lz5JtW4zKKm": { + "id": "QmTVFAoyUwxkBgUJtydmUNkA1sYF2j29St5Lz5JtW4zKKm", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027010, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84573 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + 
"total_memory": 97871, + "used_memory": 7 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmTidAAZgMyXAiBwsbqxi4MZQjgwiuzjw4JwNu4VJtXXmS": { + "id": "QmTidAAZgMyXAiBwsbqxi4MZQjgwiuzjw4JwNu4VJtXXmS", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18559 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18573 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18569 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18570 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmUBhMF1Gg19ZbUrmBCCWdprt3XMLiXftZebuUnnaGeUJ8": { + "id": "QmUBhMF1Gg19ZbUrmBCCWdprt3XMLiXftZebuUnnaGeUJ8", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + 
"gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84623 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmUCz7yLJt84xHyZySK6LMQDDXNQW6qfVz7TWc3UDLt6j7": { + "id": "QmUCz7yLJt84xHyZySK6LMQDDXNQW6qfVz7TWc3UDLt6j7", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84595 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmV44xF8bg51ZVrWSAstzms12i8Dt3f1oHV7qXyQiQnhTn": { + "id": "QmV44xF8bg51ZVrWSAstzms12i8Dt3f1oHV7qXyQiQnhTn", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": 
"connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmVKgxLCrKx35THcpaGbAMyweAcN8WuSRqbbrcUtRQgJur": { + "id": "QmVKgxLCrKx35THcpaGbAMyweAcN8WuSRqbbrcUtRQgJur", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84564 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 14 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmVyfUwaMw1HD9YvJ6spi2mrDyrsC5fYnTgTqKcGpraVob": { + "id": "QmVyfUwaMw1HD9YvJ6spi2mrDyrsC5fYnTgTqKcGpraVob", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": 
null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=meta-llama/Llama-3.3-70B-Instruct" + ] + } + ], + "last_seen": 1779027006, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89539 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89747 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89747 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89539 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmWH14wTNJsHJEnpRXGAozEj89Ja1Hy2nsY9RhEATgVXD2": { + "id": "QmWH14wTNJsHJEnpRXGAozEj89Ja1Hy2nsY9RhEATgVXD2", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84612 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 8 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, 
+ "connected": true, + "load": null + }, + "/QmWUY2amw5ZG2triPc5mVjySsowxxz9zp8GZpb5mJ267S8": { + "id": "QmWUY2amw5ZG2triPc5mVjySsowxxz9zp8GZpb5mJ267S8", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmXY2pbA48gJ7HcSnBQMV6VSaBMiK3jWXRyNVUbxRtZJRW": { + "id": "QmXY2pbA48gJ7HcSnBQMV6VSaBMiK3jWXRyNVUbxRtZJRW", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027011, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84622 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 16 + }, + { + "name": "NVIDIA GH200 
120GB", + "total_memory": 97871, + "used_memory": 13 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmYFsphqm4gNgpgAdjwa6ZaCFPwJxaVHFwJ9P7cLSkTqZe": { + "id": "QmYFsphqm4gNgpgAdjwa6ZaCFPwJxaVHFwJ9P7cLSkTqZe", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmYaMDbbw8WRY7RDcFuFHWDtmA9smyPeBcVNfCW4XC5mVV": { + "id": "QmYaMDbbw8WRY7RDcFuFHWDtmA9smyPeBcVNfCW4XC5mVV", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + 
"hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85692 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85900 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85900 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85690 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmZ2DSBRFUdoD2vxt4WBNjSnAZZNB211BQGA3N1P7WnkYa": { + "id": "QmZ2DSBRFUdoD2vxt4WBNjSnAZZNB211BQGA3N1P7WnkYa", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82817 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82134 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82142 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82014 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmZAJMsbmnfz2KoYbzDC7NC4SLJfzUu5y3Wiz4stXhxE6y": { + "id": "QmZAJMsbmnfz2KoYbzDC7NC4SLJfzUu5y3Wiz4stXhxE6y", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, 
+ "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027002, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82827 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82144 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82152 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82024 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmZKtLQ4Hmtu3LEphS2hn7jciRyHa4TYAPjpkfsYmZkgZr": { + "id": "QmZKtLQ4Hmtu3LEphS2hn7jciRyHa4TYAPjpkfsYmZkgZr", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84644 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmZZTz4iU4Cs46bPDFjVdG4Ws4ErvL5C3jNmpV7dwj1Xb4": { + "id": "QmZZTz4iU4Cs46bPDFjVdG4Ws4ErvL5C3jNmpV7dwj1Xb4", + "latency": 0, + "privileged": 
false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027005, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84547 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmZkb4QBua1DbmPiKz7eMpQVdceqpNxSfsjzPuJBVyDT9U": { + "id": "QmZkb4QBua1DbmPiKz7eMpQVdceqpNxSfsjzPuJBVyDT9U", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027006, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82829 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82146 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82154 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82026 + } + ], + "host_memory": 0, + 
"host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/Qma4wwUVNRfNsz4JB26z26LJyNzwhZ6YWSVeXhKAkckJY7": { + "id": "Qma4wwUVNRfNsz4JB26z26LJyNzwhZ6YWSVeXhKAkckJY7", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-Next-80B-A3B-Instruct-yiswang" + ] + } + ], + "last_seen": 1779027006, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmaEQoJVdvv2nRV3HUsH6dzjxoCy6mYBkCxGh37AaLNeMp": { + "id": "QmaEQoJVdvv2nRV3HUsH6dzjxoCy6mYBkCxGh37AaLNeMp", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82829 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 
97871, + "used_memory": 82146 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82154 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82026 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmaLdTc28YRJn5uMrUt5kKnuWQzT2giA6dE9ZRkeG7pHVC": { + "id": "QmaLdTc28YRJn5uMrUt5kKnuWQzT2giA6dE9ZRkeG7pHVC", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027006, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 17 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 10 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/Qmaf4Ahny2u9yYyHLZtv5THxZaV2fWFKFicq7bJryvDYtk": { + "id": "Qmaf4Ahny2u9yYyHLZtv5THxZaV2fWFKFicq7bJryvDYtk", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 
1779027003, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84598 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmatBYkA34rU7Xp7MZGfQWCnF8jKqvivfuseSw89teB2GK": { + "id": "QmatBYkA34rU7Xp7MZGfQWCnF8jKqvivfuseSw89teB2GK", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmatXigW2oBdFezZQ3jdVzF3kyq6DD38vuPZvLcVo8Jbmu": { + "id": "QmatXigW2oBdFezZQ3jdVzF3kyq6DD38vuPZvLcVo8Jbmu", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + 
"host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 7 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbChDqsb1od2fyQ5V98kWjckswmXfVJGtutPdqpkXv3jy": { + "id": "QmbChDqsb1od2fyQ5V98kWjckswmXfVJGtutPdqpkXv3jy", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Snowflake/snowflake-arctic-embed-l-v2.0" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 2649 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbRVvFzCmZhEXiTENtvDvNab8wREqEo4byVKDw83TTHeH": { + "id": "QmbRVvFzCmZhEXiTENtvDvNab8wREqEo4byVKDw83TTHeH", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + 
"host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbRoB5rRsDTnSaVQsG72ZJtvdVvERx9Nv3B5p2AGGDw2f": { + "id": "QmbRoB5rRsDTnSaVQsG72ZJtvdVvERx9Nv3B5p2AGGDw2f", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86383 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86590 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9": { + "id": "QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9", + "latency": 0, + "privileged": false, + "owner": 
"", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": null, + "last_seen": 1779027021, + "version": "", + "public_address": "148.187.108.178", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbiBQSUfDeXTdkPHfa6cyySRNJWSXspRbNwjCMJC6juVL": { + "id": "QmbiBQSUfDeXTdkPHfa6cyySRNJWSXspRbNwjCMJC6juVL", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84582 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbjQk58JdgKcygFU85ztiqCF4MRqe6K6RAT9D7SEeUzyd": { + "id": "QmbjQk58JdgKcygFU85ztiqCF4MRqe6K6RAT9D7SEeUzyd", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + }, + { + 
"name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 7 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/Qmc7Lhr1FRE4vh7Mk7VJAgPoEGJU8fYM82tdZsuKn8Bu4A": { + "id": "Qmc7Lhr1FRE4vh7Mk7VJAgPoEGJU8fYM82tdZsuKn8Bu4A", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=zai-org/GLM-4.7-Flash" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 94835 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmcNUTEmgWq9u51XaQ3NVdQmUGb5AXcTq31b81HRajuF8B": { + "id": "QmcNUTEmgWq9u51XaQ3NVdQmUGb5AXcTq31b81HRajuF8B", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-27B" + ] + } + ], + "last_seen": 1779027010, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 90935 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 90409 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + 
"host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmcRV1QhEcmGEbxer4DwpswD27wf3g86cjzYdRiWprj7KG": { + "id": "QmcRV1QhEcmGEbxer4DwpswD27wf3g86cjzYdRiWprj7KG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027010, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmduMdBLdY6vDX3P1Wbrbv5QvPEGXQKnJrWb5WQMRADGRT": { + "id": "QmduMdBLdY6vDX3P1Wbrbv5QvPEGXQKnJrWb5WQMRADGRT", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": 
"NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmdvyPbnCXYz9SrHtN1RTZC8zfu17BgiuaNhy5Dxkjkdx3": { + "id": "QmdvyPbnCXYz9SrHtN1RTZC8zfu17BgiuaNhy5Dxkjkdx3", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027021, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 2 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmeHwokMDRGBQkJaAcJ2kqUPgVVfb3pAspwznmGtfG3SrQ": { + "id": "QmeHwokMDRGBQkJaAcJ2kqUPgVVfb3pAspwznmGtfG3SrQ", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + 
"hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 19117 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmfCUkV2TZaSzeVCaXQ1VaxsPJaweujarmpvgdBALd4qzp": { + "id": "QmfCUkV2TZaSzeVCaXQ1VaxsPJaweujarmpvgdBALd4qzp", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=swiss-ai/Apertus-8B-Instruct-2509" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 88607 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmfEhbkxvqWJ5uPCyEtLLizjnnEJ5SHMrGn36wDwhNezJY": { + "id": "QmfEhbkxvqWJ5uPCyEtLLizjnnEJ5SHMrGn36wDwhNezJY", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027021, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { 
+ "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86592 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + } +} diff --git a/backend/tests/fixtures/dnt_table_upgraded.json b/backend/tests/fixtures/dnt_table_upgraded.json new file mode 100644 index 0000000..86b9172 --- /dev/null +++ b/backend/tests/fixtures/dnt_table_upgraded.json @@ -0,0 +1,3324 @@ +{ + "/QmPBTq7XuewwbkrdU3c9hdCuZ4XCJMm7wMKeU91xu2hcvG": { + "id": "QmPBTq7XuewwbkrdU3c9hdCuZ4XCJMm7wMKeU91xu2hcvG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=swiss-ai/Apertus-70B-Instruct-2509" + ] + } + ], + "last_seen": 1779027016, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89753 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024576", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256000", + "slurm_partition": "normal", + 
"worker_group_id": "2256000", + "framework": "sglang", + "served_model_name": "swiss-ai/Apertus-70B-Instruct-2509", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmPZuZMWhcX2f8EbqRi9HAfrx4ZCiXVfT3c1jifNabwkRL": { + "id": "QmPZuZMWhcX2f8EbqRi9HAfrx4ZCiXVfT3c1jifNabwkRL", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024577", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256001", + "slurm_partition": "normal", + "worker_group_id": "2256001", + "framework": "sglang", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmQ1JHLDJe7KhnD48S9TgaEPG3p2az1kq4dMoKUdBssRDq": { + "id": "QmQ1JHLDJe7KhnD48S9TgaEPG3p2az1kq4dMoKUdBssRDq", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": 
"connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027011, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82827 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82153 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82153 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82025 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024578", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256002", + "slurm_partition": "normal", + "worker_group_id": "2256002", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmQFqgsty9RjE8DCMMH7QY4pCPwEhCdief1gjku44ALbNN": { + "id": "QmQFqgsty9RjE8DCMMH7QY4pCPwEhCdief1gjku44ALbNN", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027011, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84657 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + 
"host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024579", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256003", + "slurm_partition": "normal", + "worker_group_id": "2256003", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmQxjzdBtpjRWwXP991ow1LezbSBSAtJP4cnBW7635YEyW": { + "id": "QmQxjzdBtpjRWwXP991ow1LezbSBSAtJP4cnBW7635YEyW", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024580", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256001", + "slurm_partition": "normal", + "worker_group_id": "2256001", + "framework": "sglang", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmRPfyCPgcL2n9Tob6vKeNoQMX39e3fBS4JWNw7Cpz9Wa5": { + "id": "QmRPfyCPgcL2n9Tob6vKeNoQMX39e3fBS4JWNw7Cpz9Wa5", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", 
+ "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024581", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256005", + "slurm_partition": "normal", + "worker_group_id": "2256005", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmRYSXoAfVk1ZsqmduBrsDd1Zcx541KMA9wtT3Mnsau9tn": { + "id": "QmRYSXoAfVk1ZsqmduBrsDd1Zcx541KMA9wtT3Mnsau9tn", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027021, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84576 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 11 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 8 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + 
"host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024582", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256006", + "slurm_partition": "normal", + "worker_group_id": "2256006", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmRgcwJAjsYRLTnBMJvk8ARYa9tNmBjHvPZ9dsLfrq1MTP": { + "id": "QmRgcwJAjsYRLTnBMJvk8ARYa9tNmBjHvPZ9dsLfrq1MTP", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86394 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86392 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024583", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256007", + "slurm_partition": "normal", + "worker_group_id": "2256007", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmRzLCghn5BSCAmoqzTmBopB2Nh9jyApsibMBc7VU1pREG": { + "id": "QmRzLCghn5BSCAmoqzTmBopB2Nh9jyApsibMBc7VU1pREG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": 
null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027011, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86383 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86594 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024584", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256008", + "slurm_partition": "normal", + "worker_group_id": "2256008", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmSpTkK6cemTaVWugFjThfr4P17m5ZeQjso2adT8FWkD7G": { + "id": "QmSpTkK6cemTaVWugFjThfr4P17m5ZeQjso2adT8FWkD7G", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86590 + }, + { + "name": "NVIDIA GH200 120GB", + 
"total_memory": 97871, + "used_memory": 86592 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024585", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256009", + "slurm_partition": "normal", + "worker_group_id": "2256009", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmSssGsuXMcN61G6ejHCwhJjbbLLukqNNs4KGiAcYZ4vPT": { + "id": "QmSssGsuXMcN61G6ejHCwhJjbbLLukqNNs4KGiAcYZ4vPT", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84563 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024586", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256010", + "slurm_partition": "normal", + "worker_group_id": "2256010", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmSy3ee3TMXRE2PPc1VTRQyWWDj94ieahAWsJn3PF2c6em": { + "id": 
"QmSy3ee3TMXRE2PPc1VTRQyWWDj94ieahAWsJn3PF2c6em", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84586 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 11 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024587", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256011", + "slurm_partition": "normal", + "worker_group_id": "2256011", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmTUY7a8RYRFnhxMgQ2pNm4V9tKQDzQBpMc6dWQ9isysDZ": { + "id": "QmTUY7a8RYRFnhxMgQ2pNm4V9tKQDzQBpMc6dWQ9isysDZ", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": 
"NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84579 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024588", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256012", + "slurm_partition": "normal", + "worker_group_id": "2256012", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmTVFAoyUwxkBgUJtydmUNkA1sYF2j29St5Lz5JtW4zKKm": { + "id": "QmTVFAoyUwxkBgUJtydmUNkA1sYF2j29St5Lz5JtW4zKKm", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027010, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84573 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 7 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024589", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256013", + "slurm_partition": "normal", + "worker_group_id": 
"2256013", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmTidAAZgMyXAiBwsbqxi4MZQjgwiuzjw4JwNu4VJtXXmS": { + "id": "QmTidAAZgMyXAiBwsbqxi4MZQjgwiuzjw4JwNu4VJtXXmS", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18559 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18573 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18569 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18570 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024590", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256014", + "slurm_partition": "normal", + "worker_group_id": "2256014", + "framework": "sglang", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmUBhMF1Gg19ZbUrmBCCWdprt3XMLiXftZebuUnnaGeUJ8": { + "id": "QmUBhMF1Gg19ZbUrmBCCWdprt3XMLiXftZebuUnnaGeUJ8", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": 
"localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84623 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024591", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256015", + "slurm_partition": "normal", + "worker_group_id": "2256015", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmUCz7yLJt84xHyZySK6LMQDDXNQW6qfVz7TWc3UDLt6j7": { + "id": "QmUCz7yLJt84xHyZySK6LMQDDXNQW6qfVz7TWc3UDLt6j7", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84595 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + 
"host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024592", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256016", + "slurm_partition": "normal", + "worker_group_id": "2256016", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmV44xF8bg51ZVrWSAstzms12i8Dt3f1oHV7qXyQiQnhTn": { + "id": "QmV44xF8bg51ZVrWSAstzms12i8Dt3f1oHV7qXyQiQnhTn", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024593", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256017", + "slurm_partition": "normal", + "worker_group_id": "2256017", + "framework": "sglang", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmVKgxLCrKx35THcpaGbAMyweAcN8WuSRqbbrcUtRQgJur": { + "id": "QmVKgxLCrKx35THcpaGbAMyweAcN8WuSRqbbrcUtRQgJur", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + 
"available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84564 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 14 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024594", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256018", + "slurm_partition": "normal", + "worker_group_id": "2256018", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmVyfUwaMw1HD9YvJ6spi2mrDyrsC5fYnTgTqKcGpraVob": { + "id": "QmVyfUwaMw1HD9YvJ6spi2mrDyrsC5fYnTgTqKcGpraVob", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=meta-llama/Llama-3.3-70B-Instruct" + ] + } + ], + "last_seen": 1779027006, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89539 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89747 + }, 
+ { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89747 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89539 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024595", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256019", + "slurm_partition": "normal", + "worker_group_id": "2256019", + "framework": "vllm", + "served_model_name": "meta-llama/Llama-3.3-70B-Instruct", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmWH14wTNJsHJEnpRXGAozEj89Ja1Hy2nsY9RhEATgVXD2": { + "id": "QmWH14wTNJsHJEnpRXGAozEj89Ja1Hy2nsY9RhEATgVXD2", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84612 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 8 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024596", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256020", + "slurm_partition": "normal", + "worker_group_id": "2256020", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + 
"/QmWUY2amw5ZG2triPc5mVjySsowxxz9zp8GZpb5mJ267S8": { + "id": "QmWUY2amw5ZG2triPc5mVjySsowxxz9zp8GZpb5mJ267S8", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024597", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256021", + "slurm_partition": "normal", + "worker_group_id": "2256021", + "framework": "sglang", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmXY2pbA48gJ7HcSnBQMV6VSaBMiK3jWXRyNVUbxRtZJRW": { + "id": "QmXY2pbA48gJ7HcSnBQMV6VSaBMiK3jWXRyNVUbxRtZJRW", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027011, + "version": 
"v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84622 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 16 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 13 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024598", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256022", + "slurm_partition": "normal", + "worker_group_id": "2256022", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmYFsphqm4gNgpgAdjwa6ZaCFPwJxaVHFwJ9P7cLSkTqZe": { + "id": "QmYFsphqm4gNgpgAdjwa6ZaCFPwJxaVHFwJ9P7cLSkTqZe", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024599", + "labels": { + "launched_by": 
"isternfel", + "slurm_job_id": "2256023", + "slurm_partition": "normal", + "worker_group_id": "2256023", + "framework": "sglang", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmYaMDbbw8WRY7RDcFuFHWDtmA9smyPeBcVNfCW4XC5mVV": { + "id": "QmYaMDbbw8WRY7RDcFuFHWDtmA9smyPeBcVNfCW4XC5mVV", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85692 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85900 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85900 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85690 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024600", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256024", + "slurm_partition": "normal", + "worker_group_id": "2256024", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmZ2DSBRFUdoD2vxt4WBNjSnAZZNB211BQGA3N1P7WnkYa": { + "id": "QmZ2DSBRFUdoD2vxt4WBNjSnAZZNB211BQGA3N1P7WnkYa", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + 
"host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82817 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82134 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82142 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82014 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024601", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256025", + "slurm_partition": "normal", + "worker_group_id": "2256025", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmZAJMsbmnfz2KoYbzDC7NC4SLJfzUu5y3Wiz4stXhxE6y": { + "id": "QmZAJMsbmnfz2KoYbzDC7NC4SLJfzUu5y3Wiz4stXhxE6y", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027002, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82827 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82144 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82152 + }, + { + "name": "NVIDIA 
GH200 120GB", + "total_memory": 97871, + "used_memory": 82024 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024602", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256026", + "slurm_partition": "normal", + "worker_group_id": "2256026", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmZKtLQ4Hmtu3LEphS2hn7jciRyHa4TYAPjpkfsYmZkgZr": { + "id": "QmZKtLQ4Hmtu3LEphS2hn7jciRyHa4TYAPjpkfsYmZkgZr", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84644 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024603", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256027", + "slurm_partition": "normal", + "worker_group_id": "2256027", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmZZTz4iU4Cs46bPDFjVdG4Ws4ErvL5C3jNmpV7dwj1Xb4": { + "id": "QmZZTz4iU4Cs46bPDFjVdG4Ws4ErvL5C3jNmpV7dwj1Xb4", + "latency": 0, + 
"privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027005, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84547 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024604", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256028", + "slurm_partition": "normal", + "worker_group_id": "2256028", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmZkb4QBua1DbmPiKz7eMpQVdceqpNxSfsjzPuJBVyDT9U": { + "id": "QmZkb4QBua1DbmPiKz7eMpQVdceqpNxSfsjzPuJBVyDT9U", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027006, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 
82829 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82146 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82154 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82026 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024605", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256029", + "slurm_partition": "normal", + "worker_group_id": "2256029", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/Qma4wwUVNRfNsz4JB26z26LJyNzwhZ6YWSVeXhKAkckJY7": { + "id": "Qma4wwUVNRfNsz4JB26z26LJyNzwhZ6YWSVeXhKAkckJY7", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-Next-80B-A3B-Instruct-yiswang" + ] + } + ], + "last_seen": 1779027006, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024606", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256030", + "slurm_partition": "normal", + "worker_group_id": "2256030", + "framework": "vllm", + 
"served_model_name": "Qwen/Qwen3-Next-80B-A3B-Instruct-yiswang", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmaEQoJVdvv2nRV3HUsH6dzjxoCy6mYBkCxGh37AaLNeMp": { + "id": "QmaEQoJVdvv2nRV3HUsH6dzjxoCy6mYBkCxGh37AaLNeMp", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82829 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82146 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82154 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82026 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024607", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256031", + "slurm_partition": "normal", + "worker_group_id": "2256031", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmaLdTc28YRJn5uMrUt5kKnuWQzT2giA6dE9ZRkeG7pHVC": { + "id": "QmaLdTc28YRJn5uMrUt5kKnuWQzT2giA6dE9ZRkeG7pHVC", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + 
"identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027006, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 17 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 10 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024608", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256032", + "slurm_partition": "normal", + "worker_group_id": "2256032", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/Qmaf4Ahny2u9yYyHLZtv5THxZaV2fWFKFicq7bJryvDYtk": { + "id": "Qmaf4Ahny2u9yYyHLZtv5THxZaV2fWFKFicq7bJryvDYtk", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84598 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + 
"connected": true, + "load": null, + "hostname": "nid024609", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256033", + "slurm_partition": "normal", + "worker_group_id": "2256033", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmatBYkA34rU7Xp7MZGfQWCnF8jKqvivfuseSw89teB2GK": { + "id": "QmatBYkA34rU7Xp7MZGfQWCnF8jKqvivfuseSw89teB2GK", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024610", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256034", + "slurm_partition": "normal", + "worker_group_id": "2256034", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmatXigW2oBdFezZQ3jdVzF3kyq6DD38vuPZvLcVo8Jbmu": { + "id": "QmatXigW2oBdFezZQ3jdVzF3kyq6DD38vuPZvLcVo8Jbmu", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { 
+ "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 7 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024611", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256035", + "slurm_partition": "normal", + "worker_group_id": "2256035", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmbChDqsb1od2fyQ5V98kWjckswmXfVJGtutPdqpkXv3jy": { + "id": "QmbChDqsb1od2fyQ5V98kWjckswmXfVJGtutPdqpkXv3jy", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Snowflake/snowflake-arctic-embed-l-v2.0" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 2649 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + 
"hostname": "nid024612", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256036", + "slurm_partition": "normal", + "worker_group_id": "2256036", + "framework": "vllm", + "served_model_name": "Snowflake/snowflake-arctic-embed-l-v2.0", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmbRVvFzCmZhEXiTENtvDvNab8wREqEo4byVKDw83TTHeH": { + "id": "QmbRVvFzCmZhEXiTENtvDvNab8wREqEo4byVKDw83TTHeH", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024613", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256037", + "slurm_partition": "normal", + "worker_group_id": "2256037", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmbRoB5rRsDTnSaVQsG72ZJtvdVvERx9Nv3B5p2AGGDw2f": { + "id": "QmbRoB5rRsDTnSaVQsG72ZJtvdVvERx9Nv3B5p2AGGDw2f", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + 
"hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86383 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86590 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024614", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256038", + "slurm_partition": "normal", + "worker_group_id": "2256038", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9": { + "id": "QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "pending", + "available_offering": null, + "service": null, + "last_seen": 1779027021, + "version": "v0.0.6", + "public_address": "148.187.108.178", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024615", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256039", + "slurm_partition": "normal", + "worker_group_id": "2256039", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmbiBQSUfDeXTdkPHfa6cyySRNJWSXspRbNwjCMJC6juVL": { + "id": "QmbiBQSUfDeXTdkPHfa6cyySRNJWSXspRbNwjCMJC6juVL", + "latency": 0, + "privileged": false, + "owner": "", + 
"current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84582 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024616", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256040", + "slurm_partition": "normal", + "worker_group_id": "2256040", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmbjQk58JdgKcygFU85ztiqCF4MRqe6K6RAT9D7SEeUzyd": { + "id": "QmbjQk58JdgKcygFU85ztiqCF4MRqe6K6RAT9D7SEeUzyd", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "pending", + "available_offering": null, + "service": [], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 7 + } + ], + "host_memory": 0, + 
"host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024617", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256041", + "slurm_partition": "normal", + "worker_group_id": "2256041", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/Qmc7Lhr1FRE4vh7Mk7VJAgPoEGJU8fYM82tdZsuKn8Bu4A": { + "id": "Qmc7Lhr1FRE4vh7Mk7VJAgPoEGJU8fYM82tdZsuKn8Bu4A", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=zai-org/GLM-4.7-Flash" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 94835 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024618", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256042", + "slurm_partition": "normal", + "worker_group_id": "2256042", + "framework": "sglang", + "served_model_name": "zai-org/GLM-4.7-Flash", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmcNUTEmgWq9u51XaQ3NVdQmUGb5AXcTq31b81HRajuF8B": { + "id": "QmcNUTEmgWq9u51XaQ3NVdQmUGb5AXcTq31b81HRajuF8B", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-27B" + ] + } + ], + "last_seen": 
1779027010, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 90935 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 90409 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024619", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256043", + "slurm_partition": "normal", + "worker_group_id": "2256043", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3.5-27B", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmcRV1QhEcmGEbxer4DwpswD27wf3g86cjzYdRiWprj7KG": { + "id": "QmcRV1QhEcmGEbxer4DwpswD27wf3g86cjzYdRiWprj7KG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027010, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024620", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256044", + "slurm_partition": "normal", + "worker_group_id": "2256044", + "framework": "sglang", + "served_model_name": 
"openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmduMdBLdY6vDX3P1Wbrbv5QvPEGXQKnJrWb5WQMRADGRT": { + "id": "QmduMdBLdY6vDX3P1Wbrbv5QvPEGXQKnJrWb5WQMRADGRT", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024621", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256045", + "slurm_partition": "normal", + "worker_group_id": "2256045", + "framework": "sglang", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmdvyPbnCXYz9SrHtN1RTZC8zfu17BgiuaNhy5Dxkjkdx3": { + "id": "QmdvyPbnCXYz9SrHtN1RTZC8zfu17BgiuaNhy5Dxkjkdx3", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + 
"model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027021, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 2 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024622", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256046", + "slurm_partition": "normal", + "worker_group_id": "2256046", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmeHwokMDRGBQkJaAcJ2kqUPgVVfb3pAspwznmGtfG3SrQ": { + "id": "QmeHwokMDRGBQkJaAcJ2kqUPgVVfb3pAspwznmGtfG3SrQ", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 19117 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": 
true, + "load": null, + "hostname": "nid024623", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256047", + "slurm_partition": "normal", + "worker_group_id": "2256047", + "framework": "sglang", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmfCUkV2TZaSzeVCaXQ1VaxsPJaweujarmpvgdBALd4qzp": { + "id": "QmfCUkV2TZaSzeVCaXQ1VaxsPJaweujarmpvgdBALd4qzp", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=swiss-ai/Apertus-8B-Instruct-2509" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 88607 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024624", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256048", + "slurm_partition": "normal", + "worker_group_id": "2256048", + "framework": "sglang", + "served_model_name": "swiss-ai/Apertus-8B-Instruct-2509", + "started_at": "2026-05-15T18:00:00Z" + } + }, + "/QmfEhbkxvqWJ5uPCyEtLLizjnnEJ5SHMrGn36wDwhNezJY": { + "id": "QmfEhbkxvqWJ5uPCyEtLLizjnnEJ5SHMrGn36wDwhNezJY", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" 
+ ] + } + ], + "last_seen": 1779027021, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86592 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024625", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256049", + "slurm_partition": "normal", + "worker_group_id": "2256049", + "framework": "vllm", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-15T18:00:00Z" + } + } +} \ No newline at end of file diff --git a/backend/tests/test_model_service.py b/backend/tests/test_model_service.py new file mode 100644 index 0000000..361c70f --- /dev/null +++ b/backend/tests/test_model_service.py @@ -0,0 +1,186 @@ +"""Unit tests for get_all_models: aggregation of DNT peers into per-model +entries the frontend can consume.""" + +from unittest.mock import patch + +from backend.services.model_service import get_all_models + + +def _dnt_response(peers: dict): + """Build a fake requests.Response.json() for a DNT /v1/dnt/table call.""" + + class FakeResp: + def __init__(self, data): + self._data = data + + def json(self): + return self._data + + return FakeResp(peers) + + +PEER_NEW_BINARY_HEAD = { + "id": "QmHead", + "hostname": "nid006220", + "version": "v0.0.6", + "status": "ready", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "12345", + "worker_group_id": "12345", + "framework": "sglang", + "started_at": "2026-05-15T18:00:00Z", + }, + "hardware": {"gpus": [{"name": "GH200"}] * 4}, + "service": [ + { + "name": "llm", + "status": "connected", + "host": "localhost", + "port": "8080", 
+ "identity_group": ["model=swiss-ai/Apertus-8B"], + } + ], +} + +PEER_NEW_BINARY_FOLLOWER = { + "id": "QmFollower", + "hostname": "nid006221", + "version": "v0.0.6", + "status": "pending", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "12345", + "worker_group_id": "12345", + "framework": "sglang", + "started_at": "2026-05-15T18:00:00Z", + }, + "hardware": {"gpus": [{"name": "GH200"}] * 4}, + "service": [], +} + +PEER_OLD_BINARY = { + "id": "QmOld", + "hardware": {"gpus": [{"name": "GH200"}] * 4}, + "service": [ + { + "name": "llm", + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": ["model=legacy/Llama-70B"], + } + ], +} + + +def test_old_binary_peer_still_surfaces(): + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = _dnt_response({"/QmOld": PEER_OLD_BINARY}) + out = get_all_models("http://x/v1/dnt/table", with_details=True) + assert len(out) == 1 + assert out[0]["id"] == "legacy/Llama-70B" + # Old binary contributes no metadata — frontend treats blanks as unknown. + assert out[0]["hostname"] == "" + assert out[0]["worker_group_id"] == "" + + +def test_new_binary_head_carries_labels(): + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = _dnt_response({"/QmHead": PEER_NEW_BINARY_HEAD}) + out = get_all_models("http://x/v1/dnt/table", with_details=True) + assert len(out) == 1 + entry = out[0] + assert entry["id"] == "swiss-ai/Apertus-8B" + assert entry["hostname"] == "nid006220" + assert entry["launched_by"] == "rosmith" + assert entry["worker_group_id"] == "12345" + assert entry["framework"] == "sglang" + assert entry["status"] == "ready" + + +def test_metrics_only_follower_groups_with_head_via_worker_group_id(): + """A multi-node replica's follower has no `service` but does carry + worker_group_id. 
It should appear in the output with id='' so the + frontend can attribute it to the same replica as the head.""" + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = _dnt_response( + { + "/QmHead": PEER_NEW_BINARY_HEAD, + "/QmFollower": PEER_NEW_BINARY_FOLLOWER, + } + ) + out = get_all_models("http://x/v1/dnt/table", with_details=True) + assert len(out) == 2 + by_id = {e["peer_id"]: e for e in out} + assert by_id["QmHead"]["id"] == "swiss-ai/Apertus-8B" + assert by_id["QmFollower"]["id"] == "" + # Shared worker_group_id lets the frontend group them. + assert by_id["QmHead"]["worker_group_id"] == by_id["QmFollower"]["worker_group_id"] == "12345" + + +def test_follower_without_worker_group_id_skipped(): + """Older binary follower with no labels and no service is uninformative — + drop it so the model list stays clean.""" + bare = {"id": "QmBare", "service": [], "hardware": {"gpus": []}} + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = _dnt_response({"/QmBare": bare}) + out = get_all_models("http://x/v1/dnt/table") + assert out == [] + + +def test_request_failure_returns_empty(): + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.side_effect = Exception("boom") + out = get_all_models("http://x/v1/dnt/table") + assert out == [] + + +# ── fixtures from live prod ───────────────────────────────────────────────── + + +def _load_fixture(name: str) -> dict: + import json + import pathlib + + p = pathlib.Path(__file__).parent / "fixtures" / name + return json.loads(p.read_text()) + + +def test_real_prod_payload_returns_models(): + """Pre-upgrade prod payload: every peer either has service entries (which + surface as a model entry) or has no labels (so we drop the metrics-only + fallback). 
End count should match what the dashboard shows today.""" + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = type( + "R", (), {"json": lambda self=None: _load_fixture("dnt_table_prod.json")} + )() + out = get_all_models("http://x/v1/dnt/table", with_details=True) + # All non-empty ids should be model names from the live network. + model_ids = {e["id"] for e in out if e["id"]} + assert "swiss-ai/Apertus-70B-Instruct-2509" in model_ids + assert "openai/gpt-oss-120b-Vsdo" in model_ids + # No labels on the old binary → worker_group_id should be empty everywhere. + assert all(e["worker_group_id"] == "" for e in out) + + +def test_upgraded_payload_groups_multinode_replica(): + """Simulated v0.0.6 deployment: the gemma 'multi-node demo' pair share a + worker_group_id. One has a service, the other is metrics-only with id=''. + Backend returns both entries with the shared worker_group_id so the + frontend can aggregate them into one logical replica.""" + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = type( + "R", (), {"json": lambda self=None: _load_fixture("dnt_table_upgraded.json")} + )() + out = get_all_models("http://x/v1/dnt/table", with_details=True) + # Find the shared-wg cluster + by_wg: dict[str, list] = {} + for e in out: + by_wg.setdefault(e["worker_group_id"], []).append(e) + multi = [v for v in by_wg.values() if len(v) > 1] + assert multi, "fixture should contain at least one multi-peer worker group" + # At least one peer in the multi-peer group should be metrics-only (id=''). 
+ pair = multi[0] + assert any(e["id"] == "" for e in pair), pair + assert any(e["id"] != "" for e in pair), pair diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte index 1055dcb..fc6c49e 100644 --- a/frontend/src/components/ui/ModelCard.svelte +++ b/frontend/src/components/ui/ModelCard.svelte @@ -2,15 +2,39 @@ import { getModelLogo } from '../../lib/modelLogos'; import { getModelMetricsUrl, getModelTier } from '../../lib/modelMetrics'; + interface Peer { + peer_id?: string; + hostname?: string; + version?: string; + status?: string; + device?: string; + launched_by?: string; + slurm_job_id?: string; + started_at?: string; + framework?: string; + worker_group_id?: string; + labels?: Record; + } + + interface Replica { + worker_group_id: string; + head: Peer; + followers: Peer[]; + nodesPerReplica: number; + devices: string[]; + } + interface ModelCardProps { entry: { - collection: string; - slug: string; + collection?: string; + slug?: string; data: { title: string; description: string; devices: string[]; - instanceCount: number; + replicas: Replica[]; + replicaCount: number; + nodeCount: number; }; }; } @@ -22,112 +46,188 @@ const tier = getModelTier(entry.data.title); const chatUrl = `${chatAppUrl.replace(/\/$/, "")}/?models=${encodeURIComponent(entry.data.title)}`; + let expanded = false; let copied = false; + // Aggregated metadata for the headline summary — pull from the first + // replica's head peer. All replicas of the same model usually share the + // same launcher/framework, but we render them per-replica below anyway. + $: firstHead = entry.data.replicas[0]?.head ?? {}; + $: framework = firstHead.framework || ""; + + // Multi-node topology string: "2 nodes × 4xGH200" for an 8-GPU TP replica. 
+ function topologyString(r: Replica): string { + const dev = r.devices[0] || "?"; + if (r.nodesPerReplica === 1) return dev; + return `${r.nodesPerReplica} nodes × ${dev}`; + } + async function copyModelName(e: Event) { e.preventDefault(); e.stopPropagation(); try { await navigator.clipboard.writeText(entry.data.title); copied = true; - setTimeout(() => { - copied = false; - }, 1200); + setTimeout(() => { copied = false; }, 1200); } catch (err) { console.error('Failed to copy:', err); } } + + function toggleExpand() { + expanded = !expanded; + } + + function onKeyDown(e: KeyboardEvent) { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + toggleExpand(); + } + } - -
- Model logo -
-
- - {entry.data.title} - - - {#if metricsUrl} - - Metrics - - {/if} - {#if tier === "L2"} +
+ Model logo +
+
{ if (e.key === "Enter") copyModelName(e); }} + role="button" + tabindex="0" + class="inline-block cursor-pointer break-all font-mono {copied ? 'animate-name-flash' : ''}" + title="Click to copy model name" > - 24/7 + {entry.data.title} - {:else if tier === "slurm"} - - Slurm - - {/if} - {#if entry.data.instanceCount > 1} - - x{entry.data.instanceCount} - - {/if} + {#if copied} + + + + {:else} + + + + + {/if} + + {#if metricsUrl} + + Metrics + + {/if} + {#if tier === "L2"} + 24/7 + {:else if tier === "slurm"} + Slurm + {/if} + {#if entry.data.replicaCount > 1} + + x{entry.data.replicaCount} + + {/if} +
+
on {entry.data.devices.join(', ') || 'unknown'}
-
on {entry.data.devices.join(', ')}
+ + +
+ + {#if expanded} +
+ + + + + + + + Open in OpenWebUI + + + + {#each entry.data.replicas as replica, idx (replica.worker_group_id)} +
+
+ Replica {idx + 1}{entry.data.replicaCount > 1 ? ` / ${entry.data.replicaCount}` : ""} + · + {topologyString(replica)} + {#if replica.head.status} + {replica.head.status} + {/if} +
+ + +
{[
+            ["model", entry.data.title],
+            ["launched_by", replica.head.launched_by || "?"],
+            ["slurm_job_id", replica.head.slurm_job_id || "?"],
+            ["started_at", replica.head.started_at || "?"],
+            ["framework", replica.head.framework || "?"],
+            ["version", replica.head.version || "?"],
+            ["worker_group_id", replica.worker_group_id],
+            ["head", `${replica.head.hostname || "?"} (${replica.head.peer_id || "?"})`],
+            ...replica.followers.map((f, i) => [
+              `follower_${i + 1}`,
+              `${f.hostname || "?"} (${f.peer_id || "?"})`,
+            ]),
+          ].map(([k, v]) => `${k.padEnd(18)} ${v}`).join("\n")}
+ + + {#if replica.head.labels && Object.keys(replica.head.labels).length > 0} + {@const extra = Object.entries(replica.head.labels).filter(([k]) => + !["launched_by","slurm_job_id","worker_group_id","framework","started_at","slurm_partition","served_model_name"].includes(k) + )} + {#if extra.length > 0} +
Extra labels
+
{extra.map(([k, v]) => `${k.padEnd(18)} ${v}`).join("\n")}
+ {/if} + {/if} +
+ {/each} +
+ {/if}
- - - - - + \ No newline at end of file + diff --git a/frontend/src/components/ui/ModelList.svelte b/frontend/src/components/ui/ModelList.svelte index 835411e..6207e27 100644 --- a/frontend/src/components/ui/ModelList.svelte +++ b/frontend/src/components/ui/ModelList.svelte @@ -8,6 +8,7 @@ let models = []; let modelCount = 0; + let replicaCount = 0; let loading = true; let error = null; @@ -20,30 +21,68 @@ const data = await response.json(); const rawModels = data.data; - const modelsMap = new Map(); + // Map worker_group_id → model_id, so metrics-only follower peers + // (id="") can be attributed to the right model. + const wgToModel = new Map(); + for (const m of rawModels) { + if (m.id && m.worker_group_id) wgToModel.set(m.worker_group_id, m.id); + } - for (const model of rawModels) { - if (!modelsMap.has(model.id)) { - modelsMap.set(model.id, { - id: model.id, - devices: new Set(), - count: 0, + // Group: model_id → worker_group_id → list of peer entries. + const modelsMap = new Map(); + for (const m of rawModels) { + let modelId = m.id; + if (!modelId) { + modelId = wgToModel.get(m.worker_group_id); + if (!modelId) continue; // orphan metrics peer + } + if (!modelsMap.has(modelId)) { + modelsMap.set(modelId, { id: modelId, replicas: new Map() }); + } + const model = modelsMap.get(modelId); + // Fall back to peer_id when worker_group_id is missing + // (older OCF binary) so each peer becomes its own replica. + const wg = m.worker_group_id || m.peer_id || `legacy-${model.replicas.size}`; + if (!model.replicas.has(wg)) { + model.replicas.set(wg, { + worker_group_id: wg, + peers: [], }); } - const existing = modelsMap.get(model.id); - existing.devices.add(model.device); - existing.count++; + model.replicas.get(wg).peers.push(m); } + // Materialize for rendering. 
modelCount = modelsMap.size; - models = Array.from(modelsMap.values()).map(groupedModel => ({ - data: { - title: groupedModel.id, - description: groupedModel.id, - devices: Array.from(groupedModel.devices), - instanceCount: groupedModel.count, - }, - })); + replicaCount = 0; + models = Array.from(modelsMap.values()).map(grouped => { + const replicas = Array.from(grouped.replicas.values()).map(r => { + // The head is the peer that owns the serving entry. + const head = r.peers.find(p => p.id === grouped.id) || r.peers[0]; + const followers = r.peers.filter(p => p !== head); + return { + worker_group_id: r.worker_group_id, + head, + followers, + nodesPerReplica: r.peers.length, + // device strings are per-peer; collect distinct ones + devices: Array.from(new Set(r.peers.map(p => p.device).filter(Boolean))), + }; + }); + replicaCount += replicas.length; + const allDevices = Array.from(new Set(replicas.flatMap(r => r.devices))); + return { + data: { + title: grouped.id, + description: grouped.id, + devices: allDevices, + replicas, + replicaCount: replicas.length, + // Total peers (head + followers) across all replicas. + nodeCount: replicas.reduce((s, r) => s + r.nodesPerReplica, 0), + }, + }; + }); } catch (err) { console.error("Error fetching models:", err); error = err.message; @@ -74,6 +113,12 @@ Available Models {#if !loading && !error} {modelCount} + {#if replicaCount !== modelCount} + + , Replicas + {replicaCount} + + {/if} {/if}

From 68d9764682cf77018a8fa542a7d8468d59843c24 Mon Sep 17 00:00:00 2001 From: robmsmt Date: Sun, 17 May 2026 16:19:13 +0200 Subject: [PATCH 2/8] =?UTF-8?q?refactor:=20rename=20ocf=20=E2=86=92=20otel?= =?UTF-8?q?a;=20add=20OTELA=5FFIXTURE=5FPATH=20for=20UI=20iteration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Settings: ocf_head_addr → otela_head_addr, ocf_fixture_path → otela_fixture_path. DEPLOY NOTE: this changes the env var names from OCF_HEAD_ADDR / OCF_FIXTURE_PATH to OTELA_HEAD_ADDR / OTELA_FIXTURE_PATH. Ops side must update before/with this deploy or /v1/models* will hit an empty endpoint. Comments, README, k8s manifests, and the in-repo guides now refer to "OpenTela" rather than "OCF". External image tag (ghcr.io/researchcomputer/ocf:*) and the in-binary mount path (/root/.ocfcore/keys) are untouched — both are dictated by upstream and would need a coordinated rename there first. Fixture mode: when OTELA_FIXTURE_PATH is set, /v1/models* reads that JSON file instead of HTTP-getting OTELA_HEAD_ADDR/v1/dnt/table. Used for iterating on the new model-card expansion UI against the synthesised post-v0.0.6 payload before the binary actually ships. 
--- README.md | 4 ++-- backend/config.py | 6 +++++- backend/routers/completions.py | 4 ++-- backend/routers/embeddings.py | 2 +- backend/routers/models.py | 14 ++++++++++---- backend/routers/rerank.py | 4 ++-- backend/routers/responses.py | 2 +- backend/routers/tokenization.py | 4 ++-- backend/services/model_service.py | 18 +++++++++++++++--- backend/tests/fixtures/build_upgraded.py | 5 +++-- backend/tests/test_app.py | 2 +- frontend/src/components/ui/ModelList.svelte | 2 +- meta/k8s/{ocf-test.yaml => otela-test.yaml} | 12 ++++++------ meta/k8s/{ocf.yaml => otela.yaml} | 8 ++++---- 14 files changed, 55 insertions(+), 32 deletions(-) rename meta/k8s/{ocf-test.yaml => otela-test.yaml} (87%) rename meta/k8s/{ocf.yaml => otela.yaml} (91%) diff --git a/README.md b/README.md index 78922c8..a2fcf73 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Frontend and backend API proxy for SwissAI LLM serving. For examples on how to l │ ▼ ┌─────────────────┐ - │ OCF │ OpenTela P2P routing → model=apertus-... + │ OpenTela │ P2P routing → model=apertus-... └────────┬────────┘ │ ▼ @@ -42,7 +42,7 @@ frontend/ # web UI (Astro + Svelte) meta/ # example Dockerfiles, example k8s manifests, build scripts ``` -OCF (Open Compute Framework) now renamed to OpenTela upstream is maintained at [eth-easl/OpenTela](https://github.com/eth-easl/OpenTela). We maintain a fork at [swiss-ai/OpenTela](https://github.com/swiss-ai/opentela) to control deployments to dev+prod. +OpenTela (formerly OCF / "Open Compute Framework") is maintained upstream at [eth-easl/OpenTela](https://github.com/eth-easl/OpenTela). We maintain a fork at [swiss-ai/OpenTela](https://github.com/swiss-ai/opentela) to control deployments to dev+prod. 
## Dev Quick Start diff --git a/backend/config.py b/backend/config.py index a8ee21b..d4127db 100644 --- a/backend/config.py +++ b/backend/config.py @@ -17,7 +17,11 @@ class Settings(BaseSettings): database_url: str = "" auth_secret: str = "" auth_trust_host: bool = False - ocf_head_addr: str = "" + otela_head_addr: str = "" + # When set, /v1/models* reads this JSON file instead of calling + # $otela_head_addr/v1/dnt/table. Used for UI iteration against synthesised + # upgraded payloads (see backend/tests/fixtures/build_upgraded.py). + otela_fixture_path: str = "" langfuse_host: str = "" langfuse_public_key: str = "" langfuse_secret_key: str = "" diff --git a/backend/routers/completions.py b/backend/routers/completions.py index 8c38f71..f296793 100644 --- a/backend/routers/completions.py +++ b/backend/routers/completions.py @@ -75,7 +75,7 @@ async def chat_completion( ) response = await llm_proxy( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, request=llm_request, ) @@ -125,7 +125,7 @@ async def completion( ) response = await llm_proxy_completions( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, request=llm_request, ) diff --git a/backend/routers/embeddings.py b/backend/routers/embeddings.py index d8a3439..34a810e 100644 --- a/backend/routers/embeddings.py +++ b/backend/routers/embeddings.py @@ -26,7 +26,7 @@ async def embeddings( data["app_title"] = app_title response = await llm_proxy_embeddings( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, **data, ) diff --git a/backend/routers/models.py b/backend/routers/models.py index 32576a6..668dbf9 100644 --- a/backend/routers/models.py +++ b/backend/routers/models.py @@ -6,9 +6,17 @@ settings = get_settings() +def _dnt_endpoint() -> str: + """When 
OTELA_FIXTURE_PATH is set, read DNT from disk instead of HTTP — + used for iterating on the UI against synthesised post-upgrade payloads.""" + if settings.otela_fixture_path: + return settings.otela_fixture_path + return settings.otela_head_addr + "/v1/dnt/table" + + @router.get("/v1/models_detailed") async def list_models_detailed(): - models = get_all_models(settings.ocf_head_addr + "/v1/dnt/table", with_details=True) + models = get_all_models(_dnt_endpoint(), with_details=True) return dict( object="list", data=models, @@ -17,9 +25,7 @@ async def list_models_detailed(): @router.get("/v1/models") async def list_models(): - models = get_all_models( - settings.ocf_head_addr + "/v1/dnt/table", with_details=False - ) + models = get_all_models(_dnt_endpoint(), with_details=False) return dict( object="list", data=models, diff --git a/backend/routers/rerank.py b/backend/routers/rerank.py index 9b47aef..7f5b211 100644 --- a/backend/routers/rerank.py +++ b/backend/routers/rerank.py @@ -14,7 +14,7 @@ async def rerank( ): data = await request.json() response = await llm_proxy_rerank( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, payload=data, model=data.get("model", "unknown"), @@ -29,7 +29,7 @@ async def score( ): data = await request.json() response = await llm_proxy_score( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, payload=data, model=data.get("model", "unknown"), diff --git a/backend/routers/responses.py b/backend/routers/responses.py index 598270c..33ebb49 100644 --- a/backend/routers/responses.py +++ b/backend/routers/responses.py @@ -17,7 +17,7 @@ async def create_response( stream = data.get("stream", False) response = await llm_proxy_responses( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, 
payload=data, stream=stream, diff --git a/backend/routers/tokenization.py b/backend/routers/tokenization.py index 99680d0..6381aa3 100644 --- a/backend/routers/tokenization.py +++ b/backend/routers/tokenization.py @@ -14,7 +14,7 @@ async def tokenize( ): data = await request.json() response = await llm_proxy_tokenize( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, payload=data, model=data.get("model", "unknown"), @@ -29,7 +29,7 @@ async def detokenize( ): data = await request.json() response = await llm_proxy_detokenize( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, payload=data, model=data.get("model", "unknown"), diff --git a/backend/services/model_service.py b/backend/services/model_service.py index 3f4ce65..41cbc2a 100644 --- a/backend/services/model_service.py +++ b/backend/services/model_service.py @@ -1,12 +1,16 @@ +import json +import pathlib + import requests + from backend.config import parse_hardware_info def _peer_metadata(node_info: dict) -> dict: """Pull the surfaced launch-time fields off a DNT peer entry. - Older OCF binaries ( dict: } +def _load_dnt(endpoint: str) -> dict: + """Fetch DNT data. If endpoint points at a local file (no scheme), read + it as JSON — that's the fixture-mode dev path. Otherwise HTTP-GET it.""" + if endpoint and not endpoint.startswith(("http://", "https://")): + return json.loads(pathlib.Path(endpoint).read_text()) + return requests.get(endpoint).json() + + def get_all_models(endpoint: str, with_details: bool = False): """Return one entry per (peer, model) pair served on the network. @@ -36,7 +48,7 @@ def get_all_models(endpoint: str, with_details: bool = False): metrics-only followers all share the same worker_group_id). 
""" try: - data = requests.get(endpoint).json() + data = _load_dnt(endpoint) except Exception: return [] models = [] diff --git a/backend/tests/fixtures/build_upgraded.py b/backend/tests/fixtures/build_upgraded.py index 52882c9..ae3341d 100644 --- a/backend/tests/fixtures/build_upgraded.py +++ b/backend/tests/fixtures/build_upgraded.py @@ -2,7 +2,8 @@ """Build dnt_table_upgraded.json from dnt_table_prod.json. Adds the new Peer fields (hostname, version, status, labels) as if the -v0.0.6 OCF binary plus the model-launch --label changes had been deployed: +v0.0.6 OpenTela binary plus the model-launch --label changes had been +deployed: - Every peer gets a synthetic SLURM job id (= its own worker_group_id). - Each peer's metadata reflects realistic launched_by / framework values. @@ -76,7 +77,7 @@ def main() -> None: "served_model_name": model_name, "started_at": "2026-05-15T18:00:00Z", } - # Drop empty entries so the JSON looks closer to what OCF emits. + # Drop empty entries so the JSON looks closer to what OpenTela emits. 
peer["labels"] = {k: v for k, v in peer["labels"].items() if v} upgraded[pid_key] = peer diff --git a/backend/tests/test_app.py b/backend/tests/test_app.py index bee50b2..be326a6 100644 --- a/backend/tests/test_app.py +++ b/backend/tests/test_app.py @@ -63,7 +63,7 @@ def test_app_routes_registered(client): def test_models_endpoint_no_auth(client): - """/v1/models should return 200 even when OCF is unreachable.""" + """/v1/models should return 200 even when OpenTela is unreachable.""" response = client.get("/v1/models") assert response.status_code == 200 assert response.json()["object"] == "list" diff --git a/frontend/src/components/ui/ModelList.svelte b/frontend/src/components/ui/ModelList.svelte index 6207e27..83ac1a1 100644 --- a/frontend/src/components/ui/ModelList.svelte +++ b/frontend/src/components/ui/ModelList.svelte @@ -41,7 +41,7 @@ } const model = modelsMap.get(modelId); // Fall back to peer_id when worker_group_id is missing - // (older OCF binary) so each peer becomes its own replica. + // (older OpenTela binary) so each peer becomes its own replica. 
const wg = m.worker_group_id || m.peer_id || `legacy-${model.replicas.size}`; if (!model.replicas.has(wg)) { model.replicas.set(wg, { diff --git a/meta/k8s/ocf-test.yaml b/meta/k8s/otela-test.yaml similarity index 87% rename from meta/k8s/ocf-test.yaml rename to meta/k8s/otela-test.yaml index 03ea016..13f6a5a 100644 --- a/meta/k8s/ocf-test.yaml +++ b/meta/k8s/otela-test.yaml @@ -28,9 +28,9 @@ spec: - "148.187.108.173" ports: - containerPort: 8092 - name: ocf-http-port + name: otela-http-port - containerPort: 43905 - name: ocf-p2p-port + name: otela-p2p-port resources: requests: memory: "8Gi" @@ -39,11 +39,11 @@ spec: memory: "16Gi" cpu: "8" volumeMounts: - - name: test-ocf-key-id + - name: test-otela-key-id mountPath: /root/.ocfcore/keys readOnly: true volumes: - - name: test-ocf-key-id + - name: test-otela-key-id secret: secretName: test-dispatcher-id-key --- @@ -59,9 +59,9 @@ spec: - protocol: TCP port: 8092 targetPort: 8092 - name: ocf-http-port + name: otela-http-port - protocol: TCP port: 43905 targetPort: 43905 - name: ocf-p2p-port + name: otela-p2p-port type: LoadBalancer \ No newline at end of file diff --git a/meta/k8s/ocf.yaml b/meta/k8s/otela.yaml similarity index 91% rename from meta/k8s/ocf.yaml rename to meta/k8s/otela.yaml index 3c5ba99..f4e5657 100644 --- a/meta/k8s/ocf.yaml +++ b/meta/k8s/otela.yaml @@ -26,9 +26,9 @@ spec: - "148.187.108.172" ports: - containerPort: 8092 - name: ocf-http-port + name: otela-http-port - containerPort: 43905 - name: ocf-p2p-port + name: otela-p2p-port resources: requests: memory: "8Gi" @@ -57,9 +57,9 @@ spec: - protocol: TCP port: 8092 targetPort: 8092 - name: ocf-http-port + name: otela-http-port - protocol: TCP port: 43905 targetPort: 43905 - name: ocf-p2p-port + name: otela-p2p-port type: LoadBalancer \ No newline at end of file From c72b6a8e9c8bbc701d7631ec8995ae7d5272afe2 Mon Sep 17 00:00:00 2001 From: robmsmt Date: Sun, 17 May 2026 16:24:03 +0200 Subject: [PATCH 3/8] fix: accept legacy OCF_* env vars in addition 
to OTELA_* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use pydantic AliasChoices so OCF_HEAD_ADDR / OCF_FIXTURE_PATH still populate the renamed settings. Deployments can migrate on their own schedule without a synchronized cut-over. When both legacy and canonical names are set, the canonical OTELA_* wins — a partial migration shouldn't silently keep the legacy value in force. --- backend/config.py | 18 +++++++++++++++--- backend/tests/test_model_service.py | 26 ++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/backend/config.py b/backend/config.py index d4127db..3c3952f 100644 --- a/backend/config.py +++ b/backend/config.py @@ -1,6 +1,8 @@ -from pydantic_settings import BaseSettings from functools import lru_cache +from pydantic import AliasChoices, Field +from pydantic_settings import BaseSettings + @lru_cache() def get_settings(): @@ -17,11 +19,20 @@ class Settings(BaseSettings): database_url: str = "" auth_secret: str = "" auth_trust_host: bool = False - otela_head_addr: str = "" + # Accept the historical OCF_* env var names in addition to the canonical + # OTELA_* ones so existing deployments keep working through the rename. + # Python attribute access stays `settings.otela_*`. + otela_head_addr: str = Field( + default="", + validation_alias=AliasChoices("otela_head_addr", "ocf_head_addr"), + ) # When set, /v1/models* reads this JSON file instead of calling # $otela_head_addr/v1/dnt/table. Used for UI iteration against synthesised # upgraded payloads (see backend/tests/fixtures/build_upgraded.py). 
- otela_fixture_path: str = "" + otela_fixture_path: str = Field( + default="", + validation_alias=AliasChoices("otela_fixture_path", "ocf_fixture_path"), + ) langfuse_host: str = "" langfuse_public_key: str = "" langfuse_secret_key: str = "" @@ -32,6 +43,7 @@ class Settings(BaseSettings): class Config: env_file = ".env" + populate_by_name = True def parse_hardware_info(hardware_info): diff --git a/backend/tests/test_model_service.py b/backend/tests/test_model_service.py index 361c70f..411d95b 100644 --- a/backend/tests/test_model_service.py +++ b/backend/tests/test_model_service.py @@ -129,6 +129,32 @@ def test_follower_without_worker_group_id_skipped(): assert out == [] +def test_legacy_ocf_env_vars_still_work(monkeypatch): + """OCF_HEAD_ADDR and OCF_FIXTURE_PATH must keep working through the + rename to OTELA_*. Deployments can migrate on their own schedule.""" + from backend.config import Settings + + monkeypatch.setenv("OCF_HEAD_ADDR", "http://legacy:8092") + monkeypatch.setenv("OCF_FIXTURE_PATH", "/legacy/fixture.json") + monkeypatch.delenv("OTELA_HEAD_ADDR", raising=False) + monkeypatch.delenv("OTELA_FIXTURE_PATH", raising=False) + s = Settings() + assert s.otela_head_addr == "http://legacy:8092" + assert s.otela_fixture_path == "/legacy/fixture.json" + + +def test_canonical_otela_env_vars_win_over_legacy(monkeypatch): + """When both are set, the canonical OTELA_* names win so a partial + migration (one renamed, one not) doesn't silently keep the legacy + value in force.""" + from backend.config import Settings + + monkeypatch.setenv("OCF_HEAD_ADDR", "http://legacy:8092") + monkeypatch.setenv("OTELA_HEAD_ADDR", "http://canonical:8092") + s = Settings() + assert s.otela_head_addr == "http://canonical:8092" + + def test_request_failure_returns_empty(): with patch("backend.services.model_service.requests.get") as mock_get: mock_get.side_effect = Exception("boom") From 174b3895ce92f240b8469cdc4fc2111284f93397 Mon Sep 17 00:00:00 2001 From: robmsmt Date: Sun, 17 
May 2026 16:54:57 +0200 Subject: [PATCH 4/8] makefile launch docker --- Makefile | 61 ++++++++++++++++++++++++++++++++-- frontend/src/pages/index.astro | 2 +- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 7cf29da..5fb4567 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,14 @@ -.PHONY: install install-dev format check test run +.PHONY: install install-dev format check test run db-up db-down migrate _ensure-env _ensure-frontend-env UV_EXTRA ?= +PG_CONTAINER := serving-api-pg +PG_PORT := 5433 +PG_USER := serving +PG_PASS := serving +PG_DB := serving +DATABASE_URL := postgresql://$(PG_USER):$(PG_PASS)@localhost:$(PG_PORT)/$(PG_DB) + install: uv pip install $(UV_EXTRA) -r backend/requirements.txt @@ -19,7 +26,57 @@ check: test: pytest backend/tests/ -v -run: +_ensure-env: + @if [ ! -f .env ]; then \ + echo "DATABASE_URL=$(DATABASE_URL)" > .env; \ + echo "wrote default .env (DATABASE_URL -> local docker postgres on :$(PG_PORT))"; \ + fi + +_ensure-frontend-env: + @if [ ! 
-f frontend/.env ]; then \ + secret=$$(openssl rand -hex 32); \ + { \ + echo "AUTH_SECRET=$$secret"; \ + echo "AUTH_TRUST_HOST=true"; \ + echo "AUTH0_CLIENT_ID="; \ + echo "AUTH0_CLIENT_SECRET="; \ + echo "AUTH0_ISSUER="; \ + } > frontend/.env; \ + echo "wrote default frontend/.env (AUTH_SECRET generated; fill in AUTH0_* to enable login)"; \ + fi + +db-up: + @if [ -z "$$(docker ps -q -f name=^/$(PG_CONTAINER)$$)" ]; then \ + if [ -n "$$(docker ps -aq -f name=^/$(PG_CONTAINER)$$)" ]; then \ + echo "starting existing $(PG_CONTAINER) container"; \ + docker start $(PG_CONTAINER) > /dev/null; \ + else \ + echo "creating $(PG_CONTAINER) container on :$(PG_PORT)"; \ + docker run -d --name $(PG_CONTAINER) \ + -e POSTGRES_USER=$(PG_USER) \ + -e POSTGRES_PASSWORD=$(PG_PASS) \ + -e POSTGRES_DB=$(PG_DB) \ + -p $(PG_PORT):5432 \ + postgres:16 > /dev/null; \ + fi; \ + fi + @printf "waiting for postgres"; \ + for i in $$(seq 1 30); do \ + if docker exec $(PG_CONTAINER) pg_isready -U $(PG_USER) -d $(PG_DB) > /dev/null 2>&1; then \ + echo " ready"; exit 0; \ + fi; \ + printf "."; sleep 1; \ + done; \ + echo " timed out"; exit 1 + +db-down: + -docker stop $(PG_CONTAINER) > /dev/null 2>&1 + -docker rm $(PG_CONTAINER) > /dev/null 2>&1 + +migrate: _ensure-env db-up + alembic upgrade head + +run: _ensure-env _ensure-frontend-env db-up migrate uvicorn backend.main:app --reload --host 0.0.0.0 --port 8080 & \ cd frontend && npm run dev & \ wait diff --git a/frontend/src/pages/index.astro b/frontend/src/pages/index.astro index fcf53e6..4819b0a 100644 --- a/frontend/src/pages/index.astro +++ b/frontend/src/pages/index.astro @@ -24,7 +24,7 @@ const articles = (await getCollection("articles")) .sort((a, b) => b.data.date.valueOf() - a.data.date.valueOf()) .slice(0,SITE.NUM_PROJECTS_ON_HOMEPAGE); -const session = await getSession(Astro.request); +const session = import.meta.env.DEV ? 
null : await getSession(Astro.request); const isSessionValid = session && session.user; const chatAppUrl = process.env.CHAT_APP_URL || "https://chat.swissai.cscs.ch"; From 522b0440ea2b3504ba1a1369f50edcac06de7818 Mon Sep 17 00:00:00 2001 From: robmsmt Date: Sun, 17 May 2026 17:43:49 +0200 Subject: [PATCH 5/8] feat: realistic upgraded fixture + graceful legacy fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upgraded DNT fixture now includes framework_args (the second monospace block in the card expansion), expires_at (SLURM time limit applied to started_at), slurm_reservation (mixed in for some launches), and varied started_at values spread across several hours so the UI shows a realistic mix of ages. Note: framework_args isn't in the opentela --label set we shipped yet — this fixture preempts a planned follow-up patch there. Until that ships, real prod data won't carry framework_args; serving-api just hides the row. ModelCard expansion: filter out empty rows so the legacy / pre-v0.0.6 case shows just what's known (peer ids + model) instead of a wall of "?" placeholders. When no labels exist at all, render a small amber hint pointing at the v0.0.6 requirement instead of silently rendering an empty card. --- backend/tests/fixtures/build_upgraded.py | 99 +++++++- .../tests/fixtures/dnt_table_upgraded.json | 211 +++++++++++++----- frontend/src/components/ui/ModelCard.svelte | 57 +++-- 3 files changed, 290 insertions(+), 77 deletions(-) diff --git a/backend/tests/fixtures/build_upgraded.py b/backend/tests/fixtures/build_upgraded.py index ae3341d..1ff752d 100644 --- a/backend/tests/fixtures/build_upgraded.py +++ b/backend/tests/fixtures/build_upgraded.py @@ -6,7 +6,10 @@ deployed: - Every peer gets a synthetic SLURM job id (= its own worker_group_id). -- Each peer's metadata reflects realistic launched_by / framework values. 
+- Each peer's metadata reflects realistic launched_by / framework / + framework_args values, varied per model. +- started_at and expires_at are spread across "now-ish" so the UI shows + a realistic mix of recently-launched and longer-running replicas. - One multi-peer model is manually re-keyed so two of its peers share a worker_group_id, simulating a 2-node TP replica with a metrics-only follower (no `service`). This lets us exercise the multi-node-replica @@ -18,6 +21,7 @@ import json import pathlib +from datetime import datetime, timedelta, timezone HERE = pathlib.Path(__file__).parent SRC = HERE / "dnt_table_prod.json" @@ -28,8 +32,58 @@ # Models whose served name suggests a particular launcher. FRAMEWORK_HINTS = { - "sglang": ["Apertus", "GLM", "gemma", "olmo"], - "vllm": ["Qwen", "Llama", "Snowflake", "Kimi", "Apertus-1.5"], + "sglang": ["Apertus", "GLM", "gemma", "olmo", "gpt-oss"], + "vllm": ["Qwen", "Llama", "Snowflake", "Kimi"], +} + +# Representative framework_args per model. Covers what an operator +# actually types — paths, tensor-parallel sizing, memory caps. Real OCF +# emits these verbatim via `--label framework_args="..."`. Fixture-only +# until opentela patch lands the framework_args label. 
+FRAMEWORK_ARGS = { + "Apertus-70B-Instruct-2509": ( + "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-70B-Instruct-2509 " + "--tensor-parallel-size 4 --max-model-len 65536 --port 8080" + ), + "Apertus-8B-Instruct-2509": ( + "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-8B-Instruct-2509 " + "--port 8080 --enable-metrics" + ), + "gemma-4-31B-it": ( + "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it " + "--tensor-parallel-size 4 --port 8080" + ), + "Qwen3.5-397B-A17B": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B " + "--tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080" + ), + "gpt-oss-120b": ( + "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b " + "--tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss" + ), + "Qwen3-32B": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B " + "--tensor-parallel-size 4 --port 8080" + ), + "Llama-3.3-70B-Instruct": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/meta-llama/Llama-3.3-70B-Instruct " + "--tensor-parallel-size 4 --max-model-len 8192 --port 8080" + ), + "Qwen3-Next-80B-A3B-Instruct": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-Next-80B-A3B-Instruct " + "--tensor-parallel-size 4 --port 8080" + ), + "snowflake-arctic-embed-l-v2.0": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Snowflake/snowflake-arctic-embed-l-v2.0 " + "--task embed --port 8080" + ), + "GLM-4.7-Flash": ( + "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/zai-org/GLM-4.7-Flash --port 8080" + ), + "Qwen3.5-27B": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-27B " + "--tensor-parallel-size 2 --port 8080" + ), } @@ -40,6 +94,18 @@ def guess_framework(model: str) -> str: 
return "sglang" +def guess_framework_args(model: str) -> str: + """Find the best-matching entry in FRAMEWORK_ARGS for this served name.""" + for key, args in FRAMEWORK_ARGS.items(): + if key in model: + return args + return "--port 8080" + + +# Baseline "now" so the fixture is deterministic across regenerations. +NOW = datetime(2026, 5, 17, 13, 0, tzinfo=timezone.utc) + + def main() -> None: src = json.loads(SRC.read_text()) upgraded: dict = {} @@ -68,14 +134,27 @@ def main() -> None: job_id = next_job_id next_job_id += 1 + # Spread launches over the past few hours: 30 min apart starting + # 6 h ago. Plausibly varied; older launches expire sooner. + started_offset = timedelta(minutes=30 * (i % 12) + 5 * (i // 12)) + started_at = NOW - timedelta(hours=6) + started_offset + # Pick a SLURM time-limit consistent with how the launcher is + # actually used today — short jobs (1 h) for quick tests, + # long ones (12 h) for stable serving. Mix them. + time_limit = timedelta(hours=12 if i % 3 == 0 else 1 if i % 7 == 0 else 6) + expires_at = started_at + time_limit + peer["labels"] = { "launched_by": USERS[i % len(USERS)], "slurm_job_id": str(job_id), "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0" if i % 4 == 0 else "", "worker_group_id": str(job_id), "framework": guess_framework(model_name) if model_name else "", + "framework_args": guess_framework_args(model_name) if model_name else "", "served_model_name": model_name, - "started_at": "2026-05-15T18:00:00Z", + "started_at": started_at.isoformat().replace("+00:00", "Z"), + "expires_at": expires_at.isoformat().replace("+00:00", "Z"), } # Drop empty entries so the JSON looks closer to what OpenTela emits. 
peer["labels"] = {k: v for k, v in peer["labels"].items() if v} @@ -94,10 +173,14 @@ def main() -> None: for model, peers in by_model.items(): if len(peers) >= 2: head_key, follower_key = peers[0], peers[1] - shared = upgraded[head_key]["labels"]["worker_group_id"] - upgraded[follower_key]["labels"]["worker_group_id"] = shared - upgraded[follower_key]["labels"]["slurm_job_id"] = shared - upgraded[follower_key]["labels"]["launched_by"] = upgraded[head_key]["labels"]["launched_by"] + head_labels = upgraded[head_key]["labels"] + shared = head_labels["worker_group_id"] + f_labels = upgraded[follower_key]["labels"] + f_labels["worker_group_id"] = shared + f_labels["slurm_job_id"] = shared + f_labels["launched_by"] = head_labels["launched_by"] + f_labels["started_at"] = head_labels["started_at"] + f_labels["expires_at"] = head_labels["expires_at"] # Metrics-only: drop the service advertisement. upgraded[follower_key]["service"] = [] upgraded[follower_key]["status"] = "ready" diff --git a/backend/tests/fixtures/dnt_table_upgraded.json b/backend/tests/fixtures/dnt_table_upgraded.json index 86b9172..ffe7edb 100644 --- a/backend/tests/fixtures/dnt_table_upgraded.json +++ b/backend/tests/fixtures/dnt_table_upgraded.json @@ -62,10 +62,13 @@ "launched_by": "rosmith", "slurm_job_id": "2256000", "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256000", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-70B-Instruct-2509 --tensor-parallel-size 4 --max-model-len 65536 --port 8080", "served_model_name": "swiss-ai/Apertus-70B-Instruct-2509", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T07:00:00Z", + "expires_at": "2026-05-17T19:00:00Z" } }, "/QmPZuZMWhcX2f8EbqRi9HAfrx4ZCiXVfT3c1jifNabwkRL": { @@ -133,8 +136,10 @@ "slurm_partition": "normal", "worker_group_id": "2256001", "framework": "sglang", + "framework_args": "--model-path 
/capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", "served_model_name": "google/gemma-4-31B-it-TsOA", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T07:30:00Z", + "expires_at": "2026-05-17T13:30:00Z" } }, "/QmQ1JHLDJe7KhnD48S9TgaEPG3p2az1kq4dMoKUdBssRDq": { @@ -202,8 +207,10 @@ "slurm_partition": "normal", "worker_group_id": "2256002", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T08:00:00Z", + "expires_at": "2026-05-17T14:00:00Z" } }, "/QmQFqgsty9RjE8DCMMH7QY4pCPwEhCdief1gjku44ALbNN": { @@ -271,8 +278,10 @@ "slurm_partition": "normal", "worker_group_id": "2256003", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T08:30:00Z", + "expires_at": "2026-05-17T20:30:00Z" } }, "/QmQxjzdBtpjRWwXP991ow1LezbSBSAtJP4cnBW7635YEyW": { @@ -322,10 +331,13 @@ "launched_by": "xyao", "slurm_job_id": "2256001", "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256001", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", "served_model_name": "google/gemma-4-31B-it-TsOA", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T07:30:00Z", + "expires_at": "2026-05-17T13:30:00Z" } }, "/QmRPfyCPgcL2n9Tob6vKeNoQMX39e3fBS4JWNw7Cpz9Wa5": { @@ -393,8 +405,10 @@ "slurm_partition": "normal", 
"worker_group_id": "2256005", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", "served_model_name": "Qwen/Qwen3-32B-kgCt", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T09:30:00Z", + "expires_at": "2026-05-17T15:30:00Z" } }, "/QmRYSXoAfVk1ZsqmduBrsDd1Zcx541KMA9wtT3Mnsau9tn": { @@ -462,8 +476,10 @@ "slurm_partition": "normal", "worker_group_id": "2256006", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T10:00:00Z", + "expires_at": "2026-05-17T22:00:00Z" } }, "/QmRgcwJAjsYRLTnBMJvk8ARYa9tNmBjHvPZ9dsLfrq1MTP": { @@ -531,8 +547,10 @@ "slurm_partition": "normal", "worker_group_id": "2256007", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", "served_model_name": "Qwen/Qwen3-32B-kgCt", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T10:30:00Z", + "expires_at": "2026-05-17T11:30:00Z" } }, "/QmRzLCghn5BSCAmoqzTmBopB2Nh9jyApsibMBc7VU1pREG": { @@ -598,10 +616,13 @@ "launched_by": "isternfel", "slurm_job_id": "2256008", "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256008", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", "served_model_name": "Qwen/Qwen3-32B-kgCt", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T11:00:00Z", + "expires_at": "2026-05-17T17:00:00Z" } }, "/QmSpTkK6cemTaVWugFjThfr4P17m5ZeQjso2adT8FWkD7G": { @@ -669,8 +690,10 @@ "slurm_partition": "normal", "worker_group_id": 
"2256009", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", "served_model_name": "Qwen/Qwen3-32B-kgCt", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T11:30:00Z", + "expires_at": "2026-05-17T23:30:00Z" } }, "/QmSssGsuXMcN61G6ejHCwhJjbbLLukqNNs4KGiAcYZ4vPT": { @@ -738,8 +761,10 @@ "slurm_partition": "normal", "worker_group_id": "2256010", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T12:00:00Z", + "expires_at": "2026-05-17T18:00:00Z" } }, "/QmSy3ee3TMXRE2PPc1VTRQyWWDj94ieahAWsJn3PF2c6em": { @@ -807,8 +832,10 @@ "slurm_partition": "normal", "worker_group_id": "2256011", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T12:30:00Z", + "expires_at": "2026-05-17T18:30:00Z" } }, "/QmTUY7a8RYRFnhxMgQ2pNm4V9tKQDzQBpMc6dWQ9isysDZ": { @@ -874,10 +901,13 @@ "launched_by": "aahadinia", "slurm_job_id": "2256012", "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256012", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T07:05:00Z", + "expires_at": "2026-05-17T19:05:00Z" } }, 
"/QmTVFAoyUwxkBgUJtydmUNkA1sYF2j29St5Lz5JtW4zKKm": { @@ -945,8 +975,10 @@ "slurm_partition": "normal", "worker_group_id": "2256013", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T07:35:00Z", + "expires_at": "2026-05-17T13:35:00Z" } }, "/QmTidAAZgMyXAiBwsbqxi4MZQjgwiuzjw4JwNu4VJtXXmS": { @@ -1014,8 +1046,10 @@ "slurm_partition": "normal", "worker_group_id": "2256014", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", "served_model_name": "google/gemma-4-31B-it-TsOA", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T08:05:00Z", + "expires_at": "2026-05-17T09:05:00Z" } }, "/QmUBhMF1Gg19ZbUrmBCCWdprt3XMLiXftZebuUnnaGeUJ8": { @@ -1083,8 +1117,10 @@ "slurm_partition": "normal", "worker_group_id": "2256015", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T08:35:00Z", + "expires_at": "2026-05-17T20:35:00Z" } }, "/QmUCz7yLJt84xHyZySK6LMQDDXNQW6qfVz7TWc3UDLt6j7": { @@ -1150,10 +1186,13 @@ "launched_by": "xyao", "slurm_job_id": "2256016", "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256016", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": 
"2026-05-15T18:00:00Z" + "started_at": "2026-05-17T09:05:00Z", + "expires_at": "2026-05-17T15:05:00Z" } }, "/QmV44xF8bg51ZVrWSAstzms12i8Dt3f1oHV7qXyQiQnhTn": { @@ -1221,8 +1260,10 @@ "slurm_partition": "normal", "worker_group_id": "2256017", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", "served_model_name": "google/gemma-4-31B-it-TsOA", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T09:35:00Z", + "expires_at": "2026-05-17T15:35:00Z" } }, "/QmVKgxLCrKx35THcpaGbAMyweAcN8WuSRqbbrcUtRQgJur": { @@ -1290,8 +1331,10 @@ "slurm_partition": "normal", "worker_group_id": "2256018", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T10:05:00Z", + "expires_at": "2026-05-17T22:05:00Z" } }, "/QmVyfUwaMw1HD9YvJ6spi2mrDyrsC5fYnTgTqKcGpraVob": { @@ -1359,8 +1402,10 @@ "slurm_partition": "normal", "worker_group_id": "2256019", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/meta-llama/Llama-3.3-70B-Instruct --tensor-parallel-size 4 --max-model-len 8192 --port 8080", "served_model_name": "meta-llama/Llama-3.3-70B-Instruct", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T10:35:00Z", + "expires_at": "2026-05-17T16:35:00Z" } }, "/QmWH14wTNJsHJEnpRXGAozEj89Ja1Hy2nsY9RhEATgVXD2": { @@ -1426,10 +1471,13 @@ "launched_by": "rosmith", "slurm_job_id": "2256020", "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256020", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 
--port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T11:05:00Z", + "expires_at": "2026-05-17T17:05:00Z" } }, "/QmWUY2amw5ZG2triPc5mVjySsowxxz9zp8GZpb5mJ267S8": { @@ -1497,8 +1545,10 @@ "slurm_partition": "normal", "worker_group_id": "2256021", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", "served_model_name": "google/gemma-4-31B-it-TsOA", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T11:35:00Z", + "expires_at": "2026-05-17T23:35:00Z" } }, "/QmXY2pbA48gJ7HcSnBQMV6VSaBMiK3jWXRyNVUbxRtZJRW": { @@ -1566,8 +1616,10 @@ "slurm_partition": "normal", "worker_group_id": "2256022", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T12:05:00Z", + "expires_at": "2026-05-17T18:05:00Z" } }, "/QmYFsphqm4gNgpgAdjwa6ZaCFPwJxaVHFwJ9P7cLSkTqZe": { @@ -1635,8 +1687,10 @@ "slurm_partition": "normal", "worker_group_id": "2256023", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", "served_model_name": "google/gemma-4-31B-it-TsOA", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T12:35:00Z", + "expires_at": "2026-05-17T18:35:00Z" } }, "/QmYaMDbbw8WRY7RDcFuFHWDtmA9smyPeBcVNfCW4XC5mVV": { @@ -1702,10 +1756,13 @@ "launched_by": "yiswang", "slurm_job_id": "2256024", "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256024", "framework": "vllm", + "framework_args": "--model 
/capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", "served_model_name": "Qwen/Qwen3-32B-kgCt", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T07:10:00Z", + "expires_at": "2026-05-17T19:10:00Z" } }, "/QmZ2DSBRFUdoD2vxt4WBNjSnAZZNB211BQGA3N1P7WnkYa": { @@ -1773,8 +1830,10 @@ "slurm_partition": "normal", "worker_group_id": "2256025", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T07:40:00Z", + "expires_at": "2026-05-17T13:40:00Z" } }, "/QmZAJMsbmnfz2KoYbzDC7NC4SLJfzUu5y3Wiz4stXhxE6y": { @@ -1842,8 +1901,10 @@ "slurm_partition": "normal", "worker_group_id": "2256026", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T08:10:00Z", + "expires_at": "2026-05-17T14:10:00Z" } }, "/QmZKtLQ4Hmtu3LEphS2hn7jciRyHa4TYAPjpkfsYmZkgZr": { @@ -1911,8 +1972,10 @@ "slurm_partition": "normal", "worker_group_id": "2256027", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T08:40:00Z", + "expires_at": "2026-05-17T20:40:00Z" } }, "/QmZZTz4iU4Cs46bPDFjVdG4Ws4ErvL5C3jNmpV7dwj1Xb4": { @@ -1978,10 +2041,13 @@ "launched_by": "isternfel", "slurm_job_id": "2256028", "slurm_partition": "normal", + 
"slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256028", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T09:10:00Z", + "expires_at": "2026-05-17T10:10:00Z" } }, "/QmZkb4QBua1DbmPiKz7eMpQVdceqpNxSfsjzPuJBVyDT9U": { @@ -2049,8 +2115,10 @@ "slurm_partition": "normal", "worker_group_id": "2256029", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T09:40:00Z", + "expires_at": "2026-05-17T15:40:00Z" } }, "/Qma4wwUVNRfNsz4JB26z26LJyNzwhZ6YWSVeXhKAkckJY7": { @@ -2118,8 +2186,10 @@ "slurm_partition": "normal", "worker_group_id": "2256030", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-Next-80B-A3B-Instruct --tensor-parallel-size 4 --port 8080", "served_model_name": "Qwen/Qwen3-Next-80B-A3B-Instruct-yiswang", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T10:10:00Z", + "expires_at": "2026-05-17T22:10:00Z" } }, "/QmaEQoJVdvv2nRV3HUsH6dzjxoCy6mYBkCxGh37AaLNeMp": { @@ -2187,8 +2257,10 @@ "slurm_partition": "normal", "worker_group_id": "2256031", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T10:40:00Z", + "expires_at": "2026-05-17T16:40:00Z" } }, 
"/QmaLdTc28YRJn5uMrUt5kKnuWQzT2giA6dE9ZRkeG7pHVC": { @@ -2254,10 +2326,13 @@ "launched_by": "aahadinia", "slurm_job_id": "2256032", "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256032", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T11:10:00Z", + "expires_at": "2026-05-17T17:10:00Z" } }, "/Qmaf4Ahny2u9yYyHLZtv5THxZaV2fWFKFicq7bJryvDYtk": { @@ -2325,8 +2400,10 @@ "slurm_partition": "normal", "worker_group_id": "2256033", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T11:40:00Z", + "expires_at": "2026-05-17T23:40:00Z" } }, "/QmatBYkA34rU7Xp7MZGfQWCnF8jKqvivfuseSw89teB2GK": { @@ -2394,8 +2471,10 @@ "slurm_partition": "normal", "worker_group_id": "2256034", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T12:10:00Z", + "expires_at": "2026-05-17T18:10:00Z" } }, "/QmatXigW2oBdFezZQ3jdVzF3kyq6DD38vuPZvLcVo8Jbmu": { @@ -2463,8 +2542,10 @@ "slurm_partition": "normal", "worker_group_id": "2256035", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 
0.85 --port 8080", "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T12:40:00Z", + "expires_at": "2026-05-17T13:40:00Z" } }, "/QmbChDqsb1od2fyQ5V98kWjckswmXfVJGtutPdqpkXv3jy": { @@ -2515,10 +2596,13 @@ "launched_by": "xyao", "slurm_job_id": "2256036", "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256036", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Snowflake/snowflake-arctic-embed-l-v2.0 --task embed --port 8080", "served_model_name": "Snowflake/snowflake-arctic-embed-l-v2.0", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T07:15:00Z", + "expires_at": "2026-05-17T19:15:00Z" } }, "/QmbRVvFzCmZhEXiTENtvDvNab8wREqEo4byVKDw83TTHeH": { @@ -2586,8 +2670,10 @@ "slurm_partition": "normal", "worker_group_id": "2256037", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T07:45:00Z", + "expires_at": "2026-05-17T13:45:00Z" } }, "/QmbRoB5rRsDTnSaVQsG72ZJtvdVvERx9Nv3B5p2AGGDw2f": { @@ -2655,8 +2741,10 @@ "slurm_partition": "normal", "worker_group_id": "2256038", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", "served_model_name": "Qwen/Qwen3-32B-kgCt", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T08:15:00Z", + "expires_at": "2026-05-17T14:15:00Z" } }, "/QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9": { @@ -2686,7 +2774,8 @@ "slurm_job_id": "2256039", "slurm_partition": "normal", "worker_group_id": "2256039", - "started_at": "2026-05-15T18:00:00Z" + "started_at": 
"2026-05-17T08:45:00Z", + "expires_at": "2026-05-17T20:45:00Z" } }, "/QmbiBQSUfDeXTdkPHfa6cyySRNJWSXspRbNwjCMJC6juVL": { @@ -2752,10 +2841,13 @@ "launched_by": "rosmith", "slurm_job_id": "2256040", "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256040", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T09:15:00Z", + "expires_at": "2026-05-17T15:15:00Z" } }, "/QmbjQk58JdgKcygFU85ztiqCF4MRqe6K6RAT9D7SEeUzyd": { @@ -2806,7 +2898,8 @@ "slurm_job_id": "2256041", "slurm_partition": "normal", "worker_group_id": "2256041", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T09:45:00Z", + "expires_at": "2026-05-17T15:45:00Z" } }, "/Qmc7Lhr1FRE4vh7Mk7VJAgPoEGJU8fYM82tdZsuKn8Bu4A": { @@ -2859,8 +2952,10 @@ "slurm_partition": "normal", "worker_group_id": "2256042", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/zai-org/GLM-4.7-Flash --port 8080", "served_model_name": "zai-org/GLM-4.7-Flash", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T10:15:00Z", + "expires_at": "2026-05-17T22:15:00Z" } }, "/QmcNUTEmgWq9u51XaQ3NVdQmUGb5AXcTq31b81HRajuF8B": { @@ -2918,8 +3013,10 @@ "slurm_partition": "normal", "worker_group_id": "2256043", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-27B --tensor-parallel-size 2 --port 8080", "served_model_name": "Qwen/Qwen3.5-27B", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T10:45:00Z", + "expires_at": "2026-05-17T16:45:00Z" } }, "/QmcRV1QhEcmGEbxer4DwpswD27wf3g86cjzYdRiWprj7KG": { @@ -2985,10 +3082,13 @@ "launched_by": "yiswang", "slurm_job_id": "2256044", 
"slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256044", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T11:15:00Z", + "expires_at": "2026-05-17T17:15:00Z" } }, "/QmduMdBLdY6vDX3P1Wbrbv5QvPEGXQKnJrWb5WQMRADGRT": { @@ -3056,8 +3156,10 @@ "slurm_partition": "normal", "worker_group_id": "2256045", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", "served_model_name": "openai/gpt-oss-120b-Vsdo", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T11:45:00Z", + "expires_at": "2026-05-17T23:45:00Z" } }, "/QmdvyPbnCXYz9SrHtN1RTZC8zfu17BgiuaNhy5Dxkjkdx3": { @@ -3125,8 +3227,10 @@ "slurm_partition": "normal", "worker_group_id": "2256046", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T12:15:00Z", + "expires_at": "2026-05-17T18:15:00Z" } }, "/QmeHwokMDRGBQkJaAcJ2kqUPgVVfb3pAspwznmGtfG3SrQ": { @@ -3194,8 +3298,10 @@ "slurm_partition": "normal", "worker_group_id": "2256047", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", "served_model_name": "google/gemma-4-31B-it-TsOA", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T12:45:00Z", + "expires_at": "2026-05-17T18:45:00Z" } }, 
"/QmfCUkV2TZaSzeVCaXQ1VaxsPJaweujarmpvgdBALd4qzp": { @@ -3246,10 +3352,13 @@ "launched_by": "isternfel", "slurm_job_id": "2256048", "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", "worker_group_id": "2256048", "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-8B-Instruct-2509 --port 8080 --enable-metrics", "served_model_name": "swiss-ai/Apertus-8B-Instruct-2509", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T07:20:00Z", + "expires_at": "2026-05-17T19:20:00Z" } }, "/QmfEhbkxvqWJ5uPCyEtLLizjnnEJ5SHMrGn36wDwhNezJY": { @@ -3317,8 +3426,10 @@ "slurm_partition": "normal", "worker_group_id": "2256049", "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", "served_model_name": "Qwen/Qwen3-32B-kgCt", - "started_at": "2026-05-15T18:00:00Z" + "started_at": "2026-05-17T07:50:00Z", + "expires_at": "2026-05-17T08:50:00Z" } } } \ No newline at end of file diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte index fc6c49e..35db36a 100644 --- a/frontend/src/components/ui/ModelCard.svelte +++ b/frontend/src/components/ui/ModelCard.svelte @@ -186,35 +186,54 @@ {#each entry.data.replicas as replica, idx (replica.worker_group_id)} + {@const head = replica.head} + {@const hasLabels = !!(head.launched_by || head.slurm_job_id || head.started_at || head.framework || head.version || head.status)} + {@const peerLine = (p) => { + const hn = p.hostname; + const pid = p.peer_id; + if (hn && pid) return `${hn} (${pid})`; + return hn || pid || "unknown"; + }} + {@const rows = [ + ["model", entry.data.title], + ["launched_by", head.launched_by], + ["slurm_job_id", head.slurm_job_id], + ["started_at", head.started_at], + ["framework", head.framework], + ["version", head.version], + // worker_group_id is omitted when it's a 
synthesised legacy-N fallback — + // it's just noise in that case. + ["worker_group_id", replica.worker_group_id.startsWith("legacy-") ? "" : replica.worker_group_id], + ["head", peerLine(head)], + ...replica.followers.map((f, i) => [`follower_${i + 1}`, peerLine(f)]), + ].filter(([, v]) => v && v !== "unknown" || v === peerLine(head) || (typeof v === "string" && v.includes("(")))}

Replica {idx + 1}{entry.data.replicaCount > 1 ? ` / ${entry.data.replicaCount}` : ""} · {topologyString(replica)} - {#if replica.head.status} - {replica.head.status} + {#if head.status} + {head.status} {/if}
- -
{[
-            ["model", entry.data.title],
-            ["launched_by", replica.head.launched_by || "?"],
-            ["slurm_job_id", replica.head.slurm_job_id || "?"],
-            ["started_at", replica.head.started_at || "?"],
-            ["framework", replica.head.framework || "?"],
-            ["version", replica.head.version || "?"],
-            ["worker_group_id", replica.worker_group_id],
-            ["head", `${replica.head.hostname || "?"} (${replica.head.peer_id || "?"})`],
-            ...replica.followers.map((f, i) => [
-              `follower_${i + 1}`,
-              `${f.hostname || "?"} (${f.peer_id || "?"})`,
-            ]),
-          ].map(([k, v]) => `${k.padEnd(18)} ${v}`).join("\n")}
+ +
{rows
+            .filter(([, v]) => v)
+            .map(([k, v]) => `${k.padEnd(18)} ${v}`)
+            .join("\n")}
+ + {#if !hasLabels} +

+ Launch metadata (launched_by, slurm_job_id, framework, started_at…) requires OpenTela v0.0.6+ on the serving node. +

+ {/if} - {#if replica.head.labels && Object.keys(replica.head.labels).length > 0} - {@const extra = Object.entries(replica.head.labels).filter(([k]) => + {#if head.labels && Object.keys(head.labels).length > 0} + {@const extra = Object.entries(head.labels).filter(([k]) => !["launched_by","slurm_job_id","worker_group_id","framework","started_at","slurm_partition","served_model_name"].includes(k) )} {#if extra.length > 0} From 1f65ebfda695723fb71cb469a1b8ccd6ec2aebf0 Mon Sep 17 00:00:00 2001 From: robmsmt Date: Sun, 17 May 2026 17:52:58 +0200 Subject: [PATCH 6/8] feat: add `make dummy-run` for fixture-driven dev Same as `make run` but forces OTELA_FIXTURE_PATH at the synthesised upgraded fixture, so the model card UI shows the v0.0.6-shape payload (hostname, version, labels, multi-node demo) without depending on live prod state or whatever's in the developer's .env. Use `make run` to hit live prod, `make dummy-run` to iterate on the UI. --- Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5fb4567..4bc2fdf 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: install install-dev format check test run db-up db-down migrate _ensure-env _ensure-frontend-env +.PHONY: install install-dev format check test run dummy-run db-up db-down migrate _ensure-env _ensure-frontend-env UV_EXTRA ?= @@ -80,3 +80,12 @@ run: _ensure-env _ensure-frontend-env db-up migrate uvicorn backend.main:app --reload --host 0.0.0.0 --port 8080 & \ cd frontend && npm run dev & \ wait + +# Same as `run` but forces the model list to come from the synthesised +# upgraded fixture instead of the live OpenTela endpoint. Useful for +# iterating on the model-card UI without depending on prod state. 
+dummy-run: _ensure-env _ensure-frontend-env db-up migrate + OTELA_FIXTURE_PATH=$(PWD)/backend/tests/fixtures/dnt_table_upgraded.json \ + uvicorn backend.main:app --reload --host 0.0.0.0 --port 8080 & \ + cd frontend && npm run dev & \ + wait From 2d3940d612dcb4de3b425754adcc7c856f600cea Mon Sep 17 00:00:00 2001 From: robmsmt Date: Mon, 18 May 2026 13:48:39 +0200 Subject: [PATCH 7/8] use updated url, add time info in replica-panel for info on node --- Makefile | 15 +++------ backend/services/model_service.py | 3 +- frontend/src/components/ui/ModelCard.svelte | 32 +++++++++++++++---- frontend/src/components/ui/ModelList.svelte | 6 ++-- .../src/content/articles/03-opentela/index.md | 2 +- .../guides/01-getting-started/model-launch.md | 2 +- frontend/src/layouts/PageLayout.astro | 2 +- frontend/src/lib/config.ts | 4 +-- frontend/src/pages/api_key.astro | 2 +- frontend/src/pages/index.astro | 2 +- frontend/src/pages/leaderboard/index.astro | 2 +- 11 files changed, 42 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index 4bc2fdf..a9e4ead 100644 --- a/Makefile +++ b/Makefile @@ -28,21 +28,14 @@ test: _ensure-env: @if [ ! -f .env ]; then \ - echo "DATABASE_URL=$(DATABASE_URL)" > .env; \ - echo "wrote default .env (DATABASE_URL -> local docker postgres on :$(PG_PORT))"; \ + cp .env.example .env; \ + echo "copied .env.example -> .env"; \ fi _ensure-frontend-env: @if [ ! 
-f frontend/.env ]; then \ - secret=$$(openssl rand -hex 32); \ - { \ - echo "AUTH_SECRET=$$secret"; \ - echo "AUTH_TRUST_HOST=true"; \ - echo "AUTH0_CLIENT_ID="; \ - echo "AUTH0_CLIENT_SECRET="; \ - echo "AUTH0_ISSUER="; \ - } > frontend/.env; \ - echo "wrote default frontend/.env (AUTH_SECRET generated; fill in AUTH0_* to enable login)"; \ + cp frontend/.env.example frontend/.env; \ + echo "copied frontend/.env.example -> frontend/.env (fill in AUTH0_* to enable login)"; \ fi db-up: diff --git a/backend/services/model_service.py b/backend/services/model_service.py index 41cbc2a..f1eec4f 100644 --- a/backend/services/model_service.py +++ b/backend/services/model_service.py @@ -18,7 +18,7 @@ def _peer_metadata(node_info: dict) -> dict: return { "peer_id": node_info.get("id", ""), "hostname": node_info.get("hostname", ""), - "version": node_info.get("version", ""), + "otela_version": node_info.get("version", ""), "status": node_info.get("status", ""), "labels": labels, # Convenience pulls — frontends can just read these directly @@ -28,6 +28,7 @@ def _peer_metadata(node_info: dict) -> dict: "slurm_job_id": labels.get("slurm_job_id", ""), "framework": labels.get("framework", ""), "started_at": labels.get("started_at", ""), + "expires_at": labels.get("expires_at", ""), } diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte index 35db36a..742b560 100644 --- a/frontend/src/components/ui/ModelCard.svelte +++ b/frontend/src/components/ui/ModelCard.svelte @@ -5,12 +5,13 @@ interface Peer { peer_id?: string; hostname?: string; - version?: string; status?: string; device?: string; launched_by?: string; slurm_job_id?: string; started_at?: string; + expires_at?: string; + otela_version?: string; framework?: string; worker_group_id?: string; labels?: Record; @@ -55,6 +56,24 @@ $: firstHead = entry.data.replicas[0]?.head ?? 
{}; $: framework = firstHead.framework || ""; + // "2026-05-17T07:00:00Z" → "2026-05-17T07:00:00Z (11 hours ago)". + // Returns the iso untouched if it doesn't parse — keeps the row useful even + // if OpenTela emits something we don't understand. + function withRelative(iso: string | undefined): string { + if (!iso) return ""; + const t = new Date(iso).getTime(); + if (isNaN(t)) return iso; + const diffMs = t - Date.now(); + const abs = Math.abs(diffMs); + const rtf = new Intl.RelativeTimeFormat("en", { numeric: "auto" }); + let rel: string; + if (abs < 60_000) rel = rtf.format(Math.round(diffMs / 1000), "second"); + else if (abs < 3_600_000) rel = rtf.format(Math.round(diffMs / 60_000), "minute"); + else if (abs < 86_400_000) rel = rtf.format(Math.round(diffMs / 3_600_000), "hour"); + else rel = rtf.format(Math.round(diffMs / 86_400_000), "day"); + return `${iso} (${rel})`; + } + // Multi-node topology string: "2 nodes × 4xGH200" for an 8-GPU TP replica. function topologyString(r: Replica): string { const dev = r.devices[0] || "?"; @@ -187,7 +206,7 @@ {#each entry.data.replicas as replica, idx (replica.worker_group_id)} {@const head = replica.head} - {@const hasLabels = !!(head.launched_by || head.slurm_job_id || head.started_at || head.framework || head.version || head.status)} + {@const hasLabels = !!(head.launched_by || head.slurm_job_id || head.started_at || head.expires_at || head.framework || head.otela_version || head.status)} {@const peerLine = (p) => { const hn = p.hostname; const pid = p.peer_id; @@ -198,9 +217,10 @@ ["model", entry.data.title], ["launched_by", head.launched_by], ["slurm_job_id", head.slurm_job_id], - ["started_at", head.started_at], + ["started_at", withRelative(head.started_at)], + ["expires_at", withRelative(head.expires_at)], ["framework", head.framework], - ["version", head.version], + ["otela_version", head.otela_version], // worker_group_id is omitted when it's a synthesised legacy-N fallback — // it's just noise in that case. 
["worker_group_id", replica.worker_group_id.startsWith("legacy-") ? "" : replica.worker_group_id], @@ -227,14 +247,14 @@ {#if !hasLabels}

- Launch metadata (launched_by, slurm_job_id, framework, started_at…) requires OpenTela v0.0.6+ on the serving node. + Launch metadata (launched_by, slurm_job_id, framework, started_at, expires_at…) requires OpenTela v0.0.6+ on the serving node.

{/if} {#if head.labels && Object.keys(head.labels).length > 0} {@const extra = Object.entries(head.labels).filter(([k]) => - !["launched_by","slurm_job_id","worker_group_id","framework","started_at","slurm_partition","served_model_name"].includes(k) + !["launched_by","slurm_job_id","worker_group_id","framework","started_at","expires_at","slurm_partition","served_model_name"].includes(k) )} {#if extra.length > 0}
Extra labels
diff --git a/frontend/src/components/ui/ModelList.svelte b/frontend/src/components/ui/ModelList.svelte index 83ac1a1..997b808 100644 --- a/frontend/src/components/ui/ModelList.svelte +++ b/frontend/src/components/ui/ModelList.svelte @@ -112,11 +112,9 @@

Available Models {#if !loading && !error} - {modelCount} + {modelCount} {#if replicaCount !== modelCount} - - , Replicas - {replicaCount} + {replicaCount} {/if} {/if} diff --git a/frontend/src/content/articles/03-opentela/index.md b/frontend/src/content/articles/03-opentela/index.md index 364b034..19bb02e 100644 --- a/frontend/src/content/articles/03-opentela/index.md +++ b/frontend/src/content/articles/03-opentela/index.md @@ -59,4 +59,4 @@ By using OpenTela, SwissAI enables: ## Conclusion -The Swiss AI Initiative's integration of OpenTela represents a significant shift toward a more sovereign and collaborative AI infrastructure. By leveraging OpenTela's decentralized architecture, SwissAI built a platform where traditional HPC clusters can be used as a shared pool where every researchers can benefit from and contribute to. It also effectively converts fragmented, idle GPU capacity into a unified, accessible resource for the research community. You can view the real-time status of the models served by SwissAI on the [Swiss AI Research Platform](https://serving.swissai.cscs.ch/). If you are interested in learning more about how SwissAI uses OpenTela, please feel free to reach out to us! \ No newline at end of file +The Swiss AI Initiative's integration of OpenTela represents a significant shift toward a more sovereign and collaborative AI infrastructure. By leveraging OpenTela's decentralized architecture, SwissAI built a platform where traditional HPC clusters can be used as a shared pool that every researcher can benefit from and contribute to. It also effectively converts fragmented, idle GPU capacity into a unified, accessible resource for the research community. You can view the real-time status of the models served by SwissAI on the [Swiss AI Research Platform](https://serving.swissai.svc.cscs.ch/). If you are interested in learning more about how SwissAI uses OpenTela, please feel free to reach out to us! 
\ No newline at end of file diff --git a/frontend/src/content/guides/01-getting-started/model-launch.md b/frontend/src/content/guides/01-getting-started/model-launch.md index 49e961d..e452731 100644 --- a/frontend/src/content/guides/01-getting-started/model-launch.md +++ b/frontend/src/content/guides/01-getting-started/model-launch.md @@ -6,7 +6,7 @@ date: "December 22 2025" ## Recommended: Use model-launch -_[**model-launch**](https://github.com/swiss-ai/model-launch) is the recommended tool for getting models on [serving.swissai.cscs.ch](https://serving.swissai.cscs.ch)!_ +_[**model-launch**](https://github.com/swiss-ai/model-launch) is the recommended tool for getting models on [serving.swissai.svc.cscs.ch](https://serving.swissai.svc.cscs.ch)!_ It provides a framework-agnostic approach to submitting SLURM jobs for distributed inference using SGLang or vLLM. The tool handles single-node and multi-node deployments, automatically integrates with OCF (Open Compute Framework) for service discovery, and makes your models accessible externally from outside the cluster. It includes ready-to-use examples for popular models like Swiss AI Apertus, DeepSeek-V3, Kimi-K2, and many others, with support for advanced features like multi-worker routing, pre-launch commands, and interactive debugging modes. 
diff --git a/frontend/src/layouts/PageLayout.astro b/frontend/src/layouts/PageLayout.astro index 2086472..5a137ad 100644 --- a/frontend/src/layouts/PageLayout.astro +++ b/frontend/src/layouts/PageLayout.astro @@ -10,7 +10,7 @@ type Props = { }; const { title, description } = Astro.props; -const apiUrl = process.env.VITE_API_URL || 'https://api.swissai.cscs.ch'; +const apiUrl = import.meta.env.VITE_API_URL || 'https://api.swissai.svc.cscs.ch'; --- diff --git a/frontend/src/lib/config.ts b/frontend/src/lib/config.ts index fd18d7f..3d9355a 100644 --- a/frontend/src/lib/config.ts +++ b/frontend/src/lib/config.ts @@ -1,8 +1,8 @@ -const DEFAULT_API_URL = 'https://api.swissai.cscs.ch'; +const DEFAULT_API_URL = 'https://api.swissai.svc.cscs.ch'; export function getApiUrl(): string { if (typeof window !== 'undefined') { return (window as any).__API_URL__ || DEFAULT_API_URL; } - return process.env.VITE_API_URL || DEFAULT_API_URL; + return import.meta.env.VITE_API_URL || DEFAULT_API_URL; } diff --git a/frontend/src/pages/api_key.astro b/frontend/src/pages/api_key.astro index 52b0498..f2fd1f5 100644 --- a/frontend/src/pages/api_key.astro +++ b/frontend/src/pages/api_key.astro @@ -28,7 +28,7 @@ if (isDev) { // API key will be fetched client-side to avoid exposing it in the HTML let apiKey = "Loading..."; -const apiUrl = process.env.VITE_API_URL || 'https://api.swissai.cscs.ch'; +const apiUrl = import.meta.env.VITE_API_URL || 'https://api.swissai.svc.cscs.ch'; --- diff --git a/frontend/src/pages/index.astro b/frontend/src/pages/index.astro index 4819b0a..38225d5 100644 --- a/frontend/src/pages/index.astro +++ b/frontend/src/pages/index.astro @@ -26,7 +26,7 @@ const articles = (await getCollection("articles")) const session = import.meta.env.DEV ? 
null : await getSession(Astro.request); const isSessionValid = session && session.user; -const chatAppUrl = process.env.CHAT_APP_URL || "https://chat.swissai.cscs.ch"; +const chatAppUrl = process.env.CHAT_APP_URL || "https://chat.swissai.svc.cscs.ch"; const contributors = [ { diff --git a/frontend/src/pages/leaderboard/index.astro b/frontend/src/pages/leaderboard/index.astro index 59dfdcc..d81b4c7 100644 --- a/frontend/src/pages/leaderboard/index.astro +++ b/frontend/src/pages/leaderboard/index.astro @@ -9,7 +9,7 @@ let tokenData = {}; let error = null; try { - const response = await fetch(`${process.env.VITE_API_URL || 'https://api.swissai.cscs.ch'}/v1/metrics`, { + const response = await fetch(`${import.meta.env.VITE_API_URL || 'https://api.swissai.svc.cscs.ch'}/v1/metrics`, { method: "POST", headers: { "Content-Type": "application/json", From d1c2598db25c54152f93367325fec9b924b13964 Mon Sep 17 00:00:00 2001 From: robmsmt Date: Mon, 18 May 2026 13:53:53 +0200 Subject: [PATCH 8/8] format --- backend/tests/fixtures/build_upgraded.py | 4 +++- backend/tests/test_model_service.py | 10 ++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/backend/tests/fixtures/build_upgraded.py b/backend/tests/fixtures/build_upgraded.py index 1ff752d..c5c3b4f 100644 --- a/backend/tests/fixtures/build_upgraded.py +++ b/backend/tests/fixtures/build_upgraded.py @@ -185,7 +185,9 @@ def main() -> None: upgraded[follower_key]["service"] = [] upgraded[follower_key]["status"] = "ready" multi_node_assigned = True - print(f"multi-node demo: {model} → head={head_key}, follower={follower_key}, wg={shared}") + print( + f"multi-node demo: {model} → head={head_key}, follower={follower_key}, wg={shared}" + ) break assert multi_node_assigned, "No model has >=2 peers; cannot demo multi-node" diff --git a/backend/tests/test_model_service.py b/backend/tests/test_model_service.py index 411d95b..e8c7727 100644 --- a/backend/tests/test_model_service.py +++ 
b/backend/tests/test_model_service.py @@ -116,7 +116,11 @@ def test_metrics_only_follower_groups_with_head_via_worker_group_id(): assert by_id["QmHead"]["id"] == "swiss-ai/Apertus-8B" assert by_id["QmFollower"]["id"] == "" # Shared worker_group_id lets the frontend group them. - assert by_id["QmHead"]["worker_group_id"] == by_id["QmFollower"]["worker_group_id"] == "12345" + assert ( + by_id["QmHead"]["worker_group_id"] + == by_id["QmFollower"]["worker_group_id"] + == "12345" + ) def test_follower_without_worker_group_id_skipped(): @@ -197,7 +201,9 @@ def test_upgraded_payload_groups_multinode_replica(): frontend can aggregate them into one logical replica.""" with patch("backend.services.model_service.requests.get") as mock_get: mock_get.return_value = type( - "R", (), {"json": lambda self=None: _load_fixture("dnt_table_upgraded.json")} + "R", + (), + {"json": lambda self=None: _load_fixture("dnt_table_upgraded.json")}, )() out = get_all_models("http://x/v1/dnt/table", with_details=True) # Find the shared-wg cluster