diff --git a/Makefile b/Makefile index 7cf29da..a9e4ead 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,14 @@ -.PHONY: install install-dev format check test run +.PHONY: install install-dev format check test run dummy-run db-up db-down migrate _ensure-env _ensure-frontend-env UV_EXTRA ?= +PG_CONTAINER := serving-api-pg +PG_PORT := 5433 +PG_USER := serving +PG_PASS := serving +PG_DB := serving +DATABASE_URL := postgresql://$(PG_USER):$(PG_PASS)@localhost:$(PG_PORT)/$(PG_DB) + install: uv pip install $(UV_EXTRA) -r backend/requirements.txt @@ -19,7 +26,59 @@ check: test: pytest backend/tests/ -v -run: +_ensure-env: + @if [ ! -f .env ]; then \ + cp .env.example .env; \ + echo "copied .env.example -> .env"; \ + fi + +_ensure-frontend-env: + @if [ ! -f frontend/.env ]; then \ + cp frontend/.env.example frontend/.env; \ + echo "copied frontend/.env.example -> frontend/.env (fill in AUTH0_* to enable login)"; \ + fi + +db-up: + @if [ -z "$$(docker ps -q -f name=^/$(PG_CONTAINER)$$)" ]; then \ + if [ -n "$$(docker ps -aq -f name=^/$(PG_CONTAINER)$$)" ]; then \ + echo "starting existing $(PG_CONTAINER) container"; \ + docker start $(PG_CONTAINER) > /dev/null; \ + else \ + echo "creating $(PG_CONTAINER) container on :$(PG_PORT)"; \ + docker run -d --name $(PG_CONTAINER) \ + -e POSTGRES_USER=$(PG_USER) \ + -e POSTGRES_PASSWORD=$(PG_PASS) \ + -e POSTGRES_DB=$(PG_DB) \ + -p $(PG_PORT):5432 \ + postgres:16 > /dev/null; \ + fi; \ + fi + @printf "waiting for postgres"; \ + for i in $$(seq 1 30); do \ + if docker exec $(PG_CONTAINER) pg_isready -U $(PG_USER) -d $(PG_DB) > /dev/null 2>&1; then \ + echo " ready"; exit 0; \ + fi; \ + printf "."; sleep 1; \ + done; \ + echo " timed out"; exit 1 + +db-down: + -docker stop $(PG_CONTAINER) > /dev/null 2>&1 + -docker rm $(PG_CONTAINER) > /dev/null 2>&1 + +migrate: _ensure-env db-up + alembic upgrade head + +run: _ensure-env _ensure-frontend-env db-up migrate + uvicorn backend.main:app --reload --host 0.0.0.0 --port 8080 & \ + cd frontend 
&& npm run dev & \ + wait + +# Same as `run` but forces the model list to come from the synthesised +# upgraded fixture instead of the live OpenTela endpoint. Useful for +# iterating on the model-card UI without depending on prod state (fixture path is anchored on CURDIR so `make -C` works). +dummy-run: _ensure-env _ensure-frontend-env db-up migrate + OTELA_FIXTURE_PATH="$(CURDIR)/backend/tests/fixtures/dnt_table_upgraded.json" \ uvicorn backend.main:app --reload --host 0.0.0.0 --port 8080 & \ cd frontend && npm run dev & \ wait diff --git a/README.md b/README.md index 78922c8..a2fcf73 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Frontend and backend API proxy for SwissAI LLM serving. For examples on how to l │ ▼ ┌─────────────────┐ - │ OCF │ OpenTela P2P routing → model=apertus-... + │ OpenTela │ P2P routing → model=apertus-... └────────┬────────┘ │ ▼ @@ -42,7 +42,7 @@ frontend/ # web UI (Astro + Svelte) meta/ # example Dockerfiles, example k8s manifests, build scripts ``` -OCF (Open Compute Framework) now renamed to OpenTela upstream is maintained at [eth-easl/OpenTela](https://github.com/eth-easl/OpenTela). We maintain a fork at [swiss-ai/OpenTela](https://github.com/swiss-ai/opentela) to control deployments to dev+prod. +OpenTela (formerly OCF / "Open Compute Framework") is maintained upstream at [eth-easl/OpenTela](https://github.com/eth-easl/OpenTela). We maintain a fork at [swiss-ai/OpenTela](https://github.com/swiss-ai/opentela) to control deployments to dev+prod. 
## Dev Quick Start diff --git a/backend/config.py b/backend/config.py index a8ee21b..3c3952f 100644 --- a/backend/config.py +++ b/backend/config.py @@ -1,6 +1,8 @@ -from pydantic_settings import BaseSettings from functools import lru_cache +from pydantic import AliasChoices, Field +from pydantic_settings import BaseSettings + @lru_cache() def get_settings(): @@ -17,7 +19,20 @@ class Settings(BaseSettings): database_url: str = "" auth_secret: str = "" auth_trust_host: bool = False - ocf_head_addr: str = "" + # Accept the historical OCF_* env var names in addition to the canonical + # OTELA_* ones so existing deployments keep working through the rename. + # Python attribute access stays `settings.otela_*`. + otela_head_addr: str = Field( + default="", + validation_alias=AliasChoices("otela_head_addr", "ocf_head_addr"), + ) + # When set, /v1/models* reads this JSON file instead of calling + # $otela_head_addr/v1/dnt/table. Used for UI iteration against synthesised + # upgraded payloads (see backend/tests/fixtures/build_upgraded.py). 
+ otela_fixture_path: str = Field( + default="", + validation_alias=AliasChoices("otela_fixture_path", "ocf_fixture_path"), + ) langfuse_host: str = "" langfuse_public_key: str = "" langfuse_secret_key: str = "" @@ -28,6 +43,7 @@ class Settings(BaseSettings): class Config: env_file = ".env" + populate_by_name = True def parse_hardware_info(hardware_info): diff --git a/backend/routers/completions.py b/backend/routers/completions.py index 8c38f71..f296793 100644 --- a/backend/routers/completions.py +++ b/backend/routers/completions.py @@ -75,7 +75,7 @@ async def chat_completion( ) response = await llm_proxy( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, request=llm_request, ) @@ -125,7 +125,7 @@ async def completion( ) response = await llm_proxy_completions( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, request=llm_request, ) diff --git a/backend/routers/embeddings.py b/backend/routers/embeddings.py index d8a3439..34a810e 100644 --- a/backend/routers/embeddings.py +++ b/backend/routers/embeddings.py @@ -26,7 +26,7 @@ async def embeddings( data["app_title"] = app_title response = await llm_proxy_embeddings( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, **data, ) diff --git a/backend/routers/models.py b/backend/routers/models.py index 32576a6..668dbf9 100644 --- a/backend/routers/models.py +++ b/backend/routers/models.py @@ -6,9 +6,17 @@ settings = get_settings() +def _dnt_endpoint() -> str: + """When OTELA_FIXTURE_PATH is set, read DNT from disk instead of HTTP — + used for iterating on the UI against synthesised post-upgrade payloads.""" + if settings.otela_fixture_path: + return settings.otela_fixture_path + return settings.otela_head_addr + "/v1/dnt/table" + + @router.get("/v1/models_detailed") async 
def list_models_detailed(): - models = get_all_models(settings.ocf_head_addr + "/v1/dnt/table", with_details=True) + models = get_all_models(_dnt_endpoint(), with_details=True) return dict( object="list", data=models, @@ -17,9 +25,7 @@ async def list_models_detailed(): @router.get("/v1/models") async def list_models(): - models = get_all_models( - settings.ocf_head_addr + "/v1/dnt/table", with_details=False - ) + models = get_all_models(_dnt_endpoint(), with_details=False) return dict( object="list", data=models, diff --git a/backend/routers/rerank.py b/backend/routers/rerank.py index 9b47aef..7f5b211 100644 --- a/backend/routers/rerank.py +++ b/backend/routers/rerank.py @@ -14,7 +14,7 @@ async def rerank( ): data = await request.json() response = await llm_proxy_rerank( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, payload=data, model=data.get("model", "unknown"), @@ -29,7 +29,7 @@ async def score( ): data = await request.json() response = await llm_proxy_score( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, payload=data, model=data.get("model", "unknown"), diff --git a/backend/routers/responses.py b/backend/routers/responses.py index 598270c..33ebb49 100644 --- a/backend/routers/responses.py +++ b/backend/routers/responses.py @@ -17,7 +17,7 @@ async def create_response( stream = data.get("stream", False) response = await llm_proxy_responses( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, payload=data, stream=stream, diff --git a/backend/routers/tokenization.py b/backend/routers/tokenization.py index 99680d0..6381aa3 100644 --- a/backend/routers/tokenization.py +++ b/backend/routers/tokenization.py @@ -14,7 +14,7 @@ async def tokenize( ): data = await request.json() response = await llm_proxy_tokenize( - 
endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, payload=data, model=data.get("model", "unknown"), @@ -29,7 +29,7 @@ async def detokenize( ): data = await request.json() response = await llm_proxy_detokenize( - endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/", + endpoint=settings.otela_head_addr + "/v1/service/llm/v1/", api_key=token, payload=data, model=data.get("model", "unknown"), diff --git a/backend/services/model_service.py b/backend/services/model_service.py index ebcba51..f1eec4f 100644 --- a/backend/services/model_service.py +++ b/backend/services/model_service.py @@ -1,18 +1,80 @@ +import json +import pathlib + import requests + from backend.config import parse_hardware_info +def _peer_metadata(node_info: dict) -> dict: + """Pull the surfaced launch-time fields off a DNT peer entry. + + Older OpenTela binaries ( dict: + """Fetch DNT data. If endpoint points at a local file (no scheme), read + it as JSON — that's the fixture-mode dev path. Otherwise HTTP-GET it.""" + if endpoint and not endpoint.startswith(("http://", "https://")): + return json.loads(pathlib.Path(endpoint).read_text()) + return requests.get(endpoint, timeout=10).json()  # bounded wait: a dead head node must not hang the request + + def get_all_models(endpoint: str, with_details: bool = False): + """Return one entry per (peer, model) pair served on the network. + + The frontend aggregates these by model id and by worker_group_id to + produce the model card + replica count. We keep the granularity at the + peer level so multi-node replicas show their full topology (head + + metrics-only followers all share the same worker_group_id). 
+ """ try: - data = requests.get(endpoint).json() + data = _load_dnt(endpoint) except Exception: return [] models = [] for node_info in data.values(): - if not node_info.get("service"): - continue + meta = _peer_metadata(node_info) device_info = parse_hardware_info(node_info.get("hardware")) - for service in node_info["service"]: + services = node_info.get("service") or [] + if not services: + # Metrics-only / pending peer: surface it under a sentinel id so + # the frontend can attribute it to the right replica via + # worker_group_id and show it as part of a launching/follower set. + if not meta["worker_group_id"]: + continue + entry = { + "id": "", # no model yet + "object": "model", + "created": "0x", + "owner": "0x", + **meta, + } + if with_details: + entry["device"] = device_info + models.append(entry) + continue + for service in services: if not service.get("identity_group"): continue model_names = [ @@ -20,25 +82,15 @@ def get_all_models(endpoint: str, with_details: bool = False): for identity in service["identity_group"] if identity.startswith("model=") ] - if with_details: - models.extend( - { - "id": model_name, - "device": device_info, - "object": "model", - "created": "0x", - "owner": "0x", - } - for model_name in model_names - ) - else: - models.extend( - { - "id": model_name, - "object": "model", - "created": "0x", - "owner": "0x", - } - for model_name in model_names - ) + for model_name in model_names: + entry = { + "id": model_name, + "object": "model", + "created": "0x", + "owner": "0x", + **meta, + } + if with_details: + entry["device"] = device_info + models.append(entry) return models diff --git a/backend/tests/fixtures/build_upgraded.py b/backend/tests/fixtures/build_upgraded.py new file mode 100644 index 0000000..c5c3b4f --- /dev/null +++ b/backend/tests/fixtures/build_upgraded.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +"""Build dnt_table_upgraded.json from dnt_table_prod.json. 
+ +Adds the new Peer fields (hostname, version, status, labels) as if the +v0.0.6 OpenTela binary plus the model-launch --label changes had been +deployed: + +- Every peer gets a synthetic SLURM job id (= its own worker_group_id). +- Each peer's metadata reflects realistic launched_by / framework / + framework_args values, varied per model. +- started_at and expires_at are spread across "now-ish" so the UI shows + a realistic mix of recently-launched and longer-running replicas. +- One multi-peer model is manually re-keyed so two of its peers share a + worker_group_id, simulating a 2-node TP replica with a metrics-only + follower (no `service`). This lets us exercise the multi-node-replica + aggregation path on the frontend without needing live multi-node data. + +Re-run after refreshing dnt_table_prod.json: + python3 build_upgraded.py +""" + +import json +import pathlib +from datetime import datetime, timedelta, timezone + +HERE = pathlib.Path(__file__).parent +SRC = HERE / "dnt_table_prod.json" +DST = HERE / "dnt_table_upgraded.json" + +# Plausible owners cycling through real users so the UI shows multiple. +USERS = ["rosmith", "xyao", "aahadinia", "isternfel", "yiswang"] + +# Models whose served name suggests a particular launcher. +FRAMEWORK_HINTS = { + "sglang": ["Apertus", "GLM", "gemma", "olmo", "gpt-oss"], + "vllm": ["Qwen", "Llama", "Snowflake", "Kimi"], +} + +# Representative framework_args per model. Covers what an operator +# actually types — paths, tensor-parallel sizing, memory caps. Real OpenTela +# emits these verbatim via `--label framework_args="..."`. Fixture-only +# until the OpenTela patch lands the framework_args label. 
+FRAMEWORK_ARGS = { + "Apertus-70B-Instruct-2509": ( + "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-70B-Instruct-2509 " + "--tensor-parallel-size 4 --max-model-len 65536 --port 8080" + ), + "Apertus-8B-Instruct-2509": ( + "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-8B-Instruct-2509 " + "--port 8080 --enable-metrics" + ), + "gemma-4-31B-it": ( + "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it " + "--tensor-parallel-size 4 --port 8080" + ), + "Qwen3.5-397B-A17B": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B " + "--tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080" + ), + "gpt-oss-120b": ( + "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b " + "--tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss" + ), + "Qwen3-32B": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B " + "--tensor-parallel-size 4 --port 8080" + ), + "Llama-3.3-70B-Instruct": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/meta-llama/Llama-3.3-70B-Instruct " + "--tensor-parallel-size 4 --max-model-len 8192 --port 8080" + ), + "Qwen3-Next-80B-A3B-Instruct": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-Next-80B-A3B-Instruct " + "--tensor-parallel-size 4 --port 8080" + ), + "snowflake-arctic-embed-l-v2.0": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Snowflake/snowflake-arctic-embed-l-v2.0 " + "--task embed --port 8080" + ), + "GLM-4.7-Flash": ( + "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/zai-org/GLM-4.7-Flash --port 8080" + ), + "Qwen3.5-27B": ( + "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-27B " + "--tensor-parallel-size 2 --port 8080" + ), +} + + +def guess_framework(model: str) -> str: + for fw, 
hints in FRAMEWORK_HINTS.items(): + if any(h in model for h in hints): + return fw + return "sglang" + + +def guess_framework_args(model: str) -> str: + """Find the best-matching entry in FRAMEWORK_ARGS for this served name.""" + for key, args in FRAMEWORK_ARGS.items(): + if key in model: + return args + return "--port 8080" + + +# Baseline "now" so the fixture is deterministic across regenerations. +NOW = datetime(2026, 5, 17, 13, 0, tzinfo=timezone.utc) + + +def main() -> None: + src = json.loads(SRC.read_text()) + upgraded: dict = {} + next_job_id = 2256000 + multi_node_assigned = False + + # Synthesize a stable ordering so re-runs produce stable diffs. + for i, (pid_key, peer) in enumerate(sorted(src.items())): + peer = dict(peer) # shallow copy + + # Hostname based on peer ID, padded — looks like a real nidXXXXXX. + peer["hostname"] = f"nid{(0x6000 + i):06d}"[-9:] + peer["version"] = "v0.0.6" + + services = peer.get("service") or [] + model_name = "" + for svc in services: + for ig in svc.get("identity_group") or []: + if ig.startswith("model="): + model_name = ig[6:] + break + if model_name: + break + + peer["status"] = "ready" if model_name else "pending" + job_id = next_job_id + next_job_id += 1 + + # Spread launches over the past few hours: 30 min apart starting + # 6 h ago. Plausibly varied; older launches expire sooner. + started_offset = timedelta(minutes=30 * (i % 12) + 5 * (i // 12)) + started_at = NOW - timedelta(hours=6) + started_offset + # Pick a SLURM time-limit consistent with how the launcher is + # actually used today — short jobs (1 h) for quick tests, + # long ones (12 h) for stable serving. Mix them. 
+ time_limit = timedelta(hours=12 if i % 3 == 0 else 1 if i % 7 == 0 else 6) + expires_at = started_at + time_limit + + peer["labels"] = { + "launched_by": USERS[i % len(USERS)], + "slurm_job_id": str(job_id), + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0" if i % 4 == 0 else "", + "worker_group_id": str(job_id), + "framework": guess_framework(model_name) if model_name else "", + "framework_args": guess_framework_args(model_name) if model_name else "", + "served_model_name": model_name, + "started_at": started_at.isoformat().replace("+00:00", "Z"), + "expires_at": expires_at.isoformat().replace("+00:00", "Z"), + } + # Drop empty entries so the JSON looks closer to what OpenTela emits. + peer["labels"] = {k: v for k, v in peer["labels"].items() if v} + + upgraded[pid_key] = peer + + # Demo a multi-node replica: pick a model with multiple peers and + # collapse two of them into a single worker_group_id, with the second + # becoming metrics-only (no service, no service entries). + by_model: dict[str, list[str]] = {} + for pid_key, peer in upgraded.items(): + for svc in peer.get("service") or []: + for ig in svc.get("identity_group") or []: + if ig.startswith("model="): + by_model.setdefault(ig[6:], []).append(pid_key) + for model, peers in by_model.items(): + if len(peers) >= 2: + head_key, follower_key = peers[0], peers[1] + head_labels = upgraded[head_key]["labels"] + shared = head_labels["worker_group_id"] + f_labels = upgraded[follower_key]["labels"] + f_labels["worker_group_id"] = shared + f_labels["slurm_job_id"] = shared + f_labels["launched_by"] = head_labels["launched_by"] + f_labels["started_at"] = head_labels["started_at"] + f_labels["expires_at"] = head_labels["expires_at"] + # Metrics-only: drop the service advertisement. 
+ upgraded[follower_key]["service"] = [] + upgraded[follower_key]["status"] = "ready" + multi_node_assigned = True + print( + f"multi-node demo: {model} → head={head_key}, follower={follower_key}, wg={shared}" + ) + break + assert multi_node_assigned, "No model has >=2 peers; cannot demo multi-node" + + DST.write_text(json.dumps(upgraded, indent=2)) + print(f"wrote {DST} ({len(upgraded)} peers)") + + +if __name__ == "__main__": + main() diff --git a/backend/tests/fixtures/dnt_table_prod.json b/backend/tests/fixtures/dnt_table_prod.json new file mode 100644 index 0000000..d72b887 --- /dev/null +++ b/backend/tests/fixtures/dnt_table_prod.json @@ -0,0 +1,2844 @@ +{ + "/QmPBTq7XuewwbkrdU3c9hdCuZ4XCJMm7wMKeU91xu2hcvG": { + "id": "QmPBTq7XuewwbkrdU3c9hdCuZ4XCJMm7wMKeU91xu2hcvG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=swiss-ai/Apertus-70B-Instruct-2509" + ] + } + ], + "last_seen": 1779027016, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89753 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmPZuZMWhcX2f8EbqRi9HAfrx4ZCiXVfT3c1jifNabwkRL": { + "id": "QmPZuZMWhcX2f8EbqRi9HAfrx4ZCiXVfT3c1jifNabwkRL", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + 
"role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmQ1JHLDJe7KhnD48S9TgaEPG3p2az1kq4dMoKUdBssRDq": { + "id": "QmQ1JHLDJe7KhnD48S9TgaEPG3p2az1kq4dMoKUdBssRDq", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027011, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82827 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82153 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82153 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82025 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, 
+ "connected": true, + "load": null + }, + "/QmQFqgsty9RjE8DCMMH7QY4pCPwEhCdief1gjku44ALbNN": { + "id": "QmQFqgsty9RjE8DCMMH7QY4pCPwEhCdief1gjku44ALbNN", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027011, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84657 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmQxjzdBtpjRWwXP991ow1LezbSBSAtJP4cnBW7635YEyW": { + "id": "QmQxjzdBtpjRWwXP991ow1LezbSBSAtJP4cnBW7635YEyW", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + 
"total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmRPfyCPgcL2n9Tob6vKeNoQMX39e3fBS4JWNw7Cpz9Wa5": { + "id": "QmRPfyCPgcL2n9Tob6vKeNoQMX39e3fBS4JWNw7Cpz9Wa5", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmRYSXoAfVk1ZsqmduBrsDd1Zcx541KMA9wtT3Mnsau9tn": { + "id": "QmRYSXoAfVk1ZsqmduBrsDd1Zcx541KMA9wtT3Mnsau9tn", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027021, + "version": "", + "public_address": "", + "hardware": { 
+ "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84576 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 11 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 8 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmRgcwJAjsYRLTnBMJvk8ARYa9tNmBjHvPZ9dsLfrq1MTP": { + "id": "QmRgcwJAjsYRLTnBMJvk8ARYa9tNmBjHvPZ9dsLfrq1MTP", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86394 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86392 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmRzLCghn5BSCAmoqzTmBopB2Nh9jyApsibMBc7VU1pREG": { + "id": "QmRzLCghn5BSCAmoqzTmBopB2Nh9jyApsibMBc7VU1pREG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + 
"status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027011, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86383 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86594 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmSpTkK6cemTaVWugFjThfr4P17m5ZeQjso2adT8FWkD7G": { + "id": "QmSpTkK6cemTaVWugFjThfr4P17m5ZeQjso2adT8FWkD7G", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86590 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86592 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmSssGsuXMcN61G6ejHCwhJjbbLLukqNNs4KGiAcYZ4vPT": { + "id": "QmSssGsuXMcN61G6ejHCwhJjbbLLukqNNs4KGiAcYZ4vPT", + "latency": 0, + "privileged": false, + "owner": "", + 
"current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84563 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmSy3ee3TMXRE2PPc1VTRQyWWDj94ieahAWsJn3PF2c6em": { + "id": "QmSy3ee3TMXRE2PPc1VTRQyWWDj94ieahAWsJn3PF2c6em", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84586 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 11 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, 
+ "connected": true, + "load": null + }, + "/QmTUY7a8RYRFnhxMgQ2pNm4V9tKQDzQBpMc6dWQ9isysDZ": { + "id": "QmTUY7a8RYRFnhxMgQ2pNm4V9tKQDzQBpMc6dWQ9isysDZ", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84579 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmTVFAoyUwxkBgUJtydmUNkA1sYF2j29St5Lz5JtW4zKKm": { + "id": "QmTVFAoyUwxkBgUJtydmUNkA1sYF2j29St5Lz5JtW4zKKm", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027010, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84573 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + 
"total_memory": 97871, + "used_memory": 7 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmTidAAZgMyXAiBwsbqxi4MZQjgwiuzjw4JwNu4VJtXXmS": { + "id": "QmTidAAZgMyXAiBwsbqxi4MZQjgwiuzjw4JwNu4VJtXXmS", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18559 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18573 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18569 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18570 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmUBhMF1Gg19ZbUrmBCCWdprt3XMLiXftZebuUnnaGeUJ8": { + "id": "QmUBhMF1Gg19ZbUrmBCCWdprt3XMLiXftZebuUnnaGeUJ8", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + 
"gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84623 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmUCz7yLJt84xHyZySK6LMQDDXNQW6qfVz7TWc3UDLt6j7": { + "id": "QmUCz7yLJt84xHyZySK6LMQDDXNQW6qfVz7TWc3UDLt6j7", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84595 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmV44xF8bg51ZVrWSAstzms12i8Dt3f1oHV7qXyQiQnhTn": { + "id": "QmV44xF8bg51ZVrWSAstzms12i8Dt3f1oHV7qXyQiQnhTn", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": 
"connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmVKgxLCrKx35THcpaGbAMyweAcN8WuSRqbbrcUtRQgJur": { + "id": "QmVKgxLCrKx35THcpaGbAMyweAcN8WuSRqbbrcUtRQgJur", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84564 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 14 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmVyfUwaMw1HD9YvJ6spi2mrDyrsC5fYnTgTqKcGpraVob": { + "id": "QmVyfUwaMw1HD9YvJ6spi2mrDyrsC5fYnTgTqKcGpraVob", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": 
null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=meta-llama/Llama-3.3-70B-Instruct" + ] + } + ], + "last_seen": 1779027006, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89539 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89747 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89747 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89539 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmWH14wTNJsHJEnpRXGAozEj89Ja1Hy2nsY9RhEATgVXD2": { + "id": "QmWH14wTNJsHJEnpRXGAozEj89Ja1Hy2nsY9RhEATgVXD2", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84612 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 8 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, 
+ "connected": true, + "load": null + }, + "/QmWUY2amw5ZG2triPc5mVjySsowxxz9zp8GZpb5mJ267S8": { + "id": "QmWUY2amw5ZG2triPc5mVjySsowxxz9zp8GZpb5mJ267S8", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmXY2pbA48gJ7HcSnBQMV6VSaBMiK3jWXRyNVUbxRtZJRW": { + "id": "QmXY2pbA48gJ7HcSnBQMV6VSaBMiK3jWXRyNVUbxRtZJRW", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027011, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84622 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 16 + }, + { + "name": "NVIDIA GH200 
120GB", + "total_memory": 97871, + "used_memory": 13 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmYFsphqm4gNgpgAdjwa6ZaCFPwJxaVHFwJ9P7cLSkTqZe": { + "id": "QmYFsphqm4gNgpgAdjwa6ZaCFPwJxaVHFwJ9P7cLSkTqZe", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmYaMDbbw8WRY7RDcFuFHWDtmA9smyPeBcVNfCW4XC5mVV": { + "id": "QmYaMDbbw8WRY7RDcFuFHWDtmA9smyPeBcVNfCW4XC5mVV", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + 
"hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85692 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85900 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85900 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85690 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmZ2DSBRFUdoD2vxt4WBNjSnAZZNB211BQGA3N1P7WnkYa": { + "id": "QmZ2DSBRFUdoD2vxt4WBNjSnAZZNB211BQGA3N1P7WnkYa", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82817 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82134 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82142 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82014 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmZAJMsbmnfz2KoYbzDC7NC4SLJfzUu5y3Wiz4stXhxE6y": { + "id": "QmZAJMsbmnfz2KoYbzDC7NC4SLJfzUu5y3Wiz4stXhxE6y", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, 
+ "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027002, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82827 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82144 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82152 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82024 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmZKtLQ4Hmtu3LEphS2hn7jciRyHa4TYAPjpkfsYmZkgZr": { + "id": "QmZKtLQ4Hmtu3LEphS2hn7jciRyHa4TYAPjpkfsYmZkgZr", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84644 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmZZTz4iU4Cs46bPDFjVdG4Ws4ErvL5C3jNmpV7dwj1Xb4": { + "id": "QmZZTz4iU4Cs46bPDFjVdG4Ws4ErvL5C3jNmpV7dwj1Xb4", + "latency": 0, + "privileged": 
false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027005, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84547 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmZkb4QBua1DbmPiKz7eMpQVdceqpNxSfsjzPuJBVyDT9U": { + "id": "QmZkb4QBua1DbmPiKz7eMpQVdceqpNxSfsjzPuJBVyDT9U", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027006, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82829 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82146 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82154 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82026 + } + ], + "host_memory": 0, + 
"host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/Qma4wwUVNRfNsz4JB26z26LJyNzwhZ6YWSVeXhKAkckJY7": { + "id": "Qma4wwUVNRfNsz4JB26z26LJyNzwhZ6YWSVeXhKAkckJY7", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-Next-80B-A3B-Instruct-yiswang" + ] + } + ], + "last_seen": 1779027006, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmaEQoJVdvv2nRV3HUsH6dzjxoCy6mYBkCxGh37AaLNeMp": { + "id": "QmaEQoJVdvv2nRV3HUsH6dzjxoCy6mYBkCxGh37AaLNeMp", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82829 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 
97871, + "used_memory": 82146 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82154 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82026 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmaLdTc28YRJn5uMrUt5kKnuWQzT2giA6dE9ZRkeG7pHVC": { + "id": "QmaLdTc28YRJn5uMrUt5kKnuWQzT2giA6dE9ZRkeG7pHVC", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027006, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 17 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 10 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/Qmaf4Ahny2u9yYyHLZtv5THxZaV2fWFKFicq7bJryvDYtk": { + "id": "Qmaf4Ahny2u9yYyHLZtv5THxZaV2fWFKFicq7bJryvDYtk", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 
1779027003, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84598 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmatBYkA34rU7Xp7MZGfQWCnF8jKqvivfuseSw89teB2GK": { + "id": "QmatBYkA34rU7Xp7MZGfQWCnF8jKqvivfuseSw89teB2GK", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmatXigW2oBdFezZQ3jdVzF3kyq6DD38vuPZvLcVo8Jbmu": { + "id": "QmatXigW2oBdFezZQ3jdVzF3kyq6DD38vuPZvLcVo8Jbmu", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + 
"host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 7 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbChDqsb1od2fyQ5V98kWjckswmXfVJGtutPdqpkXv3jy": { + "id": "QmbChDqsb1od2fyQ5V98kWjckswmXfVJGtutPdqpkXv3jy", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Snowflake/snowflake-arctic-embed-l-v2.0" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 2649 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbRVvFzCmZhEXiTENtvDvNab8wREqEo4byVKDw83TTHeH": { + "id": "QmbRVvFzCmZhEXiTENtvDvNab8wREqEo4byVKDw83TTHeH", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + 
"host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbRoB5rRsDTnSaVQsG72ZJtvdVvERx9Nv3B5p2AGGDw2f": { + "id": "QmbRoB5rRsDTnSaVQsG72ZJtvdVvERx9Nv3B5p2AGGDw2f", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86383 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86590 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9": { + "id": "QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9", + "latency": 0, + "privileged": false, + "owner": 
"", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": null, + "last_seen": 1779027021, + "version": "", + "public_address": "148.187.108.178", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbiBQSUfDeXTdkPHfa6cyySRNJWSXspRbNwjCMJC6juVL": { + "id": "QmbiBQSUfDeXTdkPHfa6cyySRNJWSXspRbNwjCMJC6juVL", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84582 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmbjQk58JdgKcygFU85ztiqCF4MRqe6K6RAT9D7SEeUzyd": { + "id": "QmbjQk58JdgKcygFU85ztiqCF4MRqe6K6RAT9D7SEeUzyd", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + }, + { + 
"name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 7 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/Qmc7Lhr1FRE4vh7Mk7VJAgPoEGJU8fYM82tdZsuKn8Bu4A": { + "id": "Qmc7Lhr1FRE4vh7Mk7VJAgPoEGJU8fYM82tdZsuKn8Bu4A", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=zai-org/GLM-4.7-Flash" + ] + } + ], + "last_seen": 1779027020, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 94835 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmcNUTEmgWq9u51XaQ3NVdQmUGb5AXcTq31b81HRajuF8B": { + "id": "QmcNUTEmgWq9u51XaQ3NVdQmUGb5AXcTq31b81HRajuF8B", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-27B" + ] + } + ], + "last_seen": 1779027010, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 90935 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 90409 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + 
"host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmcRV1QhEcmGEbxer4DwpswD27wf3g86cjzYdRiWprj7KG": { + "id": "QmcRV1QhEcmGEbxer4DwpswD27wf3g86cjzYdRiWprj7KG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027010, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmduMdBLdY6vDX3P1Wbrbv5QvPEGXQKnJrWb5WQMRADGRT": { + "id": "QmduMdBLdY6vDX3P1Wbrbv5QvPEGXQKnJrWb5WQMRADGRT", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": 
"NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmdvyPbnCXYz9SrHtN1RTZC8zfu17BgiuaNhy5Dxkjkdx3": { + "id": "QmdvyPbnCXYz9SrHtN1RTZC8zfu17BgiuaNhy5Dxkjkdx3", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027021, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 2 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmeHwokMDRGBQkJaAcJ2kqUPgVVfb3pAspwznmGtfG3SrQ": { + "id": "QmeHwokMDRGBQkJaAcJ2kqUPgVVfb3pAspwznmGtfG3SrQ", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "", + "public_address": "", + 
"hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 19117 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmfCUkV2TZaSzeVCaXQ1VaxsPJaweujarmpvgdBALd4qzp": { + "id": "QmfCUkV2TZaSzeVCaXQ1VaxsPJaweujarmpvgdBALd4qzp", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=swiss-ai/Apertus-8B-Instruct-2509" + ] + } + ], + "last_seen": 1779027015, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 88607 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + }, + "/QmfEhbkxvqWJ5uPCyEtLLizjnnEJ5SHMrGn36wDwhNezJY": { + "id": "QmfEhbkxvqWJ5uPCyEtLLizjnnEJ5SHMrGn36wDwhNezJY", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027021, + "version": "", + "public_address": "", + "hardware": { + "gpus": [ + { 
+ "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86592 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null + } +} diff --git a/backend/tests/fixtures/dnt_table_upgraded.json b/backend/tests/fixtures/dnt_table_upgraded.json new file mode 100644 index 0000000..ffe7edb --- /dev/null +++ b/backend/tests/fixtures/dnt_table_upgraded.json @@ -0,0 +1,3435 @@ +{ + "/QmPBTq7XuewwbkrdU3c9hdCuZ4XCJMm7wMKeU91xu2hcvG": { + "id": "QmPBTq7XuewwbkrdU3c9hdCuZ4XCJMm7wMKeU91xu2hcvG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=swiss-ai/Apertus-70B-Instruct-2509" + ] + } + ], + "last_seen": 1779027016, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89753 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89755 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024576", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256000", + "slurm_partition": "normal", + 
"slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256000", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-70B-Instruct-2509 --tensor-parallel-size 4 --max-model-len 65536 --port 8080", + "served_model_name": "swiss-ai/Apertus-70B-Instruct-2509", + "started_at": "2026-05-17T07:00:00Z", + "expires_at": "2026-05-17T19:00:00Z" + } + }, + "/QmPZuZMWhcX2f8EbqRi9HAfrx4ZCiXVfT3c1jifNabwkRL": { + "id": "QmPZuZMWhcX2f8EbqRi9HAfrx4ZCiXVfT3c1jifNabwkRL", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18555 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024577", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256001", + "slurm_partition": "normal", + "worker_group_id": "2256001", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-17T07:30:00Z", + 
"expires_at": "2026-05-17T13:30:00Z" + } + }, + "/QmQ1JHLDJe7KhnD48S9TgaEPG3p2az1kq4dMoKUdBssRDq": { + "id": "QmQ1JHLDJe7KhnD48S9TgaEPG3p2az1kq4dMoKUdBssRDq", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027011, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82827 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82153 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82153 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82025 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024578", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256002", + "slurm_partition": "normal", + "worker_group_id": "2256002", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", + "started_at": "2026-05-17T08:00:00Z", + "expires_at": "2026-05-17T14:00:00Z" + } + }, + "/QmQFqgsty9RjE8DCMMH7QY4pCPwEhCdief1gjku44ALbNN": { + "id": "QmQFqgsty9RjE8DCMMH7QY4pCPwEhCdief1gjku44ALbNN", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + 
"hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027011, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84657 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024579", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256003", + "slurm_partition": "normal", + "worker_group_id": "2256003", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T08:30:00Z", + "expires_at": "2026-05-17T20:30:00Z" + } + }, + "/QmQxjzdBtpjRWwXP991ow1LezbSBSAtJP4cnBW7635YEyW": { + "id": "QmQxjzdBtpjRWwXP991ow1LezbSBSAtJP4cnBW7635YEyW", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + 
"total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024580", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256001", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256001", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-17T07:30:00Z", + "expires_at": "2026-05-17T13:30:00Z" + } + }, + "/QmRPfyCPgcL2n9Tob6vKeNoQMX39e3fBS4JWNw7Cpz9Wa5": { + "id": "QmRPfyCPgcL2n9Tob6vKeNoQMX39e3fBS4JWNw7Cpz9Wa5", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024581", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256005", + "slurm_partition": "normal", + "worker_group_id": "2256005", + "framework": "vllm", + 
"framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-17T09:30:00Z", + "expires_at": "2026-05-17T15:30:00Z" + } + }, + "/QmRYSXoAfVk1ZsqmduBrsDd1Zcx541KMA9wtT3Mnsau9tn": { + "id": "QmRYSXoAfVk1ZsqmduBrsDd1Zcx541KMA9wtT3Mnsau9tn", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027021, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84576 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 11 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 8 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024582", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256006", + "slurm_partition": "normal", + "worker_group_id": "2256006", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T10:00:00Z", + "expires_at": "2026-05-17T22:00:00Z" + } + }, + "/QmRgcwJAjsYRLTnBMJvk8ARYa9tNmBjHvPZ9dsLfrq1MTP": { + "id": "QmRgcwJAjsYRLTnBMJvk8ARYa9tNmBjHvPZ9dsLfrq1MTP", + 
"latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86394 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86392 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024583", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256007", + "slurm_partition": "normal", + "worker_group_id": "2256007", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-17T10:30:00Z", + "expires_at": "2026-05-17T11:30:00Z" + } + }, + "/QmRzLCghn5BSCAmoqzTmBopB2Nh9jyApsibMBc7VU1pREG": { + "id": "QmRzLCghn5BSCAmoqzTmBopB2Nh9jyApsibMBc7VU1pREG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], 
+ "last_seen": 1779027011, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86383 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86594 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86593 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024584", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256008", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256008", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-17T11:00:00Z", + "expires_at": "2026-05-17T17:00:00Z" + } + }, + "/QmSpTkK6cemTaVWugFjThfr4P17m5ZeQjso2adT8FWkD7G": { + "id": "QmSpTkK6cemTaVWugFjThfr4P17m5ZeQjso2adT8FWkD7G", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86385 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86590 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86592 + }, + { + 
"name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024585", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256009", + "slurm_partition": "normal", + "worker_group_id": "2256009", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-17T11:30:00Z", + "expires_at": "2026-05-17T23:30:00Z" + } + }, + "/QmSssGsuXMcN61G6ejHCwhJjbbLLukqNNs4KGiAcYZ4vPT": { + "id": "QmSssGsuXMcN61G6ejHCwhJjbbLLukqNNs4KGiAcYZ4vPT", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84563 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024586", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256010", + "slurm_partition": "normal", + "worker_group_id": "2256010", + "framework": "sglang", + "framework_args": "--model-path 
/capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T12:00:00Z", + "expires_at": "2026-05-17T18:00:00Z" + } + }, + "/QmSy3ee3TMXRE2PPc1VTRQyWWDj94ieahAWsJn3PF2c6em": { + "id": "QmSy3ee3TMXRE2PPc1VTRQyWWDj94ieahAWsJn3PF2c6em", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84586 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 11 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024587", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256011", + "slurm_partition": "normal", + "worker_group_id": "2256011", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T12:30:00Z", + "expires_at": "2026-05-17T18:30:00Z" + } + }, + "/QmTUY7a8RYRFnhxMgQ2pNm4V9tKQDzQBpMc6dWQ9isysDZ": { + "id": 
"QmTUY7a8RYRFnhxMgQ2pNm4V9tKQDzQBpMc6dWQ9isysDZ", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84579 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024588", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256012", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256012", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T07:05:00Z", + "expires_at": "2026-05-17T19:05:00Z" + } + }, + "/QmTVFAoyUwxkBgUJtydmUNkA1sYF2j29St5Lz5JtW4zKKm": { + "id": "QmTVFAoyUwxkBgUJtydmUNkA1sYF2j29St5Lz5JtW4zKKm", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + 
"host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027010, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84573 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 7 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024589", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256013", + "slurm_partition": "normal", + "worker_group_id": "2256013", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T07:35:00Z", + "expires_at": "2026-05-17T13:35:00Z" + } + }, + "/QmTidAAZgMyXAiBwsbqxi4MZQjgwiuzjw4JwNu4VJtXXmS": { + "id": "QmTidAAZgMyXAiBwsbqxi4MZQjgwiuzjw4JwNu4VJtXXmS", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18559 + }, + { + "name": "NVIDIA GH200 
120GB", + "total_memory": 97871, + "used_memory": 18573 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18569 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18570 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024590", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256014", + "slurm_partition": "normal", + "worker_group_id": "2256014", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-17T08:05:00Z", + "expires_at": "2026-05-17T09:05:00Z" + } + }, + "/QmUBhMF1Gg19ZbUrmBCCWdprt3XMLiXftZebuUnnaGeUJ8": { + "id": "QmUBhMF1Gg19ZbUrmBCCWdprt3XMLiXftZebuUnnaGeUJ8", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84623 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024591", + "labels": { + 
"launched_by": "rosmith", + "slurm_job_id": "2256015", + "slurm_partition": "normal", + "worker_group_id": "2256015", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T08:35:00Z", + "expires_at": "2026-05-17T20:35:00Z" + } + }, + "/QmUCz7yLJt84xHyZySK6LMQDDXNQW6qfVz7TWc3UDLt6j7": { + "id": "QmUCz7yLJt84xHyZySK6LMQDDXNQW6qfVz7TWc3UDLt6j7", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84595 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024592", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256016", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256016", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": 
"openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T09:05:00Z", + "expires_at": "2026-05-17T15:05:00Z" + } + }, + "/QmV44xF8bg51ZVrWSAstzms12i8Dt3f1oHV7qXyQiQnhTn": { + "id": "QmV44xF8bg51ZVrWSAstzms12i8Dt3f1oHV7qXyQiQnhTn", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024593", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256017", + "slurm_partition": "normal", + "worker_group_id": "2256017", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-17T09:35:00Z", + "expires_at": "2026-05-17T15:35:00Z" + } + }, + "/QmVKgxLCrKx35THcpaGbAMyweAcN8WuSRqbbrcUtRQgJur": { + "id": "QmVKgxLCrKx35THcpaGbAMyweAcN8WuSRqbbrcUtRQgJur", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { 
+ "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84564 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 14 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024594", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256018", + "slurm_partition": "normal", + "worker_group_id": "2256018", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T10:05:00Z", + "expires_at": "2026-05-17T22:05:00Z" + } + }, + "/QmVyfUwaMw1HD9YvJ6spi2mrDyrsC5fYnTgTqKcGpraVob": { + "id": "QmVyfUwaMw1HD9YvJ6spi2mrDyrsC5fYnTgTqKcGpraVob", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=meta-llama/Llama-3.3-70B-Instruct" + ] + } + ], + "last_seen": 1779027006, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + 
"name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89539 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89747 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89747 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 89539 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024595", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256019", + "slurm_partition": "normal", + "worker_group_id": "2256019", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/meta-llama/Llama-3.3-70B-Instruct --tensor-parallel-size 4 --max-model-len 8192 --port 8080", + "served_model_name": "meta-llama/Llama-3.3-70B-Instruct", + "started_at": "2026-05-17T10:35:00Z", + "expires_at": "2026-05-17T16:35:00Z" + } + }, + "/QmWH14wTNJsHJEnpRXGAozEj89Ja1Hy2nsY9RhEATgVXD2": { + "id": "QmWH14wTNJsHJEnpRXGAozEj89Ja1Hy2nsY9RhEATgVXD2", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84612 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 8 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + 
"host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024596", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256020", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256020", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T11:05:00Z", + "expires_at": "2026-05-17T17:05:00Z" + } + }, + "/QmWUY2amw5ZG2triPc5mVjySsowxxz9zp8GZpb5mJ267S8": { + "id": "QmWUY2amw5ZG2triPc5mVjySsowxxz9zp8GZpb5mJ267S8", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024597", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256021", + "slurm_partition": "normal", + "worker_group_id": "2256021", + "framework": "sglang", + "framework_args": "--model-path 
/capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-17T11:35:00Z", + "expires_at": "2026-05-17T23:35:00Z" + } + }, + "/QmXY2pbA48gJ7HcSnBQMV6VSaBMiK3jWXRyNVUbxRtZJRW": { + "id": "QmXY2pbA48gJ7HcSnBQMV6VSaBMiK3jWXRyNVUbxRtZJRW", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027011, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84622 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 16 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 13 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024598", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256022", + "slurm_partition": "normal", + "worker_group_id": "2256022", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T12:05:00Z", + "expires_at": "2026-05-17T18:05:00Z" + } + }, + "/QmYFsphqm4gNgpgAdjwa6ZaCFPwJxaVHFwJ9P7cLSkTqZe": { + "id": "QmYFsphqm4gNgpgAdjwa6ZaCFPwJxaVHFwJ9P7cLSkTqZe", + "latency": 0, 
+ "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18866 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024599", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256023", + "slurm_partition": "normal", + "worker_group_id": "2256023", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-17T12:35:00Z", + "expires_at": "2026-05-17T18:35:00Z" + } + }, + "/QmYaMDbbw8WRY7RDcFuFHWDtmA9smyPeBcVNfCW4XC5mVV": { + "id": "QmYaMDbbw8WRY7RDcFuFHWDtmA9smyPeBcVNfCW4XC5mVV", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" 
+ ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85692 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85900 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85900 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 85690 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024600", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256024", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256024", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-17T07:10:00Z", + "expires_at": "2026-05-17T19:10:00Z" + } + }, + "/QmZ2DSBRFUdoD2vxt4WBNjSnAZZNB211BQGA3N1P7WnkYa": { + "id": "QmZ2DSBRFUdoD2vxt4WBNjSnAZZNB211BQGA3N1P7WnkYa", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82817 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82134 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 
82142 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82014 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024601", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256025", + "slurm_partition": "normal", + "worker_group_id": "2256025", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", + "started_at": "2026-05-17T07:40:00Z", + "expires_at": "2026-05-17T13:40:00Z" + } + }, + "/QmZAJMsbmnfz2KoYbzDC7NC4SLJfzUu5y3Wiz4stXhxE6y": { + "id": "QmZAJMsbmnfz2KoYbzDC7NC4SLJfzUu5y3Wiz4stXhxE6y", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027002, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82827 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82144 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82152 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82024 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024602", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256026", + "slurm_partition": "normal", + 
"worker_group_id": "2256026", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", + "started_at": "2026-05-17T08:10:00Z", + "expires_at": "2026-05-17T14:10:00Z" + } + }, + "/QmZKtLQ4Hmtu3LEphS2hn7jciRyHa4TYAPjpkfsYmZkgZr": { + "id": "QmZKtLQ4Hmtu3LEphS2hn7jciRyHa4TYAPjpkfsYmZkgZr", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84644 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 5 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024603", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256027", + "slurm_partition": "normal", + "worker_group_id": "2256027", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T08:40:00Z", + "expires_at": "2026-05-17T20:40:00Z" + 
} + }, + "/QmZZTz4iU4Cs46bPDFjVdG4Ws4ErvL5C3jNmpV7dwj1Xb4": { + "id": "QmZZTz4iU4Cs46bPDFjVdG4Ws4ErvL5C3jNmpV7dwj1Xb4", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027005, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84547 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024604", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256028", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256028", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T09:10:00Z", + "expires_at": "2026-05-17T10:10:00Z" + } + }, + "/QmZkb4QBua1DbmPiKz7eMpQVdceqpNxSfsjzPuJBVyDT9U": { + "id": "QmZkb4QBua1DbmPiKz7eMpQVdceqpNxSfsjzPuJBVyDT9U", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, 
+ "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027006, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82829 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82146 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82154 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82026 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024605", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256029", + "slurm_partition": "normal", + "worker_group_id": "2256029", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", + "started_at": "2026-05-17T09:40:00Z", + "expires_at": "2026-05-17T15:40:00Z" + } + }, + "/Qma4wwUVNRfNsz4JB26z26LJyNzwhZ6YWSVeXhKAkckJY7": { + "id": "Qma4wwUVNRfNsz4JB26z26LJyNzwhZ6YWSVeXhKAkckJY7", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-Next-80B-A3B-Instruct-yiswang" + ] + } + ], + "last_seen": 1779027006, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": 
"NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024606", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256030", + "slurm_partition": "normal", + "worker_group_id": "2256030", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-Next-80B-A3B-Instruct --tensor-parallel-size 4 --port 8080", + "served_model_name": "Qwen/Qwen3-Next-80B-A3B-Instruct-yiswang", + "started_at": "2026-05-17T10:10:00Z", + "expires_at": "2026-05-17T22:10:00Z" + } + }, + "/QmaEQoJVdvv2nRV3HUsH6dzjxoCy6mYBkCxGh37AaLNeMp": { + "id": "QmaEQoJVdvv2nRV3HUsH6dzjxoCy6mYBkCxGh37AaLNeMp", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-VoMF" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82829 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82146 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82154 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 82026 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, 
+ "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024607", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256031", + "slurm_partition": "normal", + "worker_group_id": "2256031", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-VoMF", + "started_at": "2026-05-17T10:40:00Z", + "expires_at": "2026-05-17T16:40:00Z" + } + }, + "/QmaLdTc28YRJn5uMrUt5kKnuWQzT2giA6dE9ZRkeG7pHVC": { + "id": "QmaLdTc28YRJn5uMrUt5kKnuWQzT2giA6dE9ZRkeG7pHVC", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027006, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 17 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 10 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024608", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256032", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256032", + "framework": "vllm", + "framework_args": "--model 
/capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", + "started_at": "2026-05-17T11:10:00Z", + "expires_at": "2026-05-17T17:10:00Z" + } + }, + "/Qmaf4Ahny2u9yYyHLZtv5THxZaV2fWFKFicq7bJryvDYtk": { + "id": "Qmaf4Ahny2u9yYyHLZtv5THxZaV2fWFKFicq7bJryvDYtk", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84598 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 6 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024609", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256033", + "slurm_partition": "normal", + "worker_group_id": "2256033", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T11:40:00Z", + "expires_at": "2026-05-17T23:40:00Z" + } + }, + "/QmatBYkA34rU7Xp7MZGfQWCnF8jKqvivfuseSw89teB2GK": { + "id": 
"QmatBYkA34rU7Xp7MZGfQWCnF8jKqvivfuseSw89teB2GK", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024610", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256034", + "slurm_partition": "normal", + "worker_group_id": "2256034", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", + "started_at": "2026-05-17T12:10:00Z", + "expires_at": "2026-05-17T18:10:00Z" + } + }, + "/QmatXigW2oBdFezZQ3jdVzF3kyq6DD38vuPZvLcVo8Jbmu": { + "id": "QmatXigW2oBdFezZQ3jdVzF3kyq6DD38vuPZvLcVo8Jbmu", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": 
"connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 7 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024611", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256035", + "slurm_partition": "normal", + "worker_group_id": "2256035", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", + "started_at": "2026-05-17T12:40:00Z", + "expires_at": "2026-05-17T13:40:00Z" + } + }, + "/QmbChDqsb1od2fyQ5V98kWjckswmXfVJGtutPdqpkXv3jy": { + "id": "QmbChDqsb1od2fyQ5V98kWjckswmXfVJGtutPdqpkXv3jy", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Snowflake/snowflake-arctic-embed-l-v2.0" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 2649 + } + ], + "host_memory": 0, + 
"host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024612", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256036", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256036", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Snowflake/snowflake-arctic-embed-l-v2.0 --task embed --port 8080", + "served_model_name": "Snowflake/snowflake-arctic-embed-l-v2.0", + "started_at": "2026-05-17T07:15:00Z", + "expires_at": "2026-05-17T19:15:00Z" + } + }, + "/QmbRVvFzCmZhEXiTENtvDvNab8wREqEo4byVKDw83TTHeH": { + "id": "QmbRVvFzCmZhEXiTENtvDvNab8wREqEo4byVKDw83TTHeH", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 4 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024613", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256037", + "slurm_partition": "normal", + "worker_group_id": "2256037", + "framework": "vllm", + "framework_args": "--model 
/capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", + "started_at": "2026-05-17T07:45:00Z", + "expires_at": "2026-05-17T13:45:00Z" + } + }, + "/QmbRoB5rRsDTnSaVQsG72ZJtvdVvERx9Nv3B5p2AGGDw2f": { + "id": "QmbRoB5rRsDTnSaVQsG72ZJtvdVvERx9Nv3B5p2AGGDw2f", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86383 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86590 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024614", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256038", + "slurm_partition": "normal", + "worker_group_id": "2256038", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-17T08:15:00Z", + "expires_at": "2026-05-17T14:15:00Z" + } + }, + "/QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9": { + "id": "QmbUKJkCfotDzbFE5uoTsXD4GRyPHjzZC1f2yAGLoeBMn9", + 
"latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "pending", + "available_offering": null, + "service": null, + "last_seen": 1779027021, + "version": "v0.0.6", + "public_address": "148.187.108.178", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024615", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256039", + "slurm_partition": "normal", + "worker_group_id": "2256039", + "started_at": "2026-05-17T08:45:00Z", + "expires_at": "2026-05-17T20:45:00Z" + } + }, + "/QmbiBQSUfDeXTdkPHfa6cyySRNJWSXspRbNwjCMJC6juVL": { + "id": "QmbiBQSUfDeXTdkPHfa6cyySRNJWSXspRbNwjCMJC6juVL", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84582 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024616", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256040", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256040", + "framework": "sglang", 
+ "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T09:15:00Z", + "expires_at": "2026-05-17T15:15:00Z" + } + }, + "/QmbjQk58JdgKcygFU85ztiqCF4MRqe6K6RAT9D7SEeUzyd": { + "id": "QmbjQk58JdgKcygFU85ztiqCF4MRqe6K6RAT9D7SEeUzyd", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "pending", + "available_offering": null, + "service": [], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 12 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 9 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 7 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024617", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256041", + "slurm_partition": "normal", + "worker_group_id": "2256041", + "started_at": "2026-05-17T09:45:00Z", + "expires_at": "2026-05-17T15:45:00Z" + } + }, + "/Qmc7Lhr1FRE4vh7Mk7VJAgPoEGJU8fYM82tdZsuKn8Bu4A": { + "id": "Qmc7Lhr1FRE4vh7Mk7VJAgPoEGJU8fYM82tdZsuKn8Bu4A", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=zai-org/GLM-4.7-Flash" + ] + } + ], + "last_seen": 1779027020, + "version": "v0.0.6", + 
"public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 94835 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024618", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256042", + "slurm_partition": "normal", + "worker_group_id": "2256042", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/zai-org/GLM-4.7-Flash --port 8080", + "served_model_name": "zai-org/GLM-4.7-Flash", + "started_at": "2026-05-17T10:15:00Z", + "expires_at": "2026-05-17T22:15:00Z" + } + }, + "/QmcNUTEmgWq9u51XaQ3NVdQmUGb5AXcTq31b81HRajuF8B": { + "id": "QmcNUTEmgWq9u51XaQ3NVdQmUGb5AXcTq31b81HRajuF8B", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-27B" + ] + } + ], + "last_seen": 1779027010, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 90935 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 90409 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024619", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256043", + "slurm_partition": "normal", + "worker_group_id": "2256043", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-27B --tensor-parallel-size 2 --port 8080", + "served_model_name": 
"Qwen/Qwen3.5-27B", + "started_at": "2026-05-17T10:45:00Z", + "expires_at": "2026-05-17T16:45:00Z" + } + }, + "/QmcRV1QhEcmGEbxer4DwpswD27wf3g86cjzYdRiWprj7KG": { + "id": "QmcRV1QhEcmGEbxer4DwpswD27wf3g86cjzYdRiWprj7KG", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027010, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024620", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256044", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256044", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T11:15:00Z", + "expires_at": "2026-05-17T17:15:00Z" + } + }, + "/QmduMdBLdY6vDX3P1Wbrbv5QvPEGXQKnJrWb5WQMRADGRT": { + "id": "QmduMdBLdY6vDX3P1Wbrbv5QvPEGXQKnJrWb5WQMRADGRT", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": 
"ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=openai/gpt-oss-120b-Vsdo" + ] + } + ], + "last_seen": 1779027003, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 84605 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024621", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "2256045", + "slurm_partition": "normal", + "worker_group_id": "2256045", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/openai/gpt-oss-120b --tensor-parallel-size 4 --port 8080 --reasoning-parser openai-oss", + "served_model_name": "openai/gpt-oss-120b-Vsdo", + "started_at": "2026-05-17T11:45:00Z", + "expires_at": "2026-05-17T23:45:00Z" + } + }, + "/QmdvyPbnCXYz9SrHtN1RTZC8zfu17BgiuaNhy5Dxkjkdx3": { + "id": "QmdvyPbnCXYz9SrHtN1RTZC8zfu17BgiuaNhy5Dxkjkdx3", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3.5-397B-A17B-IYuQ" + ] + } + ], + "last_seen": 1779027021, + "version": "v0.0.6", + 
"public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 2 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 3 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024622", + "labels": { + "launched_by": "xyao", + "slurm_job_id": "2256046", + "slurm_partition": "normal", + "worker_group_id": "2256046", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3.5-397B-A17B --tensor-parallel-size 4 --max-model-len 32768 --gpu-memory-utilization 0.85 --port 8080", + "served_model_name": "Qwen/Qwen3.5-397B-A17B-IYuQ", + "started_at": "2026-05-17T12:15:00Z", + "expires_at": "2026-05-17T18:15:00Z" + } + }, + "/QmeHwokMDRGBQkJaAcJ2kqUPgVVfb3pAspwznmGtfG3SrQ": { + "id": "QmeHwokMDRGBQkJaAcJ2kqUPgVVfb3pAspwznmGtfG3SrQ", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=google/gemma-4-31B-it-TsOA" + ] + } + ], + "last_seen": 1779027004, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18864 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 18865 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 19117 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 
97871, + "used_memory": 18865 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024623", + "labels": { + "launched_by": "aahadinia", + "slurm_job_id": "2256047", + "slurm_partition": "normal", + "worker_group_id": "2256047", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/google/gemma-4-31B-it --tensor-parallel-size 4 --port 8080", + "served_model_name": "google/gemma-4-31B-it-TsOA", + "started_at": "2026-05-17T12:45:00Z", + "expires_at": "2026-05-17T18:45:00Z" + } + }, + "/QmfCUkV2TZaSzeVCaXQ1VaxsPJaweujarmpvgdBALd4qzp": { + "id": "QmfCUkV2TZaSzeVCaXQ1VaxsPJaweujarmpvgdBALd4qzp", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=swiss-ai/Apertus-8B-Instruct-2509" + ] + } + ], + "last_seen": 1779027015, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 88607 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024624", + "labels": { + "launched_by": "isternfel", + "slurm_job_id": "2256048", + "slurm_partition": "normal", + "slurm_reservation": "SD-69241-apertus-1-5-0", + "worker_group_id": "2256048", + "framework": "sglang", + "framework_args": "--model-path /capstor/store/cscs/swissai/infra01/hf_models/models/swiss-ai/Apertus-8B-Instruct-2509 --port 8080 --enable-metrics", + "served_model_name": "swiss-ai/Apertus-8B-Instruct-2509", + "started_at": "2026-05-17T07:20:00Z", + 
"expires_at": "2026-05-17T19:20:00Z" + } + }, + "/QmfEhbkxvqWJ5uPCyEtLLizjnnEJ5SHMrGn36wDwhNezJY": { + "id": "QmfEhbkxvqWJ5uPCyEtLLizjnnEJ5SHMrGn36wDwhNezJY", + "latency": 0, + "privileged": false, + "owner": "", + "current_offering": null, + "role": null, + "status": "ready", + "available_offering": null, + "service": [ + { + "name": "llm", + "hardware": { + "gpus": null, + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": [ + "model=Qwen/Qwen3-32B-kgCt" + ] + } + ], + "last_seen": 1779027021, + "version": "v0.0.6", + "public_address": "", + "hardware": { + "gpus": [ + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86592 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86591 + }, + { + "name": "NVIDIA GH200 120GB", + "total_memory": 97871, + "used_memory": 86384 + } + ], + "host_memory": 0, + "host_memory_bandwidth": 0, + "host_memory_used": 0 + }, + "connected": true, + "load": null, + "hostname": "nid024625", + "labels": { + "launched_by": "yiswang", + "slurm_job_id": "2256049", + "slurm_partition": "normal", + "worker_group_id": "2256049", + "framework": "vllm", + "framework_args": "--model /capstor/store/cscs/swissai/infra01/hf_models/models/Qwen/Qwen3-32B --tensor-parallel-size 4 --port 8080", + "served_model_name": "Qwen/Qwen3-32B-kgCt", + "started_at": "2026-05-17T07:50:00Z", + "expires_at": "2026-05-17T08:50:00Z" + } + } +} \ No newline at end of file diff --git a/backend/tests/test_app.py b/backend/tests/test_app.py index bee50b2..be326a6 100644 --- a/backend/tests/test_app.py +++ b/backend/tests/test_app.py @@ -63,7 +63,7 @@ def test_app_routes_registered(client): def test_models_endpoint_no_auth(client): - """/v1/models should return 200 even when OCF is unreachable.""" + """/v1/models should 
return 200 even when OpenTela is unreachable.""" response = client.get("/v1/models") assert response.status_code == 200 assert response.json()["object"] == "list" diff --git a/backend/tests/test_model_service.py b/backend/tests/test_model_service.py new file mode 100644 index 0000000..e8c7727 --- /dev/null +++ b/backend/tests/test_model_service.py @@ -0,0 +1,218 @@ +"""Unit tests for get_all_models: aggregation of DNT peers into per-model +entries the frontend can consume.""" + +from unittest.mock import patch + +from backend.services.model_service import get_all_models + + +def _dnt_response(peers: dict): + """Build a fake requests.Response.json() for a DNT /v1/dnt/table call.""" + + class FakeResp: + def __init__(self, data): + self._data = data + + def json(self): + return self._data + + return FakeResp(peers) + + +PEER_NEW_BINARY_HEAD = { + "id": "QmHead", + "hostname": "nid006220", + "version": "v0.0.6", + "status": "ready", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "12345", + "worker_group_id": "12345", + "framework": "sglang", + "started_at": "2026-05-15T18:00:00Z", + }, + "hardware": {"gpus": [{"name": "GH200"}] * 4}, + "service": [ + { + "name": "llm", + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": ["model=swiss-ai/Apertus-8B"], + } + ], +} + +PEER_NEW_BINARY_FOLLOWER = { + "id": "QmFollower", + "hostname": "nid006221", + "version": "v0.0.6", + "status": "pending", + "labels": { + "launched_by": "rosmith", + "slurm_job_id": "12345", + "worker_group_id": "12345", + "framework": "sglang", + "started_at": "2026-05-15T18:00:00Z", + }, + "hardware": {"gpus": [{"name": "GH200"}] * 4}, + "service": [], +} + +PEER_OLD_BINARY = { + "id": "QmOld", + "hardware": {"gpus": [{"name": "GH200"}] * 4}, + "service": [ + { + "name": "llm", + "status": "connected", + "host": "localhost", + "port": "8080", + "identity_group": ["model=legacy/Llama-70B"], + } + ], +} + + +def test_old_binary_peer_still_surfaces(): + with 
patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = _dnt_response({"/QmOld": PEER_OLD_BINARY}) + out = get_all_models("http://x/v1/dnt/table", with_details=True) + assert len(out) == 1 + assert out[0]["id"] == "legacy/Llama-70B" + # Old binary contributes no metadata — frontend treats blanks as unknown. + assert out[0]["hostname"] == "" + assert out[0]["worker_group_id"] == "" + + +def test_new_binary_head_carries_labels(): + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = _dnt_response({"/QmHead": PEER_NEW_BINARY_HEAD}) + out = get_all_models("http://x/v1/dnt/table", with_details=True) + assert len(out) == 1 + entry = out[0] + assert entry["id"] == "swiss-ai/Apertus-8B" + assert entry["hostname"] == "nid006220" + assert entry["launched_by"] == "rosmith" + assert entry["worker_group_id"] == "12345" + assert entry["framework"] == "sglang" + assert entry["status"] == "ready" + + +def test_metrics_only_follower_groups_with_head_via_worker_group_id(): + """A multi-node replica's follower has no `service` but does carry + worker_group_id. It should appear in the output with id='' so the + frontend can attribute it to the same replica as the head.""" + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = _dnt_response( + { + "/QmHead": PEER_NEW_BINARY_HEAD, + "/QmFollower": PEER_NEW_BINARY_FOLLOWER, + } + ) + out = get_all_models("http://x/v1/dnt/table", with_details=True) + assert len(out) == 2 + by_id = {e["peer_id"]: e for e in out} + assert by_id["QmHead"]["id"] == "swiss-ai/Apertus-8B" + assert by_id["QmFollower"]["id"] == "" + # Shared worker_group_id lets the frontend group them. 
+ assert ( + by_id["QmHead"]["worker_group_id"] + == by_id["QmFollower"]["worker_group_id"] + == "12345" + ) + + +def test_follower_without_worker_group_id_skipped(): + """Older binary follower with no labels and no service is uninformative — + drop it so the model list stays clean.""" + bare = {"id": "QmBare", "service": [], "hardware": {"gpus": []}} + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = _dnt_response({"/QmBare": bare}) + out = get_all_models("http://x/v1/dnt/table") + assert out == [] + + +def test_legacy_ocf_env_vars_still_work(monkeypatch): + """OCF_HEAD_ADDR and OCF_FIXTURE_PATH must keep working through the + rename to OTELA_*. Deployments can migrate on their own schedule.""" + from backend.config import Settings + + monkeypatch.setenv("OCF_HEAD_ADDR", "http://legacy:8092") + monkeypatch.setenv("OCF_FIXTURE_PATH", "/legacy/fixture.json") + monkeypatch.delenv("OTELA_HEAD_ADDR", raising=False) + monkeypatch.delenv("OTELA_FIXTURE_PATH", raising=False) + s = Settings() + assert s.otela_head_addr == "http://legacy:8092" + assert s.otela_fixture_path == "/legacy/fixture.json" + + +def test_canonical_otela_env_vars_win_over_legacy(monkeypatch): + """When both are set, the canonical OTELA_* names win so a partial + migration (one renamed, one not) doesn't silently keep the legacy + value in force.""" + from backend.config import Settings + + monkeypatch.setenv("OCF_HEAD_ADDR", "http://legacy:8092") + monkeypatch.setenv("OTELA_HEAD_ADDR", "http://canonical:8092") + s = Settings() + assert s.otela_head_addr == "http://canonical:8092" + + +def test_request_failure_returns_empty(): + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.side_effect = Exception("boom") + out = get_all_models("http://x/v1/dnt/table") + assert out == [] + + +# ── fixtures from live prod ───────────────────────────────────────────────── + + +def _load_fixture(name: str) -> dict: + import json + import 
pathlib + + p = pathlib.Path(__file__).parent / "fixtures" / name + return json.loads(p.read_text()) + + +def test_real_prod_payload_returns_models(): + """Pre-upgrade prod payload: every peer either has service entries (which + surface as a model entry) or has no labels (so we drop the metrics-only + fallback). End count should match what the dashboard shows today.""" + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = type( + "R", (), {"json": lambda self=None: _load_fixture("dnt_table_prod.json")} + )() + out = get_all_models("http://x/v1/dnt/table", with_details=True) + # All non-empty ids should be model names from the live network. + model_ids = {e["id"] for e in out if e["id"]} + assert "swiss-ai/Apertus-70B-Instruct-2509" in model_ids + assert "openai/gpt-oss-120b-Vsdo" in model_ids + # No labels on the old binary → worker_group_id should be empty everywhere. + assert all(e["worker_group_id"] == "" for e in out) + + +def test_upgraded_payload_groups_multinode_replica(): + """Simulated v0.0.6 deployment: the gemma 'multi-node demo' pair share a + worker_group_id. One has a service, the other is metrics-only with id=''. + Backend returns both entries with the shared worker_group_id so the + frontend can aggregate them into one logical replica.""" + with patch("backend.services.model_service.requests.get") as mock_get: + mock_get.return_value = type( + "R", + (), + {"json": lambda self=None: _load_fixture("dnt_table_upgraded.json")}, + )() + out = get_all_models("http://x/v1/dnt/table", with_details=True) + # Find the shared-wg cluster + by_wg: dict[str, list] = {} + for e in out: + by_wg.setdefault(e["worker_group_id"], []).append(e) + multi = [v for v in by_wg.values() if len(v) > 1] + assert multi, "fixture should contain at least one multi-peer worker group" + # At least one peer in the multi-peer group should be metrics-only (id=''). 
+ pair = multi[0] + assert any(e["id"] == "" for e in pair), pair + assert any(e["id"] != "" for e in pair), pair diff --git a/frontend/src/components/ui/ModelCard.svelte b/frontend/src/components/ui/ModelCard.svelte index 1055dcb..742b560 100644 --- a/frontend/src/components/ui/ModelCard.svelte +++ b/frontend/src/components/ui/ModelCard.svelte @@ -2,15 +2,40 @@ import { getModelLogo } from '../../lib/modelLogos'; import { getModelMetricsUrl, getModelTier } from '../../lib/modelMetrics'; + interface Peer { + peer_id?: string; + hostname?: string; + status?: string; + device?: string; + launched_by?: string; + slurm_job_id?: string; + started_at?: string; + expires_at?: string; + otela_version?: string; + framework?: string; + worker_group_id?: string; + labels?: Record; + } + + interface Replica { + worker_group_id: string; + head: Peer; + followers: Peer[]; + nodesPerReplica: number; + devices: string[]; + } + interface ModelCardProps { entry: { - collection: string; - slug: string; + collection?: string; + slug?: string; data: { title: string; description: string; devices: string[]; - instanceCount: number; + replicas: Replica[]; + replicaCount: number; + nodeCount: number; }; }; } @@ -22,112 +47,226 @@ const tier = getModelTier(entry.data.title); const chatUrl = `${chatAppUrl.replace(/\/$/, "")}/?models=${encodeURIComponent(entry.data.title)}`; + let expanded = false; let copied = false; + // Aggregated metadata for the headline summary — pull from the first + // replica's head peer. All replicas of the same model usually share the + // same launcher/framework, but we render them per-replica below anyway. + $: firstHead = entry.data.replicas[0]?.head ?? {}; + $: framework = firstHead.framework || ""; + + // "2026-05-17T07:00:00Z" → "2026-05-17T07:00:00Z (11 hours ago)". + // Returns the iso untouched if it doesn't parse — keeps the row useful even + // if OpenTela emits something we don't understand. 
+ function withRelative(iso: string | undefined): string { + if (!iso) return ""; + const t = new Date(iso).getTime(); + if (isNaN(t)) return iso; + const diffMs = t - Date.now(); + const abs = Math.abs(diffMs); + const rtf = new Intl.RelativeTimeFormat("en", { numeric: "auto" }); + let rel: string; + if (abs < 60_000) rel = rtf.format(Math.round(diffMs / 1000), "second"); + else if (abs < 3_600_000) rel = rtf.format(Math.round(diffMs / 60_000), "minute"); + else if (abs < 86_400_000) rel = rtf.format(Math.round(diffMs / 3_600_000), "hour"); + else rel = rtf.format(Math.round(diffMs / 86_400_000), "day"); + return `${iso} (${rel})`; + } + + // Multi-node topology string: "2 nodes × 4xGH200" for an 8-GPU TP replica. + function topologyString(r: Replica): string { + const dev = r.devices[0] || "?"; + if (r.nodesPerReplica === 1) return dev; + return `${r.nodesPerReplica} nodes × ${dev}`; + } + async function copyModelName(e: Event) { e.preventDefault(); e.stopPropagation(); try { await navigator.clipboard.writeText(entry.data.title); copied = true; - setTimeout(() => { - copied = false; - }, 1200); + setTimeout(() => { copied = false; }, 1200); } catch (err) { console.error('Failed to copy:', err); } } + + function toggleExpand() { + expanded = !expanded; + } + + function onKeyDown(e: KeyboardEvent) { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + toggleExpand(); + } + } - -
- Model logo -
-
- - {entry.data.title} - - - {#if metricsUrl} - - Metrics - - {/if} - {#if tier === "L2"} +
+ Model logo +
+
{ if (e.key === "Enter") copyModelName(e); }} + role="button" + tabindex="0" + class="inline-block cursor-pointer break-all font-mono {copied ? 'animate-name-flash' : ''}" + title="Click to copy model name" > - 24/7 + {entry.data.title} - {:else if tier === "slurm"} - - Slurm - - {/if} - {#if entry.data.instanceCount > 1} - - x{entry.data.instanceCount} - - {/if} + {#if copied} + + + + {:else} + + + + + {/if} + + {#if metricsUrl} + + Metrics + + {/if} + {#if tier === "L2"} + 24/7 + {:else if tier === "slurm"} + Slurm + {/if} + {#if entry.data.replicaCount > 1} + + x{entry.data.replicaCount} + + {/if} +
+
on {entry.data.devices.join(', ') || 'unknown'}
-
on {entry.data.devices.join(', ')}
+ + +
+ + {#if expanded} +
+ + + + + + + + Open in OpenWebUI + + + + {#each entry.data.replicas as replica, idx (replica.worker_group_id)} + {@const head = replica.head} + {@const hasLabels = !!(head.launched_by || head.slurm_job_id || head.started_at || head.expires_at || head.framework || head.otela_version || head.status)} + {@const peerLine = (p) => { + const hn = p.hostname; + const pid = p.peer_id; + if (hn && pid) return `${hn} (${pid})`; + return hn || pid || "unknown"; + }} + {@const rows = [ + ["model", entry.data.title], + ["launched_by", head.launched_by], + ["slurm_job_id", head.slurm_job_id], + ["started_at", withRelative(head.started_at)], + ["expires_at", withRelative(head.expires_at)], + ["framework", head.framework], + ["otela_version", head.otela_version], + // worker_group_id is omitted when it's a synthesised legacy-N fallback — + // it's just noise in that case. + ["worker_group_id", replica.worker_group_id.startsWith("legacy-") ? "" : replica.worker_group_id], + ["head", peerLine(head)], + ...replica.followers.map((f, i) => [`follower_${i + 1}`, peerLine(f)]), + ].filter(([, v]) => v && v !== "unknown" || v === peerLine(head) || (typeof v === "string" && v.includes("(")))} +
+
+ Replica {idx + 1}{entry.data.replicaCount > 1 ? ` / ${entry.data.replicaCount}` : ""} + · + {topologyString(replica)} + {#if head.status} + {head.status} + {/if} +
+ + +
{rows
+            .filter(([, v]) => v)
+            .map(([k, v]) => `${k.padEnd(18)} ${v}`)
+            .join("\n")}
+ + {#if !hasLabels} +

+ Launch metadata (launched_by, slurm_job_id, framework, started_at, expires_at…) requires OpenTela v0.0.6+ on the serving node. +

+ {/if} + + + {#if head.labels && Object.keys(head.labels).length > 0} + {@const extra = Object.entries(head.labels).filter(([k]) => + !["launched_by","slurm_job_id","worker_group_id","framework","started_at","expires_at","slurm_partition","served_model_name"].includes(k) + )} + {#if extra.length > 0} +
Extra labels
+
{extra.map(([k, v]) => `${k.padEnd(18)} ${v}`).join("\n")}
+ {/if} + {/if} +
+ {/each} +
+ {/if}
- - - - - + \ No newline at end of file + diff --git a/frontend/src/components/ui/ModelList.svelte b/frontend/src/components/ui/ModelList.svelte index 835411e..997b808 100644 --- a/frontend/src/components/ui/ModelList.svelte +++ b/frontend/src/components/ui/ModelList.svelte @@ -8,6 +8,7 @@ let models = []; let modelCount = 0; + let replicaCount = 0; let loading = true; let error = null; @@ -20,30 +21,68 @@ const data = await response.json(); const rawModels = data.data; - const modelsMap = new Map(); + // Map worker_group_id → model_id, so metrics-only follower peers + // (id="") can be attributed to the right model. + const wgToModel = new Map(); + for (const m of rawModels) { + if (m.id && m.worker_group_id) wgToModel.set(m.worker_group_id, m.id); + } - for (const model of rawModels) { - if (!modelsMap.has(model.id)) { - modelsMap.set(model.id, { - id: model.id, - devices: new Set(), - count: 0, + // Group: model_id → worker_group_id → list of peer entries. + const modelsMap = new Map(); + for (const m of rawModels) { + let modelId = m.id; + if (!modelId) { + modelId = wgToModel.get(m.worker_group_id); + if (!modelId) continue; // orphan metrics peer + } + if (!modelsMap.has(modelId)) { + modelsMap.set(modelId, { id: modelId, replicas: new Map() }); + } + const model = modelsMap.get(modelId); + // Fall back to peer_id when worker_group_id is missing + // (older OpenTela binary) so each peer becomes its own replica. + const wg = m.worker_group_id || m.peer_id || `legacy-${model.replicas.size}`; + if (!model.replicas.has(wg)) { + model.replicas.set(wg, { + worker_group_id: wg, + peers: [], }); } - const existing = modelsMap.get(model.id); - existing.devices.add(model.device); - existing.count++; + model.replicas.get(wg).peers.push(m); } + // Materialize for rendering. 
modelCount = modelsMap.size; - models = Array.from(modelsMap.values()).map(groupedModel => ({ - data: { - title: groupedModel.id, - description: groupedModel.id, - devices: Array.from(groupedModel.devices), - instanceCount: groupedModel.count, - }, - })); + replicaCount = 0; + models = Array.from(modelsMap.values()).map(grouped => { + const replicas = Array.from(grouped.replicas.values()).map(r => { + // The head is the peer that owns the serving entry. + const head = r.peers.find(p => p.id === grouped.id) || r.peers[0]; + const followers = r.peers.filter(p => p !== head); + return { + worker_group_id: r.worker_group_id, + head, + followers, + nodesPerReplica: r.peers.length, + // device strings are per-peer; collect distinct ones + devices: Array.from(new Set(r.peers.map(p => p.device).filter(Boolean))), + }; + }); + replicaCount += replicas.length; + const allDevices = Array.from(new Set(replicas.flatMap(r => r.devices))); + return { + data: { + title: grouped.id, + description: grouped.id, + devices: allDevices, + replicas, + replicaCount: replicas.length, + // Total peers (head + followers) across all replicas. + nodeCount: replicas.reduce((s, r) => s + r.nodesPerReplica, 0), + }, + }; + }); } catch (err) { console.error("Error fetching models:", err); error = err.message; @@ -73,7 +112,11 @@

Available Models {#if !loading && !error} - {modelCount} + {modelCount} + {#if replicaCount !== modelCount} + {replicaCount} + + {/if} {/if}

diff --git a/frontend/src/content/articles/03-opentela/index.md b/frontend/src/content/articles/03-opentela/index.md index 364b034..19bb02e 100644 --- a/frontend/src/content/articles/03-opentela/index.md +++ b/frontend/src/content/articles/03-opentela/index.md @@ -59,4 +59,4 @@ By using OpenTela, SwissAI enables: ## Conclusion -The Swiss AI Initiative's integration of OpenTela represents a significant shift toward a more sovereign and collaborative AI infrastructure. By leveraging OpenTela's decentralized architecture, SwissAI built a platform where traditional HPC clusters can be used as a shared pool where every researchers can benefit from and contribute to. It also effectively converts fragmented, idle GPU capacity into a unified, accessible resource for the research community. You can view the real-time status of the models served by SwissAI on the [Swiss AI Research Platform](https://serving.swissai.cscs.ch/). If you are interested in learning more about how SwissAI uses OpenTela, please feel free to reach out to us! \ No newline at end of file +The Swiss AI Initiative's integration of OpenTela represents a significant shift toward a more sovereign and collaborative AI infrastructure. By leveraging OpenTela's decentralized architecture, SwissAI built a platform where traditional HPC clusters can be used as a shared pool that every researcher can benefit from and contribute to. It also effectively converts fragmented, idle GPU capacity into a unified, accessible resource for the research community. You can view the real-time status of the models served by SwissAI on the [Swiss AI Research Platform](https://serving.swissai.svc.cscs.ch/). If you are interested in learning more about how SwissAI uses OpenTela, please feel free to reach out to us! 
\ No newline at end of file diff --git a/frontend/src/content/guides/01-getting-started/model-launch.md b/frontend/src/content/guides/01-getting-started/model-launch.md index 49e961d..e452731 100644 --- a/frontend/src/content/guides/01-getting-started/model-launch.md +++ b/frontend/src/content/guides/01-getting-started/model-launch.md @@ -6,7 +6,7 @@ date: "December 22 2025" ## Recommended: Use model-launch -_[**model-launch**](https://github.com/swiss-ai/model-launch) is the recommended tool for getting models on [serving.swissai.cscs.ch](https://serving.swissai.cscs.ch)!_ +_[**model-launch**](https://github.com/swiss-ai/model-launch) is the recommended tool for getting models on [serving.swissai.svc.cscs.ch](https://serving.swissai.svc.cscs.ch)!_ It provides a framework-agnostic approach to submitting SLURM jobs for distributed inference using SGLang or vLLM. The tool handles single-node and multi-node deployments, automatically integrates with OCF (Open Compute Framework) for service discovery, and makes your models accessible externally from outside the cluster. It includes ready-to-use examples for popular models like Swiss AI Apertus, DeepSeek-V3, Kimi-K2, and many others, with support for advanced features like multi-worker routing, pre-launch commands, and interactive debugging modes. 
diff --git a/frontend/src/layouts/PageLayout.astro b/frontend/src/layouts/PageLayout.astro index 2086472..5a137ad 100644 --- a/frontend/src/layouts/PageLayout.astro +++ b/frontend/src/layouts/PageLayout.astro @@ -10,7 +10,7 @@ type Props = { }; const { title, description } = Astro.props; -const apiUrl = process.env.VITE_API_URL || 'https://api.swissai.cscs.ch'; +const apiUrl = import.meta.env.VITE_API_URL || 'https://api.swissai.svc.cscs.ch'; --- diff --git a/frontend/src/lib/config.ts b/frontend/src/lib/config.ts index fd18d7f..3d9355a 100644 --- a/frontend/src/lib/config.ts +++ b/frontend/src/lib/config.ts @@ -1,8 +1,8 @@ -const DEFAULT_API_URL = 'https://api.swissai.cscs.ch'; +const DEFAULT_API_URL = 'https://api.swissai.svc.cscs.ch'; export function getApiUrl(): string { if (typeof window !== 'undefined') { return (window as any).__API_URL__ || DEFAULT_API_URL; } - return process.env.VITE_API_URL || DEFAULT_API_URL; + return import.meta.env.VITE_API_URL || DEFAULT_API_URL; } diff --git a/frontend/src/pages/api_key.astro b/frontend/src/pages/api_key.astro index 52b0498..f2fd1f5 100644 --- a/frontend/src/pages/api_key.astro +++ b/frontend/src/pages/api_key.astro @@ -28,7 +28,7 @@ if (isDev) { // API key will be fetched client-side to avoid exposing it in the HTML let apiKey = "Loading..."; -const apiUrl = process.env.VITE_API_URL || 'https://api.swissai.cscs.ch'; +const apiUrl = import.meta.env.VITE_API_URL || 'https://api.swissai.svc.cscs.ch'; --- diff --git a/frontend/src/pages/index.astro b/frontend/src/pages/index.astro index fcf53e6..38225d5 100644 --- a/frontend/src/pages/index.astro +++ b/frontend/src/pages/index.astro @@ -24,9 +24,9 @@ const articles = (await getCollection("articles")) .sort((a, b) => b.data.date.valueOf() - a.data.date.valueOf()) .slice(0,SITE.NUM_PROJECTS_ON_HOMEPAGE); -const session = await getSession(Astro.request); +const session = import.meta.env.DEV ? 
null : await getSession(Astro.request); const isSessionValid = session && session.user; -const chatAppUrl = process.env.CHAT_APP_URL || "https://chat.swissai.cscs.ch"; +const chatAppUrl = process.env.CHAT_APP_URL || "https://chat.swissai.svc.cscs.ch"; const contributors = [ { diff --git a/frontend/src/pages/leaderboard/index.astro b/frontend/src/pages/leaderboard/index.astro index 59dfdcc..d81b4c7 100644 --- a/frontend/src/pages/leaderboard/index.astro +++ b/frontend/src/pages/leaderboard/index.astro @@ -9,7 +9,7 @@ let tokenData = {}; let error = null; try { - const response = await fetch(`${process.env.VITE_API_URL || 'https://api.swissai.cscs.ch'}/v1/metrics`, { + const response = await fetch(`${import.meta.env.VITE_API_URL || 'https://api.swissai.svc.cscs.ch'}/v1/metrics`, { method: "POST", headers: { "Content-Type": "application/json", diff --git a/meta/k8s/ocf-test.yaml b/meta/k8s/otela-test.yaml similarity index 87% rename from meta/k8s/ocf-test.yaml rename to meta/k8s/otela-test.yaml index 03ea016..13f6a5a 100644 --- a/meta/k8s/ocf-test.yaml +++ b/meta/k8s/otela-test.yaml @@ -28,9 +28,9 @@ spec: - "148.187.108.173" ports: - containerPort: 8092 - name: ocf-http-port + name: otela-http-port - containerPort: 43905 - name: ocf-p2p-port + name: otela-p2p-port resources: requests: memory: "8Gi" @@ -39,11 +39,11 @@ spec: memory: "16Gi" cpu: "8" volumeMounts: - - name: test-ocf-key-id + - name: test-otela-key-id mountPath: /root/.ocfcore/keys readOnly: true volumes: - - name: test-ocf-key-id + - name: test-otela-key-id secret: secretName: test-dispatcher-id-key --- @@ -59,9 +59,9 @@ spec: - protocol: TCP port: 8092 targetPort: 8092 - name: ocf-http-port + name: otela-http-port - protocol: TCP port: 43905 targetPort: 43905 - name: ocf-p2p-port + name: otela-p2p-port type: LoadBalancer \ No newline at end of file diff --git a/meta/k8s/ocf.yaml b/meta/k8s/otela.yaml similarity index 91% rename from meta/k8s/ocf.yaml rename to meta/k8s/otela.yaml index 3c5ba99..f4e5657 
100644 --- a/meta/k8s/ocf.yaml +++ b/meta/k8s/otela.yaml @@ -26,9 +26,9 @@ spec: - "148.187.108.172" ports: - containerPort: 8092 - name: ocf-http-port + name: otela-http-port - containerPort: 43905 - name: ocf-p2p-port + name: otela-p2p-port resources: requests: memory: "8Gi" @@ -57,9 +57,9 @@ spec: - protocol: TCP port: 8092 targetPort: 8092 - name: ocf-http-port + name: otela-http-port - protocol: TCP port: 43905 targetPort: 43905 - name: ocf-p2p-port + name: otela-p2p-port type: LoadBalancer \ No newline at end of file