swiss-ai · robmsmt · May 18, 2026 · May 17, 2026 · May 17, 2026 · May 17, 2026
diff --git a/Makefile b/Makefile
@@ -1,7 +1,14 @@
-.PHONY: install install-dev format check test run
+.PHONY: install install-dev format check test run dummy-run db-up db-down migrate _ensure-env _ensure-frontend-env
 
 UV_EXTRA ?=
 
+PG_CONTAINER := serving-api-pg
+PG_PORT := 5433
+PG_USER := serving
+PG_PASS := serving
+PG_DB := serving
+DATABASE_URL := postgresql://$(PG_USER):$(PG_PASS)@localhost:$(PG_PORT)/$(PG_DB)
+
 install:
 	uv pip install $(UV_EXTRA) -r backend/requirements.txt
 
@@ -19,7 +26,59 @@ check:
 test:
 	pytest backend/tests/ -v
 
-run:
+_ensure-env:
+	@if [ ! -f .env ]; then \
+		cp .env.example .env; \
+		echo "copied .env.example -> .env"; \
+	fi
+
+_ensure-frontend-env:
+	@if [ ! -f frontend/.env ]; then \
+		cp frontend/.env.example frontend/.env; \
+		echo "copied frontend/.env.example -> frontend/.env (fill in AUTH0_* to enable login)"; \
+	fi
+
+db-up:
+	@if [ -z "$$(docker ps -q -f name=^/$(PG_CONTAINER)$$)" ]; then \
+		if [ -n "$$(docker ps -aq -f name=^/$(PG_CONTAINER)$$)" ]; then \
+			echo "starting existing $(PG_CONTAINER) container"; \
+			docker start $(PG_CONTAINER) > /dev/null; \
+		else \
+			echo "creating $(PG_CONTAINER) container on :$(PG_PORT)"; \
+			docker run -d --name $(PG_CONTAINER) \
+				-e POSTGRES_USER=$(PG_USER) \
+				-e POSTGRES_PASSWORD=$(PG_PASS) \
+				-e POSTGRES_DB=$(PG_DB) \
+				-p $(PG_PORT):5432 \
+				postgres:16 > /dev/null; \
+		fi; \
+	fi
+	@printf "waiting for postgres"; \
+	for i in $$(seq 1 30); do \
+		if docker exec $(PG_CONTAINER) pg_isready -U $(PG_USER) -d $(PG_DB) > /dev/null 2>&1; then \
+			echo " ready"; exit 0; \
+		fi; \
+		printf "."; sleep 1; \
+	done; \
+	echo " timed out"; exit 1
+
+db-down:
+	-docker stop $(PG_CONTAINER) > /dev/null 2>&1
+	-docker rm $(PG_CONTAINER) > /dev/null 2>&1
+
+migrate: _ensure-env db-up
+	alembic upgrade head
+
+run: _ensure-env _ensure-frontend-env db-up migrate
+	uvicorn backend.main:app --reload --host 0.0.0.0 --port 8080 & \
+	cd frontend && npm run dev & \
+	wait
+
+# Same as `run` but forces the model list to come from the synthesised
+# upgraded fixture instead of the live OpenTela endpoint. Useful for
+# iterating on the model-card UI without depending on prod state.
+dummy-run: _ensure-env _ensure-frontend-env db-up migrate
+	OTELA_FIXTURE_PATH=$(PWD)/backend/tests/fixtures/dnt_table_upgraded.json \
 	uvicorn backend.main:app --reload --host 0.0.0.0 --port 8080 & \
 	cd frontend && npm run dev & \
 	wait
diff --git a/README.md b/README.md
@@ -25,7 +25,7 @@ Frontend and backend API proxy for SwissAI LLM serving. For examples on how to l
                  │
                  ▼
         ┌─────────────────┐
-        │       OCF       │  OpenTela P2P routing → model=apertus-...
+        │     OpenTela    │  P2P routing → model=apertus-...
         └────────┬────────┘
                  │
                  ▼
@@ -42,7 +42,7 @@ frontend/        # web UI (Astro + Svelte)
 meta/            # example Dockerfiles, example k8s manifests, build scripts
 ```
 
-OCF (Open Compute Framework) now renamed to OpenTela upstream is maintained at [eth-easl/OpenTela](https://github.com/eth-easl/OpenTela). We maintain a fork at [swiss-ai/OpenTela](https://github.com/swiss-ai/opentela) to control deployments to dev+prod.
+OpenTela (formerly OCF / "Open Compute Framework") is maintained upstream at [eth-easl/OpenTela](https://github.com/eth-easl/OpenTela). We maintain a fork at [swiss-ai/OpenTela](https://github.com/swiss-ai/opentela) to control deployments to dev+prod.
 
 ## Dev Quick Start
 

diff --git a/backend/config.py b/backend/config.py
@@ -1,6 +1,8 @@
-from pydantic_settings import BaseSettings
 from functools import lru_cache
 
+from pydantic import AliasChoices, Field
+from pydantic_settings import BaseSettings
+
 
 @lru_cache()
 def get_settings():
@@ -17,7 +19,20 @@ class Settings(BaseSettings):
     database_url: str = ""
     auth_secret: str = ""
     auth_trust_host: bool = False
-    ocf_head_addr: str = ""
+    # Accept the historical OCF_* env var names in addition to the canonical
+    # OTELA_* ones so existing deployments keep working through the rename.
+    # Python attribute access stays `settings.otela_*`.
+    otela_head_addr: str = Field(
+        default="",
+        validation_alias=AliasChoices("otela_head_addr", "ocf_head_addr"),
+    )
+    # When set, /v1/models* reads this JSON file instead of calling
+    # $otela_head_addr/v1/dnt/table. Used for UI iteration against synthesised
+    # upgraded payloads (see backend/tests/fixtures/build_upgraded.py).
+    otela_fixture_path: str = Field(
+        default="",
+        validation_alias=AliasChoices("otela_fixture_path", "ocf_fixture_path"),
+    )
     langfuse_host: str = ""
     langfuse_public_key: str = ""
     langfuse_secret_key: str = ""
@@ -28,6 +43,7 @@ class Settings(BaseSettings):
 
     class Config:
         env_file = ".env"
+        populate_by_name = True
 
 
 def parse_hardware_info(hardware_info):

diff --git a/backend/routers/completions.py b/backend/routers/completions.py
@@ -75,7 +75,7 @@ async def chat_completion(
     )
 
     response = await llm_proxy(
-        endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/",
+        endpoint=settings.otela_head_addr + "/v1/service/llm/v1/",
         api_key=token,
         request=llm_request,
     )
@@ -125,7 +125,7 @@ async def completion(
     )
 
     response = await llm_proxy_completions(
-        endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/",
+        endpoint=settings.otela_head_addr + "/v1/service/llm/v1/",
         api_key=token,
         request=llm_request,
     )

diff --git a/backend/routers/embeddings.py b/backend/routers/embeddings.py
@@ -26,7 +26,7 @@ async def embeddings(
     data["app_title"] = app_title
 
     response = await llm_proxy_embeddings(
-        endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/",
+        endpoint=settings.otela_head_addr + "/v1/service/llm/v1/",
         api_key=token,
         **data,
     )

diff --git a/backend/routers/models.py b/backend/routers/models.py
@@ -6,9 +6,17 @@
 settings = get_settings()
 
 
+def _dnt_endpoint() -> str:
+    """When OTELA_FIXTURE_PATH is set, read DNT from disk instead of HTTP —
+    used for iterating on the UI against synthesised post-upgrade payloads."""
+    if settings.otela_fixture_path:
+        return settings.otela_fixture_path
+    return settings.otela_head_addr + "/v1/dnt/table"
+
+
 @router.get("/v1/models_detailed")
 async def list_models_detailed():
-    models = get_all_models(settings.ocf_head_addr + "/v1/dnt/table", with_details=True)
+    models = get_all_models(_dnt_endpoint(), with_details=True)
     return dict(
         object="list",
         data=models,
@@ -17,9 +25,7 @@ async def list_models_detailed():
 
 @router.get("/v1/models")
 async def list_models():
-    models = get_all_models(
-        settings.ocf_head_addr + "/v1/dnt/table", with_details=False
-    )
+    models = get_all_models(_dnt_endpoint(), with_details=False)
     return dict(
         object="list",
         data=models,

diff --git a/backend/routers/rerank.py b/backend/routers/rerank.py
@@ -14,7 +14,7 @@ async def rerank(
 ):
     data = await request.json()
     response = await llm_proxy_rerank(
-        endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/",
+        endpoint=settings.otela_head_addr + "/v1/service/llm/v1/",
         api_key=token,
         payload=data,
         model=data.get("model", "unknown"),
@@ -29,7 +29,7 @@ async def score(
 ):
     data = await request.json()
     response = await llm_proxy_score(
-        endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/",
+        endpoint=settings.otela_head_addr + "/v1/service/llm/v1/",
         api_key=token,
         payload=data,
         model=data.get("model", "unknown"),

diff --git a/backend/routers/responses.py b/backend/routers/responses.py
@@ -17,7 +17,7 @@ async def create_response(
     stream = data.get("stream", False)
 
     response = await llm_proxy_responses(
-        endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/",
+        endpoint=settings.otela_head_addr + "/v1/service/llm/v1/",
         api_key=token,
         payload=data,
         stream=stream,

diff --git a/backend/routers/tokenization.py b/backend/routers/tokenization.py
@@ -14,7 +14,7 @@ async def tokenize(
 ):
     data = await request.json()
     response = await llm_proxy_tokenize(
-        endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/",
+        endpoint=settings.otela_head_addr + "/v1/service/llm/v1/",
         api_key=token,
         payload=data,
         model=data.get("model", "unknown"),
@@ -29,7 +29,7 @@ async def detokenize(
 ):
     data = await request.json()
     response = await llm_proxy_detokenize(
-        endpoint=settings.ocf_head_addr + "/v1/service/llm/v1/",
+        endpoint=settings.otela_head_addr + "/v1/service/llm/v1/",
         api_key=token,
         payload=data,
         model=data.get("model", "unknown"),

diff --git a/backend/services/model_service.py b/backend/services/model_service.py
@@ -1,44 +1,96 @@
+import json
+import pathlib
+
 import requests
+
 from backend.config import parse_hardware_info
 
 
+def _peer_metadata(node_info: dict) -> dict:
+    """Pull the surfaced launch-time fields off a DNT peer entry.
+
+    Older OpenTela binaries (<v0.0.6) don't emit hostname/status/labels —
+    we return whatever's present and let consumers treat missing keys as
+    'unknown'. labels.worker_group_id is what the frontend groups by to
+    count replicas of a single model.
+    """
+    labels = node_info.get("labels") or {}
+    return {
+        "peer_id": node_info.get("id", ""),
+        "hostname": node_info.get("hostname", ""),
+        "otela_version": node_info.get("version", ""),
+        "status": node_info.get("status", ""),
+        "labels": labels,
+        # Convenience pulls — frontends can just read these directly
+        # without having to dig into labels every time.
+        "worker_group_id": labels.get("worker_group_id", ""),
+        "launched_by": labels.get("launched_by", ""),
+        "slurm_job_id": labels.get("slurm_job_id", ""),
+        "framework": labels.get("framework", ""),
+        "started_at": labels.get("started_at", ""),
+        "expires_at": labels.get("expires_at", ""),
+    }
+
+
+def _load_dnt(endpoint: str) -> dict:
+    """Fetch DNT data. If endpoint points at a local file (no scheme), read
+    it as JSON — that's the fixture-mode dev path. Otherwise HTTP-GET it."""
+    if endpoint and not endpoint.startswith(("http://", "https://")):
+        return json.loads(pathlib.Path(endpoint).read_text())
+    return requests.get(endpoint).json()
+
+
 def get_all_models(endpoint: str, with_details: bool = False):
+    """Return one entry per (peer, model) pair served on the network.
+
+    The frontend aggregates these by model id and by worker_group_id to
+    produce the model card + replica count. We keep the granularity at the
+    peer level so multi-node replicas show their full topology (head +
+    metrics-only followers all share the same worker_group_id).
+    """
     try:
-        data = requests.get(endpoint).json()
+        data = _load_dnt(endpoint)
     except Exception:
         return []
     models = []
     for node_info in data.values():
-        if not node_info.get("service"):
-            continue
+        meta = _peer_metadata(node_info)
         device_info = parse_hardware_info(node_info.get("hardware"))
-        for service in node_info["service"]:
+        services = node_info.get("service") or []
+        if not services:
+            # Metrics-only / pending peer: surface it under a sentinel id so
+            # the frontend can attribute it to the right replica via
+            # worker_group_id and show it as part of a launching/follower set.
+            if not meta["worker_group_id"]:
+                continue
+            entry = {
+                "id": "",  # no model yet
+                "object": "model",
+                "created": "0x",
+                "owner": "0x",
+                **meta,
+            }
+            if with_details:
+                entry["device"] = device_info
+            models.append(entry)
+            continue
+        for service in services:
             if not service.get("identity_group"):
                 continue
             model_names = [
                 identity[len("model=") :]
                 for identity in service["identity_group"]
                 if identity.startswith("model=")
             ]
-            if with_details:
-                models.extend(
-                    {
-                        "id": model_name,
-                        "device": device_info,
-                        "object": "model",
-                        "created": "0x",
-                        "owner": "0x",
-                    }
-                    for model_name in model_names
-                )
-            else:
-                models.extend(
-                    {
-                        "id": model_name,
-                        "object": "model",
-                        "created": "0x",
-                        "owner": "0x",
-                    }
-                    for model_name in model_names
-                )
+            for model_name in model_names:
+                entry = {
+                    "id": model_name,
+                    "object": "model",
+                    "created": "0x",
+                    "owner": "0x",
+                    **meta,
+                }
+                if with_details:
+                    entry["device"] = device_info
+                models.append(entry)
     return models