Skip to content

Commit 2394db8

Browse files
committed
Add chat completion and model endpoint tests for MaaS
1 parent 564523d commit 2394db8

File tree

3 files changed

+86
-0
lines changed

3 files changed

+86
-0
lines changed

tests/model_serving/model_server/maas_billing/conftest.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
import pytest
44
import requests
55
from simple_logger.logger import get_logger
6+
from utilities.plugins.constant import OpenAIEnpoints
67

78
from tests.model_serving.model_server.maas_billing.utils import (
89
detect_scheme_via_llmisvc,
910
host_from_ingress_domain,
1011
mint_token,
12+
llmis_name,
1113
)
1214

1315
LOGGER = get_logger(name=__name__)
@@ -44,3 +46,14 @@ def base_url(admin_client) -> str:
4446
scheme = detect_scheme_via_llmisvc(client=admin_client, namespace="llm")
4547
host = host_from_ingress_domain(client=admin_client)
4648
return f"{scheme}://{host}/maas-api"
49+
50+
51+
@pytest.fixture(scope="session")
def model_url(admin_client) -> str:
    """
    Build the chat-completions URL for the deployed model.

    Shape: http(s)://<host>/llm/<deployment>/v1/chat/completions
    """
    llm_namespace = "llm"
    scheme = detect_scheme_via_llmisvc(client=admin_client, namespace=llm_namespace)
    ingress_host = host_from_ingress_domain(client=admin_client)
    # Resolve the first Ready LLMInferenceService name; it doubles as the URL path segment.
    deployment_name = llmis_name(client=admin_client, namespace=llm_namespace, label_selector=None)
    return f"{scheme}://{ingress_host}/llm/{deployment_name}{OpenAIEnpoints.CHAT_COMPLETIONS}"
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from utilities.plugins.constant import RestHeader, OpenAIEnpoints
2+
from simple_logger.logger import get_logger
3+
4+
LOGGER = get_logger(name=__name__)
5+
MODELS_INFO = OpenAIEnpoints.MODELS_INFO
6+
CHAT_COMPLETIONS = OpenAIEnpoints.CHAT_COMPLETIONS
7+
8+
9+
class TestMaasEndpoints:
    """Smoke tests for the MaaS OpenAI-compatible endpoints behind the gateway."""

    def test_model(self, request_session_http, base_url: str, minted_token: str) -> None:
        """Verify /v1/models endpoint is reachable and returns available models."""
        headers = {"Authorization": f"Bearer {minted_token}", **RestHeader.HEADERS}
        url = f"{base_url}{MODELS_INFO}"

        resp = request_session_http.get(url, headers=headers, timeout=60)
        assert resp.status_code == 200, f"/v1/models failed: {resp.status_code} {resp.text[:200]}"

        body = resp.json()
        assert isinstance(body.get("data"), list), "'data' missing or not a list"
        assert body["data"], "no models found"

    def test_chat_completions(
        self,
        request_session_http,
        base_url: str,
        minted_token: str,
        model_url: str,
    ) -> None:
        """
        Verify the chat completion endpoint /llm/<deployment>/v1/chat/completions
        responds correctly to a prompt request.
        """
        headers = {"Authorization": f"Bearer {minted_token}", **RestHeader.HEADERS}

        # 1) Pick a model id from /v1/models
        models_url = f"{base_url}{MODELS_INFO}"
        models_resp = request_session_http.get(models_url, headers=headers, timeout=60)
        assert models_resp.status_code == 200, f"/v1/models failed: {models_resp.status_code} {models_resp.text[:200]}"
        models = models_resp.json().get("data", [])
        assert models, "no models available"
        model_id = models[0].get("id", "")
        # Guard early: an empty id would otherwise surface as a confusing 4xx from the server.
        assert model_id, "first model entry has no 'id'"
        LOGGER.info("Using model_id=%s", model_id)

        # 2) POST a chat completion request.
        # Fix: the chat-completions API takes a "messages" list of role/content
        # objects; "prompt" belongs to the legacy /v1/completions API and is
        # rejected by OpenAI-compatible chat endpoints.
        payload = {
            "model": model_id,
            "messages": [{"role": "user", "content": "Hello"}],
            "max_tokens": 50,
        }
        LOGGER.info("POST %s with keys=%s", model_url, list(payload.keys()))
        resp = request_session_http.post(url=model_url, headers=headers, json=payload, timeout=60)
        LOGGER.info("POST %s -> %s", model_url, resp.status_code)
        assert resp.status_code == 200, (
            f"/v1/chat/completions failed: {resp.status_code} {resp.text[:200]} (url={model_url})"
        )

        # 3) Validate the response carries non-empty generated text.
        body = resp.json()
        assert isinstance(body.get("choices"), list), "'choices' missing or not a list"
        if body["choices"]:
            # Chat responses put text under choices[0].message.content; keep the
            # legacy "text" fallback for servers that answer in completion form.
            msg = body["choices"][0].get("message", {}) or {}
            text = msg.get("content") or body["choices"][0].get("text", "")
            assert isinstance(text, str) and text.strip() != "", "first choice has no text content"

tests/model_serving/model_server/maas_billing/utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,17 @@ def b64url_decode(encoded_str: str) -> bytes:
6565
padding = "=" * (-len(encoded_str) % 4)
6666
padded_bytes = (encoded_str + padding).encode(encoding="utf-8")
6767
return base64.urlsafe_b64decode(s=padded_bytes)
68+
69+
70+
def llmis_name(client, namespace: str = "llm", label_selector: str | None = None) -> str:
    """
    Return the name of the first Ready LLMInferenceService.

    Scans LLMInferenceService resources in *namespace* (optionally filtered by
    *label_selector*) and returns the name of the first one whose status holds
    a condition of type "Ready" with status "True".

    Raises:
        RuntimeError: if no LLMInferenceService reports Ready=True.
    """
    for llmis in LLMInferenceService.get(dyn_client=client, namespace=namespace, label_selector=label_selector):
        status = llmis.instance.status or {}
        for condition in status.get("conditions", []):
            if condition.get("type") == "Ready" and condition.get("status") == "True":
                return llmis.name
    raise RuntimeError("No Ready LLMInferenceService found")

0 commit comments

Comments
 (0)