Skip to content

Commit 589601a

Browse files
authored
tests(maas-billing): add TinyLlama LLMD model (#861)
* Add TinyLlama LLMD
* test-maas-billing: TinyLlama s3 deployment
* tests-maas-billing - review comments implemented
* tests(maas-billing): added fixtures
1 parent 0440830 commit 589601a

File tree

4 files changed

+253
-74
lines changed

4 files changed

+253
-74
lines changed

tests/model_serving/model_server/maas_billing/conftest.py

Lines changed: 91 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,16 @@
55
import requests
66
from simple_logger.logger import get_logger
77
from utilities.plugins.constant import OpenAIEnpoints
8+
from ocp_resources.service_account import ServiceAccount
89

910
from kubernetes.dynamic import DynamicClient
11+
from ocp_resources.namespace import Namespace
12+
from ocp_resources.llm_inference_service import LLMInferenceService
13+
14+
from utilities.llmd_utils import create_llmisvc
15+
from utilities.llmd_constants import ModelStorage, ContainerImages
16+
from utilities.constants import Timeout
17+
1018
from ocp_resources.infrastructure import Infrastructure
1119
from ocp_resources.oauth import OAuth
1220
from ocp_resources.resource import ResourceEditor
@@ -15,11 +23,14 @@
1523
from utilities.infra import login_with_user_password, get_openshift_token
1624
from utilities.general import wait_for_oauth_openshift_deployment
1725
from ocp_resources.secret import Secret
26+
27+
1828
from tests.model_serving.model_server.maas_billing.utils import (
1929
detect_scheme_via_llmisvc,
2030
host_from_ingress_domain,
2131
mint_token,
2232
llmis_name,
33+
patch_llmisvc_with_maas_router,
2334
create_maas_group,
2435
build_maas_headers,
2536
get_maas_models_response,
@@ -60,22 +71,19 @@ def minted_token(request_session_http, base_url: str, current_client_token: str)
6071
return token
6172

6273

63-
@pytest.fixture(scope="module")
64-
def base_url(admin_client) -> str:
65-
scheme = detect_scheme_via_llmisvc(client=admin_client)
66-
host = host_from_ingress_domain(client=admin_client)
67-
return f"{scheme}://{host}/maas-api"
74+
@pytest.fixture(scope="class")
75+
def base_url(maas_scheme: str, maas_host: str) -> str:
76+
return f"{maas_scheme}://{maas_host}/maas-api"
6877

6978

70-
@pytest.fixture(scope="session")
71-
def model_url(admin_client) -> str:
72-
"""
73-
MODEL_URL:http(s)://<host>/llm/<deployment>/v1/chat/completions
74-
"""
75-
scheme = detect_scheme_via_llmisvc(client=admin_client)
76-
host = host_from_ingress_domain(client=admin_client)
79+
@pytest.fixture(scope="class")
80+
def model_url(
81+
maas_scheme: str,
82+
maas_host: str,
83+
admin_client: DynamicClient,
84+
) -> str:
7785
deployment = llmis_name(client=admin_client)
78-
return f"{scheme}://{host}/llm/{deployment}{CHAT_COMPLETIONS}"
86+
return f"{maas_scheme}://{maas_host}/llm/{deployment}{CHAT_COMPLETIONS}"
7987

8088

8189
@pytest.fixture
@@ -85,9 +93,10 @@ def maas_headers(minted_token: str) -> dict:
8593

8694
@pytest.fixture
8795
def maas_models(
88-
request_session_http,
89-
base_url,
90-
maas_headers,
96+
request_session_http: requests.Session,
97+
base_url: str,
98+
maas_headers: dict,
99+
maas_inference_service_tinyllama: LLMInferenceService,
91100
):
92101
resp = get_maas_models_response(
93102
session=request_session_http,
@@ -458,3 +467,69 @@ def maas_models_response_for_actor(
458467
base_url=base_url,
459468
headers=maas_headers_for_actor,
460469
)
470+
471+
472+
@pytest.fixture(scope="class")
473+
def maas_inference_service_tinyllama(
474+
admin_client: DynamicClient,
475+
unprivileged_model_namespace: Namespace,
476+
model_service_account: ServiceAccount,
477+
) -> Generator[LLMInferenceService, None, None]:
478+
"""
479+
TinyLlama S3-backed LLMInferenceService wired through MaaS for tests.
480+
"""
481+
with (
482+
create_llmisvc(
483+
client=admin_client,
484+
name="llm-s3-tinyllama",
485+
namespace=unprivileged_model_namespace.name,
486+
storage_uri=ModelStorage.TINYLLAMA_S3,
487+
container_image=ContainerImages.VLLM_CPU,
488+
container_resources={
489+
"limits": {"cpu": "2", "memory": "12Gi"},
490+
"requests": {"cpu": "1", "memory": "8Gi"},
491+
},
492+
service_account=model_service_account.name,
493+
wait=True,
494+
timeout=Timeout.TIMEOUT_15MIN,
495+
) as llm_service,
496+
patch_llmisvc_with_maas_router(llm_service=llm_service),
497+
):
498+
llmd_instance = llm_service.instance
499+
model_spec = llmd_instance.spec.model
500+
501+
storage_uri = model_spec.uri
502+
assert storage_uri == ModelStorage.TINYLLAMA_S3, (
503+
f"Unexpected storage_uri on TinyLlama LLMInferenceService: {storage_uri}"
504+
)
505+
506+
status = llmd_instance.status
507+
conditions = {condition.type: condition.status for condition in status.conditions}
508+
assert conditions.get("Ready") == "True", f"TinyLlama LLMInferenceService not Ready, conditions={conditions}"
509+
510+
LOGGER.info(
511+
f"MaaS: TinyLlama S3 LLMInferenceService "
512+
f"{llm_service.namespace}/{llm_service.name} "
513+
f"is Ready with storage_uri={storage_uri}"
514+
)
515+
516+
yield llm_service
517+
518+
LOGGER.info(
519+
f"MaaS: TinyLlama S3 LLMInferenceService "
520+
f"{llm_service.namespace}/{llm_service.name} "
521+
f"will be deleted at teardown"
522+
)
523+
524+
525+
@pytest.fixture(scope="class")
526+
def maas_scheme(admin_client: DynamicClient, unprivileged_model_namespace: Namespace) -> str:
527+
return detect_scheme_via_llmisvc(
528+
client=admin_client,
529+
namespace=unprivileged_model_namespace.name,
530+
)
531+
532+
533+
@pytest.fixture(scope="session")
534+
def maas_host(admin_client):
535+
return host_from_ingress_domain(client=admin_client)
Lines changed: 21 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,24 @@
1-
from utilities.plugins.constant import OpenAIEnpoints
21
from simple_logger.logger import get_logger
32
import requests
3+
import pytest
4+
from tests.model_serving.model_server.maas_billing.utils import verify_chat_completions
45

56
LOGGER = get_logger(name=__name__)
6-
MODELS_INFO = OpenAIEnpoints.MODELS_INFO
7-
CHAT_COMPLETIONS = OpenAIEnpoints.CHAT_COMPLETIONS
87

98

9+
@pytest.mark.parametrize(
10+
"unprivileged_model_namespace",
11+
[
12+
pytest.param(
13+
{
14+
"name": "llm",
15+
"modelmesh-enabled": False,
16+
},
17+
id="maas-billing-namespace",
18+
),
19+
],
20+
indirect=True,
21+
)
1022
class TestMaasEndpoints:
1123
def test_model(
1224
self,
@@ -26,32 +38,11 @@ def test_chat_completions(
2638
maas_headers: dict,
2739
maas_models: list,
2840
) -> None:
29-
"""
30-
Verify /llm/<deployment>/v1/chat/completions responds to a simple prompt.
31-
"""
32-
model_id = maas_models[0].get("id", "")
33-
LOGGER.info("Using model_id=%s", model_id)
34-
assert model_id, "first model from /v1/models has no 'id'"
35-
36-
payload = {"model": model_id, "prompt": "Hello", "max_tokens": 50}
37-
LOGGER.info(f"POST {model_url} with keys={list(payload.keys())}")
38-
39-
resp = request_session_http.post(
40-
url=model_url,
41+
"""Verify /llm/<deployment>/v1/chat/completions responds to a simple prompt."""
42+
verify_chat_completions(
43+
request_session_http=request_session_http,
44+
model_url=model_url,
4145
headers=maas_headers,
42-
json=payload,
43-
timeout=60,
44-
)
45-
LOGGER.info(f"POST {model_url} -> {resp.status_code}")
46-
47-
assert resp.status_code == 200, (
48-
f"/v1/chat/completions failed: {resp.status_code} {resp.text[:200]} (url={model_url})"
46+
models_list=maas_models,
47+
log_prefix="MaaS Endpoint Test",
4948
)
50-
51-
body = resp.json()
52-
choices = body.get("choices", [])
53-
assert isinstance(choices, list) and choices, "'choices' missing or empty"
54-
55-
msg = choices[0].get("message", {}) or {}
56-
text = msg.get("content") or choices[0].get("text", "")
57-
assert isinstance(text, str) and text.strip(), "first choice has no text content"

tests/model_serving/model_server/maas_billing/test_maas_rbac_e2e.py

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import pytest
22
from simple_logger.logger import get_logger
33
from utilities.plugins.constant import OpenAIEnpoints
4+
from tests.model_serving.model_server.maas_billing.utils import (
5+
verify_chat_completions,
6+
)
47

58
LOGGER = get_logger(name=__name__)
69

@@ -14,6 +17,19 @@
1417
]
1518

1619

20+
@pytest.mark.parametrize(
21+
"unprivileged_model_namespace",
22+
[
23+
pytest.param(
24+
{
25+
"name": "llm",
26+
"modelmesh-enabled": False,
27+
},
28+
id="maas-billing-namespace",
29+
),
30+
],
31+
indirect=True,
32+
)
1733
@pytest.mark.usefixtures("maas_free_group", "maas_premium_group")
1834
@pytest.mark.parametrize(
1935
"ocp_token_for_actor",
@@ -37,6 +53,7 @@ def test_mint_token_for_actors(
3753

3854
def test_models_visible_for_actors(
3955
self,
56+
model_url: str,
4057
maas_models_response_for_actor,
4158
) -> None:
4259
"""Use fixture for /v1/models response."""
@@ -50,34 +67,24 @@ def test_chat_completions_for_actors(
5067
model_url: str,
5168
maas_headers_for_actor: dict,
5269
maas_models_response_for_actor,
70+
ocp_token_for_actor,
5371
) -> None:
5472
"""
5573
Reuse the models fixture instead of duplicating the /v1/models logic,
56-
then call /v1/chat/completions with the first model id.
74+
then call /v1/chat/completions with the first model id using the
75+
common verify_chat_completions helper.
5776
"""
5877
models_response = maas_models_response_for_actor
59-
models = models_response.json().get("data", [])
60-
assert models, "no models returned from /v1/models"
61-
model_id = models[0].get("id", "")
62-
assert model_id, "first model from /v1/models has no 'id'"
78+
models_list = models_response.json().get("data", [])
79+
assert models_list, "no models returned from /v1/models"
6380

64-
payload = {"model": model_id, "prompt": "Hello", "max_tokens": 16}
65-
66-
LOGGER.info(f"MaaS RBAC: POST {model_url} with payload keys={list(payload.keys())}")
67-
68-
chat_response = request_session_http.post(
69-
url=model_url,
81+
verify_chat_completions(
82+
request_session_http=request_session_http,
83+
model_url=model_url,
7084
headers=maas_headers_for_actor,
71-
json=payload,
72-
timeout=60,
73-
)
74-
75-
LOGGER.info(f"MaaS RBAC: POST {model_url} -> {chat_response.status_code}")
76-
77-
assert chat_response.status_code == 200, (
78-
f"/v1/chat/completions failed: {chat_response.status_code} {chat_response.text[:200]} (url={model_url})"
85+
models_list=models_list,
86+
prompt_text="Hello",
87+
max_tokens=16,
88+
request_timeout_seconds=60,
89+
log_prefix="MaaS RBAC",
7990
)
80-
81-
chat_body = chat_response.json()
82-
choices = chat_body.get("choices", [])
83-
assert isinstance(choices, list) and choices, "'choices' missing or empty"

0 commit comments

Comments (0)