Skip to content

Commit 4f8c7cf

Browse files
committed
Add TinyLlama LLMD
1 parent eea334a commit 4f8c7cf

File tree

3 files changed

+138
-2
lines changed

3 files changed

+138
-2
lines changed

tests/model_serving/model_server/maas_billing/conftest.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,20 @@
44
import requests
55
from simple_logger.logger import get_logger
66
from utilities.plugins.constant import RestHeader, OpenAIEnpoints
7+
from kubernetes.dynamic import DynamicClient
8+
from ocp_resources.namespace import Namespace
9+
from ocp_resources.llm_inference_service import LLMInferenceService
710

11+
from utilities.llmd_utils import create_llmisvc
12+
from utilities.llmd_constants import ModelStorage as LLMDModelStorage, ContainerImages
13+
from utilities.constants import Timeout
814

915
from tests.model_serving.model_server.maas_billing.utils import (
1016
detect_scheme_via_llmisvc,
1117
host_from_ingress_domain,
1218
mint_token,
1319
llmis_name,
20+
patch_llmisvc_with_maas_router,
1421
)
1522

1623
LOGGER = get_logger(name=__name__)
@@ -52,9 +59,13 @@ def base_url(admin_client) -> str:
5259

5360

5461
@pytest.fixture(scope="session")
55-
def model_url(admin_client) -> str:
62+
def model_url(
63+
admin_client: DynamicClient,
64+
llmd_inference_service_tinyllama: LLMInferenceService,
65+
) -> str:
5666
"""
5767
MODEL_URL:http(s)://<host>/llm/<deployment>/v1/chat/completions
68+
5869
"""
5970
scheme = detect_scheme_via_llmisvc(client=admin_client)
6071
host = host_from_ingress_domain(client=admin_client)
@@ -73,6 +84,7 @@ def maas_models(
7384
request_session_http: requests.Session,
7485
base_url: str,
7586
maas_headers: dict,
87+
llmd_inference_service_tinyllama: LLMInferenceService,
7688
):
7789
"""
7890
Call /v1/models once and return the list of models.
@@ -86,3 +98,54 @@ def maas_models(
8698
models = resp.json().get("data", [])
8799
assert models, "no models available"
88100
return models
101+
102+
103+
@pytest.fixture(scope="session")
def llmd_inference_service_tinyllama(
    admin_client: DynamicClient,
) -> Generator[LLMInferenceService, None, None]:
    """
    Session-scoped fixture deploying a TinyLlama (HF) LLMInferenceService.

    Creates the service in the 'llm' namespace for MaaS Billing tests, wires
    it into the MaaS gateway, then yields it; the create_llmisvc context
    manager deletes the service when the session ends.
    """
    target_namespace = "llm"

    # ensure_exists=True makes resource construction fail fast when the
    # namespace is absent — presumably pre-created by the deployment; confirm.
    Namespace(
        client=admin_client,
        name=target_namespace,
        ensure_exists=True,
    )

    with create_llmisvc(
        client=admin_client,
        name="llm-hf-tinyllama",
        namespace=target_namespace,
        storage_uri=LLMDModelStorage.HF_TINYLLAMA,
        container_image=ContainerImages.VLLM_CPU,
        container_resources={
            "limits": {"cpu": "2", "memory": "16Gi"},
            "requests": {"cpu": "1", "memory": "12Gi"},
        },
        wait=True,
        timeout=Timeout.TIMEOUT_15MIN,
    ) as llm_service:
        LOGGER.info(
            f"MaaS LLMD: created LLMInferenceService {llm_service.namespace}/{llm_service.name} for TinyLlama HF"
        )

        # Attach the service to the MaaS gateway before any test touches it.
        patch_llmisvc_with_maas_router(
            llm_service=llm_service,
            client=admin_client,
        )

        yield llm_service

        LOGGER.info(
            f"MaaS LLMD: finished tests; LLMInferenceService "
            f"{llm_service.namespace}/{llm_service.name} will be deleted "
            "by context manager"
        )

tests/model_serving/model_server/maas_billing/test_maas_endpoints.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ def test_chat_completions(
3333
LOGGER.info("Using model_id=%s", model_id)
3434
assert model_id, "first model from /v1/models has no 'id'"
3535

36-
payload = {"model": model_id, "prompt": "Hello", "max_tokens": 50}
36+
payload = {
37+
"model": model_id,
38+
"messages": [{"role": "user", "content": "Hello from MaaS billing test!"}],
39+
"max_tokens": 50,
40+
}
3741
LOGGER.info(f"POST {model_url} with keys={list(payload.keys())}")
3842

3943
resp = request_session_http.post(

tests/model_serving/model_server/maas_billing/utils.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
from urllib.parse import urlparse
99
from ocp_resources.llm_inference_service import LLMInferenceService
1010
from utilities.llmd_utils import get_llm_inference_url
11+
from simple_logger.logger import get_logger
12+
from kubernetes.dynamic import DynamicClient
13+
from kubernetes.dynamic.exceptions import DynamicApiError
14+
15+
LOGGER = get_logger(name=__name__)
1116

1217

1318
def host_from_ingress_domain(client) -> str:
@@ -104,3 +109,67 @@ def llmis_name(client, namespace: str = "llm", label_selector: str | None = None
104109
raise RuntimeError("No Ready LLMInferenceService found")
105110

106111
return service.name
112+
113+
114+
def patch_llmisvc_with_maas_router(
    llm_service: LLMInferenceService,
    client: DynamicClient,
) -> None:
    """
    Patch an existing LLMInferenceService with MaaS router wiring and annotations.

    Attaches the service to the maas-default-gateway (openshift-ingress
    namespace) via a JSON merge-patch so the model — TinyLlama here — is
    reachable through the MaaS gateway and participates in MaaS flows.

    Raises:
        DynamicApiError: if the API server rejects the patch (logged first).
    """
    gateway_router = {
        "gateway": {
            "refs": [
                {
                    "name": "maas-default-gateway",
                    "namespace": "openshift-ingress",
                }
            ]
        },
        "route": {},
    }

    LOGGER.info(
        f"MaaS LLMD: patching LLMInferenceService "
        f"{llm_service.namespace}/{llm_service.name} "
        f"with MaaS router spec: {gateway_router}"
    )

    # NOTE(review): "[]" looks like an empty tier list opting the service into
    # MaaS without tier restrictions — confirm against the MaaS controller.
    merge_patch = {
        "metadata": {"annotations": {"alpha.maas.opendatahub.io/tiers": "[]"}},
        "spec": {"router": gateway_router},
    }

    api_resource = client.resources.get(
        api_version="serving.kserve.io/v1alpha1",
        kind="LLMInferenceService",
    )

    try:
        api_resource.patch(
            name=llm_service.name,
            namespace=llm_service.namespace,
            body=merge_patch,
            content_type="application/merge-patch+json",
        )
    except DynamicApiError as exc:
        LOGGER.error(
            f"MaaS LLMD: failed to patch LLMInferenceService {llm_service.namespace}/{llm_service.name}: {exc}"
        )
        raise

    LOGGER.info(
        f"MaaS LLMD: successfully patched LLMInferenceService "
        f"{llm_service.namespace}/{llm_service.name} for MaaS routing"
    )

0 commit comments

Comments
 (0)