44import requests
55from simple_logger .logger import get_logger
66from utilities .plugins .constant import RestHeader , OpenAIEnpoints
7+ from kubernetes .dynamic import DynamicClient
8+ from ocp_resources .namespace import Namespace
9+ from ocp_resources .llm_inference_service import LLMInferenceService
710
11+ from utilities .llmd_utils import create_llmisvc
12+ from utilities .llmd_constants import ModelStorage as LLMDModelStorage , ContainerImages
13+ from utilities .constants import Timeout
814
915from tests .model_serving .model_server .maas_billing .utils import (
1016 detect_scheme_via_llmisvc ,
1117 host_from_ingress_domain ,
1218 mint_token ,
1319 llmis_name ,
20+ patch_llmisvc_with_maas_router ,
1421)
1522
# Module-level logger keyed to this module's import path, so log output
# from these fixtures is attributable to this file.
LOGGER = get_logger(name=__name__)
@@ -52,9 +59,13 @@ def base_url(admin_client) -> str:
5259
5360
5461@pytest .fixture (scope = "session" )
55- def model_url (admin_client ) -> str :
62+ def model_url (
63+ admin_client : DynamicClient ,
64+ llmd_inference_service_tinyllama : LLMInferenceService ,
65+ ) -> str :
5666 """
5767 MODEL_URL:http(s)://<host>/llm/<deployment>/v1/chat/completions
68+
5869 """
5970 scheme = detect_scheme_via_llmisvc (client = admin_client )
6071 host = host_from_ingress_domain (client = admin_client )
@@ -73,6 +84,7 @@ def maas_models(
7384 request_session_http : requests .Session ,
7485 base_url : str ,
7586 maas_headers : dict ,
87+ llmd_inference_service_tinyllama : LLMInferenceService ,
7688):
7789 """
7890 Call /v1/models once and return the list of models.
@@ -86,3 +98,54 @@ def maas_models(
8698 models = resp .json ().get ("data" , [])
8799 assert models , "no models available"
88100 return models
101+
102+
@pytest.fixture(scope="session")
def llmd_inference_service_tinyllama(
    admin_client: DynamicClient,
) -> Generator[LLMInferenceService, None, None]:
    """
    Create a real LLMD model (TinyLlama chat HF) in the 'llm' namespace
    for MaaS Billing tests, and delete it when the session ends.

    Args:
        admin_client: Cluster-admin dynamic client used for all API calls.

    Yields:
        LLMInferenceService: the deployed TinyLlama service, patched with
        the MaaS router. Deletion happens automatically when the
        create_llmisvc context manager exits at session teardown.
    """
    namespace_name = "llm"

    # ensure_exists=True makes this an existence check: fail fast here if
    # the 'llm' namespace is missing instead of failing later on create.
    Namespace(
        client=admin_client,
        name=namespace_name,
        ensure_exists=True,
    )

    container_resources = {
        "limits": {"cpu": "2", "memory": "16Gi"},
        "requests": {"cpu": "1", "memory": "12Gi"},
    }

    # Pass arguments directly instead of building an intermediate kwargs
    # dict that is immediately **-unpacked — same call, less indirection.
    with create_llmisvc(
        client=admin_client,
        name="llm-hf-tinyllama",
        namespace=namespace_name,
        storage_uri=LLMDModelStorage.HF_TINYLLAMA,
        container_image=ContainerImages.VLLM_CPU,
        container_resources=container_resources,
        wait=True,
        timeout=Timeout.TIMEOUT_15MIN,
    ) as llm_service:
        # Lazy %-style args: the message is formatted only if INFO is enabled
        # (rendered text is identical to the previous f-string version).
        LOGGER.info(
            "MaaS LLMD: created LLMInferenceService %s/%s for TinyLlama HF",
            llm_service.namespace,
            llm_service.name,
        )

        # NOTE(review): patch semantics live in patch_llmisvc_with_maas_router;
        # presumably it wires the MaaS router onto the service — verify there.
        patch_llmisvc_with_maas_router(
            llm_service=llm_service,
            client=admin_client,
        )

        yield llm_service

        LOGGER.info(
            "MaaS LLMD: finished tests; LLMInferenceService %s/%s will be deleted "
            "by context manager",
            llm_service.namespace,
            llm_service.name,
        )
0 commit comments