55import requests
66from simple_logger .logger import get_logger
77from utilities .plugins .constant import OpenAIEnpoints
8+ from ocp_resources .service_account import ServiceAccount
89
910from kubernetes .dynamic import DynamicClient
11+ from ocp_resources .namespace import Namespace
12+ from ocp_resources .llm_inference_service import LLMInferenceService
13+
14+ from utilities .llmd_utils import create_llmisvc
15+ from utilities .llmd_constants import ModelStorage , ContainerImages
16+ from utilities .constants import Timeout
17+
1018from ocp_resources .infrastructure import Infrastructure
1119from ocp_resources .oauth import OAuth
1220from ocp_resources .resource import ResourceEditor
1523from utilities .infra import login_with_user_password , get_openshift_token
1624from utilities .general import wait_for_oauth_openshift_deployment
1725from ocp_resources .secret import Secret
26+
27+
1828from tests .model_serving .model_server .maas_billing .utils import (
1929 detect_scheme_via_llmisvc ,
2030 host_from_ingress_domain ,
2131 mint_token ,
2232 llmis_name ,
33+ patch_llmisvc_with_maas_router ,
2334 create_maas_group ,
2435 build_maas_headers ,
2536 get_maas_models_response ,
@@ -60,22 +71,19 @@ def minted_token(request_session_http, base_url: str, current_client_token: str)
6071 return token
6172
6273
@pytest.fixture(scope="class")
def base_url(maas_scheme: str, maas_host: str) -> str:
    """Return the MaaS API base URL in the form ``<scheme>://<host>/maas-api``.

    Args:
        maas_scheme: URL scheme provided by the ``maas_scheme`` fixture.
        maas_host: Host name provided by the ``maas_host`` fixture.

    Returns:
        The base URL string used by the MaaS API requests in these tests.
    """
    # The components are joined without any whitespace so the result is a
    # well-formed URL.
    return f"{maas_scheme}://{maas_host}/maas-api"
6877
6978
@pytest.fixture(scope="class")
def model_url(
    maas_scheme: str,
    maas_host: str,
    admin_client: DynamicClient,
) -> str:
    """Return the chat-completions URL of the deployed LLMInferenceService.

    MODEL_URL: http(s)://<host>/llm/<deployment>/v1/chat/completions

    Args:
        maas_scheme: URL scheme provided by the ``maas_scheme`` fixture.
        maas_host: Host name provided by the ``maas_host`` fixture.
        admin_client: Cluster client passed to ``llmis_name`` to look up the
            deployment name.

    Returns:
        ``<scheme>://<host>/llm/<deployment>`` followed by ``CHAT_COMPLETIONS``.
    """
    deployment = llmis_name(client=admin_client)
    # CHAT_COMPLETIONS is appended directly to the deployment segment; no
    # whitespace may separate any of the URL components.
    return f"{maas_scheme}://{maas_host}/llm/{deployment}{CHAT_COMPLETIONS}"
7987
8088
8189@pytest .fixture
@@ -85,9 +93,10 @@ def maas_headers(minted_token: str) -> dict:
8593
8694@pytest .fixture
8795def maas_models (
88- request_session_http ,
89- base_url ,
90- maas_headers ,
96+ request_session_http : requests .Session ,
97+ base_url : str ,
98+ maas_headers : dict ,
99+ maas_inference_service_tinyllama : LLMInferenceService ,
91100):
92101 resp = get_maas_models_response (
93102 session = request_session_http ,
@@ -458,3 +467,69 @@ def maas_models_response_for_actor(
458467 base_url = base_url ,
459468 headers = maas_headers_for_actor ,
460469 )
470+
471+
@pytest.fixture(scope="class")
def maas_inference_service_tinyllama(
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
    model_service_account: ServiceAccount,
) -> Generator[LLMInferenceService, None, None]:
    """
    TinyLlama S3-backed LLMInferenceService wired through MaaS for tests.

    Creates the "llm-s3-tinyllama" service in ``unprivileged_model_namespace``
    via ``create_llmisvc`` (called with ``wait=True`` and a 15-minute timeout),
    applies ``patch_llmisvc_with_maas_router`` to it, checks the resulting
    resource and yields it to the tests.

    Args:
        admin_client: Cluster client used to create the service.
        unprivileged_model_namespace: Namespace the service is created in.
        model_service_account: Service account whose name is set on the service.

    Yields:
        The created LLMInferenceService.

    Raises:
        AssertionError: If the storage URI on the resource differs from
            ``ModelStorage.TINYLLAMA_S3`` or the "Ready" condition is not "True".
    """
    with (
        create_llmisvc(
            client=admin_client,
            name="llm-s3-tinyllama",
            namespace=unprivileged_model_namespace.name,
            storage_uri=ModelStorage.TINYLLAMA_S3,
            container_image=ContainerImages.VLLM_CPU,
            container_resources={
                "limits": {"cpu": "2", "memory": "12Gi"},
                "requests": {"cpu": "1", "memory": "8Gi"},
            },
            service_account=model_service_account.name,
            wait=True,
            timeout=Timeout.TIMEOUT_15MIN,
        ) as llm_service,
        patch_llmisvc_with_maas_router(llm_service=llm_service),
    ):
        llmd_instance = llm_service.instance

        storage_uri = llmd_instance.spec.model.uri
        assert storage_uri == ModelStorage.TINYLLAMA_S3, (
            f"Unexpected storage_uri on TinyLlama LLMInferenceService: {storage_uri}"
        )

        # A missing status or conditions list is treated as "no conditions" so
        # the failure surfaces through the descriptive assertion below rather
        # than as a TypeError/AttributeError while building the mapping.
        reported_conditions = getattr(llmd_instance.status, "conditions", None) or []
        conditions = {condition.type: condition.status for condition in reported_conditions}
        assert conditions.get("Ready") == "True", (
            f"TinyLlama LLMInferenceService not Ready, conditions={conditions}"
        )

        LOGGER.info(
            f"MaaS: TinyLlama S3 LLMInferenceService "
            f"{llm_service.namespace}/{llm_service.name} "
            f"is Ready with storage_uri={storage_uri}"
        )

        yield llm_service

        # Runs at fixture teardown, just before the context managers above exit.
        LOGGER.info(
            f"MaaS: TinyLlama S3 LLMInferenceService "
            f"{llm_service.namespace}/{llm_service.name} "
            f"will be deleted at teardown"
        )
523+
524+
@pytest.fixture(scope="class")
def maas_scheme(admin_client: DynamicClient, unprivileged_model_namespace: Namespace) -> str:
    """Return the URL scheme reported by ``detect_scheme_via_llmisvc``.

    The lookup is made with the admin client against the name of
    ``unprivileged_model_namespace``.

    NOTE(review): this fixture does not request
    ``maas_inference_service_tinyllama``, so detection may run before that
    service exists — confirm ``detect_scheme_via_llmisvc`` tolerates that.
    """
    namespace_name = unprivileged_model_namespace.name
    scheme = detect_scheme_via_llmisvc(client=admin_client, namespace=namespace_name)
    return scheme
531+
532+
@pytest.fixture(scope="session")
def maas_host(admin_client: DynamicClient) -> str:
    """Return the MaaS host derived by ``host_from_ingress_domain``.

    Args:
        admin_client: Cluster client passed to ``host_from_ingress_domain``.

    Returns:
        The host name used to build the MaaS URLs in these tests.
    """
    return host_from_ingress_domain(client=admin_client)
0 commit comments