Skip to content

Commit 5ac0a40

Browse files
committed
WIP: MaaS gateway/policies (not for PR yet)
1 parent b6df07a commit 5ac0a40

3 files changed

Lines changed: 495 additions & 69 deletions

File tree

tests/model_serving/model_server/maas_billing/conftest.py

Lines changed: 204 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,12 @@
1212

1313
from utilities.llmd_utils import create_llmisvc
1414
from utilities.llmd_constants import ModelStorage, ContainerImages
15-
from utilities.constants import Timeout
15+
from utilities.constants import (
16+
MAAS_GATEWAY_NAMESPACE,
17+
MAAS_RATE_LIMIT_POLICY_NAME,
18+
MAAS_TOKEN_RATE_LIMIT_POLICY_NAME,
19+
Timeout,
20+
)
1621

1722
from ocp_resources.infrastructure import Infrastructure
1823
from ocp_resources.oauth import OAuth
@@ -34,15 +39,17 @@
3439
get_maas_models_response,
3540
verify_chat_completions,
3641
maas_gateway_rate_limits_patched,
42+
ensure_maas_gateway_and_policies,
3743
)
3844

39-
4045
# Module-level logger used by the fixtures in this conftest.
LOGGER = get_logger(name=__name__)
4146
# Short aliases for the OpenAIEnpoints path constants.
MODELS_INFO = OpenAIEnpoints.MODELS_INFO
4247
# Appended to the per-model URL built by the model_url fixture.
CHAT_COMPLETIONS = OpenAIEnpoints.CHAT_COMPLETIONS
4348

4449
# Group names for the free and premium MaaS tiers.
# NOTE(review): their consumers are not visible in this view -- confirm usage.
MAAS_FREE_GROUP = "maas-free-users"
4550
MAAS_PREMIUM_GROUP = "maas-premium-users"
51+
# NOTE(review): in this view DSC_NAME and MAAS_DSC_COMPONENT_KEY are referenced
# only by the commented-out maas_controller_enabled fixture -- confirm they are
# still needed before merging.
DSC_NAME = "default"
52+
MAAS_DSC_COMPONENT_KEY = "modelsAsService"
4653

4754

4855
@pytest.fixture(scope="session")
@@ -75,17 +82,17 @@ def minted_token(request_session_http, base_url: str, current_client_token: str)
7582
def base_url(maas_scheme: str, maas_host: str) -> str:
7683
return f"{maas_scheme}://{maas_host}/maas-api"
7784

78-
7985
@pytest.fixture(scope="class")
def model_url(
    maas_scheme: str,
    maas_host: str,
    admin_client: DynamicClient,
    maas_inference_service_tinyllama: LLMInferenceService,
) -> str:
    """
    Build the chat-completions URL for the TinyLlama LLMInferenceService.

    The URL has the form ``{scheme}://{host}/llm/{name}{CHAT_COMPLETIONS}``,
    where ``name`` is taken directly from the fixture-provided service object.

    Args:
        maas_scheme: URL scheme supplied by the ``maas_scheme`` fixture.
        maas_host: Host name supplied by the ``maas_host`` fixture.
        admin_client: Not used in the body; kept so the fixture signature
            stays unchanged.
        maas_inference_service_tinyllama: The LLMInferenceService whose
            ``name`` becomes the path segment after ``/llm/``.

    Returns:
        The fully assembled model URL.
    """
    deployment = maas_inference_service_tinyllama.name
    url = f"{maas_scheme}://{maas_host}/llm/{deployment}{CHAT_COMPLETIONS}"
    # Lazy %-style arguments: the message is only formatted if INFO is enabled.
    LOGGER.info("MaaS: constructed model_url=%s (deployment=%s)", url, deployment)
    return url
8996

9097

9198
@pytest.fixture
@@ -525,6 +532,101 @@ def exercise_rate_limiter(
525532
return status_codes_list
526533

527534

535+
# @pytest.fixture(scope="class")
536+
# def maas_inference_service_tinyllama(
537+
# admin_client: DynamicClient,
538+
# unprivileged_model_namespace: Namespace,
539+
# model_service_account: ServiceAccount,
540+
# ) -> Generator[LLMInferenceService, None, None]:
541+
# """
542+
# TinyLlama S3-backed LLMInferenceService wired through MaaS for tests.
543+
# """
544+
# with (
545+
# create_llmisvc(
546+
# client=admin_client,
547+
# name="llm-s3-tinyllama",
548+
# namespace=unprivileged_model_namespace.name,
549+
# storage_uri=ModelStorage.TINYLLAMA_S3,
550+
# container_image=ContainerImages.VLLM_CPU,
551+
# container_resources={
552+
# "limits": {"cpu": "2", "memory": "12Gi"},
553+
# "requests": {"cpu": "1", "memory": "8Gi"},
554+
# },
555+
# service_account=model_service_account.name,
556+
# wait=True,
557+
# timeout=Timeout.TIMEOUT_15MIN,
558+
# ) as llm_service,
559+
# patch_llmisvc_with_maas_router(llm_service=llm_service),
560+
# ):
561+
# llmd_instance = llm_service.instance
562+
# model_spec = llmd_instance.spec.model
563+
564+
# storage_uri = model_spec.uri
565+
# assert storage_uri == ModelStorage.TINYLLAMA_S3, (
566+
# f"Unexpected storage_uri on TinyLlama LLMInferenceService: {storage_uri}"
567+
# )
568+
569+
# status = llmd_instance.status
570+
# conditions = {condition.type: condition.status for condition in status.conditions}
571+
# assert conditions.get("Ready") == "True", f"TinyLlama LLMInferenceService not Ready, conditions={conditions}"
572+
573+
# LOGGER.info(
574+
# f"MaaS: TinyLlama S3 LLMInferenceService "
575+
# f"{llm_service.namespace}/{llm_service.name} "
576+
# f"is Ready with storage_uri={storage_uri}"
577+
# )
578+
579+
# yield llm_service
580+
581+
# LOGGER.info(
582+
# f"MaaS: TinyLlama S3 LLMInferenceService "
583+
# f"{llm_service.namespace}/{llm_service.name} "
584+
# f"will be deleted at teardown"
585+
# )
586+
587+
# @pytest.fixture(scope="class")
588+
# def maas_inference_service_tinyllama(
589+
# admin_client: DynamicClient,
590+
# unprivileged_model_namespace: Namespace,
591+
# model_service_account: ServiceAccount,
592+
# ) -> Generator[LLMInferenceService, None, None]:
593+
# """
594+
# TinyLlama S3-backed LLMInferenceService wired through MaaS for tests.
595+
# """
596+
# with create_llmisvc(
597+
# client=admin_client,
598+
# name="llm-s3-tinyllama",
599+
# namespace=unprivileged_model_namespace.name,
600+
# storage_uri=ModelStorage.TINYLLAMA_S3,
601+
# container_image=ContainerImages.VLLM_CPU,
602+
# container_resources={
603+
# "limits": {"cpu": "2", "memory": "12Gi"},
604+
# "requests": {"cpu": "1", "memory": "8Gi"},
605+
# },
606+
# service_account=model_service_account.name,
607+
# wait=False, # 🔴 IMPORTANT CHANGE
608+
# timeout=Timeout.TIMEOUT_15MIN,
609+
# ) as llm_service:
610+
611+
# # ✅ Patch IMMEDIATELY
612+
# with patch_llmisvc_with_maas_router(llm_service=llm_service):
613+
614+
# # Now wait for readiness AFTER patch
615+
# llm_service.wait_for_condition(
616+
# condition="Ready",
617+
# status="True",
618+
# timeout=Timeout.TIMEOUT_15MIN,
619+
# )
620+
621+
# # llm_service.refresh()
622+
623+
# LOGGER.info(
624+
# "MaaS: TinyLlama LLMI %s/%s Ready and patched",
625+
# llm_service.namespace,
626+
# llm_service.name,
627+
# )
628+
629+
# yield llm_service
528630
@pytest.fixture(scope="class")
529631
def maas_inference_service_tinyllama(
530632
admin_client: DynamicClient,
@@ -534,49 +636,42 @@ def maas_inference_service_tinyllama(
534636
"""
535637
TinyLlama S3-backed LLMInferenceService wired through MaaS for tests.
536638
"""
537-
with (
538-
create_llmisvc(
539-
client=admin_client,
540-
name="llm-s3-tinyllama",
541-
namespace=unprivileged_model_namespace.name,
542-
storage_uri=ModelStorage.TINYLLAMA_S3,
543-
container_image=ContainerImages.VLLM_CPU,
544-
container_resources={
545-
"limits": {"cpu": "2", "memory": "12Gi"},
546-
"requests": {"cpu": "1", "memory": "8Gi"},
547-
},
548-
service_account=model_service_account.name,
549-
wait=True,
550-
timeout=Timeout.TIMEOUT_15MIN,
551-
) as llm_service,
552-
patch_llmisvc_with_maas_router(llm_service=llm_service),
553-
):
554-
llmd_instance = llm_service.instance
555-
model_spec = llmd_instance.spec.model
556-
557-
storage_uri = model_spec.uri
558-
assert storage_uri == ModelStorage.TINYLLAMA_S3, (
559-
f"Unexpected storage_uri on TinyLlama LLMInferenceService: {storage_uri}"
560-
)
561-
562-
status = llmd_instance.status
563-
conditions = {condition.type: condition.status for condition in status.conditions}
564-
assert conditions.get("Ready") == "True", f"TinyLlama LLMInferenceService not Ready, conditions={conditions}"
565-
566-
LOGGER.info(
567-
f"MaaS: TinyLlama S3 LLMInferenceService "
568-
f"{llm_service.namespace}/{llm_service.name} "
569-
f"is Ready with storage_uri={storage_uri}"
570-
)
639+
with create_llmisvc(
640+
client=admin_client,
641+
name="llm-s3-tinyllama",
642+
namespace=unprivileged_model_namespace.name,
643+
storage_uri=ModelStorage.TINYLLAMA_S3,
644+
container_image=ContainerImages.VLLM_CPU,
645+
container_resources={
646+
"limits": {"cpu": "2", "memory": "12Gi"},
647+
"requests": {"cpu": "1", "memory": "8Gi"},
648+
},
649+
service_account=model_service_account.name,
650+
wait=False,
651+
timeout=Timeout.TIMEOUT_15MIN,
652+
) as llm_service:
653+
654+
# Patch immediately so the controller creates HTTPRoute on MaaS gateway
655+
with patch_llmisvc_with_maas_router(llm_service=llm_service):
656+
657+
inst = llm_service.instance
658+
storage_uri = inst.spec.model.uri
659+
assert storage_uri == ModelStorage.TINYLLAMA_S3, (
660+
f"Unexpected storage_uri on TinyLlama LLMI: {storage_uri}"
661+
)
571662

572-
yield llm_service
663+
llm_service.wait_for_condition(
664+
condition="Ready",
665+
status="True",
666+
timeout=Timeout.TIMEOUT_15MIN,
667+
)
573668

574-
LOGGER.info(
575-
f"MaaS: TinyLlama S3 LLMInferenceService "
576-
f"{llm_service.namespace}/{llm_service.name} "
577-
f"will be deleted at teardown"
578-
)
669+
LOGGER.info(
670+
f"MaaS: TinyLlama LLMI {llm_service.namespace}/{llm_service.name} "
671+
f"Ready and patched (storage_uri={storage_uri})"
672+
)
579673

674+
yield llm_service
580675

581676
@pytest.fixture(scope="class")
582677
def maas_scheme(admin_client: DynamicClient, unprivileged_model_namespace: Namespace) -> str:
@@ -585,6 +680,9 @@ def maas_scheme(admin_client: DynamicClient, unprivileged_model_namespace: Names
585680
namespace=unprivileged_model_namespace.name,
586681
)
587682

683+
# @pytest.fixture(scope="class")
684+
# def maas_scheme() -> str:
685+
# return "https"
588686

589687
@pytest.fixture(scope="class")
590688
def maas_host(admin_client):
@@ -594,15 +692,68 @@ def maas_host(admin_client):
594692
@pytest.fixture(scope="class")
def maas_gateway_rate_limits(
    admin_client: DynamicClient,
    maas_gateway_and_policies: None,
) -> Generator[None, None, None]:
    """
    Keep the MaaS gateway rate-limit policies patched for the test class.

    Requesting ``maas_gateway_and_policies`` makes pytest set that fixture up
    first, so the gateway and its policies are ensured before this fixture
    enters ``maas_gateway_rate_limits_patched``.

    Args:
        admin_client: Client passed through to the patch helper.
        maas_gateway_and_policies: Ordering-only dependency; its value is unused.

    Yields:
        None. Tests run while the patch context manager is active.
    """
    # NOTE(review): presumably the helper applies test rate limits and restores
    # the original policies on exit -- confirm against
    # maas_gateway_rate_limits_patched.
    with maas_gateway_rate_limits_patched(
        admin_client=admin_client,
        namespace=MAAS_GATEWAY_NAMESPACE,
        token_policy_name=MAAS_TOKEN_RATE_LIMIT_POLICY_NAME,
        request_policy_name=MAAS_RATE_LIMIT_POLICY_NAME,
    ):
        yield
704+
705+
@pytest.fixture(scope="session")
def maas_gateway_api_hostname(admin_client: DynamicClient) -> str:
    """
    Return the hostname to use for the MaaS gateway, computed once per session.

    The value is whatever ``host_from_ingress_domain`` returns for the given
    admin client.
    """
    gateway_hostname = host_from_ingress_domain(client=admin_client)
    return gateway_hostname
708+
709+
710+
@pytest.fixture(scope="session")
def maas_gateway_and_policies(
    admin_client: DynamicClient,
    maas_gateway_api_hostname: str,
    # maas_controller_enabled,
) -> Generator[None, None, None]:
    """
    Session-wide guard that keeps the MaaS Gateway and Kuadrant policies in place.

    Enters ``ensure_maas_gateway_and_policies`` with the admin client and the
    gateway hostname, then yields while that context manager stays active.
    """
    gateway_context = ensure_maas_gateway_and_policies(
        admin_client=admin_client,
        hostname=maas_gateway_api_hostname,
    )
    with gateway_context:
        yield
724+
725+
# @pytest.fixture(scope="session")
726+
# def maas_controller_enabled(admin_client):
727+
# """
728+
# Enable MaaS controller via DataScienceCluster component toggle.
729+
# Fails fast if the MaaS DSC component key is not present.
730+
# """
731+
# data_science_cluster = DataScienceCluster(client=admin_client, name=DSC_NAME)
732+
# data_science_cluster.get()
733+
734+
# components_section = (data_science_cluster.instance.get("spec") or {}).get("components") or {}
735+
# if MAAS_DSC_COMPONENT_KEY not in components_section:
736+
# raise RuntimeError(
737+
# f"MaaS tests require DSC component '{MAAS_DSC_COMPONENT_KEY}', but it is missing. "
738+
# f"Available DSC components: {sorted(list(components_section.keys()))}"
739+
# )
740+
741+
# current_component_state = (
742+
# (components_section.get(MAAS_DSC_COMPONENT_KEY) or {}).get("managementState")
743+
# )
744+
# if current_component_state == "Managed":
745+
# wait_for_data_science_cluster_ready(data_science_cluster=data_science_cluster)
746+
# yield
747+
# return
748+
749+
# patch_body = {
750+
# "spec": {
751+
# "components": {
752+
# MAAS_DSC_COMPONENT_KEY: {"managementState": "Managed"},
753+
# }
754+
# }
755+
# }
756+
757+
# with ResourceEditor(patches={data_science_cluster: patch_body}):
758+
# wait_for_data_science_cluster_ready(data_science_cluster=data_science_cluster)
759+
# yield

0 commit comments

Comments
 (0)