|
| 1 | +from typing import Dict, Generator, Any |
| 2 | +import pytest |
| 3 | +import os |
| 4 | +from kubernetes.dynamic import DynamicClient |
| 5 | +from ocp_resources.data_science_cluster import DataScienceCluster |
| 6 | +from ocp_resources.deployment import Deployment |
| 7 | +from _pytest.fixtures import FixtureRequest |
| 8 | +from ocp_resources.namespace import Namespace |
| 9 | +from utilities.infra import create_ns |
| 10 | +from simple_logger.logger import get_logger |
| 11 | +from utilities.rag_utils import create_llama_stack_distribution, LlamaStackDistribution |
| 12 | +from utilities.data_science_cluster_utils import update_components_in_dsc |
| 13 | +from utilities.constants import DscComponents, Timeout |
| 14 | +from utilities.general import generate_random_name |
| 15 | +from timeout_sampler import TimeoutSampler |
| 16 | + |
# Module-level logger for these fixtures (currently unused in this chunk; kept for test logging).
LOGGER = get_logger(name=__name__)
| 18 | + |
| 19 | + |
def llama_stack_server() -> Dict[str, Any]:
    """Return the server spec for a LlamaStackDistribution backed by a remote vLLM endpoint.

    Connection details come from the RAG_VLLM_URL, RAG_VLLM_MODEL and
    RAG_VLLM_TOKEN environment variables (values are None when unset).
    """
    # Insertion order matters: it defines the order of the container's env list.
    container_env = {
        "INFERENCE_MODEL": os.getenv("RAG_VLLM_MODEL"),
        "VLLM_TLS_VERIFY": "false",
        "VLLM_API_TOKEN": os.getenv("RAG_VLLM_TOKEN"),
        "VLLM_URL": os.getenv("RAG_VLLM_URL"),
        "MILVUS_DB_PATH": "/.llama/distributions/remote-vllm/milvus.db",
    }
    return {
        "containerSpec": {
            "env": [{"name": env_name, "value": env_value} for env_name, env_value in container_env.items()],
            "name": "llama-stack",
            "port": 8321,
        },
        "distribution": {"image": "quay.io/mcampbel/llama-stack:milvus-granite-embedding-125m-english"},
        "podOverrides": {
            "volumeMounts": [{"mountPath": "/root/.llama", "name": "llama-storage"}],
            "volumes": [{"emptyDir": {}, "name": "llama-storage"}],
        },
    }
| 43 | + |
| 44 | + |
@pytest.fixture(scope="class")
def enabled_llama_stack_operator(dsc_resource: DataScienceCluster) -> Generator[DataScienceCluster, Any, Any]:
    """Class-scoped fixture: set the LLAMASTACKOPERATOR DSC component to Managed.

    Yields the updated DataScienceCluster. The update_components_in_dsc context
    manager presumably restores the previous management state on teardown —
    verify against utilities.data_science_cluster_utils.
    """
    with update_components_in_dsc(
        dsc=dsc_resource,
        components={
            DscComponents.LLAMASTACKOPERATOR: DscComponents.ManagementState.MANAGED,
        },
        # Block until the component reports the expected state before tests run.
        wait_for_components_state=True,
    ) as dsc:
        yield dsc
| 55 | + |
| 56 | + |
@pytest.fixture(scope="function")
def rag_test_namespace(unprivileged_client: DynamicClient) -> Generator[Namespace, Any, Any]:
    """Function-scoped fixture: create a uniquely named throwaway namespace for a RAG test.

    The namespace is created via create_ns and cleaned up by its context manager
    when the test finishes.
    """
    with create_ns(generate_random_name(prefix="rag-test-"), unprivileged_client=unprivileged_client) as rag_namespace:
        yield rag_namespace
| 62 | + |
| 63 | + |
@pytest.fixture(scope="function")
def llama_stack_distribution_from_template(
    enabled_llama_stack_operator: DataScienceCluster,
    rag_test_namespace: Namespace,
    admin_client: DynamicClient,
) -> Generator[LlamaStackDistribution, Any, Any]:
    """Function-scoped fixture: create a LlamaStackDistribution CR in the test namespace.

    Depends on enabled_llama_stack_operator purely for ordering — it ensures the
    llama-stack operator is Managed before the CR is created; the injected DSC
    value itself is not used here. The CR is torn down by the context manager
    after the test.

    Fixes vs. original: dropped the unused `request: FixtureRequest` parameter
    (dead fixture dependency) and corrected the injected-fixture annotation —
    pytest injects the *yielded* value, not a Generator.
    """
    with create_llama_stack_distribution(
        client=admin_client,
        name="rag-llama-stack-distribution",
        namespace=rag_test_namespace.name,
        replicas=1,
        server=llama_stack_server(),
    ) as llama_stack_distribution:
        yield llama_stack_distribution
| 79 | + |
| 80 | + |
@pytest.fixture(scope="function")
def llama_stack_distribution_deployment(
    rag_test_namespace: Namespace,
    admin_client: DynamicClient,
    llama_stack_distribution_from_template: LlamaStackDistribution,
) -> Generator[Deployment, Any, Any]:
    """Function-scoped fixture: yield the Deployment backing the LlamaStackDistribution.

    Waits up to Timeout.TIMEOUT_15_SEC for the deployment to appear (the
    operator creates it asynchronously after the CR), then asserts existence
    before yielding.

    Fixes vs. original: the deployment name is derived from the distribution
    fixture instead of re-hardcoding "rag-llama-stack-distribution", and the
    injected-fixture annotation is the yielded type, not a Generator.
    """
    # NOTE(review): assumes the operator names the deployment after the CR —
    # both were hardcoded to the identical string before; verify operator behavior.
    deployment = Deployment(
        client=admin_client,
        namespace=rag_test_namespace.name,
        name=llama_stack_distribution_from_template.name,
    )

    timeout = Timeout.TIMEOUT_15_SEC
    # Poll until Deployment.exists returns a resource (None means not created yet).
    sampler = TimeoutSampler(
        wait_timeout=timeout,
        sleep=1,
        func=lambda: deployment.exists is not None,
    )
    for deployment_found in sampler:
        if deployment_found:
            break

    assert deployment.exists, f"llama stack distribution deployment doesn't exist within {timeout} seconds"
    yield deployment