|
| 1 | +from typing import Dict, Generator, Any |
| 2 | +import pytest |
| 3 | +import os |
| 4 | +from kubernetes.dynamic import DynamicClient |
| 5 | +from ocp_resources.data_science_cluster import DataScienceCluster |
| 6 | +from ocp_resources.deployment import Deployment |
| 7 | +from _pytest.fixtures import FixtureRequest |
| 8 | +from ocp_resources.namespace import Namespace |
| 9 | +from utilities.infra import create_ns |
| 10 | +from simple_logger.logger import get_logger |
| 11 | +from utilities.rag_utils import create_llama_stack_distribution, LlamaStackDistribution |
| 12 | +from utilities.data_science_cluster_utils import update_components_in_dsc |
| 13 | +from utilities.constants import DscComponents, Timeout |
| 14 | +from utilities.general import generate_random_name |
| 15 | +from timeout_sampler import TimeoutSampler |
| 16 | + |
# Module-level logger for these fixtures (currently unused in this chunk; kept for test logging).
LOGGER = get_logger(name=__name__)
| 18 | + |
| 19 | + |
def llama_stack_server() -> Dict[str, Any]:
    """Return the server spec for a LlamaStackDistribution backed by a remote vLLM endpoint.

    Connection details come from the RAG_VLLM_URL, RAG_VLLM_MODEL and
    RAG_VLLM_TOKEN environment variables (values are None when unset).
    """
    # Insertion order matters: it defines the order of the container's env list.
    container_env = {
        "INFERENCE_MODEL": os.getenv("RAG_VLLM_MODEL"),
        "VLLM_TLS_VERIFY": "false",
        "VLLM_API_TOKEN": os.getenv("RAG_VLLM_TOKEN"),
        "VLLM_URL": os.getenv("RAG_VLLM_URL"),
        "MILVUS_DB_PATH": "/.llama/distributions/remote-vllm/milvus.db",
    }
    return {
        "containerSpec": {
            "env": [{"name": env_name, "value": env_value} for env_name, env_value in container_env.items()],
            "name": "llama-stack",
            "port": 8321,
        },
        "distribution": {"image": "quay.io/mcampbel/llama-stack:milvus-granite-embedding-125m-english"},
        "podOverrides": {
            "volumeMounts": [{"mountPath": "/root/.llama", "name": "llama-storage"}],
            "volumes": [{"emptyDir": {}, "name": "llama-storage"}],
        },
    }
| 43 | + |
| 44 | + |
@pytest.fixture(scope="class")
def enabled_llama_stack_operator(dsc_resource: DataScienceCluster) -> Generator[DataScienceCluster, Any, Any]:
    """Class-scoped fixture: set the LLAMASTACKOPERATOR DSC component to Managed.

    Yields the updated DataScienceCluster. The update_components_in_dsc context
    manager presumably restores the previous management state on teardown —
    verify against utilities.data_science_cluster_utils.
    """
    with update_components_in_dsc(
        dsc=dsc_resource,
        components={
            DscComponents.LLAMASTACKOPERATOR: DscComponents.ManagementState.MANAGED,
        },
        # Block until the component reports the expected state before tests run.
        wait_for_components_state=True,
    ) as dsc:
        yield dsc
| 55 | + |
| 56 | + |
@pytest.fixture(scope="function")
def rag_test_namespace(unprivileged_client: DynamicClient) -> Generator[Namespace, Any, Any]:
    """Function-scoped fixture: create a uniquely named throwaway namespace for a RAG test.

    The namespace is created via create_ns and cleaned up by its context manager
    when the test finishes.
    """
    with create_ns(generate_random_name(prefix="rag-test-"), unprivileged_client=unprivileged_client) as rag_namespace:
        yield rag_namespace
| 62 | + |
| 63 | + |
@pytest.fixture(scope="function")
def llama_stack_distribution_from_template(
    enabled_llama_stack_operator: DataScienceCluster,
    rag_test_namespace: Namespace,
    admin_client: DynamicClient,
) -> Generator[LlamaStackDistribution, Any, Any]:
    """Function-scoped fixture: create a LlamaStackDistribution CR in the test namespace.

    Depends on enabled_llama_stack_operator purely for ordering — it ensures the
    llama-stack operator is Managed before the CR is created; the injected DSC
    value itself is not used here. The CR is torn down by the context manager
    after the test.

    Fixes vs. original: dropped the unused `request: FixtureRequest` parameter
    (dead fixture dependency) and corrected the injected-fixture annotation —
    pytest injects the *yielded* value, not a Generator.
    """
    with create_llama_stack_distribution(
        client=admin_client,
        name="rag-llama-stack-distribution",
        namespace=rag_test_namespace.name,
        replicas=1,
        server=llama_stack_server(),
    ) as llama_stack_distribution:
        yield llama_stack_distribution
| 79 | + |
| 80 | + |
@pytest.fixture(scope="function")
def llama_stack_distribution_deployment(
    rag_test_namespace: Namespace,
    admin_client: DynamicClient,
    llama_stack_distribution_from_template: LlamaStackDistribution,
) -> Generator[Deployment, Any, Any]:
    """Function-scoped fixture: yield the Deployment backing the LlamaStackDistribution.

    Waits up to Timeout.TIMEOUT_15_SEC for the deployment to appear (the
    operator creates it asynchronously after the CR), then asserts existence
    before yielding.

    Fixes vs. original: the deployment name is derived from the distribution
    fixture instead of re-hardcoding "rag-llama-stack-distribution", and the
    injected-fixture annotation is the yielded type, not a Generator.
    """
    # NOTE(review): assumes the operator names the deployment after the CR —
    # both were hardcoded to the identical string before; verify operator behavior.
    deployment = Deployment(
        client=admin_client,
        namespace=rag_test_namespace.name,
        name=llama_stack_distribution_from_template.name,
    )

    timeout = Timeout.TIMEOUT_15_SEC
    # Poll until Deployment.exists returns a resource (None means not created yet).
    sampler = TimeoutSampler(
        wait_timeout=timeout,
        sleep=1,
        func=lambda: deployment.exists is not None,
    )
    for deployment_found in sampler:
        if deployment_found:
            break

    assert deployment.exists, f"llama stack distribution deployment doesn't exist within {timeout} seconds"
    yield deployment