opendatahub-io
diff --git a/‎tests/llama_stack/agents/test_agents.py‎ ‎…a_stack/agents/test_agents_deprecated.py‎tests/llama_stack/agents/test_agents.py renamed to tests/llama_stack/agents/test_agents_deprecated.py
Lines changed: 33 additions & 12 deletions b/‎tests/llama_stack/agents/test_agents.py‎ ‎…a_stack/agents/test_agents_deprecated.py‎tests/llama_stack/agents/test_agents.py renamed to tests/llama_stack/agents/test_agents_deprecated.py
Lines changed: 33 additions & 12 deletions
diff --git a/‎tests/llama_stack/conftest.py‎
Lines changed: 63 additions & 6 deletions b/‎tests/llama_stack/conftest.py‎
Lines changed: 63 additions & 6 deletions
diff --git a/‎tests/llama_stack/constants.py‎
Lines changed: 1 addition & 0 deletions b/‎tests/llama_stack/constants.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/llama_stack/eval/conftest.py‎
Lines changed: 121 additions & 0 deletions b/‎tests/llama_stack/eval/conftest.py‎
Lines changed: 121 additions & 0 deletions
@@ -21,13 +21,19 @@
 )
 @pytest.mark.rag
 @pytest.mark.skip_must_gather
-class TestLlamaStackAgents:
-    """Test class for LlamaStack Agents API
+class TestLlamaStackAgentsDeprecated:
+    """Test class for LlamaStack Agents API (Deprecated)
 
-    For more information about this API, see:
-    - https://llamastack.github.io/docs/building_applications/agent
-    - https://llamastack.github.io/docs/references/python_sdk_reference#agents
-    - https://llamastack.github.io/docs/building_applications/responses_vs_agents
+    Deprecation Notice: The LlamaStack Agents API was removed server-side in llama-stack 0.3.0.
+    It is partially implemented in llama-stack-client using the Responses API
+    (https://github.com/llamastack/llama-stack-client-python/pull/281).
+
+    Users are encouraged to use the Responses API directly.
+
+    For more information, see:
+    - https://llamastack.github.io/docs/api-deprecated/agents
+    - "Migrating from Agent objects to Responses in Llama Stack":
+      https://github.com/opendatahub-io/agents/blob/5902bef12c25281eecfcd3d25654de8b02857e33/migration/legacy-agents/responses-api-agent-migration.ipynb
     """
 
     @pytest.mark.smoke
@@ -106,11 +112,18 @@ def test_agents_simple_agent(
             )
 
     @pytest.mark.smoke
+    @pytest.mark.parametrize(
+        "enable_streaming",
+        [
+            pytest.param(False, id="streaming_disabled"),
+        ],
+    )
     def test_agents_rag_agent(
         self,
         unprivileged_llama_stack_client: LlamaStackClient,
         llama_stack_models: ModelInfo,
         vector_store_with_example_docs: VectorStore,
+        enable_streaming: bool,
     ) -> None:
         """
         Test RAG agent that can answer questions about the Torchtune project using the documents
@@ -123,7 +136,8 @@ def test_agents_rag_agent(
         Based on "Build a RAG Agent" example available at
         https://llamastack.github.io/docs/getting_started/detailed_tutorial
 
-        # TODO: update this example to use the vector_store API
+        Note: streaming is not tested (enable_streaming = False), as it seems to be broken in
+        llama-stack 0.3.0 (Agents API is only partially implemented)
         """
 
         # Create the RAG agent connected to the vector database
@@ -147,19 +161,26 @@ def test_agents_rag_agent(
             rag_agent=rag_agent,
             session_id=session_id,
             turns_with_expectations=turns_with_expectations,
-            stream=True,
+            stream=enable_streaming,
             verbose=True,
             min_keywords_required=1,
             print_events=False,
         )
 
         # Assert that validation was successful
-        assert validation_result["success"], f"RAG agent validation failed. Summary: {validation_result['summary']}"
+        assert validation_result["success"], (
+            f"RAG agent validation failed with streaming={enable_streaming}. Summary: {validation_result['summary']}"
+        )
 
         # Additional assertions for specific requirements
         for result in validation_result["results"]:
-            assert result["event_count"] > 0, f"No events generated for question: {result['question']}"
-            assert result["response_length"] > 0, f"No response content for question: {result['question']}"
+            assert result["response_length"] > 0, (
+                f"No response content for question: {result['question']} (streaming={enable_streaming})"
+            )
             assert len(result["found_keywords"]) > 0, (
-                f"No expected keywords found in response for: {result['question']}"
+                f"No expected keywords found in response for: {result['question']} (streaming={enable_streaming})"
             )
+            if enable_streaming:
+                assert result["event_count"] > 0, (
+                    f"No events generated for question: {result['question']} (streaming={enable_streaming})"
+                )
@@ -19,6 +19,7 @@
     create_llama_stack_distribution,
     wait_for_llama_stack_client_ready,
     vector_store_create_file_from_url,
+    wait_for_unique_llama_stack_pod,
 )
 from utilities.constants import DscComponents, Annotations
 from utilities.data_science_cluster_utils import update_components_in_dsc
@@ -30,6 +31,8 @@
 
 LOGGER = get_logger(name=__name__)
 
+distribution_name = generate_random_name(prefix="llama-stack-distribution")
+
 
 @pytest.fixture(scope="class")
 def enabled_llama_stack_operator(dsc_resource: DataScienceCluster) -> Generator[DataScienceCluster, Any, Any]:
@@ -87,6 +90,14 @@ def llama_stack_server_config(
         - fms_orchestrator_url_fixture: Fixture name to get FMS orchestrator URL from
         - vector_io_provider: Vector I/O provider type ("milvus" or "milvus-remote")
         - llama_stack_storage_size: Storage size for the deployment
+        - embedding_model: Embedding model identifier for inference
+        - kubeflow_llama_stack_url: LlamaStack service URL for Kubeflow
+        - kubeflow_pipelines_endpoint: Kubeflow Pipelines API endpoint URL
+        - kubeflow_namespace: Namespace for Kubeflow resources
+        - kubeflow_base_image: Base container image for Kubeflow pipelines
+        - kubeflow_results_s3_prefix: S3 prefix for storing Kubeflow results
+        - kubeflow_s3_credentials_secret_name: Secret name for S3 credentials
+        - kubeflow_pipelines_token: Authentication token for Kubeflow Pipelines
 
     Example:
         @pytest.mark.parametrize("llama_stack_server_config",
@@ -136,6 +147,48 @@ def test_with_remote_milvus(llama_stack_server_config):
     if embedding_model:
         env_vars.append({"name": "EMBEDDING_MODEL", "value": embedding_model})
 
+    # Kubeflow-related environment variables
+    if params.get("enable_ragas_remote"):
+        # Get fixtures only when Ragas Remote/Kubeflow is enabled
+        model_namespace = request.getfixturevalue(argname="model_namespace")
+        current_client_token = request.getfixturevalue(argname="current_client_token")
+        dspa_route = request.getfixturevalue(argname="dspa_route")
+        dspa_s3_secret = request.getfixturevalue(argname="dspa_s3_secret")
+
+        # KUBEFLOW_LLAMA_STACK_URL: Build from LlamaStackDistribution service
+        env_vars.append({
+            "name": "KUBEFLOW_LLAMA_STACK_URL",
+            "value": f"http://{distribution_name}-service.{model_namespace.name}.svc.cluster.local:8321",
+        })
+
+        # KUBEFLOW_PIPELINES_ENDPOINT: Get from DSPA route
+        env_vars.append({"name": "KUBEFLOW_PIPELINES_ENDPOINT", "value": f"https://{dspa_route.instance.spec.host}"})
+
+        # KUBEFLOW_NAMESPACE: Use model namespace
+        env_vars.append({"name": "KUBEFLOW_NAMESPACE", "value": model_namespace.name})
+
+        # KUBEFLOW_BASE_IMAGE
+        env_vars.append({
+            "name": "KUBEFLOW_BASE_IMAGE",
+            "value": params.get(
+                "kubeflow_base_image",
+                "quay.io/diegosquayorg/my-ragas-provider-image"
+                "@sha256:3749096c47f7536d6be2a7932e691abebacd578bafbe65bad2f7db475e2b93fb",
+            ),
+        })
+
+        # KUBEFLOW_RESULTS_S3_PREFIX: Build from MinIO bucket
+        env_vars.append({
+            "name": "KUBEFLOW_RESULTS_S3_PREFIX",
+            "value": params.get("kubeflow_results_s3_prefix", "s3://llms/ragas-results"),
+        })
+
+        # KUBEFLOW_S3_CREDENTIALS_SECRET_NAME: Use DSPA secret name
+        env_vars.append({"name": "KUBEFLOW_S3_CREDENTIALS_SECRET_NAME", "value": dspa_s3_secret.name})
+
+        # KUBEFLOW_PIPELINES_TOKEN: Get from current client token
+        env_vars.append({"name": "KUBEFLOW_PIPELINES_TOKEN", "value": str(current_client_token)})
+
     # Depending on parameter vector_io_provider, deploy vector_io provider and obtain required env_vars
     vector_io_provider = params.get("vector_io_provider") or "milvus"
     env_vars_vector_io = vector_io_provider_deployment_config_factory(provider_name=vector_io_provider)
@@ -144,8 +197,8 @@ def test_with_remote_milvus(llama_stack_server_config):
     server_config: Dict[str, Any] = {
         "containerSpec": {
             "resources": {
-                "requests": {"cpu": "250m", "memory": "500Mi"},
-                "limits": {"cpu": "2", "memory": "12Gi"},
+                "requests": {"cpu": "1", "memory": "3Gi"},
+                "limits": {"cpu": "3", "memory": "6Gi"},
             },
             "env": env_vars,
             "name": "llama-stack",
@@ -189,7 +242,6 @@ def llama_stack_distribution(
     llama_stack_server_config: Dict[str, Any],
 ) -> Generator[LlamaStackDistribution, None, None]:
     # Distribution name needs a random substring due to bug RHAIENG-999 / RHAIENG-1139
-    distribution_name = generate_random_name(prefix="llama-stack-distribution")
     with create_llama_stack_distribution(
         client=admin_client,
         name=distribution_name,
@@ -208,6 +260,7 @@ def _get_llama_stack_distribution_deployment(
     """
     Returns the Deployment resource for a given LlamaStackDistribution.
     Note: The deployment is created by the operator; this function retrieves it.
+    Includes a workaround for RHAIENG-1819 to ensure exactly one pod exists.
 
     Args:
         client (DynamicClient): Kubernetes client
@@ -222,9 +275,12 @@ def _get_llama_stack_distribution_deployment(
         name=llama_stack_distribution.name,
         min_ready_seconds=10,
     )
-
+    deployment.timeout_seconds = 120
     deployment.wait(timeout=120)
     deployment.wait_for_replicas()
+    # Workaround for RHAIENG-1819 (Incorrect number of llama-stack pods deployed after
+    # creating LlamaStackDistribution after setting custom ca bundle in DSCI)
+    wait_for_unique_llama_stack_pod(client=client, namespace=llama_stack_distribution.namespace)
     yield deployment
 
 
@@ -321,6 +377,7 @@ def _create_llama_stack_test_route(
                 }
             }
         ):
+            route.wait(timeout=60)
             yield route
 
 
@@ -355,11 +412,11 @@ def _create_llama_stack_client(
 ) -> Generator[LlamaStackClient, Any, Any]:
     # LLS_CLIENT_VERIFY_SSL is false by default to be able to test with Self-Signed certificates
     verifySSL = os.getenv("LLS_CLIENT_VERIFY_SSL", "false").lower() == "true"
-    http_client = httpx.Client(verify=verifySSL)
+    http_client = httpx.Client(verify=verifySSL, timeout=240)
     try:
         client = LlamaStackClient(
             base_url=f"https://{route.host}",
-            timeout=180.0,
+            max_retries=3,
             http_client=http_client,
         )
         wait_for_llama_stack_client_ready(client=client)
 
@@ -18,6 +18,7 @@ class Safety(str, Enum):
     class Eval(str, Enum):
         TRUSTYAI_LMEVAL = "trustyai_lmeval"
         TRUSTYAI_RAGAS_INLINE = "trustyai_ragas_inline"
+        TRUSTYAI_RAGAS_REMOTE = "trustyai_ragas_remote"
 
 
 class ModelInfo(NamedTuple):
 
@@ -2,11 +2,18 @@
 
 import pytest
 from kubernetes.dynamic import DynamicClient
+from ocp_resources.data_science_pipelines_application import DataSciencePipelinesApplication
 from ocp_resources.namespace import Namespace
 from ocp_resources.persistent_volume_claim import PersistentVolumeClaim
 from ocp_resources.pod import Pod
+from ocp_resources.route import Route
+from ocp_resources.secret import Secret
+from ocp_resources.service import Service
+from timeout_sampler import TimeoutSampler
 
 from tests.llama_stack.eval.constants import DK_CUSTOM_DATASET_IMAGE
+from tests.llama_stack.eval.utils import wait_for_dspa_pods
+from utilities.constants import MinIo
 
 
 @pytest.fixture(scope="class")
@@ -87,3 +94,117 @@ def teardown_lmeval_job_pod(admin_client, model_namespace) -> None:
     ]:
         for pod in pods:
             pod.delete()
+
+
+@pytest.fixture(scope="class")
+def dspa(
+    admin_client: DynamicClient,
+    model_namespace: Namespace,
+    minio_pod: Pod,
+    minio_service: Service,
+    dspa_s3_secret: Secret,
+) -> Generator[DataSciencePipelinesApplication, Any, Any]:
+    """
+    Creates a DataSciencePipelinesApplication with MinIO object storage.
+    """
+
+    with DataSciencePipelinesApplication(
+        client=admin_client,
+        name="dspa",
+        namespace=model_namespace.name,
+        dsp_version="v2",
+        pod_to_pod_tls=True,
+        api_server={
+            "deploy": True,
+            "enableOauth": True,
+            "enableSamplePipeline": False,
+            "cacheEnabled": True,
+            "artifactSignedURLExpirySeconds": 60,
+            "pipelineStore": "kubernetes",
+        },
+        database={
+            "disableHealthCheck": False,
+            "mariaDB": {
+                "deploy": True,
+                "pipelineDBName": "mlpipeline",
+                "pvcSize": "10Gi",
+                "username": "mlpipeline",
+            },
+        },
+        object_storage={
+            "disableHealthCheck": False,
+            "enableExternalRoute": False,
+            "externalStorage": {
+                "bucket": "ods-ci-ds-pipelines",
+                "host": f"{minio_service.instance.spec.clusterIP}:{MinIo.Metadata.DEFAULT_PORT}",
+                "region": "us-east-1",
+                "scheme": "http",
+                "s3CredentialsSecret": {
+                    "accessKey": "AWS_ACCESS_KEY_ID",  # pragma: allowlist secret
+                    "secretKey": "AWS_SECRET_ACCESS_KEY",  # pragma: allowlist secret
+                    "secretName": dspa_s3_secret.name,
+                },
+            },
+        },
+        persistence_agent={
+            "deploy": True,
+            "numWorkers": 2,
+        },
+        scheduled_workflow={
+            "deploy": True,
+            "cronScheduleTimezone": "UTC",
+        },
+    ) as dspa_resource:
+        wait_for_dspa_pods(
+            admin_client=admin_client,
+            namespace=model_namespace.name,
+            dspa_name=dspa_resource.name,
+        )
+        yield dspa_resource
+
+
+@pytest.fixture(scope="class")
+def dspa_s3_secret(
+    admin_client: DynamicClient,
+    model_namespace: Namespace,
+    minio_service: Service,
+) -> Generator[Secret, Any, Any]:
+    """
+    Creates a secret for DSPA S3 credentials using MinIO.
+    """
+    with Secret(
+        client=admin_client,
+        name="dashboard-dspa-secret",
+        namespace=model_namespace.name,
+        string_data={
+            "AWS_ACCESS_KEY_ID": MinIo.Credentials.ACCESS_KEY_VALUE,
+            "AWS_SECRET_ACCESS_KEY": MinIo.Credentials.SECRET_KEY_VALUE,
+            "AWS_DEFAULT_REGION": "us-east-1",
+        },
+    ) as secret:
+        yield secret
+
+
+@pytest.fixture(scope="class")
+def dspa_route(
+    admin_client: DynamicClient,
+    model_namespace: Namespace,
+    dspa: DataSciencePipelinesApplication,
+) -> Generator[Route, Any, Any]:
+    """
+    Retrieves the Route for the DSPA API server.
+    """
+
+    def _get_dspa_route() -> Route | None:
+        routes = list(
+            Route.get(
+                dyn_client=admin_client,
+                namespace=model_namespace.name,
+                name="ds-pipeline-dspa",
+            )
+        )
+        return routes[0] if routes else None
+
+    for route in TimeoutSampler(wait_timeout=120, sleep=5, func=_get_dspa_route):
+        if route:
+            yield route