# Identifiers shared by the inline and remote Ragas provider test classes below.
RAGAS_DATASET_ID: str = "ragas_dataset"
RAGAS_INLINE_BENCHMARK_ID: str = "ragas_benchmark_inline"
RAGAS_REMOTE_BENCHMARK_ID: str = "ragas_benchmark_remote"

# Minimal single-row RAG evaluation dataset in Ragas' expected schema
# (user_input / response / retrieved_contexts / reference), shared by both
# the inline and remote provider tests so registrations stay consistent.
RAGAS_TEST_DATASET: list[dict] = [
    {
        "user_input": "What is the capital of France?",
        "response": "The capital of France is Paris.",
        "retrieved_contexts": ["Paris is the capital and most populous city of France."],
        "reference": "Paris",
    },
]
1121
1222
# NOTE(review): this span is a diff rendering (fused old/new line numbers and
# +/- markers), not runnable Python. Two `@@` hunks below elide lines of this
# class (the llama_stack_server_config dict, the benchmark-register arguments,
# and the benchmark_config body), so it cannot be reconstructed from this view
# alone — recover the elided lines from the original file before editing.
1323@pytest .mark .parametrize (
1424 "model_namespace, minio_pod, minio_data_connection, llama_stack_server_config" ,
1525 [
1626 pytest .param (
17- {"name" : "test-llamastack-ragas" },
# NOTE(review): the new namespace name appears to contain a trailing space
# ("test-llamastack-ragas-inline ") — possibly an extraction artifact, but if
# real it is an invalid Kubernetes namespace name. Confirm against the commit.
27+ {"name" : "test-llamastack-ragas-inline " },
1828 MinIo .PodConfig .QWEN_HAP_BPIV2_MINIO_CONFIG ,
1929 {"bucket" : "llms" },
# NOTE(review): the server-config dict contents (old lines 21-27) are elided
# here by the diff view; presumably analogous to the remote class' config
# (vllm_url_fixture / inference_model / embedding_model) — verify in the file.
2030 {
2838 )
2939@pytest .mark .rawdeployment
3040@pytest .mark .model_explainability
# Renamed from TestLlamaStackRagasProvider to distinguish the inline provider
# tests from the new remote-provider class added in the same commit.
31- class TestLlamaStackRagasProvider :
32- """Tests for LlamaStack Ragas evaluation provider integration."""
41+ class TestLlamaStackRagasInlineProvider :
42+ """Tests for LlamaStack Ragas inline evaluation provider integration."""
3343
34- def test_ragas_register_dataset (self , minio_pod , minio_data_connection , llama_stack_client ):
44+ def test_ragas_inline_register_dataset (self , minio_pod , minio_data_connection , llama_stack_client ):
3545 """Register a RAG evaluation dataset with sample question-answer data."""
# The commit hoists this inline dataset literal to the module-level
# RAGAS_TEST_DATASET constant so both test classes share one definition.
36- ragas_dataset = [
37- {
38- "user_input" : "What is the capital of France?" ,
39- "response" : "The capital of France is Paris." ,
40- "retrieved_contexts" : ["Paris is the capital and most populous city of France." ],
41- "reference" : "Paris" ,
42- },
43- ]
44-
4546 response = llama_stack_client .datasets .register (
4647 dataset_id = RAGAS_DATASET_ID ,
4748 purpose = "eval/question-answer" ,
48- source = {"type" : "rows" , "rows" : ragas_dataset },
49+ source = {"type" : "rows" , "rows" : RAGAS_TEST_DATASET },
4950 metadata = {
5051 "provider_id" : "localfs" ,
5152 "description" : "Sample RAG evaluation dataset for Ragas demo" ,
52- "size" : len (ragas_dataset ),
53+ "size" : len (RAGAS_TEST_DATASET ),
5354 "format" : "ragas" ,
5455 "created_at" : datetime .now ().isoformat (),
5556 },
5657 )
5758
5859 assert response .identifier == RAGAS_DATASET_ID
59- assert response .source .rows == ragas_dataset
60+ assert response .source .rows == RAGAS_TEST_DATASET
6061
61- def test_ragas_register_benchmark (self , minio_pod , minio_data_connection , llama_stack_client ):
62+ def test_ragas_inline_register_benchmark (self , minio_pod , minio_data_connection , llama_stack_client ):
6263 """Register a Ragas benchmark with answer relevancy scoring function."""
6364 llama_stack_client .benchmarks .register (
6465 benchmark_id = RAGAS_INLINE_BENCHMARK_ID ,
# NOTE(review): hunk header — the remaining register() arguments and the
# `response = llama_stack_client.benchmarks.list()` call are elided here.
@@ -72,8 +73,8 @@ def test_ragas_register_benchmark(self, minio_pod, minio_data_connection, llama_
7273 assert response [0 ].identifier == RAGAS_INLINE_BENCHMARK_ID
7374 assert response [0 ].provider_id == LlamaStackProviders .Eval .TRUSTYAI_RAGAS_INLINE
7475
75- def test_ragas_run_eval (self , minio_pod , minio_data_connection , llama_stack_client ):
76- """Run an evaluation job using the Ragas benchmark and wait for completion."""
76+ def test_ragas_inline_run_eval (self , minio_pod , minio_data_connection , llama_stack_client ):
77+ """Run an evaluation job using the Ragas inline benchmark and wait for completion."""
7778 job = llama_stack_client .alpha .eval .run_eval (
7879 benchmark_id = RAGAS_INLINE_BENCHMARK_ID ,
7980 benchmark_config = {
# NOTE(review): hunk header — the benchmark_config body (eval_candidate,
# scoring_params) is elided here by the diff view.
@@ -88,5 +89,83 @@ def test_ragas_run_eval(self, minio_pod, minio_data_connection, llama_stack_clie
8889 )
8990
9091 wait_for_eval_job_completion (
91- llama_stack_client = llama_stack_client , job_id = job .job_id , benchmark_id = RAGAS_INLINE_BENCHMARK_ID
92+ llama_stack_client = llama_stack_client ,
93+ job_id = job .job_id ,
94+ benchmark_id = RAGAS_INLINE_BENCHMARK_ID ,
95+ )
96+
97+
@pytest.mark.parametrize(
    "model_namespace, minio_pod, minio_data_connection, llama_stack_server_config",
    [
        pytest.param(
            {"name": "test-llamastack-ragas-remote"},
            MinIo.PodConfig.QWEN_HAP_BPIV2_MINIO_CONFIG,
            {"bucket": "llms"},
            {
                "vllm_url_fixture": "qwen_isvc_url",
                "inference_model": QWEN_MODEL_NAME,
                "embedding_model": "granite-embedding-125m",
                # Switches the llama-stack server fixture to the remote
                # (Kubeflow Pipelines) Ragas eval provider.
                "enable_ragas_remote": True,
            },
        )
    ],
    indirect=True,
)
@pytest.mark.rawdeployment
@pytest.mark.model_explainability
class TestLlamaStackRagasRemoteProvider:
    """Tests for LlamaStack Ragas remote evaluation provider integration with Kubeflow Pipelines.

    The three tests run in order against one parametrized server instance:
    register the dataset, register the benchmark, then run the evaluation.
    """

    def test_ragas_remote_register_dataset(self, minio_pod, minio_data_connection, llama_stack_client):
        """Register a RAG evaluation dataset with sample question-answer data."""
        response = llama_stack_client.datasets.register(
            dataset_id=RAGAS_DATASET_ID,
            purpose="eval/question-answer",
            # Inline rows: the dataset travels in the request body rather
            # than being fetched from a URI.
            source={"type": "rows", "rows": RAGAS_TEST_DATASET},
            metadata={
                "provider_id": "localfs",
                "description": "Sample RAG evaluation dataset for Ragas demo",
                "size": len(RAGAS_TEST_DATASET),
                "format": "ragas",
                "created_at": datetime.now().isoformat(),
            },
        )

        assert response.identifier == RAGAS_DATASET_ID
        assert response.source.rows == RAGAS_TEST_DATASET

    def test_ragas_remote_register_benchmark(self, minio_pod, minio_data_connection, llama_stack_client):
        """Register a Ragas benchmark with answer relevancy scoring function using remote provider."""
        llama_stack_client.benchmarks.register(
            benchmark_id=RAGAS_REMOTE_BENCHMARK_ID,
            dataset_id=RAGAS_DATASET_ID,
            scoring_functions=["answer_relevancy"],
            provider_id=LlamaStackProviders.Eval.TRUSTYAI_RAGAS_REMOTE,
        )

        # Look the benchmark up by identifier instead of assuming it is the
        # first list entry — robust against other registrations on the server.
        benchmarks = llama_stack_client.benchmarks.list()
        benchmark = next((b for b in benchmarks if b.identifier == RAGAS_REMOTE_BENCHMARK_ID), None)
        assert benchmark is not None, f"{RAGAS_REMOTE_BENCHMARK_ID} not found in registered benchmarks"
        assert benchmark.dataset_id == RAGAS_DATASET_ID
        assert benchmark.provider_id == LlamaStackProviders.Eval.TRUSTYAI_RAGAS_REMOTE

    def test_ragas_remote_run_eval(self, minio_pod, minio_data_connection, llama_stack_client):
        """Run an evaluation job using the Ragas remote benchmark and wait for completion."""
        job = llama_stack_client.alpha.eval.run_eval(
            benchmark_id=RAGAS_REMOTE_BENCHMARK_ID,
            benchmark_config={
                "eval_candidate": {
                    "model": QWEN_MODEL_NAME,
                    "type": "model",
                    "provider_id": LlamaStackProviders.Eval.TRUSTYAI_RAGAS_REMOTE,
                    # Low temperature / short completions keep the eval cheap
                    # and near-deterministic.
                    "sampling_params": {"temperature": 0.1, "max_tokens": 100},
                },
                "scoring_params": {},
            },
        )

        # Remote eval is asynchronous: poll until the KFP-backed job finishes.
        wait_for_eval_job_completion(
            llama_stack_client=llama_stack_client,
            job_id=job.job_id,
            benchmark_id=RAGAS_REMOTE_BENCHMARK_ID,
        )