Skip to content

Commit c8033ce

Browse files
authored
Merge branch 'opendatahub-io:main' into ige
2 parents 6891176 + ba71c1c commit c8033ce

10 files changed

Lines changed: 391 additions & 42 deletions

File tree

tests/llama_stack/agents/test_agents.py renamed to tests/llama_stack/agents/test_agents_deprecated.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,19 @@
2121
)
2222
@pytest.mark.rag
2323
@pytest.mark.skip_must_gather
24-
class TestLlamaStackAgents:
25-
"""Test class for LlamaStack Agents API
24+
class TestLlamaStackAgentsDeprecated:
25+
"""Test class for LlamaStack Agents API (Deprecated)
2626
27-
For more information about this API, see:
28-
- https://llamastack.github.io/docs/building_applications/agent
29-
- https://llamastack.github.io/docs/references/python_sdk_reference#agents
30-
- https://llamastack.github.io/docs/building_applications/responses_vs_agents
27+
Deprecation Notice: The LlamaStack Agents API was removed server-side in llama-stack 0.3.0.
28+
It is partially implemented in llama-stack-client using the Responses API
29+
(https://github.com/llamastack/llama-stack-client-python/pull/281).
30+
31+
Users are encouraged to use the Responses API directly.
32+
33+
For more information, see:
34+
- https://llamastack.github.io/docs/api-deprecated/agents
35+
- "Migrating from Agent objects to Responses in Llama Stack":
36+
https://github.com/opendatahub-io/agents/blob/5902bef12c25281eecfcd3d25654de8b02857e33/migration/legacy-agents/responses-api-agent-migration.ipynb
3137
"""
3238

3339
@pytest.mark.smoke
@@ -106,11 +112,18 @@ def test_agents_simple_agent(
106112
)
107113

108114
@pytest.mark.smoke
115+
@pytest.mark.parametrize(
116+
"enable_streaming",
117+
[
118+
pytest.param(False, id="streaming_disabled"),
119+
],
120+
)
109121
def test_agents_rag_agent(
110122
self,
111123
unprivileged_llama_stack_client: LlamaStackClient,
112124
llama_stack_models: ModelInfo,
113125
vector_store_with_example_docs: VectorStore,
126+
enable_streaming: bool,
114127
) -> None:
115128
"""
116129
Test RAG agent that can answer questions about the Torchtune project using the documents
@@ -123,7 +136,8 @@ def test_agents_rag_agent(
123136
Based on "Build a RAG Agent" example available at
124137
https://llamastack.github.io/docs/getting_started/detailed_tutorial
125138
126-
# TODO: update this example to use the vector_store API
139+
Note: streaming is not tested (enable_streaming = False), as it seems to be broken in
140+
llama-stack 0.3.0 (Agents API is only partially implemented)
127141
"""
128142

129143
# Create the RAG agent connected to the vector database
@@ -147,19 +161,26 @@ def test_agents_rag_agent(
147161
rag_agent=rag_agent,
148162
session_id=session_id,
149163
turns_with_expectations=turns_with_expectations,
150-
stream=True,
164+
stream=enable_streaming,
151165
verbose=True,
152166
min_keywords_required=1,
153167
print_events=False,
154168
)
155169

156170
# Assert that validation was successful
157-
assert validation_result["success"], f"RAG agent validation failed. Summary: {validation_result['summary']}"
171+
assert validation_result["success"], (
172+
f"RAG agent validation failed with streaming={enable_streaming}. Summary: {validation_result['summary']}"
173+
)
158174

159175
# Additional assertions for specific requirements
160176
for result in validation_result["results"]:
161-
assert result["event_count"] > 0, f"No events generated for question: {result['question']}"
162-
assert result["response_length"] > 0, f"No response content for question: {result['question']}"
177+
assert result["response_length"] > 0, (
178+
f"No response content for question: {result['question']} (streaming={enable_streaming})"
179+
)
163180
assert len(result["found_keywords"]) > 0, (
164-
f"No expected keywords found in response for: {result['question']}"
181+
f"No expected keywords found in response for: {result['question']} (streaming={enable_streaming})"
165182
)
183+
if enable_streaming:
184+
assert result["event_count"] > 0, (
185+
f"No events generated for question: {result['question']} (streaming={enable_streaming})"
186+
)

tests/llama_stack/conftest.py

Lines changed: 63 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
create_llama_stack_distribution,
2020
wait_for_llama_stack_client_ready,
2121
vector_store_create_file_from_url,
22+
wait_for_unique_llama_stack_pod,
2223
)
2324
from utilities.constants import DscComponents, Annotations
2425
from utilities.data_science_cluster_utils import update_components_in_dsc
@@ -30,6 +31,8 @@
3031

3132
LOGGER = get_logger(name=__name__)
3233

34+
distribution_name = generate_random_name(prefix="llama-stack-distribution")
35+
3336

3437
@pytest.fixture(scope="class")
3538
def enabled_llama_stack_operator(dsc_resource: DataScienceCluster) -> Generator[DataScienceCluster, Any, Any]:
@@ -87,6 +90,14 @@ def llama_stack_server_config(
8790
- fms_orchestrator_url_fixture: Fixture name to get FMS orchestrator URL from
8891
- vector_io_provider: Vector I/O provider type ("milvus" or "milvus-remote")
8992
- llama_stack_storage_size: Storage size for the deployment
93+
- embedding_model: Embedding model identifier for inference
94+
- kubeflow_llama_stack_url: LlamaStack service URL for Kubeflow
95+
- kubeflow_pipelines_endpoint: Kubeflow Pipelines API endpoint URL
96+
- kubeflow_namespace: Namespace for Kubeflow resources
97+
- kubeflow_base_image: Base container image for Kubeflow pipelines
98+
- kubeflow_results_s3_prefix: S3 prefix for storing Kubeflow results
99+
- kubeflow_s3_credentials_secret_name: Secret name for S3 credentials
100+
- kubeflow_pipelines_token: Authentication token for Kubeflow Pipelines
90101
91102
Example:
92103
@pytest.mark.parametrize("llama_stack_server_config",
@@ -136,6 +147,48 @@ def test_with_remote_milvus(llama_stack_server_config):
136147
if embedding_model:
137148
env_vars.append({"name": "EMBEDDING_MODEL", "value": embedding_model})
138149

150+
# Kubeflow-related environment variables
151+
if params.get("enable_ragas_remote"):
152+
# Get fixtures only when Ragas Remote/Kubeflow is enabled
153+
model_namespace = request.getfixturevalue(argname="model_namespace")
154+
current_client_token = request.getfixturevalue(argname="current_client_token")
155+
dspa_route = request.getfixturevalue(argname="dspa_route")
156+
dspa_s3_secret = request.getfixturevalue(argname="dspa_s3_secret")
157+
158+
# KUBEFLOW_LLAMA_STACK_URL: Build from LlamaStackDistribution service
159+
env_vars.append({
160+
"name": "KUBEFLOW_LLAMA_STACK_URL",
161+
"value": f"http://{distribution_name}-service.{model_namespace.name}.svc.cluster.local:8321",
162+
})
163+
164+
# KUBEFLOW_PIPELINES_ENDPOINT: Get from DSPA route
165+
env_vars.append({"name": "KUBEFLOW_PIPELINES_ENDPOINT", "value": f"https://{dspa_route.instance.spec.host}"})
166+
167+
# KUBEFLOW_NAMESPACE: Use model namespace
168+
env_vars.append({"name": "KUBEFLOW_NAMESPACE", "value": model_namespace.name})
169+
170+
# KUBEFLOW_BASE_IMAGE
171+
env_vars.append({
172+
"name": "KUBEFLOW_BASE_IMAGE",
173+
"value": params.get(
174+
"kubeflow_base_image",
175+
"quay.io/diegosquayorg/my-ragas-provider-image"
176+
"@sha256:3749096c47f7536d6be2a7932e691abebacd578bafbe65bad2f7db475e2b93fb",
177+
),
178+
})
179+
180+
# KUBEFLOW_RESULTS_S3_PREFIX: Build from MinIO bucket
181+
env_vars.append({
182+
"name": "KUBEFLOW_RESULTS_S3_PREFIX",
183+
"value": params.get("kubeflow_results_s3_prefix", "s3://llms/ragas-results"),
184+
})
185+
186+
# KUBEFLOW_S3_CREDENTIALS_SECRET_NAME: Use DSPA secret name
187+
env_vars.append({"name": "KUBEFLOW_S3_CREDENTIALS_SECRET_NAME", "value": dspa_s3_secret.name})
188+
189+
# KUBEFLOW_PIPELINES_TOKEN: Get from current client token
190+
env_vars.append({"name": "KUBEFLOW_PIPELINES_TOKEN", "value": str(current_client_token)})
191+
139192
# Depending on parameter vector_io_provider, deploy vector_io provider and obtain required env_vars
140193
vector_io_provider = params.get("vector_io_provider") or "milvus"
141194
env_vars_vector_io = vector_io_provider_deployment_config_factory(provider_name=vector_io_provider)
@@ -144,8 +197,8 @@ def test_with_remote_milvus(llama_stack_server_config):
144197
server_config: Dict[str, Any] = {
145198
"containerSpec": {
146199
"resources": {
147-
"requests": {"cpu": "250m", "memory": "500Mi"},
148-
"limits": {"cpu": "2", "memory": "12Gi"},
200+
"requests": {"cpu": "1", "memory": "3Gi"},
201+
"limits": {"cpu": "3", "memory": "6Gi"},
149202
},
150203
"env": env_vars,
151204
"name": "llama-stack",
@@ -189,7 +242,6 @@ def llama_stack_distribution(
189242
llama_stack_server_config: Dict[str, Any],
190243
) -> Generator[LlamaStackDistribution, None, None]:
191244
# Distribution name needs a random substring due to bug RHAIENG-999 / RHAIENG-1139
192-
distribution_name = generate_random_name(prefix="llama-stack-distribution")
193245
with create_llama_stack_distribution(
194246
client=admin_client,
195247
name=distribution_name,
@@ -208,6 +260,7 @@ def _get_llama_stack_distribution_deployment(
208260
"""
209261
Returns the Deployment resource for a given LlamaStackDistribution.
210262
Note: The deployment is created by the operator; this function retrieves it.
263+
Includes a workaround for RHAIENG-1819 to ensure exactly one pod exists.
211264
212265
Args:
213266
client (DynamicClient): Kubernetes client
@@ -222,9 +275,12 @@ def _get_llama_stack_distribution_deployment(
222275
name=llama_stack_distribution.name,
223276
min_ready_seconds=10,
224277
)
225-
278+
deployment.timeout_seconds = 120
226279
deployment.wait(timeout=120)
227280
deployment.wait_for_replicas()
281+
# Workaround for RHAIENG-1819 (Incorrect number of llama-stack pods deployed after
282+
# creating LlamaStackDistribution after setting custom ca bundle in DSCI)
283+
wait_for_unique_llama_stack_pod(client=client, namespace=llama_stack_distribution.namespace)
228284
yield deployment
229285

230286

@@ -321,6 +377,7 @@ def _create_llama_stack_test_route(
321377
}
322378
}
323379
):
380+
route.wait(timeout=60)
324381
yield route
325382

326383

@@ -355,11 +412,11 @@ def _create_llama_stack_client(
355412
) -> Generator[LlamaStackClient, Any, Any]:
356413
# LLS_CLIENT_VERIFY_SSL is false by default to be able to test with Self-Signed certificates
357414
verifySSL = os.getenv("LLS_CLIENT_VERIFY_SSL", "false").lower() == "true"
358-
http_client = httpx.Client(verify=verifySSL)
415+
http_client = httpx.Client(verify=verifySSL, timeout=240)
359416
try:
360417
client = LlamaStackClient(
361418
base_url=f"https://{route.host}",
362-
timeout=180.0,
419+
max_retries=3,
363420
http_client=http_client,
364421
)
365422
wait_for_llama_stack_client_ready(client=client)

tests/llama_stack/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ class Safety(str, Enum):
1818
class Eval(str, Enum):
1919
TRUSTYAI_LMEVAL = "trustyai_lmeval"
2020
TRUSTYAI_RAGAS_INLINE = "trustyai_ragas_inline"
21+
TRUSTYAI_RAGAS_REMOTE = "trustyai_ragas_remote"
2122

2223

2324
class ModelInfo(NamedTuple):

tests/llama_stack/eval/conftest.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,18 @@
22

33
import pytest
44
from kubernetes.dynamic import DynamicClient
5+
from ocp_resources.data_science_pipelines_application import DataSciencePipelinesApplication
56
from ocp_resources.namespace import Namespace
67
from ocp_resources.persistent_volume_claim import PersistentVolumeClaim
78
from ocp_resources.pod import Pod
9+
from ocp_resources.route import Route
10+
from ocp_resources.secret import Secret
11+
from ocp_resources.service import Service
12+
from timeout_sampler import TimeoutSampler
813

914
from tests.llama_stack.eval.constants import DK_CUSTOM_DATASET_IMAGE
15+
from tests.llama_stack.eval.utils import wait_for_dspa_pods
16+
from utilities.constants import MinIo
1017

1118

1219
@pytest.fixture(scope="class")
@@ -87,3 +94,117 @@ def teardown_lmeval_job_pod(admin_client, model_namespace) -> None:
8794
]:
8895
for pod in pods:
8996
pod.delete()
97+
98+
99+
@pytest.fixture(scope="class")
def dspa(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    minio_pod: Pod,
    minio_service: Service,
    dspa_s3_secret: Secret,
) -> Generator[DataSciencePipelinesApplication, Any, Any]:
    """
    Create a DataSciencePipelinesApplication named "dspa" backed by MinIO object storage.

    The resource is created in the model namespace, and the fixture waits for
    the DSPA pods (via ``wait_for_dspa_pods``) before yielding it.

    Args:
        admin_client: Kubernetes client used to create the resource and wait for its pods.
        model_namespace: Namespace the DSPA is created in.
        minio_pod: Not referenced in the body; presumably requested so MinIO is
            running before the DSPA is created (verify against the fixture).
        minio_service: Service whose cluster IP is used as the object storage host.
        dspa_s3_secret: Secret referenced by name for the S3 credentials.

    Yields:
        DataSciencePipelinesApplication: The created DSPA resource.
    """
    api_server_spec = {
        "deploy": True,
        "enableOauth": True,
        "enableSamplePipeline": False,
        "cacheEnabled": True,
        "artifactSignedURLExpirySeconds": 60,
        "pipelineStore": "kubernetes",
    }

    database_spec = {
        "disableHealthCheck": False,
        "mariaDB": {
            "deploy": True,
            "pipelineDBName": "mlpipeline",
            "pvcSize": "10Gi",
            "username": "mlpipeline",
        },
    }

    # The storage host is the MinIO Service cluster IP plus the default MinIO port.
    minio_endpoint = f"{minio_service.instance.spec.clusterIP}:{MinIo.Metadata.DEFAULT_PORT}"
    object_storage_spec = {
        "disableHealthCheck": False,
        "enableExternalRoute": False,
        "externalStorage": {
            "bucket": "ods-ci-ds-pipelines",
            "host": minio_endpoint,
            "region": "us-east-1",
            "scheme": "http",
            # accessKey / secretKey are the names of the keys inside the secret, not the values.
            "s3CredentialsSecret": {
                "accessKey": "AWS_ACCESS_KEY_ID",  # pragma: allowlist secret
                "secretKey": "AWS_SECRET_ACCESS_KEY",  # pragma: allowlist secret
                "secretName": dspa_s3_secret.name,
            },
        },
    }

    persistence_agent_spec = {"deploy": True, "numWorkers": 2}
    scheduled_workflow_spec = {"deploy": True, "cronScheduleTimezone": "UTC"}

    with DataSciencePipelinesApplication(
        client=admin_client,
        name="dspa",
        namespace=model_namespace.name,
        dsp_version="v2",
        pod_to_pod_tls=True,
        api_server=api_server_spec,
        database=database_spec,
        object_storage=object_storage_spec,
        persistence_agent=persistence_agent_spec,
        scheduled_workflow=scheduled_workflow_spec,
    ) as pipelines_app:
        # Do not hand the resource to tests until its pods have been waited for.
        wait_for_dspa_pods(
            admin_client=admin_client,
            namespace=model_namespace.name,
            dspa_name=pipelines_app.name,
        )
        yield pipelines_app
164+
165+
166+
@pytest.fixture(scope="class")
def dspa_s3_secret(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    minio_service: Service,
) -> Generator[Secret, Any, Any]:
    """
    Create the "dashboard-dspa-secret" Secret holding the MinIO S3 credentials for DSPA.

    Args:
        admin_client: Kubernetes client used to create the Secret.
        model_namespace: Namespace the Secret is created in.
        minio_service: Not referenced in the body; presumably requested only to
            order this fixture after the MinIO service (verify against callers).

    Yields:
        Secret: The created Secret.
    """
    s3_credentials = {
        "AWS_ACCESS_KEY_ID": MinIo.Credentials.ACCESS_KEY_VALUE,
        "AWS_SECRET_ACCESS_KEY": MinIo.Credentials.SECRET_KEY_VALUE,
        "AWS_DEFAULT_REGION": "us-east-1",
    }

    with Secret(
        client=admin_client,
        name="dashboard-dspa-secret",
        namespace=model_namespace.name,
        string_data=s3_credentials,
    ) as s3_secret:
        yield s3_secret
186+
187+
188+
@pytest.fixture(scope="class")
def dspa_route(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    dspa: DataSciencePipelinesApplication,
) -> Generator[Route, Any, Any]:
    """
    Retrieve the Route for the DSPA API server.

    Polls the model namespace for the "ds-pipeline-dspa" Route (every 5 seconds,
    with a 120 second wait timeout) and yields the first match.

    Args:
        admin_client: Kubernetes client used to look up the Route.
        model_namespace: Namespace in which the Route is looked up.
        dspa: Not referenced in the body; presumably requested so the DSPA
            exists before the lookup starts (verify against the fixture).

    Yields:
        Route: The DSPA API server Route.

    NOTE(review): if the Route never appears, TimeoutSampler is expected to
    raise its timeout error before the yield is reached - confirm against the
    timeout_sampler documentation.
    """

    def _get_dspa_route() -> Route | None:
        # An empty lookup result means the Route has not been created yet.
        routes = list(
            Route.get(
                dyn_client=admin_client,
                namespace=model_namespace.name,
                name="ds-pipeline-dspa",
            )
        )
        return routes[0] if routes else None

    dspa_api_route = None
    for sample in TimeoutSampler(wait_timeout=120, sleep=5, func=_get_dspa_route):
        if sample:
            dspa_api_route = sample
            break

    # Yield exactly once, outside the polling loop. Yielding inside the loop
    # without leaving it makes the generator resume sampling at teardown and
    # yield a second time, which pytest reports as a fixture teardown error.
    yield dspa_api_route

0 commit comments

Comments
 (0)