Skip to content

Commit 7354690

Browse files
jgarciao, pre-commit-ci[bot], lugi0
authored
Add RAG tests based on llama-stack quickstart and tutorial docs (#445)
* Add RAG tests based on llama-stack quickstart and tutorial docs Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Address feedback and suggestions Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add MR RBAC test for multitenancy scenario (#446) * feat: refactoring of fixtures, first test with refactored code Signed-off-by: lugi0 <lgiorgi@redhat.com> * fix: change wording slightly in guide Signed-off-by: lugi0 <lgiorgi@redhat.com> * fix: move classes to utils Signed-off-by: lugi0 <lgiorgi@redhat.com> * feat: add options in fixture handling Signed-off-by: lugi0 <lgiorgi@redhat.com> * feat: add test for RBAC multitenancy scenario Signed-off-by: lugi0 <lgiorgi@redhat.com> * fix: remove code not used Signed-off-by: lugi0 <lgiorgi@redhat.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: lugi0 <lgiorgi@redhat.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Enhance docstring in lls_client fixture Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com> * Fix additional linter errors Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com> Signed-off-by: lugi0 <lgiorgi@redhat.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Luca Giorgi <lgiorgi@redhat.com>
1 parent 2210a35 commit 7354690

3 files changed

Lines changed: 517 additions & 32 deletions

File tree

tests/rag/conftest.py

Lines changed: 82 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,23 @@
1-
from typing import Dict, Generator, Any
2-
import pytest
31
import os
2+
from typing import Any, Dict, Generator
3+
4+
import portforward
5+
import pytest
6+
from _pytest.fixtures import FixtureRequest
47
from kubernetes.dynamic import DynamicClient
8+
from llama_stack_client import LlamaStackClient, APIConnectionError
59
from ocp_resources.data_science_cluster import DataScienceCluster
610
from ocp_resources.deployment import Deployment
7-
from _pytest.fixtures import FixtureRequest
811
from ocp_resources.namespace import Namespace
9-
10-
from utilities.infra import create_ns
12+
from ocp_resources.project_project_openshift_io import Project
1113
from simple_logger.logger import get_logger
12-
from utilities.rag_utils import create_llama_stack_distribution, LlamaStackDistribution
13-
from utilities.data_science_cluster_utils import update_components_in_dsc
14+
from timeout_sampler import TimeoutSampler, retry
15+
1416
from utilities.constants import DscComponents, Timeout
17+
from utilities.data_science_cluster_utils import update_components_in_dsc
1518
from utilities.general import generate_random_name
16-
from timeout_sampler import TimeoutSampler
19+
from utilities.infra import create_ns
20+
from utilities.rag_utils import LlamaStackDistribution, create_llama_stack_distribution
1721

1822
LOGGER = get_logger(name=__name__)
1923

@@ -25,26 +29,28 @@ def llama_stack_server() -> Dict[str, Any]:
2529

2630
return {
2731
"containerSpec": {
32+
"resources": {
33+
"requests": {"cpu": "250m", "memory": "500Mi"},
34+
"limits": {"cpu": "2", "memory": "12Gi"},
35+
},
2836
"env": [
2937
{"name": "INFERENCE_MODEL", "value": rag_vllm_model},
3038
{"name": "VLLM_TLS_VERIFY", "value": "false"},
3139
{"name": "VLLM_API_TOKEN", "value": rag_vllm_token},
3240
{"name": "VLLM_URL", "value": rag_vllm_url},
33-
{"name": "MILVUS_DB_PATH", "value": "/.llama/distributions/remote-vllm/milvus.db"},
41+
{"name": "MILVUS_DB_PATH", "value": "~/.llama/milvus.db"},
42+
{"name": "FMS_ORCHESTRATOR_URL", "value": "http://localhost"},
3443
],
3544
"name": "llama-stack",
3645
"port": 8321,
3746
},
38-
"distribution": {"image": "quay.io/mcampbel/llama-stack:milvus-granite-embedding-125m-english"},
39-
"podOverrides": {
40-
"volumeMounts": [{"mountPath": "/root/.llama", "name": "llama-storage"}],
41-
"volumes": [{"emptyDir": {}, "name": "llama-storage"}],
42-
},
47+
"distribution": {"image": "quay.io/opendatahub/llama-stack:odh"},
48+
"storage": {"size": "5Gi"},
4349
}
4450

4551

4652
@pytest.fixture(scope="class")
47-
def enabled_llama_stack_operator(dsc_resource: DataScienceCluster) -> Generator[None, Any, Any]:
53+
def enabled_llama_stack_operator(dsc_resource: DataScienceCluster) -> Generator[DataScienceCluster, Any, Any]:
4854
with update_components_in_dsc(
4955
dsc=dsc_resource,
5056
components={
@@ -55,19 +61,19 @@ def enabled_llama_stack_operator(dsc_resource: DataScienceCluster) -> Generator[
5561
yield dsc
5662

5763

58-
@pytest.fixture(scope="function")
64+
@pytest.fixture(scope="class")
5965
def rag_test_namespace(
6066
admin_client: DynamicClient, unprivileged_client: DynamicClient
61-
) -> Generator[Namespace, Any, Any]:
62-
namespace_name = generate_random_name(prefix="rag-test-")
63-
with create_ns(namespace_name, admin_client=admin_client, unprivileged_client=unprivileged_client) as ns:
67+
) -> Generator[Namespace | Project, Any, Any]:
68+
namespace_name = generate_random_name(prefix="rag-test")
69+
with create_ns(name=namespace_name, admin_client=admin_client, unprivileged_client=unprivileged_client) as ns:
6470
yield ns
6571

6672

67-
@pytest.fixture(scope="function")
73+
@pytest.fixture(scope="class")
6874
def llama_stack_distribution_from_template(
69-
enabled_llama_stack_operator: Generator[None, Any, Any],
70-
rag_test_namespace: Namespace,
75+
enabled_llama_stack_operator: Generator[DataScienceCluster, Any, Any],
76+
rag_test_namespace: Namespace | Project,
7177
request: FixtureRequest,
7278
admin_client: DynamicClient,
7379
) -> Generator[LlamaStackDistribution, Any, Any]:
@@ -81,9 +87,9 @@ def llama_stack_distribution_from_template(
8187
yield llama_stack_distribution
8288

8389

84-
@pytest.fixture(scope="function")
90+
@pytest.fixture(scope="class")
8591
def llama_stack_distribution_deployment(
86-
rag_test_namespace: Namespace,
92+
rag_test_namespace: Namespace | Project,
8793
admin_client: DynamicClient,
8894
llama_stack_distribution_from_template: Generator[LlamaStackDistribution, Any, Any],
8995
) -> Generator[Deployment, Any, Any]:
@@ -103,3 +109,55 @@ def llama_stack_distribution_deployment(
103109

104110
assert deployment.exists, f"llama stack distribution deployment doesn't exist within {timeout} seconds"
105111
yield deployment
112+
113+
114+
@retry(wait_timeout=Timeout.TIMEOUT_1MIN, sleep=5)
115+
def wait_for_llama_stack_ready(client: LlamaStackClient) -> bool:
116+
try:
117+
client.inspect.health()
118+
version = client.inspect.version()
119+
LOGGER.info(f"Llama Stack server (v{version.version}) is available!")
120+
return True
121+
except APIConnectionError as e:
122+
LOGGER.debug(f"Llama Stack server not ready yet: {e}")
123+
return False
124+
except Exception as e:
125+
LOGGER.warning(f"Unexpected error checking Llama Stack readiness: {e}")
126+
return False
127+
128+
129+
@pytest.fixture(scope="class")
130+
def rag_lls_client(
131+
admin_client: DynamicClient,
132+
rag_test_namespace: Namespace | Project,
133+
llama_stack_distribution_deployment: Deployment,
134+
) -> Generator[LlamaStackClient, Any, Any]:
135+
"""
136+
Returns a ready to use LlamaStackClient, enabling port forwarding
137+
from the llama-stack-server service:8321 to localhost:8321
138+
139+
Args:
140+
admin_client (DynamicClient): Kubernetes dynamic client for cluster operations
141+
rag_test_namespace (Namespace | Project): Namespace or project containing RAG test resources
142+
llama_stack_distribution_deployment (Deployment): LlamaStack distribution deployment resource
143+
144+
Yields:
145+
Generator[LlamaStackClient, Any, Any]: Configured LlamaStackClient for RAG testing
146+
"""
147+
try:
148+
with portforward.forward(
149+
pod_or_service="rag-llama-stack-distribution-service",
150+
namespace=rag_test_namespace.name,
151+
from_port=8321,
152+
to_port=8321,
153+
waiting=15,
154+
):
155+
client = LlamaStackClient(
156+
base_url="http://localhost:8321",
157+
timeout=120.0,
158+
)
159+
wait_for_llama_stack_ready(client=client)
160+
yield client
161+
except Exception as e:
162+
LOGGER.error(f"Failed to set up port forwarding: {e}")
163+
raise

0 commit comments

Comments
 (0)