Skip to content

Commit 02b6927

Browse files
authored
feat: add tests for llamastack lmeval provider (#496)
* feat: add tests for llamastack lmeval provider * add assertions * change ns name
1 parent 50cd309 commit 02b6927

File tree

7 files changed

+241
-171
lines changed

7 files changed

+241
-171
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,3 +166,6 @@ cython_debug/
166166

167167
# VSCode config
168168
.vscode/
169+
170+
# AI Assistant Config Files
171+
CLAUDE.md

tests/model_explainability/conftest.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,27 @@
11
from typing import Generator, Any
22

33
import pytest
4+
from _pytest.fixtures import FixtureRequest
45
from kubernetes.dynamic import DynamicClient
6+
from llama_stack_client import LlamaStackClient
57
from ocp_resources.config_map import ConfigMap
8+
from ocp_resources.inference_service import InferenceService
9+
from ocp_resources.llama_stack_distribution import LlamaStackDistribution
610
from ocp_resources.namespace import Namespace
711
from ocp_resources.persistent_volume_claim import PersistentVolumeClaim
12+
from ocp_resources.pod import Pod
13+
from ocp_resources.route import Route
14+
from ocp_resources.secret import Secret
15+
from ocp_resources.service import Service
16+
from ocp_resources.serving_runtime import ServingRuntime
817
from pytest_testconfig import config as py_config
918

19+
from tests.model_explainability.guardrails.constants import QWEN_ISVC_NAME
20+
from tests.model_explainability.constants import MNT_MODELS
1021
from tests.model_explainability.trustyai_service.trustyai_service_utils import TRUSTYAI_SERVICE_NAME
22+
from utilities.constants import KServeDeploymentType, RuntimeTemplates
23+
from utilities.inference_utils import create_isvc
24+
from utilities.serving_runtime import ServingRuntimeFromTemplate
1125

1226

1327
@pytest.fixture(scope="class")
@@ -35,3 +49,156 @@ def trustyai_operator_configmap(
3549
name=f"{TRUSTYAI_SERVICE_NAME}-operator-config",
3650
ensure_exists=True,
3751
)
52+
53+
54+
# LlamaStack fixtures
@pytest.fixture(scope="class")
def llamastack_distribution(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    qwen_isvc: InferenceService,
) -> Generator[LlamaStackDistribution, None, None]:
    """Deploy a class-scoped LlamaStackDistribution backed by the Qwen vLLM InferenceService.

    When the test is indirectly parametrized with a ``guardrails_orchestrator_route_fixture``
    entry, FMS_ORCHESTRATOR_URL is pointed at that route; otherwise it is left empty.
    """
    orchestrator_url = ""
    route_fixture_name = request.param.get("guardrails_orchestrator_route_fixture") if hasattr(request, "param") else None
    if route_fixture_name:
        orchestrator_route = request.getfixturevalue(argname=route_fixture_name)
        orchestrator_url = f"https://{orchestrator_route.host}"

    # Environment for the llama-stack container; VLLM_URL targets the predictor
    # service of the Qwen ISVC on port 8032.
    container_env = [
        {
            "name": "VLLM_URL",
            "value": f"http://{qwen_isvc.name}-predictor.{model_namespace.name}.svc.cluster.local:8032/v1",
        },
        {"name": "INFERENCE_MODEL", "value": MNT_MODELS},
        {"name": "MILVUS_DB_PATH", "value": "~/.llama/milvus.db"},
        {"name": "VLLM_TLS_VERIFY", "value": "false"},
        {"name": "FMS_ORCHESTRATOR_URL", "value": orchestrator_url},
    ]
    server_spec = {
        "containerSpec": {
            "env": container_env,
            "name": "llama-stack",
            "port": 8321,
        },
        "distribution": {"name": "rh-dev"},
        "storage": {"size": "20Gi"},
    }

    with LlamaStackDistribution(
        name="llama-stack-distribution",
        namespace=model_namespace.name,
        replicas=1,
        server=server_spec,
        wait_for_resource=True,
    ) as lls_dist:
        # Startup can be slow (image pulls, model init); allow up to an hour.
        lls_dist.wait_for_status(status=LlamaStackDistribution.Status.READY, timeout=3600)
        yield lls_dist
108+
109+
110+
@pytest.fixture(scope="class")
def llamastack_distribution_service(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    llamastack_distribution: LlamaStackDistribution,
) -> Generator[Service, None, None]:
    """Yield the Service belonging to the LlamaStackDistribution.

    The Service is created by the distribution itself; this fixture only waits
    for it and hands it to tests — it does not own or delete the resource.
    """
    service_name = f"{llamastack_distribution.name}-service"
    service = Service(
        client=admin_client,
        name=service_name,
        namespace=model_namespace.name,
        wait_for_resource=True,
    )
    yield service
122+
123+
124+
@pytest.fixture(scope="class")
def llamastack_distribution_route(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    llamastack_distribution: LlamaStackDistribution,
    llamastack_distribution_service: Service,
) -> Generator[Route, None, None]:
    """Expose the LlamaStack distribution Service through an OpenShift Route.

    The Route is created for the duration of the test class and torn down on exit.
    """
    route_name = f"{llamastack_distribution.name}-route"
    with Route(
        client=admin_client,
        name=route_name,
        namespace=model_namespace.name,
        service=llamastack_distribution_service.name,
    ) as route:
        yield route
138+
139+
140+
@pytest.fixture(scope="class")
def llamastack_client(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    llamastack_distribution_route: Route,
) -> LlamaStackClient:
    """Return a LlamaStackClient that reaches the distribution via its Route host."""
    base_url = f"http://{llamastack_distribution_route.host}"
    return LlamaStackClient(base_url=base_url)
147+
148+
149+
@pytest.fixture(scope="class")
150+
def vllm_runtime(
151+
admin_client: DynamicClient,
152+
model_namespace: Namespace,
153+
minio_pod: Pod,
154+
minio_service: Service,
155+
minio_data_connection: Secret,
156+
) -> Generator[ServingRuntime, Any, Any]:
157+
with ServingRuntimeFromTemplate(
158+
client=admin_client,
159+
name="vllm-runtime-cpu-fp16",
160+
namespace=model_namespace.name,
161+
template_name=RuntimeTemplates.VLLM_CUDA,
162+
deployment_type=KServeDeploymentType.RAW_DEPLOYMENT,
163+
runtime_image="quay.io/rh-aiservices-bu/vllm-cpu-openai-ubi9"
164+
"@sha256:d680ff8becb6bbaf83dfee7b2d9b8a2beb130db7fd5aa7f9a6d8286a58cebbfd",
165+
containers={
166+
"kserve-container": {
167+
"args": [
168+
f"--port={str(8032)}",
169+
"--model=/mnt/models",
170+
],
171+
"ports": [{"containerPort": 8032, "protocol": "TCP"}],
172+
"volumeMounts": [{"mountPath": "/dev/shm", "name": "shm"}],
173+
}
174+
},
175+
volumes=[{"emptyDir": {"medium": "Memory", "sizeLimit": "2Gi"}, "name": "shm"}],
176+
) as serving_runtime:
177+
yield serving_runtime
178+
179+
180+
@pytest.fixture(scope="class")
def qwen_isvc(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    minio_pod: Pod,
    minio_service: Service,
    minio_data_connection: Secret,
    vllm_runtime: ServingRuntime,
) -> Generator[InferenceService, Any, Any]:
    """Serve the Qwen2.5-0.5B-Instruct model from MinIO storage on the vLLM runtime.

    Does not wait for predictor pods; callers that need a ready endpoint must
    wait themselves.
    """
    isvc_resources = {
        "requests": {"cpu": "1", "memory": "8Gi"},
        "limits": {"cpu": "2", "memory": "10Gi"},
    }
    with create_isvc(
        client=admin_client,
        name=QWEN_ISVC_NAME,
        namespace=model_namespace.name,
        deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
        model_format="vLLM",
        runtime=vllm_runtime.name,
        storage_key=minio_data_connection.name,
        storage_path="Qwen2.5-0.5B-Instruct",
        wait_for_predictor_pods=False,
        resources=isvc_resources,
    ) as isvc:
        yield isvc
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Container path where model artifacts are mounted; also used as the
# INFERENCE_MODEL value for the LlamaStack distribution in conftest fixtures.
MNT_MODELS: str = "/mnt/models"

0 commit comments

Comments
 (0)