Skip to content

Commit 75ff2ea

Browse files
authored
Merge branch 'main' into oom
2 parents 4ab46d5 + 8897f6a commit 75ff2ea

37 files changed

Lines changed: 1848 additions & 442 deletions

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ repos:
3636
exclude: .*/__snapshots__/.*|.*-input\.json$
3737

3838
- repo: https://github.com/astral-sh/ruff-pre-commit
39-
rev: v0.13.0
39+
rev: v0.13.2
4040
hooks:
4141
- id: ruff
4242
- id: ruff-format
@@ -55,7 +55,7 @@ repos:
5555
- id: gitleaks
5656

5757
- repo: https://github.com/pre-commit/mirrors-mypy
58-
rev: v1.18.1
58+
rev: v1.18.2
5959
hooks:
6060
- id: mypy
6161
additional_dependencies: ["types-PyYAML", "types-requests"]

conftest.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
Config,
2020
CollectReport,
2121
)
22+
from _pytest.nodes import Node
2223
from _pytest.terminal import TerminalReporter
2324
from typing import Optional, Any
2425
from pytest_testconfig import config as py_config
@@ -434,9 +435,17 @@ def calculate_must_gather_timer(test_start_time: int) -> int:
434435
return default_duration
435436

436437

438+
def get_all_node_markers(node: Node) -> list[str]:
439+
return [mark.name for mark in list(node.iter_markers())]
440+
441+
442+
def is_skip_must_gather(node: Node) -> bool:
443+
return "skip_must_gather" in get_all_node_markers(node=node)
444+
445+
437446
def pytest_exception_interact(node: Item | Collector, call: CallInfo[Any], report: TestReport | CollectReport) -> None:
438447
LOGGER.error(report.longreprtext)
439-
if node.config.getoption("--collect-must-gather"):
448+
if node.config.getoption("--collect-must-gather") and not is_skip_must_gather(node=node):
440449
test_name = f"{node.fspath}::{node.name}"
441450
LOGGER.info(f"Must-gather collection is enabled for {test_name}.")
442451

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,9 @@ dependencies = [
6969
"marshmallow==3.26.1,<4", # this version is needed for pytest-jira
7070
"pytest-html>=4.1.1",
7171
"fire",
72-
"llama_stack_client==0.2.21",
72+
"llama_stack_client==0.2.23",
7373
"pytest-xdist==3.8.0",
74+
"dictdiffer>=0.9.0",
7475
]
7576

7677
[project.urls]

pytest.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ markers =
2020
ocp_interop: Interop testing with Openshift.
2121
downstream_only: Tests that are specific to downstream
2222
cluster_health: Tests that verifies that cluster is healthy to begin testing
23+
skip_must_gather: Tests that do not require must-gather for triaging
2324

2425
# Model server
2526
modelmesh: Mark tests which are model mesh tests
@@ -33,6 +34,7 @@ markers =
3334
gpu: Mark tests which require GPU resources
3435
multinode: Mark tests which require multiple nodes
3536
keda: Mark tests which are testing KEDA scaling
37+
llmd_cpu: Mark tests which are testing LLMD (LLM Deployment) with CPU resources
3638

3739
# Model Registry:
3840
custom_namespace: mark tests that are to be run with custom namespace

tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,7 @@ def minio_pod(
517517
label=pod_labels,
518518
annotations=request.param.get("annotations"),
519519
) as minio_pod:
520+
minio_pod.wait_for_status(status=Pod.Status.RUNNING)
520521
yield minio_pod
521522

522523

tests/fixtures/trustyai.py

Lines changed: 24 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import pytest
22
from kubernetes.dynamic import DynamicClient
3+
from ocp_resources.data_science_cluster import DataScienceCluster
34
from ocp_resources.deployment import Deployment
45

5-
from typing import Generator, Any
6+
from typing import Generator
67

7-
from ocp_resources.config_map import ConfigMap
88
from ocp_resources.resource import ResourceEditor
99
from pytest_testconfig import py_config
1010

11-
from utilities.constants import Annotations, TRUSTYAI_SERVICE_NAME
11+
from utilities.constants import TRUSTYAI_SERVICE_NAME
12+
from utilities.infra import get_data_science_cluster
1213

1314

1415
@pytest.fixture(scope="class")
@@ -21,36 +22,32 @@ def trustyai_operator_deployment(admin_client: DynamicClient) -> Deployment:
2122
)
2223

2324

24-
@pytest.fixture(scope="function")
25-
def patched_trustyai_configmap_allow_online(
26-
admin_client: DynamicClient, trustyai_operator_deployment: Deployment
27-
) -> Generator[ConfigMap, Any, Any]:
28-
"""
29-
Patches the TrustyAI Operator ConfigMap in order to set allowOnline and allowCodeExecution to true.
30-
These options are needed to run some LMEval tasks, which rely on having access to the internet
31-
and running arbitrary code. The deployment needs to be restarted in order for these changes to be applied.
32-
"""
33-
trustyai_service_operator: str = "trustyai-service-operator"
34-
35-
configmap: ConfigMap = ConfigMap(
36-
client=admin_client,
37-
name=f"{trustyai_service_operator}-config",
38-
namespace=py_config["applications_namespace"],
39-
ensure_exists=True,
40-
)
25+
@pytest.fixture(scope="class")
26+
def patched_dsc_lmeval_allow_all(
27+
admin_client, trustyai_operator_deployment: Deployment
28+
) -> Generator[DataScienceCluster, None, None]:
29+
"""Enable LMEval PermitOnline and PermitCodeExecution flags in the DataScienceCluster."""
30+
dsc = get_data_science_cluster(client=admin_client)
4131
with ResourceEditor(
4232
patches={
43-
configmap: {
44-
"metadata": {"annotations": {Annotations.OpenDataHubIo.MANAGED: "false"}},
45-
"data": {
46-
"lmes-allow-online": "true",
47-
"lmes-allow-code-execution": "true",
48-
},
33+
dsc: {
34+
"spec": {
35+
"components": {
36+
"trustyai": {
37+
"eval": {
38+
"lmeval": {
39+
"permitCodeExecution": "allow",
40+
"permitOnline": "allow",
41+
}
42+
}
43+
}
44+
}
45+
}
4946
}
5047
}
5148
):
5249
num_replicas: int = trustyai_operator_deployment.replicas
5350
trustyai_operator_deployment.scale_replicas(replica_count=0)
5451
trustyai_operator_deployment.scale_replicas(replica_count=num_replicas)
5552
trustyai_operator_deployment.wait_for_replicas()
56-
yield configmap
53+
yield dsc

tests/llama_stack/conftest.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,7 @@ def enabled_llama_stack_operator(dsc_resource: DataScienceCluster) -> Generator[
4040
@pytest.fixture(scope="class")
4141
def llama_stack_server_config(
4242
request: FixtureRequest,
43-
admin_client: DynamicClient,
44-
model_namespace: Namespace,
43+
unprivileged_model_namespace: Namespace,
4544
) -> Dict[str, Any]:
4645
fms_orchestrator_url = "http://localhost"
4746
inference_model = os.getenv("LLS_CORE_INFERENCE_MODEL", "")
@@ -100,15 +99,15 @@ def llama_stack_server_config(
10099

101100
@pytest.fixture(scope="class")
102101
def llama_stack_distribution(
103-
admin_client: DynamicClient,
104-
model_namespace: Namespace,
102+
unprivileged_client: DynamicClient,
103+
unprivileged_model_namespace: Namespace,
105104
enabled_llama_stack_operator: DataScienceCluster,
106105
llama_stack_server_config: Dict[str, Any],
107106
) -> Generator[LlamaStackDistribution, None, None]:
108107
with create_llama_stack_distribution(
109-
client=admin_client,
110-
name="llama-stack-distribution",
111-
namespace=model_namespace.name,
108+
client=unprivileged_client,
109+
name="test-llama-stack-distribution",
110+
namespace=unprivileged_model_namespace.name,
112111
replicas=1,
113112
server=llama_stack_server_config,
114113
) as lls_dist:
@@ -118,11 +117,11 @@ def llama_stack_distribution(
118117

119118
@pytest.fixture(scope="class")
120119
def llama_stack_distribution_deployment(
121-
admin_client: DynamicClient,
120+
unprivileged_client: DynamicClient,
122121
llama_stack_distribution: LlamaStackDistribution,
123122
) -> Generator[Deployment, Any, Any]:
124123
deployment = Deployment(
125-
client=admin_client,
124+
client=unprivileged_client,
126125
namespace=llama_stack_distribution.namespace,
127126
name=llama_stack_distribution.name,
128127
)
@@ -133,15 +132,13 @@ def llama_stack_distribution_deployment(
133132

134133
@pytest.fixture(scope="class")
135134
def llama_stack_client(
136-
admin_client: DynamicClient,
137135
llama_stack_distribution_deployment: Deployment,
138136
) -> Generator[LlamaStackClient, Any, Any]:
139137
"""
140138
Returns a ready to use LlamaStackClient, enabling port forwarding
141139
from the llama-stack-server service:8321 to localhost:8321
142140
143141
Args:
144-
admin_client (DynamicClient): Kubernetes dynamic client for cluster operations
145142
llama_stack_distribution_deployment (Deployment): LlamaStack distribution deployment resource
146143
147144
Yields:

tests/llama_stack/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ class Eval(str, Enum):
1616
TRUSTYAI_LMEVAL = "trustyai_lmeval"
1717

1818

19+
LLS_CORE_POD_FILTER: str = "app=llama-stack"
20+
21+
1922
@dataclass
2023
class TorchTuneTestExpectation:
2124
"""Test expectation for TorchTune documentation questions."""

tests/llama_stack/core/test_llamastack_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
{
1818
"vllm_url_fixture": "qwen_isvc_url",
1919
"inference_model": QWEN_MODEL_NAME,
20-
"llama_stack_storage_size": "10Gi",
20+
"llama_stack_storage_size": "2Gi",
2121
},
2222
)
2323
],

tests/llama_stack/eval/test_lmeval_provider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def test_lmeval_register_benchmark(self, minio_pod, minio_data_connection, llama
5252
assert benchmarks[0].provider_id == LlamaStackProviders.Eval.TRUSTYAI_LMEVAL
5353

5454
def test_llamastack_run_eval(
55-
self, minio_pod, minio_data_connection, patched_trustyai_configmap_allow_online, llama_stack_client
55+
self, minio_pod, minio_data_connection, patched_dsc_lmeval_allow_all, llama_stack_client
5656
):
5757
job = llama_stack_client.eval.run_eval(
5858
benchmark_id=TRUSTYAI_LMEVAL_ARCEASY,

0 commit comments

Comments
 (0)