Skip to content

Commit af310e5

Browse files
committed
Feat: Improve LMEval assertions
Add additional validation for LMEval Job pods: pod logs are now checked, and exception handling and logging are improved.
1 parent 66a1327 commit af310e5

File tree

3 files changed

+59
-15
lines changed

3 files changed

+59
-15
lines changed

tests/model_explainability/lm_eval/test_lm_eval.py

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from tests.model_explainability.utils import validate_tai_component_images
77

8-
from tests.model_explainability.lm_eval.utils import get_lmeval_tasks
8+
from tests.model_explainability.lm_eval.utils import get_lmeval_tasks, validate_lmeval_job_pod_and_logs
99

1010
LMEVALJOB_COMPLETE_STATE: str = "Complete"
1111

@@ -64,7 +64,7 @@
6464
def test_lmeval_huggingface_model(admin_client, model_namespace, lmevaljob_hf_pod):
6565
"""Tests that verify running common evaluations (and a custom one) on a model pulled directly from HuggingFace.
6666
On each test we run a different evaluation task, limiting it to 0.5% of the questions on each eval."""
67-
lmevaljob_hf_pod.wait_for_status(status=lmevaljob_hf_pod.Status.SUCCEEDED, timeout=Timeout.TIMEOUT_40MIN)
67+
validate_lmeval_job_pod_and_logs(lmevaljob_hf_pod)
6868

6969

7070
@pytest.mark.parametrize(
@@ -89,9 +89,7 @@ def test_lmeval_local_offline_builtin_tasks_flan_arceasy(
8989
lmevaljob_local_offline_pod,
9090
):
9191
"""Test that verifies that LMEval can run successfully in local, offline mode using builtin tasks"""
92-
lmevaljob_local_offline_pod.wait_for_status(
93-
status=lmevaljob_local_offline_pod.Status.SUCCEEDED, timeout=Timeout.TIMEOUT_20MIN
94-
)
92+
validate_lmeval_job_pod_and_logs(lmevaljob_local_offline_pod)
9593

9694

9795
@pytest.mark.parametrize(
@@ -124,9 +122,7 @@ def test_lmeval_local_offline_unitxt_tasks_flan_20newsgroups(
124122
lmevaljob_local_offline_pod,
125123
):
126124
"""Test that verifies that LMEval can run successfully in local, offline mode using unitxt"""
127-
lmevaljob_local_offline_pod.wait_for_status(
128-
status=lmevaljob_local_offline_pod.Status.SUCCEEDED, timeout=Timeout.TIMEOUT_20MIN
129-
)
125+
validate_lmeval_job_pod_and_logs(lmevaljob_local_offline_pod)
130126

131127

132128
@pytest.mark.parametrize(
@@ -140,9 +136,7 @@ def test_lmeval_local_offline_unitxt_tasks_flan_20newsgroups(
140136
)
141137
def test_lmeval_vllm_emulator(admin_client, model_namespace, lmevaljob_vllm_emulator_pod):
142138
"""Basic test that verifies LMEval works with vLLM using a vLLM emulator for more efficient evaluation"""
143-
lmevaljob_vllm_emulator_pod.wait_for_status(
144-
status=lmevaljob_vllm_emulator_pod.Status.SUCCEEDED, timeout=Timeout.TIMEOUT_20MIN
145-
)
139+
validate_lmeval_job_pod_and_logs(lmevaljob_vllm_emulator_pod)
146140

147141

148142
@pytest.mark.parametrize(
@@ -161,9 +155,7 @@ def test_lmeval_s3_storage(
161155
lmevaljob_s3_offline_pod,
162156
):
163157
"""Test to verify that LMEval works with a model stored in a S3 bucket"""
164-
lmevaljob_s3_offline_pod.wait_for_status(
165-
status=lmevaljob_s3_offline_pod.Status.SUCCEEDED, timeout=Timeout.TIMEOUT_20MIN
166-
)
158+
validate_lmeval_job_pod_and_logs(lmevaljob_s3_offline_pod)
167159

168160

169161
@pytest.mark.parametrize(

tests/model_explainability/lm_eval/utils.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
from typing import List
2-
2+
import re
33
from kubernetes.dynamic import DynamicClient
44
from ocp_resources.lm_eval_job import LMEvalJob
55
from ocp_resources.pod import Pod
66

7+
from tests.model_explainability.utils import log_pod_failure_logs
78
from utilities.constants import Timeout
89
from simple_logger.logger import get_logger
10+
from timeout_sampler import TimeoutExpiredError
911

1012
import pandas as pd
1113

14+
from utilities.exceptions import PodLogMissMatchError, UnexpectedFailureError
1215

1316
LOGGER = get_logger(name=__name__)
1417

@@ -84,3 +87,28 @@ def get_lmeval_tasks(min_downloads: int | float, max_downloads: int | float | No
8487
LOGGER.info(f"Number of unique LMEval tasks with more than {min_downloads} downloads: {len(unique_tasks)}")
8588

8689
return unique_tasks
90+
91+
def validate_lmeval_job_pod_and_logs(lmevaljob_pod: Pod) -> None:
    """Validate that an LMEval job pod succeeds and logs the job-completion message.

    The pod is first awaited in the Running state (5-minute timeout), then in
    the Succeeded state (10-minute timeout). Finally the pod logs are searched
    for the driver's "job completed" status update.

    Args:
        lmevaljob_pod: The LMEvalJob pod.

    Returns: None

    Raises:
        UnexpectedFailureError: If the pod neither reaches Running nor is already
            Succeeded, or if it does not reach Succeeded after running.
        PodLogMissMatchError: If the pod succeeded but its logs do not contain
            the expected job-completion message.
    """
    pod_success_log_regex = (
        r'INFO\sdriver\supdate status: job completed\s\{\"state\":\s\{\"state\"'
        r':\"Complete\",\"reason\":\"Succeeded\",\"message\":\"job completed\"'
    )

    try:
        lmevaljob_pod.wait_for_status(status=Pod.Status.RUNNING, timeout=Timeout.TIMEOUT_5MIN)
    except TimeoutExpiredError as e:
        pod_status = lmevaljob_pod.status
        # A short job may have completed without ever being observed as Running;
        # that is not a failure, so only raise when the pod has not succeeded.
        # NOTE(review): assumes wait_for_status keeps polling (rather than returning)
        # when the phase is already Succeeded - confirm against ocp_resources.
        if pod_status != Pod.Status.SUCCEEDED:
            raise UnexpectedFailureError(
                f"LMEval job pod did not reach a running state. Status: {pod_status}"
            ) from e

    try:
        lmevaljob_pod.wait_for_status(status=Pod.Status.SUCCEEDED, timeout=Timeout.TIMEOUT_10MIN)
    except TimeoutExpiredError as e:
        log_pod_failure_logs(LOGGER, lmevaljob_pod)
        # The wait can expire while the pod is still running, so report the
        # observed status instead of asserting that the pod failed.
        raise UnexpectedFailureError(
            f"LMEval job pod did not reach a succeeded state after running. Status: {lmevaljob_pod.status}"
        ) from e

    if re.search(pod_success_log_regex, lmevaljob_pod.log()) is None:
        log_pod_failure_logs(LOGGER, lmevaljob_pod)
        raise PodLogMissMatchError(
            "LMEval job pod succeeded, but the expected job-completed message was not found in its logs."
        )

tests/model_explainability/utils.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from logging import Logger
12
import re
23
from ocp_resources.config_map import ConfigMap
34
from ocp_resources.pod import Pod
@@ -32,3 +33,26 @@ def validate_tai_component_images(
3233
assert container.image in tai_configmap_values, (
3334
f"{container.name} : {container.image} not present in TrustyAI operator configmap."
3435
)
36+
37+
def log_pod_failure_logs(logger: Logger, pod: Pod) -> None:
    """Report a failed pod's details and its logs at error level.

    Args:
        logger: logging.Logger
            Destination for the diagnostic output.
        pod: Pod
            The pod whose details and logs are reported.

    Returns: None
    """
    # Values are read lazily so each one is fetched immediately before its line is logged.
    detail_readers = (
        ("Failed Pod Name", lambda: pod.name),
        ("Failed Pod Status", lambda: pod.status),
        ("Failed Pod IP", lambda: pod.ip),
        ("Failed Pod Labels", lambda: pod.labels),
        ("Failed Pod Namespace", lambda: pod.namespace),
        ("Failed Pod Spec", lambda: pod.instance.spec.to_dict()),
    )

    logger.error("--------------------------------- FAILED POD INFO -----------------------------------")
    for title, read_value in detail_readers:
        logger.error(f"{title}: {read_value()}")

    logger.error("--------------------------------- FAILED POD LOGS ----------------------------------")
    pod_output = pod.log()
    logger.error(f"{pod_output}")
    logger.error("--------------------------------- END OF POD LOGS ----------------------------------")
58+

0 commit comments

Comments
 (0)