55
66from tests .model_explainability .utils import validate_tai_component_images
77
8- from tests .model_explainability .lm_eval .utils import get_lmeval_tasks
8+ from tests .model_explainability .lm_eval .utils import get_lmeval_tasks , validate_lmeval_job_pod_and_logs
99
1010LMEVALJOB_COMPLETE_STATE : str = "Complete"
1111
6464def test_lmeval_huggingface_model (admin_client , model_namespace , lmevaljob_hf_pod ):
6565 """Tests that verify running common evaluations (and a custom one) on a model pulled directly from HuggingFace.
6666 On each test we run a different evaluation task, limiting it to 0.5% of the questions on each eval."""
67- lmevaljob_hf_pod . wait_for_status ( status = lmevaljob_hf_pod . Status . SUCCEEDED , timeout = Timeout . TIMEOUT_40MIN )
67+ validate_lmeval_job_pod_and_logs ( lmevaljob_hf_pod )
6868
6969
7070@pytest .mark .parametrize (
@@ -89,9 +89,7 @@ def test_lmeval_local_offline_builtin_tasks_flan_arceasy(
8989 lmevaljob_local_offline_pod ,
9090):
9191 """Test that verifies that LMEval can run successfully in local, offline mode using builtin tasks"""
92- lmevaljob_local_offline_pod .wait_for_status (
93- status = lmevaljob_local_offline_pod .Status .SUCCEEDED , timeout = Timeout .TIMEOUT_20MIN
94- )
92+ validate_lmeval_job_pod_and_logs (lmevaljob_local_offline_pod )
9593
9694
9795@pytest .mark .parametrize (
@@ -124,9 +122,7 @@ def test_lmeval_local_offline_unitxt_tasks_flan_20newsgroups(
124122 lmevaljob_local_offline_pod ,
125123):
126124 """Test that verifies that LMEval can run successfully in local, offline mode using unitxt"""
127- lmevaljob_local_offline_pod .wait_for_status (
128- status = lmevaljob_local_offline_pod .Status .SUCCEEDED , timeout = Timeout .TIMEOUT_20MIN
129- )
125+ validate_lmeval_job_pod_and_logs (lmevaljob_local_offline_pod )
130126
131127
132128@pytest .mark .parametrize (
@@ -140,9 +136,7 @@ def test_lmeval_local_offline_unitxt_tasks_flan_20newsgroups(
140136)
141137def test_lmeval_vllm_emulator (admin_client , model_namespace , lmevaljob_vllm_emulator_pod ):
142138 """Basic test that verifies LMEval works with vLLM using a vLLM emulator for more efficient evaluation"""
143- lmevaljob_vllm_emulator_pod .wait_for_status (
144- status = lmevaljob_vllm_emulator_pod .Status .SUCCEEDED , timeout = Timeout .TIMEOUT_20MIN
145- )
139+ validate_lmeval_job_pod_and_logs (lmevaljob_vllm_emulator_pod )
146140
147141
148142@pytest .mark .parametrize (
@@ -161,9 +155,7 @@ def test_lmeval_s3_storage(
161155 lmevaljob_s3_offline_pod ,
162156):
163157 """Test to verify that LMEval works with a model stored in a S3 bucket"""
164- lmevaljob_s3_offline_pod .wait_for_status (
165- status = lmevaljob_s3_offline_pod .Status .SUCCEEDED , timeout = Timeout .TIMEOUT_20MIN
166- )
158+ validate_lmeval_job_pod_and_logs (lmevaljob_s3_offline_pod )
167159
168160
169161@pytest .mark .parametrize (
0 commit comments