
Commit aa8057e

Try to get a helpful error out of deepeval.
1 parent 836ac26 commit aa8057e

File tree: 2 files changed (28 additions, 32 deletions)


python_components/evaluation/evaluation/exception/evaluation.py

Lines changed: 19 additions & 18 deletions

@@ -137,14 +137,15 @@ async def _ceq_evaluate(self, document, exception) -> Result:
         )
         details = document.llm_context()
         details.append(decision)
-        test_case = LLMTestCase(
-            actual_output=[response],
-            retrieval_context=context,
-            input="\n\n".join(details),
-        )
-        metric.measure(test_case)
-        if type(metric) is None or metric.verdicts is None:
-            raise RuntimeError("Metric measurement failed. This is likely due to rate limiting.")
+        try:
+            test_case = LLMTestCase(
+                actual_output=[response],
+                retrieval_context=context,
+                input="\n\n".join(details),
+            )
+            metric.measure(test_case)
+        except AttributeError:
+            raise RuntimeError("Metric measurement failed. This is likely due to rate limiting or metric performance.")
         details = {
             "verdicts": convert_model_list(metric.verdicts),
             "response": response,
@@ -169,16 +170,16 @@ async def _faithfulness_evaluate(self, document, exception):
         elif exception == "application":
             response = document.ai_exception["why_application"]
             context = APPLICATION_EXCEPTION_CONTEXT
-
-        metric = MultimodalFaithfulnessMetric(model=self.evaluation_model)
-        test_case = MLLMTestCase(
-            input=[],
-            retrieval_context=context + document.images,
-            actual_output=[response],
-        )
-        metric.measure(test_case)
-        if type(metric) is None or metric.truths is None or metric.claims is None or metric.verdicts is None:
-            raise RuntimeError("Metric measurement failed. This is likely due to rate limiting.")
+        try:
+            metric = MultimodalFaithfulnessMetric(model=self.evaluation_model)
+            test_case = MLLMTestCase(
+                input=[],
+                retrieval_context=context + document.images,
+                actual_output=[response],
+            )
+            metric.measure(test_case)
+        except AttributeError:
+            raise RuntimeError("Metric measurement failed. This is likely due to rate limiting or metric performance.")
         details = {
             "truths": metric.truths,
             "claims": metric.claims,

python_components/evaluation/evaluation/summary/evaluation.py

Lines changed: 9 additions & 14 deletions

@@ -41,20 +41,15 @@ def evaluate(self, document: Document) -> List[Result]:
         })))
         logger.info("Summarization complete. Performing related evaluations.")
         document.ai_summary = result["summary"]
-        # Begin the DeepEval summary evaluation.
-        metric = MultimodalInputSummarization(model=self.evaluation_model)
-        test_case = MLLMTestCase(
-            input=document.images, actual_output=document.ai_summary
-        )
-        metric.measure(test_case)
-        if (type(metric) is None
-            or metric.truths is None
-            or metric.claims is None
-            or metric.assessment_questions is None
-            or metric.coverage_verdicts is None
-            or metric.alignment_verdicts is None
-        ):
-            raise RuntimeError("Metric measurement failed. This is likely due to rate limiting.")
+        try:
+            # Begin the DeepEval summary evaluation.
+            metric = MultimodalInputSummarization(model=self.evaluation_model)
+            test_case = MLLMTestCase(
+                input=document.images, actual_output=document.ai_summary
+            )
+            metric.measure(test_case)
+        except AttributeError:
+            raise RuntimeError("Metric measurement failed. This is likely due to rate limiting or metric performance.")
         details = {
             "truths": metric.truths,
             "claims": metric.claims,
