Skip to content

Commit 424dc0c

Browse files
committed
Janitorial work
1 parent ae77b51 commit 424dc0c

56 files changed

Lines changed: 285 additions & 1580 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

bb.py

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,8 @@
22
from deepeval.test_case.conversational_test_case import TurnParams
33
from deepeval.test_case.llm_test_case import ToolCall
44

5-
test_case = ConversationalTestCase(
5+
# Conversation 1: Initial contact and address collection
6+
test_case_1 = ConversationalTestCase(
67
chatbot_role="A humble and doubtful wizard",
78
turns=[
89
Turn(
@@ -28,6 +29,13 @@
2829
content="Wonderful. Next, I'll need your bank account number where we can link this new account.",
2930
),
3031
Turn(role="user", content="456789123"),
32+
],
33+
)
34+
35+
# Conversation 2: Account number correction and personal details
36+
test_case_2 = ConversationalTestCase(
37+
chatbot_role="A humble and doubtful wizard",
38+
turns=[
3139
Turn(
3240
role="assistant",
3341
content="This account number seems invalid, can you please double-check?",
@@ -49,6 +57,13 @@
4957
role="assistant",
5058
content="Excellent. Just a few more details. What is your phone number?",
5159
),
60+
],
61+
)
62+
63+
# Conversation 3: Phone details and final confirmation with tools
64+
test_case_3 = ConversationalTestCase(
65+
chatbot_role="A humble and doubtful wizard",
66+
turns=[
5267
Turn(role="user", content="555-0102"),
5368
Turn(
5469
role="assistant",
@@ -122,5 +137,6 @@
122137
# metric = ConversationRelevancyMetric(verbose_mode=True)
123138
# metric = ConversationCompletenessMetric(verbose_mode=True)
124139
# metric = RoleAdherenceMetric(verbose_mode=True)
140+
from deepeval import evaluate
125141

126-
metric.measure(test_case)
142+
evaluate(test_cases=[test_case_1, test_case_2, test_case_3], metrics=[metric])

deepeval/__init__.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,7 @@
55
# Optionally add telemetry
66
from ._version import __version__
77

8-
from deepeval.event import track
9-
from deepeval.monitor import monitor, a_monitor, send_feedback, a_send_feedback
8+
from deepeval.monitor import send_feedback, a_send_feedback
109
from deepeval.evaluate import evaluate, assert_test
1110
from deepeval.test_run import on_test_run_end, log_hyperparameters
1211
from deepeval.utils import login_with_confident_api_key
@@ -22,8 +21,6 @@
2221
"login_with_confident_api_key",
2322
"log_hyperparameters",
2423
"track",
25-
"monitor",
26-
"a_monitor",
2724
"a_send_feedback",
2825
"send_feedback",
2926
"evaluate",

deepeval/evaluate/utils.py

Lines changed: 27 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22
import os, time
33

44

5+
from deepeval.test_case.conversational_test_case import Turn
6+
from deepeval.test_run.api import TurnApi
57
from deepeval.test_run.test_run import TestRunResultDisplay
68
from deepeval.dataset import Golden
79
from deepeval.metrics import BaseMetric
@@ -96,13 +98,21 @@ def create_test_result(
9698
)
9799

98100

101+
def create_api_turn(turn: Turn, index: int) -> TurnApi:
102+
return TurnApi(
103+
role=turn.role,
104+
content=turn.content,
105+
retrieval_context=turn.retrieval_context,
106+
tools_called=turn.tools_called,
107+
additional_metadata=turn.additional_metadata,
108+
order=index,
109+
)
110+
111+
99112
def create_api_test_case(
100113
test_case: Union[LLMTestCase, ConversationalTestCase, MLLMTestCase],
101114
trace: Optional[TraceApi] = None,
102115
index: Optional[int] = None,
103-
conversational_instance_id: Optional[int] = None,
104-
additional_metadata: Optional[Dict] = None,
105-
comments: Optional[str] = None,
106116
) -> Union[LLMApiTestCase, ConversationalApiTestCase]:
107117
if isinstance(test_case, ConversationalTestCase):
108118
order = (
@@ -127,43 +137,29 @@ def create_api_test_case(
127137
testCases=[],
128138
additionalMetadata=test_case.additional_metadata,
129139
)
130-
api_test_case.instance_id = id(api_test_case)
140+
# api_test_case.instance_id = id(api_test_case)
131141
api_test_case.turns = [
132-
create_api_test_case(
133-
test_case=turn,
142+
create_api_turn(
143+
turn=turn,
134144
index=index,
135-
conversational_instance_id=api_test_case.instance_id,
136-
additional_metadata=turn.additional_metadata,
137-
comments=turn.comments,
138145
)
139146
for index, turn in enumerate(test_case.turns)
140147
]
141148

142149
return api_test_case
143150
else:
144-
if conversational_instance_id:
145-
success = None
146-
name = f"turn_{index}"
147-
order = index
148-
149-
# Manually set the metadata and comments on conversational test case
150-
# to each individual message (test case)
151-
test_case.additional_metadata = additional_metadata
152-
test_case.comments = comments
153-
metrics_data = None
154-
else:
155-
order = (
156-
test_case._dataset_rank
157-
if test_case._dataset_rank is not None
158-
else index
159-
)
151+
order = (
152+
test_case._dataset_rank
153+
if test_case._dataset_rank is not None
154+
else index
155+
)
160156

161-
success = True
162-
if test_case.name is not None:
163-
name = test_case.name
164-
else:
165-
name = os.getenv(PYTEST_RUN_TEST_NAME, f"test_case_{order}")
166-
metrics_data = []
157+
success = True
158+
if test_case.name is not None:
159+
name = test_case.name
160+
else:
161+
name = os.getenv(PYTEST_RUN_TEST_NAME, f"test_case_{order}")
162+
metrics_data = []
167163

168164
if isinstance(test_case, LLMTestCase):
169165
api_test_case = LLMApiTestCase(
@@ -184,7 +180,6 @@ def create_api_test_case(
184180
order=order,
185181
additionalMetadata=test_case.additional_metadata,
186182
comments=test_case.comments,
187-
conversational_instance_id=conversational_instance_id,
188183
trace=trace,
189184
)
190185
elif isinstance(test_case, MLLMTestCase):
@@ -203,7 +198,6 @@ def create_api_test_case(
203198
order=order,
204199
additionalMetadata=test_case.additional_metadata,
205200
comments=test_case.comments,
206-
conversational_instance_id=conversational_instance_id,
207201
)
208202
# llm_test_case_lookup_map[instance_id] = api_test_case
209203
return api_test_case

deepeval/event/__init__.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

deepeval/event/api.py

Lines changed: 0 additions & 5 deletions
This file was deleted.

deepeval/event/event.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

deepeval/metrics/answer_relevancy/answer_relevancy.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -48,12 +48,11 @@ def __init__(
4848

4949
def measure(
5050
self,
51-
test_case: Union[LLMTestCase, ConversationalTestCase],
51+
test_case: LLMTestCase,
5252
_show_indicator: bool = True,
5353
_in_component: bool = False,
5454
) -> float:
55-
if isinstance(test_case, ConversationalTestCase):
56-
test_case = test_case.turns[-1]
55+
5756
check_llm_test_case_params(test_case, self._required_params, self)
5857

5958
self.evaluation_cost = 0 if self.using_native_model else None
@@ -92,12 +91,11 @@ def measure(
9291

9392
async def a_measure(
9493
self,
95-
test_case: Union[LLMTestCase, ConversationalTestCase],
94+
test_case: LLMTestCase,
9695
_show_indicator: bool = True,
9796
_in_component: bool = False,
9897
) -> float:
99-
if isinstance(test_case, ConversationalTestCase):
100-
test_case = test_case.turns[-1]
98+
10199
check_llm_test_case_params(test_case, self._required_params, self)
102100

103101
self.evaluation_cost = 0 if self.using_native_model else None

deepeval/metrics/bias/bias.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -46,12 +46,11 @@ def __init__(
4646

4747
def measure(
4848
self,
49-
test_case: Union[LLMTestCase, ConversationalTestCase],
49+
test_case: LLMTestCase,
5050
_show_indicator: bool = True,
5151
_in_component: bool = False,
5252
) -> float:
53-
if isinstance(test_case, ConversationalTestCase):
54-
test_case = test_case.turns[-1]
53+
5554
check_llm_test_case_params(test_case, self._required_params, self)
5655

5756
self.evaluation_cost = 0 if self.using_native_model else None
@@ -88,12 +87,11 @@ def measure(
8887

8988
async def a_measure(
9089
self,
91-
test_case: Union[LLMTestCase, ConversationalTestCase],
90+
test_case: LLMTestCase,
9291
_show_indicator: bool = True,
9392
_in_component: bool = False,
9493
) -> float:
95-
if isinstance(test_case, ConversationalTestCase):
96-
test_case = test_case.turns[-1]
94+
9795
check_llm_test_case_params(test_case, self._required_params, self)
9896

9997
self.evaluation_cost = 0 if self.using_native_model else None

deepeval/metrics/contextual_precision/contextual_precision.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -52,12 +52,11 @@ def __init__(
5252

5353
def measure(
5454
self,
55-
test_case: Union[LLMTestCase, ConversationalTestCase],
55+
test_case: LLMTestCase,
5656
_show_indicator: bool = True,
5757
_in_component: bool = False,
5858
) -> float:
59-
if isinstance(test_case, ConversationalTestCase):
60-
test_case = test_case.turns[-1]
59+
6160
check_llm_test_case_params(test_case, self._required_params, self)
6261

6362
self.evaluation_cost = 0 if self.using_native_model else None
@@ -96,12 +95,11 @@ def measure(
9695

9796
async def a_measure(
9897
self,
99-
test_case: Union[LLMTestCase, ConversationalTestCase],
98+
test_case: LLMTestCase,
10099
_show_indicator: bool = True,
101100
_in_component: bool = False,
102101
) -> float:
103-
if isinstance(test_case, ConversationalTestCase):
104-
test_case = test_case.turns[-1]
102+
105103
check_llm_test_case_params(test_case, self._required_params, self)
106104

107105
self.evaluation_cost = 0 if self.using_native_model else None

deepeval/metrics/contextual_recall/contextual_recall.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -51,12 +51,11 @@ def __init__(
5151

5252
def measure(
5353
self,
54-
test_case: Union[LLMTestCase, ConversationalTestCase],
54+
test_case: LLMTestCase,
5555
_show_indicator: bool = True,
5656
_in_component: bool = False,
5757
) -> float:
58-
if isinstance(test_case, ConversationalTestCase):
59-
test_case = test_case.turns[-1]
58+
6059
check_llm_test_case_params(test_case, self._required_params, self)
6160

6261
self.evaluation_cost = 0 if self.using_native_model else None
@@ -93,12 +92,11 @@ def measure(
9392

9493
async def a_measure(
9594
self,
96-
test_case: Union[LLMTestCase, ConversationalTestCase],
95+
test_case: LLMTestCase,
9796
_show_indicator: bool = True,
9897
_in_component: bool = False,
9998
) -> float:
100-
if isinstance(test_case, ConversationalTestCase):
101-
test_case = test_case.turns[-1]
99+
102100
check_llm_test_case_params(test_case, self._required_params, self)
103101

104102
self.evaluation_cost = 0 if self.using_native_model else None

0 commit comments

Comments
 (0)