Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,13 @@ Open `test_chatbot.py` and write your first test case to run an **end-to-end** e
import pytest
from deepeval import assert_test
from deepeval.metrics import GEval
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
from deepeval.test_case import LLMTestCase, SingleTurnParams

def test_case():
correctness_metric = GEval(
name="Correctness",
criteria="Determine if the 'actual output' is correct based on the 'expected output'.",
evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT, LLMTestCaseParams.EXPECTED_OUTPUT],
evaluation_params=[SingleTurnParams.ACTUAL_OUTPUT, SingleTurnParams.EXPECTED_OUTPUT],
threshold=0.5
)
test_case = LLMTestCase(
Expand Down Expand Up @@ -268,14 +268,14 @@ Use the `@observe` decorator to trace components (LLM calls, retrievers, tool ca

```python
from deepeval.tracing import observe, update_current_span
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
from deepeval.test_case import LLMTestCase, SingleTurnParams
from deepeval.dataset import EvaluationDataset, Golden
from deepeval.metrics import GEval

correctness = GEval(
name="Correctness",
criteria="Determine if the 'actual output' is correct based on the 'expected output'.",
evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT, LLMTestCaseParams.EXPECTED_OUTPUT],
evaluation_params=[SingleTurnParams.ACTUAL_OUTPUT, SingleTurnParams.EXPECTED_OUTPUT],
)

@observe(metrics=[correctness])
Expand Down
2 changes: 0 additions & 2 deletions deepeval/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ def login(
settings = get_settings()
save = save or settings.DEEPEVAL_DEFAULT_SAVE or "dotenv:.env.local"
with settings.edit(save=save) as edit_ctx:
settings.API_KEY = key
settings.CONFIDENT_API_KEY = key

handled, path, updated = edit_ctx.result
Expand Down Expand Up @@ -335,7 +334,6 @@ def logout(
settings = get_settings()
save = save or settings.DEEPEVAL_DEFAULT_SAVE or "dotenv:.env.local"
with settings.edit(save=save) as edit_ctx:
settings.API_KEY = None
settings.CONFIDENT_API_KEY = None

handled, path, updated = edit_ctx.result
Expand Down
7 changes: 2 additions & 5 deletions deepeval/confident/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def get_base_api_url():

def get_confident_api_key() -> Optional[str]:
s = get_settings()
key: Optional[SecretStr] = s.CONFIDENT_API_KEY or s.API_KEY
key: Optional[SecretStr] = s.CONFIDENT_API_KEY
return key.get_secret_value() if key else None


Expand All @@ -98,17 +98,14 @@ def set_confident_api_key(api_key: Optional[str]) -> None:
if save is None:
with s.edit(persist=False):
s.CONFIDENT_API_KEY = SecretStr(api_key) if api_key else None
s.API_KEY = SecretStr(api_key) if api_key else None
else:
# Respect default save: update runtime + write to dotenv, but not JSON
with s.edit(save=save, persist=None):
s.CONFIDENT_API_KEY = SecretStr(api_key) if api_key else None
s.API_KEY = SecretStr(api_key) if api_key else None


def is_confident():
confident_api_key = get_confident_api_key()
return confident_api_key is not None
return get_confident_api_key() is not None


def log_retry_error(retry_state: RetryCallState):
Expand Down
4 changes: 0 additions & 4 deletions deepeval/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,10 +377,6 @@ def __setattr__(self, name: str, value):
# Model Keys
#

API_KEY: Optional[SecretStr] = Field(
None,
description="Alias for CONFIDENT_API_KEY (Confident AI API key).",
)
CONFIDENT_API_KEY: Optional[SecretStr] = Field(
None,
description="Confident AI API key (used for uploading results/telemetry to Confident).",
Expand Down
34 changes: 17 additions & 17 deletions deepeval/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
raise ValueError(f"Error processing expected_tools: {e}")
else:
expected_tools.append([])
additional_metadatas = [
metadatas = [
ast.literal_eval(metadata) if metadata else None
for metadata in get_column_data(
df, additional_metadata_col_name, default=""
Expand All @@ -365,7 +365,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
retrieval_context,
tools_called,
expected_tools,
additional_metadata,
metadata,
) in zip(
inputs,
actual_outputs,
Expand All @@ -374,7 +374,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
retrieval_contexts,
tools_called,
expected_tools,
additional_metadatas,
metadatas,
):
self.add_test_case(
LLMTestCase(
Expand All @@ -385,7 +385,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
retrieval_context=retrieval_context,
tools_called=tools_called,
expected_tools=expected_tools,
additional_metadata=additional_metadata,
metadata=metadata,
)
)

Expand Down Expand Up @@ -575,7 +575,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
comments = get_column_data(df, comments_key_name)
name = get_column_data(df, name_key_name)
source_files = get_column_data(df, source_file_col_name)
additional_metadatas = [
metadatas = [
ast.literal_eval(metadata) if metadata else None
for metadata in get_column_data(
df, additional_metadata_col_name, default=""
Expand All @@ -597,7 +597,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
comments,
name,
source_file,
additional_metadata,
metadata,
scenario,
turns,
expected_outcome,
Expand All @@ -613,7 +613,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
comments,
name,
source_files,
additional_metadatas,
metadatas,
scenarios,
turns_raw,
expected_outcomes,
Expand All @@ -630,7 +630,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
context=context,
comments=comments,
name=name,
additional_metadata=additional_metadata,
additional_metadata=metadata,
)
)
else:
Expand All @@ -643,7 +643,7 @@ def get_column_data(df: pd.DataFrame, col_name: str, default=None):
retrieval_context=retrieval_context,
tools_called=tools_called,
expected_tools=expected_tools,
additional_metadata=additional_metadata,
additional_metadata=metadata,
source_file=source_file,
comments=comments,
name=name,
Expand Down Expand Up @@ -688,7 +688,7 @@ def add_goldens_from_json_file(
comments = json_obj.get(comments_key_name)
name = json_obj.get(name_key_name)
parsed_turns = parse_turns(turns) if turns else []
additional_metadata = json_obj.get(additional_metadata_key_name)
metadata = json_obj.get(additional_metadata_key_name)

self.add_golden(
ConversationalGolden(
Expand All @@ -699,7 +699,7 @@ def add_goldens_from_json_file(
context=context,
comments=comments,
name=name,
additional_metadata=additional_metadata,
additional_metadata=metadata,
)
)
else:
Expand All @@ -713,7 +713,7 @@ def add_goldens_from_json_file(
comments = json_obj.get(comments_key_name)
name = json_obj.get(name_key_name)
source_file = json_obj.get(source_file_key_name)
additional_metadata = json_obj.get(additional_metadata_key_name)
metadata = json_obj.get(additional_metadata_key_name)

self.add_golden(
Golden(
Expand All @@ -724,7 +724,7 @@ def add_goldens_from_json_file(
retrieval_context=retrieval_context,
tools_called=tools_called,
expected_tools=expected_tools,
additional_metadata=additional_metadata,
additional_metadata=metadata,
comments=comments,
name=name,
source_file=source_file,
Expand Down Expand Up @@ -803,7 +803,7 @@ def parse_tools(value):
comments = json_obj.get(comments_key_name)
name = json_obj.get(name_key_name)
parsed_turns = parse_turns(turns) if turns else []
additional_metadata = json_obj.get(additional_metadata_key_name)
metadata = json_obj.get(additional_metadata_key_name)
custom_column_key_values = json_obj.get(
custom_column_key_values_key_name
)
Expand All @@ -817,7 +817,7 @@ def parse_tools(value):
context=context,
comments=comments,
name=name,
additional_metadata=additional_metadata,
additional_metadata=metadata,
custom_column_key_values=custom_column_key_values,
)
)
Expand All @@ -839,7 +839,7 @@ def parse_tools(value):
comments = json_obj.get(comments_key_name)
name = json_obj.get(name_key_name)
source_file = json_obj.get(source_file_key_name)
additional_metadata = json_obj.get(additional_metadata_key_name)
metadata = json_obj.get(additional_metadata_key_name)
custom_column_key_values = json_obj.get(
custom_column_key_values_key_name
)
Expand All @@ -853,7 +853,7 @@ def parse_tools(value):
retrieval_context=retrieval_context,
tools_called=tools_called,
expected_tools=expected_tools,
additional_metadata=additional_metadata,
additional_metadata=metadata,
custom_column_key_values=custom_column_key_values,
comments=comments,
name=name,
Expand Down
12 changes: 5 additions & 7 deletions deepeval/dataset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def convert_test_cases_to_goldens(
"retrieval_context": test_case.retrieval_context,
"tools_called": test_case.tools_called,
"expected_tools": test_case.expected_tools,
"additional_metadata": test_case.additional_metadata,
"additional_metadata": test_case.metadata,
}
goldens.append(Golden(**golden))
return goldens
Expand All @@ -47,7 +47,7 @@ def convert_goldens_to_test_cases(
expected_tools=golden.expected_tools,
name=golden.name,
comments=golden.comments,
additional_metadata=golden.additional_metadata,
metadata=golden.additional_metadata,
_dataset_alias=_alias,
_dataset_id=_id,
_dataset_rank=index,
Expand All @@ -71,7 +71,7 @@ def convert_convo_test_cases_to_convo_goldens(
"expected_outcome": test_case.expected_outcome,
"user_description": test_case.user_description,
"context": test_case.context,
"additional_metadata": test_case.additional_metadata,
"additional_metadata": test_case.metadata,
}
goldens.append(ConversationalGolden(**golden))
return goldens
Expand All @@ -91,7 +91,7 @@ def convert_convo_goldens_to_convo_test_cases(
user_description=golden.user_description,
context=golden.context,
name=golden.name,
additional_metadata=golden.additional_metadata,
metadata=golden.additional_metadata,
comments=golden.comments,
_dataset_alias=_alias,
_dataset_id=_id,
Expand Down Expand Up @@ -141,9 +141,7 @@ def _dump_list(models):
"mcp_tools_called": _dump_list(turn.mcp_tools_called),
"mcp_resources_called": _dump_list(turn.mcp_resources_called),
"mcp_prompts_called": _dump_list(turn.mcp_prompts_called),
"additional_metadata": (
turn.additional_metadata if turn.additional_metadata else None
),
"metadata": turn.metadata if turn.metadata else None,
}
res.append(cur_turn)
try:
Expand Down
4 changes: 2 additions & 2 deletions deepeval/evaluate/execute/agentic.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ async def _a_execute_agentic_test_case(
retrieval_context=current_trace.retrieval_context,
tools_called=current_trace.tools_called,
expected_tools=current_trace.expected_tools,
additional_metadata=golden.additional_metadata,
metadata=golden.additional_metadata,
comments=golden.comments,
name=golden.name,
_dataset_alias=golden._dataset_alias,
Expand Down Expand Up @@ -243,7 +243,7 @@ async def dfs(trace: Trace, span: BaseSpan):
expected_output=None,
context=None,
retrieval_context=None,
additional_metadata=golden.additional_metadata,
metadata=golden.additional_metadata,
tools_called=None,
expected_tools=None,
comments=golden.comments,
Expand Down
2 changes: 1 addition & 1 deletion deepeval/evaluate/execute/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def evaluate_test_cases(
expected_output=current_trace.expected_output,
context=current_trace.context,
retrieval_context=current_trace.retrieval_context,
additional_metadata=golden.additional_metadata,
metadata=golden.additional_metadata,
tools_called=current_trace.tools_called,
expected_tools=current_trace.expected_tools,
comments=golden.comments,
Expand Down
2 changes: 1 addition & 1 deletion deepeval/evaluate/execute/trace_scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def _assert_test_from_current_trace(
expected_output=current_trace.expected_output,
context=current_trace.context,
retrieval_context=current_trace.retrieval_context,
additional_metadata=golden.additional_metadata,
metadata=golden.additional_metadata,
tools_called=current_trace.tools_called,
expected_tools=current_trace.expected_tools,
comments=golden.comments,
Expand Down
2 changes: 1 addition & 1 deletion deepeval/evaluate/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class TestResult:
context: Optional[List[str]] = None
retrieval_context: Optional[List[str]] = None
turns: Optional[List[TurnApi]] = None
additional_metadata: Optional[Dict] = None
metadata: Optional[Dict] = None


class EvaluationResult(BaseModel):
Expand Down
6 changes: 3 additions & 3 deletions deepeval/evaluate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def create_test_result(
success=api_test_case.success,
metrics_data=api_test_case.metrics_data,
conversational=True,
additional_metadata=api_test_case.additional_metadata,
metadata=api_test_case.metadata,
turns=api_test_case.turns,
)
else:
Expand All @@ -135,7 +135,7 @@ def create_test_result(
actual_output=api_test_case.actual_output,
conversational=False,
multimodal=True,
additional_metadata=api_test_case.additional_metadata,
metadata=api_test_case.metadata,
)
else:
return TestResult(
Expand All @@ -149,7 +149,7 @@ def create_test_result(
retrieval_context=api_test_case.retrieval_context,
conversational=False,
multimodal=False,
additional_metadata=api_test_case.additional_metadata,
metadata=api_test_case.metadata,
)


Expand Down
3 changes: 0 additions & 3 deletions deepeval/key_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ def _secret_env_keys() -> frozenset[str]:


def _env_key_for_legacy_enum(key) -> str:
# For ModelKeyValues, .name == .value, for KeyValues it's the important one:
# KeyValues.API_KEY.name == "API_KEY" (matches Settings), value == "api_key" (legacy json key)
return getattr(key, "name", str(key))


Expand All @@ -48,7 +46,6 @@ def _is_secret_key(key) -> bool:

class KeyValues(Enum):
# Confident AI
API_KEY = "api_key"
CONFIDENT_API_KEY = "confident_api_key"
CONFIDENT_BASE_URL = "confident_base_url"
CONFIDENT_REGION = "confident_region"
Expand Down
8 changes: 4 additions & 4 deletions deepeval/metrics/answer_relevancy/answer_relevancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
generate_with_schema_and_extract,
a_generate_with_schema_and_extract,
)
from deepeval.test_case import LLMTestCase, LLMTestCaseParams, MLLMImage
from deepeval.test_case import LLMTestCase, SingleTurnParams, MLLMImage
from deepeval.metrics import BaseMetric
from deepeval.models import DeepEvalBaseLLM
from deepeval.metrics.answer_relevancy.template import AnswerRelevancyTemplate
Expand All @@ -25,9 +25,9 @@


class AnswerRelevancyMetric(BaseMetric):
_required_params: List[LLMTestCaseParams] = [
LLMTestCaseParams.INPUT,
LLMTestCaseParams.ACTUAL_OUTPUT,
_required_params: List[SingleTurnParams] = [
SingleTurnParams.INPUT,
SingleTurnParams.ACTUAL_OUTPUT,
]

def __init__(
Expand Down
Loading
Loading