Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 101 additions & 88 deletions deepeval/test_run/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,108 +862,121 @@ def post_test_run(self, test_run: TestRun) -> Optional[Tuple[str, str]]:
"Sending a large test run to Confident, this might take a bit longer than usual..."
)

####################
### POST REQUEST ###
####################
if is_conversational_run:
test_run.conversational_test_cases = initial_batch
else:
test_run.test_cases = initial_batch
original_test_cases = test_run.test_cases
original_conversational_test_cases = test_run.conversational_test_cases
original_prompts = test_run.prompts

try:
test_run.prompts = None
body = test_run.model_dump(by_alias=True, exclude_none=True)
except AttributeError:
# Pydantic version below 2.0
body = test_run.dict(by_alias=True, exclude_none=True)
####################
### POST REQUEST ###
####################
if is_conversational_run:
test_run.conversational_test_cases = initial_batch
else:
test_run.test_cases = initial_batch

json_str = json.dumps(body, cls=TestRunEncoder)
body = json.loads(json_str)
try:
test_run.prompts = None
body = test_run.model_dump(by_alias=True, exclude_none=True)
except AttributeError:
# Pydantic version below 2.0
body = test_run.dict(by_alias=True, exclude_none=True)

data, link = api.send_request(
method=HttpMethods.POST,
endpoint=Endpoints.TEST_RUN_ENDPOINT,
body=body,
)
json_str = json.dumps(body, cls=TestRunEncoder)
body = json.loads(json_str)

if not isinstance(data, dict) or "id" not in data:
# try to show helpful details
detail = None
if isinstance(data, dict):
detail = (
data.get("detail")
or data.get("message")
or data.get("error")
)
# fall back to repr for visibility
raise RuntimeError(
f"Confident API response missing 'id'. "
f"detail={detail!r} raw={type(data).__name__}:{repr(data)[:500]}"
data, link = api.send_request(
method=HttpMethods.POST,
endpoint=Endpoints.TEST_RUN_ENDPOINT,
body=body,
)

res = TestRunHttpResponse(
id=data["id"],
)
if not isinstance(data, dict) or "id" not in data:
# try to show helpful details
detail = None
if isinstance(data, dict):
detail = (
data.get("detail")
or data.get("message")
or data.get("error")
)
# fall back to repr for visibility
raise RuntimeError(
f"Confident API response missing 'id'. "
f"detail={detail!r} raw={type(data).__name__}:{repr(data)[:500]}"
)

################################################
### Send the remaining test cases in batches ###
################################################
total_remaining = len(remaining_test_cases_to_process)
num_remaining_batches = (
(total_remaining + BATCH_SIZE - 1) // BATCH_SIZE
if total_remaining > 0
else 0
)
res = TestRunHttpResponse(
id=data["id"],
)

for i in range(num_remaining_batches):
start_index = i * BATCH_SIZE
batch = remaining_test_cases_to_process[
start_index : start_index + BATCH_SIZE
]
################################################
### Send the remaining test cases in batches ###
################################################
total_remaining = len(remaining_test_cases_to_process)
num_remaining_batches = (
(total_remaining + BATCH_SIZE - 1) // BATCH_SIZE
if total_remaining > 0
else 0
)

if len(batch) == 0:
break # Should not happen with correct num_remaining_batches, but as a safeguard
for i in range(num_remaining_batches):
start_index = i * BATCH_SIZE
batch = remaining_test_cases_to_process[
start_index : start_index + BATCH_SIZE
]

# Create RemainingTestRun with the correct list populated
if is_conversational_run:
remaining_test_run = RemainingTestRun(
testRunId=res.id,
testCases=[], # This will be empty
conversationalTestCases=batch,
)
else:
remaining_test_run = RemainingTestRun(
testRunId=res.id,
testCases=batch,
conversationalTestCases=[], # This will be empty
)
if len(batch) == 0:
break # Should not happen with correct num_remaining_batches, but as a safeguard

body = None
try:
body = remaining_test_run.model_dump(
by_alias=True, exclude_none=True
)
except AttributeError:
# Pydantic version below 2.0
body = remaining_test_run.dict(by_alias=True, exclude_none=True)
# Create RemainingTestRun with the correct list populated
if is_conversational_run:
remaining_test_run = RemainingTestRun(
testRunId=res.id,
testCases=[], # This will be empty
conversationalTestCases=batch,
)
else:
remaining_test_run = RemainingTestRun(
testRunId=res.id,
testCases=batch,
conversationalTestCases=[], # This will be empty
)

try:
_, _ = api.send_request(
method=HttpMethods.PUT,
endpoint=Endpoints.TEST_RUN_ENDPOINT,
body=body,
)
except Exception as e:
message = f"Unexpected error when sending some test cases. Incomplete test run available at {link}"
raise Exception(message) from e
body = None
try:
body = remaining_test_run.model_dump(
by_alias=True, exclude_none=True
)
except AttributeError:
# Pydantic version below 2.0
body = remaining_test_run.dict(
by_alias=True, exclude_none=True
)

console.print(
"[rgb(5,245,141)]✓[/rgb(5,245,141)] Done 🎉! View results on "
f"[link={link}]{link}[/link]"
)
self.save_final_test_run_link(link)
open_browser(link)
return link, res.id
try:
_, _ = api.send_request(
method=HttpMethods.PUT,
endpoint=Endpoints.TEST_RUN_ENDPOINT,
body=body,
)
except Exception as e:
message = f"Unexpected error when sending some test cases. Incomplete test run available at {link}"
raise Exception(message) from e

console.print(
"[rgb(5,245,141)]✓[/rgb(5,245,141)] Done 🎉! View results on "
f"[link={link}]{link}[/link]"
)
self.save_final_test_run_link(link)
open_browser(link)
return link, res.id
finally:
test_run.test_cases = original_test_cases
test_run.conversational_test_cases = (
original_conversational_test_cases
)
test_run.prompts = original_prompts

def save_test_run_locally(self):
local_folder = os.getenv("DEEPEVAL_RESULTS_FOLDER")
Expand Down
108 changes: 107 additions & 1 deletion tests/test_core/test_run/test_run_manager.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import os
import portalocker
import pytest

import deepeval.test_run.test_run as tr_mod

from types import SimpleNamespace

from deepeval.test_case import LLMTestCase
from deepeval.test_run.test_run import TestRunManager, LLMApiTestCase
from deepeval.test_run.test_run import (
LLMApiTestCase,
PromptData,
TestRun,
TestRunManager,
)
from tests.test_core.helpers import _make_fake_portalocker
from tests.test_core.stubs import RecordingPortalockerLock

Expand Down Expand Up @@ -150,3 +156,103 @@ def fake_fsync(fd: int) -> None:
fsync_calls
), "save_test_run(..., save_under_key=...) should call os.fsync(file.fileno())"
assert fsync_calls[-1] == f.fileno()


def _make_api_test_cases(count: int):
    """Build ``count`` minimal LLMApiTestCase fixtures with sequential names.

    Each case gets a unique ``name``/``input``/``actual_output`` derived from
    its index, and ``order`` equal to that index.
    """
    cases = []
    for idx in range(count):
        cases.append(
            LLMApiTestCase(
                name=f"tc{idx}",
                input=f"in-{idx}",
                actual_output=f"out-{idx}",
                order=idx,
            )
        )
    return cases


def test_post_test_run_restores_full_test_case_list_after_batched_upload(
    monkeypatch,
):
    """After a successful batched upload, post_test_run must restore the
    original test-case list and prompts on the TestRun object."""
    manager = TestRunManager()
    run = TestRun(
        testCases=_make_api_test_cases(45),
        prompts=[PromptData(alias="prompt-1")],
    )
    kept_cases = run.test_cases
    kept_prompts = run.prompts
    recorded = []

    class FakeApi:
        # Records (method, #testCases, #conversationalTestCases) per request
        # and mimics a successful POST followed by successful PUTs.
        def send_request(self, method, endpoint, body):
            recorded.append(
                (
                    method,
                    len(body["testCases"]),
                    len(body["conversationalTestCases"]),
                )
            )
            if method != tr_mod.HttpMethods.POST:
                return {"ok": True}, None
            return {"id": "run-id"}, "https://confident.example/run-id"

    monkeypatch.setattr(tr_mod, "Api", FakeApi)
    monkeypatch.setattr(tr_mod, "open_browser", lambda link: None)
    monkeypatch.setattr(tr_mod.console, "print", lambda *args, **kwargs: None)
    monkeypatch.setattr(manager, "save_final_test_run_link", lambda link: None)

    outcome = manager.post_test_run(run)

    assert outcome == ("https://confident.example/run-id", "run-id")
    # The run object must be left exactly as the caller handed it in.
    assert run.test_cases is kept_cases
    assert len(run.test_cases) == 45
    assert run.test_cases[0].name == "tc0"
    assert run.test_cases[-1].name == "tc44"
    assert run.prompts is kept_prompts
    # 45 cases => one POST with the first 40, one PUT with the remaining 5.
    assert recorded == [
        (tr_mod.HttpMethods.POST, 40, 0),
        (tr_mod.HttpMethods.PUT, 5, 0),
    ]


def test_post_test_run_restores_full_test_case_list_when_batch_upload_fails(
    monkeypatch,
):
    """Even when a remaining-batch PUT raises, post_test_run must restore the
    original test-case list and prompts on the TestRun object."""
    manager = TestRunManager()
    run = TestRun(
        testCases=_make_api_test_cases(45),
        prompts=[PromptData(alias="prompt-1")],
    )
    kept_cases = run.test_cases
    kept_prompts = run.prompts
    recorded = []

    class FakeApi:
        # Records each request, succeeds on the initial POST, then fails
        # every follow-up PUT to simulate a mid-upload error.
        def send_request(self, method, endpoint, body):
            recorded.append(
                (
                    method,
                    len(body["testCases"]),
                    len(body["conversationalTestCases"]),
                )
            )
            if method != tr_mod.HttpMethods.POST:
                raise RuntimeError("upload failed")
            return {"id": "run-id"}, "https://confident.example/run-id"

    monkeypatch.setattr(tr_mod, "Api", FakeApi)
    monkeypatch.setattr(tr_mod, "open_browser", lambda link: None)
    monkeypatch.setattr(tr_mod.console, "print", lambda *args, **kwargs: None)
    monkeypatch.setattr(manager, "save_final_test_run_link", lambda link: None)

    with pytest.raises(
        Exception, match="Unexpected error when sending some test cases"
    ):
        manager.post_test_run(run)

    # Restoration must happen despite the exception (finally semantics).
    assert run.test_cases is kept_cases
    assert len(run.test_cases) == 45
    assert run.test_cases[0].name == "tc0"
    assert run.test_cases[-1].name == "tc44"
    assert run.prompts is kept_prompts
    # The failing PUT is still recorded before it raises.
    assert recorded == [
        (tr_mod.HttpMethods.POST, 40, 0),
        (tr_mod.HttpMethods.PUT, 5, 0),
    ]
Loading