Skip to content

Commit 53d9a72

Browse files
committed
fix: restore test cases after batched upload truncation
1 parent 5205257 commit 53d9a72

2 files changed

Lines changed: 208 additions & 90 deletions

File tree

deepeval/test_run/test_run.py

Lines changed: 101 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
from rich.panel import Panel
4343
from rich.columns import Columns
4444

45-
4645
portalocker = None
4746
if not is_read_only_env():
4847
try:
@@ -863,108 +862,121 @@ def post_test_run(self, test_run: TestRun) -> Optional[Tuple[str, str]]:
863862
"Sending a large test run to Confident, this might take a bit longer than usual..."
864863
)
865864

866-
####################
867-
### POST REQUEST ###
868-
####################
869-
if is_conversational_run:
870-
test_run.conversational_test_cases = initial_batch
871-
else:
872-
test_run.test_cases = initial_batch
865+
original_test_cases = test_run.test_cases
866+
original_conversational_test_cases = test_run.conversational_test_cases
867+
original_prompts = test_run.prompts
873868

874869
try:
875-
test_run.prompts = None
876-
body = test_run.model_dump(by_alias=True, exclude_none=True)
877-
except AttributeError:
878-
# Pydantic version below 2.0
879-
body = test_run.dict(by_alias=True, exclude_none=True)
870+
####################
871+
### POST REQUEST ###
872+
####################
873+
if is_conversational_run:
874+
test_run.conversational_test_cases = initial_batch
875+
else:
876+
test_run.test_cases = initial_batch
880877

881-
json_str = json.dumps(body, cls=TestRunEncoder)
882-
body = json.loads(json_str)
878+
try:
879+
test_run.prompts = None
880+
body = test_run.model_dump(by_alias=True, exclude_none=True)
881+
except AttributeError:
882+
# Pydantic version below 2.0
883+
body = test_run.dict(by_alias=True, exclude_none=True)
883884

884-
data, link = api.send_request(
885-
method=HttpMethods.POST,
886-
endpoint=Endpoints.TEST_RUN_ENDPOINT,
887-
body=body,
888-
)
885+
json_str = json.dumps(body, cls=TestRunEncoder)
886+
body = json.loads(json_str)
889887

890-
if not isinstance(data, dict) or "id" not in data:
891-
# try to show helpful details
892-
detail = None
893-
if isinstance(data, dict):
894-
detail = (
895-
data.get("detail")
896-
or data.get("message")
897-
or data.get("error")
898-
)
899-
# fall back to repr for visibility
900-
raise RuntimeError(
901-
f"Confident API response missing 'id'. "
902-
f"detail={detail!r} raw={type(data).__name__}:{repr(data)[:500]}"
888+
data, link = api.send_request(
889+
method=HttpMethods.POST,
890+
endpoint=Endpoints.TEST_RUN_ENDPOINT,
891+
body=body,
903892
)
904893

905-
res = TestRunHttpResponse(
906-
id=data["id"],
907-
)
894+
if not isinstance(data, dict) or "id" not in data:
895+
# try to show helpful details
896+
detail = None
897+
if isinstance(data, dict):
898+
detail = (
899+
data.get("detail")
900+
or data.get("message")
901+
or data.get("error")
902+
)
903+
# fall back to repr for visibility
904+
raise RuntimeError(
905+
f"Confident API response missing 'id'. "
906+
f"detail={detail!r} raw={type(data).__name__}:{repr(data)[:500]}"
907+
)
908908

909-
################################################
910-
### Send the remaining test cases in batches ###
911-
################################################
912-
total_remaining = len(remaining_test_cases_to_process)
913-
num_remaining_batches = (
914-
(total_remaining + BATCH_SIZE - 1) // BATCH_SIZE
915-
if total_remaining > 0
916-
else 0
917-
)
909+
res = TestRunHttpResponse(
910+
id=data["id"],
911+
)
918912

919-
for i in range(num_remaining_batches):
920-
start_index = i * BATCH_SIZE
921-
batch = remaining_test_cases_to_process[
922-
start_index : start_index + BATCH_SIZE
923-
]
913+
################################################
914+
### Send the remaining test cases in batches ###
915+
################################################
916+
total_remaining = len(remaining_test_cases_to_process)
917+
num_remaining_batches = (
918+
(total_remaining + BATCH_SIZE - 1) // BATCH_SIZE
919+
if total_remaining > 0
920+
else 0
921+
)
924922

925-
if len(batch) == 0:
926-
break # Should not happen with correct num_remaining_batches, but as a safeguard
923+
for i in range(num_remaining_batches):
924+
start_index = i * BATCH_SIZE
925+
batch = remaining_test_cases_to_process[
926+
start_index : start_index + BATCH_SIZE
927+
]
927928

928-
# Create RemainingTestRun with the correct list populated
929-
if is_conversational_run:
930-
remaining_test_run = RemainingTestRun(
931-
testRunId=res.id,
932-
testCases=[], # This will be empty
933-
conversationalTestCases=batch,
934-
)
935-
else:
936-
remaining_test_run = RemainingTestRun(
937-
testRunId=res.id,
938-
testCases=batch,
939-
conversationalTestCases=[], # This will be empty
940-
)
929+
if len(batch) == 0:
930+
break # Should not happen with correct num_remaining_batches, but as a safeguard
941931

942-
body = None
943-
try:
944-
body = remaining_test_run.model_dump(
945-
by_alias=True, exclude_none=True
946-
)
947-
except AttributeError:
948-
# Pydantic version below 2.0
949-
body = remaining_test_run.dict(by_alias=True, exclude_none=True)
932+
# Create RemainingTestRun with the correct list populated
933+
if is_conversational_run:
934+
remaining_test_run = RemainingTestRun(
935+
testRunId=res.id,
936+
testCases=[], # This will be empty
937+
conversationalTestCases=batch,
938+
)
939+
else:
940+
remaining_test_run = RemainingTestRun(
941+
testRunId=res.id,
942+
testCases=batch,
943+
conversationalTestCases=[], # This will be empty
944+
)
950945

951-
try:
952-
_, _ = api.send_request(
953-
method=HttpMethods.PUT,
954-
endpoint=Endpoints.TEST_RUN_ENDPOINT,
955-
body=body,
956-
)
957-
except Exception as e:
958-
message = f"Unexpected error when sending some test cases. Incomplete test run available at {link}"
959-
raise Exception(message) from e
946+
body = None
947+
try:
948+
body = remaining_test_run.model_dump(
949+
by_alias=True, exclude_none=True
950+
)
951+
except AttributeError:
952+
# Pydantic version below 2.0
953+
body = remaining_test_run.dict(
954+
by_alias=True, exclude_none=True
955+
)
960956

961-
console.print(
962-
"[rgb(5,245,141)]✓[/rgb(5,245,141)] Done 🎉! View results on "
963-
f"[link={link}]{link}[/link]"
964-
)
965-
self.save_final_test_run_link(link)
966-
open_browser(link)
967-
return link, res.id
957+
try:
958+
_, _ = api.send_request(
959+
method=HttpMethods.PUT,
960+
endpoint=Endpoints.TEST_RUN_ENDPOINT,
961+
body=body,
962+
)
963+
except Exception as e:
964+
message = f"Unexpected error when sending some test cases. Incomplete test run available at {link}"
965+
raise Exception(message) from e
966+
967+
console.print(
968+
"[rgb(5,245,141)]✓[/rgb(5,245,141)] Done 🎉! View results on "
969+
f"[link={link}]{link}[/link]"
970+
)
971+
self.save_final_test_run_link(link)
972+
open_browser(link)
973+
return link, res.id
974+
finally:
975+
test_run.test_cases = original_test_cases
976+
test_run.conversational_test_cases = (
977+
original_conversational_test_cases
978+
)
979+
test_run.prompts = original_prompts
968980

969981
def save_test_run_locally(self):
970982
local_folder = os.getenv("DEEPEVAL_RESULTS_FOLDER")

tests/test_core/test_run/test_run_manager.py

Lines changed: 107 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
import os
22
import portalocker
3+
import pytest
34

45
import deepeval.test_run.test_run as tr_mod
56

67
from types import SimpleNamespace
78

89
from deepeval.test_case import LLMTestCase
9-
from deepeval.test_run.test_run import TestRunManager, LLMApiTestCase
10+
from deepeval.test_run.test_run import (
11+
LLMApiTestCase,
12+
PromptData,
13+
TestRun,
14+
TestRunManager,
15+
)
1016
from tests.test_core.helpers import _make_fake_portalocker
1117
from tests.test_core.stubs import RecordingPortalockerLock
1218

@@ -150,3 +156,103 @@ def fake_fsync(fd: int) -> None:
150156
fsync_calls
151157
), "save_test_run(..., save_under_key=...) should call os.fsync(file.fileno())"
152158
assert fsync_calls[-1] == f.fileno()
159+
160+
161+
def _make_api_test_cases(count: int):
    """Build ``count`` ``LLMApiTestCase`` fixtures for upload tests.

    Case ``i`` is named ``tc{i}``, carries ``in-{i}`` / ``out-{i}`` as its
    input and actual output, and uses ``i`` as its ``order``.
    """
    cases = []
    for idx in range(count):
        cases.append(
            LLMApiTestCase(
                name=f"tc{idx}",
                input=f"in-{idx}",
                actual_output=f"out-{idx}",
                order=idx,
            )
        )
    return cases
171+
172+
173+
def test_post_test_run_restores_full_test_case_list_after_batched_upload(
    monkeypatch,
):
    """A successful batched upload must leave the caller's TestRun intact.

    Posts a run with 45 test cases through a fake API and checks that the
    run still holds the same test-case list and prompts objects afterwards,
    and that the upload was split into a POST of 40 and a PUT of 5.
    """
    trm = TestRunManager()
    test_run = TestRun(
        testCases=_make_api_test_cases(45),
        prompts=[PromptData(alias="prompt-1")],
    )
    # Capture the objects held by the model so identity can be checked later.
    cases_before = test_run.test_cases
    prompts_before = test_run.prompts
    sent_batches = []

    class FakeApi:
        # Stand-in for the API client: records each request's method and
        # payload sizes instead of doing any network I/O.
        def send_request(self, method, endpoint, body):
            record = (
                method,
                len(body["testCases"]),
                len(body["conversationalTestCases"]),
            )
            sent_batches.append(record)
            if method != tr_mod.HttpMethods.POST:
                return {"ok": True}, None
            return {"id": "run-id"}, "https://confident.example/run-id"

    def _ignore_link(link):
        return None

    def _ignore_print(*args, **kwargs):
        return None

    # Silence every side effect post_test_run triggers after the upload.
    monkeypatch.setattr(tr_mod, "Api", FakeApi)
    monkeypatch.setattr(tr_mod, "open_browser", _ignore_link)
    monkeypatch.setattr(tr_mod.console, "print", _ignore_print)
    monkeypatch.setattr(trm, "save_final_test_run_link", _ignore_link)

    result = trm.post_test_run(test_run)

    expected_result = ("https://confident.example/run-id", "run-id")
    assert result == expected_result

    # The full, original list must be back in place (same object, same content).
    assert test_run.test_cases is cases_before
    assert len(test_run.test_cases) == 45
    assert test_run.test_cases[0].name == "tc0"
    assert test_run.test_cases[-1].name == "tc44"
    assert test_run.prompts is prompts_before

    expected_batches = [
        (tr_mod.HttpMethods.POST, 40, 0),
        (tr_mod.HttpMethods.PUT, 5, 0),
    ]
    assert sent_batches == expected_batches
214+
215+
216+
def test_post_test_run_restores_full_test_case_list_when_batch_upload_fails(
    monkeypatch,
):
    """A failing follow-up batch must still leave the caller's TestRun intact.

    The fake API accepts the initial POST and raises on any other request.
    post_test_run is expected to raise, while the run keeps the same
    test-case list and prompts objects it had before the call.
    """
    trm = TestRunManager()
    test_run = TestRun(
        testCases=_make_api_test_cases(45),
        prompts=[PromptData(alias="prompt-1")],
    )
    # Capture the objects held by the model so identity can be checked later.
    cases_before = test_run.test_cases
    prompts_before = test_run.prompts
    sent_batches = []

    class FakeApi:
        # Stand-in for the API client: records each request, succeeds on
        # POST and fails on everything else.
        def send_request(self, method, endpoint, body):
            record = (
                method,
                len(body["testCases"]),
                len(body["conversationalTestCases"]),
            )
            sent_batches.append(record)
            if method != tr_mod.HttpMethods.POST:
                raise RuntimeError("upload failed")
            return {"id": "run-id"}, "https://confident.example/run-id"

    def _ignore_link(link):
        return None

    def _ignore_print(*args, **kwargs):
        return None

    # Silence every side effect post_test_run could trigger.
    monkeypatch.setattr(tr_mod, "Api", FakeApi)
    monkeypatch.setattr(tr_mod, "open_browser", _ignore_link)
    monkeypatch.setattr(tr_mod.console, "print", _ignore_print)
    monkeypatch.setattr(trm, "save_final_test_run_link", _ignore_link)

    with pytest.raises(
        Exception, match="Unexpected error when sending some test cases"
    ):
        trm.post_test_run(test_run)

    # Even on failure the original list must be back (same object, same content).
    assert test_run.test_cases is cases_before
    assert len(test_run.test_cases) == 45
    assert test_run.test_cases[0].name == "tc0"
    assert test_run.test_cases[-1].name == "tc44"
    assert test_run.prompts is prompts_before

    # The failing PUT was still attempted exactly once after the POST.
    expected_batches = [
        (tr_mod.HttpMethods.POST, 40, 0),
        (tr_mod.HttpMethods.PUT, 5, 0),
    ]
    assert sent_batches == expected_batches

0 commit comments

Comments
 (0)