Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 101 additions & 88 deletions deepeval/test_run/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,108 +862,121 @@ def post_test_run(self, test_run: TestRun) -> Optional[Tuple[str, str]]:
"Sending a large test run to Confident, this might take a bit longer than usual..."
)

####################
### POST REQUEST ###
####################
if is_conversational_run:
test_run.conversational_test_cases = initial_batch
else:
test_run.test_cases = initial_batch
original_test_cases = test_run.test_cases
original_conversational_test_cases = test_run.conversational_test_cases
original_prompts = test_run.prompts

try:
test_run.prompts = None
body = test_run.model_dump(by_alias=True, exclude_none=True)
except AttributeError:
# Pydantic version below 2.0
body = test_run.dict(by_alias=True, exclude_none=True)
####################
### POST REQUEST ###
####################
if is_conversational_run:
test_run.conversational_test_cases = initial_batch
else:
test_run.test_cases = initial_batch

json_str = json.dumps(body, cls=TestRunEncoder)
body = json.loads(json_str)
try:
test_run.prompts = None
body = test_run.model_dump(by_alias=True, exclude_none=True)
except AttributeError:
# Pydantic version below 2.0
body = test_run.dict(by_alias=True, exclude_none=True)

data, link = api.send_request(
method=HttpMethods.POST,
endpoint=Endpoints.TEST_RUN_ENDPOINT,
body=body,
)
json_str = json.dumps(body, cls=TestRunEncoder)
body = json.loads(json_str)

if not isinstance(data, dict) or "id" not in data:
# try to show helpful details
detail = None
if isinstance(data, dict):
detail = (
data.get("detail")
or data.get("message")
or data.get("error")
)
# fall back to repr for visibility
raise RuntimeError(
f"Confident API response missing 'id'. "
f"detail={detail!r} raw={type(data).__name__}:{repr(data)[:500]}"
data, link = api.send_request(
method=HttpMethods.POST,
endpoint=Endpoints.TEST_RUN_ENDPOINT,
body=body,
)

res = TestRunHttpResponse(
id=data["id"],
)
if not isinstance(data, dict) or "id" not in data:
# try to show helpful details
detail = None
if isinstance(data, dict):
detail = (
data.get("detail")
or data.get("message")
or data.get("error")
)
# fall back to repr for visibility
raise RuntimeError(
f"Confident API response missing 'id'. "
f"detail={detail!r} raw={type(data).__name__}:{repr(data)[:500]}"
)

################################################
### Send the remaining test cases in batches ###
################################################
total_remaining = len(remaining_test_cases_to_process)
num_remaining_batches = (
(total_remaining + BATCH_SIZE - 1) // BATCH_SIZE
if total_remaining > 0
else 0
)
res = TestRunHttpResponse(
id=data["id"],
)

for i in range(num_remaining_batches):
start_index = i * BATCH_SIZE
batch = remaining_test_cases_to_process[
start_index : start_index + BATCH_SIZE
]
################################################
### Send the remaining test cases in batches ###
################################################
total_remaining = len(remaining_test_cases_to_process)
num_remaining_batches = (
(total_remaining + BATCH_SIZE - 1) // BATCH_SIZE
if total_remaining > 0
else 0
)

if len(batch) == 0:
break # Should not happen with correct num_remaining_batches, but as a safeguard
for i in range(num_remaining_batches):
start_index = i * BATCH_SIZE
batch = remaining_test_cases_to_process[
start_index : start_index + BATCH_SIZE
]

# Create RemainingTestRun with the correct list populated
if is_conversational_run:
remaining_test_run = RemainingTestRun(
testRunId=res.id,
testCases=[], # This will be empty
conversationalTestCases=batch,
)
else:
remaining_test_run = RemainingTestRun(
testRunId=res.id,
testCases=batch,
conversationalTestCases=[], # This will be empty
)
if len(batch) == 0:
break # Should not happen with correct num_remaining_batches, but as a safeguard

body = None
try:
body = remaining_test_run.model_dump(
by_alias=True, exclude_none=True
)
except AttributeError:
# Pydantic version below 2.0
body = remaining_test_run.dict(by_alias=True, exclude_none=True)
# Create RemainingTestRun with the correct list populated
if is_conversational_run:
remaining_test_run = RemainingTestRun(
testRunId=res.id,
testCases=[], # This will be empty
conversationalTestCases=batch,
)
else:
remaining_test_run = RemainingTestRun(
testRunId=res.id,
testCases=batch,
conversationalTestCases=[], # This will be empty
)

try:
_, _ = api.send_request(
method=HttpMethods.PUT,
endpoint=Endpoints.TEST_RUN_ENDPOINT,
body=body,
)
except Exception as e:
message = f"Unexpected error when sending some test cases. Incomplete test run available at {link}"
raise Exception(message) from e
body = None
try:
body = remaining_test_run.model_dump(
by_alias=True, exclude_none=True
)
except AttributeError:
# Pydantic version below 2.0
body = remaining_test_run.dict(
by_alias=True, exclude_none=True
)

console.print(
"[rgb(5,245,141)]✓[/rgb(5,245,141)] Done 🎉! View results on "
f"[link={link}]{link}[/link]"
)
self.save_final_test_run_link(link)
open_browser(link)
return link, res.id
try:
_, _ = api.send_request(
method=HttpMethods.PUT,
endpoint=Endpoints.TEST_RUN_ENDPOINT,
body=body,
)
except Exception as e:
message = f"Unexpected error when sending some test cases. Incomplete test run available at {link}"
raise Exception(message) from e

console.print(
"[rgb(5,245,141)]✓[/rgb(5,245,141)] Done 🎉! View results on "
f"[link={link}]{link}[/link]"
)
self.save_final_test_run_link(link)
open_browser(link)
return link, res.id
finally:
test_run.test_cases = original_test_cases
test_run.conversational_test_cases = (
original_conversational_test_cases
)
test_run.prompts = original_prompts

def save_test_run_locally(self):
local_folder = os.getenv("DEEPEVAL_RESULTS_FOLDER")
Expand Down
108 changes: 107 additions & 1 deletion tests/test_core/test_run/test_run_manager.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import os
import portalocker
import pytest

import deepeval.test_run.test_run as tr_mod

from types import SimpleNamespace

from deepeval.test_case import LLMTestCase
from deepeval.test_run.test_run import TestRunManager, LLMApiTestCase
from deepeval.test_run.test_run import (
LLMApiTestCase,
PromptData,
TestRun,
TestRunManager,
)
from tests.test_core.helpers import _make_fake_portalocker
from tests.test_core.stubs import RecordingPortalockerLock

Expand Down Expand Up @@ -150,3 +156,103 @@ def fake_fsync(fd: int) -> None:
fsync_calls
), "save_test_run(..., save_under_key=...) should call os.fsync(file.fileno())"
assert fsync_calls[-1] == f.fileno()


def _make_api_test_cases(count: int):
    """Build ``count`` minimal LLMApiTestCase fixtures with sequential names.

    Each case gets a unique ``name``/``input``/``actual_output`` derived from
    its index, and ``order`` equal to that index.
    """
    cases = []
    for idx in range(count):
        cases.append(
            LLMApiTestCase(
                name=f"tc{idx}",
                input=f"in-{idx}",
                actual_output=f"out-{idx}",
                order=idx,
            )
        )
    return cases


def test_post_test_run_restores_full_test_case_list_after_batched_upload(
    monkeypatch,
):
    """After a successful batched upload, post_test_run must restore the
    original test-case list and prompts on the TestRun object."""
    manager = TestRunManager()
    run = TestRun(
        testCases=_make_api_test_cases(45),
        prompts=[PromptData(alias="prompt-1")],
    )
    kept_cases = run.test_cases
    kept_prompts = run.prompts
    recorded = []

    class FakeApi:
        # Records (method, #testCases, #conversationalTestCases) per request
        # and mimics a successful POST followed by successful PUTs.
        def send_request(self, method, endpoint, body):
            recorded.append(
                (
                    method,
                    len(body["testCases"]),
                    len(body["conversationalTestCases"]),
                )
            )
            if method != tr_mod.HttpMethods.POST:
                return {"ok": True}, None
            return {"id": "run-id"}, "https://confident.example/run-id"

    monkeypatch.setattr(tr_mod, "Api", FakeApi)
    monkeypatch.setattr(tr_mod, "open_browser", lambda link: None)
    monkeypatch.setattr(tr_mod.console, "print", lambda *args, **kwargs: None)
    monkeypatch.setattr(manager, "save_final_test_run_link", lambda link: None)

    outcome = manager.post_test_run(run)

    assert outcome == ("https://confident.example/run-id", "run-id")
    # The run object must be left exactly as the caller handed it in.
    assert run.test_cases is kept_cases
    assert len(run.test_cases) == 45
    assert run.test_cases[0].name == "tc0"
    assert run.test_cases[-1].name == "tc44"
    assert run.prompts is kept_prompts
    # 45 cases => one POST with the first 40, one PUT with the remaining 5.
    assert recorded == [
        (tr_mod.HttpMethods.POST, 40, 0),
        (tr_mod.HttpMethods.PUT, 5, 0),
    ]


def test_post_test_run_restores_full_test_case_list_when_batch_upload_fails(
    monkeypatch,
):
    """Even when a remaining-batch PUT raises, post_test_run must restore the
    original test-case list and prompts on the TestRun object."""
    manager = TestRunManager()
    run = TestRun(
        testCases=_make_api_test_cases(45),
        prompts=[PromptData(alias="prompt-1")],
    )
    kept_cases = run.test_cases
    kept_prompts = run.prompts
    recorded = []

    class FakeApi:
        # Records each request, succeeds on the initial POST, then fails
        # every follow-up PUT to simulate a mid-upload error.
        def send_request(self, method, endpoint, body):
            recorded.append(
                (
                    method,
                    len(body["testCases"]),
                    len(body["conversationalTestCases"]),
                )
            )
            if method != tr_mod.HttpMethods.POST:
                raise RuntimeError("upload failed")
            return {"id": "run-id"}, "https://confident.example/run-id"

    monkeypatch.setattr(tr_mod, "Api", FakeApi)
    monkeypatch.setattr(tr_mod, "open_browser", lambda link: None)
    monkeypatch.setattr(tr_mod.console, "print", lambda *args, **kwargs: None)
    monkeypatch.setattr(manager, "save_final_test_run_link", lambda link: None)

    with pytest.raises(
        Exception, match="Unexpected error when sending some test cases"
    ):
        manager.post_test_run(run)

    # Restoration must happen despite the exception (finally semantics).
    assert run.test_cases is kept_cases
    assert len(run.test_cases) == 45
    assert run.test_cases[0].name == "tc0"
    assert run.test_cases[-1].name == "tc44"
    assert run.prompts is kept_prompts
    # The failing PUT is still recorded before it raises.
    assert recorded == [
        (tr_mod.HttpMethods.POST, 40, 0),
        (tr_mod.HttpMethods.PUT, 5, 0),
    ]
Loading