Defer viewer auto-open to after PDF write in run_evaluation_and_report

DataGomes · DataGomes · commit 860211b4ec21 · 2026-05-11T11:02:00.000-07:00
When the one-shot run_evaluation_and_report tool ran the eval, the
sub-call to handle_run_evaluation auto-opened the viewer immediately
after the eval finished. The viewer loaded the /results page before
generate_report had a chance to write the PDF, so the page showed
"no report has been generated yet" — and the user had to manually
refresh.

- handle_run_evaluation now honors an optional openViewer arg (default
  True for backwards compat). When False it skips the auto-open and
  omits viewResults from the response so the caller owns it.
- run_evaluation_and_report passes openViewer=False, then opens the
  viewer itself only after generate_report returns. Same
  ensure_viewer_running plumbing, same error fallback.
diff --git a/eval_mcp/server.py b/eval_mcp/server.py
@@ -809,9 +809,13 @@ async def run_evaluation_and_report(
 
         config_name = create_data["configName"]
 
+    # Suppress the eval's auto-open: the viewer would load before the PDF
+    # is written and show "no report has been generated yet". We open it
+    # ourselves once the report is on disk.
     eval_result = await handle_run_evaluation({
         "configName": config_name,
         "user_id": uid,
+        "openViewer": False,
     })
     eval_data = json.loads(eval_result[0].text)
     eval_data["configName"] = config_name
@@ -835,6 +839,29 @@ async def run_evaluation_and_report(
     report_data = json.loads(report_result[0].text)
 
     eval_data["report"] = report_data
+
+    viewer_path = f"/results?group={run_id}"
+    try:
+        from eval_mcp.viewer import ensure_viewer_running
+        info = ensure_viewer_running(port=4001, open_path=viewer_path)
+        eval_data["viewerUrl"] = info["url"]
+        if info.get("browserOpened"):
+            eval_data["viewResults"] = (
+                f"Viewer already running; opened {info['url']}"
+                if info.get("alreadyRunning")
+                else f"Started viewer and opened {info['url']}"
+            )
+        elif info.get("error"):
+            eval_data["viewResults"] = (
+                f"Could not auto-start viewer ({info['error']}). "
+                f"Run `eval-mcp view` manually, then open {info['url']}"
+            )
+    except Exception as e:
+        eval_data["viewResults"] = (
+            f"Run `eval-mcp view` in your terminal, then open "
+            f"http://localhost:4001{viewer_path} ({e})"
+        )
+
     return json.dumps(eval_data, indent=2)
 
 
diff --git a/eval_mcp/tools/run_eval.py b/eval_mcp/tools/run_eval.py
@@ -501,24 +501,31 @@ async def handle_run_evaluation(args: Dict[str, Any]) -> List[TextContent]:
         # Auto-open the viewer so the user doesn't have to run a separate
         # command. On any failure we fall back to a manual-instructions string
         # rather than lying that the browser opened successfully.
+        # The one-shot `run_evaluation_and_report` path passes openViewer=False
+        # so it can open the viewer after the PDF report is written; otherwise
+        # the page loads before the report exists and shows "no report yet".
         view_results_msg = f"Run `eval-mcp view` in your terminal, then open {viewer_url}"
-        try:
-            from eval_mcp.viewer import ensure_viewer_running
-            info = ensure_viewer_running(port=4001, open_path=viewer_path)
-            viewer_url = info["url"]
-            if info.get("browserOpened"):
-                if info.get("alreadyRunning"):
-                    view_results_msg = f"Viewer already running; opened {viewer_url}"
-                else:
-                    view_results_msg = f"Started viewer and opened {viewer_url}"
-            elif info.get("error"):
-                logger.warning(f"Viewer auto-start: {info['error']}")
-                view_results_msg = (
-                    f"Could not auto-start viewer ({info['error']}). "
-                    f"Run `eval-mcp view` manually, then open {viewer_url}"
-                )
-        except Exception as e:
-            logger.warning(f"Could not auto-start viewer: {e}")
+        open_viewer = args.get("openViewer", True)
+        if open_viewer:
+            try:
+                from eval_mcp.viewer import ensure_viewer_running
+                info = ensure_viewer_running(port=4001, open_path=viewer_path)
+                viewer_url = info["url"]
+                if info.get("browserOpened"):
+                    if info.get("alreadyRunning"):
+                        view_results_msg = f"Viewer already running; opened {viewer_url}"
+                    else:
+                        view_results_msg = f"Started viewer and opened {viewer_url}"
+                elif info.get("error"):
+                    logger.warning(f"Viewer auto-start: {info['error']}")
+                    view_results_msg = (
+                        f"Could not auto-start viewer ({info['error']}). "
+                        f"Run `eval-mcp view` manually, then open {viewer_url}"
+                    )
+            except Exception as e:
+                logger.warning(f"Could not auto-start viewer: {e}")
+        else:
+            view_results_msg = None
 
         result = {
             "success": True,
@@ -529,13 +536,14 @@ async def handle_run_evaluation(args: Dict[str, Any]) -> List[TextContent]:
             "userDir": str(user_dir),
             "message": "Evaluation completed successfully",
             "summary": results_summary,
-            "viewResults": view_results_msg,
             "nextStep": (
                 f"Call generate_report(group_id=\"{run_id}\") to create a PDF "
                 f"report for the user. Pass `context` describing what they "
                 f"were evaluating so the narrative is tailored."
             ) if run_id else None,
         }
+        if view_results_msg is not None:
+            result["viewResults"] = view_results_msg
 
         return [TextContent(type="text", text=json.dumps(result, indent=2))]