Preserve underlying exception across executor step-failure wrap

yonromai · claude · yonromai · commit 12046aaa7cd2 · 2026-04-21T17:14:13.000-07:00
Marin's StepRunner used to wrap every pipeline failure into a generic `RuntimeError("N step(s) failed")` at the end of its orchestration loop. That wrap threw away the concrete exception type and pushed the real traceback two levels deep — in #5026 the outer log said only `RuntimeError: 1 step(s) failed`, burying the actual `LeaseLostError` that triage needed to see.

When exactly one step fails, re-raise the per-step `RuntimeError("Step failed: <name>")` directly; its `__cause__` is already the original exception, so the real type and traceback now surface at the top of the log. With multiple independent failures we keep the summary wrap (callers need the count) but still chain via `from failures[0]` as before.

Tests updated: the executor and step-runner tests previously asserted on the old `"1 step(s) failed"` message — they now match on the per-step message directly and keep validating that the original exception is preserved via `__cause__`. Added a new test for the multi-failure summary path.

Refs #5026

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/lib/marin/src/marin/execution/step_runner.py b/lib/marin/src/marin/execution/step_runner.py
@@ -232,6 +232,12 @@ def _do_launch(step: StepSpec) -> None:
             _flush_waiting()
 
         if failures:
+            # Preserve the original failing exception so triage can see its real
+            # type and traceback instead of a generic `RuntimeError: N step(s)
+            # failed` wrapper. See marin-community/marin#5026 for the incident
+            # where a LeaseLostError was buried under this wrap.
+            if len(failures) == 1:
+                raise failures[0]
             raise RuntimeError(f"{len(failures)} step(s) failed") from failures[0]
 
     def _launch_step(
diff --git a/tests/execution/test_executor.py b/tests/execution/test_executor.py
@@ -234,7 +234,10 @@ def fn_pass(config: MyConfig | None):
 
     with tempfile.TemporaryDirectory(prefix="executor-") as temp_dir:
         executor_initial = Executor(prefix=temp_dir, executor_info_base_path=temp_dir)
-        with pytest.raises(RuntimeError, match=r"1 step\(s\) failed"):
+        # Single-failure runs re-raise the per-step error directly so the
+        # original exception type/traceback stays visible; see
+        # marin-community/marin#5026.
+        with pytest.raises(RuntimeError, match=r"Step failed: b_"):
             executor_initial.run(steps=[a])
 
         with pytest.raises(FileNotFoundError):
diff --git a/tests/execution/test_step_runner.py b/tests/execution/test_step_runner.py
@@ -398,7 +398,14 @@ def test_runner_raises_clear_error_for_unmet_deps(tmp_path: Path):
 
 
 def test_runner_preserves_underlying_step_exception(tmp_path: Path):
-    """The top-level runner error should retain the original failing exception as a cause."""
+    """The top-level runner error should retain the original failing exception as a cause.
+
+    With a single failed step, the runner re-raises the per-step RuntimeError
+    directly rather than wrapping it in a generic "N step(s) failed" — the
+    per-step error already has the original exception chained via __cause__,
+    so the real exception type and traceback surface at the top of the report.
+    See marin-community/marin#5026.
+    """
 
     def failing_step(_output_path: str) -> None:
         raise ValueError("sentinel step failure")
@@ -410,14 +417,43 @@ def failing_step(_output_path: str) -> None:
     )
 
     runner = StepRunner()
-    with pytest.raises(RuntimeError, match=r"1 step\(s\) failed") as exc_info:
+    with pytest.raises(RuntimeError, match=r"Step failed: failing_step") as exc_info:
         runner.run([step])
 
-    step_failure = exc_info.value.__cause__
-    assert isinstance(step_failure, RuntimeError)
-    assert "Step failed: failing_step" in str(step_failure)
-    assert isinstance(step_failure.__cause__, ValueError)
-    assert "sentinel step failure" in str(step_failure.__cause__)
+    # Original ValueError is preserved as the __cause__ of the per-step error.
+    assert isinstance(exc_info.value.__cause__, ValueError)
+    assert "sentinel step failure" in str(exc_info.value.__cause__)
+
+
+def test_runner_summarizes_multiple_step_failures(tmp_path: Path):
+    """With multiple independent failures, the runner summarizes and chains the first."""
+
+    def failing_step_a(_output_path: str) -> None:
+        raise ValueError("failure A")
+
+    def failing_step_b(_output_path: str) -> None:
+        raise RuntimeError("failure B")
+
+    step_a = StepSpec(
+        name="step_a",
+        override_output_path=(tmp_path / "step_a").as_posix(),
+        fn=failing_step_a,
+    )
+    step_b = StepSpec(
+        name="step_b",
+        override_output_path=(tmp_path / "step_b").as_posix(),
+        fn=failing_step_b,
+    )
+
+    runner = StepRunner()
+    with pytest.raises(RuntimeError, match=r"2 step\(s\) failed") as exc_info:
+        runner.run([step_a, step_b])
+
+    # The first failure is chained so its original exception type/traceback
+    # remains reachable via __cause__.__cause__.
+    first = exc_info.value.__cause__
+    assert isinstance(first, RuntimeError)
+    assert "Step failed:" in str(first)
 
 
 # ---------------------------------------------------------------------------