fix: cap consecutive final_response validation retries (#3851)

strawgate · claude · web-flow · commit cae9333f4f3f · 2026-04-12T13:14:06.000-04:00
* Cap consecutive final_response validation retries to 3

  Previously, when the LLM repeatedly called final_response with data that failed validation, the retry loop would continue up to 100 times (the shared max_iterations limit), wasting tokens on a model that cannot satisfy the schema.

  Add _MAX_VALIDATION_RETRIES (default 3) that caps consecutive validation failures. The counter resets when the LLM calls other tools (not final_response), so the cap only applies to consecutive failures.

  Fixes #3848

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Add tests for consecutive validation retry cap

  Tests cover:
  - Validation failures within cap followed by success
  - Consecutive validation failures exceeding cap (raises RuntimeError)
  - Counter reset when LLM calls other tools between validation failures

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Slim down validation retry cap tests

  Reduce boilerplate with helper functions. Simplify counter-reset test from 5 calls to 4.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Fix static analysis: move imports to module level and format

  Move CreateMessageResultWithTools and ToolUseContent imports to the top of the test file so ty can resolve the names used in return-type annotations of the helper functions. Also fix ruff import sorting and formatting issues.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Align validation retry semantics with text-response retries

  Change `>=` to `>` so _MAX_VALIDATION_RETRIES means "number of retries after the initial attempt" (total = N+1), matching the convention used by _MAX_TEXT_RESPONSE_RETRIES in the text-response retry path.

  Before: _MAX=3 meant 3 total attempts (>= comparison)
  After: _MAX=3 means 1 initial + 3 retries = 4 total (> comparison)

  🤖 Generated with Claude Code

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/src/fastmcp/server/sampling/run.py b/src/fastmcp/server/sampling/run.py
@@ -46,6 +46,10 @@
 
 ResultT = TypeVar("ResultT")
 
+# Maximum number of consecutive final_response validation retries (not
+# counting the initial attempt) before aborting.  Total attempts = N + 1.
+_MAX_VALIDATION_RETRIES = 3
+
 # Simplified tool choice type - just the mode string instead of the full MCP object
 ToolChoiceOption = Literal["auto", "required", "none"]
 
@@ -615,6 +619,7 @@ async def sample_impl(
     current_messages: str | Sequence[str | SamplingMessage] = messages
 
     text_response_retries = 0
+    consecutive_validation_failures = 0
 
     for _iteration in range(max_iterations):
         step = await sample_step_impl(
@@ -631,9 +636,11 @@ async def sample_impl(
         )
 
         # Check for final_response tool call for structured output
+        had_final_response = False
         if result_type is not None and result_type is not str and step.is_tool_use:
             for tool_call in step.tool_calls:
                 if tool_call.name == "final_response":
+                    had_final_response = True
                     # Validate and return the structured result
                     type_adapter = get_cached_typeadapter(result_type)
 
@@ -660,6 +667,13 @@ async def sample_impl(
                             history=step.history,
                         )
                     except ValidationError as e:
+                        consecutive_validation_failures += 1
+                        if consecutive_validation_failures > _MAX_VALIDATION_RETRIES:
+                            raise RuntimeError(
+                                f"Structured output validation failed "
+                                f"{consecutive_validation_failures} consecutive "
+                                f"times for type {result_type.__name__}: {e}"
+                            ) from e
                         # Validation failed - add error as tool result
                         step.history.append(
                             SamplingMessage(
@@ -683,6 +697,10 @@ async def sample_impl(
                             )
                         )
 
+        # No final_response call this iteration — reset the validation counter
+        if not had_final_response:
+            consecutive_validation_failures = 0
+
         # If not a tool use response, we're done
         if not step.is_tool_use:
             # For structured output, the LLM must use the final_response tool
diff --git a/tests/client/test_sampling_result_types.py b/tests/client/test_sampling_result_types.py
@@ -1,5 +1,5 @@
 import pytest
-from mcp.types import TextContent
+from mcp.types import CreateMessageResultWithTools, TextContent, ToolUseContent
 
 from fastmcp import Client, Context, FastMCP
 from fastmcp.client.sampling import RequestContext, SamplingMessage, SamplingParams
@@ -550,3 +550,132 @@ async def t(context: Context) -> str:
 
         assert call_count == 1
         assert result.data == "hello"
+
+
+def _final_response(call_id: str, input_data: dict) -> CreateMessageResultWithTools:
+    """Build a final_response tool-use reply."""
+    return CreateMessageResultWithTools(
+        role="assistant",
+        content=[
+            ToolUseContent(
+                type="tool_use", id=call_id, name="final_response", input=input_data
+            )
+        ],
+        model="test-model",
+        stopReason="toolUse",
+    )
+
+
+def _tool_call(
+    call_id: str, name: str, input_data: dict
+) -> CreateMessageResultWithTools:
+    """Build a regular tool-use reply."""
+    return CreateMessageResultWithTools(
+        role="assistant",
+        content=[
+            ToolUseContent(type="tool_use", id=call_id, name=name, input=input_data)
+        ],
+        model="test-model",
+        stopReason="toolUse",
+    )
+
+
+class TestValidationRetryCap:
+    """Tests for the consecutive validation retry cap (PR #3851)."""
+
+    async def test_validation_failures_within_cap_then_success(self):
+        """Two consecutive failures followed by a valid response succeeds."""
+        from pydantic import BaseModel
+
+        class R(BaseModel):
+            value: int
+
+        call_count = 0
+
+        def handler(messages, params, ctx):
+            nonlocal call_count
+            call_count += 1
+            if call_count <= 2:
+                return _final_response(f"c{call_count}", {"value": "bad"})
+            return _final_response(f"c{call_count}", {"value": 99})
+
+        mcp = FastMCP(sampling_handler=handler)
+
+        @mcp.tool
+        async def t(context: Context) -> str:
+            r = await context.sample(messages="go", result_type=R)
+            return str(r.result.value)
+
+        async with Client(mcp) as client:
+            result = await client.call_tool("t", {})
+
+        assert call_count == 3
+        assert result.data == "99"
+
+    async def test_consecutive_validation_failures_exceed_cap(self):
+        """Always-invalid responses raise ToolError after exceeding the cap."""
+        from pydantic import BaseModel
+
+        from fastmcp.exceptions import ToolError
+        from fastmcp.server.sampling.run import _MAX_VALIDATION_RETRIES
+
+        class R(BaseModel):
+            value: int
+
+        call_count = 0
+
+        def handler(messages, params, ctx):
+            nonlocal call_count
+            call_count += 1
+            return _final_response(f"c{call_count}", {"value": "wrong"})
+
+        mcp = FastMCP(sampling_handler=handler)
+
+        @mcp.tool
+        async def t(context: Context) -> str:
+            return str((await context.sample(messages="go", result_type=R)).result)
+
+        async with Client(mcp) as client:
+            with pytest.raises(ToolError, match="consecutive"):
+                await client.call_tool("t", {})
+
+        # 1 initial attempt + _MAX_VALIDATION_RETRIES retries
+        assert call_count == _MAX_VALIDATION_RETRIES + 1
+
+    async def test_validation_counter_resets_after_other_tool_call(self):
+        """A tool call between validation failures resets the counter."""
+        from pydantic import BaseModel
+
+        class R(BaseModel):
+            value: int
+
+        def helper_tool(x: int) -> str:
+            """A helper tool."""
+            return f"result:{x}"
+
+        call_count = 0
+
+        def handler(messages, params, ctx):
+            nonlocal call_count
+            call_count += 1
+            # fail -> other tool (resets counter) -> fail -> succeed
+            if call_count == 1:
+                return _final_response("c1", {"value": "bad"})
+            if call_count == 2:
+                return _tool_call("c2", "helper_tool", {"x": 1})
+            if call_count == 3:
+                return _final_response("c3", {"value": "bad"})
+            return _final_response("c4", {"value": 42})
+
+        mcp = FastMCP(sampling_handler=handler)
+
+        @mcp.tool
+        async def t(context: Context) -> str:
+            r = await context.sample(messages="go", tools=[helper_tool], result_type=R)
+            return str(r.result.value)
+
+        async with Client(mcp) as client:
+            result = await client.call_tool("t", {})
+
+        assert call_count == 4
+        assert result.data == "42"