fix(micro-fix): increase stall threshold

TimothyZhang7 · TimothyZhang7 · commit add6efe6f1f1 · 2026-03-09T18:40:13.000-07:00
diff --git a/core/framework/graph/event_loop_node.py b/core/framework/graph/event_loop_node.py
@@ -165,7 +165,7 @@ class LoopConfig:
     max_tool_calls_per_turn: int = 30
     judge_every_n_turns: int = 1
     stall_detection_threshold: int = 3
-    stall_similarity_threshold: float = 0.7
+    stall_similarity_threshold: float = 0.85
     max_history_tokens: int = 32_000
     store_prefix: str = ""
 
@@ -2955,22 +2955,22 @@ def _ngrams(s: str) -> set[str]:
     def _is_stalled(self, recent_responses: list[str]) -> bool:
         """Detect stall using n-gram similarity.
 
-        Detects when N consecutive responses have similarity >= threshold.
-        This catches phrases like "I'm still stuck" vs "I'm stuck".
+        Detects when every adjacent pair of recent responses is similar
+        (>= threshold).  A single dissimilar adjacent pair resets the signal.
+        This catches phrases like "I'm still stuck" vs "I'm stuck"
+        without false-positives on "attempt 1" vs "attempt 2".
         """
         if len(recent_responses) < self._config.stall_detection_threshold:
             return False
         if not recent_responses[0]:
             return False
 
         threshold = self._config.stall_similarity_threshold
-        # Check similarity against all recent responses (excluding self)
-        for i, resp in enumerate(recent_responses):
-            # Compare against all previous responses
-            for prev in recent_responses[:i]:
-                if self._ngram_similarity(resp, prev) >= threshold:
-                    return True
-        return False
+        # Every consecutive pair must be similar
+        for i in range(1, len(recent_responses)):
+            if self._ngram_similarity(recent_responses[i], recent_responses[i - 1]) < threshold:
+                return False
+        return True
 
     @staticmethod
     def _is_transient_error(exc: BaseException) -> bool:
@@ -3049,10 +3049,11 @@ def _is_tool_doom_loop(
         self,
         recent_tool_fingerprints: list[list[tuple[str, str]]],
     ) -> tuple[bool, str]:
-        """Detect doom loop using n-gram similarity on tool inputs.
+        """Detect doom loop via exact fingerprint match.
 
-        Detects when N consecutive turns have similar tool calls.
-        Similarity applies to the canonicalized tool input strings.
+        Detects when N consecutive turns invoke the same tools with
+        identical (canonicalized) arguments.  Different arguments mean
+        different work, so only exact matches count.
 
         Returns (is_doom_loop, description).
         """
@@ -3065,23 +3066,12 @@ def _is_tool_doom_loop(
         if not first:
             return False, ""
 
-        # Convert a turn's list of (name, args) pairs to a single comparable string.
-        def _turn_sig(fp: list[tuple[str, str]]) -> str:
-            return "|".join(f"{name}:{args}" for name, args in fp)
-
-        first_sig = _turn_sig(first)
-        similarity_threshold = self._config.stall_similarity_threshold
-        similar_count = sum(
-            1
-            for fp in recent_tool_fingerprints
-            if self._ngram_similarity(_turn_sig(fp), first_sig) >= similarity_threshold
-        )
-
-        if similar_count >= threshold:
-            tool_names = [name for fp in recent_tool_fingerprints for name, _ in fp]
+        # All turns in the window must match the first exactly
+        if all(fp == first for fp in recent_tool_fingerprints[1:]):
+            tool_names = [name for name, _ in first]
             desc = (
-                f"Doom loop detected: {similar_count}/{len(recent_tool_fingerprints)} "
-                f"consecutive similar tool calls ({', '.join(tool_names)})"
+                f"Doom loop detected: {len(recent_tool_fingerprints)} "
+                f"identical consecutive tool calls ({', '.join(tool_names)})"
             )
             return True, desc
         return False, ""