@@ -165,7 +165,7 @@ class LoopConfig:
165165 max_tool_calls_per_turn : int = 30
166166 judge_every_n_turns : int = 1
167167 stall_detection_threshold : int = 3
168- stall_similarity_threshold : float = 0.7
168+ stall_similarity_threshold : float = 0.85
169169 max_history_tokens : int = 32_000
170170 store_prefix : str = ""
171171
@@ -2955,22 +2955,22 @@ def _ngrams(s: str) -> set[str]:
29552955 def _is_stalled (self , recent_responses : list [str ]) -> bool :
29562956 """Detect stall using n-gram similarity.
29572957
2958- Detects when N consecutive responses have similarity >= threshold.
2959- This catches phrases like "I'm still stuck" vs "I'm stuck".
2958+ Detects when ALL N consecutive responses are mutually similar
2959+ (>= threshold). A single dissimilar response resets the signal.
2960+ This catches phrases like "I'm still stuck" vs "I'm stuck"
2961+ without false-positives on "attempt 1" vs "attempt 2".
29602962 """
29612963 if len (recent_responses ) < self ._config .stall_detection_threshold :
29622964 return False
29632965 if not recent_responses [0 ]:
29642966 return False
29652967
29662968 threshold = self ._config .stall_similarity_threshold
2967- # Check similarity against all recent responses (excluding self)
2968- for i , resp in enumerate (recent_responses ):
2969- # Compare against all previous responses
2970- for prev in recent_responses [:i ]:
2971- if self ._ngram_similarity (resp , prev ) >= threshold :
2972- return True
2973- return False
2969+ # Every consecutive pair must be similar
2970+ for i in range (1 , len (recent_responses )):
2971+ if self ._ngram_similarity (recent_responses [i ], recent_responses [i - 1 ]) < threshold :
2972+ return False
2973+ return True
29742974
29752975 @staticmethod
29762976 def _is_transient_error (exc : BaseException ) -> bool :
@@ -3049,10 +3049,11 @@ def _is_tool_doom_loop(
30493049 self ,
30503050 recent_tool_fingerprints : list [list [tuple [str , str ]]],
30513051 ) -> tuple [bool , str ]:
3052- """Detect doom loop using n-gram similarity on tool inputs .
3052+ """Detect doom loop via exact fingerprint match .
30533053
3054- Detects when N consecutive turns have similar tool calls.
3055- Similarity applies to the canonicalized tool input strings.
3054+ Detects when N consecutive turns invoke the same tools with
3055+ identical (canonicalized) arguments. Different arguments mean
3056+ different work, so only exact matches count.
30563057
30573058 Returns (is_doom_loop, description).
30583059 """
@@ -3065,23 +3066,12 @@ def _is_tool_doom_loop(
30653066 if not first :
30663067 return False , ""
30673068
3068- # Convert a turn's list of (name, args) pairs to a single comparable string.
3069- def _turn_sig (fp : list [tuple [str , str ]]) -> str :
3070- return "|" .join (f"{ name } :{ args } " for name , args in fp )
3071-
3072- first_sig = _turn_sig (first )
3073- similarity_threshold = self ._config .stall_similarity_threshold
3074- similar_count = sum (
3075- 1
3076- for fp in recent_tool_fingerprints
3077- if self ._ngram_similarity (_turn_sig (fp ), first_sig ) >= similarity_threshold
3078- )
3079-
3080- if similar_count >= threshold :
3081- tool_names = [name for fp in recent_tool_fingerprints for name , _ in fp ]
3069+ # All turns in the window must match the first exactly
3070+ if all (fp == first for fp in recent_tool_fingerprints [1 :]):
3071+ tool_names = [name for name , _ in first ]
30823072 desc = (
3083- f"Doom loop detected: { similar_count } / { len (recent_tool_fingerprints )} "
3084- f"consecutive similar tool calls ({ ', ' .join (tool_names )} )"
3073+ f"Doom loop detected: { len (recent_tool_fingerprints )} "
3074+ f"identical consecutive tool calls ({ ', ' .join (tool_names )} )"
30853075 )
30863076 return True , desc
30873077 return False , ""
0 commit comments