Skip to content

Commit add6efe

Browse files
committed
fix(micro-fix): increase stall threshold
1 parent a29ecf8 commit add6efe

File tree

1 file changed

+19
-29
lines changed

1 file changed

+19
-29
lines changed

core/framework/graph/event_loop_node.py

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ class LoopConfig:
165165
max_tool_calls_per_turn: int = 30
166166
judge_every_n_turns: int = 1
167167
stall_detection_threshold: int = 3
168-
stall_similarity_threshold: float = 0.7
168+
stall_similarity_threshold: float = 0.85
169169
max_history_tokens: int = 32_000
170170
store_prefix: str = ""
171171

@@ -2955,22 +2955,22 @@ def _ngrams(s: str) -> set[str]:
29552955
def _is_stalled(self, recent_responses: list[str]) -> bool:
29562956
"""Detect stall using n-gram similarity.
29572957
2958-
Detects when N consecutive responses have similarity >= threshold.
2959-
This catches phrases like "I'm still stuck" vs "I'm stuck".
2958+
Detects when ALL N consecutive responses are mutually similar
2959+
(>= threshold). A single dissimilar response resets the signal.
2960+
This catches phrases like "I'm still stuck" vs "I'm stuck"
2961+
without false-positives on "attempt 1" vs "attempt 2".
29602962
"""
29612963
if len(recent_responses) < self._config.stall_detection_threshold:
29622964
return False
29632965
if not recent_responses[0]:
29642966
return False
29652967

29662968
threshold = self._config.stall_similarity_threshold
2967-
# Check similarity against all recent responses (excluding self)
2968-
for i, resp in enumerate(recent_responses):
2969-
# Compare against all previous responses
2970-
for prev in recent_responses[:i]:
2971-
if self._ngram_similarity(resp, prev) >= threshold:
2972-
return True
2973-
return False
2969+
# Every consecutive pair must be similar
2970+
for i in range(1, len(recent_responses)):
2971+
if self._ngram_similarity(recent_responses[i], recent_responses[i - 1]) < threshold:
2972+
return False
2973+
return True
29742974

29752975
@staticmethod
29762976
def _is_transient_error(exc: BaseException) -> bool:
@@ -3049,10 +3049,11 @@ def _is_tool_doom_loop(
30493049
self,
30503050
recent_tool_fingerprints: list[list[tuple[str, str]]],
30513051
) -> tuple[bool, str]:
3052-
"""Detect doom loop using n-gram similarity on tool inputs.
3052+
"""Detect doom loop via exact fingerprint match.
30533053
3054-
Detects when N consecutive turns have similar tool calls.
3055-
Similarity applies to the canonicalized tool input strings.
3054+
Detects when N consecutive turns invoke the same tools with
3055+
identical (canonicalized) arguments. Different arguments mean
3056+
different work, so only exact matches count.
30563057
30573058
Returns (is_doom_loop, description).
30583059
"""
@@ -3065,23 +3066,12 @@ def _is_tool_doom_loop(
30653066
if not first:
30663067
return False, ""
30673068

3068-
# Convert a turn's list of (name, args) pairs to a single comparable string.
3069-
def _turn_sig(fp: list[tuple[str, str]]) -> str:
3070-
return "|".join(f"{name}:{args}" for name, args in fp)
3071-
3072-
first_sig = _turn_sig(first)
3073-
similarity_threshold = self._config.stall_similarity_threshold
3074-
similar_count = sum(
3075-
1
3076-
for fp in recent_tool_fingerprints
3077-
if self._ngram_similarity(_turn_sig(fp), first_sig) >= similarity_threshold
3078-
)
3079-
3080-
if similar_count >= threshold:
3081-
tool_names = [name for fp in recent_tool_fingerprints for name, _ in fp]
3069+
# All turns in the window must match the first exactly
3070+
if all(fp == first for fp in recent_tool_fingerprints[1:]):
3071+
tool_names = [name for name, _ in first]
30823072
desc = (
3083-
f"Doom loop detected: {similar_count}/{len(recent_tool_fingerprints)} "
3084-
f"consecutive similar tool calls ({', '.join(tool_names)})"
3073+
f"Doom loop detected: {len(recent_tool_fingerprints)} "
3074+
f"identical consecutive tool calls ({', '.join(tool_names)})"
30853075
)
30863076
return True, desc
30873077
return False, ""

0 commit comments

Comments
 (0)