Skip to content

Commit b0142ac

Browse files
fix: deduplicate error details in auto-created bug tickets (f2f2-7f31) (merge worktree-20260324-211622)
2 parents 53c96f9 + 9de6d4a commit b0142ac

File tree

3 files changed

+91
-15
lines changed

3 files changed

+91
-15
lines changed

plugins/dso/skills/end-session/error-sweep.sh

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,16 @@ THRESHOLD=50
1919
# Source of truth: hooks/track-tool-errors.sh (NOISE_CATEGORIES variable).
2020
NOISE_CATEGORIES="file_not_found command_exit_nonzero"
2121

22-
# _extract_category_details: extract error details for a category as markdown
22+
# _extract_category_details: extract deduplicated error details for a category as markdown
2323
# Args: $1=counter_file $2=category
24-
# Outputs markdown-formatted error details to stdout
24+
# Outputs markdown-formatted error details to stdout, grouped by unique
25+
# (tool_name, error_message) signature with occurrence counts.
2526
_extract_category_details() {
2627
local counter_file="$1"
2728
local category="$2"
2829
python3 - "$counter_file" "$category" <<'PYEOF' 2>/dev/null
2930
import json, sys
31+
from collections import OrderedDict
3032
3133
counter_path = sys.argv[1]
3234
category = sys.argv[2]
@@ -42,20 +44,38 @@ if not errors:
4244
print("No detailed error entries recorded.")
4345
sys.exit(0)
4446
45-
# Show up to 20 most recent entries; note if truncated
47+
# Deduplicate by (tool_name, error_message) — keep first/last timestamps and count
48+
groups = OrderedDict()
49+
for e in errors:
50+
key = (e.get('tool_name', 'N/A'), e.get('error_message', 'N/A'))
51+
if key not in groups:
52+
groups[key] = {
53+
'tool_name': key[0],
54+
'error_message': key[1],
55+
'input_summary': e.get('input_summary', 'N/A'),
56+
'first_seen': e.get('timestamp', 'N/A'),
57+
'last_seen': e.get('timestamp', 'N/A'),
58+
'count': 0,
59+
}
60+
groups[key]['last_seen'] = e.get('timestamp', 'N/A')
61+
groups[key]['count'] += 1
62+
4663
total = len(errors)
47-
shown = errors[-20:]
48-
if total > 20:
49-
print(f"Showing most recent 20 of {total} occurrences.\n")
50-
51-
print("| # | Timestamp | Tool | Input Summary | Error Message |")
52-
print("|---|-----------|------|---------------|---------------|")
53-
for i, e in enumerate(shown, 1):
54-
ts = e.get('timestamp', 'N/A')
55-
tool = e.get('tool_name', 'N/A')
56-
summary = e.get('input_summary', 'N/A').replace('|', '\\|')[:80]
57-
msg = e.get('error_message', 'N/A').replace('|', '\\|')[:120]
58-
print(f"| {i} | {ts} | {tool} | {summary} | {msg} |")
64+
unique = len(groups)
65+
print(f"{total} occurrences, {unique} unique error signature(s).\n")
66+
67+
# Show up to 10 unique signatures, sorted by count descending
68+
signatures = sorted(groups.values(), key=lambda g: g['count'], reverse=True)[:10]
69+
70+
print("| # | Tool | Error Message | Count | First Seen | Last Seen |")
71+
print("|---|------|---------------|-------|------------|-----------|")
72+
for i, g in enumerate(signatures, 1):
73+
tool = g['tool_name']
74+
msg = g['error_message'].replace('|', '\\|')[:120]
75+
count = g['count']
76+
first = g['first_seen']
77+
last = g['last_seen']
78+
print(f"| {i} | {tool} | {msg} | {count} | {first} | {last} |")
5979
PYEOF
6080
}
6181

tests/scripts/test-end-session-error-sweep.sh

100644100755
File mode changed.

tests/skills/test-end-session-error-sweep.sh

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,4 +622,60 @@ fi
622622
assert_eq "test_error_sweep_header_references_step_2_9" "found" "$has_step_29_ref"
623623
assert_pass_if_clean "test_error_sweep_header_references_step_2_9"
624624

625+
# ---------------------------------------------------------------------------
626+
# test_ticket_deduplicates_error_details
627+
# Counter with 50 errors, mostly duplicates. Assert ticket description contains
628+
# deduplicated entries (unique by tool_name + error_message), not raw duplicates.
629+
# ---------------------------------------------------------------------------
630+
_snapshot_fail
631+
_setup_test
632+
trap '_teardown_test' EXIT
633+
# Build counter with 50 permission_denied errors: 45 identical + 3 different + 2 more identical
634+
_DEDUP_ERRORS='[]'
635+
_DEDUP_ERRORS=$(python3 -c "
636+
import json
637+
errors = []
638+
# 45 identical errors
639+
for i in range(45):
640+
errors.append({'category':'permission_denied','timestamp':f'2026-03-15T10:{i:02d}:00Z','tool_name':'Bash','input_summary':'Bash: rm /protected/file.txt','error_message':'permission denied: /protected/file.txt','session_id':f's{i}'})
641+
# 3 different errors
642+
errors.append({'category':'permission_denied','timestamp':'2026-03-15T11:00:00Z','tool_name':'Read','input_summary':'Read: /etc/shadow','error_message':'permission denied: /etc/shadow','session_id':'s45'})
643+
errors.append({'category':'permission_denied','timestamp':'2026-03-15T11:01:00Z','tool_name':'Bash','input_summary':'Bash: chmod 777 /root','error_message':'permission denied: /root','session_id':'s46'})
644+
errors.append({'category':'permission_denied','timestamp':'2026-03-15T11:02:00Z','tool_name':'Write','input_summary':'Write: /usr/bin/test','error_message':'permission denied: /usr/bin/test','session_id':'s47'})
645+
# 2 more duplicates of the first pattern
646+
for i in range(2):
647+
errors.append({'category':'permission_denied','timestamp':f'2026-03-15T12:{i:02d}:00Z','tool_name':'Bash','input_summary':'Bash: rm /protected/file.txt','error_message':'permission denied: /protected/file.txt','session_id':f's{48+i}'})
648+
print(json.dumps({'index':{'permission_denied':50},'errors':errors}))
649+
")
650+
_write_counter_with_errors "$_DEDUP_ERRORS"
651+
_mock_tk_list_empty
652+
_run_sweep
653+
# Ticket should be created
654+
create_calls=$(_count_tk_create_calls)
655+
assert_eq "test_ticket_deduplicates_created" "1" "$create_calls"
656+
# Get the full tk log to check description content
657+
tk_log_content=$(cat "$TK_LOG" 2>/dev/null || true)
658+
# Should contain all 4 unique error signatures (not 20 raw duplicates)
659+
assert_contains "test_ticket_dedup_has_protected_file" "/protected/file.txt" "$tk_log_content"
660+
assert_contains "test_ticket_dedup_has_etc_shadow" "/etc/shadow" "$tk_log_content"
661+
assert_contains "test_ticket_dedup_has_root" "/root" "$tk_log_content"
662+
assert_contains "test_ticket_dedup_has_usr_bin" "/usr/bin/test" "$tk_log_content"
663+
# Should show occurrence counts — the 47 identical errors (45 + 2) should collapse into one row with count 47
664+
assert_contains "test_ticket_dedup_has_occurrence_count" "47" "$tk_log_content"
665+
# Should NOT have 20 rows of the same error — check that "Bash" tool appears
666+
# a reasonable number of times (deduplicated, not raw). In raw mode, "Bash" would
667+
# appear 18 times in the table (18 of the 20 most recent entries are Bash). Deduplicated, it should appear far fewer times.
668+
_bash_row_count=$(echo "$tk_log_content" | grep -c "| Bash |" 2>/dev/null || echo "0")
669+
# With dedup: 2 unique Bash signatures. Without dedup: 18 of the 20 raw rows show Bash.
670+
# Assert <= 5 to allow some formatting flexibility but catch raw dump.
671+
if [[ "$_bash_row_count" -le 5 ]]; then
672+
_dedup_ok="yes"
673+
else
674+
_dedup_ok="no"
675+
fi
676+
assert_eq "test_ticket_dedup_not_raw_dump" "yes" "$_dedup_ok"
677+
assert_pass_if_clean "test_ticket_deduplicates_error_details"
678+
trap - EXIT
679+
_teardown_test
680+
625681
print_summary

0 commit comments

Comments
 (0)