Skip to content

Commit abf6b0d

Browse files
test(8a5b-e07b): RED — add close benchmark tests for ticket-benchmark.sh
Story db66-0242: benchmark test for <10s close with 200 tickets. Seeds 200 non-archived + 50 archived tickets with mixed types/statuses/deps. Tests assert --mode=close interface (RED — flag not implemented yet). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 66a8d14 commit abf6b0d

File tree

2 files changed

+226
-0
lines changed

2 files changed

+226
-0
lines changed

.test-index

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -189,3 +189,4 @@ plugins/dso/agents/red-test-writer.md: tests/agents/test-red-test-writer.sh
189189
plugins/dso/agents/red-test-evaluator.md: tests/agents/test-red-test-evaluator.sh
190190
tests/lib/markdown_helpers.py:tests/docs/test_extract_section_deduplication.py
191191
plugins/dso/scripts/ticket-unblock.py: tests/scripts/test_ticket_unblock.py
192+
plugins/dso/scripts/ticket-benchmark.sh: tests/scripts/test-ticket-benchmark.sh [test_close_benchmark_under_threshold]

tests/scripts/test-ticket-benchmark.sh

Lines changed: 225 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -140,4 +140,229 @@ test_benchmark_outputs_timing_info() {
140140
}
141141
test_benchmark_outputs_timing_info
142142

143+
# ── Helper: seed a mixed population of tickets into a test repo ───────────────
# Usage:
#   _seed_mixed_population <repo> [epic_count] [story_count] [task_count] \
#                          [archived_count] [link_count]
# Creates (best-effort: a failing CLI call never aborts the seeding):
#   - <epic_count> epics (open)                                   default 5
#   - <story_count> stories (in_progress) as children of the first
#     epic that was actually created                              default 10
#   - <task_count> tasks (open) as standalone                     default 185
#   - <archived_count> archived tasks (closed) as standalone      default 50
#   - up to <link_count> depends_on links between disjoint pairs of
#     adjacent tasks (task[0]->task[1], task[2]->task[3], ...)    default 10
# Globals:  TICKET_SCRIPT (read) — path to the ticket CLI, run from inside <repo>
# Returns:  first successfully created epic ID on stdout (empty if none)
_seed_mixed_population() {
    local repo="$1"
    local epic_count="${2:-5}"
    local story_count="${3:-10}"
    local task_count="${4:-185}"
    local archived_count="${5:-50}"
    local link_count="${6:-10}"

    local first_epic_id=""
    local i epic_id sid tid aid src tgt

    # Create epics (open status by default). Remember the first one that was
    # really created, so one failed call does not suppress every story below.
    for (( i = 1; i <= epic_count; i++ )); do
        epic_id=$(cd "$repo" && bash "$TICKET_SCRIPT" create epic "Benchmark epic $i" 2>/dev/null) || true
        if [[ -z "$first_epic_id" && -n "$epic_id" ]]; then
            first_epic_id="$epic_id"
        fi
    done

    # Create stories as children of that epic and move each to in_progress.
    if [[ -n "$first_epic_id" ]]; then
        for (( i = 1; i <= story_count; i++ )); do
            sid=$(cd "$repo" && bash "$TICKET_SCRIPT" create story "Benchmark story $i" "$first_epic_id" 2>/dev/null) || true
            if [[ -n "$sid" ]]; then
                (cd "$repo" && bash "$TICKET_SCRIPT" transition "$sid" open in_progress >/dev/null 2>/dev/null) || true
            fi
        done
    fi

    # Create standalone tasks (open); keep their IDs for the linking step.
    local task_ids=()
    for (( i = 1; i <= task_count; i++ )); do
        tid=$(cd "$repo" && bash "$TICKET_SCRIPT" create task "Benchmark task $i" 2>/dev/null) || true
        if [[ -n "$tid" ]]; then
            task_ids+=("$tid")
        fi
    done

    # Create archived tasks: created open, then immediately closed.
    for (( i = 1; i <= archived_count; i++ )); do
        aid=$(cd "$repo" && bash "$TICKET_SCRIPT" create task "Archived task $i" 2>/dev/null) || true
        if [[ -n "$aid" ]]; then
            (cd "$repo" && bash "$TICKET_SCRIPT" transition "$aid" open closed --reason="Fixed: benchmark seed" >/dev/null 2>/dev/null) || true
        fi
    done

    # Add dependency links between disjoint task pairs. Only links the CLI
    # reports as created are counted, so a failed attempt is retried on the
    # next pair instead of silently shrinking the requested link population.
    local pair_count="${#task_ids[@]}"
    local links_added=0
    for (( i = 0; i + 1 < pair_count && links_added < link_count; i += 2 )); do
        src="${task_ids[i]}"
        tgt="${task_ids[i + 1]}"
        if (cd "$repo" && bash "$TICKET_SCRIPT" link "$src" "$tgt" depends_on >/dev/null 2>/dev/null); then
            links_added=$(( links_added + 1 ))
        fi
    done

    printf '%s\n' "$first_epic_id"
}
211+
212+
# ── Test 4: close benchmark under 10s with 200 non-archived + 50 archived tickets
echo "Test 4: ticket transition open->closed wall-clock < 10s with 250-ticket population"
# Asserts two things:
#   1. ticket-benchmark.sh accepts --mode=close (exit status other than 2).
#   2. Closing one open task in a repo seeded with a mixed ticket population
#      exits 0 and takes less than 10 seconds of wall-clock time.
# Globals: BENCHMARK_SCRIPT, TICKET_SCRIPT (read).
# Relies on helpers defined elsewhere in this file: _snapshot_fail,
# _make_test_repo, _seed_mixed_population, assert_eq, assert_ne,
# assert_pass_if_clean.
test_close_benchmark_under_threshold() {
    _snapshot_fail

    # RED: ticket-benchmark.sh must support --mode=close.
    # This test will fail until ticket-benchmark.sh implements --mode=close.
    # We call the benchmark script with --mode=close so the test is RED until
    # that option is implemented.
    local exit_code=0
    bash "$BENCHMARK_SCRIPT" --mode=close --threshold 10 >/dev/null 2>/dev/null || exit_code=$?

    # Assert: --mode=close is recognised (exits 0 or 1, not 2 for unknown arg).
    # Until --mode=close is implemented, ticket-benchmark.sh exits 2 → RED.
    assert_ne "benchmark --mode=close is not an unknown-argument error" "2" "$exit_code"

    local repo
    repo=$(_make_test_repo)

    # Seed 200 non-archived + 50 archived tickets with mixed types and links.
    _seed_mixed_population "$repo" 5 10 185 50 15 >/dev/null

    # Create target: a task in open status with no children (simplest closeable state).
    local target_id
    target_id=$(cd "$repo" && bash "$TICKET_SCRIPT" create task "Target close benchmark task" 2>/dev/null) || true

    if [[ -z "$target_id" ]]; then
        # Force a recorded failure: without a target there is nothing to time.
        assert_eq "target ticket created" "non-empty" "empty"
        assert_pass_if_clean "test_close_benchmark_under_threshold"
        return
    fi

    # Time the full ticket transition CLI command (includes open-children guard + flock +
    # STATUS event write + ticket-unblock.py subprocess).
    # Timestamps come from python3 rather than `date +%s.%N`: %N is a GNU
    # extension that BSD/macOS date does not support, which made the elapsed
    # arithmetic below fail there. The measured interval includes one python3
    # start-up, which is negligible against the 10s threshold.
    local t_start t_end elapsed exit_code_transition
    t_start=$(python3 -c 'import time; print(repr(time.time()))')
    exit_code_transition=0
    (cd "$repo" && bash "$TICKET_SCRIPT" transition "$target_id" open closed --reason="Fixed: benchmark" >/dev/null 2>/dev/null) \
        || exit_code_transition=$?
    t_end=$(python3 -c 'import time; print(repr(time.time()))')

    # Values are handed to Python via argv instead of being spliced into source.
    elapsed=$(python3 -c 'import sys; print(float(sys.argv[2]) - float(sys.argv[1]))' "$t_start" "$t_end")

    # Assert: transition succeeded.
    assert_eq "ticket transition open->closed exits 0" "0" "$exit_code_transition"

    # Assert: wall-clock time < 10s.
    local over_threshold
    over_threshold=$(python3 -c 'import sys; print("1" if float(sys.argv[1]) >= 10.0 else "0")' "$elapsed")
    assert_eq "close wall-clock < 10s (elapsed=${elapsed}s)" "0" "$over_threshold"

    assert_pass_if_clean "test_close_benchmark_under_threshold"
}
test_close_benchmark_under_threshold
266+
267+
# ── Test 5: seeded population has realistic mix of types, statuses, and links ──
echo "Test 5: seeded population has >= 3 ticket types, >= 2 statuses, >= 10 dependency links"
# Seeds a mixed population and inspects the event files under
# <repo>/.tickets-tracker/<ticket>/ to assert:
#   - >= 3 distinct data.ticket_type values across *-CREATE.json events
#   - >= 2 distinct statuses ('open' is assumed as the default, plus every
#     data.status value found in *-STATUS.json events)
#   - >= 10 *-LINK.json event files
# Also asserts ticket-benchmark.sh accepts --mode=close (exit status != 2).
# Globals: BENCHMARK_SCRIPT (read).
# Relies on helpers defined elsewhere in this file: _snapshot_fail,
# _make_test_repo, _seed_mixed_population, assert_ne, assert_pass_if_clean.
test_close_benchmark_realistic_population() {
    _snapshot_fail

    # RED: requires --mode=close seeding support in ticket-benchmark.sh.
    local exit_code=0
    bash "$BENCHMARK_SCRIPT" --mode=close --threshold 10 >/dev/null 2>/dev/null || exit_code=$?
    assert_ne "benchmark --mode=close is not an unknown-argument error" "2" "$exit_code"

    local repo
    repo=$(_make_test_repo)

    # Seed a mixed population: 5 epics, 10 stories (in_progress), 185 tasks (open),
    # 50 archived tasks (closed), 15 dependency links.
    _seed_mixed_population "$repo" 5 10 185 50 15 >/dev/null

    local tracker_dir="$repo/.tickets-tracker"

    # Shared scanner: prints how many distinct non-empty data.<field> values
    # occur in event files ending with <suffix>.
    #   argv: <tracker_dir> <suffix> <field> [seed_value...]
    # The tracker path travels via argv (not string interpolation), so quotes
    # or backslashes in the path cannot corrupt the Python source.
    local distinct_values_py='
import json, os, sys

tracker, suffix, field = sys.argv[1:4]
values = set(sys.argv[4:])
for name in os.listdir(tracker):
    ticket_dir = os.path.join(tracker, name)
    if not os.path.isdir(ticket_dir):
        continue
    for fname in sorted(os.listdir(ticket_dir)):
        if not fname.endswith(suffix):
            continue
        try:
            with open(os.path.join(ticket_dir, fname)) as fh:
                value = json.load(fh).get("data", {}).get(field, "")
            if value:
                values.add(value)
        except Exception:
            pass  # unreadable or malformed event files are skipped
print(len(values))
'

    # Count distinct ticket types in the tracker.
    local type_count
    type_count=$(python3 -c "$distinct_values_py" "$tracker_dir" "-CREATE.json" "ticket_type" 2>/dev/null) || type_count=0

    # A non-numeric or empty count makes the -ge test fail, which leaves the
    # flag at "true" and therefore fails the assertion.
    local too_few_types="true"
    if [ "${type_count:-0}" -ge 3 ] 2>/dev/null; then too_few_types="false"; fi
    assert_ne "at least 3 distinct ticket types (got $type_count)" "true" "$too_few_types"

    # Count distinct statuses by reading STATUS events.
    # Default status (no STATUS event) is 'open', passed as a seed value.
    local status_count
    status_count=$(python3 -c "$distinct_values_py" "$tracker_dir" "-STATUS.json" "status" "open" 2>/dev/null) || status_count=0

    local too_few_statuses="true"
    if [ "${status_count:-0}" -ge 2 ] 2>/dev/null; then too_few_statuses="false"; fi
    assert_ne "at least 2 distinct statuses (got $status_count)" "true" "$too_few_statuses"

    # Count dependency links (LINK events across all ticket dirs).
    local link_count
    link_count=$(python3 -c '
import os, sys

tracker = sys.argv[1]
count = 0
for name in os.listdir(tracker):
    ticket_dir = os.path.join(tracker, name)
    if not os.path.isdir(ticket_dir):
        continue
    count += sum(1 for fname in os.listdir(ticket_dir) if fname.endswith("-LINK.json"))
print(count)
' "$tracker_dir" 2>/dev/null) || link_count=0

    local too_few_links="true"
    if [ "${link_count:-0}" -ge 10 ] 2>/dev/null; then too_few_links="false"; fi
    assert_ne "at least 10 dependency links (got $link_count)" "true" "$too_few_links"

    assert_pass_if_clean "test_close_benchmark_realistic_population"
}
test_close_benchmark_realistic_population
367+
143368
print_summary

0 commit comments

Comments
 (0)