|
12 | 12 | from .analysis import ( |
13 | 13 | compute_stats, find_roots, longest_path, fan_out_distribution, |
14 | 14 | connected_components, detect_repeated_paths, detect_common_transitions, |
15 | | - detect_fan_in_patterns, detect_branch_collapse, |
| 15 | + detect_fan_in_patterns, detect_branch_collapse, classify_topology, |
| 16 | + TOPOLOGY_PHENOTYPES, |
16 | 17 | ) |
17 | 18 | from .annotation import load_annotation, save_annotation, list_annotated, list_unannotated, TASK_TYPES, SOURCES |
18 | 19 | from .causality import causal_quality_report |
@@ -234,6 +235,12 @@ def cli(argv: list[str] | None = None) -> None: |
234 | 235 | p_an.add_argument("--list", action="store_true", dest="_list", help="List all annotated sessions") |
235 | 236 | p_an.add_argument("--unannotated", action="store_true", help="List sessions without annotations") |
236 | 237 |
|
| 238 | + p_cr = sub.add_parser("corpus", help="Query and filter session corpus") |
| 239 | + p_cr.add_argument("--runtime", help="Filter by runtime (e.g. claude, codex, opencode)") |
| 240 | + p_cr.add_argument("--task", choices=list(TASK_TYPES), help="Filter by task type") |
| 241 | + p_cr.add_argument("--topology", choices=list(TOPOLOGY_PHENOTYPES), help="Filter by topology phenotype") |
| 242 | + p_cr.add_argument("--source", choices=list(SOURCES), help="Filter by session source") |
| 243 | + |
237 | 244 | p_cmp = sub.add_parser("compare", help="Compare two sessions side by side") |
238 | 245 | p_cmp.add_argument("session_a", help="First session ID") |
239 | 246 | p_cmp.add_argument("session_b", help="Second session ID") |
@@ -563,6 +570,9 @@ def _load(sid: str | None): |
563 | 570 | elif args.command == "annotate": |
564 | 571 | _handle_annotate(store, args) |
565 | 572 |
|
| 573 | + elif args.command == "corpus": |
| 574 | + _handle_corpus(store, args) |
| 575 | + |
566 | 576 | elif args.command == "compare": |
567 | 577 | _handle_compare(store, args) |
568 | 578 |
|
@@ -880,6 +890,62 @@ def _handle_annotate(store, args) -> None: |
880 | 890 | print(f" {k}: {v}") |
881 | 891 |
|
882 | 892 |
|
def _handle_corpus(store, args) -> None:
    """Handle ``causetrace corpus``.

    Lists every session known to ``store`` as a table row (session ID,
    runtime, task, topology, event count, depth, root count), optionally
    narrowed by the ``--runtime``, ``--task``, ``--topology`` and
    ``--source`` filters carried on ``args``.

    ``--runtime`` is a case-insensitive substring match; the other filters
    are exact matches.  When a session's annotation has no ``topology``
    value, the topology is derived from the session's computed stats via
    ``classify_topology``.

    Prints the result to stdout and returns ``None``.
    """
    sids = store.list_sessions()
    if not sids:
        print("No sessions found.")
        return

    # Normalise the runtime filter once instead of once per session.
    runtime_filter = args.runtime.lower() if args.runtime else ""

    rows = []
    for sid in sids:
        # NOTE(review): load_annotation's return value for unannotated
        # sessions is not visible from here; guard against a falsy result.
        annotation = load_annotation(sid) or {}

        # Fall back to the "agent" key whenever "runtime" is missing *or*
        # empty; a plain dict.get default would only cover the missing case.
        runtime = annotation.get("runtime") or annotation.get("agent") or ""
        task = annotation.get("task_type", "") or ""
        source = annotation.get("source", "") or ""

        # Apply the annotation-only filters before touching the event store
        # so excluded sessions never pay for loading and analysing events.
        if runtime_filter and runtime_filter not in runtime.lower():
            continue
        if args.task and task != args.task:
            continue
        if args.source and source != args.source:
            continue

        events = store.load(sid)
        stats = compute_stats(events) if events else {}

        # An annotated topology wins; otherwise classify from the stats.
        topology = annotation.get("topology", "") or ""
        if not topology:
            topology = classify_topology(stats) or ""
        if args.topology and topology != args.topology:
            continue

        rows.append({
            "session_id": sid,
            "runtime": runtime,
            "task": task,
            "topology": topology,
            "events": stats.get("event_count", 0),
            "depth": stats.get("max_depth", 0),
            "roots": stats.get("root_count", 0),
            "source": source,
        })

    if not rows:
        print("No matching sessions.")
        return

    # Print table
    header = f"{'Session ID':24s} {'Runtime':12s} {'Task':14s} {'Topology':22s} {'Events':>6s} {'Depth':>5s} {'Roots':>5s}"
    print(f"Corpus: {len(rows)} session(s)\n")
    print(header)
    print("-" * len(header))
    for r in rows:
        # Truncate long IDs so the fixed-width columns stay aligned.
        sid = r["session_id"][:22]
        print(f"{sid:24s} {r['runtime']:12s} {r['task']:14s} {r['topology']:22s} {r['events']:6d} {r['depth']:5d} {r['roots']:5d}")
| 947 | + |
| 948 | + |
883 | 949 | def _handle_compare(store, args) -> None: |
884 | 950 | """Handle `causetrace compare`.""" |
885 | 951 | sid_a = args.session_a |
|
0 commit comments