Skip to content

Commit fbbf902

Browse files
Your Name and claude committed
Add corpus query command and topology morphology classification
- classify_topology(stats) — heuristic structural phenotype detection: dominant_chain, multi_root_exploration, fan_out_heavy, collapsed_repair, mixed
- TOPOLOGY_PHENOTYPES — description dict for CLI filter choices
- causetrace corpus [--runtime] [--task] [--topology] [--source] — filter/query the session corpus with per-session topology stats
- 5 new tests for classify_topology phenotypes

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 14dccaf commit fbbf902

5 files changed

Lines changed: 174 additions & 4 deletions

File tree

AGENTS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Recent history uses concise imperative subjects such as `Add ...`, `Fix ...`, `U
3636
<!-- gitnexus:start -->
3737
# GitNexus — Code Intelligence
3838

39-
This project is indexed by GitNexus as **causetrace** (1733 symbols, 2949 relationships, 111 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
39+
This project is indexed by GitNexus as **causetrace** (1738 symbols, 2954 relationships, 113 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
4040

4141
> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
4242

CLAUDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ python3 tools/promote.py devto-post docs/promotion/blog_<topic>.md
150150
<!-- gitnexus:start -->
151151
# GitNexus — Code Intelligence
152152

153-
This project is indexed by GitNexus as **causetrace** (1733 symbols, 2949 relationships, 111 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
153+
This project is indexed by GitNexus as **causetrace** (1738 symbols, 2954 relationships, 113 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
154154

155155
> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
156156

causetrace/analysis.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
transition_entropy, branch_density, root_spawning_rate,
99
path_reuse_ratio
1010
11+
Layer 1.3 — Morphology (structural phenotype classification):
12+
classify_topology
13+
1114
Layer 2 — Pattern (repeated structures, no semantic interpretation):
1215
detect_repeated_paths, detect_common_transitions,
1316
detect_fan_in_patterns, detect_branch_collapse
@@ -489,6 +492,74 @@ def path_reuse_ratio(events, max_depth: int = 10) -> dict:
489492
}
490493

491494

495+
# ---------------------------------------------------------------------------
496+
# Layer 1.3 — Morphology classification
497+
# ---------------------------------------------------------------------------
498+
499+
def classify_topology(stats: dict) -> str:
    """Classify session topology into a structural phenotype.

    Heuristic-only, no semantic interpretation. Uses ``compute_stats`` output.
    Missing keys fall back to neutral defaults, so an empty ``stats`` dict
    classifies as ``"mixed"``.

    Rules are evaluated in the order listed below; the first match wins.

    Phenotypes:

    ``dominant_chain``
        Single (or near-single) root, deep relative to size, low branching.
        Typical of linear fix-and-test loops.

    ``fan_out_heavy``
        One or few roots with wide branching, moderate depth.
        Typical of parallel task spawning.

    ``collapsed_repair``
        Significant fan-in / multi-parent convergence.
        Typical of iterative refinement converging on a target.

    ``multi_root_exploration``
        Many roots, shallow depth, low reuse.
        Typical of reading/searching/discovery behaviour.

    ``mixed``
        No phenotype clearly dominates.

    Args:
        stats: Mapping read for the keys ``root_count``, ``event_count``,
            ``max_depth``, ``avg_depth``, ``fan_out_avg``, ``fan_out_max``
            and ``multi_parent_count``.

    Returns:
        One of the phenotype labels above.
    """
    rc = stats.get("root_count", 0)
    mc = stats.get("event_count", 1)
    depth = stats.get("max_depth", 0)
    avg_depth = stats.get("avg_depth", 0.0)
    fan_out_avg = stats.get("fan_out_avg", 0.0)
    fan_out_max = stats.get("fan_out_max", 0)
    multi_parent = stats.get("multi_parent_count", 0)

    # Both size-relative ratios share one guard: a zero (or negative) event
    # count must never reach a division, otherwise the collapsed_repair rule
    # raises ZeroDivisionError on degenerate stats.
    if mc > 0:
        depth_ratio = depth / mc
        multi_parent_ratio = multi_parent / mc
    else:
        depth_ratio = 0.0
        multi_parent_ratio = 0.0

    # dominant_chain — few roots, deep relative to size
    if rc <= 2 and depth_ratio > 0.3 and fan_out_avg < 1.5:
        return "dominant_chain"

    # fan_out_heavy — wide branching from few roots
    if rc <= 3 and fan_out_max >= 4 and fan_out_avg >= 1.5:
        return "fan_out_heavy"

    # collapsed_repair — significant multi-parent convergence
    if multi_parent >= 3 and multi_parent_ratio > 0.05:
        return "collapsed_repair"

    # multi_root_exploration — many roots, shallow
    if rc >= 5 and avg_depth < 3:
        return "multi_root_exploration"

    return "mixed"
552+
553+
554+
# Phenotype label -> one-line human-readable description.
# The keys are the same strings classify_topology() returns, and the CLI
# uses them as the allowed values of ``corpus --topology``.
TOPOLOGY_PHENOTYPES = {
    "dominant_chain": "Single-chain deep topology, low branching",
    "multi_root_exploration": "Many shallow roots, exploration-like",
    "fan_out_heavy": "Wide branching from few roots",
    "collapsed_repair": "Significant multi-parent convergence",
    "mixed": "No dominant structural phenotype",
}
561+
562+
492563
# ---------------------------------------------------------------------------
493564
# Layer 1.5 — Temporal partitioning primitive (no semantics, no state naming)
494565
# ---------------------------------------------------------------------------

causetrace/cli.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from .analysis import (
1313
compute_stats, find_roots, longest_path, fan_out_distribution,
1414
connected_components, detect_repeated_paths, detect_common_transitions,
15-
detect_fan_in_patterns, detect_branch_collapse,
15+
detect_fan_in_patterns, detect_branch_collapse, classify_topology,
16+
TOPOLOGY_PHENOTYPES,
1617
)
1718
from .annotation import load_annotation, save_annotation, list_annotated, list_unannotated, TASK_TYPES, SOURCES
1819
from .causality import causal_quality_report
@@ -234,6 +235,12 @@ def cli(argv: list[str] | None = None) -> None:
234235
p_an.add_argument("--list", action="store_true", dest="_list", help="List all annotated sessions")
235236
p_an.add_argument("--unannotated", action="store_true", help="List sessions without annotations")
236237

238+
p_cr = sub.add_parser("corpus", help="Query and filter session corpus")
239+
p_cr.add_argument("--runtime", help="Filter by runtime (e.g. claude, codex, opencode)")
240+
p_cr.add_argument("--task", choices=list(TASK_TYPES), help="Filter by task type")
241+
p_cr.add_argument("--topology", choices=list(TOPOLOGY_PHENOTYPES), help="Filter by topology phenotype")
242+
p_cr.add_argument("--source", choices=list(SOURCES), help="Filter by session source")
243+
237244
p_cmp = sub.add_parser("compare", help="Compare two sessions side by side")
238245
p_cmp.add_argument("session_a", help="First session ID")
239246
p_cmp.add_argument("session_b", help="Second session ID")
@@ -563,6 +570,9 @@ def _load(sid: str | None):
563570
elif args.command == "annotate":
564571
_handle_annotate(store, args)
565572

573+
elif args.command == "corpus":
574+
_handle_corpus(store, args)
575+
566576
elif args.command == "compare":
567577
_handle_compare(store, args)
568578

@@ -880,6 +890,62 @@ def _handle_annotate(store, args) -> None:
880890
print(f" {k}: {v}")
881891

882892

893+
def _handle_corpus(store, args) -> None:
    """Handle ``causetrace corpus``.

    Lists every session known to ``store``, applies the optional
    ``--runtime`` / ``--task`` / ``--topology`` / ``--source`` filters and
    prints a fixed-width table with per-session topology stats.

    Filter semantics:
      * ``--runtime`` is a case-insensitive substring match.
      * ``--task``, ``--topology`` and ``--source`` are exact matches.

    The topology shown is the annotation's ``topology`` value when set,
    otherwise the result of ``classify_topology`` on the session's stats.

    Args:
        store: Session store; only ``list_sessions()`` and ``load(sid)``
            are used here.
        args: Parsed CLI namespace providing ``runtime``, ``task``,
            ``topology`` and ``source``.
    """
    sids = store.list_sessions()
    if not sids:
        print("No sessions found.")
        return

    runtime_filter = args.runtime.lower() if args.runtime else None

    rows = []
    for sid in sids:
        # NOTE(review): what load_annotation returns for an unannotated
        # session is not visible here; ``or {}`` guards against None — confirm.
        annotation = load_annotation(sid) or {}

        # Fall back to the legacy "agent" key whenever "runtime" is missing
        # *or empty*, not only when the key is absent.
        runtime = annotation.get("runtime") or annotation.get("agent") or ""
        task = annotation.get("task_type") or ""
        source = annotation.get("source") or ""

        # Annotation-only filters run first so that excluded sessions never
        # pay for loading events and computing stats.
        if runtime_filter and runtime_filter not in runtime.lower():
            continue
        if args.task and task != args.task:
            continue
        if args.source and source != args.source:
            continue

        events = store.load(sid)
        stats = compute_stats(events) if events else {}

        # An explicit annotation wins over the heuristic classification.
        topology = annotation.get("topology") or classify_topology(stats)
        if args.topology and topology != args.topology:
            continue

        rows.append({
            "session_id": sid,
            "runtime": runtime,
            "task": task,
            "topology": topology,
            "events": stats.get("event_count", 0),
            "depth": stats.get("max_depth", 0),
            "roots": stats.get("root_count", 0),
        })

    if not rows:
        print("No matching sessions.")
        return

    # Print table
    header = f"{'Session ID':24s} {'Runtime':12s} {'Task':14s} {'Topology':22s} {'Events':>6s} {'Depth':>5s} {'Roots':>5s}"
    print(f"Corpus: {len(rows)} session(s)\n")
    print(header)
    print("-" * len(header))
    for r in rows:
        # Truncate long IDs so the 24-wide column keeps its alignment.
        sid = r["session_id"][:22]
        print(f"{sid:24s} {r['runtime']:12s} {r['task']:14s} {r['topology']:22s} {r['events']:6d} {r['depth']:5d} {r['roots']:5d}")
947+
948+
883949
def _handle_compare(store, args) -> None:
884950
"""Handle `causetrace compare`."""
885951
sid_a = args.session_a

tests/test_dag_fixtures.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
compute_stats, find_roots, longest_path, connected_components,
1919
detect_common_transitions, detect_fan_in_patterns, detect_repeated_paths,
2020
windowed, transition_entropy, branch_density, root_spawning_rate,
21-
path_reuse_ratio,
21+
path_reuse_ratio, classify_topology,
2222
)
2323

2424
FIXTURE_DIR = Path(__file__).resolve().parent / "fixtures" / "dags"
@@ -456,6 +456,39 @@ def test_path_reuse_ratio_empty():
456456
assert r["total_paths"] == 0
457457

458458

459+
# ── Topology classification ──
460+
461+
def test_classify_topology_dominant_chain():
    """One root, depth at 80% of the event count, low fan-out."""
    phenotype = classify_topology({
        "root_count": 1,
        "event_count": 100,
        "max_depth": 80,
        "avg_depth": 40.0,
        "fan_out_avg": 0.5,
        "fan_out_max": 1,
        "multi_parent_count": 0,
    })
    assert phenotype == "dominant_chain"
466+
467+
def test_classify_topology_multi_root():
    """Twelve roots with an average depth below 3."""
    phenotype = classify_topology({
        "root_count": 12,
        "event_count": 50,
        "max_depth": 2,
        "avg_depth": 0.8,
        "fan_out_avg": 0.3,
        "fan_out_max": 2,
        "multi_parent_count": 0,
    })
    assert phenotype == "multi_root_exploration"
472+
473+
def test_classify_topology_fan_out_heavy():
    """Single shallow root whose fan-out (avg 3.0, max 20) is wide."""
    phenotype = classify_topology({
        "root_count": 1,
        "event_count": 50,
        "max_depth": 3,
        "avg_depth": 1.5,
        "fan_out_avg": 3.0,
        "fan_out_max": 20,
        "multi_parent_count": 0,
    })
    assert phenotype == "fan_out_heavy"
478+
479+
def test_classify_topology_collapsed():
    """Eight multi-parent nodes out of 100 events (8% > 5% threshold)."""
    phenotype = classify_topology({
        "root_count": 2,
        "event_count": 100,
        "max_depth": 10,
        "avg_depth": 4.0,
        "fan_out_avg": 0.8,
        "fan_out_max": 3,
        "multi_parent_count": 8,
    })
    assert phenotype == "collapsed_repair"
484+
485+
def test_classify_topology_mixed():
    """Stats that satisfy none of the specific phenotype rules."""
    phenotype = classify_topology({
        "root_count": 4,
        "event_count": 100,
        "max_depth": 15,
        "avg_depth": 5.0,
        "fan_out_avg": 0.9,
        "fan_out_max": 3,
        "multi_parent_count": 1,
    })
    assert phenotype == "mixed"
490+
491+
459492
# ── Invariant battery (parametrized over fixtures) ──
460493

461494
INVARIANT_FIXTURES = [

0 commit comments

Comments
 (0)