From 415e23b32dd2134ef2e6e65bb18988457e8ffc19 Mon Sep 17 00:00:00 2001 From: Stefan Hudici Date: Fri, 3 Jul 2026 12:54:51 +0300 Subject: [PATCH] fix: read TESTED_BY edges in the parser's direction everywhere The parser emits TESTED_BY as production -> test (parser.py, all five language blocks): source is the tested symbol, target is the test. Half the consumers read the edge the other way around (test -> production), so they silently returned nothing on real graphs: - tools/query.py tests_for: queried incoming edges of the production node and returned edge sources. Real graphs never have TESTED_BY edges targeting production code, so the edge-based path always came up empty and only the test_ naming fallback ever fired. - graph.py get_transitive_tests (direct, bare-name fallback, and transitive blocks): same inversion, so direct and transitive test coverage was always empty. This also silently zeroed the test coverage factor in compute_risk_score, which calls it. - graph.py load_flow_adjacency: has_tested_by collected edge targets (the tests) instead of sources (the tested code), so flow criticality's tested_count was always 0. - changes.py test-gap detection: checked incoming edges, so every changed function was flagged as a test gap regardless of coverage. - enrich.py: the Tests: context line never listed anything. - refactor.py dead-code detection: has_test_refs looked for incoming TESTED_BY edges and bare-name matches on the target column; both are impossible under the emitted direction. Test references are the node's outgoing TESTED_BY edges (with a bare-source fallback, since the edge source inherits an unresolved CALLS target name). Consumers that already read the emitted direction (risk_index in tools/build.py, knowledge gaps and suggested questions in analysis.py, review context in tools/review.py) are unchanged. Test seeds in test_changes, test_enrich, test_graph and test_integration_v2 seeded the inverted direction to match the inverted consumers; they now seed what the parser emits. New regression tests pin the direction at both ends: the parser must emit production -> test (test_parser), and tests_for must find a test whose name defeats the naming-convention fallback, via the edge alone (test_tools). Co-Authored-By: Claude Fable 5 --- code_review_graph/changes.py | 2 +- code_review_graph/communities.py | 4 +-- code_review_graph/enrich.py | 6 ++-- code_review_graph/graph.py | 47 +++++++++++++------------- code_review_graph/refactor.py | 22 ++++++++---- code_review_graph/tools/query.py | 5 +-- tests/test_changes.py | 5 +-- tests/test_enrich.py | 4 +-- tests/test_graph.py | 2 +- tests/test_integration_v2.py | 4 +-- tests/test_parser.py | 18 ++++++++++ tests/test_tools.py | 57 ++++++++++++++++++++++++++++++-- 12 files changed, 129 insertions(+), 47 deletions(-) diff --git a/code_review_graph/changes.py b/code_review_graph/changes.py index 77dc25ad..c9559028 100644 --- a/code_review_graph/changes.py +++ b/code_review_graph/changes.py @@ -352,7 +352,7 @@ def analyze_changes( for node in changed_funcs: if node.is_test: continue - tested = store.get_edges_by_target(node.qualified_name) + tested = store.get_edges_by_source(node.qualified_name) if not any(e.kind == "TESTED_BY" for e in tested): test_gaps.append({ "name": _sanitize_name(node.name), diff --git a/code_review_graph/communities.py b/code_review_graph/communities.py index c8a3de8b..f14a208d 100644 --- a/code_review_graph/communities.py +++ b/code_review_graph/communities.py @@ -830,8 +830,8 @@ def get_architecture_overview(store: GraphStore) -> dict[str, Any]: cross_counts: Counter[tuple[int, int]] = Counter() for e in all_edges: - # TESTED_BY edges are expected cross-community coupling (test → code), - # not an architectural smell. + # TESTED_BY edges are expected cross-community coupling (code and + # its tests), not an architectural smell. if e.kind == "TESTED_BY": continue src_comm = node_to_community.get(e.source_qualified) diff --git a/code_review_graph/enrich.py b/code_review_graph/enrich.py index f95c334a..81b4c303 100644 --- a/code_review_graph/enrich.py +++ b/code_review_graph/enrich.py @@ -158,11 +158,11 @@ def _format_node_context( if flow_names: lines.append(f" Flows: {', '.join(flow_names)}") - # Tests + # Tests (TESTED_BY edges point production -> test) tests: list[str] = [] - for e in store.get_edges_by_target(qn): + for e in store.get_edges_by_source(qn): if e.kind == "TESTED_BY" and len(tests) < 3: - t = store.get_node(e.source_qualified) + t = store.get_node(e.target_qualified) if t: tests.append(t.name) if tests: diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py index 4ed3c7fc..fffe1e22 100644 --- a/code_review_graph/graph.py +++ b/code_review_graph/graph.py @@ -377,7 +377,7 @@ def get_transitive_tests( ) -> list[dict]: """Find tests covering a node, including indirect (transitive) coverage. - 1. Direct: TESTED_BY edges targeting this node (+ bare-name fallback). + 1. Direct: TESTED_BY edges from this node to its tests (+ bare-name fallback). 2. Indirect: follow outgoing CALLS edges up to *max_depth* hops, then collect TESTED_BY edges on each callee. @@ -421,31 +421,32 @@ def _node_dict(qn: str, indirect: bool) -> dict | None: "indirect": indirect, } - # Direct TESTED_BY + # Direct TESTED_BY (edges point production -> test) for qn in input_qns: for row in conn.execute( - "SELECT source_qualified FROM edges " - "WHERE target_qualified = ? AND kind = 'TESTED_BY'", + "SELECT target_qualified FROM edges " + "WHERE source_qualified = ? AND kind = 'TESTED_BY'", (qn,), ).fetchall(): - src = row["source_qualified"] - if src not in seen: - seen.add(src) - d = _node_dict(src, indirect=False) + tst = row["target_qualified"] + if tst not in seen: + seen.add(tst) + d = _node_dict(tst, indirect=False) if d: results.append(d) - # Bare-name fallback for direct + # Bare-name fallback for direct: the edge source inherits the CALLS + # target, which can be a bare name when unresolved cross-file. bare = qualified_name.rsplit("::", 1)[-1] if "::" in qualified_name else qualified_name for row in conn.execute( - "SELECT source_qualified FROM edges " - "WHERE target_qualified = ? AND kind = 'TESTED_BY'", + "SELECT target_qualified FROM edges " + "WHERE source_qualified = ? AND kind = 'TESTED_BY'", (bare,), ).fetchall(): - src = row["source_qualified"] - if src not in seen: - seen.add(src) - d = _node_dict(src, indirect=False) + tst = row["target_qualified"] + if tst not in seen: + seen.add(tst) + d = _node_dict(tst, indirect=False) if d: results.append(d) @@ -464,14 +465,14 @@ def _node_dict(qn: str, indirect: bool) -> dict | None: next_frontier = set(list(next_frontier)[:max_frontier]) for callee in next_frontier: for row in conn.execute( - "SELECT source_qualified FROM edges " - "WHERE target_qualified = ? AND kind = 'TESTED_BY'", + "SELECT target_qualified FROM edges " + "WHERE source_qualified = ? AND kind = 'TESTED_BY'", (callee,), ).fetchall(): - src = row["source_qualified"] - if src not in seen: - seen.add(src) - d = _node_dict(src, indirect=True) + tst = row["target_qualified"] + if tst not in seen: + seen.add(tst) + d = _node_dict(tst, indirect=True) if d: results.append(d) frontier = next_frontier @@ -1269,8 +1270,8 @@ def load_flow_adjacency(self) -> "FlowAdjacency": kind, src, tgt = row["kind"], row["source_qualified"], row["target_qualified"] if kind == "CALLS": calls_out.setdefault(src, []).append(tgt) - else: # TESTED_BY - has_tested_by.add(tgt) + else: # TESTED_BY points production -> test; the SOURCE is tested + has_tested_by.add(src) return FlowAdjacency( calls_out=calls_out, diff --git a/code_review_graph/refactor.py b/code_review_graph/refactor.py index 938762f3..17984c04 100644 --- a/code_review_graph/refactor.py +++ b/code_review_graph/refactor.py @@ -492,19 +492,29 @@ def _is_plausible_caller( if _is_plausible_caller(e.file_path, node.file_path, node.name) ] incoming = incoming + all_bare - if not any(e.kind == "TESTED_BY" for e in incoming): - bare_tb = store.search_edges_by_target_name(node.name, kind="TESTED_BY") - bare_tb = [ - e for e in bare_tb + # TESTED_BY edges point production -> test, so a node's test + # references are its OUTGOING edges. The edge source inherits the + # CALLS target, which can be a bare name when unresolved cross-file. + test_refs = [ + e for e in store.get_edges_by_source(node.qualified_name) + if e.kind == "TESTED_BY" + ] + if not test_refs: + bare_tb_rows = conn.execute( + "SELECT * FROM edges WHERE kind = 'TESTED_BY'" + " AND source_qualified = ?", + (node.name,), + ).fetchall() + test_refs = [ + e for e in (store._row_to_edge(r) for r in bare_tb_rows) if _is_plausible_caller(e.file_path, node.file_path, node.name) ] - incoming = incoming + bare_tb # Check INHERITS -- classes with subclasses are not dead. if node.kind == "Class" and not any(e.kind == "INHERITS" for e in incoming): bare_inh = store.search_edges_by_target_name(node.name, kind="INHERITS") incoming = incoming + bare_inh has_callers = any(e.kind == "CALLS" for e in incoming) - has_test_refs = any(e.kind == "TESTED_BY" for e in incoming) + has_test_refs = bool(test_refs) has_importers = any(e.kind == "IMPORTS_FROM" for e in incoming) has_references = any(e.kind == "REFERENCES" for e in incoming) has_subclasses = any(e.kind == "INHERITS" for e in incoming) diff --git a/code_review_graph/tools/query.py b/code_review_graph/tools/query.py index 3b442f8a..5ba344da 100644 --- a/code_review_graph/tools/query.py +++ b/code_review_graph/tools/query.py @@ -293,9 +293,10 @@ def query_graph( results.append(node_to_dict(child)) elif pattern == "tests_for": - for e in store.get_edges_by_target(qn): + # TESTED_BY edges point production -> test. + for e in store.get_edges_by_source(qn): if e.kind == "TESTED_BY": - test = store.get_node(e.source_qualified) + test = store.get_node(e.target_qualified) if test: results.append(node_to_dict(test)) # Also search by naming convention diff --git a/tests/test_changes.py b/tests/test_changes.py index 34dfdbb3..8332db2b 100644 --- a/tests/test_changes.py +++ b/tests/test_changes.py @@ -64,10 +64,11 @@ def _add_call(self, source_qn: str, target_qn: str, path: str = "app.py") -> Non self.store.commit() def _add_tested_by(self, test_qn: str, target_qn: str, path: str = "app.py") -> None: + # The parser emits TESTED_BY as production (target_qn) -> test (test_qn). edge = EdgeInfo( kind="TESTED_BY", - source=test_qn, - target=target_qn, + source=target_qn, + target=test_qn, file_path=path, line=1, ) diff --git a/tests/test_enrich.py b/tests/test_enrich.py index 862f20c6..501adb00 100644 --- a/tests/test_enrich.py +++ b/tests/test_enrich.py @@ -100,8 +100,8 @@ def _seed_data(self): ), EdgeInfo( kind="TESTED_BY", - source=f"{self.tmpdir}/test_parser.py::test_parse_file", - target=f"{self.tmpdir}/parser.py::parse_file", + source=f"{self.tmpdir}/parser.py::parse_file", + target=f"{self.tmpdir}/test_parser.py::test_parse_file", file_path=f"{self.tmpdir}/test_parser.py", line=1, ), ] diff --git a/tests/test_graph.py b/tests/test_graph.py index fb407810..566c2205 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -401,7 +401,7 @@ def test_uncapped_small_frontier_unchanged(self): # Only callee_2 has a test if i == 2: self.store.upsert_edge(EdgeInfo( - kind="TESTED_BY", source=test_qn, target=callee_qn, + kind="TESTED_BY", source=callee_qn, target=test_qn, file_path="/t/test_hub.py", line=1, )) self.store.commit() diff --git a/tests/test_integration_v2.py b/tests/test_integration_v2.py index 48225bb9..397c21d6 100644 --- a/tests/test_integration_v2.py +++ b/tests/test_integration_v2.py @@ -180,8 +180,8 @@ def _seed_realistic_graph(self): # --- Edges: tested_by --- s.upsert_edge(EdgeInfo( - kind="TESTED_BY", source="test_auth.py::test_login", - target="auth.py::login", file_path="test_auth.py", line=5, + kind="TESTED_BY", source="auth.py::login", + target="test_auth.py::test_login", file_path="test_auth.py", line=5, )) s.commit() diff --git a/tests/test_parser.py b/tests/test_parser.py index d1d96411..51f496a0 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -357,6 +357,24 @@ def test_tested_by_edges_generated(self): tested_by = [e for e in edges if e.kind == "TESTED_BY"] assert len(tested_by) >= 1 + def test_tested_by_edges_point_from_production_to_test(self): + """TESTED_BY direction: source = production symbol, target = the test. + + Consumers (tests_for, get_transitive_tests, test-gap detection, + flow criticality) query by source_qualified, so a flipped edge + silently breaks all of them. + """ + nodes, edges = self.parser.parse_file(FIXTURES / "test_sample.py") + tested_by = [e for e in edges if e.kind == "TESTED_BY"] + assert tested_by + for e in tested_by: + assert "::test_" in e.target, ( + f"TESTED_BY target must be the test, got {e.target!r}" + ) + assert "::test_" not in e.source, ( + f"TESTED_BY source must be production code, got {e.source!r}" + ) + def test_recursion_depth_guard(self): """Parser should not crash on deeply nested code.""" # Generate Python code with many nested functions (> _MAX_AST_DEPTH) diff --git a/tests/test_tools.py b/tests/test_tools.py index 578536d4..76a01166 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -265,6 +265,56 @@ def test_callees_of_includes_resolved_and_bare_target_callees(self): } +class TestQueryGraphTestsFor: + """Regression tests for tests_for reading TESTED_BY in the parser's + direction (production -> test).""" + + def setup_method(self): + self.tmp_dir = tempfile.mkdtemp() + self.root = Path(self.tmp_dir).resolve() + (self.root / ".git").mkdir() + (self.root / ".code-review-graph").mkdir() + + self.prod_file = str(self.root / "billing.py") + self.test_file = str(self.root / "test_billing.py") + self.db_path = str(self.root / ".code-review-graph" / "graph.db") + with GraphStore(self.db_path) as store: + store.upsert_node(NodeInfo( + kind="Function", name="compute_total", file_path=self.prod_file, + line_start=1, line_end=10, language="python", + )) + # Deliberately NOT named test_compute_total so the naming + # convention fallback cannot find it -- only the edge can. + store.upsert_node(NodeInfo( + kind="Test", name="test_sums_line_items", file_path=self.test_file, + line_start=1, line_end=8, language="python", is_test=True, + )) + store.upsert_edge(EdgeInfo( + kind="TESTED_BY", + source=f"{self.prod_file}::compute_total", + target=f"{self.test_file}::test_sums_line_items", + file_path=self.test_file, + line=3, + )) + store.commit() + + def teardown_method(self): + import shutil + + shutil.rmtree(self.tmp_dir, ignore_errors=True) + + def test_tests_for_follows_production_to_test_edges(self): + result = query_graph( + pattern="tests_for", + target=f"{self.prod_file}::compute_total", + repo_root=str(self.root), + ) + + assert result["status"] == "ok" + names = {r["name"] for r in result["results"]} + assert "test_sums_line_items" in names + + def _seed_repo_relative_graph(root: Path) -> None: """Seed graph data with cwd-relative paths, as eval repos currently do.""" graph_dir = root / ".code-review-graph" @@ -1216,8 +1266,8 @@ def _seed_graph(self): Shape (auth.py community, community_id=1): login -> check_token (CALLS, internal) logout -> check_token (CALLS, internal) - test_login -> login (TESTED_BY) - test_login -> logout (TESTED_BY) + login -> test_login (TESTED_BY, production -> test) + logout -> test_login (TESTED_BY, production -> test) (login is called from db.py::query to force cross-community edges into caller_counts) @@ -1276,7 +1326,8 @@ def _seed_graph(self): target="auth.py::login", file_path="db.py", line=3, )) - # TESTED_BY edges from the Test node back to auth functions. + # TESTED_BY edges from the auth functions to their Test node + # (the parser emits production -> test). self.store.upsert_edge(EdgeInfo( kind="TESTED_BY", source="auth.py::login", target="tests/test_auth.py::test_login",