From 415e23b32dd2134ef2e6e65bb18988457e8ffc19 Mon Sep 17 00:00:00 2001
From: Stefan Hudici <shudici@gmail.com>
Date: Fri, 3 Jul 2026 12:54:51 +0300
Subject: [PATCH] fix: read TESTED_BY edges in the parser's direction
 everywhere

The parser emits TESTED_BY as production -> test (parser.py, all five
language blocks): source is the tested symbol, target is the test.
Half the consumers read the edge the other way around (test -> production),
so they silently returned nothing on real graphs:

- tools/query.py tests_for: queried incoming edges of the production
  node and returned edge sources. Real graphs never have TESTED_BY
  edges targeting production code, so the edge-based path always came
  up empty and only the test_<name> naming fallback ever fired.
- graph.py get_transitive_tests (direct, bare-name fallback, and
  transitive blocks): same inversion, so direct and transitive test
  coverage was always empty. This also silently zeroed the test
  coverage factor in compute_risk_score, which calls it.
- graph.py load_flow_adjacency: has_tested_by collected edge targets
  (the tests) instead of sources (the tested code), so flow
  criticality's tested_count was always 0.
- changes.py test-gap detection: checked incoming edges, so every
  changed function was flagged as a test gap regardless of coverage.
- enrich.py: the Tests: context line never listed anything.
- refactor.py dead-code detection: has_test_refs looked for incoming
  TESTED_BY edges and bare-name matches on the target column; both are
  impossible under the emitted direction. Test references are the
  node's outgoing TESTED_BY edges (with a bare-source fallback, since
  the edge source inherits an unresolved CALLS target name).

Consumers that already read the emitted direction (risk_index in
tools/build.py, knowledge gaps and suggested questions in analysis.py,
review context in tools/review.py) are unchanged.

Test seeds in test_changes, test_enrich, test_graph and
test_integration_v2 seeded the inverted direction to match the
inverted consumers; they now seed what the parser emits. New
regression tests pin the direction at both ends: the parser must emit
production -> test (test_parser), and tests_for must find a test whose
name defeats the naming-convention fallback, via the edge alone
(test_tools).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 code_review_graph/changes.py     |  2 +-
 code_review_graph/communities.py |  4 +--
 code_review_graph/enrich.py      |  6 ++--
 code_review_graph/graph.py       | 47 +++++++++++++-------------
 code_review_graph/refactor.py    | 22 ++++++++----
 code_review_graph/tools/query.py |  5 +--
 tests/test_changes.py            |  5 +--
 tests/test_enrich.py             |  4 +--
 tests/test_graph.py              |  2 +-
 tests/test_integration_v2.py     |  4 +--
 tests/test_parser.py             | 18 ++++++++++
 tests/test_tools.py              | 57 ++++++++++++++++++++++++++++++--
 12 files changed, 129 insertions(+), 47 deletions(-)
diff --git a/code_review_graph/changes.py b/code_review_graph/changes.py
index 77dc25ad..c9559028 100644
--- a/code_review_graph/changes.py
+++ b/code_review_graph/changes.py
@@ -352,7 +352,7 @@ def analyze_changes(
     for node in changed_funcs:
         if node.is_test:
             continue
-        tested = store.get_edges_by_target(node.qualified_name)
+        tested = store.get_edges_by_source(node.qualified_name)
         if not any(e.kind == "TESTED_BY" for e in tested):
             test_gaps.append({
                 "name": _sanitize_name(node.name),
diff --git a/code_review_graph/communities.py b/code_review_graph/communities.py
index c8a3de8b..f14a208d 100644
--- a/code_review_graph/communities.py
+++ b/code_review_graph/communities.py
@@ -830,8 +830,8 @@ def get_architecture_overview(store: GraphStore) -> dict[str, Any]:
     cross_counts: Counter[tuple[int, int]] = Counter()
 
     for e in all_edges:
-        # TESTED_BY edges are expected cross-community coupling (test → code),
-        # not an architectural smell.
+        # TESTED_BY edges are expected cross-community coupling (code and
+        # its tests), not an architectural smell.
         if e.kind == "TESTED_BY":
             continue
         src_comm = node_to_community.get(e.source_qualified)
diff --git a/code_review_graph/enrich.py b/code_review_graph/enrich.py
index f95c334a..81b4c303 100644
--- a/code_review_graph/enrich.py
+++ b/code_review_graph/enrich.py
@@ -158,11 +158,11 @@ def _format_node_context(
     if flow_names:
         lines.append(f"  Flows: {', '.join(flow_names)}")
 
-    # Tests
+    # Tests (TESTED_BY edges point production -> test)
     tests: list[str] = []
-    for e in store.get_edges_by_target(qn):
+    for e in store.get_edges_by_source(qn):
         if e.kind == "TESTED_BY" and len(tests) < 3:
-            t = store.get_node(e.source_qualified)
+            t = store.get_node(e.target_qualified)
             if t:
                 tests.append(t.name)
     if tests:
diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py
index 4ed3c7fc..fffe1e22 100644
--- a/code_review_graph/graph.py
+++ b/code_review_graph/graph.py
@@ -377,7 +377,7 @@ def get_transitive_tests(
     ) -> list[dict]:
         """Find tests covering a node, including indirect (transitive) coverage.
 
-        1. Direct: TESTED_BY edges targeting this node (+ bare-name fallback).
+        1. Direct: TESTED_BY edges from this node to its tests (+ bare-name fallback).
         2. Indirect: follow outgoing CALLS edges up to *max_depth* hops,
            then collect TESTED_BY edges on each callee.
 
@@ -421,31 +421,32 @@ def _node_dict(qn: str, indirect: bool) -> dict | None:
                 "indirect": indirect,
             }
 
-        # Direct TESTED_BY
+        # Direct TESTED_BY (edges point production -> test)
         for qn in input_qns:
             for row in conn.execute(
-                "SELECT source_qualified FROM edges "
-                "WHERE target_qualified = ? AND kind = 'TESTED_BY'",
+                "SELECT target_qualified FROM edges "
+                "WHERE source_qualified = ? AND kind = 'TESTED_BY'",
                 (qn,),
             ).fetchall():
-                src = row["source_qualified"]
-                if src not in seen:
-                    seen.add(src)
-                    d = _node_dict(src, indirect=False)
+                tst = row["target_qualified"]
+                if tst not in seen:
+                    seen.add(tst)
+                    d = _node_dict(tst, indirect=False)
                     if d:
                         results.append(d)
 
-        # Bare-name fallback for direct
+        # Bare-name fallback for direct: the edge source inherits the CALLS
+        # target, which can be a bare name when unresolved cross-file.
         bare = qualified_name.rsplit("::", 1)[-1] if "::" in qualified_name else qualified_name
         for row in conn.execute(
-            "SELECT source_qualified FROM edges "
-            "WHERE target_qualified = ? AND kind = 'TESTED_BY'",
+            "SELECT target_qualified FROM edges "
+            "WHERE source_qualified = ? AND kind = 'TESTED_BY'",
             (bare,),
         ).fetchall():
-            src = row["source_qualified"]
-            if src not in seen:
-                seen.add(src)
-                d = _node_dict(src, indirect=False)
+            tst = row["target_qualified"]
+            if tst not in seen:
+                seen.add(tst)
+                d = _node_dict(tst, indirect=False)
                 if d:
                     results.append(d)
 
@@ -464,14 +465,14 @@ def _node_dict(qn: str, indirect: bool) -> dict | None:
                 next_frontier = set(list(next_frontier)[:max_frontier])
             for callee in next_frontier:
                 for row in conn.execute(
-                    "SELECT source_qualified FROM edges "
-                    "WHERE target_qualified = ? AND kind = 'TESTED_BY'",
+                    "SELECT target_qualified FROM edges "
+                    "WHERE source_qualified = ? AND kind = 'TESTED_BY'",
                     (callee,),
                 ).fetchall():
-                    src = row["source_qualified"]
-                    if src not in seen:
-                        seen.add(src)
-                        d = _node_dict(src, indirect=True)
+                    tst = row["target_qualified"]
+                    if tst not in seen:
+                        seen.add(tst)
+                        d = _node_dict(tst, indirect=True)
                         if d:
                             results.append(d)
             frontier = next_frontier
@@ -1269,8 +1270,8 @@ def load_flow_adjacency(self) -> "FlowAdjacency":
             kind, src, tgt = row["kind"], row["source_qualified"], row["target_qualified"]
             if kind == "CALLS":
                 calls_out.setdefault(src, []).append(tgt)
-            else:  # TESTED_BY
-                has_tested_by.add(tgt)
+            else:  # TESTED_BY points production -> test; the SOURCE is tested
+                has_tested_by.add(src)
 
         return FlowAdjacency(
             calls_out=calls_out,
diff --git a/code_review_graph/refactor.py b/code_review_graph/refactor.py
index 938762f3..17984c04 100644
--- a/code_review_graph/refactor.py
+++ b/code_review_graph/refactor.py
@@ -492,19 +492,29 @@ def _is_plausible_caller(
                 if _is_plausible_caller(e.file_path, node.file_path, node.name)
             ]
             incoming = incoming + all_bare
-        if not any(e.kind == "TESTED_BY" for e in incoming):
-            bare_tb = store.search_edges_by_target_name(node.name, kind="TESTED_BY")
-            bare_tb = [
-                e for e in bare_tb
+        # TESTED_BY edges point production -> test, so a node's test
+        # references are its OUTGOING edges. The edge source inherits the
+        # CALLS target, which can be a bare name when unresolved cross-file.
+        test_refs = [
+            e for e in store.get_edges_by_source(node.qualified_name)
+            if e.kind == "TESTED_BY"
+        ]
+        if not test_refs:
+            bare_tb_rows = conn.execute(
+                "SELECT * FROM edges WHERE kind = 'TESTED_BY'"
+                " AND source_qualified = ?",
+                (node.name,),
+            ).fetchall()
+            test_refs = [
+                e for e in (store._row_to_edge(r) for r in bare_tb_rows)
                 if _is_plausible_caller(e.file_path, node.file_path, node.name)
             ]
-            incoming = incoming + bare_tb
         # Check INHERITS -- classes with subclasses are not dead.
         if node.kind == "Class" and not any(e.kind == "INHERITS" for e in incoming):
             bare_inh = store.search_edges_by_target_name(node.name, kind="INHERITS")
             incoming = incoming + bare_inh
         has_callers = any(e.kind == "CALLS" for e in incoming)
-        has_test_refs = any(e.kind == "TESTED_BY" for e in incoming)
+        has_test_refs = bool(test_refs)
         has_importers = any(e.kind == "IMPORTS_FROM" for e in incoming)
         has_references = any(e.kind == "REFERENCES" for e in incoming)
         has_subclasses = any(e.kind == "INHERITS" for e in incoming)
diff --git a/code_review_graph/tools/query.py b/code_review_graph/tools/query.py
index 3b442f8a..5ba344da 100644
--- a/code_review_graph/tools/query.py
+++ b/code_review_graph/tools/query.py
@@ -293,9 +293,10 @@ def query_graph(
                         results.append(node_to_dict(child))
 
         elif pattern == "tests_for":
-            for e in store.get_edges_by_target(qn):
+            # TESTED_BY edges point production -> test.
+            for e in store.get_edges_by_source(qn):
                 if e.kind == "TESTED_BY":
-                    test = store.get_node(e.source_qualified)
+                    test = store.get_node(e.target_qualified)
                     if test:
                         results.append(node_to_dict(test))
             # Also search by naming convention
diff --git a/tests/test_changes.py b/tests/test_changes.py
index 34dfdbb3..8332db2b 100644
--- a/tests/test_changes.py
+++ b/tests/test_changes.py
@@ -64,10 +64,11 @@ def _add_call(self, source_qn: str, target_qn: str, path: str = "app.py") -> Non
         self.store.commit()
 
     def _add_tested_by(self, test_qn: str, target_qn: str, path: str = "app.py") -> None:
+        # The parser emits TESTED_BY as production (target_qn) -> test (test_qn).
         edge = EdgeInfo(
             kind="TESTED_BY",
-            source=test_qn,
-            target=target_qn,
+            source=target_qn,
+            target=test_qn,
             file_path=path,
             line=1,
         )
diff --git a/tests/test_enrich.py b/tests/test_enrich.py
index 862f20c6..501adb00 100644
--- a/tests/test_enrich.py
+++ b/tests/test_enrich.py
@@ -100,8 +100,8 @@ def _seed_data(self):
             ),
             EdgeInfo(
                 kind="TESTED_BY",
-                source=f"{self.tmpdir}/test_parser.py::test_parse_file",
-                target=f"{self.tmpdir}/parser.py::parse_file",
+                source=f"{self.tmpdir}/parser.py::parse_file",
+                target=f"{self.tmpdir}/test_parser.py::test_parse_file",
                 file_path=f"{self.tmpdir}/test_parser.py", line=1,
             ),
         ]
diff --git a/tests/test_graph.py b/tests/test_graph.py
index fb407810..566c2205 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -401,7 +401,7 @@ def test_uncapped_small_frontier_unchanged(self):
             # Only callee_2 has a test
             if i == 2:
                 self.store.upsert_edge(EdgeInfo(
-                    kind="TESTED_BY", source=test_qn, target=callee_qn,
+                    kind="TESTED_BY", source=callee_qn, target=test_qn,
                     file_path="/t/test_hub.py", line=1,
                 ))
         self.store.commit()
diff --git a/tests/test_integration_v2.py b/tests/test_integration_v2.py
index 48225bb9..397c21d6 100644
--- a/tests/test_integration_v2.py
+++ b/tests/test_integration_v2.py
@@ -180,8 +180,8 @@ def _seed_realistic_graph(self):
 
         # --- Edges: tested_by ---
         s.upsert_edge(EdgeInfo(
-            kind="TESTED_BY", source="test_auth.py::test_login",
-            target="auth.py::login", file_path="test_auth.py", line=5,
+            kind="TESTED_BY", source="auth.py::login",
+            target="test_auth.py::test_login", file_path="test_auth.py", line=5,
         ))
 
         s.commit()
diff --git a/tests/test_parser.py b/tests/test_parser.py
index d1d96411..51f496a0 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -357,6 +357,24 @@ def test_tested_by_edges_generated(self):
         tested_by = [e for e in edges if e.kind == "TESTED_BY"]
         assert len(tested_by) >= 1
 
+    def test_tested_by_edges_point_from_production_to_test(self):
+        """TESTED_BY direction: source = production symbol, target = the test.
+
+        Consumers (tests_for, get_transitive_tests, test-gap detection,
+        flow criticality) query by source_qualified, so a flipped edge
+        silently breaks all of them.
+        """
+        nodes, edges = self.parser.parse_file(FIXTURES / "test_sample.py")
+        tested_by = [e for e in edges if e.kind == "TESTED_BY"]
+        assert tested_by
+        for e in tested_by:
+            assert "::test_" in e.target, (
+                f"TESTED_BY target must be the test, got {e.target!r}"
+            )
+            assert "::test_" not in e.source, (
+                f"TESTED_BY source must be production code, got {e.source!r}"
+            )
+
     def test_recursion_depth_guard(self):
         """Parser should not crash on deeply nested code."""
         # Generate Python code with many nested functions (> _MAX_AST_DEPTH)
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 578536d4..76a01166 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -265,6 +265,56 @@ def test_callees_of_includes_resolved_and_bare_target_callees(self):
         }
 
 
+class TestQueryGraphTestsFor:
+    """Regression tests for tests_for reading TESTED_BY in the parser's
+    direction (production -> test)."""
+
+    def setup_method(self):
+        self.tmp_dir = tempfile.mkdtemp()
+        self.root = Path(self.tmp_dir).resolve()
+        (self.root / ".git").mkdir()
+        (self.root / ".code-review-graph").mkdir()
+
+        self.prod_file = str(self.root / "billing.py")
+        self.test_file = str(self.root / "test_billing.py")
+        self.db_path = str(self.root / ".code-review-graph" / "graph.db")
+        with GraphStore(self.db_path) as store:
+            store.upsert_node(NodeInfo(
+                kind="Function", name="compute_total", file_path=self.prod_file,
+                line_start=1, line_end=10, language="python",
+            ))
+            # Deliberately NOT named test_compute_total so the naming
+            # convention fallback cannot find it -- only the edge can.
+            store.upsert_node(NodeInfo(
+                kind="Test", name="test_sums_line_items", file_path=self.test_file,
+                line_start=1, line_end=8, language="python", is_test=True,
+            ))
+            store.upsert_edge(EdgeInfo(
+                kind="TESTED_BY",
+                source=f"{self.prod_file}::compute_total",
+                target=f"{self.test_file}::test_sums_line_items",
+                file_path=self.test_file,
+                line=3,
+            ))
+            store.commit()
+
+    def teardown_method(self):
+        import shutil
+
+        shutil.rmtree(self.tmp_dir, ignore_errors=True)
+
+    def test_tests_for_follows_production_to_test_edges(self):
+        result = query_graph(
+            pattern="tests_for",
+            target=f"{self.prod_file}::compute_total",
+            repo_root=str(self.root),
+        )
+
+        assert result["status"] == "ok"
+        names = {r["name"] for r in result["results"]}
+        assert "test_sums_line_items" in names
+
+
 def _seed_repo_relative_graph(root: Path) -> None:
     """Seed graph data with cwd-relative paths, as eval repos currently do."""
     graph_dir = root / ".code-review-graph"
@@ -1216,8 +1266,8 @@ def _seed_graph(self):
         Shape (auth.py community, community_id=1):
             login  ->  check_token   (CALLS, internal)
             logout ->  check_token   (CALLS, internal)
-            test_login -> login      (TESTED_BY)
-            test_login -> logout     (TESTED_BY)
+            login  -> test_login     (TESTED_BY, production -> test)
+            logout -> test_login     (TESTED_BY, production -> test)
             (login is called from db.py::query to force cross-community
              edges into caller_counts)
 
@@ -1276,7 +1326,8 @@ def _seed_graph(self):
             target="auth.py::login", file_path="db.py", line=3,
         ))
 
-        # TESTED_BY edges from the Test node back to auth functions.
+        # TESTED_BY edges from the auth functions to their Test node
+        # (the parser emits production -> test).
         self.store.upsert_edge(EdgeInfo(
             kind="TESTED_BY", source="auth.py::login",
             target="tests/test_auth.py::test_login",