Skip to content

Commit ce6b5c8

Browse files
committed
fix: merge C# stub nodes with real cross-language definitions
When C# code inherits from a type defined in F# (e.g. SqliteBookStore extends BookStore from Interfaces.fs), the C# extractor creates a stub node with an empty source_file. This stub disconnects the inheritance edge from the real F# definition. Add a post-extraction pass that merges stub nodes into real definitions by matching labels. Prioritize definition files (e.g. Interfaces.fs, Domain.fs, Types.fs) so that inherits edges point to abstract types rather than implementation classes. Made-with: Cursor
1 parent 7a0a5ac commit ce6b5c8

1 file changed

Lines changed: 98 additions & 0 deletions

File tree

graphify/extract.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3179,6 +3179,104 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
31793179
import logging
31803180
logging.getLogger(__name__).warning("Cross-file import resolution failed, skipping: %s", exc)
31813181

3182+
# ── Disambiguate colliding node IDs across files ─────────────────────────
3183+
# _make_id(stem, name) collides when two files have the same stem (e.g.
3184+
# Program.cs in different projects). Detect and rename with parent dir.
3185+
id_to_files: dict[str, list[dict]] = {}
3186+
for n in all_nodes:
3187+
nid = n["id"]
3188+
id_to_files.setdefault(nid, []).append(n)
3189+
3190+
rename_map: dict[str, str] = {}
3191+
dedup_nodes: list[dict] = []
3192+
seen_final_ids: set[str] = set()
3193+
for nid, node_list in id_to_files.items():
3194+
real_nodes = [n for n in node_list if n.get("source_file")]
3195+
unique_files = {n["source_file"] for n in real_nodes}
3196+
if len(unique_files) > 1:
3197+
for n in node_list:
3198+
sf = n.get("source_file", "")
3199+
if sf:
3200+
parent = Path(sf).parent.name
3201+
new_id = _make_id(parent, nid)
3202+
if new_id not in seen_final_ids:
3203+
n["id"] = new_id
3204+
rename_map[nid + "|" + sf] = new_id
3205+
seen_final_ids.add(new_id)
3206+
dedup_nodes.append(n)
3207+
else:
3208+
if nid not in seen_final_ids:
3209+
seen_final_ids.add(nid)
3210+
dedup_nodes.append(n)
3211+
else:
3212+
for n in node_list:
3213+
if nid not in seen_final_ids:
3214+
seen_final_ids.add(nid)
3215+
dedup_nodes.append(n)
3216+
3217+
if rename_map:
3218+
file_to_renames: dict[str, dict[str, str]] = {}
3219+
for key, new_id in rename_map.items():
3220+
old_id, sf = key.rsplit("|", 1)
3221+
file_to_renames.setdefault(sf, {})[old_id] = new_id
3222+
3223+
all_nodes = dedup_nodes
3224+
for e in all_edges:
3225+
sf = e.get("source_file", "")
3226+
renames = file_to_renames.get(sf, {})
3227+
if e["source"] in renames:
3228+
e["source"] = renames[e["source"]]
3229+
if e["target"] in renames:
3230+
e["target"] = renames[e["target"]]
3231+
else:
3232+
first_seen: set[str] = set()
3233+
unique_nodes: list[dict] = []
3234+
for n in all_nodes:
3235+
if n["id"] not in first_seen:
3236+
first_seen.add(n["id"])
3237+
unique_nodes.append(n)
3238+
all_nodes = unique_nodes
3239+
3240+
3241+
# ── Cross-language node merge ─────────────────────────────────────────────
3242+
# C# extractors create stub nodes (empty source_file) for base types that
3243+
# may actually be defined in F# files (or other C# files). Merge stubs
3244+
# into real definitions so edges point to the canonical node.
3245+
#
3246+
# Priority: prefer nodes from definition files (e.g. Interfaces.fs, Domain.fs)
3247+
# over implementation files, so inherits edges point to abstract types.
3248+
_DEFINITION_FILES = {"interfaces", "domain", "types", "contracts", "abstractions"}
3249+
3250+
real_by_label: dict[str, str] = {}
3251+
for n in all_nodes:
3252+
sf = n.get("source_file", "")
3253+
if sf:
3254+
lbl = n["label"].strip("()").lower()
3255+
stem_lower = Path(sf).stem.lower()
3256+
existing = real_by_label.get(lbl)
3257+
if existing is None:
3258+
real_by_label[lbl] = n["id"]
3259+
elif stem_lower in _DEFINITION_FILES:
3260+
real_by_label[lbl] = n["id"]
3261+
3262+
stub_ids: set[str] = set()
3263+
stub_to_real: dict[str, str] = {}
3264+
for n in all_nodes:
3265+
if not n.get("source_file"):
3266+
lbl = n["label"].strip("()").lower()
3267+
real_nid = real_by_label.get(lbl)
3268+
if real_nid and real_nid != n["id"]:
3269+
stub_to_real[n["id"]] = real_nid
3270+
stub_ids.add(n["id"])
3271+
3272+
if stub_to_real:
3273+
all_nodes = [n for n in all_nodes if n["id"] not in stub_ids]
3274+
for e in all_edges:
3275+
if e["source"] in stub_to_real:
3276+
e["source"] = stub_to_real[e["source"]]
3277+
if e["target"] in stub_to_real:
3278+
e["target"] = stub_to_real[e["target"]]
3279+
31823280
# Cross-file call resolution for all languages
31833281
# Each extractor saved unresolved calls in raw_calls. Now that we have all
31843282
# nodes from all files, resolve any callee that exists in another file.

0 commit comments

Comments
 (0)