@@ -3179,6 +3179,104 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
31793179 import logging
31803180 logging .getLogger (__name__ ).warning ("Cross-file import resolution failed, skipping: %s" , exc )
31813181
3182+ # ── Disambiguate colliding node IDs across files ─────────────────────────
3183+ # _make_id(stem, name) collides when two files have the same stem (e.g.
3184+ # Program.cs in different projects). Detect and rename with parent dir.
3185+ id_to_files : dict [str , list [dict ]] = {}
3186+ for n in all_nodes :
3187+ nid = n ["id" ]
3188+ id_to_files .setdefault (nid , []).append (n )
3189+
3190+ rename_map : dict [str , str ] = {}
3191+ dedup_nodes : list [dict ] = []
3192+ seen_final_ids : set [str ] = set ()
3193+ for nid , node_list in id_to_files .items ():
3194+ real_nodes = [n for n in node_list if n .get ("source_file" )]
3195+ unique_files = {n ["source_file" ] for n in real_nodes }
3196+ if len (unique_files ) > 1 :
3197+ for n in node_list :
3198+ sf = n .get ("source_file" , "" )
3199+ if sf :
3200+ parent = Path (sf ).parent .name
3201+ new_id = _make_id (parent , nid )
3202+ if new_id not in seen_final_ids :
3203+ n ["id" ] = new_id
3204+ rename_map [nid + "|" + sf ] = new_id
3205+ seen_final_ids .add (new_id )
3206+ dedup_nodes .append (n )
3207+ else :
3208+ if nid not in seen_final_ids :
3209+ seen_final_ids .add (nid )
3210+ dedup_nodes .append (n )
3211+ else :
3212+ for n in node_list :
3213+ if nid not in seen_final_ids :
3214+ seen_final_ids .add (nid )
3215+ dedup_nodes .append (n )
3216+
3217+ if rename_map :
3218+ file_to_renames : dict [str , dict [str , str ]] = {}
3219+ for key , new_id in rename_map .items ():
3220+ old_id , sf = key .rsplit ("|" , 1 )
3221+ file_to_renames .setdefault (sf , {})[old_id ] = new_id
3222+
3223+ all_nodes = dedup_nodes
3224+ for e in all_edges :
3225+ sf = e .get ("source_file" , "" )
3226+ renames = file_to_renames .get (sf , {})
3227+ if e ["source" ] in renames :
3228+ e ["source" ] = renames [e ["source" ]]
3229+ if e ["target" ] in renames :
3230+ e ["target" ] = renames [e ["target" ]]
3231+ else :
3232+ first_seen : set [str ] = set ()
3233+ unique_nodes : list [dict ] = []
3234+ for n in all_nodes :
3235+ if n ["id" ] not in first_seen :
3236+ first_seen .add (n ["id" ])
3237+ unique_nodes .append (n )
3238+ all_nodes = unique_nodes
3239+
3240+
3241+ # ── Cross-language node merge ─────────────────────────────────────────────
3242+ # C# extractors create stub nodes (empty source_file) for base types that
3243+ # may actually be defined in F# files (or other C# files). Merge each stub
3244+ # into the real node with the same label (case-insensitive, leading/trailing parentheses ignored) so edges point to the canonical node.
3245+ #
3246+ # Priority: when a label is defined in more than one file, prefer the node whose
3247+ # file stem is in _DEFINITION_FILES (e.g. Interfaces.fs, Domain.fs) over implementation files, so inherits edges point to abstract types.
3248+ _DEFINITION_FILES = {"interfaces" , "domain" , "types" , "contracts" , "abstractions" }
3249+
3250+ real_by_label : dict [str , str ] = {}
3251+ for n in all_nodes :
3252+ sf = n .get ("source_file" , "" )
3253+ if sf :
3254+ lbl = n ["label" ].strip ("()" ).lower ()
3255+ stem_lower = Path (sf ).stem .lower ()
3256+ existing = real_by_label .get (lbl )
3257+ if existing is None :
3258+ real_by_label [lbl ] = n ["id" ]
3259+ elif stem_lower in _DEFINITION_FILES :
3260+ real_by_label [lbl ] = n ["id" ]
3261+
3262+ stub_ids : set [str ] = set ()
3263+ stub_to_real : dict [str , str ] = {}
3264+ for n in all_nodes :
3265+ if not n .get ("source_file" ):
3266+ lbl = n ["label" ].strip ("()" ).lower ()
3267+ real_nid = real_by_label .get (lbl )
3268+ if real_nid and real_nid != n ["id" ]:
3269+ stub_to_real [n ["id" ]] = real_nid
3270+ stub_ids .add (n ["id" ])
3271+
3272+ if stub_to_real :
3273+ all_nodes = [n for n in all_nodes if n ["id" ] not in stub_ids ]
3274+ for e in all_edges :
3275+ if e ["source" ] in stub_to_real :
3276+ e ["source" ] = stub_to_real [e ["source" ]]
3277+ if e ["target" ] in stub_to_real :
3278+ e ["target" ] = stub_to_real [e ["target" ]]
3279+
31823280 # Cross-file call resolution for all languages
31833281 # Each extractor saved unresolved calls in raw_calls. Now that we have all
31843282 # nodes from all files, resolve any callee that exists in another file.
0 commit comments