From 0208bd26efc47f3df98feda566f3f0094b7a773e Mon Sep 17 00:00:00 2001 From: Vladimir Rogozhin Date: Sat, 18 Apr 2026 13:50:49 +0200 Subject: [PATCH] fix: add BCL method blocklist to cross-file inference When resolving unresolved calls across files, common .NET BCL method names (Contains, Equals, ToString, Where, Select, ListAsync, etc.) would incorrectly match graph nodes with the same name, creating false INFERRED edges. For example, string.Contains() would create a spurious edge to an F# union case named "Contains". Add a blocklist of ~120 common BCL/framework method names that are skipped during cross-file call resolution. Made-with: Cursor --- graphify/extract.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/graphify/extract.py b/graphify/extract.py index dbd441c6..015c8a46 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -3345,6 +3345,39 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict: # Cross-file call resolution for all languages # Each extractor saved unresolved calls in raw_calls. Now that we have all # nodes from all files, resolve any callee that exists in another file. + _BCL_METHOD_BLOCKLIST = frozenset({ + "contains", "equals", "gethashcode", "tostring", "gettype", + "compareto", "startswith", "endswith", "replace", "split", + "trim", "trimstart", "trimend", "substring", "indexof", + "lastindexof", "insert", "remove", "toarray", "tolist", + "todictionary", "count", "any", "all", "first", "firstordefault", + "last", "lastordefault", "single", "singleordefault", + "where", "select", "selectmany", "orderby", "orderbydescending", + "groupby", "skip", "take", "aggregate", "sum", "min", "max", + "average", "concat", "zip", "distinct", "union", "intersect", + "except", "reverse", "append", "prepend", "add", "clear", + "dispose", "close", "read", "write", "flush", "seek", + "getawaiter", "getresult", "configureawait", + "trygetvalue", "containskey", "containsvalue", + "format", "join", "isnullorempty", "isnullorwhitespace", + "parse", "tryparse", + "listasync", "getasync", "saveasync", "deleteasync", + "updateasync", "createasync", "findasync", "existsasync", + "executeasync", "sendasync", "receiveasync", + "openasync", "closeasync", "readasync", "writeasync", + "createdbcontext", "abortwithstatus", + "abortwithstatuscode", "map", "mapget", "mappost", "mapput", + "mapdelete", "useswagger", "useswaggerui", + "addsingleton", "addscoped", "addtransient", + "useendpoints", "userouting", "useauthorization", + "useauthentication", "usecors", "usehttpsredirection", + "getlogger", "loginformation", "logwarning", "logerror", + "logdebug", "logcritical", + "ok", "notfound", "badrequest", "unauthorized", + "nocontent", "created", "accepted", + "run", "build", "createbuilder", + }) + global_label_to_nid: dict[str, str] = {} for n in all_nodes: raw = n.get("label", "") @@ -3358,6 +3391,8 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict: callee = rc.get("callee", "") if not callee: continue + if callee.lower() in _BCL_METHOD_BLOCKLIST: + continue tgt = global_label_to_nid.get(callee.lower()) caller = rc["caller_nid"] if tgt and tgt != caller and (caller, tgt) not in existing_pairs: