From 0208bd26efc47f3df98feda566f3f0094b7a773e Mon Sep 17 00:00:00 2001
From: Vladimir Rogozhin <vladimirrogozhin90@gmail.com>
Date: Sat, 18 Apr 2026 13:50:49 +0200
Subject: [PATCH] fix: add BCL method blocklist to cross-file inference

When resolving unresolved calls across files, common .NET BCL method
names (Contains, Equals, ToString, Where, Select, ListAsync, etc.)
would incorrectly match graph nodes with the same name, creating
false INFERRED edges. For example, string.Contains() would create
a spurious edge to an F# union case named "Contains".

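Add a blocklist of ~120 common BCL/framework method names that are
skipped during cross-file call resolution. Callee names are lowercased
before the lookup, so the match is case-insensitive. Note that the
check keys on the name alone, so a project-defined function that
shares one of these names is skipped as well.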

Made-with: Cursor
---
 graphify/extract.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/graphify/extract.py b/graphify/extract.py
index dbd441c6..015c8a46 100644
--- a/graphify/extract.py
+++ b/graphify/extract.py
@@ -3345,6 +3345,39 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
     # Cross-file call resolution for all languages
     # Each extractor saved unresolved calls in raw_calls. Now that we have all
     # nodes from all files, resolve any callee that exists in another file.
+    _BCL_METHOD_BLOCKLIST = frozenset({
+        "contains", "equals", "gethashcode", "tostring", "gettype",
+        "compareto", "startswith", "endswith", "replace", "split",
+        "trim", "trimstart", "trimend", "substring", "indexof",
+        "lastindexof", "insert", "remove", "toarray", "tolist",
+        "todictionary", "count", "any", "all", "first", "firstordefault",
+        "last", "lastordefault", "single", "singleordefault",
+        "where", "select", "selectmany", "orderby", "orderbydescending",
+        "groupby", "skip", "take", "aggregate", "sum", "min", "max",
+        "average", "concat", "zip", "distinct", "union", "intersect",
+        "except", "reverse", "append", "prepend", "add", "clear",
+        "dispose", "close", "read", "write", "flush", "seek",
+        "getawaiter", "getresult", "configureawait",
+        "trygetvalue", "containskey", "containsvalue",
+        "format", "join", "isnullorempty", "isnullorwhitespace",
+        "parse", "tryparse",
+        "listasync", "getasync", "saveasync", "deleteasync",
+        "updateasync", "createasync", "findasync", "existsasync",
+        "executeasync", "sendasync", "receiveasync",
+        "openasync", "closeasync", "readasync", "writeasync",
+        "createdbcontext", "abortwithstatus",
+        "abortwithstatuscode", "map", "mapget", "mappost", "mapput",
+        "mapdelete", "useswagger", "useswaggerui",
+        "addsingleton", "addscoped", "addtransient",
+        "useendpoints", "userouting", "useauthorization",
+        "useauthentication", "usecors", "usehttpsredirection",
+        "getlogger", "loginformation", "logwarning", "logerror",
+        "logdebug", "logcritical",
+        "ok", "notfound", "badrequest", "unauthorized",
+        "nocontent", "created", "accepted",
+        "run", "build", "createbuilder",
+    })
+
     global_label_to_nid: dict[str, str] = {}
     for n in all_nodes:
         raw = n.get("label", "")
@@ -3358,6 +3391,8 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
             callee = rc.get("callee", "")
             if not callee:
                 continue
+            if callee.lower() in _BCL_METHOD_BLOCKLIST:
+                continue
             tgt = global_label_to_nid.get(callee.lower())
             caller = rc["caller_nid"]
             if tgt and tgt != caller and (caller, tgt) not in existing_pairs: