Skip to content

Commit 478bdcb

Browse files
committed
fix: deep extraction improvements for C# and F#
Multiple fixes to improve graph connectivity:

C# improvements:
- Add object_creation_expression to call_types (new Type() → edge)
- Extract generic type arguments from invocation_expression (AddDbContext<BookDbContext> → edge to BookDbContext)
- Walk constructor_declaration bodies for calls via _csharp_extra_walk
- Walk global_statement (top-level C# code in Program.cs) for calls (app.UseMiddleware<ApiKeyMiddleware> → edge to ApiKeyMiddleware)

F# improvements:
- Fix member_defn body extraction: find body after '=' token instead of using child_by_field_name("body"), which returns None for F# members
- Extract root identifier from dot_expression chains (PipelineMetrics.counter.Add → edge to PipelineMetrics module)

Razor improvements:
- Extract generic type arguments from @code blocks (ShowDialogAsync<SubmitBookDialog> → edge to SubmitBookDialog)
- Extract new Type() patterns from @code blocks

Impact: main component grew from 995 to 1058 nodes. Previously disconnected: BookDbContext, ChapterStore, PageGistStore, BookProcessingHub, BkdWebApplicationFactory, PipelineMetrics, ApiKeyMiddleware, SubmitBookDialog — all now connected.

Made-with: Cursor
1 parent 68d4de3 commit 478bdcb

1 file changed

Lines changed: 90 additions & 13 deletions

File tree

graphify/extract.py

Lines changed: 90 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ def _csharp_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path:
384384
nodes: list, edges: list, seen_ids: set, function_bodies: list,
385385
parent_class_nid: str | None, add_node_fn, add_edge_fn,
386386
walk_fn) -> bool:
387-
"""Handle namespace_declaration for C#. Returns True if handled."""
387+
"""Handle namespace_declaration and constructor_declaration for C#."""
388388
if node.type == "namespace_declaration":
389389
name_node = node.child_by_field_name("name")
390390
if name_node:
@@ -398,6 +398,15 @@ def _csharp_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path:
398398
for child in body.children:
399399
walk_fn(child, parent_class_nid)
400400
return True
401+
if node.type == "constructor_declaration":
402+
owner = parent_class_nid or file_nid
403+
body = node.child_by_field_name("body")
404+
if body:
405+
function_bodies.append((owner, body))
406+
return True
407+
if node.type == "global_statement":
408+
function_bodies.append((file_nid, node))
409+
return True
401410
return False
402411

403412

@@ -519,15 +528,16 @@ def _swift_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: s
519528
class_types=frozenset({"class_declaration", "interface_declaration"}),
520529
function_types=frozenset({"method_declaration"}),
521530
import_types=frozenset({"using_directive"}),
522-
call_types=frozenset({"invocation_expression"}),
531+
call_types=frozenset({"invocation_expression", "object_creation_expression"}),
523532
call_function_field="function",
524533
call_accessor_node_types=frozenset({"member_access_expression"}),
525534
call_accessor_field="name",
526535
body_fallback_child_types=("declaration_list",),
527-
function_boundary_types=frozenset({"method_declaration"}),
536+
function_boundary_types=frozenset({"method_declaration", "constructor_declaration"}),
528537
import_handler=_import_csharp,
529538
)
530539

540+
531541
_KOTLIN_CONFIG = LanguageConfig(
532542
ts_module="tree_sitter_kotlin",
533543
class_types=frozenset({"class_declaration", "object_declaration"}),
@@ -996,6 +1006,18 @@ def walk_calls(node, caller_nid: str) -> None:
9961006
if child.type == "identifier":
9971007
callee_name = _read_text(child, source)
9981008
break
1009+
elif config.ts_module == "tree_sitter_c_sharp" and node.type == "object_creation_expression":
1010+
# C#: new Type(...) → emit call to the created type
1011+
for child in node.children:
1012+
if child.type == "identifier":
1013+
callee_name = _read_text(child, source)
1014+
break
1015+
elif child.type == "generic_name":
1016+
callee_name = _read_text(child, source).split("<")[0]
1017+
break
1018+
elif child.type == "qualified_name":
1019+
callee_name = _read_text(child, source).split(".")[-1]
1020+
break
9991021
elif config.ts_module == "tree_sitter_c_sharp" and node.type == "invocation_expression":
10001022
# C#: try name field, then first named child
10011023
name_node = node.child_by_field_name("name")
@@ -1010,6 +1032,18 @@ def walk_calls(node, caller_nid: str) -> None:
10101032
else:
10111033
callee_name = raw
10121034
break
1035+
# Extract generic type arguments (e.g. AddDbContext<BookDbContext>)
1036+
full_text = _read_text(node, source)
1037+
import re as _re_inline
1038+
for gm in _re_inline.finditer(r'<([A-Z][A-Za-z0-9]+)(?:\s*,\s*([A-Z][A-Za-z0-9]+))?>', full_text):
1039+
for g in (gm.group(1), gm.group(2)):
1040+
if g and g != callee_name:
1041+
raw_calls.append({
1042+
"caller_nid": caller_nid,
1043+
"callee": g,
1044+
"source_file": str_path,
1045+
"source_location": f"L{node.start_point[0] + 1}",
1046+
})
10131047
elif config.ts_module == "tree_sitter_php":
10141048
# PHP: distinguish call expression subtypes
10151049
if node.type == "function_call_expression":
@@ -1547,18 +1581,25 @@ def walk(node, parent_nid: str | None = None) -> None:
15471581
if t == "member_defn":
15481582
for child in node.children:
15491583
if child.type == "method_or_prop_defn":
1584+
member_name = None
1585+
body_node = None
1586+
found_eq = False
15501587
for sc in child.children:
15511588
if sc.type == "property_or_ident":
15521589
member_name = text(sc)
1553-
line = node.start_point[0] + 1
1554-
member_nid = _make_id(stem, member_name)
1555-
add_node(member_nid, f"{member_name}()", line)
1556-
if parent_nid:
1557-
add_edge(parent_nid, member_nid, "contains", line)
1558-
body = child.child_by_field_name("body")
1559-
if body:
1560-
function_bodies.append((member_nid, body))
1561-
break
1590+
elif sc.type == "=":
1591+
found_eq = True
1592+
elif found_eq and sc.is_named and body_node is None:
1593+
body_node = sc
1594+
if member_name:
1595+
line = node.start_point[0] + 1
1596+
member_nid = _make_id(stem, member_name)
1597+
add_node(member_nid, f"{member_name}()", line)
1598+
if parent_nid:
1599+
add_edge(parent_nid, member_nid, "contains", line)
1600+
if body_node:
1601+
function_bodies.append((member_nid, body_node))
1602+
break
15621603
return
15631604

15641605
for child in node.children:
@@ -1610,6 +1651,22 @@ def walk_calls(node, caller_nid: str) -> None:
16101651
"source_file": str_path,
16111652
"source_location": f"L{node.start_point[0] + 1}",
16121653
})
1654+
dot_text = text(child)
1655+
root_id = dot_text.split(".")[0]
1656+
if root_id and root_id[0].isupper() and root_id != callee:
1657+
tgt2 = label_to_nid.get(root_id.lower())
1658+
if tgt2 and tgt2 != caller_nid:
1659+
pair2 = (caller_nid, tgt2)
1660+
if pair2 not in seen_call_pairs:
1661+
seen_call_pairs.add(pair2)
1662+
add_edge(caller_nid, tgt2, "calls",
1663+
node.start_point[0] + 1, confidence="EXTRACTED")
1664+
else:
1665+
raw_calls.append({
1666+
"caller_nid": caller_nid, "callee": root_id,
1667+
"source_file": str_path,
1668+
"source_location": f"L{node.start_point[0] + 1}",
1669+
})
16131670
break
16141671
elif child.type in ("identifier", "long_identifier"):
16151672
callee = text(child).split(".")[-1]
@@ -1774,6 +1831,26 @@ def add_edge(src: str, tgt: str, relation: str, line: int,
17741831
"source_file": str_path,
17751832
"source_location": f"L{line + block_text[:m.start()].count(chr(10))}",
17761833
})
1834+
for m in _re.finditer(r'<([A-Z][A-Za-z0-9]+)>', block_text):
1835+
callee = m.group(1)
1836+
if callee not in seen_callees and callee != stem:
1837+
seen_callees.add(callee)
1838+
raw_calls.append({
1839+
"caller_nid": page_nid,
1840+
"callee": callee,
1841+
"source_file": str_path,
1842+
"source_location": f"L{line + block_text[:m.start()].count(chr(10))}",
1843+
})
1844+
for m in _re.finditer(r'\bnew\s+([A-Z][A-Za-z0-9]+)\s*[(\[{]', block_text):
1845+
callee = m.group(1)
1846+
if callee not in seen_callees and callee != stem:
1847+
seen_callees.add(callee)
1848+
raw_calls.append({
1849+
"caller_nid": page_nid,
1850+
"callee": callee,
1851+
"source_file": str_path,
1852+
"source_location": f"L{line + block_text[:m.start()].count(chr(10))}",
1853+
})
17771854

17781855
clean_edges = [e for e in edges if e["source"] in seen_ids and
17791856
(e["target"] in seen_ids or e["relation"] == "imports")]
@@ -3774,7 +3851,7 @@ def collect_files(target: Path, *, follow_symlinks: bool = False, root: Path | N
37743851
_EXTENSIONS = {
37753852
".py", ".js", ".ts", ".tsx", ".go", ".rs",
37763853
".java", ".c", ".h", ".cpp", ".cc", ".cxx", ".hpp",
3777-
".rb", ".cs", ".fs", ".fsx", ".kt", ".kts", ".scala", ".php", ".swift",
3854+
".rb", ".cs", ".fs", ".fsx", ".razor", ".kt", ".kts", ".scala", ".php", ".swift",
37783855
".lua", ".toc", ".zig", ".ps1",
37793856
".m", ".mm",
37803857
}

0 commit comments

Comments
 (0)