From e091edd0af3167f80f2e4862523e933c8dc1687b Mon Sep 17 00:00:00 2001 From: Cam Dowdle Date: Sat, 23 May 2026 07:28:42 -0500 Subject: [PATCH 1/3] feat: add Pascal / Delphi language support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a tree-sitter-pascal-backed language module that produces the same kind of deterministic structural data the other extractors already provide (functions, classes, imports, exports, call graph) — validated end to end against a 324-file legacy Delphi 7 codebase (19 MB / 461k lines). New language module - packages/core/src/languages/configs/pascal.ts — language config (extensions .pas/.pp/.dpr/.dpk/.inc, treeSitter→tree-sitter-pascal.wasm, concepts list, file patterns) - packages/core/src/languages/configs/index.ts — register pascalConfig - packages/core/src/plugins/extractors/pascal-extractor.ts — AST walker for unit / program / library modules: extracts moduleName, uses-clauses (interface- and implementation-section), declTypes (classes + interfaces), declProc/defProc (with declArgs/typeref), declField, call graph via exprCall, and qualified-method names via genericDot - packages/core/src/plugins/extractors/index.ts — register PascalExtractor - skills/understand/languages/pascal.md — LLM prompt snippet (key concepts, import patterns, file patterns, common frameworks, example languageNotes) Shared-type extensions (additive, all optional — no breaking changes for existing extractors) - packages/core/src/types.ts: StructuralAnalysis.classes[].parents?: string[] Ancestor class names. Lets every language extractor surface inheritance deterministically (Pascal `class(TParent)`, Java `extends X`, Python `class X(Y)`, etc.) so the file-analyzer agent doesn't have to re-read source to recover the parent StructuralAnalysis.classes[].interfaces?: string[] Implemented interface names. 
For Pascal this is every typeref after the first under declClass; for Java `implements X, Y` StructuralAnalysis.imports[].section?: "interface" | "implementation" Section-scoped imports. Pascal-specific for now (other languages don't have section-scoped uses-clauses) - skills/understand/extract-structure.mjs: pass through the new classes[].parents / classes[].interfaces / imports[].section fields to the file-analyzer JSON output New post-merge helper scripts - skills/understand/emit-dfm-pairs.mjs: Pascal-specific. After merge, scans for `file:*.pas` ↔ `file:*.dfm` filename pairs and emits a `related` edge between them — Pascal forms come in paired source/form-definition files and should be linked in the graph - skills/understand/resolve-external-class-refs.mjs: generic, useful for any language. When file-analyzer agents emit inheritance edges pointing at `class:external:<Name>` (because they don't know which file declares the parent), this pass rewrites the target to the actual cross-batch node ID by class-name lookup. 
On the CW2 sample it recovers 194 cross-batch inherits/implements edges that the merge step would otherwise drop as dangling End-to-end validation - tree-sitter-pascal grammar (Isopod/tree-sitter-pascal v0.10.2, built to wasm via `tree-sitter build --wasm`) parses real CW2 Delphi 7 source with zero error nodes (sampled across data modules, forms, and SOAP/WSDL stubs — including a 4424-line / 243 KB data module) - Full pipeline on 324 Pascal files produced 6,113 nodes / 8,770 edges including 454 `inherits` + 7 `implements` + 1,944 `imports` (with 756 tagged `interface`-section vs 124 `implementation`-section) + 307 `related` (DFM pairings) — a 3.2× jump in inheritance edges over a v1 run that lacked the new shared-type fields - The "untyped forms" bucket in the architecture-analyzer's layer output shrank from 218 to 186 once inheritance became deterministic Open question for the maintainer - Distribution of `tree-sitter-pascal.wasm`: there is no published npm package shipping a prebuilt wasm. Options to discuss: (a) vendor the wasm into a small workspace package, (b) add an optionalDependency with a postinstall build step, (c) document a manual `tree-sitter build --wasm` step. 
This PR leaves the dependency out of packages/core/package.json so existing CI / installs are unaffected; the TreeSitterPlugin's existing graceful-degradation path means Pascal support is simply unavailable until a wasm is provided --- .../core/src/languages/configs/index.ts | 3 + .../core/src/languages/configs/pascal.ts | 31 ++ .../core/src/plugins/extractors/index.ts | 3 + .../plugins/extractors/pascal-extractor.ts | 401 ++++++++++++++++++ .../packages/core/src/types.ts | 19 +- .../skills/understand/emit-dfm-pairs.mjs | 65 +++ .../skills/understand/extract-structure.mjs | 16 +- .../skills/understand/languages/pascal.md | 60 +++ .../resolve-external-class-refs.mjs | 106 +++++ 9 files changed, 701 insertions(+), 3 deletions(-) create mode 100644 understand-anything-plugin/packages/core/src/languages/configs/pascal.ts create mode 100644 understand-anything-plugin/packages/core/src/plugins/extractors/pascal-extractor.ts create mode 100644 understand-anything-plugin/skills/understand/emit-dfm-pairs.mjs create mode 100644 understand-anything-plugin/skills/understand/languages/pascal.md create mode 100644 understand-anything-plugin/skills/understand/resolve-external-class-refs.mjs diff --git a/understand-anything-plugin/packages/core/src/languages/configs/index.ts b/understand-anything-plugin/packages/core/src/languages/configs/index.ts index 4893c4d5..18330626 100644 --- a/understand-anything-plugin/packages/core/src/languages/configs/index.ts +++ b/understand-anything-plugin/packages/core/src/languages/configs/index.ts @@ -13,6 +13,7 @@ import { cConfig } from "./c.js"; import { cppConfig } from "./cpp.js"; import { csharpConfig } from "./csharp.js"; import { luaConfig } from "./lua.js"; +import { pascalConfig } from "./pascal.js"; // Non-code language configs import { markdownConfig } from "./markdown.js"; import { yamlConfig } from "./yaml.js"; @@ -57,6 +58,7 @@ export const builtinLanguageConfigs: LanguageConfig[] = [ cConfig, cppConfig, csharpConfig, + 
pascalConfig, // Non-code languages markdownConfig, yamlConfig, @@ -102,6 +104,7 @@ export { cConfig, cppConfig, csharpConfig, + pascalConfig, // Non-code languages markdownConfig, yamlConfig, diff --git a/understand-anything-plugin/packages/core/src/languages/configs/pascal.ts b/understand-anything-plugin/packages/core/src/languages/configs/pascal.ts new file mode 100644 index 00000000..e1621a90 --- /dev/null +++ b/understand-anything-plugin/packages/core/src/languages/configs/pascal.ts @@ -0,0 +1,31 @@ +import type { LanguageConfig } from "../types.js"; + +export const pascalConfig = { + id: "pascal", + displayName: "Pascal / Delphi", + extensions: [".pas", ".pp", ".dpr", ".dpk", ".inc"], + treeSitter: { + wasmPackage: "tree-sitter-pascal", + wasmFile: "tree-sitter-pascal.wasm", + }, + concepts: [ + "units", + "uses clauses", + "interface and implementation sections", + "classes and inheritance", + "interfaces", + "published properties", + "data modules", + "form/data-module pairing with DFM files", + "RTTI attributes", + "anonymous methods", + "generics", + "initialization and finalization sections", + ], + filePatterns: { + entryPoints: ["*.dpr", "*.dpk"], + barrels: [], + tests: ["test_*.pas", "*_test.pas", "*Tests.pas"], + config: ["*.dproj", "*.cfg", "*.bpg", "*.groupproj"], + }, +} satisfies LanguageConfig; diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/index.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/index.ts index f148c61c..f1432050 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/index.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/index.ts @@ -9,6 +9,7 @@ export { RubyExtractor } from "./ruby-extractor.js"; export { PhpExtractor } from "./php-extractor.js"; export { CppExtractor } from "./cpp-extractor.js"; export { CSharpExtractor } from "./csharp-extractor.js"; +export { PascalExtractor } from "./pascal-extractor.js"; import type { 
LanguageExtractor } from "./types.js"; import { TypeScriptExtractor } from "./typescript-extractor.js"; @@ -20,6 +21,7 @@ import { RubyExtractor } from "./ruby-extractor.js"; import { PhpExtractor } from "./php-extractor.js"; import { CppExtractor } from "./cpp-extractor.js"; import { CSharpExtractor } from "./csharp-extractor.js"; +import { PascalExtractor } from "./pascal-extractor.js"; export const builtinExtractors: LanguageExtractor[] = [ new TypeScriptExtractor(), @@ -31,4 +33,5 @@ export const builtinExtractors: LanguageExtractor[] = [ new PhpExtractor(), new CppExtractor(), new CSharpExtractor(), + new PascalExtractor(), ]; diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/pascal-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/pascal-extractor.ts new file mode 100644 index 00000000..9cfea952 --- /dev/null +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/pascal-extractor.ts @@ -0,0 +1,401 @@ +import type { StructuralAnalysis, CallGraphEntry } from "../../types.js"; +import type { LanguageExtractor, TreeSitterNode } from "./types.js"; +import { findChild, findChildren } from "./base-extractor.js"; + +// Pascal grammar node types we care about (from Isopod/tree-sitter-pascal corpus): +// Module: unit | program | library +// Module name: moduleName (1+ identifier, optional kDot between) +// Sections: interface | implementation | initialization | finalization +// Uses: declUses → moduleName children (each = imported unit) +// Types: declTypes (kType) → declType +// declType → identifier (kEq) declClass | declIntf | type +// declClass (kClass) → typeref* (ancestors) declField* declProc* (kEnd) +// declIntf (kInterface) → typeref* declProc* (kEnd) +// Routines: declProc (decl) | defProc (defn = declProc + block) +// declProc → kProcedure|kFunction|kConstructor|kDestructor identifier +// [declArgs] [typeref] (typeref = function return type) +// declArgs → declArg+ +// declArg → 
[kVar|kConst|kOut] identifier+ [type] +// Method impls use genericDot for qualified names (ClassName.MethodName). +// Fields: declField → identifier+ type +// Calls: exprCall → entity args + +const ROUTINE_KEYWORDS = new Set([ + "kProcedure", + "kFunction", + "kConstructor", + "kDestructor", +]); + +/** Extract a unit's bare name from a moduleName node (joins namespace parts with '.'). */ +function moduleNameText(node: TreeSitterNode | null): string { + if (!node) return ""; + const idents = findChildren(node, "identifier"); + return idents.map((n) => n.text).join("."); +} + +/** Extract a name identifier from a declProc node, handling qualified method names. */ +function declProcName(node: TreeSitterNode): { + name: string; + qualifier?: string; +} { + // Plain `procedure Foo` — first identifier child + const ident = findChild(node, "identifier"); + if (ident) return { name: ident.text }; + + // Qualified: `procedure TFoo.Bar` — genericDot with identifier children + const dotted = findChild(node, "genericDot"); + if (dotted) { + const parts = findChildren(dotted, "identifier").map((n) => n.text); + if (parts.length >= 2) { + return { + qualifier: parts.slice(0, -1).join("."), + name: parts[parts.length - 1], + }; + } + if (parts.length === 1) return { name: parts[0] }; + } + return { name: "" }; +} + +/** Routine keyword (kProcedure/kFunction/...). */ +function routineKeyword(node: TreeSitterNode): string | null { + for (let i = 0; i < node.childCount; i++) { + const c = node.child(i); + if (c && ROUTINE_KEYWORDS.has(c.type)) return c.type; + } + return null; +} + +/** Extract parameter names from a declProc's declArgs (each declArg may name multiple params). 
*/ +function extractParamNames(declProcNode: TreeSitterNode): string[] { + const args = findChild(declProcNode, "declArgs"); + if (!args) return []; + const names: string[] = []; + for (const arg of findChildren(args, "declArg")) { + // Skip leading modifier keywords (kVar/kConst/kOut), then take identifiers + // until we hit a 'type' node. + for (let i = 0; i < arg.childCount; i++) { + const c = arg.child(i); + if (!c) continue; + if (c.type === "identifier") names.push(c.text); + else if (c.type === "type") break; + } + } + return names; +} + +/** Function return type = a top-level 'typeref' directly under declProc (after declArgs). */ +function extractReturnType(declProcNode: TreeSitterNode): string | undefined { + for (let i = 0; i < declProcNode.childCount; i++) { + const c = declProcNode.child(i); + if (c && c.type === "typeref") { + const ident = findChild(c, "identifier"); + return ident ? ident.text : c.text; + } + } + return undefined; +} + +/** lineRange helper. */ +function lineRange(node: TreeSitterNode): [number, number] { + return [node.startPosition.row + 1, node.endPosition.row + 1]; +} + +export class PascalExtractor implements LanguageExtractor { + readonly languageIds = ["pascal"]; + + extractStructure(rootNode: TreeSitterNode): StructuralAnalysis { + const functions: StructuralAnalysis["functions"] = []; + const classes: StructuralAnalysis["classes"] = []; + const imports: StructuralAnalysis["imports"] = []; + const exports: StructuralAnalysis["exports"] = []; + + // Root contains one module container: unit | program | library + const moduleNode = + findChild(rootNode, "unit") ?? + findChild(rootNode, "program") ?? + findChild(rootNode, "library"); + if (!moduleNode) return { functions, classes, imports, exports }; + + // Walk module-level children. 
Sections wrap further declarations; we also + // accept declarations at the module level for .dpr programs (where most + // declarations live directly under `program` rather than inside an + // interface section). + const sectionTypes = new Set([ + "interface", + "implementation", + "initialization", + "finalization", + ]); + + const walkSection = ( + sectionNode: TreeSitterNode, + isPublic: boolean, + section: "interface" | "implementation" | undefined, + ) => { + for (let i = 0; i < sectionNode.childCount; i++) { + const child = sectionNode.child(i); + if (!child) continue; + this.handleDeclaration( + child, + isPublic, + section, + functions, + classes, + imports, + exports, + ); + } + }; + + for (let i = 0; i < moduleNode.childCount; i++) { + const child = moduleNode.child(i); + if (!child) continue; + + if (sectionTypes.has(child.type)) { + // Items in `interface` are public/exported; impl section items are private. + // Tag imports with their section so file-analyzer can distinguish + // public dependencies (interface uses) from private ones (implementation uses). + const sectionTag = + child.type === "interface" + ? "interface" + : child.type === "implementation" + ? "implementation" + : undefined; + walkSection(child, child.type === "interface", sectionTag); + } else { + // Module-level (program/library) — treat as public for export purposes. + // No section tag for .dpr-style declarations (they're not unit-scoped). 
+ this.handleDeclaration( + child, + true, + undefined, + functions, + classes, + imports, + exports, + ); + } + } + + return { functions, classes, imports, exports }; + } + + extractCallGraph(rootNode: TreeSitterNode): CallGraphEntry[] { + const entries: CallGraphEntry[] = []; + const stack: string[] = []; + + const walk = (node: TreeSitterNode) => { + let pushed = false; + + // Track entering a defProc → its declProc gives us the caller name + if (node.type === "defProc") { + const declProc = findChild(node, "declProc"); + if (declProc) { + const { qualifier, name } = declProcName(declProc); + if (name) { + stack.push(qualifier ? `${qualifier}.${name}` : name); + pushed = true; + } + } + } + + // Record calls — exprCall has an 'entity' child (the callee expression) + if (node.type === "exprCall" && stack.length > 0) { + const entity = node.childForFieldName?.("entity") ?? null; + const calleeText = entity + ? entity.text + : (findChild(node, "identifier")?.text ?? + findChild(node, "exprDot")?.text); + if (calleeText) { + entries.push({ + caller: stack[stack.length - 1], + callee: calleeText, + lineNumber: node.startPosition.row + 1, + }); + } + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walk(child); + } + + if (pushed) stack.pop(); + }; + + walk(rootNode); + return entries; + } + + // ---- Private helpers ---- + + private handleDeclaration( + node: TreeSitterNode, + isPublic: boolean, + section: "interface" | "implementation" | undefined, + functions: StructuralAnalysis["functions"], + classes: StructuralAnalysis["classes"], + imports: StructuralAnalysis["imports"], + exports: StructuralAnalysis["exports"], + ): void { + switch (node.type) { + case "declUses": + this.extractUses(node, imports, section); + return; + + case "declTypes": + for (const declType of findChildren(node, "declType")) { + this.extractType(declType, isPublic, classes, functions, exports); + } + return; + + case "declProc": + // Forward 
declaration (interface section) or .dpr-style declaration + this.extractRoutine(node, functions); + if (isPublic) this.addRoutineExport(node, exports); + return; + + case "defProc": { + // Routine with body — extract from inner declProc + const declProc = findChild(node, "declProc"); + if (declProc) { + this.extractRoutine(declProc, functions, node); + if (isPublic) this.addRoutineExport(declProc, exports); + } + return; + } + } + } + + private extractUses( + declUses: TreeSitterNode, + imports: StructuralAnalysis["imports"], + section: "interface" | "implementation" | undefined, + ): void { + for (const mn of findChildren(declUses, "moduleName")) { + const source = moduleNameText(mn); + if (!source) continue; + imports.push({ + source, + specifiers: [source], + lineNumber: mn.startPosition.row + 1, + ...(section ? { section } : {}), + }); + } + } + + private extractType( + declType: TreeSitterNode, + isPublic: boolean, + classes: StructuralAnalysis["classes"], + functions: StructuralAnalysis["functions"], + exports: StructuralAnalysis["exports"], + ): void { + const nameIdent = findChild(declType, "identifier"); + if (!nameIdent) return; + const typeName = nameIdent.text; + + const declClass = + findChild(declType, "declClass") ?? findChild(declType, "declIntf"); + if (!declClass) { + // Non-class type alias / enum / record — not a "class" node for our purposes. + // We could emit it as a simple definition, but skip for now to keep + // graph clean (LLM-phase can still summarize it from the source). + return; + } + + const methods: string[] = []; + const properties: string[] = []; + // Pascal class headers carry ancestors as bare typeref children of declClass + // BEFORE any member declarations, e.g. + // declClass (kClass) (typeref TParent) (typeref IFoo) (typeref IBar) declField... kEnd + // Convention: first typeref is the parent class, remaining typerefs are + // implemented interfaces. 
(Some declarations have only interfaces — when + // declaring a pure interface via declIntf — in which case all typerefs are + // parent interfaces; we still emit them as `parents` since they're direct + // inheritance, not implementation.) + const ancestorRefs: string[] = []; + const isInterfaceDecl = declClass.type === "declIntf"; + + for (let i = 0; i < declClass.childCount; i++) { + const m = declClass.child(i); + if (!m) continue; + if (m.type === "typeref") { + const id = findChild(m, "identifier"); + if (id) ancestorRefs.push(id.text); + } else if (m.type === "declProc") { + const { name } = declProcName(m); + if (name) methods.push(name); + } else if (m.type === "declField") { + for (const id of findChildren(m, "identifier")) { + properties.push(id.text); + } + } + } + + const parents: string[] = []; + const interfaces: string[] = []; + if (isInterfaceDecl) { + // declIntf — all typerefs are parent interfaces (interface inheritance). + parents.push(...ancestorRefs); + } else { + // declClass — first typeref is the class parent, rest are implemented interfaces. + if (ancestorRefs.length > 0) parents.push(ancestorRefs[0]); + if (ancestorRefs.length > 1) interfaces.push(...ancestorRefs.slice(1)); + } + + classes.push({ + name: typeName, + lineRange: lineRange(declType), + methods, + properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? { interfaces } : {}), + }); + + if (isPublic) { + exports.push({ + name: typeName, + lineNumber: declType.startPosition.row + 1, + }); + } + } + + private extractRoutine( + declProcNode: TreeSitterNode, + functions: StructuralAnalysis["functions"], + outerDefNode?: TreeSitterNode, + ): void { + const { name, qualifier } = declProcName(declProcNode); + if (!name) return; + const kind = routineKeyword(declProcNode); + + const fullName = qualifier ? `${qualifier}.${name}` : name; + const params = extractParamNames(declProcNode); + const returnType = + kind === "kFunction" ? 
extractReturnType(declProcNode) : undefined; + + const range = lineRange(outerDefNode ?? declProcNode); + + functions.push({ + name: fullName, + lineRange: range, + params, + returnType, + }); + } + + private addRoutineExport( + declProcNode: TreeSitterNode, + exports: StructuralAnalysis["exports"], + ): void { + const { name, qualifier } = declProcName(declProcNode); + if (!name) return; + exports.push({ + name: qualifier ? `${qualifier}.${name}` : name, + lineNumber: declProcNode.startPosition.row + 1, + }); + } +} diff --git a/understand-anything-plugin/packages/core/src/types.ts b/understand-anything-plugin/packages/core/src/types.ts index b7a0fa6e..c20938d3 100644 --- a/understand-anything-plugin/packages/core/src/types.ts +++ b/understand-anything-plugin/packages/core/src/types.ts @@ -168,8 +168,23 @@ export interface ReferenceResolution { // Plugin interfaces export interface StructuralAnalysis { functions: Array<{ name: string; lineRange: [number, number]; params: string[]; returnType?: string }>; - classes: Array<{ name: string; lineRange: [number, number]; methods: string[]; properties: string[] }>; - imports: Array<{ source: string; specifiers: string[]; lineNumber: number }>; + classes: Array<{ + name: string; + lineRange: [number, number]; + methods: string[]; + properties: string[]; + /** Ancestor class names (e.g., Pascal `class(TParent)`, Java `extends X`, Python `class X(Y)`). Optional for backward compat. */ + parents?: string[]; + /** Implemented interface names (e.g., Pascal `class(TParent, IFoo, IBar)` extra parents, Java `implements X, Y`). Optional. */ + interfaces?: string[]; + }>; + imports: Array<{ + source: string; + specifiers: string[]; + lineNumber: number; + /** For languages with section-scoped imports (Pascal interface/implementation). Optional, ignored by other languages. 
*/ + section?: "interface" | "implementation"; + }>; exports: Array<{ name: string; lineNumber: number; isDefault?: boolean }>; // Non-code structural data (all optional for backward compat) sections?: SectionInfo[]; diff --git a/understand-anything-plugin/skills/understand/emit-dfm-pairs.mjs b/understand-anything-plugin/skills/understand/emit-dfm-pairs.mjs new file mode 100644 index 00000000..dbebdd79 --- /dev/null +++ b/understand-anything-plugin/skills/understand/emit-dfm-pairs.mjs @@ -0,0 +1,65 @@ +#!/usr/bin/env node +/** + * emit-dfm-pairs.mjs — post-merge step for Pascal/Delphi projects. + * + * Pascal forms come in paired .pas + .dfm files (form source + form definition). + * The .dfm carries the design-time component tree; the .pas carries the class + * methods. They are conceptually one artifact and should be linked in the + * knowledge graph with a `related` edge. + * + * This script reads an existing knowledge-graph.json (or assembled-graph.json), + * scans for `file:*.pas` nodes whose filePath is matched by a `file:*.dfm` + * sibling node, and emits `related` edges between them. Idempotent — + * skips pairs that already have an edge. + * + * Usage: + * node emit-dfm-pairs.mjs + */ +import { readFileSync, writeFileSync } from "node:fs"; + +const [, , inputPath, outputPath] = process.argv; +if (!inputPath || !outputPath) { + process.stderr.write("Usage: node emit-dfm-pairs.mjs \n"); + process.exit(1); +} + +const graph = JSON.parse(readFileSync(inputPath, "utf8")); + +// Index nodes by basename (without extension) — case-insensitive, since +// Delphi conventionally uses different casing across .pas and .dfm. +const byBase = new Map(); // base.toLowerCase() -> {pas?: id, dfm?: id} +for (const node of graph.nodes) { + if (node.type !== "file" && node.type !== "config" && node.type !== "document") continue; + const path = node.filePath ?? 
node.id.replace(/^file:/, ""); + const m = path.match(/^(.+?)\.(pas|dfm)$/i); + if (!m) continue; + const base = m[1].toLowerCase(); + const ext = m[2].toLowerCase(); + if (!byBase.has(base)) byBase.set(base, {}); + byBase.get(base)[ext] = node.id; +} + +// Track existing edges so we don't double-emit. +const existing = new Set(); +for (const e of graph.edges) existing.add(`${e.source}|${e.target}|${e.type}`); + +let emitted = 0; +for (const [base, pair] of byBase) { + if (!pair.pas || !pair.dfm) continue; + const key1 = `${pair.pas}|${pair.dfm}|related`; + const key2 = `${pair.dfm}|${pair.pas}|related`; + if (existing.has(key1) || existing.has(key2)) continue; + graph.edges.push({ + source: pair.pas, + target: pair.dfm, + type: "related", + direction: "bidirectional", + description: "Pascal unit + DFM form-definition pair (design-time component tree).", + weight: 0.7, + }); + existing.add(key1); + emitted++; +} + +writeFileSync(outputPath, JSON.stringify(graph, null, 2)); +console.log(`Emitted ${emitted} new .pas↔.dfm pair edges. Graph now has ${graph.edges.length} edges total.`); diff --git a/understand-anything-plugin/skills/understand/extract-structure.mjs b/understand-anything-plugin/skills/understand/extract-structure.mjs index a8bbd281..50a0fb08 100644 --- a/understand-anything-plugin/skills/understand/extract-structure.mjs +++ b/understand-anything-plugin/skills/understand/extract-structure.mjs @@ -168,7 +168,9 @@ export function buildResult(file, totalLines, nonEmptyLines, analysis, callGraph })); } - // Classes (code files) + // Classes (code files) — include parents/interfaces when present so the + // file-analyzer can emit deterministic inherits/implements edges without + // re-reading source. 
if (analysis.classes && analysis.classes.length > 0) { base.classes = analysis.classes.map(cls => ({ name: cls.name, @@ -176,6 +178,18 @@ export function buildResult(file, totalLines, nonEmptyLines, analysis, callGraph endLine: cls.lineRange[1], methods: cls.methods || [], properties: cls.properties || [], + ...(cls.parents && cls.parents.length > 0 ? { parents: cls.parents } : {}), + ...(cls.interfaces && cls.interfaces.length > 0 ? { interfaces: cls.interfaces } : {}), + })); + } + + // Imports (code files) — surface section info (Pascal interface vs + // implementation uses) so file-analyzer can characterize dependency direction. + if (analysis.imports && analysis.imports.length > 0) { + base.imports = analysis.imports.map(imp => ({ + source: imp.source, + line: imp.lineNumber, + ...(imp.section ? { section: imp.section } : {}), })); } diff --git a/understand-anything-plugin/skills/understand/languages/pascal.md b/understand-anything-plugin/skills/understand/languages/pascal.md new file mode 100644 index 00000000..5ae1b6d3 --- /dev/null +++ b/understand-anything-plugin/skills/understand/languages/pascal.md @@ -0,0 +1,60 @@ +# Pascal / Delphi Language Prompt Snippet + +## Key Concepts + +- **Units**: The primary module unit (`unit Foo;`) — file-level container, paired 1:1 with a `.pas` file +- **Interface vs Implementation Sections**: `interface` declares the public API; `implementation` holds the private bodies. Only items declared in `interface` are visible to other units that `uses` this one. +- **Uses Clauses**: `uses A, B, C;` imports other units. There can be one in the interface section (transitively visible) and one in the implementation section (private). Treat both as imports. +- **Classes**: `type TFoo = class(TAncestor) ... end;` — Delphi has single inheritance plus interface implementation. The class declaration appears in a type block. +- **Interfaces**: `type IFoo = interface(IAncestor) ['{GUID}'] ... 
end;` — abstract contracts, often identified by GUID. +- **Published Properties**: Properties in the `published` visibility section get RTTI generated and are persisted in the paired `.dfm` form file. This is the foundation of Delphi's visual form-design + streaming model. +- **Data Modules**: Special form-like containers that group non-visual components (database connections, datasets, providers). Named `dm*` by convention (e.g. `dmCW2.pas` + `dmCW2.dfm`). +- **Form/DFM Pairing**: Every `Txxx.pas` containing a `TForm`/`TFrame`/`TDataModule` descendant has a matching `Txxx.dfm` text file declaring the design-time component tree. Treat the pair as one logical artifact. +- **RTTI Attributes**: Attributes such as `[MyAttr(42)]` decorate types and methods, similar to .NET attributes or Java annotations. +- **Anonymous Methods**: Inline `procedure begin ... end` bodies assigned to `reference to procedure` / `reference to function` types (Delphi 2009+). Not to be confused with `procedure of object`, which is a method pointer. +- **Initialization / Finalization**: Module-scoped setup/teardown blocks that run at unit load/unload, before/after the program's main `begin ... end.` block. +- **With Statement**: `with foo do begin ... end` — opens a scope where `foo`'s members are unqualified. Common in legacy Delphi, often obscures call targets. 
+ +## Import Patterns + +- `uses A, B, C;` — units listed by bare name; the compiler resolves them via the search path (current dir, project search paths, library path) +- `uses A.B.C;` — namespaced unit (modern Delphi 2007+) +- Interface-section `uses` is the unit's public dependency +- Implementation-section `uses` is the private dependency +- A `.dpr` (program) file's `uses` lists every unit linked into the executable — the dependency root + +## File Patterns + +- `*.pas` — Pascal source unit (one unit per file) +- `*.dfm` — Form definition (paired with `.pas`); structured text declaring design-time object tree +- `*.dpr` — Program (project) entry point — like `main.c` for the executable +- `*.dpk` — Package source (DLL-equivalent) +- `*.dproj` / `*.bpg` / `*.groupproj` — IDE project / project-group files +- `*.inc` — Include file (preprocessor-style text inclusion via `{$I file.inc}`) +- `dm*.pas` / `dm*.dfm` — Data modules +- `f*.pas` / `f*.dfm` — Form units (legacy convention; modern code often uses `u*Form.pas`) + +## Common Frameworks + +- **VCL** (Visual Component Library) — the canonical Delphi UI framework; `Forms`, `Controls`, `Graphics` units +- **FireMonkey (FMX)** — Cross-platform UI framework, an alternative to VCL +- **DataSnap** — Multi-tier middleware +- **dbExpress / FireDAC** — Database access layers +- **Indy** / **Synapse** — Networking +- **RX / JEDI / RAID** — Third-party component suites + +## Example Language Notes + +> Implements a Delphi data module (`TdmCW2 = class(TDataModule)`) that owns the global ADO +> connection plus dozens of TADOQuery / TADOStoredProc components. Form-streaming in the +> paired `.dfm` configures connection strings, parameter lists, and field definitions at +> design time; runtime code typically just opens the dataset. + +> The `with FOrderItems do begin … end` block on lines 412–478 obscures the call target — +> every bare identifier resolves against `FOrderItems`'s members first. 
When tracing +> business logic, mentally prefix each unqualified reference with `FOrderItems.`. + +> Uses an interface-section `uses` clause to import VCL controls (`Forms`, `StdCtrls`) — +> these are part of the public API because the form's published properties reference them. +> The implementation-section `uses` imports business-logic units (`dmCW2`, `OrderProcessing`) +> which are private to this form. diff --git a/understand-anything-plugin/skills/understand/resolve-external-class-refs.mjs b/understand-anything-plugin/skills/understand/resolve-external-class-refs.mjs new file mode 100644 index 00000000..056873d9 --- /dev/null +++ b/understand-anything-plugin/skills/understand/resolve-external-class-refs.mjs @@ -0,0 +1,106 @@ +#!/usr/bin/env node +/** + * resolve-external-class-refs.mjs + * + * Post-merge fix for inherits/implements edges that target `class:external:<Name>` + * IDs which the file-analyzer agent emits when it can't tell which file declares + * the parent (because the parent lives in a different batch). The merge step + * drops these as "dangling target". This script: + * + * 1. Reads all batch-*.json files in <project-root>/.understand-anything/intermediate/ + * to recover the original inherits/implements edges (which the merge dropped) + * 2. Reads assembled-graph.json + * 3. Builds a name → node ID map from all `class:*` nodes + * 4. For every batch edge whose target is `class:external:<Name>`, if <Name> + * matches a class node, rewrite the edge target to that class's actual ID + * and re-add the edge to the assembled graph + * 5. Genuinely-external classes (TForm, IInvokable, TXMLNode, etc.) 
stay dropped + * + * Usage: node resolve-external-class-refs.mjs + */ +import { readdirSync, readFileSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; + +const projectRoot = process.argv[2]; +if (!projectRoot) { + process.stderr.write("Usage: node resolve-external-class-refs.mjs \n"); + process.exit(1); +} + +const intermediate = join(projectRoot, ".understand-anything", "intermediate"); +const assembledPath = join(intermediate, "assembled-graph.json"); +const graph = JSON.parse(readFileSync(assembledPath, "utf8")); + +// Build name → ID index from class nodes. +// If multiple class nodes share a name (e.g. helper records with same name in +// different files), prefer the canonical form-base class location. +const classNodes = graph.nodes.filter((n) => n.type === "class"); +const nameToIds = new Map(); +for (const n of classNodes) { + // Skip placeholder `class:external:` stubs — those exist only because + // some agents emitted them as nodes alongside the edge. They'd create false + // multi-match ambiguity when we look up by name. + if (n.id.startsWith("class:external:")) continue; + // n.id is like `class:fCW2Report.pas:TfmCW2Report` — extract the name suffix. + const m = n.id.match(/^class:[^:]+:(.+)$/); + if (!m) continue; + const name = m[1]; + if (!nameToIds.has(name)) nameToIds.set(name, []); + nameToIds.get(name).push(n.id); +} + +// Walk batch files for the original edges. +const batchFiles = readdirSync(intermediate) + .filter((f) => /^batch-\d+\.json$/.test(f)) + .sort(); + +const existingEdgeKeys = new Set(graph.edges.map((e) => `${e.source}|${e.target}|${e.type}`)); + +let recovered = 0, ambiguousSkipped = 0, stillExternal = 0; +const reAdded = []; +for (const bf of batchFiles) { + const batch = JSON.parse(readFileSync(join(intermediate, bf), "utf8")); + for (const e of batch.edges ?? 
[]) { + if (e.type !== "inherits" && e.type !== "implements") continue; + const m = String(e.target).match(/^class:external:(.+)$/); + if (!m) continue; + const name = m[1]; + const candidates = nameToIds.get(name); + if (!candidates || candidates.length === 0) { + stillExternal++; + continue; + } + // Pick the single candidate; if multiple, skip to avoid wrong wiring. + if (candidates.length > 1) { + ambiguousSkipped++; + continue; + } + const resolvedTarget = candidates[0]; + const key = `${e.source}|${resolvedTarget}|${e.type}`; + if (existingEdgeKeys.has(key)) continue; + graph.edges.push({ + source: e.source, + target: resolvedTarget, + type: e.type, + direction: "forward", + description: e.description ?? `${e.type} edge resolved cross-batch by class name`, + weight: e.weight ?? (e.type === "inherits" ? 0.9 : 0.9), + }); + existingEdgeKeys.add(key); + recovered++; + reAdded.push(`${e.source} → ${resolvedTarget} (${e.type})`); + } +} + +console.log(`Recovered ${recovered} cross-batch ${"inherits/implements"} edges`); +console.log(`Ambiguous (multiple matches, skipped): ${ambiguousSkipped}`); +console.log(`Still external (no in-graph match — TForm, IInvokable, etc.): ${stillExternal}`); +console.log(`Graph now has ${graph.edges.length} edges total.`); + +writeFileSync(assembledPath, JSON.stringify(graph, null, 2)); +console.log(`Wrote: ${assembledPath}`); + +if (recovered > 0 && recovered <= 20) { + console.log("\nSample recoveries:"); + for (const r of reAdded.slice(0, 10)) console.log(` ${r}`); +} From 9b7e663481d296677d603e9d1b209e5403b84ffd Mon Sep 17 00:00:00 2001 From: Cam Dowdle Date: Sat, 23 May 2026 07:49:43 -0500 Subject: [PATCH 2/3] test+feat: PascalExtractor unit tests, populate parents/interfaces in TS+Python MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the Pascal/Delphi language-support PR, pre-empting two expected review asks: PascalExtractor unit test suite - New 
`packages/core/src/plugins/extractors/__tests__/pascal-extractor.test.ts` with 13 tests covering: import section-tagging (interface vs implementation vs untagged for .dpr), class inheritance (single parent, parent + multi-interface split, pure interface inheritance, ancestor-less classes), procedure/function extraction (params, return type, qualified method names), and call graph (both parenthesized and bare-identifier procedure calls). - The suite skips cleanly (with a debug warning) if the tree-sitter-pascal grammar isn't installed in node_modules, matching the PR's open question about wasm distribution. Skip is evaluated at collection time via top-level await — `describe.skipIf` doesn't see `beforeAll` mutations because collection runs first. Populate `parents` / `interfaces` for two other extractors - python-extractor: pulls Python `class X(Y, Z)` bases out of the superclasses field. Python has no syntactic class/interface distinction, so everything lands in `parents` and `interfaces` stays undefined. Keyword-args like `metaclass=Meta` are skipped. - typescript-extractor: walks `class_heritage`, putting `extends_clause` types in `parents` and `implements_clause` types in `interfaces`. Handles `identifier`, `type_identifier`, `generic_type`, `member_expression`, and `nested_type_identifier`. Bug fix in PascalExtractor.extractCallGraph - Pascal allows bare-identifier procedure calls (`Foo;` instead of `Foo();`). Tree-sitter parses these as `(statement (identifier))` rather than `exprCall`. The extractor now records both shapes, filtered to skip non-call statements (assignments, binary expressions, statements with nested calls). All 241 existing extractor tests still pass; Pascal suite passes 13/13 when grammar is available. 
--- .../__tests__/pascal-extractor.test.ts | 367 ++++++++++++++++++ .../plugins/extractors/pascal-extractor.ts | 54 ++- .../plugins/extractors/python-extractor.ts | 22 ++ .../extractors/typescript-extractor.ts | 47 +++ 4 files changed, 477 insertions(+), 13 deletions(-) create mode 100644 understand-anything-plugin/packages/core/src/plugins/extractors/__tests__/pascal-extractor.test.ts diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/__tests__/pascal-extractor.test.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/__tests__/pascal-extractor.test.ts new file mode 100644 index 00000000..ef287334 --- /dev/null +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/__tests__/pascal-extractor.test.ts @@ -0,0 +1,367 @@ +import { describe, it, expect } from "vitest"; +import { createRequire } from "node:module"; +import { PascalExtractor } from "../pascal-extractor.js"; + +const require = createRequire(import.meta.url); + +// Load tree-sitter + Pascal grammar at module top-level (top-level await). +// `describe.skipIf` is evaluated at COLLECTION time, so the flag has to +// be set before describe() runs. If the wasm isn't available — the +// upstream distribution story for tree-sitter-pascal.wasm is still open +// (see the language-support PR) — the suite skips cleanly so contributors +// without the grammar can still run the rest of the test suite. +let Parser: any; +let pascalLang: any; +let grammarAvailable = false; + +try { + const mod = await import("web-tree-sitter"); + Parser = mod.Parser; + await Parser.init(); + const wasmPath = require.resolve( + "tree-sitter-pascal/tree-sitter-pascal.wasm", + ); + pascalLang = await mod.Language.load(wasmPath); + grammarAvailable = true; +} catch (e) { + console.warn( + "[pascal-extractor.test] grammar unavailable, skipping suite:", + (e as Error)?.message ?? 
e, + ); +} + +function parse(code: string) { + const parser = new Parser(); + parser.setLanguage(pascalLang); + const tree = parser.parse(code); + const root = tree.rootNode; + return { tree, parser, root }; +} + +describe.skipIf(!grammarAvailable)("PascalExtractor", () => { + const extractor = new PascalExtractor(); + + it("has correct languageIds", () => { + expect(extractor.languageIds).toEqual(["pascal"]); + }); + + // ---- Imports ---- + + describe("extractStructure - imports", () => { + it("tags interface-section uses as `interface`", () => { + const { tree, parser, root } = parse(` +unit MyUnit; + +interface + +uses + Windows, SysUtils, Classes; + +implementation + +end. +`); + const result = extractor.extractStructure(root); + expect(result.imports).toHaveLength(3); + expect(result.imports.map((i) => i.source)).toEqual([ + "Windows", + "SysUtils", + "Classes", + ]); + for (const imp of result.imports) { + expect(imp.section).toBe("interface"); + } + tree.delete(); + parser.delete(); + }); + + it("tags implementation-section uses as `implementation`", () => { + const { tree, parser, root } = parse(` +unit MyUnit; + +interface + +implementation + +uses + MyHelper, DataModule1; + +end. +`); + const result = extractor.extractStructure(root); + expect(result.imports).toHaveLength(2); + for (const imp of result.imports) { + expect(imp.section).toBe("implementation"); + } + tree.delete(); + parser.delete(); + }); + + it("distinguishes interface vs implementation uses in the same unit", () => { + const { tree, parser, root } = parse(` +unit MyUnit; + +interface + +uses + Windows, Classes; + +implementation + +uses + MyHelper; + +end. 
+`); + const result = extractor.extractStructure(root); + const ifaceUses = result.imports.filter( + (i) => i.section === "interface", + ); + const implUses = result.imports.filter( + (i) => i.section === "implementation", + ); + expect(ifaceUses.map((i) => i.source)).toEqual(["Windows", "Classes"]); + expect(implUses.map((i) => i.source)).toEqual(["MyHelper"]); + tree.delete(); + parser.delete(); + }); + + it("leaves .dpr program-file uses untagged", () => { + const { tree, parser, root } = parse(` +program MyApp; + +uses + Forms, MyForm; + +begin +end. +`); + const result = extractor.extractStructure(root); + expect(result.imports).toHaveLength(2); + for (const imp of result.imports) { + expect(imp.section).toBeUndefined(); + } + tree.delete(); + parser.delete(); + }); + }); + + // ---- Classes / inheritance ---- + + describe("extractStructure - classes", () => { + it("extracts a class with parent (Pascal class(TParent))", () => { + const { tree, parser, root } = parse(` +unit MyUnit; + +interface + +type + TMyForm = class(TForm) + Button1: TButton; + procedure Button1Click(Sender: TObject); + end; + +implementation + +end. +`); + const result = extractor.extractStructure(root); + expect(result.classes).toHaveLength(1); + expect(result.classes[0].name).toBe("TMyForm"); + expect(result.classes[0].parents).toEqual(["TForm"]); + expect(result.classes[0].interfaces).toBeUndefined(); + expect(result.classes[0].methods).toContain("Button1Click"); + expect(result.classes[0].properties).toContain("Button1"); + tree.delete(); + parser.delete(); + }); + + it("splits parent class from implemented interfaces (class(TParent, IFoo, IBar))", () => { + const { tree, parser, root } = parse(` +unit MyUnit; + +interface + +type + TMyService = class(TBaseService, IFoo, IBar) + procedure DoWork; + end; + +implementation + +end. 
+`); + const result = extractor.extractStructure(root); + expect(result.classes).toHaveLength(1); + expect(result.classes[0].parents).toEqual(["TBaseService"]); + expect(result.classes[0].interfaces).toEqual(["IFoo", "IBar"]); + tree.delete(); + parser.delete(); + }); + + it("treats interface declaration ancestors as parents (interface inheritance)", () => { + const { tree, parser, root } = parse(` +unit MyUnit; + +interface + +type + IExtended = interface(IBase) + procedure DoExtra; + end; + +implementation + +end. +`); + const result = extractor.extractStructure(root); + expect(result.classes).toHaveLength(1); + expect(result.classes[0].name).toBe("IExtended"); + expect(result.classes[0].parents).toEqual(["IBase"]); + expect(result.classes[0].interfaces).toBeUndefined(); + tree.delete(); + parser.delete(); + }); + + it("emits no parents/interfaces for ancestor-less classes", () => { + const { tree, parser, root } = parse(` +unit MyUnit; + +interface + +type + TStandalone = class + value: Integer; + end; + +implementation + +end. +`); + const result = extractor.extractStructure(root); + expect(result.classes).toHaveLength(1); + expect(result.classes[0].parents).toBeUndefined(); + expect(result.classes[0].interfaces).toBeUndefined(); + tree.delete(); + parser.delete(); + }); + }); + + // ---- Procedures / functions ---- + + describe("extractStructure - routines", () => { + it("extracts procedure with parameters", () => { + const { tree, parser, root } = parse(` +unit MyUnit; + +interface + +procedure Greet(name: string; const punctuation: string); + +implementation + +procedure Greet(name: string; const punctuation: string); +begin +end; + +end. 
+`); + const result = extractor.extractStructure(root); + const greet = result.functions.find((f) => f.name === "Greet"); + expect(greet).toBeDefined(); + expect(greet!.params).toEqual(["name", "punctuation"]); + tree.delete(); + parser.delete(); + }); + + it("extracts function return type", () => { + const { tree, parser, root } = parse(` +unit MyUnit; + +interface + +function Add(a, b: Integer): Integer; + +implementation + +function Add(a, b: Integer): Integer; +begin + Result := a + b; +end; + +end. +`); + const result = extractor.extractStructure(root); + const add = result.functions.find((f) => f.name === "Add"); + expect(add).toBeDefined(); + expect(add!.params).toEqual(["a", "b"]); + expect(add!.returnType).toBe("Integer"); + tree.delete(); + parser.delete(); + }); + + it("preserves qualified method names (ClassName.MethodName)", () => { + const { tree, parser, root } = parse(` +unit MyUnit; + +interface + +type + TMyForm = class(TForm) + procedure FormCreate(Sender: TObject); + end; + +implementation + +procedure TMyForm.FormCreate(Sender: TObject); +begin +end; + +end. +`); + const result = extractor.extractStructure(root); + const qualified = result.functions.find( + (f) => f.name === "TMyForm.FormCreate", + ); + expect(qualified).toBeDefined(); + expect(qualified!.params).toEqual(["Sender"]); + tree.delete(); + parser.delete(); + }); + }); + + // ---- Call graph ---- + + describe("extractCallGraph", () => { + it("records caller→callee pairs within procedure bodies", () => { + const { tree, parser, root } = parse(` +unit MyUnit; + +interface + +implementation + +procedure Helper; +begin +end; + +procedure Main; +begin + Helper; + WriteLn('hi'); +end; + +end. 
+`); + const calls = extractor.extractCallGraph(root); + const callers = new Set(calls.map((c) => c.caller)); + expect(callers.has("Main")).toBe(true); + const callees = calls + .filter((c) => c.caller === "Main") + .map((c) => c.callee); + expect(callees).toContain("Helper"); + tree.delete(); + parser.delete(); + }); + }); +}); diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/pascal-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/pascal-extractor.ts index 9cfea952..0647b313 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/pascal-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/pascal-extractor.ts @@ -201,19 +201,47 @@ export class PascalExtractor implements LanguageExtractor { } } - // Record calls — exprCall has an 'entity' child (the callee expression) - if (node.type === "exprCall" && stack.length > 0) { - const entity = node.childForFieldName?.("entity") ?? null; - const calleeText = entity - ? entity.text - : (findChild(node, "identifier")?.text ?? - findChild(node, "exprDot")?.text); - if (calleeText) { - entries.push({ - caller: stack[stack.length - 1], - callee: calleeText, - lineNumber: node.startPosition.row + 1, - }); + // Record calls. Pascal has TWO call shapes: + // (a) Parenthesized: `Foo(arg)` → tree-sitter `exprCall` with field `entity`. + // (b) Bare-identifier: `Foo;` (zero-arg procedure call) → tree-sitter + // emits a `statement` whose sole semantic child is an `identifier` + // or `exprDot` (for qualified `Self.Foo` / `Obj.Foo`). The grammar + // can't distinguish a zero-arg procedure call from a reference at + // parse time, so we treat any such statement as a candidate call. + if (stack.length > 0) { + if (node.type === "exprCall") { + const entity = node.childForFieldName?.("entity") ?? null; + const calleeText = entity + ? entity.text + : (findChild(node, "identifier")?.text ?? 
+ findChild(node, "exprDot")?.text); + if (calleeText) { + entries.push({ + caller: stack[stack.length - 1], + callee: calleeText, + lineNumber: node.startPosition.row + 1, + }); + } + } else if (node.type === "statement") { + // Bare procedure-call statement: a statement whose only meaningful + // child is an identifier (or exprDot for qualified calls). Exclude + // assignment/if/while/etc. by checking the child is a leaf + // identifier or a dot-expression with no call sub-tree of its own. + const ident = findChild(node, "identifier"); + const dotted = findChild(node, "exprDot"); + const calleeText = ident?.text ?? dotted?.text; + if ( + calleeText && + !findChild(node, "exprCall") && + !findChild(node, "exprBinary") && + !findChild(node, "exprAssign") + ) { + entries.push({ + caller: stack[stack.length - 1], + callee: calleeText, + lineNumber: node.startPosition.row + 1, + }); + } } } diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/python-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/python-extractor.ts index 83ae76cb..7f620b35 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/python-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/python-extractor.ts @@ -222,6 +222,27 @@ export class PythonExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // Python `class X(Y, Z, metaclass=Meta):` — base classes live in + // `superclasses` (an argument_list). Keyword args like `metaclass=` + // are skipped (not real parents). Python doesn't distinguish classes + // from protocols/interfaces syntactically, so everything goes in + // `parents` and `interfaces` stays unpopulated. 
+ const parents: string[] = []; + const supers = node.childForFieldName("superclasses"); + if (supers) { + for (let i = 0; i < supers.childCount; i++) { + const c = supers.child(i); + if (!c) continue; + if ( + c.type === "identifier" || + c.type === "dotted_name" || + c.type === "attribute" + ) { + parents.push(c.text); + } + } + } + const body = node.childForFieldName("body"); if (body) { for (let i = 0; i < body.childCount; i++) { @@ -259,6 +280,7 @@ export class PythonExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), }); } diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/typescript-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/typescript-extractor.ts index f8dd4810..cbc76853 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/typescript-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/typescript-extractor.ts @@ -276,6 +276,51 @@ export class TypeScriptExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // TypeScript / JavaScript class heritage: + // class X extends Y implements I1, I2 { ... } + // tree-sitter-typescript shape: + // class_declaration → class_heritage → extends_clause | implements_clause + // ↘ identifier/type_identifier + // `parents` gets the extended class (at most one for TS/JS). + // `interfaces` gets every implemented interface. 
+ const parents: string[] = []; + const interfaces: string[] = []; + const heritage = node.children.find((c) => c.type === "class_heritage"); + if (heritage) { + for (let i = 0; i < heritage.childCount; i++) { + const clause = heritage.child(i); + if (!clause) continue; + if (clause.type === "extends_clause") { + for (let j = 0; j < clause.childCount; j++) { + const t = clause.child(j); + if (!t) continue; + if ( + t.type === "identifier" || + t.type === "type_identifier" || + t.type === "generic_type" || + t.type === "member_expression" || + t.type === "nested_type_identifier" + ) { + parents.push(t.text); + } + } + } else if (clause.type === "implements_clause") { + for (let j = 0; j < clause.childCount; j++) { + const t = clause.child(j); + if (!t) continue; + if ( + t.type === "type_identifier" || + t.type === "identifier" || + t.type === "generic_type" || + t.type === "nested_type_identifier" + ) { + interfaces.push(t.text); + } + } + } + } + } + const classBody = node.children.find( (c) => c.type === "class_body", ); @@ -309,6 +354,8 @@ export class TypeScriptExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? { interfaces } : {}), }); } From 636b1daf7983580069d960d14e9798300e412372 Mon Sep 17 00:00:00 2001 From: Cam Dowdle Date: Sat, 23 May 2026 08:17:05 -0500 Subject: [PATCH 3/3] feat: populate parents/interfaces in remaining 7 extractors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the shared-type extension introduced in the previous commits. With this commit, every built-in extractor now surfaces inheritance deterministically, so the file-analyzer agent no longer has to re-read source to emit `inherits` / `implements` edges in any language. 
Java (`packages/core/src/plugins/extractors/java-extractor.ts`) - class_declaration: `superclass` field → parents; `interfaces` field (which wraps a super_interfaces node) → interfaces - interface_declaration: `extends_interfaces` child node → parents (interface inheritance, not implementation) - Shared helper `extractTypeRefs()` walks the wrapper node and pulls out `type_identifier` / `scoped_type_identifier` / `generic_type` (peeling the type_identifier out of generic_type when present) C# (`packages/core/src/plugins/extractors/csharp-extractor.ts`) - class_declaration: `base_list` child holds the colon-separated parent + interfaces list (C# has no syntactic distinction, just the I-prefix naming convention). Apply the convention via `splitCSharpBaseRefs`: if the first entry's bare type name starts with `I[A-Z]`, treat every entry as an interface; otherwise the first is the class parent and the rest are interfaces. This matches what every C# IDE does for outline/symbol classification. - interface_declaration: every base_list entry is itself an interface parent → parents - `extractBaseListRefs()` handles `identifier`, `qualified_name`, `predefined_type`, `generic_name` C++ (`packages/core/src/plugins/extractors/cpp-extractor.ts`) - class_specifier / struct_specifier: child node `base_class_clause` holds the `: public Foo, protected Bar` list. C++ has no syntactic interface concept (abstract classes look the same), so every base lands in `parents` - Handles `type_identifier`, `qualified_identifier`, `template_type` Go (`packages/core/src/plugins/extractors/go-extractor.ts`) - Go has no class inheritance, but embedded fields in structs (`type T struct { Inner; *Other }`) promote the embedded type's methods — the closest Go has to inheritance. An embedded field is a `field_declaration` with no `field_identifier`; the type itself is the field name. Surface those in `parents`. For pointer-embed (`*Foo`) strip the `*`; for qualified-embed (`pkg.Foo`) keep the full ref. 
Ruby (`packages/core/src/plugins/extractors/ruby-extractor.ts`) - class declarations: `superclass` field → parents - module mixins (`include Mod`, `prepend Mod`, `extend Mod` at the top level of the class body) → interfaces — they contribute methods at runtime, semantically like interface implementation PHP (`packages/core/src/plugins/extractors/php-extractor.ts`) - class_declaration: `base_clause` child (extends) → parents; `class_interface_clause` child (implements) → interfaces - interface_declaration: `base_clause` (extends) → parents (interface inheritance) - `extractPhpTypeRefs()` walks the clause's `name` / `qualified_name` children Rust (`packages/core/src/plugins/extractors/rust-extractor.ts`) - Rust has no class inheritance, but `impl Trait for Type` declares that `Type` implements `Trait`. Built a `traitsByType` map during impl walking, then attached it as `interfaces` on the matching struct/enum during the final pass (parallel to the existing methodsByType pattern) - trait_item: `bounds` field holds supertrait bounds (e.g. `trait Foo: Bar + Baz`) → parents (direct trait inheritance) All 241 existing extractor tests still pass; smoke-tested all 9 extractors against representative source for each language. 
--- .../src/plugins/extractors/cpp-extractor.ts | 27 ++++++++ .../plugins/extractors/csharp-extractor.ts | 68 +++++++++++++++++++ .../src/plugins/extractors/go-extractor.ts | 26 ++++++- .../src/plugins/extractors/java-extractor.ts | 50 ++++++++++++++ .../src/plugins/extractors/php-extractor.ts | 33 +++++++++ .../src/plugins/extractors/ruby-extractor.ts | 37 ++++++++++ .../src/plugins/extractors/rust-extractor.ts | 40 ++++++++++- 7 files changed, 279 insertions(+), 2 deletions(-) diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/cpp-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/cpp-extractor.ts index 8523d6f2..cfa39e6d 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/cpp-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/cpp-extractor.ts @@ -327,6 +327,32 @@ export class CppExtractor implements LanguageExtractor { const className = nameNode.text; const methods: string[] = []; const properties: string[] = []; + // C++ inheritance: `class Foo : public Bar, protected Baz { ... }` + // The base_class_clause sits as a child of the class_specifier; its + // children are pairs of (access_specifier?, type_identifier|template_type). + // C++ has no syntactic interface concept — abstract classes and + // protocol-style interfaces look the same. Surface every base in + // `parents`. + const parents: string[] = []; + for (let i = 0; i < node.childCount; i++) { + const c = node.child(i); + if (c && c.type === "base_class_clause") { + for (let j = 0; j < c.childCount; j++) { + const b = c.child(j); + if (!b) continue; + if ( + b.type === "type_identifier" || + b.type === "qualified_identifier" + ) { + parents.push(b.text); + } else if (b.type === "template_type") { + const inner = + b.childForFieldName("name") ?? findChild(b, "type_identifier"); + parents.push(inner ? 
inner.text : b.text); + } + } + } + } const body = node.childForFieldName("body"); if (body && body.type === "field_declaration_list") { @@ -409,6 +435,7 @@ export class CppExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), }); // The class/struct name itself is an export (non-anonymous types are always exported in C/C++ headers) diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/csharp-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/csharp-extractor.ts index 19b77b5b..707f1e5e 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/csharp-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/csharp-extractor.ts @@ -2,6 +2,56 @@ import type { StructuralAnalysis, CallGraphEntry } from "../../types.js"; import type { LanguageExtractor, TreeSitterNode } from "./types.js"; import { findChild, findChildren } from "./base-extractor.js"; +/** + * Pull type names out of a C# `base_list` node (the colon-list after a + * class or interface declaration). Handles plain `identifier`, `predefined_type`, + * `generic_name` (e.g. `IList<T>`, reduced to its bare identifier), and `qualified_name` + * (`System.IDisposable`). Only direct children of the node are inspected. + */ +function extractBaseListRefs(node: TreeSitterNode | null): string[] { + if (!node) return []; + const refs: string[] = []; + for (let i = 0; i < node.childCount; i++) { + const c = node.child(i); + if (!c) continue; + if ( + c.type === "identifier" || + c.type === "qualified_name" || + c.type === "predefined_type" + ) { + refs.push(c.text); + } else if (c.type === "generic_name") { + const inner = findChild(c, "identifier"); + refs.push(inner ? inner.text : c.text); + } + } + return refs; +} + +/** + * Apply the C# I-prefix convention to split a base list into class + * parent vs implemented interfaces. C# doesn't syntactically distinguish + * — the only hint is naming. 
+ * - `forceAllParents=true` is passed for interface declarations (every + * base is an interface parent). Heuristic caveat: any bare name matching `I[A-Z]` (e.g. `IOException`) is treated as an interface. + */ +function splitCSharpBaseRefs( + refs: string[], + forceAllParents: boolean, +): { parents: string[]; interfaces: string[] } { + if (forceAllParents) return { parents: [...refs], interfaces: [] }; + if (refs.length === 0) return { parents: [], interfaces: [] }; + const bareName = (s: string) => s.replace(/<.*$/, "").split(".").pop() ?? ""; + const looksLikeInterface = (s: string) => /^I[A-Z]/.test(bareName(s)); + // If the first entry looks like an interface (starts with `I` + capital), + // every entry is an interface; otherwise the first is the class parent + // and the rest are interfaces. + if (looksLikeInterface(refs[0])) { + return { parents: [], interfaces: [...refs] }; + } + return { parents: [refs[0]], interfaces: refs.slice(1) }; +} + /** * Extract parameter names from a C# `parameter_list` node. * @@ -304,6 +354,15 @@ export class CSharpExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // C# `class Foo : Bar, IFoo, IBar` puts class parent + interfaces in + // a single `base_list`. The grammar can't distinguish them. Apply the + // standard C# convention: the first entry is treated as the class + // parent unless its bare type name starts with `I` followed by an + // uppercase letter (the I-prefix interface naming convention), in + // which case ALL entries are interfaces. + const baseRefs = extractBaseListRefs(findChild(node, "base_list")); + const { parents, interfaces } = splitCSharpBaseRefs(baseRefs, false); + const body = node.childForFieldName("body"); if (body) { this.extractClassBodyMembers(body, methods, properties, functions, exports); @@ -317,6 +376,8 @@ export class CSharpExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? 
{ interfaces } : {}), }); if (hasModifier(node, "public")) { @@ -339,6 +400,12 @@ export class CSharpExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // For interface declarations, every entry in the base list is itself + // an interface (interface inheritance, not implementation) — landing + // in `parents`. + const baseRefs = extractBaseListRefs(findChild(node, "base_list")); + const { parents } = splitCSharpBaseRefs(baseRefs, true); + const body = node.childForFieldName("body"); if (body) { // Interface body contains method_declaration nodes (signatures without bodies) @@ -368,6 +435,7 @@ export class CSharpExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), }); if (hasModifier(node, "public")) { diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/go-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/go-extractor.ts index 53e3e95a..69a64efb 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/go-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/go-extractor.ts @@ -283,12 +283,35 @@ export class GoExtractor implements LanguageExtractor { exports: StructuralAnalysis["exports"], ): void { const properties: string[] = []; + // Go has no class inheritance, but embedded fields (`type T struct + // { Inner }` — a field_declaration with a type_identifier but no + // field_identifier) promote the embedded type's methods, which is + // the closest Go has to inheritance. Surface those in `parents` so + // method-promotion relationships are visible in the graph. 
+ const parents: string[] = []; const fieldList = findChild(structNode, "field_declaration_list"); if (fieldList) { const fields = findChildren(fieldList, "field_declaration"); for (const field of fields) { - // A field_declaration can have multiple names: `X, Y int` + // Detect embedded fields first: a field_declaration with no + // field_identifier child, where the type itself is the field name. + const hasName = findChild(field, "field_identifier") !== null; + if (!hasName) { + const embeddedType = + field.childForFieldName("type") ?? + findChild(field, "type_identifier") ?? + findChild(field, "qualified_type") ?? + findChild(field, "pointer_type"); + if (embeddedType) { + // For pointer_type `*Foo` strip the `*`; for qualified_type + // `pkg.Foo` use the full ref. + let txt = embeddedType.text; + if (embeddedType.type === "pointer_type") txt = txt.replace(/^\*\s*/, ""); + parents.push(txt); + } + } + // Regular (named) fields contribute to properties. for (let i = 0; i < field.childCount; i++) { const child = field.child(i); if (child && child.type === "field_identifier") { @@ -306,6 +329,7 @@ export class GoExtractor implements LanguageExtractor { ], methods: [], // Methods are attached later from methodsByReceiver properties, + ...(parents.length ? 
/**
 * Collect the type names referenced by a Java `superclass`,
 * `super_interfaces`, or `extends_interfaces` node.
 *
 * - `type_identifier` and `scoped_type_identifier` children are reported
 *   verbatim.
 * - A `generic_type` child (e.g. `List<String>` or `Outer.Inner<T>`) is
 *   reported by its base name only, so type arguments do not leak into the
 *   result.
 * - `type_list` / `interface_type_list` wrappers, and nested
 *   `extends_interfaces` / `super_interfaces` nodes, are descended into.
 *
 * @param node - Clause node to inspect, or `null` when the declaration has no
 *   such clause.
 * @returns The referenced type names in child order; empty when `node` is
 *   `null`.
 */
function extractTypeRefs(node: TreeSitterNode | null): string[] {
  if (!node) return [];

  // Base name of a generic type: its first direct identifier child, plain or
  // scoped. Falls back to the full node text when no such child exists.
  function genericBaseName(generic: TreeSitterNode): string {
    for (let k = 0; k < generic.childCount; k++) {
      const part = generic.child(k);
      if (
        part &&
        (part.type === "type_identifier" ||
          part.type === "scoped_type_identifier")
      ) {
        return part.text;
      }
    }
    return generic.text;
  }

  const refs: string[] = [];
  for (let i = 0; i < node.childCount; i++) {
    const child = node.child(i);
    if (!child) continue;

    switch (child.type) {
      case "type_identifier":
      case "scoped_type_identifier":
        refs.push(child.text);
        break;
      case "generic_type":
        // Do not descend further: type arguments are not ancestors.
        refs.push(genericBaseName(child));
        break;
      case "type_list":
      case "interface_type_list":
      case "extends_interfaces":
      case "super_interfaces":
        // Wrapper nodes only group the actual type references.
        refs.push(...extractTypeRefs(child));
        break;
      default:
        // Keywords (`extends`, `implements`), commas, etc.
        break;
    }
  }
  return refs;
}
}` + // Field names on class_declaration (tree-sitter-java): + // superclass → superclass node containing type_identifier|generic_type + // interfaces → super_interfaces node containing type_list + const parents = extractTypeRefs(node.childForFieldName("superclass")); + const interfaces = extractTypeRefs( + node.childForFieldName("interfaces"), + ); + const body = node.childForFieldName("body"); if (body) { this.extractClassBodyMembers( @@ -267,6 +308,8 @@ export class JavaExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? { interfaces } : {}), }); if (hasModifier(node, "public")) { @@ -289,6 +332,12 @@ export class JavaExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // `interface IExtended extends IBase1, IBase2` — interface inheritance + // landing in `parents` (it's direct inheritance, not implementation). + // The interface_declaration grammar exposes `extends_interfaces` as a + // child node type, not a field name. + const parents = extractTypeRefs(findChild(node, "extends_interfaces")); + const body = node.childForFieldName("body"); if (body) { // Interface body contains method_declaration nodes (signatures without bodies) @@ -321,6 +370,7 @@ export class JavaExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? 
/**
 * Read the type names listed directly under a PHP `base_clause`
 * (`extends X`) or `class_interface_clause` (`implements I1, I2`).
 *
 * Only direct children of type `name` or `qualified_name` contribute; every
 * other child (keywords, commas, ...) is ignored and nothing is descended
 * into.
 *
 * @param node - Clause node to inspect, or `null` when the clause is absent.
 * @returns The text of each matching child, in child order; empty when
 *   `node` is `null`.
 */
function extractPhpTypeRefs(node: TreeSitterNode | null): string[] {
  if (node == null) return [];

  const clause = node;
  const isTypeName = (kind: string): boolean =>
    kind === "name" || kind === "qualified_name";

  return Array.from({ length: clause.childCount }, (_, idx) =>
    clause.child(idx),
  ).flatMap((entry) => (entry && isTypeName(entry.type) ? [entry.text] : []));
}
* @@ -313,6 +331,14 @@ export class PhpExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // PHP `class Foo extends Bar implements I1, I2` + // base_clause → name (the extended class, single) + // class_interface_clause → multiple name nodes (implemented interfaces) + const parents = extractPhpTypeRefs(findChild(node, "base_clause")); + const interfaces = extractPhpTypeRefs( + findChild(node, "class_interface_clause"), + ); + const declList = findChild(node, "declaration_list"); if (declList) { this.extractDeclarationList(declList, methods, properties, functions); @@ -323,6 +349,8 @@ export class PhpExtractor implements LanguageExtractor { lineRange: [node.startPosition.row + 1, node.endPosition.row + 1], methods, properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? { interfaces } : {}), }); } @@ -336,6 +364,10 @@ export class PhpExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // `interface IExtended extends IBase1, IBase2` — interface inheritance + // lands in `parents` (direct inheritance, not implementation). + const parents = extractPhpTypeRefs(findChild(node, "base_clause")); + const declList = findChild(node, "declaration_list"); if (declList) { // Interface methods are method_declaration nodes (no bodies, just signatures) @@ -353,6 +385,7 @@ export class PhpExtractor implements LanguageExtractor { lineRange: [node.startPosition.row + 1, node.endPosition.row + 1], methods, properties, + ...(parents.length ? 
{ parents } : {}), }); } diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/ruby-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/ruby-extractor.ts index e4f115c9..54df8330 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/ruby-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/ruby-extractor.ts @@ -312,9 +312,44 @@ export class RubyExtractor implements LanguageExtractor { const methods: string[] = []; const properties: string[] = []; + // Ruby `class Foo < Bar` — the `superclass` child holds the parent. + // Module mixins (`include Mod1`, `prepend Mod2`) inside the body act + // like interfaces — they contribute methods at runtime — and land in + // `interfaces`. (`extend Mod` mixes class methods; we lump it here + // too since it's the same dispatch semantics from a graph viewpoint.) + const parents: string[] = []; + const interfaces: string[] = []; + const superclassNode = node.childForFieldName("superclass"); + if (superclassNode) { + // superclass node wraps the actual reference; the constant child is the name + const ref = + findChild(superclassNode, "constant") ?? + findChild(superclassNode, "scope_resolution") ?? 
+ superclassNode; + if (ref.text && ref.text !== "<") parents.push(ref.text.replace(/^<\s*/, "")); + } + const body = node.childForFieldName("body"); if (body) { this.extractClassBody(body, methods, properties, functions); + // Look for `include X`, `prepend X`, `extend X` calls at class body top level + for (let i = 0; i < body.childCount; i++) { + const stmt = body.child(i); + if (!stmt) continue; + if (stmt.type !== "call" && stmt.type !== "method_call") continue; + const receiver = stmt.childForFieldName("method"); + const name = receiver?.text; + if (name !== "include" && name !== "prepend" && name !== "extend") continue; + const args = stmt.childForFieldName("arguments"); + if (!args) continue; + for (let j = 0; j < args.childCount; j++) { + const a = args.child(j); + if (!a) continue; + if (a.type === "constant" || a.type === "scope_resolution") { + interfaces.push(a.text); + } + } + } } classes.push({ @@ -325,6 +360,8 @@ export class RubyExtractor implements LanguageExtractor { ], methods, properties, + ...(parents.length ? { parents } : {}), + ...(interfaces.length ? { interfaces } : {}), }); } diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/rust-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/rust-extractor.ts index 98ab38ff..0f19171e 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/rust-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/rust-extractor.ts @@ -100,6 +100,10 @@ export class RustExtractor implements LanguageExtractor { // Track methods per impl type so we can attach them to structs/enums const methodsByType = new Map(); + // Track trait implementations per impl target type so we can attach + // them as `interfaces` on the corresponding struct/enum. Rust's `impl + // Trait for Type` is the closest analog to Java's `implements`. 
+ const traitsByType = new Map>(); for (let i = 0; i < rootNode.childCount; i++) { const node = rootNode.child(i); @@ -123,7 +127,7 @@ export class RustExtractor implements LanguageExtractor { break; case "impl_item": - this.extractImpl(node, functions, exports, methodsByType); + this.extractImpl(node, functions, exports, methodsByType, traitsByType); break; case "use_declaration": @@ -138,6 +142,11 @@ export class RustExtractor implements LanguageExtractor { if (methods) { cls.methods.push(...methods); } + const traits = traitsByType.get(cls.name); + if (traits && traits.size > 0) { + const existing = cls.interfaces ?? []; + cls.interfaces = [...new Set([...existing, ...traits])]; + } } return { functions, classes, imports, exports }; @@ -339,6 +348,25 @@ export class RustExtractor implements LanguageExtractor { if (!nameNode) return; const methods: string[] = []; + // Supertraits: `trait Foo: Bar + Baz` — `bounds` field on trait_item + // holds a trait_bounds node with the supertrait references. Treat + // these as `parents` (direct trait inheritance). + const parents: string[] = []; + const boundsNode = node.childForFieldName("bounds"); + if (boundsNode) { + for (let i = 0; i < boundsNode.childCount; i++) { + const b = boundsNode.child(i); + if (!b) continue; + if ( + b.type === "type_identifier" || + b.type === "scoped_type_identifier" || + b.type === "generic_type" + ) { + parents.push(b.text); + } + } + } + const body = findChild(node, "declaration_list"); if (body) { // Trait bodies contain function_signature_item for method declarations @@ -367,6 +395,7 @@ export class RustExtractor implements LanguageExtractor { ], methods, properties: [], + ...(parents.length ? 
{ parents } : {}), }); if (isPublic(node)) { @@ -382,9 +411,18 @@ export class RustExtractor implements LanguageExtractor { functions: StructuralAnalysis["functions"], exports: StructuralAnalysis["exports"], methodsByType: Map, + traitsByType: Map>, ): void { const typeNode = node.childForFieldName("type"); const typeName = typeNode ? typeNode.text : null; + // `impl Trait for Type` — when the impl has a `trait` field, the + // type implements that trait. Surface as a relationship that the + // outer loop pins onto the type's `interfaces` array. + const traitNode = node.childForFieldName("trait"); + if (traitNode && typeName) { + if (!traitsByType.has(typeName)) traitsByType.set(typeName, new Set()); + traitsByType.get(typeName)!.add(traitNode.text); + } const body = node.childForFieldName("body"); if (!body) return;