From 4712ab7fb6ed577826de06d59a486e6a7e933100 Mon Sep 17 00:00:00 2001 From: Tirth Kanani Date: Sun, 14 Jun 2026 18:17:44 +0100 Subject: [PATCH] fix(base-extractor): preserve full string value across escape sequences in getStringValue tree-sitter splits a string literal's contents into multiple string_fragment nodes whenever an escape_sequence appears between them. getStringValue returned only the first string_fragment, silently dropping everything from the first escape onward (e.g. './a\tb' became './a'). This truncated import sources for any module path containing an escape. Fix concatenates the text of every content child (string_fragment and escape_sequence) instead of returning only the first fragment, preserving the full raw value. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../__tests__/base-extractor.test.ts | 96 +++++++++++++++++++ .../src/plugins/extractors/base-extractor.ts | 8 +- 2 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 understand-anything-plugin/packages/core/src/plugins/extractors/__tests__/base-extractor.test.ts diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/__tests__/base-extractor.test.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/__tests__/base-extractor.test.ts new file mode 100644 index 00000000..93214fee --- /dev/null +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/__tests__/base-extractor.test.ts @@ -0,0 +1,96 @@ +import { describe, it, expect, beforeAll } from "vitest"; +import { createRequire } from "node:module"; +import { getStringValue } from "../base-extractor.js"; +import { TypeScriptExtractor } from "../typescript-extractor.js"; +import type { TreeSitterNode } from "../types.js"; + +const require = createRequire(import.meta.url); + +// Load tree-sitter + TypeScript grammar once +let Parser: any; +let Language: any; +let tsLang: any; + +beforeAll(async () => { + const mod = await import("web-tree-sitter"); + Parser = mod.Parser; 
+ Language = mod.Language; + await Parser.init(); + const wasmPath = require.resolve( + "tree-sitter-typescript/tree-sitter-typescript.wasm", + ); + tsLang = await Language.load(wasmPath); +}); + +function parse(code: string) { + const parser = new Parser(); + parser.setLanguage(tsLang); + const tree = parser.parse(code); + const root = tree.rootNode; + return { tree, parser, root }; +} + +/** Find the first descendant of the given type. */ +function findFirst(node: TreeSitterNode, type: string): TreeSitterNode | null { + if (node.type === type) return node; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) { + const found = findFirst(child, type); + if (found) return found; + } + } + return null; +} + +describe("getStringValue", () => { + it("returns the full string value even when an escape sequence splits the fragments", () => { + const { tree, parser, root } = parse(`import x from './a\\tb';`); + const stringNode = findFirst(root, "string"); + expect(stringNode).not.toBeNull(); + + // tree-sitter splits the contents into [string_fragment './a', escape_sequence '\t', string_fragment 'b']. + // The full raw value must be preserved, not truncated at the first escape. 
+ expect(getStringValue(stringNode!)).toBe("./a\\tb"); + + tree.delete(); + parser.delete(); + }); + + it("preserves trailing fragments after an escaped quote", () => { + const { tree, parser, root } = parse(`import x from "a\\"b";`); + const stringNode = findFirst(root, "string"); + expect(stringNode).not.toBeNull(); + + expect(getStringValue(stringNode!)).toContain('a\\"b'); + + tree.delete(); + parser.delete(); + }); + + it("returns plain strings without escapes unchanged", () => { + const { tree, parser, root } = parse(`import x from './a';`); + const stringNode = findFirst(root, "string"); + expect(stringNode).not.toBeNull(); + + expect(getStringValue(stringNode!)).toBe("./a"); + + tree.delete(); + parser.delete(); + }); +}); + +describe("TypeScriptExtractor import source with escapes", () => { + const extractor = new TypeScriptExtractor(); + + it("records the full import source for paths containing an escape sequence", () => { + const { tree, parser, root } = parse(`import x from './a\\tb';`); + const result = extractor.extractStructure(root); + + expect(result.imports).toHaveLength(1); + expect(result.imports[0].source).toBe("./a\\tb"); + + tree.delete(); + parser.delete(); + }); +}); diff --git a/understand-anything-plugin/packages/core/src/plugins/extractors/base-extractor.ts b/understand-anything-plugin/packages/core/src/plugins/extractors/base-extractor.ts index 814462f1..a3cbc6b5 100644 --- a/understand-anything-plugin/packages/core/src/plugins/extractors/base-extractor.ts +++ b/understand-anything-plugin/packages/core/src/plugins/extractors/base-extractor.ts @@ -14,12 +14,16 @@ export function traverse( /** Extract the unquoted string value from a string-like node. 
/**
 * Extract the unquoted string value from a string-like node.
 *
 * tree-sitter does not expose a string literal's contents as a single child:
 * the text between the delimiters is split into several sibling nodes
 * whenever something other than plain characters appears (for example an
 * `escape_sequence` between two `string_fragment` nodes). This function
 * stitches those content children back together in source order so the
 * result is the complete text between the quotes.
 *
 * The value is the RAW source text: escape sequences are kept exactly as
 * written (`'./a\tb'` yields the five characters `./a\tb`, not a tab), and
 * template substitutions / HTML character references are kept verbatim
 * rather than evaluated or decoded. This matches what the fallback below
 * already returns for nodes that have no content children at all.
 *
 * @param node - A string-like syntax node (e.g. `string`, `template_string`).
 * @returns The node's text without its surrounding quote characters.
 */
export function getStringValue(node: TreeSitterNode): string {
  const parts: string[] = [];

  for (let i = 0; i < node.childCount; i++) {
    const child = node.child(i);
    if (!child) continue;

    switch (child.type) {
      // Plain characters and backslash escapes.
      case "string_fragment":
      case "escape_sequence":
      // Content nodes the JavaScript/TypeScript grammar can place between
      // fragments in template literals and JSX attribute strings. Skipping
      // them would glue the neighbouring fragments together and fabricate a
      // value that never appears in the source ("./a${x}b" -> "./ab").
      case "template_substitution":
      case "html_character_reference":
        parts.push(child.text);
        break;
      default:
        // Delimiter tokens (quotes/backticks) and anything unrecognised.
        break;
    }
  }

  if (parts.length > 0) return parts.join("");

  // No content children: either an empty literal or a grammar that does not
  // expose fragments. Strip one leading and one trailing quote character.
  return node.text.replace(/^['"`]|['"`]$/g, "");
}