From 9505533194680f72455dcf7e6bd460a467b91698 Mon Sep 17 00:00:00 2001 From: Varun Nuthalapati Date: Thu, 9 Apr 2026 10:12:21 -0700 Subject: [PATCH 1/3] feat: resolve TypeScript tsconfig path aliases in import edges (#147) Add load_tsconfig_paths() to detect.py that walks up the directory tree to find tsconfig.json and extracts compilerOptions.paths. Add resolve_ts_alias() to map alias prefixes (e.g. @/*) to their resolved filesystem paths. Update extract_js() to load aliases for the file being processed and pass them through a closure to _import_js(), so aliased imports like @/components/Button resolve to real module names in the edge graph. --- graphify/detect.py | 69 +++++++++++++++++++ graphify/extract.py | 24 ++++++- .../fixtures/tsconfig_alias/src/pages/Home.ts | 11 +++ tests/fixtures/tsconfig_alias/tsconfig.json | 9 +++ tests/test_extract.py | 59 +++++++++++++++- 5 files changed, 168 insertions(+), 4 deletions(-) create mode 100644 tests/fixtures/tsconfig_alias/src/pages/Home.ts create mode 100644 tests/fixtures/tsconfig_alias/tsconfig.json diff --git a/graphify/detect.py b/graphify/detect.py index 9a5f16e0..1bf7bc71 100644 --- a/graphify/detect.py +++ b/graphify/detect.py @@ -469,3 +469,72 @@ def detect_incremental(root: Path, manifest_path: str = _MANIFEST_PATH) -> dict: full["new_total"] = new_total full["deleted_files"] = deleted_files return full + + +def load_tsconfig_paths(root: Path) -> dict[str, str]: + """Parse tsconfig.json compilerOptions.paths and return an alias→prefix map. + + Walks up from *root* until a tsconfig.json is found or the filesystem root + is reached. Returns a dict mapping each alias prefix (e.g. ``"@/"`` or + ``"@components/"```) to its resolved filesystem prefix (e.g. ``"src/"``). + + Only the first glob pattern for each alias is used; ``*`` wildcards are + stripped to give a plain prefix that can be used with ``str.startswith``. + + Returns an empty dict when no tsconfig.json is found or when it contains + no ``paths`` mapping. + """ + # Walk up directory tree to find tsconfig.json + current = Path(root).resolve() + tsconfig_path: Path | None = None + while True: + candidate = current / "tsconfig.json" + if candidate.exists(): + tsconfig_path = candidate + break + parent = current.parent + if parent == current: + break + current = parent + + if tsconfig_path is None: + return {} + + try: + data = json.loads(tsconfig_path.read_text(encoding="utf-8")) + except Exception: + return {} + + compiler_options = data.get("compilerOptions", {}) + base_url = compiler_options.get("baseUrl", ".") + paths = compiler_options.get("paths", {}) + if not paths: + return {} + + alias_map: dict[str, str] = {} + tsconfig_dir = tsconfig_path.parent + base_dir = (tsconfig_dir / base_url).resolve() + + for alias, targets in paths.items(): + if not targets: + continue + # Strip trailing /* from alias to get the prefix used in import strings + alias_prefix = alias.rstrip("*").rstrip("/") + # Use first target, strip trailing /* + target = targets[0].rstrip("*").rstrip("/") + resolved = (base_dir / target).resolve() + alias_map[alias_prefix] = str(resolved) + + return alias_map + + +def resolve_ts_alias(import_path: str, alias_map: dict[str, str]) -> str: + """Replace a TypeScript path alias with its resolved filesystem path. + + Returns the original *import_path* unchanged if no alias matches. + """ + for alias_prefix, resolved_prefix in alias_map.items(): + if import_path == alias_prefix or import_path.startswith(alias_prefix + "/"): + remainder = import_path[len(alias_prefix):] + return resolved_prefix + remainder + return import_path diff --git a/graphify/extract.py b/graphify/extract.py index c767e07f..7ad97339 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -125,11 +125,16 @@ def _import_python(node, source: bytes, file_nid: str, stem: str, edges: list, s }) -def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_path: str) -> None: +def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_path: str, + alias_map: dict | None = None) -> None: for child in node.children: if child.type == "string": raw = _read_text(child, source).strip("'\"` ") - module_name = raw.lstrip("./").split("/")[-1] + resolved = raw + if alias_map: + from .detect import resolve_ts_alias + resolved = resolve_ts_alias(raw, alias_map) + module_name = resolved.lstrip("./").split("/")[-1] if module_name: tgt_nid = _make_id(module_name) edges.append({ @@ -1097,7 +1102,20 @@ def extract_python(path: Path) -> dict: def extract_js(path: Path) -> dict: """Extract classes, functions, arrow functions, and imports from a .js/.ts/.tsx file.""" - config = _TS_CONFIG if path.suffix in (".ts", ".tsx") else _JS_CONFIG + from .detect import load_tsconfig_paths + import dataclasses + + base_config = _TS_CONFIG if path.suffix in (".ts", ".tsx") else _JS_CONFIG + alias_map = load_tsconfig_paths(path.parent) + + if alias_map: + def _import_js_with_aliases(node, source, file_nid, stem, edges, str_path): + _import_js(node, source, file_nid, stem, edges, str_path, alias_map=alias_map) + + config = dataclasses.replace(base_config, import_handler=_import_js_with_aliases) + else: + config = base_config + return _extract_generic(path, config) diff --git a/tests/fixtures/tsconfig_alias/src/pages/Home.ts b/tests/fixtures/tsconfig_alias/src/pages/Home.ts new file mode 100644 index 00000000..0e8e683a --- /dev/null +++ b/tests/fixtures/tsconfig_alias/src/pages/Home.ts @@ -0,0 +1,11 @@ +import Button from "@/components/Button"; +import { useAuth } from "@/hooks/useAuth"; +import Sidebar from "@components/Sidebar"; + +class HomePage { + render() { + return "home"; + } +} + +export { HomePage }; diff --git a/tests/fixtures/tsconfig_alias/tsconfig.json b/tests/fixtures/tsconfig_alias/tsconfig.json new file mode 100644 index 00000000..f9685c7d --- /dev/null +++ b/tests/fixtures/tsconfig_alias/tsconfig.json @@ -0,0 +1,9 @@ +{ + "compilerOptions": { + "baseUrl": ".", + "paths": { + "@/*": ["src/*"], + "@components/*": ["src/components/*"] + } + } +} diff --git a/tests/test_extract.py b/tests/test_extract.py index 3d5b9f53..2d3a047d 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -1,5 +1,6 @@ from pathlib import Path -from graphify.extract import extract_python, extract, collect_files, _make_id +from graphify.extract import extract_python, extract, collect_files, _make_id, extract_js +from graphify.detect import load_tsconfig_paths, resolve_ts_alias FIXTURES = Path(__file__).parent / "fixtures" @@ -168,3 +169,59 @@ def test_calls_deduplication(): result = extract_python(FIXTURES / "sample_calls.py") call_pairs = [(e["source"], e["target"]) for e in result["edges"] if e["relation"] == "calls"] assert len(call_pairs) == len(set(call_pairs)), "Duplicate calls edges found" + + +# ── tsconfig path alias tests ───────────────────────────────────────────────── + +TSCONFIG_FIXTURE = FIXTURES / "tsconfig_alias" + + +def test_load_tsconfig_paths_finds_config(): + """load_tsconfig_paths returns a non-empty map when tsconfig.json with paths exists.""" + alias_map = load_tsconfig_paths(TSCONFIG_FIXTURE / "src" / "pages") + assert "@" in alias_map or any(k.startswith("@") for k in alias_map) + + +def test_load_tsconfig_paths_no_config(tmp_path): + """load_tsconfig_paths returns empty dict when no tsconfig.json is found.""" + result = load_tsconfig_paths(tmp_path) + assert result == {} + + +def test_resolve_ts_alias_replaces_prefix(): + """resolve_ts_alias maps @/foo/bar to the resolved path.""" + alias_map = {"@": "/project/src", "@components": "/project/src/components"} + result = resolve_ts_alias("@/hooks/useAuth", alias_map) + assert result == "/project/src/hooks/useAuth" + + +def test_resolve_ts_alias_longer_prefix_wins(): + """More specific alias (@components) takes precedence over shorter one (@).""" + alias_map = {"@": "/project/src", "@components": "/project/src/components"} + result = resolve_ts_alias("@components/Sidebar", alias_map) + assert result == "/project/src/components/Sidebar" + + +def test_resolve_ts_alias_no_match(): + """resolve_ts_alias returns the original path when no alias matches.""" + alias_map = {"@": "/project/src"} + assert resolve_ts_alias("./local/module", alias_map) == "./local/module" + assert resolve_ts_alias("react", alias_map) == "react" + + +def test_extract_js_resolves_aliases(): + """extract_js resolves tsconfig path aliases to real module names in edges.""" + import pytest + result = extract_js(TSCONFIG_FIXTURE / "src" / "pages" / "Home.ts") + if result.get("error") and "not installed" in result["error"]: + pytest.skip(f"tree-sitter backend not installed: {result['error']}") + import_targets = { + e["target"] for e in result["edges"] if e["relation"] == "imports_from" + } + # @/components/Button → Button, @/hooks/useAuth → useAuth, @components/Sidebar → Sidebar + lowered = {t.lower() for t in import_targets} + assert "button" in lowered, f"Expected 'button' in targets, got: {import_targets}" + # Aliases should NOT appear raw as targets + assert not any("@" in t for t in import_targets), ( + f"Raw alias found in targets: {import_targets}" + ) From 3fbfa6f9d53f7e72c5e1dc591f37e340f1f1ea9f Mon Sep 17 00:00:00 2001 From: Varun Nuthalapati Date: Fri, 24 Apr 2026 18:20:15 -0700 Subject: [PATCH 2/3] fix: parse tsconfig.json as JSONC to handle comments and trailing commas Real-world tsconfig.json files use JSONC syntax (// and /* */ comments, trailing commas) by default. The plain json.loads() call silently returned {} on any such file, making alias resolution a no-op in practice. Replace with a string-aware JSONC stripper that handles line comments, block comments (including multi-line), trailing commas, and string literals containing comment-like sequences. --- graphify/detect.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/graphify/detect.py b/graphify/detect.py index 1bf7bc71..d76749c0 100644 --- a/graphify/detect.py +++ b/graphify/detect.py @@ -501,7 +501,35 @@ def load_tsconfig_paths(root: Path) -> dict[str, str]: return {} try: - data = json.loads(tsconfig_path.read_text(encoding="utf-8")) + raw = tsconfig_path.read_text(encoding="utf-8") + # tsconfig.json is JSONC: strip // and /* */ comments and trailing commas + out: list[str] = [] + i, n = 0, len(raw) + while i < n: + c = raw[i] + if c == '"': + j = i + 1 + while j < n: + if raw[j] == "\\" and j + 1 < n: + j += 2 + continue + if raw[j] == '"': + j += 1 + break + j += 1 + out.append(raw[i:j]) + i = j + elif c == "/" and i + 1 < n and raw[i + 1] == "/": + nl = raw.find("\n", i) + i = n if nl == -1 else nl + elif c == "/" and i + 1 < n and raw[i + 1] == "*": + end = raw.find("*/", i + 2) + i = n if end == -1 else end + 2 + else: + out.append(c) + i += 1 + stripped = re.sub(r",\s*([}\]])", r"\1", "".join(out)) + data = json.loads(stripped) except Exception: return {} From 586c1c930bc8f2bc3c241e894efad53b53b7d8bd Mon Sep 17 00:00:00 2001 From: Varun Nuthalapati Date: Sat, 25 Apr 2026 11:15:20 -0700 Subject: [PATCH 3/3] fix: include tsconfig hash in cache key for JS/TS files extract_js() now resolves aliases from tsconfig.json, but the cache was keyed only on source file contents. A tsconfig change would leave stale import edges in the cache until the source files themselves changed. Add an extra_key parameter to file_hash/load_cached/save_cached. In the extraction loop, JS/TS files with a non-empty alias map hash the serialised alias map and mix it into the cache key, so any change to compilerOptions.paths or baseUrl triggers a cache miss. --- graphify/cache.py | 27 +++++++++++++++++++-------- graphify/extract.py | 15 +++++++++++++-- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/graphify/cache.py b/graphify/cache.py index f198e416..4df712d5 100644 --- a/graphify/cache.py +++ b/graphify/cache.py @@ -7,13 +7,20 @@ from pathlib import Path -def file_hash(path: Path) -> str: - """SHA256 of file contents + resolved path. Prevents cache collisions on identical content.""" +def file_hash(path: Path, extra_key: bytes = b"") -> str: + """SHA256 of file contents + resolved path + optional extra key. + + extra_key allows callers to mix in additional context (e.g. a tsconfig + hash) so that cache entries are invalidated when that context changes. + """ p = Path(path) h = hashlib.sha256() h.update(p.read_bytes()) h.update(b"\x00") h.update(str(p.resolve()).encode()) + if extra_key: + h.update(b"\x00") + h.update(extra_key) return h.hexdigest() @@ -24,15 +31,17 @@ def cache_dir(root: Path = Path(".")) -> Path: return d -def load_cached(path: Path, root: Path = Path(".")) -> dict | None: +def load_cached(path: Path, root: Path = Path("."), extra_key: bytes = b"") -> dict | None: """Return cached extraction for this file if hash matches, else None. - Cache key: SHA256 of file contents. + Cache key: SHA256 of file contents + resolved path + extra_key. + extra_key should include any external context the extraction depends on + (e.g. a hash of the effective tsconfig.json for JS/TS files). Cache value: stored as graphify-out/cache/{hash}.json Returns None if no cache entry or file has changed. """ try: - h = file_hash(path) + h = file_hash(path, extra_key) except OSError: return None entry = cache_dir(root) / f"{h}.json" @@ -44,13 +53,15 @@ def load_cached(path: Path, root: Path = Path(".")) -> dict | None: return None -def save_cached(path: Path, result: dict, root: Path = Path(".")) -> None: +def save_cached(path: Path, result: dict, root: Path = Path("."), extra_key: bytes = b"") -> None: """Save extraction result for this file. - Stores as graphify-out/cache/{hash}.json where hash = SHA256 of current file contents. + Stores as graphify-out/cache/{hash}.json where hash = SHA256 of current + file contents + resolved path + extra_key. extra_key must match the value + used in load_cached so that lookups and stores are consistent. result should be a dict with 'nodes' and 'edges' lists. """ - h = file_hash(path) + h = file_hash(path, extra_key) entry = cache_dir(root) / f"{h}.json" tmp = entry.with_suffix(".tmp") try: diff --git a/graphify/extract.py b/graphify/extract.py index 7ad97339..f3d893f9 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -2650,13 +2650,24 @@ def extract(paths: list[Path]) -> dict: extractor = _DISPATCH.get(path.suffix) if extractor is None: continue - cached = load_cached(path, root) + # For JS/TS files, include the effective tsconfig.json content in the + # cache key so that alias-map changes invalidate cached import edges. + extra_key = b"" + if path.suffix in (".js", ".ts", ".tsx", ".jsx"): + from .detect import load_tsconfig_paths + import hashlib as _hashlib + alias_map = load_tsconfig_paths(path.parent) + if alias_map: + extra_key = _hashlib.sha256( + json.dumps(alias_map, sort_keys=True).encode() + ).digest() + cached = load_cached(path, root, extra_key) if cached is not None: per_file.append(cached) continue result = extractor(path) if "error" not in result: - save_cached(path, result, root) + save_cached(path, result, root, extra_key) per_file.append(result) if total >= _PROGRESS_INTERVAL: print(f" AST extraction: {total}/{total} files (100%)", flush=True)