Resolve tsconfig.json path aliases in JS/TS import edges and key the cache on them.

Review notes: resolve_ts_alias now prefers the longest matching alias,
load_tsconfig_paths tolerates malformed or oddly-shaped configs, and the tests
pin both behaviours.  Line breaks and indentation of this patch were
reconstructed from a whitespace-collapsed copy and the index hashes predate
the review edits; verify context lines against the target tree before applying.

diff --git a/graphify/cache.py b/graphify/cache.py
index 7f73db06..4652ac73 100644
--- a/graphify/cache.py
+++ b/graphify/cache.py
@@ -17,11 +17,13 @@ def _body_content(content: bytes) -> bytes:
     return content
 
 
-def file_hash(path: Path) -> str:
-    """SHA256 of file contents + resolved path. Prevents cache collisions on identical content.
+def file_hash(path: Path, extra_key: bytes = b"") -> str:
+    """SHA256 of file contents + resolved path + optional extra key.
 
     For Markdown files (.md), only the body below the YAML frontmatter is hashed,
     so metadata-only changes (e.g. reviewed, status, tags) do not invalidate the cache.
+    extra_key allows callers to mix in additional context (e.g. a tsconfig
+    hash) so that cache entries are invalidated when that context changes.
     """
     p = Path(path)
     raw = p.read_bytes()
@@ -30,6 +32,9 @@ def file_hash(path: Path) -> str:
     h.update(content)
     h.update(b"\x00")
     h.update(str(p.resolve()).encode())
+    if extra_key:
+        h.update(b"\x00")
+        h.update(extra_key)
     return h.hexdigest()
 
 
@@ -40,15 +45,17 @@ def cache_dir(root: Path = Path(".")) -> Path:
     return d
 
 
-def load_cached(path: Path, root: Path = Path(".")) -> dict | None:
+def load_cached(path: Path, root: Path = Path("."), extra_key: bytes = b"") -> dict | None:
     """Return cached extraction for this file if hash matches, else None.
 
-    Cache key: SHA256 of file contents.
+    Cache key: SHA256 of file contents + resolved path + extra_key.
+    extra_key should include any external context the extraction depends on
+    (e.g. a hash of the effective tsconfig.json for JS/TS files).
     Cache value: stored as graphify-out/cache/{hash}.json
     Returns None if no cache entry or file has changed.
     """
     try:
-        h = file_hash(path)
+        h = file_hash(path, extra_key)
     except OSError:
         return None
     entry = cache_dir(root) / f"{h}.json"
@@ -60,13 +67,15 @@ def load_cached(path: Path, root: Path = Path(".")) -> dict | None:
     return None
 
 
-def save_cached(path: Path, result: dict, root: Path = Path(".")) -> None:
+def save_cached(path: Path, result: dict, root: Path = Path("."), extra_key: bytes = b"") -> None:
     """Save extraction result for this file.
 
-    Stores as graphify-out/cache/{hash}.json where hash = SHA256 of current file contents.
+    Stores as graphify-out/cache/{hash}.json where hash = SHA256 of current
+    file contents + resolved path + extra_key. extra_key must match the value
+    used in load_cached so that lookups and stores are consistent.
     result should be a dict with 'nodes' and 'edges' lists.
     """
-    h = file_hash(path)
+    h = file_hash(path, extra_key)
     entry = cache_dir(root) / f"{h}.json"
     tmp = entry.with_suffix(".tmp")
     try:
diff --git a/graphify/detect.py b/graphify/detect.py
index e9dc701f..d1fb2f85 100644
--- a/graphify/detect.py
+++ b/graphify/detect.py
@@ -488,3 +488,117 @@ def detect_incremental(root: Path, manifest_path: str = _MANIFEST_PATH) -> dict:
     full["new_total"] = new_total
     full["deleted_files"] = deleted_files
     return full
+
+
+def load_tsconfig_paths(root: Path) -> dict[str, str]:
+    """Parse tsconfig.json compilerOptions.paths and return an alias→prefix map.
+
+    Walks up from *root* until a tsconfig.json is found or the filesystem root
+    is reached.  Returns a dict mapping each alias with its trailing ``/*``
+    removed (e.g. ``"@"`` or ``"@components"``) to the absolute path of its
+    first target, resolved against ``baseUrl`` (default: the tsconfig dir).
+
+    Only the first target of each alias is used.  A catch-all ``"*"`` alias is
+    skipped because it leaves no prefix to match, and ``extends`` is not
+    followed.
+
+    Returns an empty dict when no tsconfig.json is found, when it cannot be
+    read or parsed, or when it has no usable ``paths`` mapping.
+    """
+    # Walk up directory tree to find tsconfig.json
+    current = Path(root).resolve()
+    tsconfig_path: Path | None = None
+    while True:
+        candidate = current / "tsconfig.json"
+        if candidate.is_file():
+            tsconfig_path = candidate
+            break
+        parent = current.parent
+        if parent == current:
+            break
+        current = parent
+
+    if tsconfig_path is None:
+        return {}
+
+    try:
+        raw = tsconfig_path.read_text(encoding="utf-8")
+        # tsconfig.json is JSONC: strip // and /* */ comments and trailing commas
+        out: list[str] = []
+        i, n = 0, len(raw)
+        while i < n:
+            c = raw[i]
+            if c == '"':
+                # Copy string literals verbatim so "/*" inside "@/*" survives.
+                j = i + 1
+                while j < n:
+                    if raw[j] == "\\" and j + 1 < n:
+                        j += 2
+                        continue
+                    if raw[j] == '"':
+                        j += 1
+                        break
+                    j += 1
+                out.append(raw[i:j])
+                i = j
+            elif c == "/" and i + 1 < n and raw[i + 1] == "/":
+                nl = raw.find("\n", i)
+                i = n if nl == -1 else nl
+            elif c == "/" and i + 1 < n and raw[i + 1] == "*":
+                end = raw.find("*/", i + 2)
+                i = n if end == -1 else end + 2
+            else:
+                out.append(c)
+                i += 1
+        stripped = re.sub(r",\s*([}\]])", r"\1", "".join(out))
+        data = json.loads(stripped)
+    except (OSError, ValueError):
+        # Unreadable, undecodable or malformed config: behave as if absent.
+        return {}
+
+    # Guard against structurally unexpected JSON (e.g. a top-level list).
+    if not isinstance(data, dict):
+        return {}
+    compiler_options = data.get("compilerOptions")
+    if not isinstance(compiler_options, dict):
+        return {}
+    base_url = compiler_options.get("baseUrl", ".")
+    paths = compiler_options.get("paths")
+    if not isinstance(paths, dict) or not isinstance(base_url, str):
+        return {}
+
+    alias_map: dict[str, str] = {}
+    tsconfig_dir = tsconfig_path.parent
+    base_dir = (tsconfig_dir / base_url).resolve()
+
+    for alias, targets in paths.items():
+        # Each value must be a non-empty list of strings; skip anything else.
+        if not isinstance(targets, list) or not targets:
+            continue
+        if not isinstance(targets[0], str):
+            continue
+        # Strip trailing /* from alias to get the prefix used in import strings
+        alias_prefix = alias.rstrip("*").rstrip("/")
+        if not alias_prefix:
+            # Catch-all "*" pattern: no literal prefix to match on.
+            continue
+        # Use first target, strip trailing /*
+        target = targets[0].rstrip("*").rstrip("/")
+        resolved = (base_dir / target).resolve()
+        alias_map[alias_prefix] = str(resolved)
+
+    return alias_map
+
+
+def resolve_ts_alias(import_path: str, alias_map: dict[str, str]) -> str:
+    """Replace a TypeScript path alias with its resolved filesystem path.
+
+    When several aliases match, the longest prefix wins, so a specific alias
+    such as ``@/components`` takes precedence over a general one such as ``@``.
+    Returns the original *import_path* unchanged if no alias matches.
+    """
+    # Longest prefix first; the result must not depend on dict insertion order.
+    for alias_prefix in sorted(alias_map, key=len, reverse=True):
+        if import_path == alias_prefix or import_path.startswith(alias_prefix + "/"):
+            return alias_map[alias_prefix] + import_path[len(alias_prefix):]
+    return import_path
diff --git a/graphify/extract.py b/graphify/extract.py
index 65e62c64..a977cb24 100644
--- a/graphify/extract.py
+++ b/graphify/extract.py
@@ -125,11 +125,16 @@ def _import_python(node, source: bytes, file_nid: str, stem: str, edges: list, s
             })
 
 
-def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_path: str) -> None:
+def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_path: str,
+               alias_map: dict | None = None) -> None:
     for child in node.children:
         if child.type == "string":
             raw = _read_text(child, source).strip("'\"` ")
-            module_name = raw.lstrip("./").split("/")[-1]
+            resolved = raw
+            if alias_map:
+                from .detect import resolve_ts_alias
+                resolved = resolve_ts_alias(raw, alias_map)
+            module_name = resolved.lstrip("./").split("/")[-1]
             if module_name:
                 tgt_nid = _make_id(module_name)
                 edges.append({
@@ -1097,7 +1102,20 @@ def extract_python(path: Path) -> dict:
 
 def extract_js(path: Path) -> dict:
     """Extract classes, functions, arrow functions, and imports from a .js/.ts/.tsx file."""
-    config = _TS_CONFIG if path.suffix in (".ts", ".tsx") else _JS_CONFIG
+    from .detect import load_tsconfig_paths
+    import dataclasses
+
+    base_config = _TS_CONFIG if path.suffix in (".ts", ".tsx") else _JS_CONFIG
+    alias_map = load_tsconfig_paths(path.parent)
+
+    if alias_map:
+        def _import_js_with_aliases(node, source, file_nid, stem, edges, str_path):
+            _import_js(node, source, file_nid, stem, edges, str_path, alias_map=alias_map)
+
+        config = dataclasses.replace(base_config, import_handler=_import_js_with_aliases)
+    else:
+        config = base_config
+
     return _extract_generic(path, config)
 
 
@@ -2632,13 +2650,24 @@ def extract(paths: list[Path]) -> dict:
         extractor = _DISPATCH.get(path.suffix)
         if extractor is None:
             continue
-        cached = load_cached(path, root)
+        # For JS/TS files, mix a hash of the resolved tsconfig alias map into the
+        # cache key so that alias-map changes invalidate cached import edges.
+        extra_key = b""
+        if path.suffix in (".js", ".ts", ".tsx", ".jsx"):
+            from .detect import load_tsconfig_paths
+            import hashlib as _hashlib
+            alias_map = load_tsconfig_paths(path.parent)
+            if alias_map:
+                extra_key = _hashlib.sha256(
+                    json.dumps(alias_map, sort_keys=True).encode()
+                ).digest()
+        cached = load_cached(path, root, extra_key)
         if cached is not None:
             per_file.append(cached)
             continue
         result = extractor(path)
         if "error" not in result:
-            save_cached(path, result, root)
+            save_cached(path, result, root, extra_key)
         per_file.append(result)
     if total >= _PROGRESS_INTERVAL:
         print(f" AST extraction: {total}/{total} files (100%)", flush=True)
diff --git a/tests/fixtures/tsconfig_alias/src/pages/Home.ts b/tests/fixtures/tsconfig_alias/src/pages/Home.ts
new file mode 100644
index 00000000..0e8e683a
--- /dev/null
+++ b/tests/fixtures/tsconfig_alias/src/pages/Home.ts
@@ -0,0 +1,11 @@
+import Button from "@/components/Button";
+import { useAuth } from "@/hooks/useAuth";
+import Sidebar from "@components/Sidebar";
+
+class HomePage {
+  render() {
+    return "home";
+  }
+}
+
+export { HomePage };
diff --git a/tests/fixtures/tsconfig_alias/tsconfig.json b/tests/fixtures/tsconfig_alias/tsconfig.json
new file mode 100644
index 00000000..f9685c7d
--- /dev/null
+++ b/tests/fixtures/tsconfig_alias/tsconfig.json
@@ -0,0 +1,9 @@
+{
+  "compilerOptions": {
+    "baseUrl": ".",
+    "paths": {
+      "@/*": ["src/*"],
+      "@components/*": ["src/components/*"]
+    }
+  }
+}
diff --git a/tests/test_extract.py b/tests/test_extract.py
index 3d5b9f53..2d3a047d 100644
--- a/tests/test_extract.py
+++ b/tests/test_extract.py
@@ -1,5 +1,6 @@
 from pathlib import Path
-from graphify.extract import extract_python, extract, collect_files, _make_id
+from graphify.extract import extract_python, extract, collect_files, _make_id, extract_js
+from graphify.detect import load_tsconfig_paths, resolve_ts_alias
 
 FIXTURES = Path(__file__).parent / "fixtures"
 
@@ -168,3 +169,59 @@ def test_calls_deduplication():
     result = extract_python(FIXTURES / "sample_calls.py")
     call_pairs = [(e["source"], e["target"]) for e in result["edges"] if e["relation"] == "calls"]
     assert len(call_pairs) == len(set(call_pairs)), "Duplicate calls edges found"
+
+
+# ── tsconfig path alias tests ─────────────────────────────────────────────────
+
+TSCONFIG_FIXTURE = FIXTURES / "tsconfig_alias"
+
+
+def test_load_tsconfig_paths_finds_config():
+    """load_tsconfig_paths finds the fixture tsconfig.json and strips the /* suffixes."""
+    alias_map = load_tsconfig_paths(TSCONFIG_FIXTURE / "src" / "pages")
+    assert set(alias_map) == {"@", "@components"}
+
+
+def test_load_tsconfig_paths_no_config(tmp_path):
+    """load_tsconfig_paths returns empty dict when no tsconfig.json is found."""
+    result = load_tsconfig_paths(tmp_path)
+    assert result == {}
+
+
+def test_resolve_ts_alias_replaces_prefix():
+    """resolve_ts_alias maps @/foo/bar to the resolved path."""
+    alias_map = {"@": "/project/src", "@components": "/project/src/components"}
+    result = resolve_ts_alias("@/hooks/useAuth", alias_map)
+    assert result == "/project/src/hooks/useAuth"
+
+
+def test_resolve_ts_alias_longer_prefix_wins():
+    """More specific alias (@/components) takes precedence over shorter one (@)."""
+    alias_map = {"@": "/project/src", "@/components": "/project/ui"}
+    result = resolve_ts_alias("@/components/Sidebar", alias_map)
+    assert result == "/project/ui/Sidebar"
+
+
+def test_resolve_ts_alias_no_match():
+    """resolve_ts_alias returns the original path when no alias matches."""
+    alias_map = {"@": "/project/src"}
+    assert resolve_ts_alias("./local/module", alias_map) == "./local/module"
+    assert resolve_ts_alias("react", alias_map) == "react"
+
+
+def test_extract_js_resolves_aliases():
+    """extract_js resolves tsconfig path aliases to real module names in edges."""
+    import pytest
+    result = extract_js(TSCONFIG_FIXTURE / "src" / "pages" / "Home.ts")
+    if result.get("error") and "not installed" in result["error"]:
+        pytest.skip(f"tree-sitter backend not installed: {result['error']}")
+    import_targets = {
+        e["target"] for e in result["edges"] if e["relation"] == "imports_from"
+    }
+    # @/components/Button → Button, @/hooks/useAuth → useAuth, @components/Sidebar → Sidebar
+    lowered = {t.lower() for t in import_targets}
+    assert "button" in lowered, f"Expected 'button' in targets, got: {import_targets}"
+    # Aliases should NOT appear raw as targets
+    assert not any("@" in t for t in import_targets), (
+        f"Raw alias found in targets: {import_targets}"
+    )