Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions graphify/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@ def _body_content(content: bytes) -> bytes:
return content


def file_hash(path: Path) -> str:
"""SHA256 of file contents + resolved path. Prevents cache collisions on identical content.
def file_hash(path: Path, extra_key: bytes = b"") -> str:
"""SHA256 of file contents + resolved path + optional extra key.

For Markdown files (.md), only the body below the YAML frontmatter is hashed,
so metadata-only changes (e.g. reviewed, status, tags) do not invalidate the cache.
extra_key allows callers to mix in additional context (e.g. a tsconfig
hash) so that cache entries are invalidated when that context changes.
"""
p = Path(path)
raw = p.read_bytes()
Expand All @@ -30,6 +32,9 @@ def file_hash(path: Path) -> str:
h.update(content)
h.update(b"\x00")
h.update(str(p.resolve()).encode())
if extra_key:
h.update(b"\x00")
h.update(extra_key)
return h.hexdigest()


Expand All @@ -40,15 +45,17 @@ def cache_dir(root: Path = Path(".")) -> Path:
return d


def load_cached(path: Path, root: Path = Path(".")) -> dict | None:
def load_cached(path: Path, root: Path = Path("."), extra_key: bytes = b"") -> dict | None:
"""Return cached extraction for this file if hash matches, else None.

Cache key: SHA256 of file contents.
Cache key: SHA256 of file contents + resolved path + extra_key.
extra_key should include any external context the extraction depends on
(e.g. a hash of the effective tsconfig.json for JS/TS files).
Cache value: stored as graphify-out/cache/{hash}.json
Returns None if no cache entry or file has changed.
"""
try:
h = file_hash(path)
h = file_hash(path, extra_key)
except OSError:
return None
entry = cache_dir(root) / f"{h}.json"
Expand All @@ -60,13 +67,15 @@ def load_cached(path: Path, root: Path = Path(".")) -> dict | None:
return None


def save_cached(path: Path, result: dict, root: Path = Path(".")) -> None:
def save_cached(path: Path, result: dict, root: Path = Path("."), extra_key: bytes = b"") -> None:
"""Save extraction result for this file.

Stores as graphify-out/cache/{hash}.json where hash = SHA256 of current file contents.
Stores as graphify-out/cache/{hash}.json where hash = SHA256 of current
file contents + resolved path + extra_key. extra_key must match the value
used in load_cached so that lookups and stores are consistent.
result should be a dict with 'nodes' and 'edges' lists.
"""
h = file_hash(path)
h = file_hash(path, extra_key)
entry = cache_dir(root) / f"{h}.json"
tmp = entry.with_suffix(".tmp")
try:
Expand Down
97 changes: 97 additions & 0 deletions graphify/detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,3 +488,100 @@ def detect_incremental(root: Path, manifest_path: str = _MANIFEST_PATH) -> dict:
full["new_total"] = new_total
full["deleted_files"] = deleted_files
return full


def load_tsconfig_paths(root: Path) -> dict[str, str]:
"""Parse tsconfig.json compilerOptions.paths and return an alias→prefix map.

Walks up from *root* until a tsconfig.json is found or the filesystem root
is reached. Returns a dict mapping each alias prefix (e.g. ``"@/"`` or
``"@components/"```) to its resolved filesystem prefix (e.g. ``"src/"``).

Only the first glob pattern for each alias is used; ``*`` wildcards are
stripped to give a plain prefix that can be used with ``str.startswith``.

Returns an empty dict when no tsconfig.json is found or when it contains
no ``paths`` mapping.
"""
# Walk up directory tree to find tsconfig.json
current = Path(root).resolve()
tsconfig_path: Path | None = None
while True:
candidate = current / "tsconfig.json"
if candidate.exists():
tsconfig_path = candidate
break
parent = current.parent
if parent == current:
break
current = parent

if tsconfig_path is None:
return {}

try:
raw = tsconfig_path.read_text(encoding="utf-8")
# tsconfig.json is JSONC: strip // and /* */ comments and trailing commas
out: list[str] = []
i, n = 0, len(raw)
while i < n:
c = raw[i]
if c == '"':
j = i + 1
while j < n:
if raw[j] == "\\" and j + 1 < n:
j += 2
continue
if raw[j] == '"':
j += 1
break
j += 1
out.append(raw[i:j])
i = j
elif c == "/" and i + 1 < n and raw[i + 1] == "/":
nl = raw.find("\n", i)
i = n if nl == -1 else nl
elif c == "/" and i + 1 < n and raw[i + 1] == "*":
end = raw.find("*/", i + 2)
i = n if end == -1 else end + 2
else:
out.append(c)
i += 1
stripped = re.sub(r",\s*([}\]])", r"\1", "".join(out))
data = json.loads(stripped)
except Exception:
return {}

compiler_options = data.get("compilerOptions", {})
base_url = compiler_options.get("baseUrl", ".")
paths = compiler_options.get("paths", {})
if not paths:
return {}

alias_map: dict[str, str] = {}
tsconfig_dir = tsconfig_path.parent
base_dir = (tsconfig_dir / base_url).resolve()

for alias, targets in paths.items():
if not targets:
continue
# Strip trailing /* from alias to get the prefix used in import strings
alias_prefix = alias.rstrip("*").rstrip("/")
# Use first target, strip trailing /*
target = targets[0].rstrip("*").rstrip("/")
resolved = (base_dir / target).resolve()
alias_map[alias_prefix] = str(resolved)

return alias_map


def resolve_ts_alias(import_path: str, alias_map: dict[str, str]) -> str:
    """Replace a TypeScript path alias with its resolved filesystem path.

    The longest matching alias prefix wins, so a more specific alias such as
    ``"@app/components"`` takes precedence over a shorter one like ``"@app"``
    even when both would match. A prefix matches only on an exact hit or at
    a ``/`` boundary, never mid-segment.

    Returns the original *import_path* unchanged if no alias matches.
    """
    # Try the most specific alias first: plain dict order would let "@a"
    # shadow "@a/b" when resolving "@a/b/x".
    for alias_prefix in sorted(alias_map, key=len, reverse=True):
        if import_path == alias_prefix or import_path.startswith(alias_prefix + "/"):
            remainder = import_path[len(alias_prefix):]
            return alias_map[alias_prefix] + remainder
    return import_path
39 changes: 34 additions & 5 deletions graphify/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,16 @@ def _import_python(node, source: bytes, file_nid: str, stem: str, edges: list, s
})


def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_path: str) -> None:
def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_path: str,
alias_map: dict | None = None) -> None:
for child in node.children:
if child.type == "string":
raw = _read_text(child, source).strip("'\"` ")
module_name = raw.lstrip("./").split("/")[-1]
resolved = raw
if alias_map:
from .detect import resolve_ts_alias
resolved = resolve_ts_alias(raw, alias_map)
module_name = resolved.lstrip("./").split("/")[-1]
if module_name:
tgt_nid = _make_id(module_name)
edges.append({
Expand Down Expand Up @@ -1097,7 +1102,20 @@ def extract_python(path: Path) -> dict:

def extract_js(path: Path) -> dict:
    """Extract classes, functions, arrow functions, and imports from a .js/.ts/.tsx file.

    When a tsconfig.json with compilerOptions.paths is found above *path*,
    import specifiers are run through the alias map so that edges point at
    resolved module names instead of raw aliases like "@/...".
    """
    from .detect import load_tsconfig_paths
    import dataclasses

    # TS grammar for .ts/.tsx; JS grammar for everything else.
    base_config = _TS_CONFIG if path.suffix in (".ts", ".tsx") else _JS_CONFIG
    alias_map = load_tsconfig_paths(path.parent)

    if alias_map:
        # Close over the alias map so the generic extractor's import-handler
        # signature stays unchanged.
        def _import_js_with_aliases(node, source, file_nid, stem, edges, str_path):
            _import_js(node, source, file_nid, stem, edges, str_path, alias_map=alias_map)

        config = dataclasses.replace(base_config, import_handler=_import_js_with_aliases)
    else:
        config = base_config

    return _extract_generic(path, config)


Expand Down Expand Up @@ -2632,13 +2650,24 @@ def extract(paths: list[Path]) -> dict:
extractor = _DISPATCH.get(path.suffix)
if extractor is None:
continue
cached = load_cached(path, root)
# For JS/TS files, include the effective tsconfig.json content in the
# cache key so that alias-map changes invalidate cached import edges.
extra_key = b""
if path.suffix in (".js", ".ts", ".tsx", ".jsx"):
from .detect import load_tsconfig_paths
import hashlib as _hashlib
alias_map = load_tsconfig_paths(path.parent)
if alias_map:
extra_key = _hashlib.sha256(
json.dumps(alias_map, sort_keys=True).encode()
).digest()
cached = load_cached(path, root, extra_key)
if cached is not None:
per_file.append(cached)
continue
result = extractor(path)
if "error" not in result:
save_cached(path, result, root)
save_cached(path, result, root, extra_key)
per_file.append(result)
if total >= _PROGRESS_INTERVAL:
print(f" AST extraction: {total}/{total} files (100%)", flush=True)
Expand Down
11 changes: 11 additions & 0 deletions tests/fixtures/tsconfig_alias/src/pages/Home.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Fixture for tsconfig path-alias resolution tests: the three imports below
// use the "@/..." and "@components/..." aliases defined in the sibling
// tsconfig.json and should resolve to Button, useAuth, and Sidebar.
import Button from "@/components/Button";
import { useAuth } from "@/hooks/useAuth";
import Sidebar from "@components/Sidebar";

// Minimal exported class so class extraction has a node to emit.
class HomePage {
render() {
return "home";
}
}

export { HomePage };
9 changes: 9 additions & 0 deletions tests/fixtures/tsconfig_alias/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"compilerOptions": {
"baseUrl": ".",
"paths": {
"@/*": ["src/*"],
"@components/*": ["src/components/*"]
}
}
}
59 changes: 58 additions & 1 deletion tests/test_extract.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pathlib import Path
from graphify.extract import extract_python, extract, collect_files, _make_id
from graphify.extract import extract_python, extract, collect_files, _make_id, extract_js
from graphify.detect import load_tsconfig_paths, resolve_ts_alias

FIXTURES = Path(__file__).parent / "fixtures"

Expand Down Expand Up @@ -168,3 +169,59 @@ def test_calls_deduplication():
result = extract_python(FIXTURES / "sample_calls.py")
call_pairs = [(e["source"], e["target"]) for e in result["edges"] if e["relation"] == "calls"]
assert len(call_pairs) == len(set(call_pairs)), "Duplicate calls edges found"


# ── tsconfig path alias tests ─────────────────────────────────────────────────

TSCONFIG_FIXTURE = FIXTURES / "tsconfig_alias"


def test_load_tsconfig_paths_finds_config():
    """A tsconfig.json with paths above the fixture dir yields a non-empty alias map."""
    aliases = load_tsconfig_paths(TSCONFIG_FIXTURE / "src" / "pages")
    has_at_alias = "@" in aliases or any(prefix.startswith("@") for prefix in aliases)
    assert has_at_alias


def test_load_tsconfig_paths_no_config(tmp_path):
    """With no tsconfig.json anywhere above tmp_path, the alias map is empty."""
    assert load_tsconfig_paths(tmp_path) == {}


def test_resolve_ts_alias_replaces_prefix():
    """An import starting with a known alias is rewritten to the resolved path."""
    aliases = {"@": "/project/src", "@components": "/project/src/components"}
    resolved = resolve_ts_alias("@/hooks/useAuth", aliases)
    assert resolved == "/project/src/hooks/useAuth"


def test_resolve_ts_alias_longer_prefix_wins():
    """The @components alias, not the shorter @, resolves @components/* imports."""
    aliases = {"@": "/project/src", "@components": "/project/src/components"}
    got = resolve_ts_alias("@components/Sidebar", aliases)
    assert got == "/project/src/components/Sidebar"


def test_resolve_ts_alias_no_match():
    """Relative and bare-package imports pass through untouched."""
    aliases = {"@": "/project/src"}
    for spec in ("./local/module", "react"):
        assert resolve_ts_alias(spec, aliases) == spec


def test_extract_js_resolves_aliases():
    """extract_js resolves tsconfig path aliases to real module names in edges."""
    import pytest

    result = extract_js(TSCONFIG_FIXTURE / "src" / "pages" / "Home.ts")
    if result.get("error") and "not installed" in result["error"]:
        pytest.skip(f"tree-sitter backend not installed: {result['error']}")

    import_targets = {
        edge["target"]
        for edge in result["edges"]
        if edge["relation"] == "imports_from"
    }
    # @/components/Button → Button, @/hooks/useAuth → useAuth, @components/Sidebar → Sidebar
    assert "button" in {t.lower() for t in import_targets}, (
        f"Expected 'button' in targets, got: {import_targets}"
    )
    # Aliases should NOT appear raw as targets
    raw_aliases = [t for t in import_targets if "@" in t]
    assert not raw_aliases, f"Raw alias found in targets: {import_targets}"