259 changes: 175 additions & 84 deletions graphify/__main__.py

Large diffs are not rendered by default.

7 changes: 5 additions & 2 deletions graphify/benchmark.py
@@ -62,19 +62,22 @@ def _query_subgraph_tokens(G: nx.Graph, question: str, depth: int = 3) -> int:
 
 
 def run_benchmark(
-    graph_path: str = "graphify-out/graph.json",
+    graph_path: str | Path | None = None,
     corpus_words: int | None = None,
     questions: list[str] | None = None,
 ) -> dict:
     """Measure token reduction: corpus tokens vs graphify query tokens.
 
     Args:
-        graph_path: path to the built graph
+        graph_path: path to the built graph; defaults to ``$GRAPHIFY_HOME/graph.json``
        corpus_words: total word count from detect() output; if None, estimated from graph
        questions: list of questions to benchmark; defaults to _SAMPLE_QUESTIONS
 
     Returns dict with: corpus_tokens, avg_query_tokens, reduction_ratio, per_question
     """
+    if graph_path is None:
+        from . import paths as _paths
+        graph_path = _paths.graph_path()
    data = json.loads(Path(graph_path).read_text(encoding="utf-8"))
    try:
        G = json_graph.node_link_graph(data, edges="links")
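
A usage sketch of the new default: with graph_path omitted, the benchmark should pick up the graph under $GRAPHIFY_HOME. The resolution logic lives in graphify.paths, which this diff does not show, so the assumption here is that the variable is read at call time; the path below is a hypothetical example.

    # Point graphify at a custom home, then benchmark without passing a path.
    import os
    os.environ["GRAPHIFY_HOME"] = "/tmp/graphify-demo"   # hypothetical location
    from graphify.benchmark import run_benchmark
    stats = run_benchmark()   # graph_path=None resolves to $GRAPHIFY_HOME/graph.json
    print(stats["reduction_ratio"])
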
7 changes: 6 additions & 1 deletion graphify/build.py
@@ -180,16 +180,21 @@ def deduplicate_by_label(nodes: list[dict], edges: list[dict]) -> tuple[list[dic
 
 def build_merge(
     new_chunks: list[dict],
-    graph_path: str | Path = "graphify-out/graph.json",
+    graph_path: str | Path | None = None,
     prune_sources: list[str] | None = None,
     *,
     directed: bool = False,
 ) -> nx.Graph:
     """Load existing graph.json, merge new chunks into it, and save back.
 
+    *graph_path* defaults to ``$GRAPHIFY_HOME/graph.json``.
+
     Never replaces — only grows (or prunes deleted-file nodes via prune_sources).
     Safe to call repeatedly: existing nodes and edges are preserved.
     """
+    if graph_path is None:
+        from . import paths as _paths
+        graph_path = _paths.graph_path()
    from networkx.readwrite import json_graph as _jg
 
    graph_path = Path(graph_path)
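
The same lazy-default pattern applies to build_merge. A minimal sketch of an incremental merge against the default graph location; the chunk schema is not shown in this diff, so the placeholder list below stands in for real extraction output.

    # Merge freshly extracted chunks into the existing graph at the default location.
    from graphify.build import build_merge

    new_chunks = [...]            # extraction output; schema not shown in this diff
    G = build_merge(new_chunks)   # graph_path=None resolves to $GRAPHIFY_HOME/graph.json
    print(G.number_of_nodes(), G.number_of_edges())
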
16 changes: 9 additions & 7 deletions graphify/cache.py
@@ -44,17 +44,19 @@ def file_hash(path: Path, root: Path = Path(".")) -> str:
 
 
 def cache_dir(root: Path = Path(".")) -> Path:
-    """Returns graphify-out/cache/ - creates it if needed."""
-    d = Path(root).resolve() / "graphify-out" / "cache"
-    d.mkdir(parents=True, exist_ok=True)
-    return d
+    """Per-file extraction cache directory under *root* — created if missing.
+
+    Resolves to ``$GRAPHIFY_HOME/cache``. See :mod:`graphify.paths`.
+    """
+    from . import paths
+    return paths.cache_dir(root, create=True)
 
 
 def load_cached(path: Path, root: Path = Path(".")) -> dict | None:
     """Return cached extraction for this file if hash matches, else None.
 
     Cache key: SHA256 of file contents.
-    Cache value: stored as graphify-out/cache/{hash}.json
+    Cache value: stored as ``<cache_dir>/{hash}.json``.
     Returns None if no cache entry or file has changed.
     """
    try:
@@ -73,7 +75,7 @@ def load_cached(path: Path, root: Path = Path(".")) -> dict | None:
 def save_cached(path: Path, result: dict, root: Path = Path(".")) -> None:
     """Save extraction result for this file.
 
-    Stores as graphify-out/cache/{hash}.json where hash = SHA256 of current file contents.
+    Stores as ``<cache_dir>/{hash}.json`` where hash = SHA256 of current file contents.
     result should be a dict with 'nodes' and 'edges' lists.
 
     No-ops if `path` is not a regular file. Subagent-produced semantic fragments
@@ -108,7 +110,7 @@ def cached_files(root: Path = Path(".")) -> set[str]:
 
 
 def clear_cache(root: Path = Path(".")) -> None:
-    """Delete all graphify-out/cache/*.json files."""
+    """Delete all cached extraction entries."""
    d = cache_dir(root)
    for f in d.glob("*.json"):
        f.unlink()
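
All three modules now delegate path resolution to graphify.paths, which is not part of this diff. Below is a hypothetical sketch of that module, inferred only from the call sites above (paths.graph_path() and paths.cache_dir(root, create=True)) and from the $GRAPHIFY_HOME references in the docstrings; the real module may differ, including in its fallback behavior.

    # Hypothetical graphify/paths.py: an inferred sketch, not the actual module.
    import os
    from pathlib import Path

    def home(root: str | Path = ".") -> Path:
        """Base output dir: $GRAPHIFY_HOME if set, else <root>/graphify-out (assumed fallback)."""
        env = os.environ.get("GRAPHIFY_HOME")
        return Path(env) if env else Path(root).resolve() / "graphify-out"

    def graph_path(root: str | Path = ".") -> Path:
        """Location of the merged graph JSON."""
        return home(root) / "graph.json"

    def cache_dir(root: str | Path = ".", create: bool = False) -> Path:
        """Per-file extraction cache directory, optionally created."""
        d = home(root) / "cache"
        if create:
            d.mkdir(parents=True, exist_ok=True)
        return d
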