Skip to content

Commit 0b3e6eb

Browse files
fix: skip office sidecar writes during dry-run
detect() now accepts a keyword-only write_sidecars parameter (default True). When it is False, office files are added to the file list under their original paths and their words are counted directly, without calling convert_office_file() or touching graphify-out/converted/. The dry-run CLI branch passes write_sidecars=False so the no-write promise holds even for .docx/.xlsx corpora. Adds test_dry_run_office_no_sidecar_written to assert that convert_office_file is never called during dry-run.
1 parent 0007278 commit 0b3e6eb

3 files changed

Lines changed: 26 additions & 8 deletions

File tree

graphify/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,7 @@ def main() -> None:
797797
print(f"error: path not found: {root}", file=sys.stderr)
798798
sys.exit(1)
799799
from graphify.detect import detect as _detect
800-
result = _detect(root)
800+
result = _detect(root, write_sidecars=False)
801801
files = result["files"]
802802
total_files = result["total_files"]
803803
total_words = result["total_words"]

graphify/detect.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ def _is_ignored(path: Path, root: Path, patterns: list[str]) -> bool:
299299
return False
300300

301301

302-
def detect(root: Path, *, follow_symlinks: bool = False) -> dict:
302+
def detect(root: Path, *, follow_symlinks: bool = False, write_sidecars: bool = True) -> dict:
303303
files: dict[FileType, list[str]] = {
304304
FileType.CODE: [],
305305
FileType.DOCUMENT: [],
@@ -366,13 +366,18 @@ def detect(root: Path, *, follow_symlinks: bool = False) -> dict:
366366
if ftype:
367367
# Office files: convert to markdown sidecar so subagents can read them
368368
if p.suffix.lower() in OFFICE_EXTENSIONS:
369-
md_path = convert_office_file(p, converted_dir)
370-
if md_path:
371-
files[ftype].append(str(md_path))
372-
total_words += count_words(md_path)
369+
if write_sidecars:
370+
md_path = convert_office_file(p, converted_dir)
371+
if md_path:
372+
files[ftype].append(str(md_path))
373+
total_words += count_words(md_path)
374+
else:
375+
# Conversion failed (library not installed) - skip with note
376+
skipped_sensitive.append(str(p) + " [office conversion failed - pip install graphifyy[office]]")
373377
else:
374-
# Conversion failed (library not installed) - skip with note
375-
skipped_sensitive.append(str(p) + " [office conversion failed - pip install graphifyy[office]]")
378+
# write_sidecars=False (e.g. dry-run): list the original office file and count its words directly, without writing a sidecar
379+
files[ftype].append(str(p))
380+
total_words += count_words(p)
376381
continue
377382
files[ftype].append(str(p))
378383
total_words += count_words(p)

tests/test_dry_run.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,16 @@ def test_dry_run_no_graphify_out_written(tmp_path):
6060
(tmp_path / "a.py").write_text("a = 1\n")
6161
out, _ = _run_main(["graphify", "dry-run", str(tmp_path)])
6262
assert "No files were written" in out
63+
64+
65+
def test_dry_run_office_no_sidecar_written(tmp_path):
66+
"""dry-run must not write office sidecars even when .docx/.xlsx files are present."""
67+
from unittest.mock import MagicMock, patch as mpatch
68+
69+
# Create a fake .docx so detect sees it as an office file
70+
(tmp_path / "report.docx").write_bytes(b"PK\x03\x04") # minimal docx magic bytes
71+
72+
with mpatch("graphify.detect.convert_office_file") as mock_convert:
73+
_run_main(["graphify", "dry-run", str(tmp_path)])
74+
75+
mock_convert.assert_not_called()

0 commit comments

Comments
 (0)