From a868c8afc9b8dc84fba4f1ab5d88dbd157df2cf2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gr=C3=A9goire=20Compagnon?=
Date: Mon, 7 Jul 2025 00:47:04 +0200
Subject: [PATCH] feat(cli): add options to disable exports

---
Review notes (this text sits between the "---" separator and the diffstat,
so it is commentary only and is not part of the commit):

* crawler_to_md/cli.py: registers two boolean flags, --no-markdown and
  --no-json (action="store_true", default False). In main(), the compiled
  Markdown export, the compiled JSON export, and the two matching
  "file generated at" messages now run only when the corresponding flag
  is not set. markdown_path / json_path are still computed in both cases.
  The args.export_individual handling appears only as unchanged context.

* tests/test_cli.py (new file): _run_cli() swaps
  ExportManager.export_to_markdown, ExportManager.export_to_json and
  Scraper.start_scraping for stubs via monkeypatch, runs cli.main() with a
  patched sys.argv (--url, --output-folder, --cache-folder plus any extra
  arguments), and returns a dict recording which export stubs were called.
  test_cli_default_exports expects both exports to be called;
  test_cli_disable_exports passes both flags and expects neither.

* NOTE(review): the two flags are only exercised together; nothing here
  runs --no-markdown or --no-json on its own.

* NOTE(review): line breaks and indentation in this copy were rebuilt from
  a whitespace-collapsed source. In the second cli.py hunk the blank
  context lines and the blank removed line are inferred from the header
  counts (-174,12 +186,13), and the 4-space indentation inside main() is
  assumed -- confirm against crawler_to_md/cli.py at blob fde7dcf before
  applying.

 crawler_to_md/cli.py | 29 +++++++++++++++++++++-------
 tests/test_cli.py    | 46 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 7 deletions(-)
 create mode 100644 tests/test_cli.py

diff --git a/crawler_to_md/cli.py b/crawler_to_md/cli.py
index fde7dcf..ae6475b 100644
--- a/crawler_to_md/cli.py
+++ b/crawler_to_md/cli.py
@@ -88,6 +88,18 @@ def main():
         help="Delay between requests in seconds",
         default=0,
     )
+    parser.add_argument(
+        "--no-markdown",
+        action="store_true",
+        help="Disable generation of the compiled Markdown file",
+        default=False,
+    )
+    parser.add_argument(
+        "--no-json",
+        action="store_true",
+        help="Disable generation of the compiled JSON file",
+        default=False,
+    )
 
     try:
         import argcomplete
@@ -174,12 +186,13 @@ def main():
 
     logger.info("ExportManager initialized.")
 
-    export_manager.export_to_markdown(os.path.join(output, f"{output_name}.md"))
-    logger.info("Export to markdown completed.")
-
+    if not args.no_markdown:
+        export_manager.export_to_markdown(os.path.join(output, f"{output_name}.md"))
+        logger.info("Export to markdown completed.")
 
-    export_manager.export_to_json(os.path.join(output, f"{output_name}.json"))
-    logger.info("Export to JSON completed.")
+    if not args.no_json:
+        export_manager.export_to_json(os.path.join(output, f"{output_name}.json"))
+        logger.info("Export to JSON completed.")
 
     output_folder_ei = None
     if args.export_individual:
@@ -191,8 +204,10 @@ def main():
 
     markdown_path = os.path.join(output, f"{output_name}.md")
     json_path = os.path.join(output, f"{output_name}.json")
-    print("\033[94mMarkdown file generated at: \033[0m", markdown_path)
-    print("\033[92mJSON file generated at: \033[0m", json_path)
+    if not args.no_markdown:
+        print("\033[94mMarkdown file generated at: \033[0m", markdown_path)
+    if not args.no_json:
+        print("\033[92mJSON file generated at: \033[0m", json_path)
     if args.export_individual and output_folder_ei:
         print(
             "\033[95mIndividual Markdown files exported to: \033[0m",
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..063bcc7
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,46 @@
+import sys
+from crawler_to_md import cli
+from crawler_to_md.export_manager import ExportManager
+from crawler_to_md.scraper import Scraper
+
+
+def _run_cli(monkeypatch, tmp_path, extra_args):
+    calls = {"md": False, "json": False}
+
+    def fake_export_markdown(self, path):
+        calls["md"] = True
+
+    def fake_export_json(self, path):
+        calls["json"] = True
+
+    monkeypatch.setattr(ExportManager, "export_to_markdown", fake_export_markdown)
+    monkeypatch.setattr(ExportManager, "export_to_json", fake_export_json)
+    monkeypatch.setattr(Scraper, "start_scraping", lambda *a, **k: None)
+
+    cache_folder = tmp_path / "cache"
+    args = [
+        "prog",
+        "--url",
+        "http://example.com",
+        "--output-folder",
+        str(tmp_path),
+        "--cache-folder",
+        str(cache_folder),
+    ] + extra_args
+
+    monkeypatch.setattr(sys, "argv", args)
+    cli.main()
+    return calls
+
+
+def test_cli_default_exports(monkeypatch, tmp_path):
+    calls = _run_cli(monkeypatch, tmp_path, [])
+    assert calls["md"] is True
+    assert calls["json"] is True
+
+
+def test_cli_disable_exports(monkeypatch, tmp_path):
+    calls = _run_cli(monkeypatch, tmp_path, ["--no-markdown", "--no-json"])
+    assert calls["md"] is False
+    assert calls["json"] is False
+