Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions crawler_to_md/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ def main():
help="Delay between requests in seconds",
default=0,
)
parser.add_argument(
"--no-markdown",
action="store_true",
help="Disable generation of the compiled Markdown file",
default=False,
)
parser.add_argument(
"--no-json",
action="store_true",
help="Disable generation of the compiled JSON file",
default=False,
)

try:
import argcomplete
Expand Down Expand Up @@ -174,12 +186,13 @@ def main():
logger.info("ExportManager initialized.")


export_manager.export_to_markdown(os.path.join(output, f"{output_name}.md"))
logger.info("Export to markdown completed.")

if not args.no_markdown:
export_manager.export_to_markdown(os.path.join(output, f"{output_name}.md"))
logger.info("Export to markdown completed.")

export_manager.export_to_json(os.path.join(output, f"{output_name}.json"))
logger.info("Export to JSON completed.")
if not args.no_json:
export_manager.export_to_json(os.path.join(output, f"{output_name}.json"))
logger.info("Export to JSON completed.")

output_folder_ei = None
if args.export_individual:
Expand All @@ -191,8 +204,10 @@ def main():

markdown_path = os.path.join(output, f"{output_name}.md")
json_path = os.path.join(output, f"{output_name}.json")
print("\033[94mMarkdown file generated at: \033[0m", markdown_path)
print("\033[92mJSON file generated at: \033[0m", json_path)
if not args.no_markdown:
print("\033[94mMarkdown file generated at: \033[0m", markdown_path)
if not args.no_json:
print("\033[92mJSON file generated at: \033[0m", json_path)
if args.export_individual and output_folder_ei:
print(
"\033[95mIndividual Markdown files exported to: \033[0m",
Expand Down
46 changes: 46 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import sys
from crawler_to_md import cli
from crawler_to_md.export_manager import ExportManager
from crawler_to_md.scraper import Scraper


def _run_cli(monkeypatch, tmp_path, extra_args):
    """Run ``cli.main()`` with stubbed exporters and report which ones ran.

    ``ExportManager.export_to_markdown`` and ``ExportManager.export_to_json``
    are replaced by stubs that only record that they were called, and
    ``Scraper.start_scraping`` is replaced by a no-op.

    Args:
        monkeypatch: pytest fixture used to install the stubs and ``sys.argv``.
        tmp_path: pytest temporary directory; passed as ``--output-folder``,
            with its ``cache`` child passed as ``--cache-folder``.
        extra_args: list of additional command-line arguments appended after
            the base arguments.

    Returns:
        A dict with boolean entries ``"md"`` and ``"json"`` telling whether
        the corresponding export stub was invoked.
    """
    invoked = {"md": False, "json": False}

    def _recording_stub(flag):
        # Build a replacement method that flips one flag when called.
        def _stub(self, path):
            invoked[flag] = True

        return _stub

    patched_exports = (
        ("export_to_markdown", "md"),
        ("export_to_json", "json"),
    )
    for method_name, flag in patched_exports:
        monkeypatch.setattr(ExportManager, method_name, _recording_stub(flag))

    def _skip_scraping(*_args, **_kwargs):
        return None

    monkeypatch.setattr(Scraper, "start_scraping", _skip_scraping)

    base_argv = [
        "prog",
        "--url",
        "http://example.com",
        "--output-folder",
        str(tmp_path),
        "--cache-folder",
        str(tmp_path / "cache"),
    ]
    monkeypatch.setattr(sys, "argv", base_argv + extra_args)

    cli.main()
    return invoked


def test_cli_default_exports(monkeypatch, tmp_path):
    """With no extra flags, both the Markdown and the JSON export are called."""
    recorded = _run_cli(monkeypatch, tmp_path, [])
    for export_kind in ("md", "json"):
        assert recorded[export_kind] is True


def test_cli_disable_exports(monkeypatch, tmp_path):
    """Passing ``--no-markdown`` and ``--no-json`` skips both export calls."""
    disable_flags = ["--no-markdown", "--no-json"]
    recorded = _run_cli(monkeypatch, tmp_path, disable_flags)
    for export_kind in ("md", "json"):
        assert recorded[export_kind] is False

Loading