-
Notifications
You must be signed in to change notification settings - Fork 0
fix: add configurability to search stop words #99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |
|
|
||
| import hashlib | ||
| import json | ||
| import re | ||
| import shutil | ||
| import sys | ||
| import time | ||
|
|
@@ -429,6 +430,15 @@ def build_site( | |
| clean_urls = config.site.clean_urls | ||
| base_path = config.site.base_path or get_base_path(config.site.base_url) | ||
|
|
||
| # Resolve CDN auto-detection by scanning raw content | ||
| math_cdn = config.theme.math_cdn | ||
| if math_cdn == "auto": | ||
| _math_re = re.compile(r"\$\$|```math|\$[^\s\d$]") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Previously flagged and still present. |
||
| math_cdn = any(_math_re.search(p.content) for p in pages) | ||
| mermaid_cdn = config.theme.mermaid_cdn | ||
| if mermaid_cdn == "auto": | ||
| mermaid_cdn = any("```mermaid" in p.content for p in pages) | ||
|
|
||
| # Incremental build setup | ||
| manifest: BuildManifest | None = None | ||
| manifest_path = site_root / ".rockgarden" / "build-manifest.json" | ||
|
|
@@ -439,6 +449,7 @@ def build_site( | |
| cur_template_hash = compute_template_hash(site_root, config.theme.name) | ||
| cur_macro_hash = compute_macro_hash(site_root) | ||
| output_dir_str = str(output.resolve()) | ||
| cur_cdn_flags = f"math={math_cdn},mermaid={mermaid_cdn}" | ||
|
|
||
| manifest = BuildManifest.load(manifest_path) | ||
| if manifest and not manifest.needs_full_rebuild( | ||
|
|
@@ -447,6 +458,7 @@ def build_site( | |
| cur_macro_hash, | ||
| output_dir_str, | ||
| len(pages), | ||
| cur_cdn_flags, | ||
| ): | ||
| use_incremental = True | ||
| else: | ||
|
|
@@ -456,6 +468,7 @@ def build_site( | |
| macro_hash=cur_macro_hash, | ||
| output_dir=output_dir_str, | ||
| page_count=len(pages), | ||
| cdn_flags=cur_cdn_flags, | ||
| ) | ||
|
|
||
| collections = partition_collections(pages, config.collections, source) | ||
|
|
@@ -529,14 +542,15 @@ def build_site( | |
| "daisyui_theme": config.theme.daisyui_default, | ||
| "daisyui_themes": config.theme.daisyui_themes, | ||
| "search_enabled": config.theme.search, | ||
| "search_stopwords": config.search.stopwords, | ||
| "build_info": build_info, | ||
| "cache_hash": cache_hash, | ||
| "user_styles": user_styles, | ||
| "user_scripts": user_scripts, | ||
| "assets_dir": assets_dir, | ||
| "main_content_padding": config.theme.main_content_padding, | ||
| "math_cdn": config.theme.math_cdn, | ||
| "mermaid_cdn": config.theme.mermaid_cdn, | ||
| "math_cdn": math_cdn, | ||
| "mermaid_cdn": mermaid_cdn, | ||
| "feed_enabled": config.feed.enabled and bool(config.site.base_url), | ||
| "feed_path": config.feed.path, | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| """Tests for CDN auto-detection (math/mermaid).""" | ||
|
|
||
| from rockgarden.config import Config, SiteConfig, ThemeConfig | ||
| from rockgarden.output.builder import build_site | ||
|
|
||
|
|
||
def _build_with_content(tmp_path, content, theme_config=None):
    """Build a one-page site under *tmp_path* and return the page's HTML.

    *content* is written to ``content/page.md``; the site is built into
    ``output`` and the text of ``output/page/index.html`` is returned.
    A default ``ThemeConfig()`` is used when *theme_config* is falsy.
    """
    source = tmp_path / "content"
    output = tmp_path / "output"
    source.mkdir()
    markdown_file = source / "page.md"
    markdown_file.write_text(content)

    site_settings = SiteConfig(source=source, output=output)
    theme_settings = theme_config if theme_config else ThemeConfig()
    config = Config(site=site_settings, theme=theme_settings)

    build_site(config, source, output)
    rendered_page = output / "page" / "index.html"
    return rendered_page.read_text()
|
|
||
|
|
||
def test_auto_math_detected(tmp_path):
    """Inline ``$...$`` math makes the built page mention katex."""
    markdown = "# Math\n\n$x^2$\n"
    rendered = _build_with_content(tmp_path, markdown).lower()
    assert "katex" in rendered
|
|
||
|
|
||
def test_auto_math_not_detected(tmp_path):
    """A page without math markup must not mention katex."""
    markdown = "# No math\n\nJust text.\n"
    rendered = _build_with_content(tmp_path, markdown).lower()
    assert "katex" not in rendered
|
|
||
|
|
||
def test_auto_mermaid_detected(tmp_path):
    """A mermaid code fence makes the default build load the Mermaid script.

    The assertion looks for ``mermaid.esm`` -- the same marker the other
    mermaid tests in this module check -- instead of the bare word
    ``mermaid``. The page source itself contains that word, so a bare
    substring check may pass even when the script was never included.
    """
    html = _build_with_content(
        tmp_path, "# Diagram\n\n```mermaid\ngraph LR\n  A-->B\n```\n"
    )
    assert "mermaid.esm" in html
|
|
||
|
|
||
def test_auto_mermaid_not_detected(tmp_path):
    """A page without a mermaid fence must not reference ``mermaid.esm``."""
    markdown = "# No diagrams\n\nJust text.\n"
    rendered = _build_with_content(tmp_path, markdown)
    assert "mermaid.esm" not in rendered
|
|
||
|
|
||
def test_math_cdn_true_always_loads(tmp_path):
    """``math_cdn=True`` yields a katex mention even with no math on the page."""
    theme = ThemeConfig(math_cdn=True)
    markdown = "# No math\n\nJust text.\n"
    rendered = _build_with_content(tmp_path, markdown, theme).lower()
    assert "katex" in rendered
|
|
||
|
|
||
def test_math_cdn_false_never_loads(tmp_path):
    """``math_cdn=False`` yields no katex mention even when math is present."""
    theme = ThemeConfig(math_cdn=False)
    markdown = "# Math\n\n$x^2$\n"
    rendered = _build_with_content(tmp_path, markdown, theme).lower()
    assert "katex" not in rendered
|
|
||
|
|
||
def test_mermaid_cdn_true_always_loads(tmp_path):
    """``mermaid_cdn=True`` references ``mermaid.esm`` without any diagram."""
    theme = ThemeConfig(mermaid_cdn=True)
    markdown = "# No diagrams\n\nJust text.\n"
    rendered = _build_with_content(tmp_path, markdown, theme)
    assert "mermaid.esm" in rendered
|
|
||
|
|
||
def test_mermaid_cdn_false_never_loads(tmp_path):
    """``mermaid_cdn=False`` omits ``mermaid.esm`` even with a diagram present."""
    theme = ThemeConfig(mermaid_cdn=False)
    markdown = "# Diagram\n\n```mermaid\ngraph LR\n  A-->B\n```\n"
    rendered = _build_with_content(tmp_path, markdown, theme)
    assert "mermaid.esm" not in rendered
|
|
||
|
|
||
def test_math_block_detected(tmp_path):
    """A fenced ``math`` code block makes the built page mention katex."""
    markdown = "# Math\n\n```math\nx^2\n```\n"
    rendered = _build_with_content(tmp_path, markdown).lower()
    assert "katex" in rendered
|
|
||
|
|
||
def test_dollar_sign_in_prose_no_math(tmp_path):
    """A currency amount such as ``$5`` must not be mistaken for math."""
    markdown = "# Pricing\n\nCosts $5 per month.\n"
    rendered = _build_with_content(tmp_path, markdown).lower()
    assert "katex" not in rendered
|
|
||
|
|
||
def test_block_math_double_dollar(tmp_path):
    """``$$...$$`` display math makes the built page mention katex."""
    markdown = "# Math\n\n$$x^2 + y^2$$\n"
    rendered = _build_with_content(tmp_path, markdown).lower()
    assert "katex" in rendered
|
|
||
|
|
||
def test_config_auto_default():
    """Both CDN settings on a fresh ``ThemeConfig`` equal ``"auto"``."""
    theme = ThemeConfig()
    assert theme.math_cdn == "auto"
    assert theme.mermaid_cdn == "auto"
|
|
||
|
|
||
def test_config_invalid_cdn_value():
    """An unsupported ``math_cdn`` string raises ``ValueError``."""
    # Imported locally: this test module has no top-level pytest import.
    import pytest

    expected_message = "must be true, false, or 'auto'"
    with pytest.raises(ValueError, match=expected_message):
        ThemeConfig(math_cdn="always")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| """Tests for search stopword configuration.""" | ||
|
|
||
| import pytest | ||
|
|
||
| from rockgarden.config import Config, SearchConfig, SiteConfig | ||
| from rockgarden.output.builder import build_site | ||
|
|
||
|
|
||
def test_stopwords_default():
    """A fresh ``SearchConfig`` has ``stopwords == "default"``."""
    search = SearchConfig()
    assert search.stopwords == "default"
|
|
||
|
|
||
def test_stopwords_none():
    """The string ``"none"`` is accepted and stored unchanged."""
    search = SearchConfig(stopwords="none")
    assert search.stopwords == "none"
|
|
||
|
|
||
def test_stopwords_custom_list():
    """A list of words is accepted and stored unchanged."""
    search = SearchConfig(stopwords=["the", "a", "an"])
    assert search.stopwords == ["the", "a", "an"]
|
|
||
|
|
||
def test_stopwords_invalid_string():
    """The string ``"custom"`` is rejected with ``ValueError``."""
    expected_message = "stopwords must be"
    with pytest.raises(ValueError, match=expected_message):
        SearchConfig(stopwords="custom")
|
|
||
|
|
||
def _build_and_get_html(tmp_path, stopwords="default"):
    """Build a one-page site with the given stopword setting; return its HTML.

    A fixed markdown page is written to ``content/page.md`` under *tmp_path*,
    the site is built into ``output``, and the text of
    ``output/page/index.html`` is returned.
    """
    source = tmp_path / "content"
    output = tmp_path / "output"
    source.mkdir()
    markdown_file = source / "page.md"
    markdown_file.write_text("# Hello\n\nSome content.\n")

    site_settings = SiteConfig(source=source, output=output)
    search_settings = SearchConfig(stopwords=stopwords)
    config = Config(site=site_settings, search=search_settings)

    build_site(config, source, output)
    rendered_page = output / "page" / "index.html"
    return rendered_page.read_text()
|
|
||
|
|
||
def test_default_stopwords_no_pipeline_change(tmp_path):
    """With ``"default"`` stopwords the page contains no stop-word filter code."""
    rendered = _build_and_get_html(tmp_path, "default")
    for marker in ("lunr.stopWordFilter", "generateStopWordFilter"):
        assert marker not in rendered
|
|
||
|
|
||
def test_none_stopwords_removes_filter(tmp_path):
    """With ``"none"`` stopwords the page removes lunr's stop-word filter."""
    rendered = _build_and_get_html(tmp_path, "none")
    removal_call = "this.pipeline.remove(lunr.stopWordFilter)"
    assert removal_call in rendered
|
|
||
|
|
||
def test_custom_stopwords_sets_filter(tmp_path):
    """A custom word list appears in the page alongside a generated filter."""
    rendered = _build_and_get_html(tmp_path, ["the", "a"])
    for fragment in ("generateStopWordFilter", '"the"', '"a"'):
        assert fragment in rendered
This comment was marked as outdated.
Sorry, something went wrong.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor edge case that's not worth addressing at this time.