Fix path traversal in tag slugs; honor clean_urls for tag page paths and links

tylerdave · tylerdave · commit ce39054970bb · 2026-03-01T16:48:20.000-05:00
diff --git a/src/rockgarden/output/tags.py b/src/rockgarden/output/tags.py
@@ -1,20 +1,28 @@
 """Tag index page generation."""
 
+import re
 from pathlib import Path
 
 from jinja2 import Environment
 
 from rockgarden.content.models import Page
-from rockgarden.urls import get_url
+from rockgarden.urls import get_tag_url, get_tags_root_url, get_url
 
 
 def normalize_tag(tag: str) -> str:
     """Normalize a tag to a URL-safe slug.
 
-    Strips leading '#' and lowercases. Tags 'Python', '#python', and 'python'
-    all normalize to 'python'.
+    Strips leading '#', lowercases, and replaces each character outside ASCII
+    [a-z0-9_-] with a hyphen, then collapses hyphen runs and trims edge
+    hyphens. This prevents path traversal via '/' or '..' ('..' becomes '').
+
+    Tags 'Python', '#python', and 'python' all normalize to 'python'.
+    Obsidian nested tags like 'character/pc' normalize to 'character-pc'.
     """
-    return tag.lstrip("#").lower()
+    slug = tag.lstrip("#").lower()
+    slug = re.sub(r"[^a-z0-9_-]", "-", slug)
+    slug = re.sub(r"-+", "-", slug)
+    return slug.strip("-")
 
 
 def collect_tags(pages: list[Page]) -> dict[str, list[Page]]:
@@ -56,7 +64,10 @@ def build_tag_pages(
             pages=page_entries,
             site=site_config,
         )
-        out_file = output / "tags" / tag_slug / "index.html"
+        if clean_urls:
+            out_file = output / "tags" / tag_slug / "index.html"
+        else:
+            out_file = output / "tags" / f"{tag_slug}.html"
         out_file.parent.mkdir(parents=True, exist_ok=True)
         out_file.write_text(html)
 
diff --git a/src/rockgarden/render/engine.py b/src/rockgarden/render/engine.py
@@ -10,6 +10,7 @@
 from rockgarden.config import Config
 from rockgarden.content.models import Page
 from rockgarden.nav.tree import NavNode
+from rockgarden.urls import get_tag_url, get_tags_root_url
 
 
 def _make_format_datetime(tz_name: str):
@@ -62,6 +63,9 @@ def create_engine(
         autoescape=True,
     )
     env.filters["format_datetime"] = _make_format_datetime(config.dates.timezone)
+    clean_urls = config.site.clean_urls
+    env.globals["tag_url"] = lambda slug: get_tag_url(slug, clean_urls)
+    env.globals["tags_root_url"] = get_tags_root_url(clean_urls)
     return env
 
 
diff --git a/src/rockgarden/templates/folder_index.html b/src/rockgarden/templates/folder_index.html
@@ -51,7 +51,7 @@
               {% for tag in child.tags %}
               {% set tag_slug = tag.lstrip('#').lower() %}
               {% if site.tag_index %}
-              <a href="/tags/{{ tag_slug }}/" class="badge badge-sm badge-ghost hover:badge-primary">{{ tag.lstrip('#') }}</a>
+              <a href="{{ tag_url(tag_slug) }}" class="badge badge-sm badge-ghost hover:badge-primary">{{ tag.lstrip('#') }}</a>
               {% else %}
               <span class="badge badge-sm badge-ghost">{{ tag.lstrip('#') }}</span>
               {% endif %}
diff --git a/src/rockgarden/templates/page.html b/src/rockgarden/templates/page.html
@@ -28,7 +28,7 @@
                     {% for tag in tags %}
                     {% set tag_slug = tag.lstrip('#').lower() %}
                     {% if site.tag_index %}
-                    <a href="/tags/{{ tag_slug }}/" class="badge badge-sm badge-ghost hover:badge-primary">{{ tag.lstrip('#') }}</a>
+                    <a href="{{ tag_url(tag_slug) }}" class="badge badge-sm badge-ghost hover:badge-primary">{{ tag.lstrip('#') }}</a>
                     {% else %}
                     <span class="badge badge-sm badge-ghost">{{ tag.lstrip('#') }}</span>
                     {% endif %}
diff --git a/src/rockgarden/templates/tag_index.html b/src/rockgarden/templates/tag_index.html
@@ -8,7 +8,7 @@
         <nav aria-label="Breadcrumb" class="text-sm breadcrumbs mb-4">
             <ul>
                 <li><a href="/">Home</a></li>
-                <li><a href="/tags/">Tags</a></li>
+                <li><a href="{{ tags_root_url }}">Tags</a></li>
                 <li>#{{ tag }}</li>
             </ul>
         </nav>
diff --git a/src/rockgarden/templates/tags_root.html b/src/rockgarden/templates/tags_root.html
@@ -16,7 +16,7 @@ <h1 class="text-3xl font-bold mb-6">Tags</h1>
             {% if tags %}
             <div class="flex flex-wrap gap-3">
                 {% for tag_slug, count in tags.items() %}
-                <a href="/tags/{{ tag_slug }}/" class="badge badge-lg badge-ghost hover:badge-primary gap-1">
+                <a href="{{ tag_url(tag_slug) }}" class="badge badge-lg badge-ghost hover:badge-primary gap-1">
                     #{{ tag_slug }}
                     <span class="text-base-content/50 text-xs">{{ count }}</span>
                 </a>
diff --git a/src/rockgarden/urls.py b/src/rockgarden/urls.py
@@ -73,6 +73,28 @@ def get_url(slug: str, clean_urls: bool = True) -> str:
     return f"/{slug}.html"
 
 
+def get_tag_url(tag_slug: str, clean_urls: bool = True) -> str:
+    """Get URL for a tag index page.
+
+    Args:
+        tag_slug: Normalized tag slug (e.g., "python"), interpolated verbatim.
+        clean_urls: If True, uses trailing slash format; else a ".html" suffix.
+
+    Returns:
+        URL path:
+        - clean_urls=True:  "python" → "/tags/python/"
+        - clean_urls=False: "python" → "/tags/python.html"
+    """
+    if clean_urls:
+        return f"/tags/{tag_slug}/"
+    return f"/tags/{tag_slug}.html"
+
+
+def get_tags_root_url(clean_urls: bool = True) -> str:
+    """Get URL for the tags root index page ("/tags/" or "/tags/index.html")."""
+    return "/tags/" if clean_urls else "/tags/index.html"
+
+
 def get_folder_url(folder_path: str, clean_urls: bool = True) -> str:
     """Get URL for a folder.