|
1 | 1 | """Tag index page generation.""" |
2 | 2 |
|
| 3 | +import re |
3 | 4 | from pathlib import Path |
4 | 5 |
|
5 | 6 | from jinja2 import Environment |
6 | 7 |
|
7 | 8 | from rockgarden.content.models import Page |
8 | | -from rockgarden.urls import get_url |
| 9 | +from rockgarden.urls import get_tag_url, get_tags_root_url, get_url |
9 | 10 |
|
10 | 11 |
|
11 | 12 | def normalize_tag(tag: str) -> str: |
12 | 13 | """Normalize a tag to a URL-safe slug. |
13 | 14 |
|
14 | | - Strips leading '#' and lowercases. Tags 'Python', '#python', and 'python' |
15 | | - all normalize to 'python'. |
| 15 | + Strips leading '#', lowercases, and replaces any character that is not |
| 16 | + alphanumeric, hyphen, or underscore with a hyphen. This prevents path |
| 17 | + traversal via tags containing '/' or '..'. |
| 18 | +
|
| 19 | + Tags 'Python', '#python', and 'python' all normalize to 'python'. |
| 20 | + Obsidian nested tags like 'character/pc' normalize to 'character-pc'. |
16 | 21 | """ |
17 | | - return tag.lstrip("#").lower() |
| 22 | + slug = tag.lstrip("#").lower() |
| 23 | + slug = re.sub(r"[^a-z0-9_-]", "-", slug) |
| 24 | + slug = re.sub(r"-+", "-", slug) |
| 25 | + return slug.strip("-") |
18 | 26 |
|
19 | 27 |
|
20 | 28 | def collect_tags(pages: list[Page]) -> dict[str, list[Page]]: |
@@ -56,7 +64,10 @@ def build_tag_pages( |
56 | 64 | pages=page_entries, |
57 | 65 | site=site_config, |
58 | 66 | ) |
59 | | - out_file = output / "tags" / tag_slug / "index.html" |
| 67 | + if clean_urls: |
| 68 | + out_file = output / "tags" / tag_slug / "index.html" |
| 69 | + else: |
| 70 | + out_file = output / "tags" / f"{tag_slug}.html" |
60 | 71 | out_file.parent.mkdir(parents=True, exist_ok=True) |
61 | 72 | out_file.write_text(html) |
62 | 73 |
|
|
0 commit comments