|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Hugo to MkDocs Migration Script |
| 4 | +
|
| 5 | +Converts Hugo-formatted Markdown files to MkDocs Material format: |
| 6 | +1. Copies .md files from source to destination |
| 7 | +2. Renames _index.md to index.md |
| 8 | +3. Converts {{< relref "..." >}} to relative links |
| 9 | +4. Converts {{< hint type=X >}} to !!! X admonitions |
| 10 | +5. Strips {{< toc >}} and {{< toc-tree >}} |
| 11 | +6. Cleans frontmatter (removes type and date, normalizes draft) |
| 12 | +""" |
| 13 | + |
| 14 | +import os |
| 15 | +import re |
| 16 | +import sys |
| 17 | +import shutil |
| 18 | +from pathlib import Path |
| 19 | +from typing import Optional |
| 20 | + |
| 21 | + |
def convert_relref(match: re.Match, current_file: Path, source_root: Path) -> str:
    """Convert a Hugo ``relref`` shortcode match into a relative Markdown link.

    Args:
        match: Regex match whose group 1 holds the shortcode argument,
            optionally still wrapped in quotes and/or padded with spaces.
        current_file: Source file that contains the shortcode. References
            without an extension are probed relative to this file's directory
            to tell a leaf page (``name.md``) from a section (``name/``).
        source_root: Root of the tree being migrated. Accepted for interface
            compatibility; it is not consulted here.

    Returns:
        The link target ending in ``.md`` (plus ``#fragment`` if one was
        given), e.g. ``../guide/index.md#setup``. A pure ``#fragment``
        reference is returned unchanged.
    """
    ref_path = match.group(1).strip().strip('"\'').strip()

    # Keep an optional "#fragment" aside so the file-name logic never sees it;
    # it is appended again at the very end.
    ref_path, hash_sep, anchor = ref_path.partition('#')
    fragment = hash_sep + anchor
    if not ref_path:
        # Same-page anchor such as "#section".
        return fragment

    # Site-absolute references keep the historical treatment (leading slash
    # dropped) but are not probed on disk: they are not relative to
    # current_file.
    site_absolute = ref_path.startswith('/')

    # Drop leading "/" and "./" segments only. str.lstrip('./') must not be
    # used here: it strips *characters* and would also swallow "../".
    while ref_path.startswith(('./', '/')):
        ref_path = ref_path[2:] if ref_path.startswith('./') else ref_path[1:]
    if ref_path in ('', '.'):
        return 'index.md' + fragment

    if not ref_path.endswith('.md'):
        last = ref_path.rsplit('/', 1)[-1]
        if ref_path.endswith('/'):
            ref_path += 'index.md'
        else:
            here = current_file.parent
            has_leaf = (
                not site_absolute
                and (here / (ref_path + '.md')).is_file()
            )
            has_section = has_leaf and any(
                (here / ref_path / name).is_file()
                for name in ('_index.md', 'index.md')
            )
            if has_leaf and not has_section:
                # The source tree shows this is a single page, not a section.
                ref_path += '.md'
            elif '/' in ref_path or '.' not in last or last == '..':
                # Fallback heuristic: treat it as a section directory.
                ref_path += '/index.md'
            else:
                ref_path += '.md'

    head, slash, last = ref_path.rpartition('/')
    if last == '_index.md':
        # Only an exact "_index.md" component is a Hugo section index;
        # names such as "my_index.md" must stay untouched.
        last = 'index.md'
    elif last != 'index.md':
        # Mirror the file renaming applied by migrate_directory so the link
        # still points at the migrated file.
        last = re.sub(r'([a-z])([A-Z])', r'\1-\2', last).lower()

    return head + slash + last + fragment
| 53 | + |
| 54 | + |
def convert_hint_to_admonition(match: re.Match) -> str:
    """Build the ``!!! <type>`` admonition header for a Hugo hint shortcode.

    Args:
        match: Regex match whose group 1 is the hint type as written in the
            shortcode (compared case-sensitively).

    Returns:
        The admonition opening line. Unrecognised hint types fall back to
        ``note``.
    """
    # Hint types that keep their name on the MkDocs side.
    same_name = ('note', 'info', 'warning', 'danger', 'tip')

    hugo_type = match.group(1)
    if hugo_type in same_name:
        kind = hugo_type
    elif hugo_type == 'important':
        # 'important' is rendered as a warning admonition.
        kind = 'warning'
    else:
        kind = 'note'

    return '!!! ' + kind
| 69 | + |
| 70 | + |
def clean_frontmatter(content: str) -> str:
    """Clean and normalize the leading YAML frontmatter of a document.

    Top-level ``type`` and ``date`` keys are removed, a top-level ``draft``
    key is kept only when its value is true (rewritten as ``draft: true``),
    and blank frontmatter lines are dropped. Indented (nested) lines are
    always preserved as written so nested mappings are not damaged.

    Args:
        content: Full text of the Markdown file.

    Returns:
        The text with cleaned frontmatter followed by one blank line and the
        body. If no frontmatter fields remain, only the body is returned. If
        the text does not start with a ``---`` delimited block, it is
        returned unchanged.
    """
    fm_pattern = re.compile(r'^---\s*\n(.*?)\n---\s*\n', re.DOTALL)
    match = fm_pattern.match(content)
    if not match:
        return content

    body = content[match.end():].lstrip()
    kept = []

    for line in match.group(1).split('\n'):
        if not line.strip():
            continue

        key, colon, value = line.partition(':')
        # Only un-indented "key: value" lines are top-level fields; indented
        # lines belong to a nested structure and must not be touched.
        if colon and not line[0].isspace():
            key = key.strip()
            if key in ('type', 'date'):
                continue
            if key == 'draft':
                # Compare the actual value (minus any trailing comment and
                # quotes) instead of searching the whole line for "true".
                flag = value.split('#', 1)[0].strip().strip('"\'').lower()
                if flag == 'true':
                    kept.append('draft: true')
                # draft: false is the default, so it is simply omitted.
                continue

        kept.append(line)

    if kept:
        return '---\n' + '\n'.join(kept) + '\n---\n\n' + body
    return body
| 108 | + |
| 109 | + |
def _hint_block_to_admonition(match: re.Match) -> str:
    """Render one complete hint block (open tag, body, close tag) as an admonition."""
    header = convert_hint_to_admonition(match)

    lines = match.group(2).split('\n')
    # Trim blank lines at both ends of the hint body.
    while lines and not lines[0].strip():
        lines.pop(0)
    while lines and not lines[-1].strip():
        lines.pop()

    # Admonition content must be indented by four spaces to belong to the
    # "!!!" header; blank lines stay empty.
    indented = [('    ' + line) if line.strip() else '' for line in lines]

    # Surround with blank lines so the admonition starts its own block; the
    # caller collapses any surplus blank lines afterwards.
    return '\n\n' + '\n'.join([header] + indented) + '\n\n'


def convert_file(source_path: Path, dest_path: Path, source_root: Path) -> None:
    """Convert a single Hugo Markdown file to MkDocs format and write it out.

    Steps: clean the frontmatter, rewrite ``relref`` shortcodes to relative
    links, turn hint shortcodes into admonitions (with an indented body),
    strip ``toc`` / ``toc-tree`` shortcodes, warn about any shortcode left
    over, and collapse runs of blank lines.

    Args:
        source_path: Hugo Markdown file to read (UTF-8).
        dest_path: File to write; missing parent directories are created.
        source_root: Root of the source tree, forwarded to the relref
            conversion.
    """
    with open(source_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # 1. Clean frontmatter
    content = clean_frontmatter(content)

    # 2. Convert {{< relref "..." >}} to relative links
    # Pattern matches: [text]({{< relref "path" >}})
    relref_pattern = re.compile(r'\{\{<\s*relref\s+["\']?([^"\'}>]+)["\']?\s*>\}\}')
    content = relref_pattern.sub(
        lambda m: convert_relref(m, source_path, source_root),
        content
    )

    # 3. Convert {{< hint type=X >}} ... {{< /hint >}} to an admonition.
    # The type may be quoted. Balanced blocks are converted as a unit so the
    # body can be indented under the "!!!" header.
    hint_open = r'\{\{<\s*hint\s+type=["\']?(\w+)["\']?\s*>\}\}'
    hint_close = r'\{\{<\s*/hint\s*>\}\}'
    content = re.sub(
        hint_open + r'(.*?)' + hint_close,
        _hint_block_to_admonition,
        content,
        flags=re.DOTALL,
    )
    # Unbalanced leftovers: convert a stray opening tag to a bare header and
    # drop a stray closing tag.
    content = re.sub(hint_open, convert_hint_to_admonition, content)
    content = re.sub(hint_close, '', content)

    # 4. Strip {{< toc >}} and {{< toc-tree >}}
    content = re.sub(r'\{\{<\s*toc\s*>\}\}', '', content)
    content = re.sub(r'\{\{<\s*toc-tree\s*>\}\}', '', content)

    # 5. Warn about any shortcode that was not converted, in either the
    # {{< ... >}} or the {{% ... %}} notation.
    remaining = re.findall(r'\{\{[<%].*?[>%]\}\}', content)
    if remaining:
        print(f"  Warning: Remaining shortcodes in {source_path}: {remaining}")

    # 6. Clean up multiple blank lines
    content = re.sub(r'\n{3,}', '\n\n', content)

    # Ensure parent directory exists
    dest_path.parent.mkdir(parents=True, exist_ok=True)

    with open(dest_path, 'w', encoding='utf-8') as f:
        f.write(content)
| 150 | + |
| 151 | + |
def migrate_directory(source_dir: Path, dest_dir: Path, source_root: Optional[Path] = None) -> int:
    """Migrate every Markdown file below ``source_dir`` into ``dest_dir``.

    The directory layout is preserved. ``_index.md`` becomes ``index.md``;
    every other file name is converted from camelCase to kebab-case and
    lower-cased.

    Args:
        source_dir: Directory searched recursively for ``*.md`` files.
        dest_dir: Directory that receives the converted files.
        source_root: Root passed on to the per-file conversion; defaults to
            ``source_dir``.

    Returns:
        The number of files converted.
    """
    if source_root is None:
        source_root = source_dir

    # When the destination lives inside the source tree, files already in it
    # are earlier output and must not be migrated again.
    dest_resolved = dest_dir.resolve()
    dest_is_nested = source_dir.resolve() in dest_resolved.parents

    # Snapshot (and sort) the file list before writing anything, so files
    # created during this run are never picked up by the directory walk and
    # the processing order is deterministic. Directories that merely match
    # "*.md" are skipped.
    sources = sorted(
        path for path in source_dir.rglob('*.md')
        if path.is_file()
        and not (dest_is_nested and dest_resolved in path.resolve().parents)
    )

    written = {}
    file_count = 0

    for source_path in sources:
        # Calculate relative path
        parts = list(source_path.relative_to(source_dir).parts)

        # Rename _index.md to index.md
        if parts[-1] == '_index.md':
            parts[-1] = 'index.md'

        # Convert camelCase filenames to kebab-case
        if parts[-1] != 'index.md':
            parts[-1] = re.sub(r'([a-z])([A-Z])', r'\1-\2', parts[-1]).lower()

        dest_path = dest_dir / Path(*parts)

        # Renaming can map two different sources onto one destination
        # (e.g. fooBar.md and foo-bar.md); make the overwrite visible.
        if dest_path in written:
            print(f"  Warning: {source_path} overwrites output of {written[dest_path]}")
        written[dest_path] = source_path

        print(f"  {source_path.name} -> {dest_path.relative_to(dest_dir)}")
        convert_file(source_path, dest_path, source_root)
        file_count += 1

    return file_count
| 182 | + |
| 183 | + |
def main():
    """Command-line entry point: ``migrate-hugo.py <source_dir> <dest_dir>``.

    Prints usage and exits with status 1 when fewer than two arguments are
    given, or when the source path is missing or is not a directory.
    Otherwise migrates the tree and prints a summary with follow-up steps.
    """
    if len(sys.argv) < 3:
        print("Usage: migrate-hugo.py <source_dir> <dest_dir>")
        print()
        print("Example:")
        print("  migrate-hugo.py ../riddl/doc/src/main/hugo/content/tutorial/ docs/riddl/tutorials/")
        sys.exit(1)

    source_dir = Path(sys.argv[1])
    dest_dir = Path(sys.argv[2])

    if not source_dir.exists():
        print(f"Error: Source directory does not exist: {source_dir}")
        sys.exit(1)

    # A regular file would pass the exists() check and then "successfully"
    # migrate zero files; reject it explicitly instead.
    if not source_dir.is_dir():
        print(f"Error: Source path is not a directory: {source_dir}")
        sys.exit(1)

    print(f"Migrating Hugo content from: {source_dir}")
    print(f"                         to: {dest_dir}")
    print()

    file_count = migrate_directory(source_dir, dest_dir)

    print()
    print(f"Migrated {file_count} files successfully.")
    print()
    print("Next steps:")
    print("  1. Review converted files for any remaining issues")
    print("  2. Update mkdocs.yml navigation")
    print("  3. Run 'mkdocs serve' to verify rendering")


if __name__ == '__main__':
    main()
0 commit comments