|
3 | 3 | from dataclasses import dataclass |
4 | 4 | from html import escape |
5 | 5 | import re |
| 6 | +import sys |
6 | 7 | import uuid |
7 | 8 | from pathlib import Path |
8 | 9 | from typing import Iterable |
|
13 | 14 | from pygments.lexer import RegexLexer, words |
14 | 15 | from pygments.token import Comment, Keyword, Name, Number, Operator, Punctuation, String, Text |
15 | 16 |
|
| 17 | +# Path.relative_to(walk_up=True) requires Python 3.12+ |
| 18 | +assert sys.version_info >= (3, 12) |
16 | 19 |
|
17 | 20 | def BeautifulSoup(*args): |
18 | 21 | return bs4.BeautifulSoup(*args, features="lxml") |
@@ -266,40 +269,18 @@ def _decorate_title(self, soup) -> None: |
266 | 269 | def _normalize_urls(self, soup, public_path: str) -> None: |
267 | 270 | for tag, attr in (("a", "href"), ("img", "src"), ("script", "src"), ("link", "href")): |
268 | 271 | for element in soup.find_all(tag): |
269 | | - value = element.get(attr) |
270 | | - if not value: |
271 | | - continue |
272 | | - normalized = self._normalize_url(public_path, value) |
273 | | - if normalized is None: |
274 | | - continue |
275 | | - element[attr] = normalized |
| 272 | + if value := element.get(attr): |
| 273 | + if normalized := self._normalize_url(public_path, value): |
| 274 | + element[attr] = normalized |
276 | 275 |
|
277 | 276 | def _normalize_url(self, public_path: str, value: str) -> str | None: |
278 | | - if not value or value.startswith(("data:", "mailto:", "javascript:", "tel:", "#")): |
| 277 | + if not value or value.startswith(("data:", "mailto:", "javascript:", "tel:", "#", "http:", "https:")): |
279 | 278 | return None |
280 | | - |
281 | | - base = public_path |
282 | | - if not base.endswith("/"): |
283 | | - base = str(Path(base).parent).replace("\\", "/") |
284 | | - if not base.startswith("/"): |
285 | | - base = "/" + base |
286 | | - if base == "/.": |
287 | | - base = "/" |
288 | | - if not base.endswith("/"): |
289 | | - base += "/" |
290 | | - |
291 | | - absolute = urljoin(f"https://example.invalid{base}", value) |
292 | | - parsed = urlparse(absolute) |
293 | | - if parsed.scheme not in ("http", "https"): |
| 279 | + try: |
| 280 | + return Path(value).relative_to(Path(public_path).parent, walk_up=True).as_posix() |
| 281 | + except ValueError: |
| 282 | + print(f"Error normalizing path {value} within page {public_path}") |
294 | 283 | return None |
295 | | - if parsed.netloc != "example.invalid": |
296 | | - return value |
297 | | - |
298 | | - path = parsed.path or "/" |
299 | | - if not path.startswith("/"): |
300 | | - path = "/" + path |
301 | | - |
302 | | - return urlunparse(("", "", path, "", parsed.query, parsed.fragment)) |
303 | 284 |
|
304 | 285 | def _wrap_figures_and_tables(self, soup) -> None: |
305 | 286 | main_content = soup.find(id="main-content") |
@@ -437,7 +418,7 @@ def _linkify_schema_names(self, soup, public_path: str, collector: ListingCollec |
437 | 418 | if start > cursor: |
438 | 419 | replacement.append(text[cursor:start]) |
439 | 420 | raw = match.group(0) |
440 | | - anchor = soup.new_tag("a", href=f"/lexical/{canonical}.htm") |
| 421 | + anchor = soup.new_tag("a", href=f"/lexical/{canonical}.html") |
441 | 422 | anchor.string = raw |
442 | 423 | replacement.append(anchor) |
443 | 424 | collector.add_reference(canonical, public_path) |
@@ -516,7 +497,7 @@ def _render_named_segments(self, token_type, value: str) -> str: |
516 | 497 | canonical = self.canonical_names.get(value.upper()) |
517 | 498 | if canonical is None: |
518 | 499 | return segment |
519 | | - return f'<a href="/lexical/{canonical}.htm">{segment}</a>' |
| 500 | + return f'<a href="/lexical/{canonical}.html">{segment}</a>' |
520 | 501 |
|
521 | 502 | def _iter_schema_matches(self, value: str): |
522 | 503 | for match in self.name_pattern.finditer(value): |
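
For readers unfamiliar with the new API: Path.relative_to() only accepts a walk_up argument on Python 3.12+, which is what the version assert added at the top of the module guards. A minimal sketch of the behavior the rewritten _normalize_url relies on (the paths below are hypothetical stand-ins, not taken from this repo):

    from pathlib import Path

    page = Path("guide/syntax/strings.html")    # stand-in for public_path
    target = Path("lexical/IDENTIFIER.html")    # stand-in for a link value

    # walk_up=True (new in Python 3.12) lets the result climb out of the
    # page's directory with ".." segments instead of raising ValueError
    # for targets that are not subpaths of it.
    print(target.relative_to(page.parent, walk_up=True).as_posix())
    # -> ../../lexical/IDENTIFIER.html

Note that relative_to() still raises ValueError when the two paths mix absolute and relative anchors; that is the case the new except branch reports and skips by returning None.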
|