Skip to content

Commit 49f333a

Browse files
committed
Fixes to html generator
1 parent d3b17f7 commit 49f333a

7 files changed

Lines changed: 103 additions & 112 deletions

File tree

.github/workflows/publish-output-2.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ jobs:
140140
run: |
141141
set -euxo pipefail
142142
python -m generators.html --output ../output/html -j 4
143-
find "$REPO_DIR/.." -type d
144143
145144
- name: Sync site into output repository
146145
shell: bash

code/generators/html/builder.py

Lines changed: 83 additions & 72 deletions
Large diffs are not rendered by default.

code/generators/html/markdown.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ def process_markdown(self, resource, mdc, process_quotes=True, number_headings=F
291291
mark.string = keyword
292292

293293
if "deprecation" in css_class:
294-
anchor = soup.new_tag("a", href=f"{self.base}/content/terms_and_definitions.htm#deprecation")
294+
anchor = soup.new_tag("a", href=f"{self.base}/content/terms_and_definitions.html#deprecation")
295295
icon = soup.new_tag("i")
296296
icon["data-feather"] = "link"
297297
anchor.append(icon)

code/generators/html/refiner.py

Lines changed: 13 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from dataclasses import dataclass
44
from html import escape
55
import re
6+
import sys
67
import uuid
78
from pathlib import Path
89
from typing import Iterable
@@ -13,6 +14,8 @@
1314
from pygments.lexer import RegexLexer, words
1415
from pygments.token import Comment, Keyword, Name, Number, Operator, Punctuation, String, Text
1516

17+
# Path.relative_to(walk_up=True)
18+
assert not (tuple(sys.version_info) < (3,12))
1619

1720
def BeautifulSoup(*args):
1821
return bs4.BeautifulSoup(*args, features="lxml")
@@ -266,40 +269,18 @@ def _decorate_title(self, soup) -> None:
266269
def _normalize_urls(self, soup, public_path: str) -> None:
267270
for tag, attr in (("a", "href"), ("img", "src"), ("script", "src"), ("link", "href")):
268271
for element in soup.find_all(tag):
269-
value = element.get(attr)
270-
if not value:
271-
continue
272-
normalized = self._normalize_url(public_path, value)
273-
if normalized is None:
274-
continue
275-
element[attr] = normalized
272+
if value := element.get(attr):
273+
if normalized := self._normalize_url(public_path, value):
274+
element[attr] = normalized
276275

277276
def _normalize_url(self, public_path: str, value: str) -> str | None:
278-
if not value or value.startswith(("data:", "mailto:", "javascript:", "tel:", "#")):
277+
if not value or value.startswith(("data:", "mailto:", "javascript:", "tel:", "#", "http:", "https:")):
279278
return None
280-
281-
base = public_path
282-
if not base.endswith("/"):
283-
base = str(Path(base).parent).replace("\\", "/")
284-
if not base.startswith("/"):
285-
base = "/" + base
286-
if base == "/.":
287-
base = "/"
288-
if not base.endswith("/"):
289-
base += "/"
290-
291-
absolute = urljoin(f"https://example.invalid{base}", value)
292-
parsed = urlparse(absolute)
293-
if parsed.scheme not in ("http", "https"):
279+
try:
280+
return Path(value).relative_to(Path(public_path).parent, walk_up=True).as_posix()
281+
except ValueError:
282+
print(f"Error normalizing path {value} within page {public_path}")
294283
return None
295-
if parsed.netloc != "example.invalid":
296-
return value
297-
298-
path = parsed.path or "/"
299-
if not path.startswith("/"):
300-
path = "/" + path
301-
302-
return urlunparse(("", "", path, "", parsed.query, parsed.fragment))
303284

304285
def _wrap_figures_and_tables(self, soup) -> None:
305286
main_content = soup.find(id="main-content")
@@ -437,7 +418,7 @@ def _linkify_schema_names(self, soup, public_path: str, collector: ListingCollec
437418
if start > cursor:
438419
replacement.append(text[cursor:start])
439420
raw = match.group(0)
440-
anchor = soup.new_tag("a", href=f"/lexical/{canonical}.htm")
421+
anchor = soup.new_tag("a", href=f"/lexical/{canonical}.html")
441422
anchor.string = raw
442423
replacement.append(anchor)
443424
collector.add_reference(canonical, public_path)
@@ -516,7 +497,7 @@ def _render_named_segments(self, token_type, value: str) -> str:
516497
canonical = self.canonical_names.get(value.upper())
517498
if canonical is None:
518499
return segment
519-
return f'<a href="/lexical/{canonical}.htm">{segment}</a>'
500+
return f'<a href="/lexical/{canonical}.html">{segment}</a>'
520501

521502
def _iter_schema_matches(self, value: str):
522503
for match in self.name_pattern.finditer(value):

code/generators/util/md.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def parse_document(*, fn=None, data=None, linesep="", as_text=True):
4141
first.insert_before(soup.new_tag('h1', 'DocumentRoot'))
4242

4343

44-
headings = soup.find_all(re.compile("h\d"))
44+
headings = soup.find_all(re.compile(r"h\d"))
4545
next_heading = headings[1:] + [None]
4646

4747
root = None

code/generators/util/xmi_document.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ def __iter__(self):
347347

348348
def format_aggr(ag):
349349
ag_names = ("ARRAY", "LIST", "SET", "BAG")
350-
m = re.match('(\w)(U)?\[(\d+):(\d+|\?)\]', ag)
350+
m = re.match(r'(\w)(U)?\[(\d+):(\d+|\?)\]', ag)
351351
assert m
352352
t, u, l, h = m.groups()
353353
unique = ("UNIQUE",) if u else ()

code/templates/main.html

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
<html lang="en">
22
<head>
3-
<link rel="stylesheet" href="{{ base }}/assets/css/mini-default.css">
4-
<link rel="stylesheet" href="{{ base }}/assets/css/style.css">
3+
<link rel="stylesheet" href="{{ relative_base }}/assets/css/mini-default.css">
4+
<link rel="stylesheet" href="{{ relative_base }}/assets/css/style.css">
55
<meta name="viewport" content="width=device-width, initial-scale=1">
66
<meta charset="utf-8">
77
{% if is_iso %}
88
<title>ISO 16739-1 Documentation</title>
99
{% else %}
10-
<title>{{ spec_version_string }} Documentation</title>
10+
<title>{{ spec_version_string }} Documentation {{ relative_base }} {{ relative_base }}</title>
1111
{% endif %}
12-
<link rel="icon" type="image/x-icon" href="{{ base }}/assets/img/favicon.ico">
12+
<link rel="icon" type="image/x-icon" href="{{ relative_base }}/assets/img/favicon.ico">
1313
<script>window.is_iso = {% if is_iso %}true{% else %}false{% endif %};</script>
1414
</head>
1515
<body class="{{body_class or ''}}">

0 commit comments

Comments
 (0)