Skip to content

Commit 91a8850

Browse files
committed
docs: report manpage NAME drift as a CI warning
After the doc build, scripts/manpage-name-check.py scans the built man tree for pages whose rendered .TH name disagrees with their filename, the sign of a translated NAME line that drifted from the command identifier. Quiet when nothing drifted. When something has, it prints each page with the string to search in Weblate and its current value side by side, so the output is a ready worklist. Never fails the build: in CI it writes that list to the job summary and emits one warning pointing at it. The NAME lines are managed in Weblate, so fixes go there.
1 parent bb54eb0 commit 91a8850

3 files changed

Lines changed: 155 additions & 1 deletion

File tree

.github/workflows/ci.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ jobs:
175175
run: |
176176
set -x
177177
.github/scripts/build-doc.sh
178+
- name: Check manpage names match filenames
179+
run: scripts/manpage-name-check.py docs/build/man
178180
- name: Verify no untracked or modified files after build
179181
run: |
180182
# *.po and documentation.pot are modified by the build. Ignore them for now.

docs/src/Submakefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ MAN_SRCS = $(sort \
100100
# asciidoctor names the troff from the page's NAME section, not the make
101101
# target, so a translated NAME can misname the file and collide with another
102102
# page under -j. Pin the name with -o. NAME drift itself is reported
103-
# separately by scripts/manpage-name-check.sh (a CI warning), not here, to
103+
# separately by scripts/manpage-name-check.py (a CI warning), not here, to
104104
# keep the build output quiet.
105105
define MAKE_MANPAGE
106106
@mkdir -p $(dir $@)

scripts/manpage-name-check.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/usr/bin/env python3
2+
# Report man pages whose rendered NAME drifted from their filename.
3+
# asciidoctor takes the .TH name from the page's NAME section, while the build pins the filename with -o, so a translated NAME leaves the two disagreeing.
4+
# Run after building the docs. Argument: the built man tree (default docs/build/man).
5+
# Quiet when nothing drifted. When something has, it prints each page with the
6+
# string to search in Weblate and its current value, so a translator can fix it.
7+
# Never fails the build: under GitHub Actions it writes the same list to the job
8+
# summary and emits one warning pointing at it. The NAME lines are managed in
9+
# Weblate, so fixes go there.
10+
11+
import os
12+
import re
13+
import sys
14+
import glob
15+
16+
# Resolve every path from this script's own location (it lives in scripts/),
# so the working directory of the caller does not matter.
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT = os.path.dirname(_SCRIPTS_DIR)
MAN, PO, SRC = (
    os.path.join(ROOT, rel)
    for rel in ('docs/build/man', 'docs/po', 'docs/src/man')
)
21+
22+
def th_name(troff):
    """Return the page name from the first .TH line of a troff file.

    The name is lowercased and has every backslash removed (roff escapes such
    as ``\\-`` collapse to the plain character).

    Returns None when the first line starts with ``.so `` (an alias stub that
    only includes another page) or when no .TH line is found.

    Both a double-quoted first argument (``.TH "NAME" "1"``) and a bare one
    (``.TH NAME 9``) are accepted; a bare argument previously went
    unrecognised, so such a page was skipped as if it were a stub.
    """
    # First .TH argument: either "quoted" or a bare word up to whitespace.
    th_line = re.compile(r'\.TH\s+(?:"([^"]*)"|([^\s"]\S*))')
    with open(troff, encoding='utf-8', errors='replace') as f:
        for lineno, line in enumerate(f):
            if lineno == 0 and line.startswith('.so '):
                return None
            m = th_line.match(line)
            if m:
                name = m.group(1) if m.group(1) is not None else m.group(2)
                return name.replace('\\', '').lower()
    return None
33+
34+
def name_line(page, src=None):
    """Return the first non-blank line after ``== NAME`` in a page's AsciiDoc source.

    page: the page name without its section suffix (``halcmd`` for
        ``halcmd.1.adoc``).
    src: directory holding the ``man*/`` source folders; defaults to the
        module-level SRC.

    Candidate files ``<src>/man*/<page>.*.adoc`` are tried in sorted order and
    the first hit wins. Returns the stripped line, or None when no candidate
    has a NAME section with text under it (the case, per the original note,
    for a comp page rendered straight to troff).
    """
    base = SRC if src is None else src
    # Escape both parts so only the two intended wildcards act as patterns.
    pattern = f'{glob.escape(base)}/man*/{glob.escape(page)}.*.adoc'
    for adoc in sorted(glob.glob(pattern)):
        with open(adoc, encoding='utf-8', errors='replace') as f:
            lines = iter(f.read().splitlines())
        for line in lines:
            if line.strip() != '== NAME':
                continue
            # Keep consuming the same iterator: the answer is the next line
            # that carries any text.
            for rest in lines:
                if rest.strip():
                    return rest.strip()
    return None
44+
45+
# Cache of parsed catalogs, keyed by (directory, language).
_po = {}

# PO strings use C-style escapes; anything not listed maps to the escaped
# character itself (\" -> ", \\ -> \).
_PO_ESCAPE_RE = re.compile(r'\\(.)')
_PO_ESCAPES = {'n': '\n', 't': '\t', 'r': '\r'}


def _po_unquote(text):
    """Decode one quoted PO fragment: drop the enclosing quotes, resolve escapes."""
    text = text.strip()
    if len(text) >= 2 and text[0] == '"' and text[-1] == '"':
        # Remove exactly one quote per side; strip('"') would also swallow
        # the quote of a trailing \" escape.
        text = text[1:-1]
    else:
        text = text.strip('"')  # malformed line, best effort
    return _PO_ESCAPE_RE.sub(lambda m: _PO_ESCAPES.get(m.group(1), m.group(1)), text)


def po(lang, po_dir=None):
    """Parse ``<po_dir>/<lang>.po`` into ``{msgid: msgstr}``.

    lang: catalog name without the ``.po`` extension.
    po_dir: directory holding the catalogs; defaults to the module-level PO.

    Continuation lines that wrap one string are joined, and the strings are
    unescaped so they compare equal to the source text. An entry without a
    translation maps to ''. A missing catalog yields an empty dict instead of
    raising, since the script is documented as never failing the build. The
    result is cached per (directory, language).
    """
    base = PO if po_dir is None else po_dir
    key = (base, lang)
    if key in _po:
        return _po[key]

    entries = {}
    _po[key] = entries
    try:
        fh = open(f'{base}/{lang}.po', encoding='utf-8', errors='replace')
    except FileNotFoundError:
        return entries

    mid = mstr = mode = None
    with fh:
        for line in fh:
            if line.startswith('msgid '):
                if mid is not None:
                    entries[mid] = mstr or ''
                mid, mstr, mode = _po_unquote(line[6:]), None, 'id'
            elif line.startswith('msgstr '):
                mstr, mode = _po_unquote(line[7:]), 'str'
            elif line.startswith('"'):
                piece = _po_unquote(line)
                if mode == 'id':
                    mid += piece
                elif mode == 'str':
                    mstr += piece
            elif line.startswith('msg'):
                # msgctxt, msgid_plural, msgstr[n]: their continuation lines
                # belong to neither the msgid nor the msgstr collected here.
                mode = None
            elif not line.strip() and mid is not None:
                # A blank line closes the current entry.
                entries[mid] = mstr or ''
                mid = mstr = mode = None
    if mid is not None:
        entries[mid] = mstr or ''
    return entries
71+
72+
def weblate_string(lang, page):
    """Return ``(kind, search, current)`` describing where to fix a drifted page.

    kind: 'TH' when the catalog holds a translation for the uppercase page
        name that differs from the page name, 'Plain text' when the page has
        an AsciiDoc NAME line, '?' when neither is found.
    search: the text to look for in Weblate (the msgid, or the page name).
    current: the present translation, '(search the text)' when the NAME line
        has no catalog entry, or '(unknown)' for kind '?'.
    """
    d = po(lang)
    th = page.upper()
    # A comp page's name is a troff .TH string, searched by the uppercase name.
    th_current = d.get(th, '')
    if th_current.strip() and th_current.strip().lower() != page.lower():
        return 'TH', th, th_current
    # An AsciiDoc page's name lives in the NAME line, searched by its text.
    nl = name_line(page)
    if nl:
        cur = d.get(nl)
        if cur is None:  # the source line and the msgid can differ in trailing text
            # The msgid must begin with this exact command name: the character
            # after it may not continue an identifier, otherwise page "hal"
            # would pick up the entry for "halcmd - ...".
            own_name = re.compile(re.escape(page) + r'(?![\w.-])')
            for mid, mstr in d.items():
                if ' - ' in mid and own_name.match(mid):
                    nl, cur = mid, mstr
                    break
        return 'Plain text', nl, cur if cur is not None else '(search the text)'
    return '?', page, '(unknown)'
90+
91+
def drifted(man):
    """Walk the built tree and yield ``(lang, page)`` for each mismatching page.

    Only files ending in a dot and one digit are examined. A page is yielded
    when th_name() returns a name that differs from the lowercased filename
    without its section suffix. lang is the first path component below
    ``man``, reported as 'en' when that component looks like ``man<digits>``.
    """
    for folder, _subdirs, names in os.walk(man):
        for name in names:
            if re.search(r'\.\d$', name) is None:
                continue
            rendered = th_name(os.path.join(folder, name))
            if rendered is None:
                continue
            page = re.sub(r'\.\d+$', '', name)
            if rendered == page.lower():
                continue
            top = os.path.relpath(folder, man).split(os.sep)[0]
            lang = 'en' if re.fullmatch(r'man\d+', top) else top
            yield lang, page
103+
104+
def collect(man):
    """Return the sorted report rows ``(lang, page, kind, search, current)``.

    Pages reported under 'en' are left out: English is the reference, it
    never drifts.
    """
    translated = ((lang, page) for lang, page in drifted(man) if lang != 'en')
    return sorted(
        (lang, page, *weblate_string(lang, page)) for lang, page in translated
    )
114+
115+
def print_local(rows):
    """Print the worklist to stdout: a count header, then three lines per row."""
    report = [
        f'# {len(rows)} man-page NAME mismatches. Search the "search" text in the Weblate docs component, language in brackets.'
    ]
    for lang, page, kind, search, current in rows:
        report.extend((
            f'\n[{lang}] {page} ({kind})',
            f' search: {search}',
            f' current: {current}',
        ))
    print('\n'.join(report))
121+
122+
def write_summary(rows, fh):
    """Write the mismatch list to ``fh`` as a Markdown section with a table.

    rows: iterable of ``(lang, page, kind, search, current)`` strings; it is
        also passed to len() for the heading.
    fh: a writable text stream (main() passes the job summary file).

    Each cell is made table-safe: line breaks are folded into single spaces,
    because a raw newline would end the table row early, and ``|`` is
    backslash-escaped so it is not read as a column separator.
    """
    def cell(text):
        return ' '.join(text.splitlines()).replace('|', '\\|')

    fh.write(f'## Manpage NAME mismatches: {len(rows)}\n\n')
    fh.write('Translated NAME lines that drifted from the command name.\n')
    fh.write('Search the "search" text in the Weblate docs component for the language and fix it there.\n\n')
    fh.write('| lang | page | kind | search | current |\n')
    fh.write('| ---- | ---- | ---- | ------ | ------- |\n')
    for row in rows:
        fh.write('| %s | %s | %s | %s | %s |\n' % tuple(cell(c) for c in row))
131+
132+
def main(argv):
    """Scan the man tree named in argv[1] (default MAN) and report NAME drift.

    Returns 1, with a message on stderr, when the tree holds no file ending
    in a dot and one digit. Otherwise returns 0: silently when nothing
    drifted, else after printing the list, appending it to the file named by
    GITHUB_STEP_SUMMARY when that is set, and printing one ``::warning``
    line when GITHUB_ACTIONS is set.
    """
    man = MAN if len(argv) <= 1 else argv[1]

    # A built tree has troff pages; without them the docs are not built and a
    # silent "all clean" would read as success when nothing was actually scanned.
    built = False
    for _root, _dirs, files in os.walk(man):
        if any(re.search(r'\.\d$', f) for f in files):
            built = True
            break
    if not built:
        print(f'{man}: no built man pages found, build the docs first (make manpages docs).', file=sys.stderr)
        return 1

    rows = collect(man)
    if rows:
        print_local(rows)
        summary = os.environ.get('GITHUB_STEP_SUMMARY')
        if summary:
            with open(summary, 'a', encoding='utf-8') as fh:
                write_summary(rows, fh)
        if os.environ.get('GITHUB_ACTIONS'):
            print(f'::warning title=manpage NAME drift::{len(rows)} manpage NAME line(s) disagree with their filename, see the job summary for the list to fix in Weblate')
    # Drift is a report, not an error: quiet when nothing drifted, 0 either way.
    return 0
150+
151+
# Run as a script: the process exit status is whatever main() returns.
if __name__ == '__main__':
    raise SystemExit(main(sys.argv))

0 commit comments

Comments
 (0)