Skip to content

Commit a362180

Browse files
committed
fix italics
1 parent 67b43a5 commit a362180

File tree

1 file changed

+116
-152
lines changed

1 file changed

+116
-152
lines changed

scripts/update.py

Lines changed: 116 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,18 @@
22
# -*- coding: utf-8 -*-
33
"""
44
Archive one-or-many public Matrix rooms.
5-
Creates archive/<slug>/{index.html,room_log.txt}
6-
plus a root index.html listing every room.
5+
Creates archive/<slug>/{index.html, room_log.txt}
6+
and a root index.html listing all rooms by their human titles.
77
"""
88

9-
# ─── std-lib ──────────────────────────────────────────────────────────
9+
# ─── std-lib ──────────────────────────────────────────────────────────
1010
import os, sys, json, subprocess, shlex, hashlib, colorsys, logging, re, html
1111
import collections, pathlib, urllib.parse
1212
from datetime import datetime, timezone
1313

14-
# ══════════════════════════════════════════════════════════════════════
14+
# ════════════════════════════════════════════════
1515
# ░░ CONFIG ░░
16-
# ══════════════════════════════════════════════════════════════════════
16+
# ════════════════════════════════════════════════
1717
HS = os.environ["MATRIX_HS"]
1818
USER_ID = os.environ["MATRIX_USER"]
1919
TOKEN = os.environ["MATRIX_TOKEN"]
@@ -31,9 +31,9 @@
3131
format="%(levelname)s: %(message)s", stream=sys.stderr)
3232
os.environ["NIO_LOG_LEVEL"] = "error"
3333

34-
# ══════════════════════════════════════════════════════════════════════
35-
# ░░ matrix-commander credentials ░░
36-
# ══════════════════════════════════════════════════════════════════════
34+
# ════════════════════════════════════════════════
35+
# ░░ matrix-commander creds ░░
36+
# ════════════════════════════════════════════════
3737
cred_file = pathlib.Path("mc_creds.json")
3838
store_dir = pathlib.Path("store"); store_dir.mkdir(exist_ok=True)
3939
if not cred_file.exists():
@@ -46,243 +46,207 @@
4646
}))
4747
CRED = ["--credentials", str(cred_file), "--store", str(store_dir)]
4848

49-
# ══════════════════════════════════════════════════════════════════════
49+
# ════════════════════════════════════════════════
5050
# ░░ helpers ░░
51-
# ══════════════════════════════════════════════════════════════════════
51+
# ════════════════════════════════════════════════
5252
def run(cmd, timeout=None) -> str:
    """Run *cmd* and return its captured standard output as text.

    Args:
        cmd: Command handed to ``subprocess.run`` (callers in this file
            pass an argument list).
        timeout: Optional limit in seconds, forwarded to ``subprocess.run``.

    Returns:
        Everything the process wrote to stdout.

    Raises:
        subprocess.CalledProcessError: The process exited with a non-zero
            status; the exception carries the captured stdout and stderr.
        subprocess.TimeoutExpired: *timeout* elapsed before the process ended.
    """
    # check=True makes subprocess.run raise CalledProcessError (with the
    # captured output and stderr attached) on a non-zero exit status.
    res = subprocess.run(
        cmd, text=True, capture_output=True, timeout=timeout, check=True
    )
    return res.stdout
5757

58-
def json_lines(blob: str):
    """Yield the decoded value of every line of *blob* that holds JSON.

    Only lines whose first character is ``{`` or ``[`` are tried; any line
    that then fails to decode is skipped without an error.

    Args:
        blob: Multi-line text, e.g. the stdout captured from a command.

    Yields:
        The object produced by ``json.loads`` for each decodable line.
    """
    for ln in blob.splitlines():
        if not ln.startswith(("{", "[")):
            continue
        # Keep the try body to the decode only, so an exception thrown into
        # the generator at the yield is never mistaken for a bad line.
        try:
            value = json.loads(ln)
        except json.JSONDecodeError:
            continue
        yield value
6563

6664
def when(e):
    """Return the event's ``origin_server_ts`` (milliseconds) as a naive UTC datetime."""
    # datetime.utcfromtimestamp is deprecated since Python 3.12; convert via
    # an aware UTC datetime and drop tzinfo so the result is unchanged.
    moment = datetime.fromtimestamp(e["origin_server_ts"] / 1000, tz=timezone.utc)
    return moment.replace(tzinfo=None)


def nice_user(u):
    """Return *u* without leading ``@`` characters and without anything from the first ``:`` on."""
    return u.lstrip("@").split(":", 1)[0]


def slug(s):
    """Return *s* percent-encoded (nothing left unquoted) with every ``%`` replaced by ``_``."""
    return urllib.parse.quote(s, safe="").replace("%", "_")
6967

70-
# ── colour hashing (good distribution) ────────────────────────────────
71-
def rich_color(uid: str) -> str:
    """Return a ``#rrggbb`` colour derived deterministically from *uid*.

    The SHA-1 digest of *uid* supplies the hue (first two bytes) and small
    offsets around 0.55 for lightness (third byte) and saturation (fourth
    byte); the HLS triple is then converted to RGB.

    Args:
        uid: Any string; callers in this file pass a message sender id.

    Returns:
        A seven-character lowercase hex colour string.
    """
    d = hashlib.sha1(uid.encode()).digest()
    hue = int.from_bytes(d[:2], "big") / 65535
    # Each of these stays within 0.55 ± 0.125.
    lightness = .55 + (d[2] / 255 - .5) * .25
    saturation = .55 + (d[3] / 255 - .5) * .25
    r, g, b = colorsys.hls_to_rgb(hue, lightness, saturation)
    return f"#{int(r*255):02x}{int(g*255):02x}{int(b*255):02x}"
7673

77-
# ── markdown-ish helpers ──────────────────────────────────────────────
74+
# ── markdown-ish filters ──────────────────────────────────────────────
7875
_re_mdlink = re.compile(r'\[([^\]]+?)\]\((https?://[^\s)]+)\)')
7976
_re_rawurl = re.compile(r'(?<!["\'>])(https?://[^\s<]+)')
8077
_re_fence = re.compile(r'```(\w+)?\n([\s\S]*?)```', re.MULTILINE)
8178
_re_inline = re.compile(r'`([^`\n]+?)`')
82-
_re_italic = re.compile(r'(?<!\S)\*([^*\n]+?)\*(?!\S)') # new stricter rule
79+
_re_italic = re.compile(r'(?<!\S)\*([^*\n]+?)\*(?!\S)') # only *foo*
8380

84-
def md_links(t: str) -> str:
81+
def md_links(t:str)->str:
8582
t = _re_mdlink.sub(lambda m:
8683
f'<a href="{m.group(2)}" target="_blank" rel="noopener">{m.group(1)}</a>', t)
8784
return _re_rawurl.sub(lambda m:
8885
f'<a href="{m.group(0)}" target="_blank" rel="noopener">{m.group(0)}</a>', t)
8986

90-
def fmt_body(body: str) -> str:
91-
# Fenced blocks first
92-
out, pos = [], 0
87+
def fmt_body(body:str)->str:
88+
segs, pos = [], 0
9389
for fence in _re_fence.finditer(body):
94-
out.append(_re_italic.sub(r'<em>\1</em>', md_links(
95-
html.escape(body[pos:fence.start()]))))
96-
lang = fence.group(1) or ""
97-
code = html.escape(fence.group(2))
98-
out.append(f"<pre><code class='{lang}'>{code}</code></pre>")
90+
segs.append(("txt", body[pos:fence.start()]))
91+
segs.append(("code", fence))
9992
pos = fence.end()
100-
tail = html.escape(body[pos:])
101-
102-
seg, p = [], 0
103-
for ic in _re_inline.finditer(tail):
104-
seg.append(_re_italic.sub(r'<em>\1</em>', md_links(tail[p:ic.start()])))
105-
seg.append(f"<code>{html.escape(ic.group(1))}</code>")
106-
p = ic.end()
107-
seg.append(_re_italic.sub(r'<em>\1</em>', md_links(tail[p:])))
108-
out.append("".join(seg))
109-
return "".join(out)
93+
segs.append(("txt", body[pos:]))
94+
95+
html_out=[]
96+
for typ,part in segs:
97+
if typ=="code":
98+
lang = part.group(1) or ""
99+
code = html.escape(part.group(2))
100+
html_out.append(f"<pre><code class='{lang}'>{code}</code></pre>")
101+
continue
102+
chunk = html.escape(part)
103+
chunk = _re_inline.sub(lambda m: f"<code>{html.escape(m.group(1))}</code>", chunk)
104+
chunk = _re_italic.sub(r"<em>\1</em>", chunk)
105+
html_out.append(md_links(chunk))
106+
return "".join(html_out)
110107

111-
# ══════════════════════════════════════════════════════════════════════
108+
# ════════════════════════════════════════════════
112109
# ░░ archiver ░░
113-
# ══════════════════════════════════════════════════════════════════════
114-
# Stylesheet entry for each room page; kept as one list item of the page.
_ROOM_PAGE_STYLE = """
<style>
body{margin:0 auto;max-width:75ch;font:15px/1.55 system-ui,-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;
background:#141414;color:#e6e6e6;padding:2rem}
.msg{white-space:pre-wrap;margin:.3em 0}
.reply{margin-left:2ch}
.edited{opacity:.65;font-style:italic;font-size:.9em}
pre{background:#1e1e1e;padding:.6em;border-radius:4px;overflow:auto}
code{font-family:ui-monospace,monospace}
.u{font-weight:600}
time{color:#888}
a{color:#9cf;text-decoration:none}
i,em{font-style:normal} /* reset */
em{font-style:italic}
@media(max-width:480px){
body{padding:1rem;font-size:14px}
pre{font-size:13px}
}
</style>"""


def archive(room: str) -> tuple[str, str, str] | None:
    """Export one room to ``archive/<slug>/{index.html, room_log.txt}``.

    Steps: record *room* in the credentials file, try to join and sync it via
    ``matrix-commander``, look up a human title, fetch the messages as JSON
    lines, fold edits into their originals, group thread replies under their
    roots, then write a plain-text log and an HTML page.

    Args:
        room: The room identifier handed to ``matrix-commander``.

    Returns:
        ``(title, room, slug(room))``, or ``None`` when the fetch produced no
        ``m.room.message`` events (nothing is written in that case).

    Raises:
        subprocess.CalledProcessError, subprocess.TimeoutExpired: The main
            message fetch failed or timed out.  Failures of the join/sync
            and room-info calls are logged at debug level and ignored.
    """
    logging.info("room %s", room)

    # Store the room as room_id/default_room in the credentials file before
    # any matrix-commander call is made for it.
    cfg = json.loads(cred_file.read_text())
    cfg.update(room_id=room, default_room=room)
    cred_file.write_text(json.dumps(cfg))

    rdir = pathlib.Path("archive") / slug(room)
    rdir.mkdir(parents=True, exist_ok=True)

    # Best effort: a failure here is not fatal, but it is no longer invisible.
    for cmd in (["--room-join", room], ["--room", room, "--listen", "once"]):
        try:
            run(["matrix-commander", *CRED, *cmd])
        except subprocess.CalledProcessError as exc:
            logging.debug("matrix-commander %s failed for %s: %s", cmd[0], room, exc)

    # title: first non-empty candidate field, else the room id itself
    title = room
    try:
        info = next(json_lines(run(["matrix-commander", *CRED, "--room", room,
                                    "--get-room-info", "--output", "json"])), {})
        for k in ("room_display_name", "room_name", "canonical_alias", "room_alias"):
            if info.get(k):
                title = info[k]
                break
    except Exception as exc:  # deliberate: the room id is a usable title
        logging.debug("no room info for %s (%s); using the id as title", room, exc)

    listen = {"all": ["all"], "tail": ["tail", "--tail", TAIL_N], "once": ["once"]}[LISTEN_MODE]
    raw = run(["matrix-commander", *CRED, "--room", room, "--listen", *listen,
               "--listen-self", "--output", "json"],
              timeout=TIMEOUT_S if LISTEN_MODE == "all" else None)

    originals, edits = {}, {}
    for j in json_lines(raw):
        if not isinstance(j, dict):  # json_lines may also yield JSON arrays
            continue
        ev = j.get("source", j)
        if ev.get("type") != "m.room.message":
            continue
        content = ev["content"]
        rel = content.get("m.relates_to", {})
        if rel.get("rel_type") == "m.replace" or "m.new_content" in content:
            target = rel.get("event_id")
            prev = edits.get(target)
            # Keep the newest replacement by timestamp, not by stream order.
            if prev is None or ev.get("origin_server_ts", 0) >= prev.get("origin_server_ts", 0):
                edits[target] = ev
        else:
            originals[ev["event_id"]] = ev

    # apply last edit
    for eid, msg in originals.items():
        rep = edits.get(eid)
        if rep is None:
            continue
        new_body = rep["content"].get("m.new_content", {}).get("body") \
            or rep["content"].get("body", "")
        msg["content"]["body"] = new_body
        msg["_edited"] = True

    events = sorted(originals.values(), key=when)
    if not events:
        return None

    # threading
    byid = {e["event_id"]: e for e in events}
    threads = collections.defaultdict(list)
    for e in events:
        rel = e["content"].get("m.relates_to", {})
        # Nest a reply only when its root is part of this export; otherwise
        # it stays a top-level message instead of vanishing from the output.
        if rel.get("rel_type") == "m.thread" and rel.get("event_id") in byid:
            threads[rel["event_id"]].append(e["event_id"])
    nested = {c for kids in threads.values() for c in kids}
    roots = [e for e in events if e["event_id"] not in nested]

    # One timestamp for both outputs (datetime.utcnow is deprecated).
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    # plain text
    plain = [f"# room: {title}", f"# exported: {stamp}"]

    def pl(ev, lvl):
        body = ev["content"].get("body", "")
        if ev.get("_edited"):
            body += " [edited]"
        plain.append(f"{' '*lvl}{'↳ ' if lvl else ''}{when(ev).strftime('%Y-%m-%d %H:%M')} "
                     f"{nice_user(ev['sender'])}: {body}")

    for r in roots:
        pl(r, 0)
        for cid in threads.get(r["event_id"], ()):
            pl(byid[cid], 1)

    # html
    html_lines = [
        "<!doctype html><meta charset=utf-8><meta name=viewport content='width=device-width,initial-scale=1'>",
        f"<title>{html.escape(title)} – archive</title>",
        _ROOM_PAGE_STYLE,
        f"<h1>{html.escape(title)}</h1>",
        f"<p><small>last updated {stamp}</small></p>",
        "<p><a href='room_log.txt'>⇩ plaintext</a> · <a href='../../'>⇦ all rooms</a></p>",
        "<hr>"
    ]

    def add(ev, lvl):
        cls = "msg" + (" reply" if lvl else "")
        body = fmt_body(ev['content'].get('body', ''))
        if ev.get("_edited"):
            body += ' <span class="edited">(edited)</span>'
        # The sender comes from the event stream, so escape it like the body.
        html_lines.append(f"<div class='{cls}'>"
                          f"<time>{when(ev).strftime('%Y-%m-%d %H:%M')}</time>&ensp;"
                          f"<span class='u' style='color:{rich_color(ev['sender'])}'>"
                          f"{html.escape(nice_user(ev['sender']))}</span>: {body}</div>")

    for r in roots:
        add(r, 0)
        for cid in threads.get(r["event_id"], ()):
            add(byid[cid], 1)

    (rdir/"room_log.txt").write_text("\n".join(plain)+"\n", encoding="utf-8")
    (rdir/"index.html").write_text("\n".join(html_lines)+"\n", encoding="utf-8")
    logging.info(" wrote → %s", rdir)
    return title, room, slug(room)
255225

256-
# ══════════════════════════════════════════════════════════════════════
226+
# ════════════════════════════════════════════════
257227
# ░░ main ░░
258-
# ══════════════════════════════════════════════════════════════════════
228+
# ════════════════════════════════════════════════
259229
# Make sure the output root exists and drop any index.html left directly
# inside archive/ (an earlier revision called it an old artefact); the
# landing page is written to ./index.html below.
pathlib.Path("archive").mkdir(exist_ok=True)
(pathlib.Path("archive/index.html")).unlink(missing_ok=True)

# Archive every configured room; one failing room must not stop the rest.
meta = []
for rid in ROOMS:
    try:
        m = archive(rid)
        if m:
            meta.append(m)
    except Exception as exc:
        # exc_info keeps the traceback, which the one-line message alone lost.
        logging.error("‼ failed for %s – %s", rid, exc, exc_info=True)

# Landing page: one entry per archived room, sorted by lower-cased title.
meta.sort(key=lambda t: t[0].lower())
listing = "\n".join(
    f"<li><a href='archive/{s}/index.html'>{html.escape(t)}</a><br><small>{html.escape(r)}</small></li>"
    for t, r, s in meta)

landing = f"""<!doctype html><meta charset=utf-8><meta name=viewport content='width=device-width,initial-scale=1'>
<title>Archived rooms</title>
<style>body{{margin:0 auto;max-width:65ch;font:16px/1.55 system-ui,-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;
background:#141414;color:#e6e6e6;padding:2rem}}a{{color:#9cf;text-decoration:none}}</style>
<h1>Archived rooms</h1><ul>{listing}</ul>
"""
pathlib.Path("index.html").write_text(landing, encoding="utf-8")
logging.info("root index.html regenerated ✓")

0 commit comments

Comments
 (0)