Skip to content

Commit 2a99297

Browse files
committed
italics update
1 parent e35f19d commit 2a99297

File tree

1 file changed

+82
-103
lines changed

1 file changed

+82
-103
lines changed

scripts/update.py

Lines changed: 82 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,16 @@
22
# -*- coding: utf-8 -*-
33
"""
44
Archive one-or-many public Matrix rooms.
5-
Produces per-room archives (HTML + plaintext) under archive/<slug>/
6-
and a nice responsive landing page at /index.html
5+
Creates archive/<slug>/{index.html, room_log.txt}
6+
and a root index.html listing all rooms.
77
"""
88

9-
# ─────────── std-lib ───────────
9+
# ─── std-lib ──────────────────────────────────────────────────────────
1010
import os, sys, json, subprocess, shlex, hashlib, colorsys, logging, re, html
1111
import collections, pathlib, urllib.parse
1212
from datetime import datetime, timezone
1313

14-
# ══════════ CONFIG ═════════════
14+
# ══════════ CONFIG ═══════════════════════════════════════════════════
1515
HS = os.environ["MATRIX_HS"]
1616
USER_ID = os.environ["MATRIX_USER"]
1717
TOKEN = os.environ["MATRIX_TOKEN"]
@@ -28,16 +28,20 @@
2828
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
2929
os.environ["NIO_LOG_LEVEL"] = "error"
3030

31-
# ═════ matrix-commander creds ══
31+
# ═══════════ matrix-commander creds ═══════════════════════════════════
3232
# Credentials file and state directory handed to every matrix-commander call.
cred_file = pathlib.Path("mc_creds.json")
store_dir = pathlib.Path("store")
store_dir.mkdir(exist_ok=True)

# Seed the credentials file on first run only; an existing file is left alone.
if not cred_file.exists():
    cred_file.write_text(json.dumps(dict(
        homeserver=HS,
        user_id=USER_ID,
        access_token=TOKEN,
        device_id="GH",
        default_room=ROOMS[0],
    )))

# Argument prefix shared by all matrix-commander invocations.
CRED = ["--credentials", str(cred_file), "--store", str(store_dir)]
3943

40-
# ═════ helpers ═════
44+
# ═══════════ helpers ══════════════════════════════════════════════════
4145
def run(cmd, timeout=None) -> str:
4246
res = subprocess.run(cmd, text=True, capture_output=True, timeout=timeout)
4347
if res.returncode:
@@ -46,12 +50,11 @@ def run(cmd, timeout=None) -> str:
4650

4751
def json_lines(blob: str):
    """Yield every JSON value found on its own line in *blob*.

    Each line is stripped of surrounding whitespace first, so indented
    JSON lines are recognised as well.  Only lines starting with ``{`` or
    ``[`` are handed to the JSON parser; anything else (log noise, blank
    lines) and any line that fails to parse is skipped silently.
    """
    for raw in blob.splitlines():
        line = raw.strip()
        # Cheap pre-filter: only objects and arrays are of interest.
        if not line.startswith(("{", "[")):
            continue
        try:
            value = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Yield outside the try so errors raised by the consumer are not
        # mistaken for parse failures.
        yield value
5356

54-
def when(e):
    """Return the event's ``origin_server_ts`` (milliseconds) as a naive UTC datetime."""
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; convert
    # with an explicit UTC zone and drop tzinfo so the result stays naive,
    # exactly as before.
    aware = datetime.fromtimestamp(e["origin_server_ts"] / 1000, tz=timezone.utc)
    return aware.replace(tzinfo=None)


def nice_user(u):
    """Return the part of *u* before the first ``:``, without leading ``@`` characters."""
    return u.lstrip("@").split(":", 1)[0]


def slug(s):
    """Return *s* fully percent-encoded, with every ``%`` replaced by ``_``."""
    return urllib.parse.quote(s, safe="").replace("%", "_")
5760

@@ -61,132 +64,122 @@ def rich_color(uid:str) -> str:
6164
r,g,b = colorsys.hls_to_rgb(h,l,s)
6265
return f"#{int(r*255):02x}{int(g*255):02x}{int(b*255):02x}"
6366

64-
# ═════ simple markdown-ish formatting ═════
67+
# ─── lightweight markdown-ish rendering ───────────────────────────────
6568
_re_mdlink = re.compile(r'\[([^\]]+?)\]\((https?://[^\s)]+)\)')
_re_rawurl = re.compile(r'(?<!["\'>])(https?://[^\s<]+)')
_re_fence = re.compile(r'```(\w+)?\n([\s\S]*?)```', re.MULTILINE)
_re_inline = re.compile(r'`([^`\n]+?)`')
# italics only if *text* is surrounded by whitespace / line edges
_re_italic = re.compile(r'(?<!\S)\*([^*\n]+?)\*(?!\S)')


def md_links(txt: str) -> str:
    """Turn ``[label](url)`` links and bare http(s) URLs in *txt* into anchors.

    No escaping is done here: the matched text is inserted verbatim, so
    callers pass text that is already HTML-escaped.
    """
    def anchor(href: str, label: str) -> str:
        return f'<a href="{href}" target="_blank" rel="noopener">{label}</a>'

    txt = _re_mdlink.sub(lambda m: anchor(m.group(2), m.group(1)), txt)
    # The look-behind in _re_rawurl skips URLs preceded by a quote or '>',
    # i.e. the ones that were just placed inside an anchor.
    return _re_rawurl.sub(lambda m: anchor(m.group(0), m.group(0)), txt)


def _fmt_plain(text: str) -> str:
    """Escape raw *text*, then apply italics and finally link markup."""
    return md_links(_re_italic.sub(r'<em>\1</em>', html.escape(text)))


def _fmt_inline(text: str) -> str:
    """Render raw *text* that contains no fenced blocks.

    Inline `code` spans become ``<code>`` elements whose content is escaped
    exactly once and receives no further markup; everything between them
    goes through ``_fmt_plain``.
    """
    pieces, pos = [], 0
    for code in _re_inline.finditer(text):
        pieces.append(_fmt_plain(text[pos:code.start()]))
        pieces.append(f"<code>{html.escape(code.group(1))}</code>")
        pos = code.end()
    pieces.append(_fmt_plain(text[pos:]))
    return "".join(pieces)


def fmt_body(body: str) -> str:
    """Render a raw message body as an HTML fragment.

    Fenced ``` blocks become ``<pre><code class='LANG'>`` with escaped
    content.  All text outside the fences (before, between and after them)
    is treated the same way: inline code → escape → italics → links.
    """
    out, pos = [], 0
    for f in _re_fence.finditer(body):
        # normal text preceding this fenced block
        out.append(_fmt_inline(body[pos:f.start()]))
        lang = f.group(1) or ""
        out.append(f"<pre><code class='{lang}'>{html.escape(f.group(2))}</code></pre>")
        pos = f.end()
    # whatever follows the last fenced block (or the whole body if none)
    out.append(_fmt_inline(body[pos:]))
    return "".join(out)
103100

104-
# ═════ archiver core ═════
101+
# ═══════════ archiver ═════════════════════════════════════════════════
105102
def archive(room:str):
106-
logging.info("room %s",room)
107-
cred=json.loads(cred_file.read_text());cred["room_id"]=cred["default_room"]=room
108-
cred_file.write_text(json.dumps(cred))
103+
logging.info("room %s", room)
104+
# update creds
105+
data=json.loads(cred_file.read_text()); data.update(room_id=room, default_room=room)
106+
cred_file.write_text(json.dumps(data))
109107

110-
rdir=pathlib.Path("archive")/slug(room);rdir.mkdir(parents=True,exist_ok=True)
108+
rdir=pathlib.Path("archive")/slug(room); rdir.mkdir(parents=True, exist_ok=True)
111109
for cmd in (["--room-join",room],["--room",room,"--listen","once"]):
112110
try: run(["matrix-commander",*CRED,*cmd])
113111
except subprocess.CalledProcessError: pass
114112

115113
title=room
116114
try:
117-
info=next(json_lines(run(["matrix-commander",*CRED,"--room",room,"--get-room-info","--output","json"])),{})
115+
meta=next(json_lines(run(["matrix-commander",*CRED,"--room",room,"--get-room-info","--output","json"])),{})
118116
for k in ("room_display_name","room_name","canonical_alias","room_alias"):
119-
if info.get(k): title=info[k];break
120-
except Exception as e:
121-
logging.warning(" room-info failed: %s",e)
117+
if meta.get(k): title=meta[k];break
118+
except: pass
122119

123-
blob=run(["matrix-commander",*CRED,"--room",room,*{
124-
"all":["--listen","all","--listen-self"],
125-
"tail":["--listen","tail","--tail",TAIL_N,"--listen-self"],
126-
"once":["--listen","once","--listen-self"]}[LISTEN_MODE],"--output","json"],
127-
timeout=TIMEOUT_S if LISTEN_MODE=="all" else None)
120+
listen_args={"all":["--listen","all","--listen-self"],
121+
"tail":["--listen","tail","--tail",TAIL_N,"--listen-self"],
122+
"once":["--listen","once","--listen-self"]}[LISTEN_MODE]
123+
blob=run(["matrix-commander",*CRED,"--room",room,*listen_args,"--output","json"],
124+
timeout=TIMEOUT_S if LISTEN_MODE=="all" else None)
128125

129-
originals,edits={},{}
126+
originals, edits = {}, {}
130127
for j in json_lines(blob):
131128
ev=j.get("source",j)
132-
if ev.get("type")!="m.room.message":continue
133-
c,rel=ev["content"],ev["content"].get("m.relates_to",{})
129+
if ev.get("type")!="m.room.message": continue
130+
c, rel = ev["content"], ev["content"].get("m.relates_to",{})
134131
if rel.get("rel_type")=="m.replace" or "m.new_content" in c:
135132
edits[rel.get("event_id")] = ev
136133
else:
137-
originals[ev["event_id"]]=ev
138-
for eid,msg in originals.items():
134+
originals[ev["event_id"]] = ev
135+
136+
for eid, msg in originals.items():
139137
if eid in edits:
140-
rep=edits[eid]
141-
new=rep["content"].get("m.new_content",{}).get("body") or rep["content"].get("body","")
142-
msg["content"]["body"]=new
143-
msg["_edited"]=True
138+
rep = edits[eid]
139+
new = rep["content"].get("m.new_content",{}).get("body") \
140+
or rep["content"].get("body","")
141+
msg["content"]["body"] = new
142+
msg["_edited"] = True
144143

145144
evs=list(originals.values())
146145
if not evs: return title
147-
evs.sort(key=when) # chronological
146+
evs.sort(key=when)
148147

149-
# threading map
148+
# threading
150149
byid={e["event_id"]:e for e in evs}
151150
threads=collections.defaultdict(list)
152151
for e in evs:
153152
rel=e["content"].get("m.relates_to",{})
154-
if rel.get("rel_type")=="m.thread":
155-
threads[rel["event_id"]].append(e["event_id"])
156-
roots=[e for e in evs if e["event_id"] not in {c for ls in threads.values() for c in ls}]
153+
if rel.get("rel_type")=="m.thread": threads[rel["event_id"]].append(e["event_id"])
154+
roots=[e for e in evs if e["event_id"] not in {c for v in threads.values() for c in v}]
157155

158-
# plaintext
156+
# plain-text
159157
stamp=datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")
160158
txt=[f"# room: {title}",f"# exported: {stamp}"]
161159
def add_txt(ev,lvl):
162160
body=ev["content"].get("body","")
163161
if ev.get("_edited"): body+=" [edited]"
164-
txt.append(f"{' '*lvl}{'↳ ' if lvl else ''}"
165-
f"{when(ev).strftime('%Y-%m-%d %H:%M')} "
162+
txt.append(f"{' '*lvl}{'↳ ' if lvl else ''}{when(ev).strftime('%Y-%m-%d %H:%M')} "
166163
f"{nice_user(ev['sender'])}: {body}")
167164
for r in roots:
168165
add_txt(r,0)
169-
for cid in sorted(threads[r["event_id"]],key=lambda c:when(byid[c])):
170-
add_txt(byid[cid],1)
166+
for cid in threads[r["event_id"]]: add_txt(byid[cid],1)
171167

172168
# html
173169
last=datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")
174170
h=[
175-
"<!doctype html><meta charset=utf-8><meta name='viewport' content='width=device-width,initial-scale=1'>",
171+
"<!doctype html><meta charset=utf-8><meta name=viewport content='width=device-width,initial-scale=1'>",
176172
f"<title>{html.escape(title)} – archive</title>",
177173
"<style>",
178174
"body{margin:0 auto;max-width:75ch;font:15px/1.55 system-ui,"
179175
"-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;background:#141414;color:#e6e6e6;padding:2rem}",
180176
".msg{white-space:pre-wrap;margin:0.3em 0}",
181-
".reply{margin-left:2ch}",
182-
".edited{opacity:0.75;font-style:italic}",
183-
"pre{background:#1e1e1e;padding:0.6em;border-radius:4px;overflow:auto}",
177+
".reply{margin-left:2ch}.edited{opacity:.7;font-style:italic}",
178+
"pre{background:#1e1e1e;padding:.6em;border-radius:4px;overflow:auto}",
184179
"code{font-family:ui-monospace,monospace}",
185-
".u{font-weight:600}",
186-
"time{color:#888}",
180+
".u{font-weight:600}time{color:#888}",
187181
"a{color:#9cf;text-decoration:none}",
188-
"em{font-style:italic}", # our italics
189-
"i,em:not(.allow){font-style:normal}", # neutralise stray <em>/<i>
182+
"i,em{font-style:normal} em{font-style:italic}",
190183
"@media(max-width:480px){body{padding:1rem;font-size:14px}pre{font-size:13px}}",
191184
"</style>",
192185
f"<h1>{html.escape(title)}</h1>",
@@ -201,42 +194,28 @@ def add_html(ev,lvl):
201194
f"{fmt_body(ev['content'].get('body',''))}</div>")
202195
for r in roots:
203196
add_html(r,0)
204-
for cid in sorted(threads[r["event_id"]],key=lambda c:when(byid[c])):
205-
add_html(byid[cid],1)
197+
for cid in threads[r["event_id"]]: add_html(byid[cid],1)
206198

207199
(rdir/"room_log.txt").write_text("\n".join(txt)+"\n",encoding="utf-8")
208200
(rdir/"index.html").write_text("\n".join(h)+"\n",encoding="utf-8")
209-
logging.info(" written → %s",rdir)
210201
return title
211202

212-
# ═════ main ═════
203+
# ═══════════ MAIN ═════════════════════════════════════════════════════
213204
pathlib.Path("archive").mkdir(exist_ok=True)
214205
(pathlib.Path("archive/index.html")).unlink(missing_ok=True)
215206

216207
landing=[]
217208
for rid in ROOMS:
218-
try:
219-
title=archive(rid)
220-
landing.append((title,rid,slug(rid)))
209+
try: landing.append((archive(rid), rid, slug(rid)))
221210
except Exception as exc:
222-
logging.error("‼ failed for %s – %s",rid,exc)
211+
logging.error("‼ failed for %s – %s", rid, exc)
223212

224213
landing.sort(key=lambda t:t[0].lower())
225214
ul="\n".join(f"<li><a href='archive/{s}/index.html'>{html.escape(t)}</a>"
226-
f"<br><small>{html.escape(r)}</small></li>"
227-
for t,r,s in landing)
215+
f"<br><small>{html.escape(r)}</small></li>" for t,r,s in landing)
228216

229217
pathlib.Path("index.html").write_text(
230218
"\n".join([
231-
"<!doctype html><meta charset=utf-8><meta name='viewport' content='width=device-width,initial-scale=1'>",
232-
"<title>Archived rooms</title>",
233-
"<style>",
234-
"body{margin:0 auto;max-width:65ch;font:16px/1.55 system-ui,"
235-
"-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;background:#141414;color:#e6e6e6;padding:2rem}",
236-
"a{color:#9cf;text-decoration:none}@media(max-width:480px){body{padding:1rem;font-size:15px}}",
237-
"</style>",
238-
"<h1>Archived rooms</h1><ul>",ul,"</ul>"
239-
])+"\n",encoding="utf-8")
240-
241-
logging.info("root index.html regenerated ✓")
219+
"<!doctype html><meta charset=utf-8><meta name=viewport content='width=device-width,initial-scale=1'>",
220+
"<title
242221

0 commit comments

Comments
 (0)