|
2 | 2 | # -*- coding: utf-8 -*- |
3 | 3 | """ |
4 | 4 | Archive one-or-many public Matrix rooms. |
5 | | -Creates archive/<slug>/{index.html,room_log.txt} |
6 | | -plus a root index.html listing every room. |
| 5 | +Creates archive/<slug>/{index.html, room_log.txt} |
| 6 | +and a root index.html listing all rooms by their human titles. |
7 | 7 | """ |
8 | 8 |
|
9 | | -# ─── std-lib ─────────────────────────────────────────────────────────── |
| 9 | +# ─── std-lib ────────────────────────────────────────────────────────── |
10 | 10 | import os, sys, json, subprocess, shlex, hashlib, colorsys, logging, re, html |
11 | 11 | import collections, pathlib, urllib.parse |
12 | 12 | from datetime import datetime, timezone |
13 | 13 |
|
14 | | -# ══════════════════════════════════════════════════════════════════════ |
| 14 | +# ════════════════════════════════════════════════ |
15 | 15 | # ░░ CONFIG ░░ |
16 | | -# ══════════════════════════════════════════════════════════════════════ |
| 16 | +# ════════════════════════════════════════════════ |
17 | 17 | HS = os.environ["MATRIX_HS"] |
18 | 18 | USER_ID = os.environ["MATRIX_USER"] |
19 | 19 | TOKEN = os.environ["MATRIX_TOKEN"] |
|
31 | 31 | format="%(levelname)s: %(message)s", stream=sys.stderr) |
32 | 32 | os.environ["NIO_LOG_LEVEL"] = "error" |
33 | 33 |
|
34 | | -# ══════════════════════════════════════════════════════════════════════ |
35 | | -# ░░ matrix-commander credentials ░░ |
36 | | -# ══════════════════════════════════════════════════════════════════════ |
| 34 | +# ════════════════════════════════════════════════ |
| 35 | +# ░░ matrix-commander creds ░░ |
| 36 | +# ════════════════════════════════════════════════ |
37 | 37 | cred_file = pathlib.Path("mc_creds.json") |
38 | 38 | store_dir = pathlib.Path("store"); store_dir.mkdir(exist_ok=True) |
39 | 39 | if not cred_file.exists(): |
|
46 | 46 | })) |
47 | 47 | CRED = ["--credentials", str(cred_file), "--store", str(store_dir)] |
48 | 48 |
|
49 | | -# ══════════════════════════════════════════════════════════════════════ |
| 49 | +# ════════════════════════════════════════════════ |
50 | 50 | # ░░ helpers ░░ |
51 | | -# ══════════════════════════════════════════════════════════════════════ |
| 51 | +# ════════════════════════════════════════════════ |
def run(cmd, timeout=None) -> str:
    """Run *cmd* and return its captured standard output as text.

    Args:
        cmd: Argument list handed to ``subprocess.run`` (no shell involved).
        timeout: Optional limit in seconds; ``None`` waits indefinitely.

    Returns:
        Everything the child process wrote to stdout.

    Raises:
        subprocess.CalledProcessError: The process exited with a non-zero
            status; the exception carries the captured stdout and stderr.
        subprocess.TimeoutExpired: *timeout* elapsed before the process ended.
    """
    # check=True makes subprocess raise CalledProcessError (with the captured
    # stdout/stderr attached) on a non-zero exit status, so no manual
    # return-code test is needed.
    completed = subprocess.run(
        cmd, text=True, capture_output=True, timeout=timeout, check=True
    )
    return completed.stdout
57 | 57 |
|
def json_lines(blob: str):
    """Yield every line of *blob* that parses as a JSON object or array.

    Only lines whose first character is ``{`` or ``[`` are considered; lines
    that look like JSON but fail to parse are skipped silently, as are empty
    lines and any other text mixed into the output.
    """
    for candidate in blob.splitlines():
        # startswith() on an empty line is simply False, so no separate
        # emptiness check is required.
        if not candidate.startswith(("{", "[")):
            continue
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        yield parsed
65 | 63 |
|
def when(e):
    """Return the event's ``origin_server_ts`` (milliseconds) as a naive UTC datetime."""
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; convert via
    # an aware datetime and drop the tzinfo so callers still get naive UTC.
    aware = datetime.fromtimestamp(e["origin_server_ts"] / 1000, tz=timezone.utc)
    return aware.replace(tzinfo=None)


def nice_user(u):
    """Return the part of a user id before the first ``:``, without leading ``@``."""
    return u.lstrip("@").split(":", 1)[0]


def slug(s):
    """Return *s* percent-encoded with every ``%`` replaced by ``_``."""
    return urllib.parse.quote(s, safe="").replace("%", "_")
69 | 67 |
|
70 | | -# ── colour hashing (good distribution) ──────────────────────────────── |
def rich_color(uid: str) -> str:
    """Derive a stable ``#rrggbb`` colour from *uid*.

    The SHA-1 digest of the id drives the colour: the first two bytes pick the
    hue, the third and fourth bytes nudge lightness and saturation around
    0.55 by at most ±0.125 each.
    """
    digest = hashlib.sha1(uid.encode()).digest()
    hue = int.from_bytes(digest[:2], "big") / 0xFFFF
    lightness = .55 + (digest[2] / 255 - .5) * .25
    saturation = .55 + (digest[3] / 255 - .5) * .25
    channels = colorsys.hls_to_rgb(hue, lightness, saturation)
    return "#" + "".join(f"{int(channel * 255):02x}" for channel in channels)
76 | 73 |
|
77 | | -# ── markdown-ish helpers ────────────────────────────────────────────── |
| 74 | +# ── markdown-ish filters ────────────────────────────────────────────── |
_re_mdlink = re.compile(r'\[([^\]]+?)\]\((https?://[^\s)]+)\)')
_re_rawurl = re.compile(r'(?<!["\'>])(https?://[^\s<]+)')
_re_fence = re.compile(r'```(\w+)?\n([\s\S]*?)```', re.MULTILINE)
_re_inline = re.compile(r'`([^`\n]+?)`')
_re_italic = re.compile(r'(?<!\S)\*([^*\n]+?)\*(?!\S)')  # only *foo*


def md_links(t: str) -> str:
    """Turn ``[text](url)`` links and bare http(s) URLs in *t* into anchors.

    *t* is inserted into the markup as-is, so callers must pass text that has
    already been HTML-escaped.
    """
    t = _re_mdlink.sub(
        lambda m: f'<a href="{m.group(2)}" target="_blank" rel="noopener">{m.group(1)}</a>',
        t)
    # The look-behind in _re_rawurl skips URLs directly preceded by a quote
    # or '>', i.e. the ones that already sit inside an anchor built above.
    return _re_rawurl.sub(
        lambda m: f'<a href="{m.group(0)}" target="_blank" rel="noopener">{m.group(0)}</a>',
        t)


def _fmt_prose(text: str) -> str:
    """Escape plain text, then apply the *italic* and link rewrites."""
    return md_links(_re_italic.sub(r"<em>\1</em>", html.escape(text)))


def _fmt_text(text: str) -> str:
    """Format a fence-free chunk, keeping `inline code` spans literal."""
    pieces, pos = [], 0
    for span in _re_inline.finditer(text):
        pieces.append(_fmt_prose(text[pos:span.start()]))
        # Escape the code exactly once and keep it away from the italic/link
        # rewrites so it renders verbatim.
        pieces.append(f"<code>{html.escape(span.group(1))}</code>")
        pos = span.end()
    pieces.append(_fmt_prose(text[pos:]))
    return "".join(pieces)


def fmt_body(body: str) -> str:
    """Render a message body as an HTML fragment.

    Supported markup: fenced code blocks (three backticks, optional language
    word), `inline code`, ``*italic*``, ``[text](url)`` links and bare
    http(s) URLs. All other text is HTML-escaped.

    Args:
        body: Raw, unescaped message text.

    Returns:
        The HTML fragment. Text inside code spans and fences is escaped
        exactly once and never receives italic or link markup.
    """
    out, pos = [], 0
    for fence in _re_fence.finditer(body):
        out.append(_fmt_text(body[pos:fence.start()]))
        lang = fence.group(1) or ""  # \w+ only, so safe inside the attribute
        code = html.escape(fence.group(2))
        out.append(f"<pre><code class='{lang}'>{code}</code></pre>")
        pos = fence.end()
    out.append(_fmt_text(body[pos:]))
    return "".join(out)
110 | 107 |
|
111 | | -# ══════════════════════════════════════════════════════════════════════ |
| 108 | +# ════════════════════════════════════════════════ |
112 | 109 | # ░░ archiver ░░ |
113 | | -# ══════════════════════════════════════════════════════════════════════ |
_ROOM_PAGE_STYLE = """
<style>
body{margin:0 auto;max-width:75ch;font:15px/1.55 system-ui,-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;
     background:#141414;color:#e6e6e6;padding:2rem}
.msg{white-space:pre-wrap;margin:.3em 0}
.reply{margin-left:2ch}
.edited{opacity:.65;font-style:italic;font-size:.9em}
pre{background:#1e1e1e;padding:.6em;border-radius:4px;overflow:auto}
code{font-family:ui-monospace,monospace}
.u{font-weight:600}
time{color:#888}
a{color:#9cf;text-decoration:none}
i,em{font-style:normal} /* reset */
em{font-style:italic}
@media(max-width:480px){
  body{padding:1rem;font-size:14px}
  pre{font-size:13px}
}
</style>"""


def _collect_messages(raw: str) -> list:
    """Parse commander output into time-sorted messages with the newest edit applied."""
    originals, edits = {}, {}
    for j in json_lines(raw):
        if not isinstance(j, dict):
            continue
        ev = j.get("source", j)
        if not isinstance(ev, dict) or ev.get("type") != "m.room.message":
            continue
        content = ev.get("content")
        if not isinstance(content, dict):
            continue
        rel = content.get("m.relates_to") or {}
        if rel.get("rel_type") == "m.replace" or "m.new_content" in content:
            # Keep the newest replacement per target by timestamp, so the
            # result does not depend on the order events arrive in.
            # Ties go to the later line.
            target = rel.get("event_id")
            prev = edits.get(target)
            if prev is None or ev.get("origin_server_ts", 0) >= prev.get("origin_server_ts", 0):
                edits[target] = ev
        else:
            originals[ev["event_id"]] = ev

    for eid, msg in originals.items():
        rep = edits.get(eid)
        if rep is None:
            continue
        new_body = (rep["content"].get("m.new_content") or {}).get("body") \
            or rep["content"].get("body", "")
        msg["content"]["body"] = new_body
        msg["_edited"] = True

    return sorted(originals.values(), key=when)


def _group_threads(events: list):
    """Split *events* into top-level messages and a parent-id -> reply-ids map."""
    byid = {e["event_id"]: e for e in events}
    parent_of = {}
    for e in events:
        rel = e["content"].get("m.relates_to") or {}
        parent = rel.get("event_id")
        if rel.get("rel_type") == "m.thread" and parent in byid and parent != e["event_id"]:
            parent_of[e["event_id"]] = parent

    threads = collections.defaultdict(list)
    replies = set()
    for eid, parent in parent_of.items():
        # Only nest under a parent that is itself rendered at the top level.
        # Replies to a missing or nested parent stay top-level, so no
        # message is ever dropped from the export.
        if parent not in parent_of:
            threads[parent].append(eid)
            replies.add(eid)

    roots = [e for e in events if e["event_id"] not in replies]
    return roots, threads, byid


def archive(room: str):
    """Export one room to ``archive/<slug>/{index.html,room_log.txt}``.

    Points the credentials file at *room*, makes a best-effort join and sync,
    looks up a human-readable title, fetches the messages according to
    ``LISTEN_MODE`` and writes a plain-text log and an HTML page.

    Args:
        room: Room id or alias as accepted by matrix-commander.

    Returns:
        ``(title, room, slug)`` for the landing page, or ``None`` when the
        room yielded no ``m.room.message`` events (nothing is written then).

    Raises:
        subprocess.CalledProcessError: The message fetch itself failed.
        subprocess.TimeoutExpired: The fetch exceeded ``TIMEOUT_S`` in
            ``"all"`` mode.
        KeyError: ``LISTEN_MODE`` is not one of ``all``/``tail``/``once``.
    """
    logging.info("room %s", room)

    # Record the target room in the credentials file before invoking the CLI.
    cfg = json.loads(cred_file.read_text())
    cfg.update(room_id=room, default_room=room)
    cred_file.write_text(json.dumps(cfg))

    room_slug = slug(room)
    rdir = pathlib.Path("archive") / room_slug
    rdir.mkdir(parents=True, exist_ok=True)

    # Joining and the warm-up sync are best effort: failure here (for example
    # when already joined) must not abort the export.
    for cmd in (["--room-join", room], ["--room", room, "--listen", "once"]):
        try:
            run(["matrix-commander", *CRED, *cmd])
        except subprocess.CalledProcessError as exc:
            logging.debug("matrix-commander %s failed (ignored): %s", cmd[0], exc)

    # title: first non-empty name field, falling back to the room id itself
    title = room
    try:
        info = next(json_lines(run(["matrix-commander", *CRED, "--room", room,
                                    "--get-room-info", "--output", "json"])), {})
        for k in ("room_display_name", "room_name", "canonical_alias", "room_alias"):
            if info.get(k):
                title = str(info[k])
                break
    except Exception as exc:  # deliberate: the title is purely cosmetic
        logging.debug("no room info for %s (%s); using the id as title", room, exc)

    listen = {"all": ["all"], "tail": ["tail", "--tail", TAIL_N], "once": ["once"]}[LISTEN_MODE]
    raw = run(["matrix-commander", *CRED, "--room", room, "--listen", *listen,
               "--listen-self", "--output", "json"],
              timeout=TIMEOUT_S if LISTEN_MODE == "all" else None)

    events = _collect_messages(raw)
    if not events:
        return None
    roots, threads, byid = _group_threads(events)

    # datetime.utcnow() is deprecated since Python 3.12; one aware timestamp
    # serves both outputs.
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    plain = [f"# room: {title}", f"# exported: {stamp}"]
    html_lines = [
        "<!doctype html><meta charset=utf-8><meta name=viewport content='width=device-width,initial-scale=1'>",
        f"<title>{html.escape(title)} – archive</title>",
        _ROOM_PAGE_STYLE,
        f"<h1>{html.escape(title)}</h1>",
        f"<p><small>last updated {stamp}</small></p>",
        "<p><a href='room_log.txt'>⇩ plaintext</a> · <a href='../../'>⇦ all rooms</a></p>",
        "<hr>",
    ]

    def emit(ev, lvl):
        """Append one message to both the plain-text and the HTML output."""
        ts = when(ev).strftime("%Y-%m-%d %H:%M")
        sender = ev["sender"]
        who = nice_user(sender)
        body = ev["content"].get("body") or ""
        edited = bool(ev.get("_edited"))

        plain.append(f"{' '*lvl}{'↳ ' if lvl else ''}{ts} "
                     f"{who}: {body}{' [edited]' if edited else ''}")

        cls = "msg" + (" reply" if lvl else "")
        rendered = fmt_body(body)
        if edited:
            rendered += ' <span class="edited">(edited)</span>'
        colour = rich_color(sender)
        # The sender name comes from the event stream, so it is escaped
        # before being written into the page.
        html_lines.append(f"<div class='{cls}'>"
                          f"<time>{ts}</time> "
                          f"<span class='u' style='color:{colour}'>"
                          f"{html.escape(who)}</span>: {rendered}</div>")

    for r in roots:
        emit(r, 0)
        for cid in threads.get(r["event_id"], ()):
            emit(byid[cid], 1)

    (rdir / "room_log.txt").write_text("\n".join(plain) + "\n", encoding="utf-8")
    (rdir / "index.html").write_text("\n".join(html_lines) + "\n", encoding="utf-8")
    logging.info(" wrote → %s", rdir)
    return title, room, room_slug
255 | 225 |
|
256 | | -# ══════════════════════════════════════════════════════════════════════ |
| 226 | +# ════════════════════════════════════════════════ |
257 | 227 | # ░░ main ░░ |
258 | | -# ══════════════════════════════════════════════════════════════════════ |
| 228 | +# ════════════════════════════════════════════════ |
# Make sure the output directory exists and drop any index file left inside it.
archive_root = pathlib.Path("archive")
archive_root.mkdir(exist_ok=True)
(archive_root / "index.html").unlink(missing_ok=True)

# Archive every configured room; one failing room must not stop the others.
meta = []
for rid in ROOMS:
    try:
        m = archive(rid)
    except Exception as exc:
        logging.error("‼ failed for %s – %s", rid, exc)
        continue
    if m:
        meta.append(m)

# Landing page: rooms ordered case-insensitively by title.
meta.sort(key=lambda entry: entry[0].lower())
listing = "\n".join(
    "<li><a href='archive/{0}/index.html'>{1}</a><br><small>{2}</small></li>".format(
        room_slug, html.escape(room_title), html.escape(room_id))
    for room_title, room_id, room_slug in meta)

landing = f"""<!doctype html><meta charset=utf-8><meta name=viewport content='width=device-width,initial-scale=1'>
<title>Archived rooms</title>
<style>body{{margin:0 auto;max-width:65ch;font:16px/1.55 system-ui,-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;
 background:#141414;color:#e6e6e6;padding:2rem}}a{{color:#9cf;text-decoration:none}}</style>
<h1>Archived rooms</h1><ul>{listing}</ul>
"""
pathlib.Path("index.html").write_text(landing, encoding="utf-8")
logging.info("root index.html regenerated ✓")
|
0 commit comments