22# -*- coding: utf-8 -*-
33"""
44Archive one-or-many public Matrix rooms.
5- Produces per-room archives (HTML + plaintext) under archive/<slug>/
6- and a nice responsive landing page at /index.html
5+ Creates archive/<slug>/{index.html, room_log.txt}
6+ and a root index.html listing all rooms.
77"""
88
9- # ─────────── std-lib ───────────
9+ # ─── std-lib ─────────────────────────────────────────────── ───────────
1010import os , sys , json , subprocess , shlex , hashlib , colorsys , logging , re , html
1111import collections , pathlib , urllib .parse
1212from datetime import datetime , timezone
1313
14- # ══════════ CONFIG ═════════════
14+ # ═══════════ CONFIG ══════════════════════════════════════ ═════════════
1515HS = os .environ ["MATRIX_HS" ]
1616USER_ID = os .environ ["MATRIX_USER" ]
1717TOKEN = os .environ ["MATRIX_TOKEN" ]
2828logging .basicConfig (level = logging .INFO , format = "%(levelname)s: %(message)s" )
2929os .environ ["NIO_LOG_LEVEL" ] = "error"
3030
# ═══════════ matrix-commander creds ═══════════════════════════════════
# Credentials file and session store passed to matrix-commander through
# the CRED argument list built below.
cred_file = pathlib.Path("mc_creds.json")
store_dir = pathlib.Path("store")
store_dir.mkdir(exist_ok=True)
if not cred_file.exists():
    # The file holds the access token, so create it owner-only before any
    # secret is written into it (write_text keeps the existing mode).
    cred_file.touch(mode=0o600)
    # NOTE(review): ROOMS is defined outside the visible part of the file;
    # this assumes it is a non-empty sequence of room ids — confirm.
    cred_file.write_text(json.dumps({
        "homeserver": HS,
        "user_id": USER_ID,
        "access_token": TOKEN,
        "device_id": "GH",
        "default_room": ROOMS[0],
    }))
CRED = ["--credentials", str(cred_file), "--store", str(store_dir)]
3943
40- # ═════ helpers ═════
44+ # ═══════════ helpers ═════════════════════════════════════════════ ═════
4145def run (cmd , timeout = None ) -> str :
4246 res = subprocess .run (cmd , text = True , capture_output = True , timeout = timeout )
4347 if res .returncode :
@@ -46,12 +50,11 @@ def run(cmd, timeout=None) -> str:
4650
def json_lines(blob: str):
    """Yield every JSON value found on its own line of *blob*.

    Only lines whose very first character is ``{`` or ``[`` are considered;
    leading whitespace is not stripped, so indented lines are skipped.
    Lines that fail to parse are ignored on purpose: the input is expected
    to be command output that may mix JSON with other text.
    """
    for ln in blob.splitlines():
        if not ln.startswith(("{", "[")):
            continue
        try:
            yield json.loads(ln)
        except json.JSONDecodeError:
            # best-effort parsing: a malformed line is not an error
            continue
5356
def when(e):
    """Return the event's ``origin_server_ts`` (milliseconds) as a naive UTC datetime."""
    # Same value utcfromtimestamp() produced, without the deprecated call:
    # convert in UTC, then drop tzinfo so the result stays naive.
    stamp = datetime.fromtimestamp(e["origin_server_ts"] / 1000, tz=timezone.utc)
    return stamp.replace(tzinfo=None)


def nice_user(u):
    """Return the local part of a user id: leading ``@`` and ``:server`` removed."""
    return u.lstrip("@").split(":", 1)[0]


def slug(s):
    """Percent-encode *s* with no safe characters, then replace ``%`` with ``_``."""
    return urllib.parse.quote(s, safe="").replace("%", "_")
5760
@@ -61,132 +64,122 @@ def rich_color(uid:str) -> str:
6164 r ,g ,b = colorsys .hls_to_rgb (h ,l ,s )
6265 return f"#{ int (r * 255 ):02x} { int (g * 255 ):02x} { int (b * 255 ):02x} "
6366
# ─── lightweight markdown-ish rendering ───────────────────────────────
_re_mdlink = re.compile(r'\[([^\]]+?)\]\((https?://[^\s)]+)\)')
# bare URLs; the look-behind skips a URL directly preceded by a quote or
# '>', e.g. the href / link text of an anchor md_links has just emitted
_re_rawurl = re.compile(r'(?<!["\'>])(https?://[^\s<]+)')
_re_fence = re.compile(r'```(\w+)?\n([\s\S]*?)```', re.MULTILINE)
_re_inline = re.compile(r'`([^`\n]+?)`')
# italics only if *text* is surrounded by whitespace / line edges
_re_italic = re.compile(r'(?<!\S)\*([^*\n]+?)\*(?!\S)')


def md_links(txt: str) -> str:
    """Turn ``[label](url)`` links and bare http(s) URLs in *txt* into anchors.

    *txt* is inserted into the markup as-is, so callers pass text that has
    already been HTML-escaped.
    """
    txt = _re_mdlink.sub(
        r'<a href="\2" target="_blank" rel="noopener">\1</a>', txt)
    return _re_rawurl.sub(
        r'<a href="\g<0>" target="_blank" rel="noopener">\g<0></a>', txt)


def _fmt_plain(text: str) -> str:
    """Escape raw text, then apply italics and link rendering (in that order)."""
    return md_links(_re_italic.sub(r'<em>\1</em>', html.escape(text)))


def _fmt_inline(text: str) -> str:
    """Render a fence-free chunk of raw text, honouring `inline code` spans."""
    out, pos = [], 0
    for code in _re_inline.finditer(text):
        out.append(_fmt_plain(text[pos:code.start()]))
        # matched on the raw text, so the code is escaped exactly once
        out.append(f"<code>{html.escape(code.group(1))}</code>")
        pos = code.end()
    out.append(_fmt_plain(text[pos:]))
    return "".join(out)


def fmt_body(body: str) -> str:
    """Render a raw message body as an HTML fragment.

    Fenced ``` blocks become ``<pre><code class='lang'>``; everything
    between and after them gets the same treatment: inline code, then
    escaping, italics and links for the remaining text.
    """
    out, pos = [], 0
    for fence in _re_fence.finditer(body):
        # text preceding this fenced block
        out.append(_fmt_inline(body[pos:fence.start()]))
        lang = fence.group(1) or ""
        out.append(
            f"<pre><code class='{lang}'>{html.escape(fence.group(2))}</code></pre>")
        pos = fence.end()
    out.append(_fmt_inline(body[pos:]))
    return "".join(out)
103100
104- # ═════ archiver core ═════
101+ # ═══════════ archiver ════════════════════════════════════════════ ═════
105102def archive (room :str ):
106- logging .info ("room %s" ,room )
107- cred = json .loads (cred_file .read_text ());cred ["room_id" ]= cred ["default_room" ]= room
108- cred_file .write_text (json .dumps (cred ))
103+ logging .info ("room %s" , room )
104+ # update creds
105+ data = json .loads (cred_file .read_text ()); data .update (room_id = room , default_room = room )
106+ cred_file .write_text (json .dumps (data ))
109107
110- rdir = pathlib .Path ("archive" )/ slug (room );rdir .mkdir (parents = True ,exist_ok = True )
108+ rdir = pathlib .Path ("archive" )/ slug (room ); rdir .mkdir (parents = True , exist_ok = True )
111109 for cmd in (["--room-join" ,room ],["--room" ,room ,"--listen" ,"once" ]):
112110 try : run (["matrix-commander" ,* CRED ,* cmd ])
113111 except subprocess .CalledProcessError : pass
114112
115113 title = room
116114 try :
117- info = next (json_lines (run (["matrix-commander" ,* CRED ,"--room" ,room ,"--get-room-info" ,"--output" ,"json" ])),{})
115+ meta = next (json_lines (run (["matrix-commander" ,* CRED ,"--room" ,room ,"--get-room-info" ,"--output" ,"json" ])),{})
118116 for k in ("room_display_name" ,"room_name" ,"canonical_alias" ,"room_alias" ):
119- if info .get (k ): title = info [k ];break
120- except Exception as e :
121- logging .warning (" room-info failed: %s" ,e )
117+ if meta .get (k ): title = meta [k ];break
118+ except : pass
122119
123- blob = run ([ "matrix-commander" , * CRED ,"--room" , room , * {
124- "all " :["--listen" ,"all" ,"--listen-self" ],
125- "tail " :["--listen" ,"tail " ,"--tail" , TAIL_N , "-- listen-self" ],
126- "once" :[ "--listen" , "once" ,"--listen-self" ]}[ LISTEN_MODE ] ,"--output" ,"json" ],
127- timeout = TIMEOUT_S if LISTEN_MODE == "all" else None )
120+ listen_args = { "all" :[ "--listen" , "all" ,"--listen-self" ],
121+ "tail " :["--listen" ,"tail" , "--tail" , TAIL_N ,"--listen-self" ],
122+ "once " :["--listen" ,"once " ,"--listen-self" ]}[ LISTEN_MODE ]
123+ blob = run ([ "matrix-commander" , * CRED ,"--room" , room , * listen_args ,"--output" ,"json" ],
124+ timeout = TIMEOUT_S if LISTEN_MODE == "all" else None )
128125
129- originals ,edits = {},{}
126+ originals , edits = {}, {}
130127 for j in json_lines (blob ):
131128 ev = j .get ("source" ,j )
132- if ev .get ("type" )!= "m.room.message" :continue
133- c ,rel = ev ["content" ],ev ["content" ].get ("m.relates_to" ,{})
129+ if ev .get ("type" )!= "m.room.message" : continue
130+ c , rel = ev ["content" ], ev ["content" ].get ("m.relates_to" ,{})
134131 if rel .get ("rel_type" )== "m.replace" or "m.new_content" in c :
135132 edits [rel .get ("event_id" )] = ev
136133 else :
137- originals [ev ["event_id" ]]= ev
138- for eid ,msg in originals .items ():
134+ originals [ev ["event_id" ]] = ev
135+
136+ for eid , msg in originals .items ():
139137 if eid in edits :
140- rep = edits [eid ]
141- new = rep ["content" ].get ("m.new_content" ,{}).get ("body" ) or rep ["content" ].get ("body" ,"" )
142- msg ["content" ]["body" ]= new
143- msg ["_edited" ]= True
138+ rep = edits [eid ]
139+ new = rep ["content" ].get ("m.new_content" ,{}).get ("body" ) \
140+ or rep ["content" ].get ("body" ,"" )
141+ msg ["content" ]["body" ] = new
142+ msg ["_edited" ] = True
144143
145144 evs = list (originals .values ())
146145 if not evs : return title
147- evs .sort (key = when ) # chronological
146+ evs .sort (key = when )
148147
149- # threading map
148+ # threading
150149 byid = {e ["event_id" ]:e for e in evs }
151150 threads = collections .defaultdict (list )
152151 for e in evs :
153152 rel = e ["content" ].get ("m.relates_to" ,{})
154- if rel .get ("rel_type" )== "m.thread" :
155- threads [rel ["event_id" ]].append (e ["event_id" ])
156- roots = [e for e in evs if e ["event_id" ] not in {c for ls in threads .values () for c in ls }]
153+ if rel .get ("rel_type" )== "m.thread" : threads [rel ["event_id" ]].append (e ["event_id" ])
154+ roots = [e for e in evs if e ["event_id" ] not in {c for v in threads .values () for c in v }]
157155
158- # plaintext
156+ # plain-text
159157 stamp = datetime .utcnow ().strftime ("%Y-%m-%d %H:%M UTC" )
160158 txt = [f"# room: { title } " ,f"# exported: { stamp } " ]
161159 def add_txt (ev ,lvl ):
162160 body = ev ["content" ].get ("body" ,"" )
163161 if ev .get ("_edited" ): body += " [edited]"
164- txt .append (f"{ ' ' * lvl } { '↳ ' if lvl else '' } "
165- f"{ when (ev ).strftime ('%Y-%m-%d %H:%M' )} "
162+ txt .append (f"{ ' ' * lvl } { '↳ ' if lvl else '' } { when (ev ).strftime ('%Y-%m-%d %H:%M' )} "
166163 f"{ nice_user (ev ['sender' ])} : { body } " )
167164 for r in roots :
168165 add_txt (r ,0 )
169- for cid in sorted (threads [r ["event_id" ]],key = lambda c :when (byid [c ])):
170- add_txt (byid [cid ],1 )
166+ for cid in threads [r ["event_id" ]]: add_txt (byid [cid ],1 )
171167
172168 # html
173169 last = datetime .utcnow ().strftime ("%Y-%m-%d %H:%M UTC" )
174170 h = [
175- "<!doctype html><meta charset=utf-8><meta name=' viewport' content='width=device-width,initial-scale=1'>" ,
171+ "<!doctype html><meta charset=utf-8><meta name=viewport content='width=device-width,initial-scale=1'>" ,
176172 f"<title>{ html .escape (title )} – archive</title>" ,
177173 "<style>" ,
178174 "body{margin:0 auto;max-width:75ch;font:15px/1.55 system-ui,"
179175 "-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;background:#141414;color:#e6e6e6;padding:2rem}" ,
180176 ".msg{white-space:pre-wrap;margin:0.3em 0}" ,
181- ".reply{margin-left:2ch}" ,
182- ".edited{opacity:0.75;font-style:italic}" ,
183- "pre{background:#1e1e1e;padding:0.6em;border-radius:4px;overflow:auto}" ,
177+ ".reply{margin-left:2ch}.edited{opacity:.7;font-style:italic}" ,
178+ "pre{background:#1e1e1e;padding:.6em;border-radius:4px;overflow:auto}" ,
184179 "code{font-family:ui-monospace,monospace}" ,
185- ".u{font-weight:600}" ,
186- "time{color:#888}" ,
180+ ".u{font-weight:600}time{color:#888}" ,
187181 "a{color:#9cf;text-decoration:none}" ,
188- "em{font-style:italic}" , # our italics
189- "i,em:not(.allow){font-style:normal}" , # neutralise stray <em>/<i>
182+ "i,em{font-style:normal} em{font-style:italic}" ,
190183 "@media(max-width:480px){body{padding:1rem;font-size:14px}pre{font-size:13px}}" ,
191184 "</style>" ,
192185 f"<h1>{ html .escape (title )} </h1>" ,
@@ -201,42 +194,28 @@ def add_html(ev,lvl):
201194 f"{ fmt_body (ev ['content' ].get ('body' ,'' ))} </div>" )
202195 for r in roots :
203196 add_html (r ,0 )
204- for cid in sorted (threads [r ["event_id" ]],key = lambda c :when (byid [c ])):
205- add_html (byid [cid ],1 )
197+ for cid in threads [r ["event_id" ]]: add_html (byid [cid ],1 )
206198
207199 (rdir / "room_log.txt" ).write_text ("\n " .join (txt )+ "\n " ,encoding = "utf-8" )
208200 (rdir / "index.html" ).write_text ("\n " .join (h )+ "\n " ,encoding = "utf-8" )
209- logging .info (" written → %s" ,rdir )
210201 return title
211202
212- # ═════ main ═════
203+ # ═══════════ MAIN ════════════════════════════════════════════════ ═════
213204pathlib .Path ("archive" ).mkdir (exist_ok = True )
214205(pathlib .Path ("archive/index.html" )).unlink (missing_ok = True )
215206
216207landing = []
217208for rid in ROOMS :
218- try :
219- title = archive (rid )
220- landing .append ((title ,rid ,slug (rid )))
209+ try : landing .append ((archive (rid ), rid , slug (rid )))
221210 except Exception as exc :
222- logging .error ("‼ failed for %s – %s" ,rid ,exc )
211+ logging .error ("‼ failed for %s – %s" , rid , exc )
223212
224213landing .sort (key = lambda t :t [0 ].lower ())
225214ul = "\n " .join (f"<li><a href='archive/{ s } /index.html'>{ html .escape (t )} </a>"
226- f"<br><small>{ html .escape (r )} </small></li>"
227- for t ,r ,s in landing )
215+ f"<br><small>{ html .escape (r )} </small></li>" for t ,r ,s in landing )
228216
229217pathlib .Path ("index.html" ).write_text (
230218 "\n " .join ([
231- "<!doctype html><meta charset=utf-8><meta name='viewport' content='width=device-width,initial-scale=1'>" ,
232- "<title>Archived rooms</title>" ,
233- "<style>" ,
234- "body{margin:0 auto;max-width:65ch;font:16px/1.55 system-ui,"
235- "-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;background:#141414;color:#e6e6e6;padding:2rem}" ,
236- "a{color:#9cf;text-decoration:none}@media(max-width:480px){body{padding:1rem;font-size:15px}}" ,
237- "</style>" ,
238- "<h1>Archived rooms</h1><ul>" ,ul ,"</ul>"
239- ])+ "\n " ,encoding = "utf-8" )
240-
241- logging .info ("root index.html regenerated ✓" )
219+ "<!doctype html><meta charset=utf-8><meta name=viewport content='width=device-width,initial-scale=1'>" ,
220+ " < title
242221
0 commit comments