22# -*- coding: utf-8 -*-
33"""
44Archive one-or-many public Matrix rooms.
5-
6- Creates
7- archive/<slug>/{index.html, room_log.txt}
8- and a root index.html listing all rooms by their human titles.
5+ Creates archive/<slug>/{index.html, room_log.txt}
6+ plus a root index.html listing all rooms by their human titles.
97"""
108
119# ── std-lib ────────────────────────────────────────────────────────────
1210import os , sys , json , subprocess , shlex , hashlib , colorsys , logging , re , html
1311import collections , pathlib , urllib .parse
1412from datetime import datetime , timezone
1513
16- # ═════════════════════════════════ CONFIG ════════════════════════════
# Connection settings. All three are mandatory: a missing variable aborts
# start-up with a KeyError that names it.
HS = os.environ["MATRIX_HS"]
USER_ID = os.environ["MATRIX_USER"]
TOKEN = os.environ["MATRIX_TOKEN"]

# Room ids/aliases, separated by commas and/or whitespace. MATRIX_ROOM is the
# fallback when MATRIX_ROOMS is unset or empty.
ROOMS = [
    r
    for r in re.split(r"[,\s]+", os.getenv("MATRIX_ROOMS") or os.getenv("MATRIX_ROOM", ""))
    if r
]
if not ROOMS:
    sys.exit("‼ MATRIX_ROOMS is empty")

# How much history to fetch per room: all|tail|once.
_LISTEN_MODES = ("all", "tail", "once")
LISTEN_MODE = os.getenv("LISTEN_MODE", "all").lower()
if LISTEN_MODE not in _LISTEN_MODES:
    # Fail fast: otherwise the bad value only surfaces later as a bare
    # KeyError when archive() looks the mode up, once per room.
    sys.exit(f"‼ LISTEN_MODE must be one of {'|'.join(_LISTEN_MODES)}, got {LISTEN_MODE!r}")
TAIL_N = os.getenv("TAIL_N", "10000")  # kept as a string: it is passed on the command line
TIMEOUT_S = int(os.getenv("TIMEOUT", 20))

logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)s: %(message)s",
    stream=sys.stderr,
)
os.environ["NIO_LOG_LEVEL"] = "error"
3327
34- # ══════════ matrix-commander creds ═════ ══════════════════════════════
28+ # ═══════════════ matrix-commander creds ══════════════════════════════
# Credentials file and store directory handed to every matrix-commander call
# through the CRED argument list below; both live in the working directory.
cred_file = pathlib.Path("mc_creds.json")
store_dir = pathlib.Path("store")
store_dir.mkdir(exist_ok=True)

# Seed the credentials only when the file is absent, so an existing file is
# left untouched.
if not cred_file.exists():
    cred_file.write_text(
        json.dumps(
            {
                "homeserver": HS,
                "user_id": USER_ID,
                "access_token": TOKEN,
                "device_id": "GH",
                "default_room": ROOMS[0],
            }
        )
    )

CRED = ["--credentials", str(cred_file), "--store", str(store_dir)]
4635
47- # ═══════════════════════ tiny helpers ═══════════════════════════════
def run(cmd, timeout=None) -> str:
    """Run *cmd* and return its captured standard output.

    Args:
        cmd: Command passed straight to ``subprocess.run``.
        timeout: Seconds to wait before giving up; ``None`` waits indefinitely.

    Returns:
        Everything the process wrote to stdout, decoded as text.

    Raises:
        subprocess.CalledProcessError: The process exited with a non-zero
            status. The exception carries the captured stdout and stderr.
        subprocess.TimeoutExpired: The process was still running after
            *timeout* seconds.
    """
    # check=True makes subprocess raise CalledProcessError (with cmd, output
    # and stderr filled in) on a non-zero exit, so no manual returncode test.
    res = subprocess.run(cmd, text=True, capture_output=True, timeout=timeout, check=True)
    return res.stdout
5341
5442def json_lines (blob :str ):
@@ -57,118 +45,108 @@ def json_lines(blob:str):
5745 try : yield json .loads (ln )
5846 except json .JSONDecodeError : pass
5947
def when(e):
    """Return the event's ``origin_server_ts`` (milliseconds) as a naive UTC datetime."""
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Build an
    # aware UTC value instead and drop tzinfo so callers still get a naive one.
    moment = datetime.fromtimestamp(e["origin_server_ts"] / 1000, tz=timezone.utc)
    return moment.replace(tzinfo=None)


def uname(u):
    """Return the localpart of a Matrix user id: ``@alice:example.org`` -> ``alice``."""
    return u.lstrip("@").split(":", 1)[0]


def slug(s):
    """Turn a room id/alias into a directory-safe name.

    Every character is percent-encoded as needed (nothing is treated as
    safe), then ``%`` is replaced by ``_``.
    """
    return urllib.parse.quote(s, safe="").replace("%", "_")
6351
def pastel(uid: str) -> str:
    """Derive a stable ``#rrggbb`` colour from a user id.

    The SHA-1 digest of *uid* supplies the hue (first two bytes) and small
    offsets around 0.55 for lightness and saturation (third and fourth
    bytes), so the same id always maps to the same colour.
    """
    digest = hashlib.sha1(uid.encode()).digest()
    hue = int.from_bytes(digest[:2], "big") / 65535
    lightness = .55 + (digest[2] / 255 - .5) * .25
    saturation = .55 + (digest[3] / 255 - .5) * .25
    channels = colorsys.hls_to_rgb(hue, lightness, saturation)
    return "#" + "".join(f"{int(channel * 255):02x}" for channel in channels)
6957
70- # ── markdown -ish filters ──────────────────────────────────────────────
58+ # md -ish post-processing ──────────────────────────────────────────────
_re_mdlink = re.compile(r'\[([^\]]+?)\]\((https?://[^\s)]+)\)')
# The look-behind skips URLs that already sit inside an attribute or tag.
_re_rawurl = re.compile(r'(?<!["\'>])(https?://[^\s<]+)')
_re_fence = re.compile(r'```(\w+)?\n([\s\S]*?)```', re.MULTILINE)
_re_inline = re.compile(r'`([^`\n]+?)`')
_re_italic = re.compile(r'(?<!\S)\*([^*\n]+?)\*(?!\S)')  # *foo* only


def _anchor(url: str, label: str) -> str:
    """Build the anchor tag used for every generated link."""
    return f'<a href="{url}" rel="noopener" target="_blank">{label}</a>'


def linkify(t: str) -> str:
    """Turn markdown links and bare http(s) URLs in *t* into anchor tags.

    *t* is expected to be HTML-escaped already; nothing is escaped here.
    Markdown links are converted first, so the URLs they leave inside
    ``href="..."`` are not linked a second time by the bare-URL pass.
    """
    t = _re_mdlink.sub(lambda m: _anchor(m.group(2), m.group(1)), t)
    return _re_rawurl.sub(lambda m: _anchor(m.group(0), m.group(0)), t)


def _fmt_text(part: str) -> str:
    """Escape one non-fenced span and apply inline code, italics and links."""
    chunk = html.escape(part)
    # chunk is already escaped, so the code span is inserted as-is; escaping
    # the match again would render "<" as the literal text "&lt;".
    chunk = _re_inline.sub(r"<code>\1</code>", chunk)
    chunk = _re_italic.sub(r"<em>\1</em>", chunk)
    return linkify(chunk)


def fmt(body: str) -> str:
    """Render a message body as an HTML fragment.

    Fenced blocks (three backticks, optional language tag) become
    ``<pre><code class='lang'>`` with their content escaped verbatim. Text
    outside fences is escaped, then inline code, ``*italic*`` and links are
    converted.

    Args:
        body: Raw message text.

    Returns:
        The HTML for the whole body, with all segments concatenated in order.
    """
    out, pos = [], 0
    for fence in _re_fence.finditer(body):
        out.append(_fmt_text(body[pos:fence.start()]))
        lang = fence.group(1) or ""
        out.append(f"<pre><code class='{lang}'>{html.escape(fence.group(2))}</code></pre>")
        pos = fence.end()
    out.append(_fmt_text(body[pos:]))
    return "".join(out)
10386
104- # ════════════════════════ archiver ══════════════════════════════════
87+ # ═══════════════ archiver ═══════════════════════════════════════════
10588def archive (room :str ):
10689 logging .info ("room %s" , room )
90+ cred = json .loads (cred_file .read_text ()); cred .update (room_id = room ,default_room = room )
91+ cred_file .write_text (json .dumps (cred ))
10792
108- cfg = json .loads (cred_file .read_text ()); cfg .update (room_id = room ,default_room = room )
109- cred_file .write_text (json .dumps (cfg ))
110-
111- rdir = pathlib .Path ("archive" )/ slug (room )
112- rdir .mkdir (parents = True , exist_ok = True )
93+ rdir = pathlib .Path ("archive" )/ slug (room ); rdir .mkdir (parents = True , exist_ok = True )
11394
114- for cmd in (["--room-join" ,room ], ["--room" ,room ,"--listen" ,"once" ]):
95+ for cmd in (["--room-join" ,room ],["--room" ,room ,"--listen" ,"once" ]):
11596 try : run (["matrix-commander" ,* CRED ,* cmd ])
11697 except subprocess .CalledProcessError : pass
11798
11899 title = room
119100 try :
120101 info = next (json_lines (run (["matrix-commander" ,* CRED ,"--room" ,room ,
121102 "--get-room-info" ,"--output" ,"json" ])),{})
122- for k in ("room_display_name" ,"room_name" ,"canonical_alias" ,"room_alias" ):
123- if info .get (k ): title = info [k ]; break
103+ for k in ("room_display_name" ,"room_name" ,"canonical_alias" ,"room_alias" ):
104+ if info .get (k ): title = info [k ];break
124105 except Exception : pass
125106
126- listen = {"all" :["all" ],"tail" :["tail" ,"--tail" ,TAIL_N ],"once" :["once" ]}[LISTEN_MODE ]
127- raw = run (["matrix-commander" ,* CRED ,"--room" ,room ,"--listen" ,* listen ,"--listen-self" ,"--output" ,"json" ],
107+ mode = {"all" :["all" ],"tail" :["tail" ,"--tail" ,TAIL_N ],"once" :["once" ]}[LISTEN_MODE ]
108+ raw = run (["matrix-commander" ,* CRED ,"--room" ,room ,"--listen" ,* mode ,"--listen-self" ,"--output" ,"json" ],
128109 timeout = TIMEOUT_S if LISTEN_MODE == "all" else None )
129110
130- originals , edits = {}, {}
111+ originals , edits = {},{}
131112 for j in json_lines (raw ):
132- ev = j .get ("source" , j )
113+ ev = j .get ("source" ,j )
133114 if ev .get ("type" )!= "m.room.message" : continue
134115 rel = ev ["content" ].get ("m.relates_to" ,{})
135116 if rel .get ("rel_type" )== "m.replace" or "m.new_content" in ev ["content" ]:
136- edits [rel .get ("event_id" )] = ev
117+ edits [rel .get ("event_id" )]= ev
137118 else :
138- originals [ev ["event_id" ]] = ev
139-
119+ originals [ev ["event_id" ]]= ev
140120 for eid ,msg in originals .items ():
141121 if eid in edits :
142122 rep = edits [eid ]
143- new_body = rep ["content" ].get ("m.new_content" ,{}).get ("body" ) \
144- or rep ["content" ].get ("body" ,"" )
145- msg ["content" ]["body" ]= new_body
146- msg ["_edited" ]= True
123+ new_body = rep ["content" ].get ("m.new_content" ,{}).get ("body" ) or rep ["content" ].get ("body" ,"" )
124+ msg ["content" ]["body" ]= new_body ; msg ["_edited" ]= True
147125
148- events = sorted (originals .values (), key = when )
126+ events = sorted (originals .values (),key = when ); # nothing? bail
149127 if not events : return None
150128
151- # threading
129+ # 1-level threads
152130 byid ,threads = {e ["event_id" ]:e for e in events },collections .defaultdict (list )
153131 for e in events :
154132 rel = e ["content" ].get ("m.relates_to" ,{})
155- if rel .get ("rel_type" )== "m.thread" :
156- threads [rel ["event_id" ]].append (e ["event_id" ])
133+ if rel .get ("rel_type" )== "m.thread" : threads [rel ["event_id" ]].append (e ["event_id" ])
157134 roots = [e for e in events if e ["event_id" ] not in {c for kids in threads .values () for c in kids }]
158135
159- # plain-text
136+ # plain-text (for git / LLM)
160137 stamp = datetime .utcnow ().strftime ("%Y-%m-%d %H:%M UTC" )
161138 plain = [f"# room: { title } " ,f"# exported: { stamp } " ]
162- def pl (ev ,lvl ):
139+ def add_txt (ev ,lvl ):
163140 body = ev ["content" ].get ("body" ,"" )
164141 if ev .get ("_edited" ): body += " [edited]"
165142 plain .append (f"{ ' ' * lvl } { '↳ ' if lvl else '' } { when (ev ).strftime ('%Y-%m-%d %H:%M' )} "
166- f"{ nice_user (ev ['sender' ])} : { body } " )
143+ f"{ uname (ev ['sender' ])} : { body } " )
167144 for r in roots :
168- pl (r ,0 )
169- for cid in threads [r ["event_id" ]]: pl (byid [cid ],1 )
145+ add_txt (r ,0 )
146+ for cid in threads [r ["event_id" ]]: add_txt (byid [cid ],1 )
170147
171148 # html
149+ accent = "#64b5f6" ; accent_hover = "#90caf9" # << new palette
172150 last = datetime .utcnow ().strftime ("%Y-%m-%d %H:%M UTC" )
173151 style = f"""
174152<style>
@@ -182,38 +160,39 @@ def pl(ev,lvl):
182160code{{font-family:ui-monospace,monospace}}
183161.u{{font-weight:600}}
184162.ts,a.ts{{color:#888;text-decoration:none}}
185- a.ts:hover{{color:#ccc}}
163+ a.ts:hover{{color:#bbb}}
164+ a, a:visited{{color:{ accent } }}
165+ a:hover{{color:{ accent_hover } }}
186166em{{font-style:italic}}
187167</style>"""
188168 html_lines = [
189169 "<!doctype html><meta charset=utf-8><meta name=viewport content='width=device-width,initial-scale=1'>" ,
190170 f"<title>{ html .escape (title )} – archive</title>" , style ,
191171 f"<h1>{ html .escape (title )} </h1>" ,
192172 f"<p><small>last updated { last } </small></p>" ,
193- "<p><a href='room_log.txt'>⇩ plaintext</a> · <a href='../../'>⇦ all rooms</a></p>" ,
173+ f "<p><a href='room_log.txt'>⇩ plaintext</a> · <a href='../../'>⇦ all rooms</a></p>" ,
194174 "<hr>"
195175 ]
196- def add (ev ,lvl ):
176+ def add_html (ev ,lvl ):
197177 cls = "msg" + (" reply" if lvl else "" )
198- body = fmt_body (ev [' content' ].get (' body' , '' ))
178+ body = fmt (ev [" content" ].get (" body" , "" ))
199179 if ev .get ("_edited" ): body += ' <span class="edited">(edited)</span>'
200- eid = ev ['event_id' ]
201- ts_link = f"<a class='ts' href='https://matrix.to/#/{ room } /{ eid } ' target='_blank'>" \
202- f"{ when (ev ).strftime ('%Y-%m-%d %H:%M' )} </a>"
203- html_lines .append (f"<div class='{ cls } '>"
204- f"{ ts_link }  "
205- f"<span class='u' style='color:{ rich_color (ev ['sender' ])} '>"
206- f"{ nice_user (ev ['sender' ])} </span>: { body } </div>" )
180+ eid = ev ["event_id" ]
181+ ts = f"<a class='ts' href='https://matrix.to/#/{ room } /{ eid } ' target='_blank'>" \
182+ f"{ when (ev ).strftime ('%Y-%m-%d %H:%M' )} </a>"
183+ html_lines .append (f"<div class='{ cls } '>{ ts }  "
184+ f"<span class='u' style='color:{ pastel (ev ['sender' ])} '>{ uname (ev ['sender' ])} </span>: "
185+ f"{ body } </div>" )
207186 for r in roots :
208- add (r ,0 )
209- for cid in threads [r ["event_id" ]]: add (byid [cid ],1 )
187+ add_html (r ,0 )
188+ for cid in threads [r ["event_id" ]]: add_html (byid [cid ],1 )
210189
211190 (rdir / "room_log.txt" ).write_text ("\n " .join (plain )+ "\n " ,encoding = "utf-8" )
212191 (rdir / "index.html" ).write_text ("\n " .join (html_lines )+ "\n " ,encoding = "utf-8" )
213192 logging .info (" wrote → %s" , rdir )
214193 return title , room , slug (room )
215194
216- # ═════════════════════════════ main ══ ═══════════════════════════════
195+ # ═══════════════════════════════ MAIN ═══════════════════════════════
217196pathlib .Path ("archive" ).mkdir (exist_ok = True )
218197(pathlib .Path ("archive/index.html" )).unlink (missing_ok = True )
219198
@@ -226,20 +205,20 @@ def add(ev,lvl):
226205 logging .error ("‼ failed for %s – %s" , rid , exc )
227206
228207meta .sort (key = lambda t :t [0 ].lower ())
229- listing = "\n " .join (
230- f"<li><a href='archive/{ s } /index.html'>{ html .escape (t )} </a>"
231- f"<br><small>{ html .escape (r )} </small></li>"
232- for t ,r ,s in meta )
233-
208+ accent = "#64b5f6" ; accent_hover = "#90caf9"
234209landing = f"""<!doctype html><meta charset=utf-8><meta name=viewport content='width=device-width,initial-scale=1'>
235210<title>Archived rooms</title>
236211<style>
237212body{{margin:0 auto;max-width:65ch;font:16px/1.55 system-ui,-apple-system,'Segoe UI',Helvetica,Arial,sans-serif;
238213 background:#141414;color:#e6e6e6;padding:2rem}}
239- a{{color:#9cf;text-decoration:none}}
214+ a,a:visited{{color:{ accent } }}
215+ a:hover{{color:{ accent_hover } }}
240216@media(max-width:480px){{body{{padding:1rem;font-size:15px}}}}
241217</style>
242- <h1>Archived rooms</h1><ul>{ listing } </ul>"""
218+ <h1>Archived rooms</h1>
219+ <ul>
220+ { "" .join (f"<li><a href='archive/{ s } /index.html'>{ html .escape (t )} </a><br><small>{ html .escape (r )} </small></li>" for t ,r ,s in meta )}
221+ </ul>"""
243222pathlib .Path ("index.html" ).write_text (landing , encoding = "utf-8" )
244223logging .info ("root index.html regenerated ✓" )
245224
0 commit comments