Skip to content

Commit 3f63638

Browse files
Add an Admin API endpoint for listing quarantined media (#19268)
Co-authored-by: turt2live <[email protected]> Co-authored-by: Andrew Morgan <[email protected]>
1 parent 1f7f164 commit 3f63638

File tree

8 files changed

+266
-10
lines changed

8 files changed

+266
-10
lines changed

changelog.d/19268.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add an admin API for retrieving a paginated list of quarantined media.

docs/admin_api/media_admin_api.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,33 @@ Response:
7373
}
7474
```
7575

76+
## Listing all quarantined media
77+
78+
This API returns a list of all quarantined media on the server. It is paginated, and can be scoped to either local or
79+
remote media. Note that the pagination values are also scoped to the request parameters - changing them but keeping the
80+
same pagination values will produce unexpected results.
81+
82+
Request:
83+
```http
84+
GET /_synapse/admin/v1/media/quarantined?from=0&limit=100&kind=local
85+
```
86+
87+
`from` and `limit` are optional parameters, and default to `0` and `100` respectively. They are the row index and number
88+
of rows to return - they are not timestamps.
89+
90+
`kind` is required and *MUST* be either `local` or `remote`.
91+
92+
The API returns a JSON body containing MXC URIs for the quarantined media, like the following:
93+
94+
```json
95+
{
96+
"media": [
97+
"mxc://localhost/xwvutsrqponmlkjihgfedcba",
98+
"mxc://localhost/abcdefghijklmnopqrstuvwx"
99+
]
100+
}
101+
```
102+
76103
# Quarantine media
77104

78105
Quarantining media means that it is marked as inaccessible by users. It applies

synapse/media/media_repository.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,7 @@ async def _download_remote_file(
914914
filesystem_id=file_id,
915915
last_access_ts=time_now_ms,
916916
quarantined_by=None,
917+
quarantined_ts=None,
917918
authenticated=authenticated,
918919
sha256=sha256writer.hexdigest(),
919920
)
@@ -1047,6 +1048,7 @@ async def _federation_download_remote_file(
10471048
filesystem_id=file_id,
10481049
last_access_ts=time_now_ms,
10491050
quarantined_by=None,
1051+
quarantined_ts=None,
10501052
authenticated=authenticated,
10511053
sha256=sha256writer.hexdigest(),
10521054
)

synapse/rest/admin/media.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,38 @@ async def on_GET(
293293
return HTTPStatus.OK, {"local": local_mxcs, "remote": remote_mxcs}
294294

295295

296+
class ListQuarantinedMedia(RestServlet):
    """Admin servlet returning one page of quarantined media as MXC URIs.

    Serves `GET /media/quarantined` under the admin API prefix. The request
    takes three query parameters:

    * `from`: row offset to start at (defaults to 0).
    * `limit`: maximum number of rows to return (defaults to 100).
    * `kind`: either `local` or `remote`; selects which media table is listed.
    """

    PATTERNS = admin_patterns("/media/quarantined$")

    def __init__(self, hs: "HomeServer"):
        self.auth = hs.get_auth()
        self.store = hs.get_datastores().main

    async def on_GET(
        self,
        request: SynapseRequest,
    ) -> tuple[int, JsonDict]:
        """Handle the listing request.

        Args:
            request: The incoming HTTP request.

        Returns:
            A tuple of the HTTP status code and a JSON body of the form
            `{"media": [<mxc uri>, ...]}`.

        Raises:
            SynapseError: with a 400 status if `kind` is neither `local`
                nor `remote`.
        """
        # Only server admins may list quarantined media.
        await assert_requester_is_admin(self.auth, request)

        # Parse in a fixed order so a malformed request always reports the
        # same parameter first.
        offset = parse_integer(request, "from", default=0)
        page_size = parse_integer(request, "limit", default=100)
        kind = parse_string(request, "kind", required=True)

        wants_local = kind == "local"
        if not wants_local and kind != "remote":
            raise SynapseError(
                HTTPStatus.BAD_REQUEST,
                "Query parameter `kind` must be either 'local' or 'remote'.",
            )

        quarantined = await self.store.get_quarantined_media_mxcs(
            offset, page_size, wants_local
        )

        return HTTPStatus.OK, {"media": quarantined}
326+
327+
296328
class PurgeMediaCacheRestServlet(RestServlet):
297329
PATTERNS = admin_patterns("/purge_media_cache$")
298330

@@ -532,6 +564,7 @@ def register_servlets_for_media_repo(hs: "HomeServer", http_server: HttpServer)
532564
ProtectMediaByID(hs).register(http_server)
533565
UnprotectMediaByID(hs).register(http_server)
534566
ListMediaInRoom(hs).register(http_server)
567+
ListQuarantinedMedia(hs).register(http_server)
535568
# XXX DeleteMediaByDateSize must be registered before DeleteMediaByID as
536569
# their URL routes overlap.
537570
DeleteMediaByDateSize(hs).register(http_server)

synapse/storage/databases/main/media_repository.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class LocalMedia:
6161
url_cache: str | None
6262
last_access_ts: int
6363
quarantined_by: str | None
64+
quarantined_ts: int | None
6465
safe_from_quarantine: bool
6566
user_id: str | None
6667
authenticated: bool | None
@@ -78,6 +79,7 @@ class RemoteMedia:
7879
created_ts: int
7980
last_access_ts: int
8081
quarantined_by: str | None
82+
quarantined_ts: int | None
8183
authenticated: bool | None
8284
sha256: str | None
8385

@@ -243,6 +245,7 @@ async def get_local_media(self, media_id: str) -> LocalMedia | None:
243245
"user_id",
244246
"authenticated",
245247
"sha256",
248+
"quarantined_ts",
246249
),
247250
allow_none=True,
248251
desc="get_local_media",
@@ -262,6 +265,7 @@ async def get_local_media(self, media_id: str) -> LocalMedia | None:
262265
user_id=row[8],
263266
authenticated=row[9],
264267
sha256=row[10],
268+
quarantined_ts=row[11],
265269
)
266270

267271
async def get_local_media_by_user_paginate(
@@ -319,7 +323,8 @@ def get_local_media_by_user_paginate_txn(
319323
safe_from_quarantine,
320324
user_id,
321325
authenticated,
322-
sha256
326+
sha256,
327+
quarantined_ts
323328
FROM local_media_repository
324329
WHERE user_id = ?
325330
ORDER BY {order_by_column} {order}, media_id ASC
@@ -345,6 +350,7 @@ def get_local_media_by_user_paginate_txn(
345350
user_id=row[9],
346351
authenticated=row[10],
347352
sha256=row[11],
353+
quarantined_ts=row[12],
348354
)
349355
for row in txn
350356
]
@@ -695,6 +701,7 @@ async def get_cached_remote_media(
695701
"quarantined_by",
696702
"authenticated",
697703
"sha256",
704+
"quarantined_ts",
698705
),
699706
allow_none=True,
700707
desc="get_cached_remote_media",
@@ -713,6 +720,7 @@ async def get_cached_remote_media(
713720
quarantined_by=row[6],
714721
authenticated=row[7],
715722
sha256=row[8],
723+
quarantined_ts=row[9],
716724
)
717725

718726
async def store_cached_remote_media(

synapse/storage/databases/main/room.py

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -945,14 +945,58 @@ def get_retention_policy_for_room_txn(
945945
max_lifetime=max_lifetime,
946946
)
947947

948+
async def get_quarantined_media_mxcs(
    self, index_start: int, index_limit: int, local: bool
) -> list[str]:
    """Fetch one page of MXC URIs for quarantined media.

    Results are ordered by quarantine timestamp (oldest first), with ties
    broken by media origin (remote media only) and then by media ID.

    Media that was already quarantined when the `quarantined_ts` column was
    added has a timestamp of zero, so on established servers many rows may
    share the same timestamp; the tie-breakers keep pagination stable.

    Args:
        index_start: The row offset to start from.
        index_limit: The maximum number of results to return.
        local: When true, only local media is returned. When false, only
            remote media is returned.

    Returns:
        The quarantined media as a list of `mxc://<origin>/<media_id>` URIs.
    """

    def _get_quarantined_media_mxcs_txn(
        txn: LoggingTransaction,
    ) -> list[str]:
        # Both queries yield (media_origin, media_id) rows so that they can
        # be handled uniformly below. The local table carries no origin, so
        # a placeholder is selected and replaced with our own server name.
        if not local:
            query = "SELECT media_origin, media_id FROM remote_media_cache WHERE quarantined_by IS NOT NULL ORDER BY quarantined_ts, media_origin, media_id ASC LIMIT ? OFFSET ?"
        else:
            query = "SELECT '' as media_origin, media_id FROM local_media_repository WHERE quarantined_by IS NOT NULL ORDER BY quarantined_ts, media_id ASC LIMIT ? OFFSET ?"
        txn.execute(query, (index_limit, index_start))

        return [
            f"mxc://{self.hs.hostname if local else row_origin}/{row_media_id}"
            for row_origin, row_media_id in txn
        ]

    return await self.db_pool.runInteraction(
        "get_quarantined_media_mxcs",
        _get_quarantined_media_mxcs_txn,
    )
991+
948992
async def get_media_mxcs_in_room(self, room_id: str) -> tuple[list[str], list[str]]:
949993
"""Retrieves all the local and remote media MXC URIs in a given room
950994
951995
Args:
952996
room_id
953997
954998
Returns:
955-
The local and remote media as a lists of the media IDs.
999+
The local and remote media as lists of the media IDs.
9561000
"""
9571001

9581002
def _get_media_mxcs_in_room_txn(
@@ -1147,6 +1191,10 @@ def _quarantine_local_media_txn(
11471191
The total number of media items quarantined
11481192
"""
11491193
total_media_quarantined = 0
1194+
now_ts: int | None = self.clock.time_msec()
1195+
1196+
if quarantined_by is None:
1197+
now_ts = None
11501198

11511199
# Effectively a legacy path, update any media that was explicitly named.
11521200
if media_ids:
@@ -1155,13 +1203,13 @@ def _quarantine_local_media_txn(
11551203
)
11561204
sql = f"""
11571205
UPDATE local_media_repository
1158-
SET quarantined_by = ?
1206+
SET quarantined_by = ?, quarantined_ts = ?
11591207
WHERE {sql_many_clause_sql}"""
11601208

11611209
if quarantined_by is not None:
11621210
sql += " AND safe_from_quarantine = FALSE"
11631211

1164-
txn.execute(sql, [quarantined_by] + sql_many_clause_args)
1212+
txn.execute(sql, [quarantined_by, now_ts] + sql_many_clause_args)
11651213
# Note that a rowcount of -1 can be used to indicate no rows were affected.
11661214
total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0
11671215

@@ -1172,13 +1220,13 @@ def _quarantine_local_media_txn(
11721220
)
11731221
sql = f"""
11741222
UPDATE local_media_repository
1175-
SET quarantined_by = ?
1223+
SET quarantined_by = ?, quarantined_ts = ?
11761224
WHERE {sql_many_clause_sql}"""
11771225

11781226
if quarantined_by is not None:
11791227
sql += " AND safe_from_quarantine = FALSE"
11801228

1181-
txn.execute(sql, [quarantined_by] + sql_many_clause_args)
1229+
txn.execute(sql, [quarantined_by, now_ts] + sql_many_clause_args)
11821230
total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0
11831231

11841232
return total_media_quarantined
@@ -1202,6 +1250,10 @@ def _quarantine_remote_media_txn(
12021250
The total number of media items quarantined
12031251
"""
12041252
total_media_quarantined = 0
1253+
now_ts: int | None = self.clock.time_msec()
1254+
1255+
if quarantined_by is None:
1256+
now_ts = None
12051257

12061258
if media:
12071259
sql_in_list_clause, sql_args = make_tuple_in_list_sql_clause(
@@ -1211,10 +1263,10 @@ def _quarantine_remote_media_txn(
12111263
)
12121264
sql = f"""
12131265
UPDATE remote_media_cache
1214-
SET quarantined_by = ?
1266+
SET quarantined_by = ?, quarantined_ts = ?
12151267
WHERE {sql_in_list_clause}"""
12161268

1217-
txn.execute(sql, [quarantined_by] + sql_args)
1269+
txn.execute(sql, [quarantined_by, now_ts] + sql_args)
12181270
total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0
12191271

12201272
total_media_quarantined = 0
@@ -1224,9 +1276,9 @@ def _quarantine_remote_media_txn(
12241276
)
12251277
sql = f"""
12261278
UPDATE remote_media_cache
1227-
SET quarantined_by = ?
1279+
SET quarantined_by = ?, quarantined_ts = ?
12281280
WHERE {sql_many_clause_sql}"""
1229-
txn.execute(sql, [quarantined_by] + sql_many_clause_args)
1281+
txn.execute(sql, [quarantined_by, now_ts] + sql_many_clause_args)
12301282
total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0
12311283

12321284
return total_media_quarantined
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
--
2+
-- This file is licensed under the Affero General Public License (AGPL) version 3.
3+
--
4+
-- Copyright (C) 2025 Element Creations, Ltd
5+
--
6+
-- This program is free software: you can redistribute it and/or modify
7+
-- it under the terms of the GNU Affero General Public License as
8+
-- published by the Free Software Foundation, either version 3 of the
9+
-- License, or (at your option) any later version.
10+
--
11+
-- See the GNU Affero General Public License for more details:
12+
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
13+
14+
-- Add a timestamp for when each piece of media was quarantined, so that
15+
-- quarantined media can be listed oldest-first.
16+
--
17+
-- This column is nullable: it is NULL for media which is not quarantined.
18+
-- Media which was already quarantined before this migration gets a timestamp
19+
-- of 0 (set below), as the real quarantine time is unknown.
20+
ALTER TABLE local_media_repository ADD COLUMN quarantined_ts BIGINT;
21+
ALTER TABLE remote_media_cache ADD COLUMN quarantined_ts BIGINT;
22+
23+
UPDATE local_media_repository SET quarantined_ts = 0 WHERE quarantined_by IS NOT NULL;
24+
UPDATE remote_media_cache SET quarantined_ts = 0 WHERE quarantined_by IS NOT NULL;
25+
26+
-- Note: We *probably* should have an index on quarantined_ts, but we're going
27+
-- to try to defer that to a future migration after seeing the performance impact.

0 commit comments

Comments
 (0)