Skip to content

Commit 793956d

Browse files
dev-KingMaster and mikf authored
[arena] add 'channel' extractor (#5847 #8509)
* feat: issue 5847 site support for are.na
* flake8 lint error fix
* class name error fix
* update
  - prevent unnecessary request to 'page_url'
  - fix pagination
  - simplify block extraction code
  TODO:
  - rewrite without GalleryExtractor
  - extractors for Blocks, Users, etc
* supportedsites
* tests
* rename to 'channel' extractor
* update site title to 'Are.na'
* prioritize attachments

---------

Co-authored-by: Mike Fährmann <[email protected]>
1 parent d75a135 commit 793956d

File tree

5 files changed

+259
-0
lines changed

5 files changed

+259
-0
lines changed

docs/supportedsites.md

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,12 @@ Consider all listed sites to potentially be NSFW.
109109
<td>Search Results, Series, Subscriptions, Tag Searches, User Profiles, User Bookmarks, User Series, User Works, Works</td>
110110
<td>Supported</td>
111111
</tr>
112+
<tr id="arena" title="arena">
113+
<td>Are.na</td>
114+
<td>https://are.na/</td>
115+
<td>Channels</td>
116+
<td></td>
117+
</tr>
112118
<tr id="artstation" title="artstation">
113119
<td>ArtStation</td>
114120
<td>https://www.artstation.com/</td>

gallery_dl/extractor/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
"ao3",
2727
"arcalive",
2828
"architizer",
29+
"arena",
2930
"artstation",
3031
"aryion",
3132
"batoto",

gallery_dl/extractor/arena.py

Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,85 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright 2025 Mike Fährmann
4+
#
5+
# This program is free software; you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License version 2 as
7+
# published by the Free Software Foundation.
8+
9+
"""Extractor for https://are.na/"""
10+
11+
from .common import GalleryExtractor
12+
13+
14+
class ArenaChannelExtractor(GalleryExtractor):
    """Extractor for are.na channels

    Channel metadata and channel contents are read from the
    'https://api.are.na/v2/channels/...' JSON endpoints. The channel is
    addressed by 'self.groups[0]', presumably the slug captured by the
    single group in 'pattern' (second path segment of the URL).
    """
    category = "arena"
    subcategory = "channel"
    root = "https://are.na"
    pattern = r"(?:https?://)?(?:www\.)?are\.na/[^/?#]+/([^/?#]+)"
    example = "https://are.na/evan-collins-1522646491/cassette-futurism"

    def metadata(self, page):
        """Return channel-level metadata from the channel endpoint

        'page' is not used by this method.
        """
        info = self.request_json(
            f"https://api.are.na/v2/channels/{self.groups[0]}")

        return {
            # prefer the slug; fall back to the stringified 'id'
            "gallery_id"  : info.get("slug") or str(info.get("id")),
            "channel_id"  : info.get("id"),
            "channel_slug": info.get("slug"),
            "title"       : info.get("title") or "",
            "count"       : info.get("length") or 0,
            "user"        : info.get("user"),
            "date"        : self.parse_datetime_iso(info.get("created_at")),
            "date_updated": self.parse_datetime_iso(info.get("updated_at")),
        }

    def images(self, page):
        """Yield '(url, metadata)' pairs for downloadable channel blocks

        The 'contents' endpoint is requested page by page with
        'per' = 100; iteration stops at the first empty page or at the
        first page holding fewer than 100 blocks. Blocks without any
        usable file URL are skipped. 'page' is not used by this method.
        """
        api = f"https://api.are.na/v2/channels/{self.groups[0]}/contents"
        limit = 100
        params = {"page": 1, "per": limit}

        while True:
            data = self.request_json(api, params=params)

            contents = data.get("contents")
            if not contents:
                return

            for block in contents:
                # Some Links/Channels may not have downloadable media
                url = self._block_url(block)
                if not url:
                    continue

                meta = {
                    "id"         : block.get("id"),
                    "block_class": block.get("class"),
                    "block_title": block.get("title") or block.get(
                        "generated_title") or "",
                }

                # Provide source link if it exists
                if src := block.get("source"):
                    meta["source_url"] = src.get("url") or ""

                yield url, meta

            # a short page means there is nothing left to fetch
            if len(contents) < limit:
                return
            params["page"] += 1

    @staticmethod
    def _block_url(block):
        """Return the preferred download URL of 'block', or None

        Candidates are tried in this order: the attachment (e.g. PDFs,
        files), then the 'original', 'display', and 'large' image
        versions. A candidate without a usable 'url' is passed over
        instead of ending the search, so an attachment or image version
        lacking a URL does not hide the remaining fallbacks.
        """
        if attachment := block.get("attachment"):
            if url := attachment.get("url"):
                return url

        if image := block.get("image"):
            for size in ("original", "display", "large"):
                version = image.get(size)
                if version and (url := version.get("url")):
                    return url

        return None

scripts/supportedsites.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
"ao3" : "Archive of Our Own",
3131
"archivedmoe" : "Archived.Moe",
3232
"archiveofsins" : "Archive of Sins",
33+
"arena" : "Are.na",
3334
"artstation" : "ArtStation",
3435
"aryion" : "Eka's Portal",
3536
"atfbooru" : "ATFBooru",

test/results/arena.py

Lines changed: 166 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,166 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License version 2 as
5+
# published by the Free Software Foundation.
6+
7+
from gallery_dl.extractor import arena
8+
9+
10+
# Expected results for arena.ArenaChannelExtractor, one dict per channel URL.
# Keys starting with "#" carry the input URL, the extractor class, and the
# expected URL pattern / exact result list / result count; all other keys
# list expected metadata values.
# NOTE(review): how "#"-keys, range() values, type values (str, int), and
# the "?" key prefix are evaluated is defined by the test runner, which is
# not visible here - confirm against it before changing any expectation.
__tests__ = (
11+
# Case 1: channel expected to yield 160 results, all with block_class "Image"
{
12+
"#url" : "https://are.na/evan-collins-1522646491/cassette-futurism",
13+
"#class" : arena.ArenaChannelExtractor,
14+
"#pattern" : r"https://d2w9rnfcy7mm78\.cloudfront\.net/\d+/original_\w+\.\w+\?\d+\?bc=\d",
15+
"#count" : 160,
16+
17+
"block_class" : "Image",
18+
"block_title" : str,
19+
"channel_id" : 1102343,
20+
"channel_slug": "cassette-futurism",
21+
"count" : 160,
22+
"date" : "dt:2021-05-31 20:38:28",
23+
"date_updated": "dt:2025-10-24 15:25:40",
24+
"gallery_id" : "cassette-futurism",
25+
"id" : int,
26+
"num" : range(1, 160),
27+
"title" : "Cassette Futurism",
28+
"user" : {
29+
"avatar" : "https://static.avatars.are.na/51156/small_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
30+
"badge" : "premium",
31+
"base_class" : "User",
32+
"can_index" : True,
33+
"channel_count" : range(250, 300),
34+
"class" : "User",
35+
"created_at" : "2018-04-02T05:21:30.282Z",
36+
"first_name" : "Evan",
37+
"follower_count" : range(4900, 6000),
38+
"following_count": range(10, 20),
39+
"full_name" : "Evan Collins",
40+
"id" : 51156,
41+
"initials" : "EC",
42+
"is_confirmed" : True,
43+
"is_exceeding_connections_limit": False,
44+
"is_lifetime_premium": False,
45+
"is_pending_confirmation": False,
46+
"is_pending_reconfirmation": False,
47+
"is_premium" : True,
48+
"is_supporter" : False,
49+
"last_name" : "Collins",
50+
"metadata" : {"description": None},
51+
"profile_id" : 171860,
52+
"slug" : "evan-collins-1522646491",
53+
"username" : "Evan Collins",
54+
"avatar_image" : {
55+
"display": "https://static.avatars.are.na/51156/medium_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
56+
"thumb" : "https://static.avatars.are.na/51156/small_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
57+
},
58+
},
59+
},
60+
61+
# Case 2: "#count" (89) is lower than the metadata "count" (91), and the
# URL pattern makes the file extension optional.
# NOTE(review): presumably two blocks of this channel have no downloadable
# file and are skipped by the extractor - confirm.
{
62+
"#url" : "https://are.na/lachie/transparent-tech-cobxde9pu40",
63+
"#class" : arena.ArenaChannelExtractor,
64+
"#pattern" : r"https://d2w9rnfcy7mm78\.cloudfront\.net/\d+/original_\w+(\.\w+)?\?\d+\?bc=\d",
65+
"#count" : 89,
66+
67+
"block_class" : str,
68+
"block_title" : str,
69+
"channel_id" : 2599871,
70+
"channel_slug": "transparent-tech-cobxde9pu40",
71+
"count" : 91,
72+
"date" : "dt:2024-01-14 02:37:22",
73+
"date_updated": "dt:2025-10-20 20:52:09",
74+
"gallery_id" : "transparent-tech-cobxde9pu40",
75+
"id" : int,
76+
"num" : int,
77+
"?source_url" : str,
78+
"title" : "🫙 Transparent Tech",
79+
"user" : {
80+
"avatar" : "https://static.avatars.are.na/55241/small_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
81+
"badge" : "premium",
82+
"base_class" : "User",
83+
"can_index" : True,
84+
"channel_count" : 219,
85+
"class" : "User",
86+
"created_at" : "2018-05-03T07:13:39.847Z",
87+
"first_name" : "lachie",
88+
"follower_count" : range(80, 120),
89+
"following_count": range(40, 80),
90+
"full_name" : "lachie 🔐",
91+
"id" : 55241,
92+
"initials" : "l🔐",
93+
"is_confirmed" : True,
94+
"is_exceeding_connections_limit": False,
95+
"is_lifetime_premium": False,
96+
"is_pending_confirmation": False,
97+
"is_pending_reconfirmation": False,
98+
"is_premium" : True,
99+
"is_supporter" : False,
100+
"last_name" : "🔐",
101+
"metadata" : {"description": None},
102+
"profile_id" : 188402,
103+
"slug" : "lachie",
104+
"username" : "lachie 🔐",
105+
"avatar_image" : {
106+
"display": "https://static.avatars.are.na/55241/medium_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
107+
"thumb" : "https://static.avatars.are.na/55241/small_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
108+
},
109+
},
110+
},
111+
112+
# Case 3: "www." URL form with an exact "#results" list of five URLs; the
# fourth is an attachments.are.na file (.mp4), the others are image
# originals. The metadata "count" is 6.
{
113+
"#url" : "https://www.are.na/mikf/touhou-zr5p8idnkag",
114+
"#class" : arena.ArenaChannelExtractor,
115+
"#results" : (
116+
"https://d2w9rnfcy7mm78.cloudfront.net/40871580/original_3fb729c818f92de4539d4ff263eb6056.png?1762357121?bc=0",
117+
"https://d2w9rnfcy7mm78.cloudfront.net/40871591/original_91d7c8144a5ba9776118f0af6d923f94.png?1762357155?bc=0",
118+
"https://d2w9rnfcy7mm78.cloudfront.net/40871607/original_766f89eb3b06cc84372bea9d58132c93.png?1762357207?bc=0",
119+
"https://attachments.are.na/40873309/ebf4eae61a70773f7494e10a98b18fe3.mp4?1762359389",
120+
"https://d2w9rnfcy7mm78.cloudfront.net/40873379/original_289824f61eade100785db100652abd9a.jpg?1762359483?bc=0",
121+
),
122+
123+
"block_class" : str,
124+
"block_title" : str,
125+
"channel_id" : 4422732,
126+
"channel_slug": "touhou-zr5p8idnkag",
127+
"count" : 6,
128+
"date" : "dt:2025-11-05 15:37:40",
129+
"date_updated": "dt:2025-11-10 19:52:52",
130+
"gallery_id" : "touhou-zr5p8idnkag",
131+
"id" : int,
132+
"title" : '''Touhou "東方"''',
133+
"user" : {
134+
"avatar" : "",
135+
"badge" : None,
136+
"base_class" : "User",
137+
"can_index" : False,
138+
"channel_count" : 3,
139+
"class" : "User",
140+
"created_at" : "2025-11-05T15:35:15.242Z",
141+
"first_name" : "mikf",
142+
"follower_count" : 0,
143+
"following_count": 0,
144+
"full_name" : "mikf .",
145+
"id" : 1127493,
146+
"initials" : "m.",
147+
"is_confirmed" : True,
148+
"is_exceeding_connections_limit": False,
149+
"is_lifetime_premium": False,
150+
"is_pending_confirmation": False,
151+
"is_pending_reconfirmation": False,
152+
"is_premium" : False,
153+
"is_supporter" : False,
154+
"last_name" : ".",
155+
"metadata" : {"description": None},
156+
"profile_id" : 4422723,
157+
"slug" : "mikf",
158+
"username" : "mikf .",
159+
"avatar_image" : {
160+
"display": "",
161+
"thumb" : "",
162+
},
163+
},
164+
},
165+
166+
)

0 commit comments

Comments
 (0)