Skip to content

Commit 425f901

Browse files
committed
Fix: ajax kemono
1 parent 2fdd015 commit 425f901

File tree

3 files changed

+52
-29
lines changed

3 files changed

+52
-29
lines changed

comiccrawler/crawler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
# videos
2626
".mp4", ".m4v", ".mkv", ".swf", ".webm", ".mov", ".wmv",
2727
# audio
28-
".mp3", ".aac", ".flac", ".wav",
28+
".mp3", ".aac", ".flac", ".wav", ".mpga",
2929
# json
3030
".json", ".txt"
3131
)

comiccrawler/mods/kemono.py

Lines changed: 50 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,45 +2,68 @@
22
https://kemono.party/{service}/user/{id}
33
"""
44

5+
import json
56
import re
6-
from urllib.parse import urljoin
77

8-
from comiccrawler.error import SkipEpisodeError
8+
from comiccrawler.error import SkipPageError
9+
from comiccrawler.url import update_qs
910

1011
from ..core import Episode
12+
from ..grabber import grabber
1113

1214
domain = ["kemono.party", "kemono.su", "coomer.su"]
1315
name = "Kemono"
1416
noepfolder = True
17+
next_page_cache = {}
1518

1619
def get_title(html, url):
    """Return the gallery title, e.g. ``[Kemono][patreon] SomeArtist``.

    The creator profile is fetched from the site's JSON API rather than
    scraped from ``html``, which is why the ``html`` argument is unused.
    """
    from urllib.parse import urlparse

    # e.g. "patreon/user/123456"; raises AttributeError on a non-user URL,
    # which surfaces as an error instead of a silently wrong title.
    sig = re.search(r"\w+/user/\d+", url).group()
    # Use the host the user actually browsed (kemono.su, kemono.party,
    # coomer.su, ...) instead of hard-coding kemono.su, so coomer.su
    # galleries query the right API server.
    host = urlparse(url).netloc
    data = grabber(f"https://{host}/api/v1/{sig}/profile").json()
    return f"[Kemono][{data['service']}] {data['name']}"
2025

2126
def get_episodes(html, url):
    """Return the creator's posts as Episode objects, oldest first.

    Two-phase flow: on the first visit (a regular gallery URL) the HTML is
    useless, so queue the JSON API endpoint in ``next_page_cache`` and raise
    SkipPageError; on subsequent visits ``html`` is the API response body and
    is parsed as JSON.
    """
    from urllib.parse import urlparse, parse_qs

    # Keep whichever mirror the user browsed (kemono.su / kemono.party /
    # coomer.su) instead of hard-coding kemono.su — the coomer.su API lives
    # on its own host.
    host = urlparse(url).netloc

    if "/api/v1/" not in url:
        # First pass: redirect the crawler to the legacy posts API.
        sig = re.search(r"\w+/user/\d+", url).group()
        next_page_cache[url] = f"https://{host}/api/v1/{sig}/posts-legacy"
        raise SkipPageError

    data = json.loads(html)
    episodes = [
        Episode(
            title=f"{result['id']} - {result['title']}",
            url=f"https://{host}/post/{result['id']}",
            image=[f"{a['server']}/data{a['path']}" for a in attachments],
        )
        for result, attachments in zip(data["results"], data["result_attachments"])
    ]

    # Queue the next API page unless we have paged past the total count.
    # Computing the offset directly (instead of raising StopIteration out of
    # an update_qs callback) keeps the pagination logic exception-free.
    offset = int(parse_qs(urlparse(url).query).get("o", ["0"])[0] or "0")
    next_offset = offset + data["props"]["limit"]
    if next_offset < data["props"]["count"]:
        next_page_cache[url] = update_qs(url, {"o": str(next_offset)})

    # The API returns newest-first; the crawler expects oldest-first.
    episodes.reverse()
    return episodes
54+
55+
# def get_images(html, url):
56+
# result = []
57+
# for match in re.finditer(r'<a[^>]*href="([^"]*)"\s+download', html):
58+
# result.append(match.group(1))
59+
# if not result:
60+
# raise SkipEpisodeError(True)
61+
# return result
4062

4163
def get_next_page(html, url):
    """Return the follow-up URL queued for *url*, or None when done.

    get_episodes stores the next URL under the current one in
    ``next_page_cache`` (the API endpoint on the first pass, the next
    paginated API page afterwards); popping ensures each queued page is
    visited exactly once. ``html`` is unused.
    """
    return next_page_cache.pop(url, None)

comiccrawler/url.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def update_qs(url, new_query: dict[str, str | Callable[[str], str]]):
2626
query_dict = parse_qs(d["query"])
2727
for key, value in new_query.items():
2828
if callable(value):
29-
value = value(query_dict.get(key, "")[0])
29+
value = value(query_dict.get(key, [""])[0])
3030
if value is None:
3131
query_dict.pop(key, None)
3232
else:

0 commit comments

Comments
 (0)