|
2 | 2 | https://kemono.party/{service}/user/{id} |
3 | 3 | """ |
4 | 4 |
|
| 5 | +import json |
5 | 6 | import re |
6 | | -from urllib.parse import urljoin |
7 | 7 |
|
8 | | -from comiccrawler.error import SkipEpisodeError |
| 8 | +from comiccrawler.error import SkipPageError |
| 9 | +from comiccrawler.url import update_qs |
9 | 10 |
|
10 | 11 | from ..core import Episode |
| 12 | +from ..grabber import grabber |
11 | 13 |
|
12 | 14 | domain = ["kemono.party", "kemono.su", "coomer.su"] |
13 | 15 | name = "Kemono" |
14 | 16 | noepfolder = True |
| 17 | +next_page_cache = {} |
15 | 18 |
|
def get_title(html, url):
	"""Return the gallery title, formatted "[Kemono][{service}] {artist}".

	Looks the artist up through the site's JSON profile API rather than
	scraping the HTML page.

	html -- unused (kept for the module-interface signature).
	url  -- artist page URL, expected to contain "{service}/user/{id}".
	"""
	from urllib.parse import urlsplit  # stdlib, used only here

	sig = re.search(r"\w+/user/\d+", url).group()
	# coomer.su is a separate site with its own API; every other domain
	# (kemono.party, kemono.su) is served from kemono.su.  The original
	# hardcoded kemono.su, which broke coomer.su profile lookups.
	host = "coomer.su" if "coomer" in urlsplit(url).netloc else "kemono.su"
	data = grabber(f"https://{host}/api/v1/{sig}/profile").json()
	return f"[Kemono][{data['service']}] {data['name']}"
20 | 25 |
|
def get_episodes(html, url):
	"""Return the artist's episodes (one per post), oldest first.

	Two-phase protocol:
	1. First call gets the artist HTML page URL: compute the JSON API
	   endpoint, queue it in ``next_page_cache`` and raise SkipPageError
	   so the crawler re-fetches through get_next_page.
	2. Subsequent calls get API JSON: build one Episode per post, with
	   attachment image URLs, and queue the next offset page while more
	   posts remain.
	"""
	from urllib.parse import parse_qs, urlsplit  # stdlib, used only here

	if "/api/v1/" not in url:
		sig = re.search(r"\w+/user/\d+", url).group()
		# coomer.su has its own API host; kemono.party/kemono.su both
		# resolve to kemono.su (the original hardcoded kemono.su, which
		# broke coomer.su galleries).
		host = "coomer.su" if "coomer" in urlsplit(url).netloc else "kemono.su"
		next_page_cache[url] = f"https://{host}/api/v1/{sig}/posts-legacy"
		raise SkipPageError

	data = json.loads(html)
	# Keep post URLs on the same host the API page came from.
	host = urlsplit(url).netloc
	episodes = []
	for post, attachments in zip(data["results"], data["result_attachments"]):
		episodes.append(Episode(
			title=f"{post['id']} - {post['title']}",
			url=f"https://{host}/post/{post['id']}",
			image=[f"{a['server']}/data{a['path']}" for a in attachments]
		))

	# Queue the next page while posts remain.  Computed directly from the
	# current "o" query param instead of the original's callback that
	# raised StopIteration through update_qs — same offsets, same stop
	# condition, straight-line control flow.
	offset = int(parse_qs(urlsplit(url).query).get("o", ["0"])[0] or "0")
	next_offset = offset + data["props"]["limit"]
	if next_offset < data["props"]["count"]:
		next_page_cache[url] = update_qs(url, {"o": str(next_offset)})

	episodes.reverse()
	return episodes
| 54 | + |
| 55 | +# def get_images(html, url): |
| 56 | +# result = [] |
| 57 | +# for match in re.finditer(r'<a[^>]*href="([^"]*)"\s+download', html): |
| 58 | +# result.append(match.group(1)) |
| 59 | +# if not result: |
| 60 | +# raise SkipEpisodeError(True) |
| 61 | +# return result |
40 | 62 |
|
def get_next_page(html, url):
	"""Return the follow-up URL queued for *url*, consuming the entry.

	Pages are queued by get_episodes in ``next_page_cache``; each queued
	URL is handed out at most once.  Returns None when nothing is queued.
	(Dead commented-out HTML-scraping code from the pre-API version
	removed.)
	"""
	return next_page_cache.pop(url, None)
0 commit comments