|
2 | 2 | https://kemono.party/{service}/user/{id} |
3 | 3 | """ |
4 | 4 |
|
| 5 | +import json |
5 | 6 | import re |
6 | | -from urllib.parse import urljoin |
7 | 7 |
|
8 | | -from comiccrawler.error import SkipEpisodeError |
| 8 | +from comiccrawler.error import SkipPageError |
| 9 | +from comiccrawler.url import update_qs |
9 | 10 |
|
10 | 11 | from ..core import Episode |
| 12 | +from ..grabber import grabber |
11 | 13 |
|
12 | 14 | domain = ["kemono.party", "kemono.su", "coomer.su"] |
13 | 15 | name = "Kemono" |
14 | 16 | noepfolder = True |
| 17 | +next_page_cache = {} |
15 | 18 |
|
def get_title(html, url):
	"""Return the gallery title, formatted "[Kemono][{service}] {artist}".

	Looks the artist up through the site's JSON profile API rather than
	scraping the HTML page.

	html -- unused (kept for the module-interface signature).
	url  -- artist page URL, expected to contain "{service}/user/{id}".
	"""
	from urllib.parse import urlsplit  # stdlib, used only here

	sig = re.search(r"\w+/user/\d+", url).group()
	# coomer.su is a separate site with its own API; every other domain
	# (kemono.party, kemono.su) is served from kemono.su.  The original
	# hardcoded kemono.su, which broke coomer.su profile lookups.
	host = "coomer.su" if "coomer" in urlsplit(url).netloc else "kemono.su"
	data = grabber(f"https://{host}/api/v1/{sig}/profile").json()
	return f"[Kemono][{data['service']}] {data['name']}"
20 | 25 |
|
def get_episodes(html, url):
	"""Return the artist's episodes (one per post), oldest first.

	Two-phase protocol:
	1. First call gets the artist HTML page URL: compute the JSON API
	   endpoint, queue it in ``next_page_cache`` and raise SkipPageError
	   so the crawler re-fetches through get_next_page.
	2. Subsequent calls get API JSON: build one Episode per post, with
	   attachment image URLs, and queue the next offset page while more
	   posts remain.
	"""
	from urllib.parse import parse_qs, urlsplit  # stdlib, used only here

	if "/api/v1/" not in url:
		sig = re.search(r"\w+/user/\d+", url).group()
		# coomer.su has its own API host; kemono.party/kemono.su both
		# resolve to kemono.su (the original hardcoded kemono.su, which
		# broke coomer.su galleries).
		host = "coomer.su" if "coomer" in urlsplit(url).netloc else "kemono.su"
		next_page_cache[url] = f"https://{host}/api/v1/{sig}/posts-legacy"
		raise SkipPageError

	data = json.loads(html)
	# Keep post URLs on the same host the API page came from.
	host = urlsplit(url).netloc
	episodes = []
	for post, attachments in zip(data["results"], data["result_attachments"]):
		episodes.append(Episode(
			title=f"{post['id']} - {post['title']}",
			url=f"https://{host}/post/{post['id']}",
			image=[f"{a['server']}/data{a['path']}" for a in attachments]
		))

	# Queue the next page while posts remain.  Computed directly from the
	# current "o" query param instead of the original's callback that
	# raised StopIteration through update_qs — same offsets, same stop
	# condition, straight-line control flow.
	offset = int(parse_qs(urlsplit(url).query).get("o", ["0"])[0] or "0")
	next_offset = offset + data["props"]["limit"]
	if next_offset < data["props"]["count"]:
		next_page_cache[url] = update_qs(url, {"o": str(next_offset)})

	episodes.reverse()
	return episodes
| 54 | + |
| 55 | +# def get_images(html, url): |
| 56 | +# result = [] |
| 57 | +# for match in re.finditer(r'<a[^>]*href="([^"]*)"\s+download', html): |
| 58 | +# result.append(match.group(1)) |
| 59 | +# if not result: |
| 60 | +# raise SkipEpisodeError(True) |
| 61 | +# return result |
40 | 62 |
|
def get_next_page(html, url):
	"""Return the follow-up URL queued for *url*, consuming the entry.

	Pages are queued by get_episodes in ``next_page_cache``; each queued
	URL is handed out at most once.  Returns None when nothing is queued.
	(Dead commented-out HTML-scraping code from the pre-API version
	removed.)
	"""
	return next_page_cache.pop(url, None)
0 commit comments