|
6 | 6 |
|
7 | 7 | """Extractors for https://myhentaigallery.com/""" |
8 | 8 |
|
9 | | -from .common import GalleryExtractor |
| 9 | +from .common import Extractor, GalleryExtractor, Message |
10 | 10 | from .. import text, exception |
11 | 11 |
|
# Scheme-optional domain matcher shared by every extractor pattern in this module
BASE_PATTERN = r"(?:https?://)?myhentaigallery\.com"
12 | 13 |
|
13 | | -class MyhentaigalleryGalleryExtractor(GalleryExtractor): |
14 | | - """Extractor for image galleries from myhentaigallery.com""" |
| 14 | + |
class MyhentaigalleryBase:
    """Base class for myhentaigallery extractors.

    Provides the category name and site root shared by the gallery and
    tag extractors below.
    """
    category = "myhentaigallery"
    root = "https://myhentaigallery.com"
| 18 | + |
| 19 | + |
| 20 | +class MyhentaigalleryGalleryExtractor(MyhentaigalleryBase, GalleryExtractor): |
| 21 | + """Extractor for image galleries from myhentaigallery.com""" |
17 | 22 | directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}") |
18 | | - pattern = (r"(?:https?://)?myhentaigallery\.com" |
19 | | - r"/g(?:allery/(?:thumbnails|show))?/(\d+)") |
| 23 | + pattern = rf"{BASE_PATTERN}/g(?:allery/(?:thumbnails|show))?/(\d+)" |
20 | 24 | example = "https://myhentaigallery.com/g/12345" |
21 | 25 |
|
22 | 26 | def __init__(self, match): |
@@ -53,3 +57,32 @@ def images(self, page): |
53 | 57 | "/thumbnail/", "/original/"), None) |
54 | 58 | for url in text.extract_iter(page, 'class="comic-thumb"', '</div>') |
55 | 59 | ] |
| 60 | + |
| 61 | + |
class MyhentaigalleryTagExtractor(MyhentaigalleryBase, Extractor):
    """Extractor for myhentaigallery tag searches"""
    subcategory = "tag"
    pattern = rf"{BASE_PATTERN}(/g/(artist|category|group|parody)/(\d+).*)"
    example = "https://myhentaigallery.com/g/category/123"

    def items(self):
        """Queue each gallery URL for the gallery extractor to handle."""
        metadata = {"_extractor": MyhentaigalleryGalleryExtractor}
        for gallery_url in self.galleries():
            yield Message.Queue, gallery_url, metadata

    def galleries(self):
        """Yield absolute gallery URLs from every page of the tag listing.

        Follows the ">Next<" pagination link until no further page exists.
        """
        base = self.root
        url = base + self.groups[0]

        while True:
            page = self.request(url).text

            # Each result card links to one gallery
            for card in text.extract_iter(
                    page, '<div class="comic-inner">', "<div"):
                yield base + text.extr(card, 'href="', '"')

            # Stop when the pagination control has no "Next" link
            pos = page.find(">Next<")
            if pos < 0:
                return
            url = base + text.rextr(page, 'href="', '"', pos)
0 commit comments