Skip to content

Commit d75a135

Browse files
committed
[myhentaigallery] add 'tag' extractor (#8537)
1 parent 8876272 commit d75a135

File tree

3 files changed

+61
-6
lines changed

3 files changed

+61
-6
lines changed

docs/supportedsites.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -658,7 +658,7 @@ Consider all listed sites to potentially be NSFW.
658658
<tr id="myhentaigallery" title="myhentaigallery">
659659
<td>My Hentai Gallery</td>
660660
<td>https://myhentaigallery.com/</td>
661-
<td>Galleries</td>
661+
<td>Galleries, Tag Searches</td>
662662
<td></td>
663663
</tr>
664664
<tr id="naver-blog" title="naver-blog">

gallery_dl/extractor/myhentaigallery.py

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,21 @@
66

77
"""Extractors for https://myhentaigallery.com/"""
88

9-
from .common import GalleryExtractor
9+
from .common import Extractor, GalleryExtractor, Message
1010
from .. import text, exception
1111

12+
# Regex prefix (optional scheme followed by the site's host name) that the
# extractor classes in this module prepend to their own URL 'pattern'.
BASE_PATTERN = r"(?:https?://)?myhentaigallery\.com"
1213

13-
class MyhentaigalleryGalleryExtractor(GalleryExtractor):
14-
"""Extractor for image galleries from myhentaigallery.com"""
14+
15+
class MyhentaigalleryBase:
    """Mixin holding the attributes shared by all myhentaigallery extractors"""
    # site identifier common to every extractor of this module
    category = "myhentaigallery"
    # scheme + host that site-relative paths get appended to
    root = "https://myhentaigallery.com"
18+
19+
20+
class MyhentaigalleryGalleryExtractor(MyhentaigalleryBase, GalleryExtractor):
21+
"""Extractor for image galleries from myhentaigallery.com"""
1722
directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}")
18-
pattern = (r"(?:https?://)?myhentaigallery\.com"
19-
r"/g(?:allery/(?:thumbnails|show))?/(\d+)")
23+
pattern = rf"{BASE_PATTERN}/g(?:allery/(?:thumbnails|show))?/(\d+)"
2024
example = "https://myhentaigallery.com/g/12345"
2125

2226
def __init__(self, match):
@@ -53,3 +57,32 @@ def images(self, page):
5357
"/thumbnail/", "/original/"), None)
5458
for url in text.extract_iter(page, 'class="comic-thumb"', '</div>')
5559
]
60+
61+
62+
class MyhentaigalleryTagExtractor(MyhentaigalleryBase, Extractor):
    """Extractor for myhentaigallery tag searches

    Matches artist, category, group, and parody listing URLs and hands
    every gallery found on them over to the gallery extractor.
    """
    subcategory = "tag"
    pattern = rf"{BASE_PATTERN}(/g/(artist|category|group|parody)/(\d+).*)"
    example = "https://myhentaigallery.com/g/category/123"

    def items(self):
        """Yield one Queue message per gallery URL of the listing"""
        # '_extractor' names the class meant to process each queued URL
        meta = {"_extractor": MyhentaigalleryGalleryExtractor}
        for gallery_url in self.galleries():
            yield Message.Queue, gallery_url, meta

    def galleries(self):
        """Generate absolute gallery URLs, following 'Next' page links"""
        base = self.root
        # NOTE(review): groups[0] is presumably the outermost capture of
        # 'pattern', i.e. the path plus anything after the numeric ID
        page_url = base + self.groups[0]

        while True:
            html = self.request(page_url).text

            entries = text.extract_iter(
                html, '<div class="comic-inner">', "<div")
            for entry in entries:
                yield base + text.extr(entry, 'href="', '"')

            marker = html.find(">Next<")
            if marker < 0:
                # no 'Next' label on this page: nothing left to fetch
                return
            # NOTE(review): relies on text.rextr() searching backwards from
            # 'marker' to pick up the href of the 'Next' anchor -- confirm
            page_url = base + text.rextr(html, 'href="', '"', marker)

test/results/myhentaigallery.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,26 @@
3535
"#class" : myhentaigallery.MyhentaigalleryGalleryExtractor,
3636
},
3737

38+
{
39+
"#url" : "https://myhentaigallery.com/g/artist/8084?sorting=favorite",
40+
"#class" : myhentaigallery.MyhentaigalleryTagExtractor,
41+
"#pattern" : myhentaigallery.MyhentaigalleryGalleryExtractor.pattern,
42+
"#count" : 18,
43+
},
44+
45+
{
46+
"#url" : "https://myhentaigallery.com/g/group/2",
47+
"#class" : myhentaigallery.MyhentaigalleryTagExtractor,
48+
},
49+
50+
{
51+
"#url" : "https://myhentaigallery.com/g/parody/8239",
52+
"#class" : myhentaigallery.MyhentaigalleryTagExtractor,
53+
},
54+
55+
{
56+
"#url" : "https://myhentaigallery.com/g/category/59",
57+
"#class" : myhentaigallery.MyhentaigalleryTagExtractor,
58+
},
59+
3860
)

0 commit comments

Comments
 (0)