Skip to content

Commit dbd3f8c

Browse files
authored
v2.2.4: 优化搜本API,增加本子类的字段和对应的正则表达式(喜欢数、观看数、评论数、作品、登场人物) (#121)
1 parent a2c0a86 commit dbd3f8c

10 files changed

+175
-60
lines changed

src/jmcomic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- option <--- downloader
44

5-
__version__ = '2.2.3'
5+
__version__ = '2.2.4'
66

77
from .api import *
88
from .jm_plugin import *

src/jmcomic/jm_client_impl.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def request_with_retry(self,
5757
api_path=url,
5858
domain=self.domain_list[domain_index],
5959
)
60-
jm_debug(self.debug_topic_request(), url)
60+
jm_debug(self.debug_topic_request(), self.decode(url))
6161
else:
6262
# 图片url
6363
pass
@@ -124,7 +124,7 @@ def wrap_func_cache(func_name, cache_dict_name):
124124
for func in {
125125
'get_photo_detail',
126126
'get_album_detail',
127-
'search_album',
127+
'search',
128128
}:
129129
wrap_func_cache(func, func + '.cache.dict')
130130

@@ -151,6 +151,23 @@ def fallback(self, request, url, domain_index, retry_count, **kwargs):
151151
jm_debug('req.fallback', msg)
152152
raise JmModuleConfig.exception(msg)
153153

154+
# noinspection PyMethodMayBeStatic
155+
def append_params_to_url(self, url, params):
    """
    Append ``params`` to ``url`` as a URL-encoded query string.

    :param url: URL or path, with or without an existing query string
    :param params: mapping (or sequence of pairs) accepted by
                   ``urllib.parse.urlencode``; may be empty or None
    :returns: ``url`` unchanged when there is nothing to append; otherwise
              ``url`` joined to the encoded params with ``?``, or with ``&``
              when ``url`` already carries a query string
    """
    # Function-local import, as in the original implementation.
    from urllib.parse import urlencode

    if not params:
        # Nothing to encode: do not leave a dangling '?' on the URL.
        return url

    query_string = urlencode(params)

    if '?' not in url:
        separator = '?'
    elif url.endswith(('?', '&')):
        # The URL is already positioned to accept another parameter.
        separator = ''
    else:
        separator = '&'

    return f"{url}{separator}{query_string}"
162+
163+
# noinspection PyMethodMayBeStatic
164+
def decode(self, url: str):
    """
    Return a human-readable form of ``url`` for debug output.

    The URL is decoded only when ``JmModuleConfig.decode_url_when_debug`` is
    truthy and the URL contains ``/search/``; in every other case it is
    returned unchanged.
    """
    should_decode = JmModuleConfig.decode_url_when_debug and '/search/' in url
    if should_decode:
        from urllib.parse import unquote

        # Turn '+' back into a space first, then undo the percent-encoding.
        return unquote(url.replace('+', ' '))

    return url
170+
154171

155172
# 基于网页实现的JmClient
156173
class JmHtmlClient(AbstractJmClient):
@@ -182,14 +199,17 @@ def get_photo_detail(self, photo_id, fetch_album=True) -> JmPhotoDetail:
182199

183200
return photo
184201

185-
def search_album(self, search_query, main_tag=0, page=1) -> JmSearchPage:
202+
def search(self, search_query, page, main_tag) -> JmSearchPage:
186203
params = {
187204
'main_tag': main_tag,
188205
'search_query': search_query,
189206
'page': page,
190207
}
191208

192-
resp = self.get_jm_html('/search/photos', params=params, allow_redirects=True)
209+
resp = self.get_jm_html(
210+
self.append_params_to_url('/search/photos', params),
211+
allow_redirects=True,
212+
)
193213

194214
# 检查是否发生了重定向
195215
# 因为如果搜索的是禁漫车号,会直接跳转到本子详情页面
@@ -371,7 +391,7 @@ class JmApiClient(AbstractJmClient):
371391
client_key = 'api'
372392
API_SEARCH = '/search'
373393

374-
def search_album(self, search_query, main_tag=0, page=1) -> JmApiResp:
394+
def search(self, search_query, main_tag=0, page=1) -> JmApiResp:
375395
"""
376396
model_data: {
377397
"search_query": "MANA",

src/jmcomic/jm_client_interface.py

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,6 @@ def get_album_detail(self, album_id) -> JmAlbumDetail:
151151
def get_photo_detail(self, photo_id, fetch_album=True) -> JmPhotoDetail:
152152
raise NotImplementedError
153153

154-
def search_album(self, search_query: str, main_tag: int = 0, page: int = 1) -> JmSearchPage:
155-
raise NotImplementedError
156-
157154
def of_api_url(self, api_path, domain):
158155
raise NotImplementedError
159156

@@ -272,11 +269,65 @@ def img_is_not_need_to_decode(cls, data_original: str, _resp):
272269
return data_original.endswith('.gif')
273270

274271

272+
class JmSearchAlbumClient:
    """
    Album search interface.

    Query syntax, as described by the site:

    [Inclusive search]
    Prefix each keyword with ``+`` and separate keywords with a space:
    only albums matching every keyword are shown.
    Example: +全彩 +人妻

    [Exclusive search]
    Prefix a keyword with ``-`` to exclude albums matching it.
    Example: 全彩 -人妻

    [Match-all-keywords search]
    Separate plain keywords with a space: all albums containing the
    keywords are shown.
    Example: 全彩 人妻
    """

    def search(self, search_query: str, page: int, main_tag: int) -> JmSearchPage:
        """
        Search albums. Concrete clients must override this method.

        :param search_query: query text (see the class docstring for the syntax)
        :param page: result page number
        :param main_tag: search scope; the helpers below use 0 (site),
                         1 (work), 2 (author), 3 (tag) and 4 (actor)
        :raises NotImplementedError: always, in this base interface
        """
        raise NotImplementedError

    # NOTE: every helper delegates with keyword arguments. Implementations do
    # not all declare ``page`` and ``main_tag`` in the same order, so a
    # positional call could silently swap the two values.

    def search_site(self, search_query: str, page: int = 1) -> JmSearchPage:
        """
        Site-wide search (main_tag=0).
        """
        return self.search(search_query, page=page, main_tag=0)

    def search_work(self, search_query: str, page: int = 1) -> JmSearchPage:
        """
        Search by the album's work (main_tag=1).
        """
        return self.search(search_query, page=page, main_tag=1)

    def search_author(self, search_query: str, page: int = 1) -> JmSearchPage:
        """
        Search by the album's author (main_tag=2).
        """
        return self.search(search_query, page=page, main_tag=2)

    def search_tag(self, search_query: str, page: int = 1) -> JmSearchPage:
        """
        Search by the album's tag (main_tag=3).
        """
        return self.search(search_query, page=page, main_tag=3)

    def search_actor(self, search_query: str, page: int = 1) -> JmSearchPage:
        """
        Search by the album's featured character / actor (main_tag=4).
        """
        return self.search(search_query, page=page, main_tag=4)
323+
324+
275325
# noinspection PyAbstractClass
276326
class JmcomicClient(
277327
JmImageClient,
278328
JmDetailClient,
279329
JmUserClient,
330+
JmSearchAlbumClient,
280331
Postman,
281332
):
282333
def get_jmcomic_url(self):

src/jmcomic/jm_config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ class JmModuleConfig:
7777

7878
# debug开关标记
7979
enable_jm_debug = True
80+
# debug时解码url
81+
decode_url_when_debug = True
8082

8183
# 插件注册表
8284
plugin_registry = {}

src/jmcomic/jm_downloader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def before_album(self, album: JmAlbumDetail):
2323
f'章节数: [{len(album)}], '
2424
f'总页数: [{album.page_count}], '
2525
f'标题: [{album.title}], '
26-
f'关键词: [{album.keywords}]'
26+
f'关键词: [{album.tag_list}]'
2727
)
2828

2929
def after_album(self, album: JmAlbumDetail):

src/jmcomic/jm_entity.py

Lines changed: 50 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -198,9 +198,9 @@ def is_single_album(self) -> bool:
198198
return self._series_id == 0
199199

200200
@property
def tags(self) -> List[str]:
    """
    Tags of this photo.

    When the photo is attached to an album (``from_album`` is not None) the
    album's ``tag_list`` is returned; otherwise the photo's own
    comma-separated ``_keywords`` string is split into a list.
    """
    album = self.from_album
    if album is None:
        return self._keywords.split(',')

    return album.tag_list
206206

@@ -303,19 +303,30 @@ def __init__(self,
303303
episode_list,
304304
page_count,
305305
author_list,
306-
keywords_list,
306+
tag_list,
307307
pub_date,
308308
update_date,
309+
likes,
310+
views,
311+
comment_count,
312+
work_list,
313+
actor_list,
309314
):
310315
self.album_id: str = album_id
311316
self.scramble_id: str = scramble_id
312317
self.title: str = title
313-
self.page_count = int(page_count)
314-
self._author_list: List[str] = author_list
315-
self._keywords_list: List[str] = keywords_list
318+
self.page_count = int(page_count) # 总页数
316319
self.pub_date: str = pub_date # 发布日期
317320
self.update_date: str = update_date # 更新日期
318321

322+
self.likes: str = likes # [1K] 點擊喜歡
323+
self.views: str = views # [40K] 次觀看
324+
self.comment_count = int(comment_count)
325+
self.work_list: List[str] = work_list # 作品
326+
self.actor_list: List[str] = actor_list # 登場人物
327+
self.tag_list: List[str] = tag_list # 標籤
328+
self.author_list: List[str] = author_list # 作者
329+
319330
# 有的 album 没有章节,则自成一章。
320331
if len(episode_list) == 0:
321332
# photo_id, photo_index, photo_title, photo_pub_date
@@ -325,6 +336,38 @@ def __init__(self,
325336

326337
self.episode_list: List[Tuple] = episode_list
327338

339+
@property
def author(self):
    """
    Primary author of the album.

    An album may carry several author tags; this returns the first one.
    Use ``self.author_list`` to get all of them. When the list is empty,
    ``JmModuleConfig.default_author`` is returned instead.
    """
    authors = self.author_list
    if len(authors) < 1:
        return JmModuleConfig.default_author

    return authors[0]
349+
350+
@property
def id(self):
    """
    Alias for ``self.album_id``.
    """
    album_id = self.album_id
    return album_id
353+
354+
@staticmethod
def distinct_episode(episode_list):
    """
    Drop episodes whose photo id was already seen, keeping first occurrences.

    :param episode_list: iterable of episode sequences whose first element
                         is the photo id
    :returns: a new list holding the first episode for each photo id, in the
              original order; the input is not modified
    """
    # A set gives O(1) duplicate checks instead of rescanning the result list.
    # NOTE(review): assumes photo ids are hashable (e.g. str/int) -- confirm.
    seen_photo_ids = set()
    ret = []

    for episode in episode_list:
        photo_id = episode[0]
        if photo_id in seen_photo_ids:
            continue

        seen_photo_ids.add(photo_id)
        ret.append(episode)

    return ret
370+
328371
def create_photo_detail(self, index) -> Tuple[JmPhotoDetail, Tuple]:
329372
# 校验参数
330373
length = len(self.episode_list)
@@ -351,37 +394,6 @@ def create_photo_detail(self, index) -> Tuple[JmPhotoDetail, Tuple]:
351394

352395
return photo, episode_info
353396

354-
@property
355-
def author(self):
356-
if len(self._author_list) >= 1:
357-
return self._author_list[0]
358-
return JmModuleConfig.default_author
359-
360-
@property
361-
def keywords(self) -> List[str]:
362-
return self._keywords_list
363-
364-
@property
365-
def id(self):
366-
return self.album_id
367-
368-
@staticmethod
369-
def distinct_episode(episode_list):
370-
ret = []
371-
372-
def not_exist(episode):
373-
photo_id = episode[0]
374-
for each in ret:
375-
if each[0] == photo_id:
376-
return False
377-
return True
378-
379-
for episode in episode_list:
380-
if not_exist(episode):
381-
ret.append(episode)
382-
383-
return ret
384-
385397
def getindex(self, item) -> JmPhotoDetail:
386398
return self.create_photo_detail(item)[0]
387399

@@ -421,7 +433,7 @@ def wrap_single_album(cls, album: JmAlbumDetail) -> 'JmSearchPage':
421433
album.title,
422434
None,
423435
None,
424-
album.keywords,
436+
album.tag_list,
425437
)
426438
obj = JmSearchPage([album_info])
427439

src/jmcomic/jm_toolkit.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,32 @@ class JmcomicText:
2828
# All patterns are raw strings so that regex escapes such as \d and \s are not
# parsed as (invalid) Python string escapes; the compiled patterns are unchanged.
pattern_html_album_page_count = compile(r'<span class="pagecount">.*?:(\d+)</span>')
pattern_html_album_pub_date = compile(r'>上架日期 : (.*?)</span>')
pattern_html_album_update_date = compile(r'>更新日期 : (.*?)</span>')
# Works. In each two-pattern list below, the first pattern captures the inner
# HTML of the <span>, and the second captures the text of an <a> element.
pattern_html_album_work_list = [
    compile(r'<span itemprop="author" data-type="works">([\s\S]*?)</span>'),
    compile(r'<a[\s\S]*?>(.*?)</a>'),
]
# Featured characters (actors)
pattern_html_album_actor_list = [
    compile(r'<span itemprop="author" data-type="actor">([\s\S]*?)</span>'),
    compile(r'<a[\s\S]*?>(.*?)</a>'),
]
# Tags
pattern_html_album_tag_list = [
    compile(r'<span itemprop="genre" data-type="tags">([\s\S]*?)</span>'),
    compile(r'<a[\s\S]*?>(.*?)</a>'),
]
# Authors
pattern_html_album_author_list = [
    compile(r'作者: *<span itemprop="author" data-type="author">([\s\S]*?)</span>'),
    compile(r"<a[\s\S]*?>(.*?)</a>"),
]
# Like count. The spelling "albim" is kept exactly as in the original pattern;
# presumably it mirrors the site's markup -- verify before "correcting" it.
pattern_html_album_likes = compile(r'<span id="albim_likes_\d+">(.*?)</span>')
# View count
pattern_html_album_views = compile(r'<span>(.*?)</span> 次觀看')
# Comment count
pattern_html_album_comment_count = compile(r'<div class="badge" id="total_video_comments">(\d+)</div></a></li>')
4157

4258
@classmethod
4359
def parse_to_jm_domain(cls, text: str):
@@ -142,7 +158,7 @@ def match_field(field_key: str, pattern: Union[Pattern, List[Pattern]], text):
142158

143159
if field_value is None:
144160
JmModuleConfig.raise_regex_error_executor(
145-
f"文本没有匹配上字段:字段名为'{field_name}',pattern: [{pattern_value.pattern}]",
161+
f"文本没有匹配上字段:字段名为'{field_name}',pattern: [{pattern_value}]",
146162
html,
147163
field_name,
148164
pattern_value

tests/test_jmcomic/test_jm_api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ def run(aid):
9999
def test_get_jmcomic_url(self):
100100
func_list = {
101101
self.client.get_jmcomic_url,
102-
# self.client.get_jmcomic_domain_all,
103-
JmModuleConfig.get_jmcomic_url,
102+
self.client.get_jmcomic_domain_all,
103+
# JmModuleConfig.get_jmcomic_url,
104104
# JmModuleConfig.get_jmcomic_domain_all,
105105
}
106106

tests/test_jmcomic/test_jm_client.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def test_multi_album_and_single_album(self):
3333
print(f'本子: [{album.title}] 一共有{album.page_count}页图')
3434

3535
def test_search(self):
36-
jm_search_page: JmSearchPage = self.client.search_album('+无修正 +中文 -全彩')
36+
jm_search_page: JmSearchPage = self.client.search_tag('+无修正 +中文 -全彩')
3737
for album_id, title in reversed(jm_search_page):
3838
print(album_id, title)
3939

@@ -51,3 +51,16 @@ def test_album_missing(self):
5151
self.client.get_album_detail,
5252
'332583'
5353
)
54+
55+
def test_entity(self):
    """
    Fetch album 410090 and check its parsed work, actor, tag and author lists.
    """
    album = self.client.get_album_detail(410090)

    # (actual list parsed from the album, expected list)
    expectations = (
        (album.work_list, ['原神', 'Genshin']),
        (album.actor_list, ['申鶴', '神里綾華', '甘雨']),
        (album.tag_list, ['C101', '巨乳', '校服', '口交', '乳交', '群交', '連褲襪', '中文', '禁漫漢化組', '纯爱']),
        (album.author_list, ['うぱ西']),
    )

    for actual, expected in expectations:
        self.assertListEqual(actual, expected)

0 commit comments

Comments
 (0)