Skip to content

Commit 6948743

Browse files
authored
v2.1.16: 优化搜索功能,增加错误提示检测并简化正则表达式,更新JM发布页 (#102)
1 parent 18ba10e commit 6948743

File tree

4 files changed

+40
-10
lines changed

4 files changed

+40
-10
lines changed

src/jmcomic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- option <--- downloader
44

5-
__version__ = '2.1.15'
5+
__version__ = '2.1.16'
66

77
from .api import *

src/jmcomic/jm_client_impl.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,9 @@ def headers_key_ts(self):
418418
"accept-encoding": "gzip",
419419
}, key_ts
420420

421+
def debug_topic_request(self):
    """Return the topic string ``'api'``.

    NOTE(review): presumably used as the debug/log topic for requests
    issued by this client -- confirm against the callers.
    """
    topic = 'api'
    return topic
423+
421424

422425
class AsyncSaveImageClient(JmImageClient):
423426

src/jmcomic/jm_config.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,15 @@ def default_postman_constructor(session, **kwargs):
1717
return Postmans.new_postman(**kwargs)
1818

1919

20+
def default_raise_regex_error(msg, *_args, **_kwargs):
    """Default failure handler: always raise ``AssertionError(msg)``.

    Any additional positional or keyword arguments are accepted and
    ignored, so callers may pass extra context without affecting this
    default implementation.

    :param msg: message carried by the raised exception
    :raises AssertionError: unconditionally
    """
    error = AssertionError(msg)
    raise error
22+
23+
2024
class JmModuleConfig:
2125
# 网站相关
2226
PROT = "https://"
2327
JM_REDIRECT_URL = f'{PROT}jm365.work/3YeBdF' # 永久網域,怕走失的小伙伴收藏起来
24-
JM_PUB_URL = f'{PROT}jmcomic2.bet'
28+
JM_PUB_URL = f'{PROT}jmcomic.ltd'
2529
JM_CDN_IMAGE_URL_TEMPLATE = PROT + 'cdn-msp.{domain}/media/photos/{photo_id}/{index:05}{suffix}' # index 从1开始
2630
JM_IMAGE_SUFFIX = ['.jpg', '.webp', '.png', '.gif']
2731

@@ -62,6 +66,9 @@ class JmModuleConfig:
6266
debug_executor = default_jm_debug
6367
# postman构造函数
6468
postman_constructor = default_postman_constructor
69+
# 网页正则表达式解析失败时,执行抛出异常的函数,可以替换掉用于debug
70+
raise_regex_error_executor = default_raise_regex_error
71+
6572
# debug开关标记
6673
enable_jm_debug = True
6774

@@ -163,7 +170,7 @@ def get_jmcomic_url(cls, postman=None):
163170
postman = postman or cls.new_postman(session=True)
164171

165172
url = postman.with_redirect_catching().get(cls.JM_REDIRECT_URL)
166-
cls.jm_debug('获取禁漫地址', f'[{cls.JM_REDIRECT_URL}] → [{url}]')
173+
cls.jm_debug('获取禁漫URL', f'[{cls.JM_REDIRECT_URL}] → [{url}]')
167174
return url
168175

169176
@classmethod
@@ -181,7 +188,10 @@ def get_jmcomic_domain_all(cls, postman=None):
181188
raise AssertionError(resp.text)
182189

183190
from .jm_toolkit import JmcomicText
184-
return JmcomicText.analyse_jm_pub_html(resp.text)
191+
domain_list = JmcomicText.analyse_jm_pub_html(resp.text)
192+
193+
cls.jm_debug('获取禁漫全部域名', f'[{resp.url}] → {domain_list}')
194+
return domain_list
185195

186196
album_comment_headers = {
187197
'authority': '18comic.vip',

src/jmcomic/jm_toolkit.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,12 @@ def match_field(field_key: str, pattern: Union[Pattern, List[Pattern]], text):
141141
field_value = match_field(field_name, pattern_value, html)
142142

143143
if field_value is None:
144-
write_text('./resp.txt', html) # debug
145-
raise AssertionError(f"文本没有匹配上字段:字段名为'{field_name}',pattern: [{pattern_value.pattern}]")
144+
JmModuleConfig.raise_regex_error_executor(
145+
f"文本没有匹配上字段:字段名为'{field_name}',pattern: [{pattern_value.pattern}]",
146+
html,
147+
field_name,
148+
pattern_value
149+
)
146150

147151
# 保存字段
148152
field_dict[field_name] = field_value
@@ -167,9 +171,7 @@ def format_url(cls, path, domain=None):
167171

168172
class JmSearchSupport:
169173
# 用来缩减html的长度
170-
pattern_html_search_shorten_for = compile('<div class="well well-sm">([\s\S]*)'
171-
'<div class="row">[\s\S]*'
172-
'<div class="bot-per visible-xs visible-sm">')
174+
# Raw string: `\s` / `\S` are regex escapes, not string escapes. In a normal
# string literal they trigger an invalid-escape warning on modern Python.
# Captures everything between the first `well well-sm` div and the last
# `<div class="row">` (the `*` is greedy).
pattern_html_search_shorten_for = compile(r'<div class="well well-sm">([\s\S]*)<div class="row">')
173175

174176
# 用来提取搜索页面的的album的信息
175177
pattern_html_search_album_info_list = compile(
@@ -185,9 +187,24 @@ class JmSearchSupport:
185187
# 用来查找tag列表
186188
pattern_html_search_tag_list = compile('<a href=".*?">(.*?)</a>')
187189

190+
# 查找错误,例如 [错误,關鍵字過短,請至少輸入兩個字以上。]
191+
pattern_html_search_error = compile('<fieldset>\n<legend>(.*?)</legend>\n<div class=.*?>\n(.*?)\n</div>\n</fieldset>')
192+
188193
@classmethod
189194
def analyse_jm_search_html(cls, html: str) -> JmSearchPage:
190-
html = cls.pattern_html_search_shorten_for.search(html)[0]
195+
# 检查是否失败
196+
match = cls.pattern_html_search_error.search(html)
197+
if match is not None:
198+
topic, reason = match[1], match[2]
199+
JmModuleConfig.raise_regex_error_executor(f'{topic}: {reason}', html)
200+
201+
# 缩小文本范围
202+
match = cls.pattern_html_search_shorten_for.search(html)
203+
if match is None:
204+
JmModuleConfig.raise_regex_error_executor('未匹配到搜索结果', html)
205+
html = match[0]
206+
207+
# 提取结果
191208
album_info_list = cls.pattern_html_search_album_info_list.findall(html)
192209

193210
for i, (album_id, title, *args) in enumerate(album_info_list):

0 commit comments

Comments
 (0)