Skip to content

Commit ed83be0

Browse files
authored
v2.3.2: 优化正则表达式,异常处理,取消master的GitHub Actions限制 (#141)
1 parent e62921c commit ed83be0

File tree

4 files changed: 29 additions and 17 deletions.

4 files changed: 29 additions and 17 deletions.

.github/workflows/download.yml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@ name: 下载JM本子
33
on:
44
workflow_dispatch:
55
push:
6-
branches-ignore:
7-
- 'master' # master专门用于发布pip和repo介绍
86
paths:
97
- '.github/workflows/download.yml' # 工作流定义
108
- 'usage/workflow_download.py' # 下载脚本

src/jmcomic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- option <--- downloader
44

5-
__version__ = '2.3.1'
5+
__version__ = '2.3.2'
66

77
from .api import *
88
from .jm_plugin import *

src/jmcomic/jm_config.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ def default_postman_constructor(session, **kwargs):
1717
return Postmans.new_postman(**kwargs)
1818

1919

20-
def default_raise_regex_error(msg, *_args, **_kwargs):
20+
def default_raise_exception_executor(msg, **_kwargs):
2121
raise JmModuleConfig.exception(msg)
2222

2323

@@ -76,7 +76,7 @@ class JmModuleConfig:
7676
# postman构造函数
7777
postman_constructor = default_postman_constructor
7878
# 网页正则表达式解析失败时,执行抛出异常的函数,可以替换掉用于debug
79-
raise_regex_error_executor = default_raise_regex_error
79+
raise_exception_executor = default_raise_exception_executor
8080

8181
# debug开关标记
8282
enable_jm_debug = True
@@ -135,11 +135,25 @@ def client_impl_class(cls, client_key: str):
135135

136136
@classmethod
137137
def exception(cls, msg: str):
138+
"""
139+
获取jmcomic模块的异常类
140+
"""
138141
if cls.CLASS_EXCEPTION is not None:
139142
return cls.CLASS_EXCEPTION(msg)
140143

141144
return JmcomicException(msg)
142145

146+
@classmethod
147+
def raises(cls, msg: str, **kwargs):
148+
"""
149+
抛出异常,支持把一些上下文参数传递为kwargs
150+
真正抛出异常的是函数 cls.raise_exception_executor,用户可自定义此字段
151+
152+
如果只想抛异常,不想支持一些扩展处理,使用 raise cls.exception(msg)
153+
如果想支持一些扩展处理,使用 cls.raises(msg, context=context)
154+
"""
155+
cls.raise_exception_executor(msg, **kwargs)
156+
143157
@classmethod
144158
@field_cache("DOMAIN")
145159
def domain(cls, postman=None):

src/jmcomic/jm_toolkit.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ class JmcomicText:
1313
pattern_html_photo_name = compile('<title>([\s\S]*?)\|.*</title>')
1414
# pattern_html_photo_data_original_list = compile('data-original="(.*?)" id="album_photo_.+?"')
1515
pattern_html_photo_data_original_domain = compile('src="https://(.*?)/media/albums/blank')
16-
pattern_html_photo_data_original_0 = compile('data-original="(.*?)"[ \n]*?id="album_photo')
16+
pattern_html_photo_data_original_0 = compile('data-original="(.*?)"[^>]*?id="album_photo[^>]*?data-page="0"')
1717
pattern_html_photo_keywords = compile('<meta name="keywords"[\s\S]*?content="(.*?)"')
1818
pattern_html_photo_series_id = compile('var series_id = (\d+);')
1919
pattern_html_photo_sort = compile('var sort = (\d+);')
@@ -31,29 +31,29 @@ class JmcomicText:
3131
# 作品
3232
pattern_html_album_works = [
3333
compile('<span itemprop="author" data-type="works">([\s\S]*?)</span>'),
34-
compile('<a[\s\S]*?>(.*?)</a>')
34+
compile('<a[^>]*?>(.*?)</a>')
3535
]
3636
# 登場人物
3737
pattern_html_album_actors = [
3838
compile('<span itemprop="author" data-type="actor">([\s\S]*?)</span>'),
39-
compile('<a[\s\S]*?>(.*?)</a>')
39+
compile('<a[^>]*?>(.*?)</a>')
4040
]
4141
# 标签
4242
pattern_html_album_tags = [
4343
compile('<span itemprop="genre" data-type="tags">([\s\S]*?)</span>'),
44-
compile('<a[\s\S]*?>(.*?)</a>')
44+
compile('<a[^>]*?>(.*?)</a>')
4545
]
4646
# 作者
4747
pattern_html_album_authors = [
4848
compile('作者: *<span itemprop="author" data-type="author">([\s\S]*?)</span>'),
49-
compile("<a[\s\S]*?>(.*?)</a>"),
49+
compile("<a[^>]*?>(.*?)</a>"),
5050
]
5151
# 點擊喜歡
5252
pattern_html_album_likes = compile('<span id="albim_likes_\d+">(.*?)</span>')
5353
# 觀看
5454
pattern_html_album_views = compile('<span>(.*?)</span> (次觀看|观看次数)')
5555
# 評論(div)
56-
pattern_html_album_comment_count = compile('<div class="badge"\n? *id="total_video_comments">(\d+)</div>'), 0
56+
pattern_html_album_comment_count = compile('<div class="badge"[^>]*?id="total_video_comments">(\d+)</div>'), 0
5757

5858
@classmethod
5959
def parse_to_jm_domain(cls, text: str):
@@ -169,11 +169,11 @@ def match_field(field_key: str, pattern: Union[Pattern, List[Pattern]], text):
169169

170170
if field_value is None:
171171
if default is None:
172-
JmModuleConfig.raise_regex_error_executor(
172+
JmModuleConfig.raises(
173173
f"文本没有匹配上字段:字段名为'{field_name}',pattern: [{pattern}]",
174-
html,
175-
field_name,
176-
pattern
174+
re_match_html=html,
175+
re_match_field_name=field_name,
176+
re_match_pattern=pattern,
177177
)
178178
else:
179179
field_value = default
@@ -260,12 +260,12 @@ def parse_html_to_page(cls, html: str) -> JmSearchPage:
260260
match = cls.pattern_html_search_error.search(html)
261261
if match is not None:
262262
topic, reason = match[1], match[2]
263-
JmModuleConfig.raise_regex_error_executor(f'{topic}: {reason}', html)
263+
JmModuleConfig.raises(f'{topic}: {reason}', re_search_html=html)
264264

265265
# 缩小文本范围
266266
match = cls.pattern_html_search_shorten_for.search(html)
267267
if match is None:
268-
JmModuleConfig.raise_regex_error_executor('未匹配到搜索结果', html)
268+
JmModuleConfig.raises('未匹配到搜索结果', re_shorten_html=html)
269269
html = match[0]
270270

271271
# 提取结果

0 commit comments

Comments
 (0)