Skip to content

Commit fbaaffc

Browse files
authored
v2.5.6: 更新禁漫发布页URL; 优化正则表达式的适配; 支持通过github网址获取禁漫网页域名; 优化代码和文档. (#212) (#213)
1 parent 9a3d0f1 commit fbaaffc

15 files changed

+153
-70
lines changed

assets/docs/sources/option_file_syntax.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ plugins:
127127

128128
- plugin: client_proxy # 客户端实现类代理插件,不建议非开发人员使用
129129
kwargs:
130-
proxy_client_key: cl_proxy_future # 代理类的client_key
130+
proxy_client_key: photo_concurrent_fetcher_proxy # 代理类的client_key
131131
whitelist: [ api, ] # 白名单,当client.impl匹配白名单时才代理
132132

133133
- plugin: auto_set_browser_cookies # 自动获取浏览器cookies,详见插件类

assets/docs/sources/tutorial/11_log_custom.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ plugins:
4646
- plugin: client_proxy # 提高移动端的请求效率的插件
4747
log: false # 插件自身不打印日志
4848
kwargs:
49-
proxy_client_key: cl_proxy_future
49+
proxy_client_key: photo_concurrent_fetcher_proxy
5050
whitelist: [ api, ]
5151
```
5252

assets/option/option_test_api.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,5 @@ plugins:
2424

2525
- plugin: client_proxy
2626
kwargs:
27-
proxy_client_key: cl_proxy_future
27+
proxy_client_key: photo_concurrent_fetcher_proxy
2828
whitelist: [ api, ]

assets/option/option_test_html.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,5 @@ plugins:
2525

2626
- plugin: client_proxy
2727
kwargs:
28-
proxy_client_key: cl_proxy_future
28+
proxy_client_key: photo_concurrent_fetcher_proxy
2929
whitelist: [ api, ]

assets/option/option_workflow_download.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ plugins:
1818

1919
- plugin: client_proxy # 提高移动端的请求效率的插件
2020
kwargs:
21-
proxy_client_key: cl_proxy_future
21+
proxy_client_key: photo_concurrent_fetcher_proxy
2222
whitelist: [ api, ]
2323

2424
- plugin: login # 登录插件

src/jmcomic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- option <--- downloader
44

5-
__version__ = '2.5.5'
5+
__version__ = '2.5.6'
66

77
from .api import *
88
from .jm_plugin import *

src/jmcomic/api.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def new_downloader(option=None, downloader=None) -> JmDownloader:
100100
return downloader(option)
101101

102102

103-
def create_option(filepath):
103+
def create_option_by_file(filepath):
104104
return JmModuleConfig.option_class().from_file(filepath)
105105

106106

@@ -110,4 +110,14 @@ def create_option_by_env(env_name='JM_OPTION_PATH'):
110110
filepath = get_env(env_name, None)
111111
ExceptionTool.require_true(filepath is not None,
112112
f'未配置环境变量: {env_name},请配置为option的文件路径')
113-
return create_option(filepath)
113+
return create_option_by_file(filepath)
114+
115+
116+
def create_option_by_str(text: str, mode=None):
    """
    Build an option object from option text held in memory instead of a file.

    :param text: the serialized option content
    :param mode: parse mode handed to PackerUtil.unpack_by_str;
                 when None, PackerUtil.mode_yml is used
    :return: the object produced by JmModuleConfig.option_class().construct(...)
    """
    parse_mode = PackerUtil.mode_yml if mode is None else mode
    # unpack_by_str returns an indexable result; only its first element is the option data
    unpacked = PackerUtil.unpack_by_str(text, parse_mode)
    data = unpacked[0]
    option_cls = JmModuleConfig.option_class()
    return option_cls.construct(data)
121+
122+
123+
create_option = create_option_by_file

src/jmcomic/jm_client_impl.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ def request_with_retry(self,
7979
"""
8080
if domain_index >= len(self.domain_list):
8181
return self.fallback(request, url, domain_index, retry_count, **kwargs)
82-
82+
8383
url_backup = url
84-
84+
8585
if url.startswith('/'):
8686
# path → url
8787
domain = self.domain_list[domain_index]
@@ -976,21 +976,25 @@ def get_cookies(self):
976976
return cookies
977977

978978

979-
class FutureClientProxy(JmcomicClient):
979+
class PhotoConcurrentFetcherProxy(JmcomicClient):
980980
"""
981-
在Client上做了一层线程池封装来实现异步,对外仍然暴露JmcomicClient的接口,可以看作Client的代理。
982-
除了使用线程池做异步,还通过加锁和缓存结果,实现同一个请求不会被多个线程发出,减少开销
981+
为了解决 JmApiClient.get_photo_detail 方法的排队调用问题,
982+
即在访问完photo的接口后,需要另外排队访问获取album和scramble_id的接口。
983+
984+
这三个接口可以并发请求,这样可以提高效率。
985+
986+
此Proxy代理了get_photo_detail,实现了并发请求这三个接口,然后组装返回值返回photo。
983987
984988
可通过插件 ClientProxyPlugin 启用本类,配置如下:
985989
```yml
986990
plugins:
987991
after_init:
988992
- plugin: client_proxy
989993
kwargs:
990-
proxy_client_key: cl_proxy_future
994+
proxy_client_key: photo_concurrent_fetcher_proxy
991995
```
992996
"""
993-
client_key = 'cl_proxy_future'
997+
client_key = 'photo_concurrent_fetcher_proxy'
994998

995999
class FutureWrapper:
9961000
def __init__(self, future, after_done_callback):
@@ -1024,16 +1028,15 @@ def __init__(self,
10241028
executors = ThreadPoolExecutor(max_workers)
10251029

10261030
self.executors = executors
1027-
self.future_dict: Dict[str, FutureClientProxy.FutureWrapper] = {}
1031+
self.future_dict: Dict[str, PhotoConcurrentFetcherProxy.FutureWrapper] = {}
10281032
from threading import Lock
10291033
self.lock = Lock()
10301034

10311035
def route_notimpl_method_to_internal_client(self, client):
10321036

1033-
impl_methods = str_to_set('''
1037+
proxy_methods = str_to_set('''
10341038
get_album_detail
10351039
get_photo_detail
1036-
search
10371040
''')
10381041

10391042
# 获取对象的所有属性和方法的名称列表
@@ -1043,7 +1046,7 @@ def route_notimpl_method_to_internal_client(self, client):
10431046
# 判断是否为方法(可调用对象)
10441047
if (not method.startswith('_')
10451048
and callable(getattr(client, method))
1046-
and method not in impl_methods
1049+
and method not in proxy_methods
10471050
):
10481051
setattr(self, method, getattr(client, method))
10491052

@@ -1055,15 +1058,19 @@ def get_album_detail(self, album_id) -> JmAlbumDetail:
10551058

10561059
def get_future(self, cache_key, task):
    """
    Return the in-flight future registered under ``cache_key``, submitting
    ``task`` to the executor first when no such future exists.

    :param cache_key: key identifying the request, used to de-duplicate identical running tasks
    :param task: zero-argument callable submitted to ``self.executors`` on a cache miss
    :return: the ``FutureWrapper`` registered for ``cache_key``
    """
    # Fast path without the lock: a hit means the same task is already running.
    # A single .get() is used instead of `in` followed by `[]`, because the
    # done-callback below pops the key without holding self.lock; an entry
    # removed between the two operations would raise KeyError.
    future = self.future_dict.get(cache_key)
    if future is not None:
        return future

    with self.lock:
        # Re-check under the lock: another thread may have registered the task meanwhile.
        future = self.future_dict.get(cache_key)
        if future is not None:
            return future

        # after future done, remove it from future_dict.
        # cache depends on self.client instead of self.future_dict
        future = self.FutureWrapper(
            self.executors.submit(task),
            after_done_callback=lambda: self.future_dict.pop(cache_key, None),
        )

        self.future_dict[cache_key] = future
        return future
10691076

@@ -1115,8 +1122,3 @@ def get_photo_detail(self, photo_id, fetch_album=True, fetch_scramble_id=True) -
11151122
photo.scramble_id = scramble_id
11161123

11171124
return photo
1118-
1119-
def search(self, search_query: str, page: int, main_tag: int, order_by: str, time: str) -> JmSearchPage:
1120-
cache_key = f'search_query_{search_query}_page_{page}_main_tag_{main_tag}_order_by_{order_by}_time_{time}'
1121-
future = self.get_future(cache_key, task=lambda: self.client.search(search_query, page, main_tag, order_by, time))
1122-
return future.result()

src/jmcomic/jm_client_interface.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -469,11 +469,14 @@ def get_cache_dict(self) -> Optional[Dict]:
469469
def of_api_url(self, api_path, domain):
470470
raise NotImplementedError
471471

472-
def get_html_domain(self, postman=None):
473-
return JmModuleConfig.get_html_domain(postman or self.get_root_postman())
472+
def get_html_domain(self):
473+
return JmModuleConfig.get_html_domain(self.get_root_postman())
474474

475-
def get_html_domain_all(self, postman=None):
476-
return JmModuleConfig.get_html_domain_all(postman or self.get_root_postman())
475+
def get_html_domain_all(self):
476+
return JmModuleConfig.get_html_domain_all(self.get_root_postman())
477+
478+
def get_html_domain_all_via_github(self):
479+
return JmModuleConfig.get_html_domain_all_via_github(self.get_root_postman())
477480

478481
# noinspection PyMethodMayBeStatic
479482
def do_page_iter(self, params: dict, page: int, get_page_method):

src/jmcomic/jm_config.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ class JmModuleConfig:
8181
# 网站相关
8282
PROT = "https://"
8383
JM_REDIRECT_URL = f'{PROT}jm365.work/3YeBdF' # 永久網域,怕走失的小伙伴收藏起来
84-
JM_PUB_URL = f'{PROT}jmcomic.ltd'
84+
JM_PUB_URL = f'{PROT}jmcomic-fb.vip'
8585
JM_CDN_IMAGE_URL_TEMPLATE = PROT + 'cdn-msp.{domain}/media/photos/{photo_id}/{index:05}{suffix}' # index 从1开始
8686
JM_IMAGE_SUFFIX = ['.jpg', '.webp', '.png', '.gif']
8787

@@ -259,6 +259,41 @@ def get_html_domain_all(cls, postman=None):
259259
cls.jm_log('module.html_domain_all', f'获取禁漫网页全部域名: [{resp.url}] → {domain_list}')
260260
return domain_list
261261

262+
@classmethod
def get_html_domain_all_via_github(cls,
                                   postman=None,
                                   template='https://jmcmomic.github.io/go/{}.html',
                                   index_range=(300, 309)
                                   ):
    """
    Collect JM web domains from the pages published through the GitHub repo
    https://github.com/jmcmomic/jmcmomic.github.io

    :param postman: HTTP client used for the requests; when falsy, a new one is
                    created via cls.new_postman with browser-like headers
    :param template: page URL template, formatted with each page index
    :param index_range: arguments unpacked into range() to produce the page indexes
    :return: set of domains parsed from every fetched page, excluding those that start with 'jm365'
    """
    if not postman:
        postman = cls.new_postman(headers={
            'authority': 'github.com',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 '
                          'Safari/537.36'
        })

    collected = set()

    def fetch_domain(url):
        # redirects are not followed: the body of the requested page itself is parsed
        page_text = postman.get(url, allow_redirects=False).text
        from .jm_toolkit import JmcomicText
        collected.update(
            domain
            for domain in JmcomicText.analyse_jm_pub_html(page_text)
            if not domain.startswith('jm365')
        )

    from common import multi_thread_launcher

    page_urls = [template.format(index) for index in range(*index_range)]
    # NOTE(review): multi_thread_launcher presumably runs fetch_domain once per URL
    # in its own thread and waits for all of them before returning — confirm
    multi_thread_launcher(
        iter_objs=page_urls,
        apply_each_obj_func=fetch_domain,
    )

    return collected
296+
262297
@classmethod
263298
def new_html_headers(cls, domain='18comic.vip'):
264299
"""

src/jmcomic/jm_downloader.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -169,11 +169,11 @@ def client_for_photo(self, jm_photo_id) -> JmcomicClient:
169169
def before_album(self, album: JmAlbumDetail):
170170
super().before_album(album)
171171
self.all_downloaded.setdefault(album, {})
172-
173-
def before_photo(self, photo: JmPhotoDetail):
174-
super().before_photo(photo)
175-
self.all_downloaded.setdefault(photo.from_album, {})
176-
self.all_downloaded[photo.from_album].setdefault(photo, [])
172+
self.option.call_all_plugin(
173+
'before_album',
174+
album=album,
175+
downloader=self,
176+
)
177177

178178
def after_album(self, album: JmAlbumDetail):
179179
super().after_album(album)
@@ -183,6 +183,16 @@ def after_album(self, album: JmAlbumDetail):
183183
downloader=self,
184184
)
185185

186+
def before_photo(self, photo: JmPhotoDetail):
187+
super().before_photo(photo)
188+
self.all_downloaded.setdefault(photo.from_album, {})
189+
self.all_downloaded[photo.from_album].setdefault(photo, [])
190+
self.option.call_all_plugin(
191+
'before_photo',
192+
photo=photo,
193+
downloader=self,
194+
)
195+
186196
def after_photo(self, photo: JmPhotoDetail):
187197
super().after_photo(photo)
188198
self.option.call_all_plugin(
@@ -191,12 +201,25 @@ def after_photo(self, photo: JmPhotoDetail):
191201
downloader=self,
192202
)
193203

204+
def before_image(self, image: JmImageDetail, img_save_path):
205+
super().before_image(image, img_save_path)
206+
self.option.call_all_plugin(
207+
'before_image',
208+
image=image,
209+
downloader=self,
210+
)
211+
194212
def after_image(self, image: JmImageDetail, img_save_path):
195213
super().after_image(image, img_save_path)
196214
photo = image.from_photo
197215
album = photo.from_album
198216

199217
self.all_downloaded.get(album).get(photo).append((img_save_path, image))
218+
self.option.call_all_plugin(
219+
'after_image',
220+
image=image,
221+
downloader=self,
222+
)
200223

201224
# 下面是对with语法的支持
202225

@@ -219,28 +242,23 @@ def use(cls, *args, **kwargs):
219242

220243
class DoNotDownloadImage(JmDownloader):
221244
"""
222-
本类仅用于测试
223-
224-
用法:
225-
226-
JmModuleConfig.CLASS_DOWNLOADER = DoNotDownloadImage
245+
不会下载任何图片的Downloader,用作测试
227246
"""
228247

229248
def download_by_image_detail(self, image: JmImageDetail, client: JmcomicClient):
230249
# ensure make dir
231250
self.option.decide_image_filepath(image)
232-
pass
233251

234252

235253
class JustDownloadSpecificCountImage(JmDownloader):
254+
"""
255+
只下载特定数量图片的Downloader,用作测试
256+
"""
236257
from threading import Lock
237258

238259
count_lock = Lock()
239260
count = 0
240261

241-
def __init__(self, option: JmOption) -> None:
242-
super().__init__(option)
243-
244262
def download_by_image_detail(self, image: JmImageDetail, client: JmcomicClient):
245263
# ensure make dir
246264
self.option.decide_image_filepath(image)

src/jmcomic/jm_entity.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -91,30 +91,34 @@ def authoroname(self):
9191
"""
9292
authoroname = author + oname
9393
94-
比较好识别的一种本子名称方式
94+
个人认为识别度比较高的本子名称,一眼看去就能获取到本子的关键信息
9595
96-
具体格式: f'【author】{oname}'
96+
具体格式: '【author】oname'
9797
9898
示例:
9999
100-
原本子名:喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正]
100+
Pname:喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正]
101101
102-
authoroname:【BLVEFO9】喂我吃吧 老師!
102+
Pauthoroname:【BLVEFO9】喂我吃吧 老師!
103103
104-
:return: 返回作者名+作品原名,格式为: '【author】{oname}'
104+
:return: 返回作者名+本子原始名称,格式为: '【author】oname'
105105
"""
106106
return f'【{self.author}】{self.oname}'
107107

108108
@property
109109
def idoname(self):
110110
"""
111111
类似 authoroname
112-
:return: '[id] {oname}'
112+
113+
:return: '[id] oname'
113114
"""
114115
return f'[{self.id}] {self.oname}'
115116

116117
def __str__(self):
117-
return f'{self.__class__.__name__}({self.id}-{self.title})'
118+
return f'{self.__class__.__name__}' \
119+
'{' \
120+
f'{self.id}: {self.title}'\
121+
'}'
118122

119123
@classmethod
120124
def __alias__(cls):

0 commit comments

Comments
 (0)