Skip to content

Commit 5e013e1

Browse files
authored
v2.1.6: 简化字段缓存的代码,调整下载章节并发数,更新文档 (#83)
1 parent 9a5c5d1 commit 5e013e1

File tree

8 files changed

+70
-61
lines changed

8 files changed

+70
-61
lines changed

README.md

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,16 @@
2121

2222
## 快速上手
2323

24-
使用下面的两行代码,即可实现功能:把某个本子集(album)里的所有本子(photo)下载到本地
24+
使用下面的两行代码,即可实现功能:把某个本子(album)里的所有章节(photo)下载到本地
2525

2626
```python
2727
import jmcomic # 导入此模块,需要先安装.
2828
jmcomic.download_album('422866') # 传入要下载的album的id,即可下载整个album到本地.
2929
# 上面的这行代码,还有一个可选参数option: JmOption,表示配置项,
3030
# 配置项的作用是告诉程序下载时候的一些选择,
31-
# 比如,要下载到哪个文件夹,使用怎样的路径组织方式(比如[/作者/本子id/图片] 或者 [/作者/本子名称/图片]).
31+
# 比如,要下载到哪个文件夹,使用怎样的路径组织规则(比如[/作者/本子id/图片] 或者 [/作者/本子名称/图片]).
3232
# 如果没有配置,则会使用 JmOption.default(),下载的路径是[当前工作文件夹/本子名称/图片].
33+
# 如果你想要配置,请参考assets/config/和usage/下的文档和示例.
3334
```
3435

3536
进一步的使用可以参考usage文件夹下的示例代码: `getting_started.py` `sample_usage.py`
@@ -39,17 +40,21 @@ jmcomic.download_album('422866') # 传入要下载的album的id,即可下载
3940
- **绕过Cloudflare的反爬虫**
4041
- 支持使用**Github Action**下载漫画,不会编程都能用([教程:使用Github Actions下载禁漫本子](./assets/docs/教程:使用Github%20Actions下载禁漫本子.md))
4142
- 可配置性强
42-
4343
- 不配置也能使用,十分方便
44-
- 配置可以从**配置文件**生成,无需写Python代码
45-
- 配置点有:`是否使用磁盘缓存` `是否使用代理` `图片类型转换` `本子下载路径` `请求元信息(headers,cookies,重试次数)等 `
44+
- 配置可以从**配置文件**生成,支持多种文件格式,无需写Python代码
45+
- 配置点有:`是否使用磁盘缓存` `图片类型转换` `下载路径` `请求元信息(headers,cookies,代理)` 等
46+
- 可扩展性强
47+
- 支持自定义本子/章节/图片下载前后的回调函数
48+
- 支持自定义debug日志的开关/格式
49+
- 支持自定义Option/Client/实体类
50+
- 支持重试和域名切换机制
4651
- 多线程下载(可细化到一图一线程,效率极高)
4752
- 跟进了JM最新的图片分割算法(2023-02-08)
4853

4954
## 使用小说明
5055

5156
* Python >= 3.7
52-
* 项目只有代码注释,没有API文档。因此想深入高级地使用,自行看源码和改造代码叭 ^^_
57+
* 个人项目,文档和示例会有不及时之处,可以Issue提问
5358

5459
## 项目文件夹介绍
5560

src/jmcomic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- option
44

5-
__version__ = '2.1.5'
5+
__version__ = '2.1.6'
66

77
from .api import *

src/jmcomic/api.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def download_album(jm_album_id, option=None):
1717

1818
option.before_album(album)
1919
execute_by_condition(
20-
iter_obj=album,
20+
iter_objs=album,
2121
apply=lambda photo: download_by_photo_detail(photo, option),
2222
count_batch=option.decide_photo_batch_count(album)
2323
)
@@ -49,13 +49,11 @@ def download_by_photo_detail(photo: JmPhotoDetail, option=None):
4949
# 下载每个图片的函数
5050
def download_image(image: JmImageDetail):
5151
img_save_path = option.decide_image_filepath(image)
52-
53-
# 已下载过,缓存命中
54-
if use_cache is True and file_exists(img_save_path):
55-
image.is_exists = True
56-
return
52+
image.is_exists = file_exists(img_save_path)
5753

5854
option.before_image(image, img_save_path)
55+
if use_cache is True and image.is_exists:
56+
return
5957
jm_client.download_by_image_detail(
6058
image,
6159
img_save_path,
@@ -65,7 +63,7 @@ def download_image(image: JmImageDetail):
6563

6664
option.before_photo(photo)
6765
execute_by_condition(
68-
iter_obj=photo,
66+
iter_objs=photo,
6967
apply=download_image,
7068
count_batch=option.decide_image_batch_count(photo)
7169
)
@@ -88,28 +86,28 @@ def download_album_batch(jm_album_id_iter: Union[Iterable, Generator],
8886
option = JmOption.default()
8987

9088
return thread_pool_executor(
91-
iter_objs=((album_id, option) for album_id in jm_album_id_iter),
92-
apply_each_obj_func=download_album,
89+
iter_objs=set(JmcomicText.parse_to_album_id(album_id) for album_id in jm_album_id_iter),
90+
apply_each_obj_func=lambda album_id: download_album(album_id, option),
9391
wait_finish=wait_finish,
9492
)
9593

9694

97-
def execute_by_condition(iter_obj, apply: Callable, count_batch: int):
95+
def execute_by_condition(iter_objs, apply: Callable, count_batch: int):
9896
"""
9997
章节/图片的下载调度逻辑
10098
"""
101-
count_real = len(iter_obj)
99+
count_real = len(iter_objs)
102100

103101
if count_batch >= count_real:
104-
# 一图一线程
102+
# 一个图/章节 对应 一个线程
105103
multi_thread_launcher(
106-
iter_objs=iter_obj,
104+
iter_objs=iter_objs,
107105
apply_each_obj_func=apply,
108106
)
109107
else:
110-
# 创建batch个线程的线程池,当图片数>batch时要等待。
108+
# 创建batch个线程的线程池
111109
thread_pool_executor(
112-
iter_objs=iter_obj,
110+
iter_objs=iter_objs,
113111
apply_each_obj_func=apply,
114112
max_workers=count_batch,
115113
)

src/jmcomic/jm_config.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
def field_cache(*args, **kwargs):
2+
from common import field_cache
3+
return field_cache(*args, **kwargs)
4+
5+
16
def default_jm_debug(topic: str, msg: str):
27
from common import format_ts
38
print(f'{format_ts()}:【{topic}{msg}')
@@ -15,11 +20,13 @@ def default_postman_constructor(session, **kwargs):
1520
class JmModuleConfig:
1621
# 网站相关
1722
PROT = "https://"
18-
DOMAIN = None
1923
JM_REDIRECT_URL = f'{PROT}jm365.work/3YeBdF' # 永久網域,怕走失的小伙伴收藏起来
2024
JM_PUB_URL = f'{PROT}jmcomic2.bet'
2125
JM_CDN_IMAGE_URL_TEMPLATE = PROT + 'cdn-msp.{domain}/media/photos/{photo_id}/{index:05}{suffix}' # index 从1开始
2226
JM_IMAGE_SUFFIX = ['.jpg', '.webp', '.png', '.gif']
27+
# 缓存字段
28+
DOMAIN = None
29+
DOMAIN_LIST = None
2330

2431
# 访问JM可能会遇到的异常网页
2532
JM_ERROR_RESPONSE_TEXT = {
@@ -52,17 +59,15 @@ class JmModuleConfig:
5259
postman_constructor = default_postman_constructor
5360

5461
@classmethod
62+
@field_cache("DOMAIN")
5563
def domain(cls, postman=None):
5664
"""
5765
由于禁漫的域名经常变化,调用此方法可以获取一个当前可用的最新的域名 domain,
5866
并且把 domain 设置为禁漫模块的默认域名。
5967
这样一来,配置文件也不用配置域名了,一切都在运行时动态获取。
6068
"""
61-
if cls.DOMAIN is None:
62-
from .jm_toolkit import JmcomicText
63-
cls.DOMAIN = JmcomicText.parse_to_jm_domain(cls.get_jmcomic_url(postman))
64-
65-
return cls.DOMAIN # jmcomic默认域名
69+
from .jm_toolkit import JmcomicText
70+
return JmcomicText.parse_to_jm_domain(cls.get_jmcomic_url(postman))
6671

6772
@classmethod
6873
def headers(cls, domain='18comic.vip'):
@@ -116,6 +121,7 @@ def get_jmcomic_url(cls, postman=None):
116121
return url
117122

118123
@classmethod
124+
@field_cache("DOMAIN_LIST")
119125
def get_jmcomic_domain_all(cls, postman=None):
120126
"""
121127
访问禁漫发布页,得到所有禁漫的域名

src/jmcomic/jm_option.py

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def before_image(self, image: JmImageDetail, img_save_path):
3232
f'图片已存在: {image.tag} ← [{img_save_path}]'
3333
)
3434
else:
35-
jm_debug('image_before',
35+
jm_debug('image-before',
3636
f'图片准备下载: {image.tag}, [{image.img_url}] → [{img_save_path}]'
3737
)
3838

@@ -159,9 +159,6 @@ def __init__(self,
159159
# 其他配置
160160
self.filepath = filepath
161161

162-
# 字段
163-
self.jm_client_cache = None
164-
165162
@property
166163
def download_cache(self):
167164
return self.download.cache
@@ -186,9 +183,9 @@ def download_image_suffix(self):
186183
def decide_image_batch_count(self, photo: JmPhotoDetail):
187184
return self.download_threading_batch_count
188185

189-
# noinspection PyMethodMayBeStatic
186+
# noinspection PyMethodMayBeStatic,PyUnusedLocal
190187
def decide_photo_batch_count(self, album: JmAlbumDetail):
191-
return len(album)
188+
return os.cpu_count()
192189

193190
def decide_image_save_dir(self, photo) -> str:
194191
# 使用 self.dir_rule 决定 save_dir
@@ -266,22 +263,14 @@ def to_file(self, filepath=None):
266263
"""
267264

268265
# 缓存
269-
cache_jm_client = True
270266
jm_client_impl_mapping: Dict[str, Type[AbstractJmClient]] = {
271267
'html': JmHtmlClient,
272268
'api': JmApiClient,
273269
}
274270

271+
@field_cache("__jm_client_cache__")
275272
def build_jm_client(self, **kwargs) -> JmcomicClient:
276-
if self.cache_jm_client is not True:
277-
return self.new_jm_client(**kwargs)
278-
279-
client = self.jm_client_cache
280-
if client is None:
281-
client = self.new_jm_client(**kwargs)
282-
self.jm_client_cache = client
283-
284-
return client
273+
return self.new_jm_client(**kwargs)
285274

286275
def new_jm_client(self, **kwargs) -> JmcomicClient:
287276
postman_conf: dict = self.client.postman.src_dict

usage/getting_started.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
获取域名介绍
3131
--------------------
3232
"""
33-
3433
# 方式1: 访问禁漫发布页
3534
url_ls = jmcomic.JmModuleConfig.get_jmcomic_url_all()
3635
print(url_ls)
@@ -55,3 +54,16 @@
5554
# 如果你修改了默认配置,现在想用你修改后的配置来下载,使用如下代码
5655
jm_option = jmcomic.create_option('./你的配置文件路名称.yml')
5756
jmcomic.download_album('23333', jm_option)
57+
58+
# 如果你只想做简单的配置,也可以使用如下形式
59+
# 具体可以写什么,请参考 JmOption.default_dict,你只需要覆盖里面的键值即可
60+
# 配置代理
61+
jm_option = JmOption.construct({
62+
'client': {
63+
'postman': {
64+
'meta_data': {
65+
'proxies': ProxyBuilder.clash_proxy(),
66+
}
67+
}
68+
}
69+
})

usage/sample_usage.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from jmcomic import *
22

3-
jm_option = create_option(
3+
option = create_option(
44
f'你的配置文件路径,例如: D:/a/b/c/jmcomic/config.yml'
55
)
6+
client = option.build_jm_client()
7+
client.enable_cache(debug=True)
68

79

810
@timeit('下载本子集: ')
@@ -13,21 +15,18 @@ def download_jm_album():
1315
1416
''')
1517

16-
download_album(ls, jm_option) # 效果同下面的代码
17-
# download_album_batch(ls, jm_option)
18+
download_album(ls, option) # 效果同下面的代码
19+
# download_album_batch(ls, option)
1820

1921

2022
@timeit('获取实体类: ')
2123
def get_album_photo_detail():
22-
client = jm_option.build_jm_client()
2324
# 启用缓存,会缓存id → album和photo的实体类
24-
client.enable_cache(debug=True)
25-
2625
album: JmAlbumDetail = client.get_album_detail('427413')
2726

28-
def show(p):
29-
p: JmPhotoDetail = client.get_photo_detail(p.photo_id, False)
30-
for img in p:
27+
def show(photo):
28+
photo: JmPhotoDetail = client.get_photo_detail(photo.photo_id, False)
29+
for img in photo:
3130
img: JmImageDetail
3231
print(img.img_url)
3332

@@ -39,8 +38,6 @@ def show(p):
3938

4039
@timeit('搜索本子: ')
4140
def search_jm_album():
42-
client = jm_option.build_jm_client()
43-
4441
# 分页查询
4542
search_page: JmSearchPage = client.search_album(search_query='+MANA +无修正', page=1)
4643
for album_id, title in search_page:
@@ -51,20 +48,22 @@ def search_jm_album():
5148
album: JmAlbumDetail = search_page.single_album
5249
print(album.keywords)
5350

51+
5452
@timeit('搜索并下载本子: ')
5553
def search_and_download():
5654
tag = '無修正'
57-
search_album: JmSearchPage = cl.search_album(tag, main_tag=3)
55+
search_page: JmSearchPage = client.search_album(tag, main_tag=3)
5856

5957
id_list = []
6058

61-
for arg in search_album.album_info_list:
59+
for arg in search_page.album_info_list:
6260
(album_id, title, category_none, label_sub_none, tag_list) = arg
6361
if tag in tag_list:
6462
print(f'[标签/{tag}] 发现目标: [{album_id}]: [{title}]')
6563
id_list.append(album_id)
6664

67-
download_album(id_list, op)
65+
download_album(id_list, option)
66+
6867

6968
def main():
7069
search_jm_album()

usage/workflow_download.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
# 每行的首尾可以有空白字符
33
jm_albums = '''
44
452859
5-
6-
5+
https://18comic.vip/photo/452859/mana-ディシア-1-原神-中国語-無修正
6+
JM452859
77
88
99
'''

0 commit comments

Comments
 (0)