Skip to content

Commit 88ad684

Browse files
authored
v2.5.12: 更新禁漫APP v1.7.0的最新APP域名; 新增插件【删除重复文件】(#244); 优化代码. (#245)
1 parent e0652a9 commit 88ad684

File tree

8 files changed

+105
-25
lines changed

8 files changed

+105
-25
lines changed

assets/docs/sources/option_file_syntax.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,17 @@ plugins:
191191

192192
zip_dir: D:/jmcomic/zip/ # 压缩文件存放的文件夹
193193
delete_original_file: true # 压缩成功后,删除所有原文件和文件夹
194+
195+
# 删除重复文件插件
196+
# 参考 → [https://github.com/hect0x7/JMComic-Crawler-Python/issues/244]
197+
- plugin: delete_duplicated_files
198+
kwargs:
199+
# limit: 必填,表示对md5出现次数的限制
200+
limit: 3
201+
# 如果文件的md5的出现次数 >= limit,是否要删除
202+
# 如果delete_original_file不配置,此插件只会打印信息,不会执行其他操作
203+
# 如果limit=1, delete_original_file=true 效果会是删除所有文件
204+
delete_original_file: true
194205

195206
- plugin: send_qq_email # 发送qq邮件插件
196207
kwargs:

assets/option/option_workflow_download.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,4 @@ plugins:
3333
msg_to: ${EMAIL_TO}
3434
password: ${EMAIL_PASS}
3535
title: ${EMAIL_TITLE}
36-
content: ${EMAIL_CONTENT}
36+
content: ${EMAIL_CONTENT}

src/jmcomic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- option <--- downloader
44

5-
__version__ = '2.5.11'
5+
__version__ = '2.5.12'
66

77
from .api import *
88
from .jm_plugin import *

src/jmcomic/jm_client_interface.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,6 @@ def is_given_type(self, ctype: Type['JmcomicClient']) -> bool:
581581
"""
582582
if isinstance(self, ctype):
583583
return True
584-
if self.client_key == instance.client_key:
584+
if self.client_key == ctype.client_key:
585585
return True
586586
return False

src/jmcomic/jm_config.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,17 @@ class JmModuleConfig:
109109
DOMAIN_IMAGE_LIST = str_to_list('''
110110
cdn-msp.jmapinodeudzn.net
111111
cdn-msp2.jmapinodeudzn.net
112+
cdn-msp2.jmapiproxy3.cc
113+
cdn-msp3.jmapinodeudzn.net
112114
113115
''')
114116

115117
# 移动端API域名
116118
DOMAIN_API_LIST = str_to_list('''
117119
www.jmapinodeudzn.xyz
118-
www.jmapinode.vip
119-
www.jmapinode.biz
120+
www.cdn-eldenringproxy.xyz
121+
www.cdn-eldenringproxy.me
122+
www.cdn-eldenringproxy.vip
120123
www.jmapinode.xyz
121124
''')
122125

src/jmcomic/jm_option.py

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,9 @@ class DirRule:
7272

7373
Detail = Union[JmAlbumDetail, JmPhotoDetail, None]
7474
RuleFunc = Callable[[Detail], str]
75-
RuleSolver = Tuple[int, RuleFunc, str]
75+
RuleSolver = Tuple[str, RuleFunc, str]
7676
RuleSolverList = List[RuleSolver]
7777

78-
rule_solver_cache: Dict[str, RuleSolver] = {}
79-
8078
def __init__(self, rule: str, base_dir=None):
8179
base_dir = JmcomicText.parse_to_abspath(base_dir)
8280
self.base_dir = base_dir
@@ -100,6 +98,25 @@ def decide_image_save_dir(self,
10098

10199
return fix_filepath('/'.join(path_ls), is_dir=True)
102100

101+
def decide_album_root_dir(self, album: JmAlbumDetail) -> str:
    """
    Resolve the root directory of an album from the configured dir rule.

    Only solvers whose key is 'Bd' or 'A' take part; every other entry
    of self.solver_list is skipped.

    :param album: album detail handed to each participating solver
    :return: the resolved segments joined with '/' and passed through
             fix_filepath(..., is_dir=True)
    :raises BaseException: whatever a solver raises is logged and re-raised
    """
    segments = []
    for rule_solver in self.solver_list:
        key, _, rule = rule_solver

        # Anything other than the base dir and album-level rules is ignored.
        if key not in ('Bd', 'A'):
            continue

        try:
            resolved = self.apply_rule_solver(album, None, rule_solver)
        except BaseException as e:
            # Log which rule failed, then propagate the original error.
            jm_log('dir_rule', f'路径规则"{rule}"的解析出错: {e}, album={album}')
            raise e

        segments.append(str(resolved))

    return fix_filepath('/'.join(segments), is_dir=True)
119+
103120
def get_role_solver_list(self, rule_dsl: str, base_dir: str) -> RuleSolverList:
104121
"""
105122
解析下载路径dsl,得到一个路径规则解析列表
@@ -111,7 +128,7 @@ def get_role_solver_list(self, rule_dsl: str, base_dir: str) -> RuleSolverList:
111128
for rule in rule_list:
112129
rule = rule.strip()
113130
if rule == 'Bd':
114-
solver_ls.append((0, lambda _: base_dir, 'Bd'))
131+
solver_ls.append(('Bd', lambda _: base_dir, 'Bd'))
115132
continue
116133

117134
rule_solver = self.get_rule_solver(rule)
@@ -137,24 +154,14 @@ def split_rule_dsl(self, rule_dsl: str) -> List[str]:
137154

138155
@classmethod
139156
def get_rule_solver(cls, rule: str) -> Optional[RuleSolver]:
140-
# 查找缓存
141-
if rule in cls.rule_solver_cache:
142-
return cls.rule_solver_cache[rule]
143-
144157
# 检查dsl
145158
if not rule.startswith(('A', 'P')):
146159
return None
147160

148-
# Axxx or Pyyy
149-
key = 1 if rule[0] == 'A' else 2
150-
151161
def solve_func(detail):
152162
return fix_windir_name(str(DetailEntity.get_dirname(detail, rule[1:])))
153163

154-
# 保存缓存
155-
rule_solver = (key, solve_func, rule)
156-
cls.rule_solver_cache[rule] = rule_solver
157-
return rule_solver
164+
return rule[0], solve_func, rule
158165

159166
@classmethod
160167
def apply_rule_solver(cls, album, photo, rule_solver: RuleSolver) -> str:
@@ -168,11 +175,11 @@ def apply_rule_solver(cls, album, photo, rule_solver: RuleSolver) -> str:
168175
"""
169176

170177
def choose_detail(key):
171-
if key == 0:
178+
if key == 'Bd':
172179
return None
173-
if key == 1:
180+
if key == 'A':
174181
return album
175-
if key == 2:
182+
if key == 'P':
176183
return photo
177184

178185
key, func, _ = rule_solver

src/jmcomic/jm_plugin.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,3 +1035,62 @@ def try_mark_photo_skip_and_log(self, photo: JmPhotoDetail, at_least_image_count
10351035
@field_cache() # 单例
10361036
def build(cls, option: JmOption) -> 'JmOptionPlugin':
10371037
return super().build(option)
1038+
1039+
1040+
class DeleteDuplicatedFilesPlugin(JmOptionPlugin):
    """
    Plugin that groups the files under an album's root directory by MD5 and,
    for every MD5 that occurs at least `limit` times, logs the matching paths
    and passes them to `execute_deletion`.

    https://github.com/hect0x7/JMComic-Crawler-Python/issues/244
    """
    plugin_key = 'delete_duplicated_files'

    @classmethod
    def calculate_md5(cls, file_path):
        """
        Compute the MD5 hash of a file.

        :param file_path: path of the file to hash
        :return: hexadecimal MD5 digest of the file content
        """
        import hashlib

        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            # Read in fixed-size chunks so the whole file is never held in memory.
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    @classmethod
    def find_duplicate_files(cls, root_folder):
        """
        Recursively hash every file under a folder and group the paths by MD5.

        :param root_folder: directory to walk
        :return: mapping of MD5 hex digest -> list of file paths with that digest
        """
        import os
        from collections import defaultdict
        md5_dict = defaultdict(list)

        for root, _, files in os.walk(root_folder):
            for file in files:
                file_path = os.path.join(root, file)
                file_md5 = cls.calculate_md5(file_path)
                md5_dict[file_md5].append(file_path)

        return md5_dict

    def invoke(self,
               limit,
               album=None,
               downloader=None,
               delete_original_file=True,
               **kwargs,
               ) -> None:
        """
        Plugin entry point; does nothing when no album is given.

        :param limit: an MD5 must occur at least this many times for its files to be handled
        :param album: album whose root directory is scanned
        :param downloader: accepted but not used by this plugin
        :param delete_original_file: stored on the instance before the scan runs
        """
        if album is None:
            return

        # NOTE(review): the default here is True, while the option-file docs in
        # this commit say that leaving it unconfigured only prints — confirm
        # which behavior is intended.
        self.delete_original_file = delete_original_file
        # Root directory that the album was downloaded into
        root_folder = self.option.dir_rule.decide_album_root_dir(album)
        self.find_duplicated_files_and_delete(limit, root_folder, album)

    def find_duplicated_files_and_delete(self, limit: int, root_folder: str, album: Optional[JmAlbumDetail] = None):
        """
        Log every group of files sharing an MD5 that occurs at least `limit`
        times under `root_folder`, then pass that group to `execute_deletion`.

        :param limit: minimum number of occurrences of an MD5 for its files to be handled
        :param root_folder: directory to scan recursively
        :param album: optional album; its id is used as a log prefix when present
        """
        md5_dict = self.find_duplicate_files(root_folder)
        # The log prefix is the same for every group, so build it once.
        prefix = '' if album is None else f'({album.album_id}) '

        for md5, paths in md5_dict.items():
            if len(paths) < limit:
                continue

            message = [prefix + f'MD5: {md5} 出现次数: {len(paths)}'] + \
                      [f'  {path}' for path in paths]
            self.log('\n'.join(message))
            # execute_deletion is not defined in this class (presumably inherited
            # from JmOptionPlugin); note that ALL paths of the group are passed.
            self.execute_deletion(paths)

src/jmcomic/jm_toolkit.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,7 @@ def save_resp_img(cls, resp: Any, filepath: str, need_convert=True):
707707
如果需要改变图片的文件格式,比如 .jpg → .png,则需要指定参数 need_convert=True.
708708
如果不需要改变图片的文件格式,使用 need_convert=False,可以跳过PIL解析图片,效率更高.
709709
710-
:param resp: HTTP响应对象
710+
:param resp: JmImageResp
711711
:param filepath: 图片文件路径
712712
:param need_convert: 是否转换图片
713713
"""
@@ -746,7 +746,7 @@ def decode_and_save(cls,
746746

747747
# 无需解密,直接保存
748748
if num == 0:
749-
img_src.save(decoded_save_path)
749+
cls.save_image(img_src, decoded_save_path)
750750
return
751751

752752
import math

0 commit comments

Comments
 (0)