v2.4.6: 实现对本子名称进行分词并提取原始名称;实现更便捷的自定义下载文件夹名机制、并跟进对应文档;内置一些更简洁的、可用作文件夹名的字段oname, idoname, authoroname. (#172)

hect0x7 · web-flow · commit a7e8ea271a3c · 2023-12-01T22:34:55.000+08:00
diff --git a/assets/docs/sources/tutorial/9_custom_download_dir_name.md b/assets/docs/sources/tutorial/9_custom_download_dir_name.md
@@ -0,0 +1,112 @@
+# 自定义下载文件夹名
+
+
+
+## 1. DirRule简介
+
+当你使用download_album下载本子时，本子会以一定的路径规则（DirRule）下载到你的磁盘上。
+
+你可以使用配置文件定制DirRule，例如下面的例子：
+
+```yml
+dir_rule:
+  base_dir: D:/a/b/c/
+  # 规则含义: 根目录 / 章节标题 / 图片文件
+  rule: Bd_Ptitle # P表示章节，title表示使用章节的title字段
+```
+
+如果一个章节的名称（title）是ddd，则最后的下载文件夹结构为：
+
+```
+D:/a/b/c/ddd/00001.webp
+D:/a/b/c/ddd/00002.webp
+D:/a/b/c/ddd/00003.webp
+...
+```
+
+
+
+## 2. 自定义字段名
+
+上述例子使用了title字段，如果你想自定义一个字段，然后在DirRule中使用自定义字段，该怎么做？
+
+从v2.4.6开始，你可以使用如下方式：
+
+
+
+1. 给你的自定义字段取个名
+
+```yml
+dir_rule: # 此处省略了base_dir配置项
+  rule: Bd_Amyname # A表示本子，myname表示本子的一个自定义字段
+```
+
+
+
+2. 在代码中，加入你自定义字段的处理函数
+
+```python
+from jmcomic import JmModuleConfig
+# 你需要写一个函数，把字段名作为key，函数作为value，加到JmModuleConfig.AFIELD_ADVICE这个字典中
+JmModuleConfig.AFIELD_ADVICE['myname'] = lambda album: f'[{album.id}] {album.title}'
+```
+
+
+
+这样一来，Amyname这个规则就会交由你的函数进行处理，你便可以返回一个自定义的文件夹名
+
+
+
+
+
+## 3. 更多的使用例子
+
+
+
+### 完全使用自己的文件夹名
+
+```python
+from jmcomic import JmModuleConfig, download_album
+
+dic = {
+    '248965': '社团学姐（爆赞韩漫）'
+}
+
+# Amyname
+JmModuleConfig.AFIELD_ADVICE['myname'] = lambda album: dic[album.id]
+download_album(248965)
+```
+
+
+
+### 文件夹名=作者+标题
+
+```python
+from jmcomic import JmModuleConfig
+# Amyname
+JmModuleConfig.AFIELD_ADVICE['myname'] = lambda album: f'【{album.author}】{album.title}'
+# album有一个内置字段 authoroname，效果类似
+```
+
+
+
+### 文件夹名=禁漫车号+标题
+
+```python
+from jmcomic import JmModuleConfig
+# Pmyname
+JmModuleConfig.PFIELD_ADVICE['myname'] = lambda photo: f'【{photo.id}】{photo.title}'
+```
+
+
+
+### 文件夹名=第x话+标题
+
+```yml
+# 直接使用内置字段 indextitle 即可
+dir_rule:
+  rule: Bd_Pindextitle
+```
+
+
+
diff --git a/src/jmcomic/__init__.py b/src/jmcomic/__init__.py
@@ -2,7 +2,7 @@
 # 被依赖方 <--- 使用方
 # config <--- entity <--- toolkit <--- client <--- option <--- downloader
 
-__version__ = '2.4.5'
+__version__ = '2.4.6'
 
 from .api import *
 from .jm_plugin import *
diff --git a/src/jmcomic/jm_config.py b/src/jmcomic/jm_config.py
@@ -151,6 +151,12 @@ class JmModuleConfig:
     # log时解码url
     flag_decode_url_when_logging = True
 
+    # 关联dir_rule的自定义字段与对应的处理函数
+    # 例如:
+    # Amyname -> JmModuleConfig.AFIELD_ADVICE['myname'] = lambda album: "自定义名称"
+    AFIELD_ADVICE = dict()
+    PFIELD_ADVICE = dict()
+
     @classmethod
     def downloader_class(cls):
         if cls.CLASS_DOWNLOADER is not None:
diff --git a/src/jmcomic/jm_entity.py b/src/jmcomic/jm_entity.py
@@ -61,6 +61,58 @@ def id(self) -> str:
     def title(self) -> str:
         return getattr(self, 'name')
 
+    @property
+    def author(self):
+        raise NotImplementedError
+
+    @property
+    def oname(self) -> str:
+        """
+        oname = original name
+
+        示例:
+
+        title："喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正]"
+
+        oname："喂我吃吧 老師!"
+
+        :return: 返回本子的原始名称
+        """
+        from .jm_toolkit import JmcomicText
+        oname = JmcomicText.parse_orig_album_name(self.title)
+        if oname is not None:
+            return oname
+
+        jm_log('entity', f'无法提取出原album名字: {self.title}')
+        return self.title
+
+    @property
+    def authoroname(self):
+        """
+        authoroname = author + oname
+
+        比较好识别的一种本子名称方式
+
+        具体格式: f'【{author}】{oname}'
+
+        示例:
+
+        原本子名：喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正]
+
+        authoroname：【BLVEFO9】喂我吃吧 老師!
+
+        :return: 返回作者名+作品原名，格式为: '【{author}】{oname}'
+        """
+        return f'【{self.author}】{self.oname}'
+
+    @property
+    def idoname(self):
+        """
+        类似 authoroname
+        :return: '[{id}] {oname}'
+        """
+        return f'[{self.id}] {self.oname}'
+
     def __str__(self):
         return f'{self.__class__.__name__}({self.id}-{self.title})'
 
@@ -71,19 +123,33 @@ def __alias__(cls):
         cls_name = cls.__name__
         return cls_name[cls_name.index("m") + 1: cls_name.rfind("Detail")].lower()
 
-    def get_dirname(self, ref: str) -> str:
+    @classmethod
+    def get_dirname(cls, detail: 'DetailEntity', ref: str) -> str:
         """
         该方法被 DirDule 调用，用于生成特定层次的文件夹
+
         通常调用方式如下:
-        Atitle -> ref = 'title' -> album.get_dirname(ref)
-        该方法需要返回 ref 对应的文件夹名，默认实现直接返回 getattr(self, ref)
+        Atitle -> ref = 'title' -> DetailEntity.get_dirname(album, 'title')
+        该方法需要返回 ref 对应的文件夹名，默认实现直接返回 getattr(detail, 'title')
 
         用户可重写此方法，来实现自定义文件夹名
 
+        v2.4.6: 此方法支持优先从 JmModuleConfig.AFIELD_ADVICE / PFIELD_ADVICE 中获取自定义函数并调用返回结果
+
+        :param detail: 本子/章节 实例
         :param ref: 字段名
         :returns: 文件夹名
         """
-        return getattr(self, ref)
+
+        advice_func = (JmModuleConfig.AFIELD_ADVICE
+                       if isinstance(detail, JmAlbumDetail)
+                       else JmModuleConfig.PFIELD_ADVICE
+                       ).get(ref, None)
+
+        if advice_func is not None:
+            return advice_func(detail)
+
+        return getattr(detail, ref)
 
 
 class JmImageDetail(JmBaseEntity):
@@ -110,7 +176,7 @@ def __init__(self,
 
         self.from_photo: Optional[JmPhotoDetail] = from_photo
         self.query_params: StrNone = query_params
-        self.index = index
+        self.index = index # 从1开始
 
         # temp fields, in order to simplify passing parameter
         self.save_path: str = ''
@@ -171,7 +237,7 @@ def tag(self) -> str:
         """
         this tag is used to print pretty info when logging
         """
-        return f'{self.aid}/{self.img_file_name}{self.img_file_suffix} [{self.index + 1}/{len(self.from_photo)}]'
+        return f'{self.aid}/{self.img_file_name}{self.img_file_suffix} [{self.index}/{len(self.from_photo)}]'
 
     @classmethod
     def is_image(cls):
@@ -289,7 +355,7 @@ def create_image_detail(self, index) -> JmImageDetail:
             data_original,
             from_photo=self,
             query_params=self.data_original_query_params,
-            index=index,
+            index=index + 1,
         )
 
     def get_img_data_original(self, img_name: str) -> str:
diff --git a/src/jmcomic/jm_option.py b/src/jmcomic/jm_option.py
@@ -132,7 +132,7 @@ def get_rule_solver(cls, rule: str) -> Optional[RuleSolver]:
 
         # Axxx or Pyyy
         key = 1 if rule[0] == 'A' else 2
-        solve_func = lambda detail, ref=rule[1:]: fix_windir_name(str(detail.get_dirname(ref)))
+        solve_func = lambda detail, ref=rule[1:]: fix_windir_name(str(DetailEntity.get_dirname(detail, ref)))
 
         # 保存缓存
         rule_solver = (key, solve_func, rule)
diff --git a/src/jmcomic/jm_toolkit.py b/src/jmcomic/jm_toolkit.py
@@ -225,6 +225,73 @@ def parse_to_abspath(cls, dsl_text: str) -> str:
     def parse_dsl_text(cls, dsl_text: str) -> str:
         return cls.dsl_replacer.parse_dsl_text(dsl_text)
 
+    bracket_map = {'(': ')',
+                   '[': ']',
+                   '【': '】',
+                   '（': '）',
+                   }
+
+    @classmethod
+    def parse_orig_album_name(cls, name: str, default=None):
+        word_list = cls.tokenize(name)
+
+        for word in word_list:
+            if word[0] in cls.bracket_map:
+                continue
+
+            return word
+
+        return default
+
+    @classmethod
+    def tokenize(cls, title: str) -> List[str]:
+        """
+        繞道#2 [暴碧漢化組] [えーすけ（123）] よりみち#2 (COMIC 快樂天 2024年1月號) [中國翻譯] [DL版]
+        :return: ['繞道#2', '[暴碧漢化組]', '[えーすけ（123）]', 'よりみち#2', '(COMIC 快樂天 2024年1月號)', '[中國翻譯]', '[DL版]']
+        """
+        title = title.strip()
+        ret = []
+        bracket_map = cls.bracket_map
+
+        char_list = []
+        i = 0
+        length = len(title)
+
+        def add(w=None):
+            if w is None:
+                w = ''.join(char_list).strip()
+
+            if w == '':
+                return
+
+            ret.append(w)
+            char_list.clear()
+
+        while i < length:
+            c = title[i]
+
+            if c in bracket_map:
+                # 上一个单词结束
+                add()
+                # 定位右括号
+                j = title.find(bracket_map[c], i)
+                ExceptionTool.require_true(j != -1, f'未闭合的 {c}{bracket_map[c]}: {title[i:]}')
+                # 整个括号的单词结束
+                add(title[i:j + 1])
+                # 移动指针
+                i = j + 1
+            else:
+                char_list.append(c)
+                i += 1
+
+        add()
+        return ret
+
+    @classmethod
+    def to_zh_cn(cls, s):
+        import zhconv
+        return zhconv.convert(s, 'zh_cn')
+
 
 # 支持dsl: #{???} -> os.getenv(???)
 JmcomicText.dsl_replacer.add_dsl_and_replacer(r'\$\{(.*?)\}', JmcomicText.match_os_env)