1111from app .schemas .types import MediaType
1212
1313
14+ BRACKET_TITLE_RE = re .compile (r'\[(.+?)]' )
15+ RESOURCE_PIX_X_RE = re .compile (r'x' , re .IGNORECASE )
16+ RESOURCE_PIX_SPLIT_RE = re .compile (r'[Xx]' )
17+ ANIME_MARK_RE = re .compile (r"新番|月?番|[日美国][漫剧]" )
18+ ANIME_PREFIX_RE = re .compile (r".*番.|.*[日美国][漫剧]." )
19+ CATEGORY_TAG_RE = re .compile (
20+ r"[动漫画纪录片电影视连续剧集日美韩中港台海外亚洲华语大陆综艺原盘高清]{2,}|TV|Animation|Movie|Documentar|Anime" ,
21+ re .IGNORECASE ,
22+ )
23+ LEADING_BRACKET_BLOCK_RE = re .compile (r"^[^]]*]" )
24+ FILE_SIZE_RE = re .compile (r'[0-9.]+\s*[MGT]i?B(?![A-Z]+)' , re .IGNORECASE )
25+ TV_EPISODE_BRACKET_RE = re .compile (r"\[TV\s+(\d{1,4})" , re .IGNORECASE )
26+ FOUR_K_BRACKET_RE = re .compile (r'\[4k]' , re .IGNORECASE )
27+ NUMERIC_BRACKET_RE = re .compile (r"\[\d+" , re .IGNORECASE )
28+ MIXED_CHINESE_TOKEN_RE = re .compile (r'[\d|#::\-()()\u4e00-\u9fff]' )
29+
30+
1431class MetaAnime (MetaBase ):
1532 """
1633 识别动漫
1734 """
1835 _anime_no_words = ['CHS&CHT' , 'MP4' , 'GB MP4' , 'WEB-DL' ]
1936 _name_nostring_re = r"S\d{2}\s*-\s*S\d{2}|S\d{2}|\s+S\d{1,2}|EP?\d{2,4}\s*-\s*EP?\d{2,4}|EP?\d{2,4}|\s+EP?\d{1,4}|\s+GB"
2037 _fps_re = r"(\d{2,3})(?=FPS)"
38+ _name_nostring_pattern = re .compile (_name_nostring_re , re .IGNORECASE )
39+ _fps_pattern = re .compile (r"(%s)" % _fps_re , re .IGNORECASE )
2140
2241 def __init__ (self , title : str , subtitle : str = None , isfile : bool = False ):
2342 super ().__init__ (title , subtitle , isfile )
@@ -38,7 +57,7 @@ def __init__(self, title: str, subtitle: str = None, isfile: bool = False):
3857 if anitopy_info :
3958 name = anitopy_info .get ("anime_title" )
4059 if not name or name in self ._anime_no_words or (len (name ) < 5 and not StringUtils .is_chinese (name )):
41- name_match = re .search (r'\[(.+?)]' , title )
60+ name_match = BRACKET_TITLE_RE .search (title )
4261 if name_match and name_match .group (1 ):
4362 name = name_match .group (1 ).strip ()
4463 # 拆份中英文名称
@@ -81,9 +100,9 @@ def __init__(self, title: str, subtitle: str = None, isfile: bool = False):
81100 if self .cn_name :
82101 _ , self .cn_name , _ , _ , _ , _ = StringUtils .get_keyword (self .cn_name )
83102 if self .cn_name :
84- self .cn_name = re . sub (r'%s' % self . _name_nostring_re , '' , self .cn_name , flags = re . IGNORECASE ).strip ()
103+ self .cn_name = self . _name_nostring_pattern . sub ('' , self .cn_name ).strip ()
85104 if self .en_name :
86- self .en_name = re . sub (r'%s' % self . _name_nostring_re , '' , self .en_name , flags = re . IGNORECASE ).strip ().title ()
105+ self .en_name = self . _name_nostring_pattern . sub ('' , self .en_name ).strip ().title ()
87106 self ._name = StringUtils .str_title (self .en_name )
88107 # 年份
89108 year = anitopy_info .get ("anime_year" )
@@ -154,8 +173,8 @@ def __init__(self, title: str, subtitle: str = None, isfile: bool = False):
154173 if isinstance (self .resource_pix , list ):
155174 self .resource_pix = self .resource_pix [0 ]
156175 if self .resource_pix :
157- if re .search (r'x' , self .resource_pix , re . IGNORECASE ):
158- self .resource_pix = re .split (r'[Xx]' , self .resource_pix )[- 1 ] + "p"
176+ if RESOURCE_PIX_X_RE .search (self .resource_pix ):
177+ self .resource_pix = RESOURCE_PIX_SPLIT_RE .split (self .resource_pix )[- 1 ] + "p"
159178 else :
160179 self .resource_pix = self .resource_pix .lower ()
161180 if str (self .resource_pix ).isdigit ():
@@ -191,7 +210,7 @@ def __init_anime_fps(self, anitopy_info: dict, original_title: str):
191210 """
192211 从原始标题中提取帧率信息,与MetaVideo保持完全一致的实现
193212 """
194- re_res = re . search (rf"( { self . _fps_re } )" , original_title , re . IGNORECASE )
213+ re_res = self . _fps_pattern . search (original_title )
195214 if re_res :
196215 fps_value = None
197216 if re_res .group (1 ): # FPS格式
@@ -211,23 +230,21 @@ def __prepare_title(title: str):
211230 # 所有【】换成[]
212231 title = title .replace ("【" , "[" ).replace ("】" , "]" ).strip ()
213232 # 截掉xx番剧漫
214- match = re .search (r"新番|月?番|[日美国][漫剧]" , title )
233+ match = ANIME_MARK_RE .search (title )
215234 if match and match .span ()[1 ] < len (title ) - 1 :
216- title = re .sub (".*番.|.*[日美国][漫剧]." , "" , title )
235+ title = ANIME_PREFIX_RE .sub ("" , title )
217236 elif match :
218237 title = title [:title .rfind ('[' )]
219238 # 截掉分类
220239 first_item = title .split (']' )[0 ]
221- if first_item and re .search (r"[动漫画纪录片电影视连续剧集日美韩中港台海外亚洲华语大陆综艺原盘高清]{2,}|TV|Animation|Movie|Documentar|Anime" ,
222- zhconv_convert (first_item , "zh-hans" ),
223- re .IGNORECASE ):
224- title = re .sub (r"^[^]]*]" , "" , title ).strip ()
240+ if first_item and CATEGORY_TAG_RE .search (zhconv_convert (first_item , "zh-hans" )):
241+ title = LEADING_BRACKET_BLOCK_RE .sub ("" , title ).strip ()
225242 # 去掉大小
226- title = re .sub (r'[0-9.]+\s*[MGT]i?B(?![A-Z]+)' , "" , title , flags = re . IGNORECASE )
243+ title = FILE_SIZE_RE .sub ("" , title )
227244 # 将TVxx改为xx
228- title = re .sub (r"\[TV\s+(\d{1,4}) " , r"[\1" , title , flags = re . IGNORECASE )
245+ title = TV_EPISODE_BRACKET_RE .sub (r"[\1 " , title )
229246 # 将4K转为2160p
230- title = re .sub (r'\[4k]' , ' 2160p' , title , flags = re . IGNORECASE )
247+ title = FOUR_K_BRACKET_RE .sub (' 2160p' , title )
231248 # 处理/分隔的中英文标题
232249 names = title .split ("]" )
233250 if len (names ) > 1 and title .find ("- " ) == - 1 :
@@ -246,8 +263,8 @@ def __prepare_title(title: str):
246263 titles .append ("%s%s" % (left_char , name .split ("/" )[0 ].strip ()))
247264 elif name :
248265 if StringUtils .is_chinese (name ) and not StringUtils .is_all_chinese (name ):
249- if not re .search (r"\[\d+" , name , re . IGNORECASE ):
250- name = re .sub (r'[\d|#::\-()()\u4e00-\u9fff]' , '' , name ).strip ()
266+ if not NUMERIC_BRACKET_RE .search (name ):
267+ name = MIXED_CHINESE_TOKEN_RE .sub ('' , name ).strip ()
251268 if not name or name .strip ().isdigit ():
252269 continue
253270 if name == '[' :
0 commit comments