44
55from module .models import Episode
66
7- from . import patterns as p
7+ from module . parser . analyser import patterns as p
88
99logger = logging .getLogger (__name__ )
1010
@@ -38,25 +38,27 @@ class TitleMetaParser:
3838 """
3939
4040 def __init__ (self ) -> None :
41- self .raw_title = ""
42- self .title = ""
43- self .token = []
41+ self .raw_title : str = ""
42+ self .title : str = ""
43+ self .token : list [ str ] = []
4444
4545 def process_title (self ) -> None :
4646 """预处理标题,统一格式"""
47+ # title 里面可能有"\n"
4748 self .title = self .title .replace ("\n " , " " )
48-
4949 # 如果以【开头
50+ #
5051 if self .title .startswith ("【" ):
5152 translation_table = str .maketrans ("【】" , "[]" )
5253 self .title = self .title .translate (translation_table )
5354 self .title = self .title .strip ()
54- self .title += "_ "
55+ self .title += "/ "
5556
5657 def parser (self , title : str ) -> Episode :
5758 self .raw_title = title
5859 self .title = title
5960 self .process_title ()
61+ group = self .get_group_info ()
6062 source_info = self .get_source_info ()
6163 resolution_info = self .get_resolution_info ()
6264 audio_info = self .get_audio_info ()
@@ -76,6 +78,8 @@ def parser(self, title: str) -> Episode:
7678 temp_title = self .title
7779 if "/[]" in temp_title :
7880 parts = temp_title .split ("/[]" )
81+ # /[] 代表可信的集数或季度, 所以可以相信后面是与集数无用的信息
82+ # 暂时没有哪个组把集数放前面
7983 if len (parts ) > 1 :
8084 temp_title = "[]" .join (parts [:- 1 ])
8185 self .token = re .split (r"[\[\]]" , temp_title )
@@ -85,7 +89,8 @@ def parser(self, title: str) -> Episode:
8589 name_jp ,
8690 ) = self .name_process ()
8791
88- group = self .get_group ()
92+ if not group :
93+ group = self .get_group ()
8994 source = source_info [0 ] if source_info else ""
9095 sub = sub_language
9196 resolution = resolution_info [0 ] if resolution_info else ""
@@ -106,7 +111,7 @@ def parser(self, title: str) -> Episode:
106111 video_info = video_info ,
107112 )
108113
109- def findall_sub_title (self , pattern : re .Pattern , sym : str = "[]" ) -> Any :
114+ def findall_sub_title (self , pattern : re .Pattern [ str ] , sym : str = "[]" ) -> list [ str ] :
110115 """查找并替换标题中的模式"""
111116 ans = re .findall (pattern , self .title )
112117 if ans :
@@ -115,6 +120,13 @@ def findall_sub_title(self, pattern: re.Pattern, sym: str = "[]") -> Any:
115120 ans = re .findall (pattern , self .raw_title )
116121 return ans
117122
123+ def get_group_info (self ) -> str :
124+ """获取字幕组信息"""
125+ group_info = self .findall_sub_title (p .GROUP_RE )
126+ # 用& 合并多个字幕组信息
127+ group_info = "&" .join (group_info ).strip ()
128+ return group_info
129+
118130 def get_episode_info (self ) -> tuple [Any , bool , Any , bool ]:
119131 """获取剧集和季度信息"""
120132 episode_info = self .findall_sub_title (p .EPISODE_PATTERN , sym = "/[]" )
@@ -140,7 +152,6 @@ def parser_episode(self, episode_info: Any, episode_is_trusted: bool) -> int:
140152 un_trusted_episode_list .append (
141153 self .episode_info_to_episode (un_trusted_episode )
142154 )
143-
144155 # 所有的集数一致
145156 if all (x == un_trusted_episode_list [0 ] for x in un_trusted_episode_list ):
146157 return un_trusted_episode_list [0 ]
@@ -208,7 +219,9 @@ def name_process(self) -> tuple[str, str, str]:
208219
209220 # 简化 token 过滤逻辑
210221 max_len = min (10 , len (self .token ))
211- self .token = [token for token in self .token [:max_len ] if len (token .strip ()) > 1 ]
222+ self .token = [
223+ token .strip () for token in self .token [:max_len ] if len (token .strip ()) > 1
224+ ]
212225
213226 self .token = self .token [:5 ]
214227 token_priority = [len (s ) for s in self .token ]
@@ -278,19 +291,19 @@ def get_group(self) -> str:
278291 return group
279292 return ""
280293
281- def get_video_info (self ) -> Any :
294+ def get_video_info (self ) -> list [ str ] :
282295 """获取视频格式信息"""
283296 return self .findall_sub_title (p .VIDEO_TYPE_PATTERN )
284297
285- def get_resolution_info (self ) -> Any :
298+ def get_resolution_info (self ) -> list [ str ] :
286299 """获取分辨率信息"""
287300 return self .findall_sub_title (p .RESOLUTION_RE )
288301
289- def get_source_info (self ) -> Any :
302+ def get_source_info (self ) -> list [ str ] :
290303 """获取视频来源信息"""
291304 return self .findall_sub_title (p .SOURCE_RE )
292305
293- def get_unuseful_info (self ) -> Any :
306+ def get_unuseful_info (self ) -> list [ str ] :
294307 """获取无用信息"""
295308 return self .findall_sub_title (p .UNUSEFUL_RE )
296309
@@ -365,23 +378,23 @@ def is_point_5(title: str) -> bool:
365378 # title = "前辈是男孩子 (2024) S01E02.mp4"
366379 # title = "[SBSUB][CONAN][1082][V2][1080P][AVC_AAC][CHS_JP](C1E4E331).mp4"
367380 # title = "海盗战记 S01E01.zh-tw.ass"
368- title = "[百冬练习组&LoliHouse] BanG Dream! 少女乐团派对!☆PICO FEVER! / Garupa Pico: Fever! - 26 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕][END] [101.69 MB]"
381+ # title = "[百冬练习组&LoliHouse] BanG Dream! 少女乐团派对!☆PICO FEVER! / Garupa Pico: Fever! - 26 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕][END] [101.69 MB]"
369382 # title ="【喵萌奶茶屋】★04月新番★[夏日重现/Summer Time Rendering][11][1080p][繁日双语][招募翻译]"
370383 # title = "【失眠搬运组】放学后失眠的你-Kimi wa Houkago Insomnia - 06 [bilibili - 1080p AVC1 CHS-JP].mp4"
371384 # title = "[KitaujiSub] Shikanoko Nokonoko Koshitantan [01Pre][WebRip][HEVC_AAC][CHS_JP].mp4"
372385 # title = "[Doomdos] - 白色闪电 - 第02话 - [1080P].mp4"
373- # title = "Doomdos] -凡人修仙传-第107话-[1080P].mp"
386+ # title = "[ Doomdos] -凡人修仙传-第107话-[1080P].mp"
374387 # title = "[豌豆字幕组&风之圣殿字幕组】★04月新番[鬼灭之刃 柱训练篇 / Kimetsu_no_Yaiba-Hashira_Geiko_Hen][02(57)][简体][1080P][MP4]"
375388 # title = "迷宮飯 08/[TOC] Delicious in Dungeon [08][1080P][AVC AAC][CHT][MP4].mp4"
376389 # title = "[喵萌奶茶屋&LoliHouse] 葬送的芙莉莲 / Sousou no Frieren - 06 [WebRip 1080p HEVC-10bit AAC][简繁日内封字幕]"
377390 # title = "[LoliHouse] Ore wa Subete wo Parry suru - 05 [WebRip 1080p HEVC-10bit AAC SRTx2]"
378- # title = " [LoliHouse] 我要【招架】一切 ~反误解的世界最强想成为冒险者~ / Ore wa Subete wo Parry suru - 05 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕] [复制磁连]"
391+ title = " [LoliHouse] 我要【招架】一切 ~反误解的世界最强想成为冒险者~ / Ore wa Subete wo Parry suru - 05 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕] [复制磁连]"
379392 # title = "北宇治字幕组] 夜晚的水母不会游泳 / Yoru no Kurage wa Oyogenai [01-12 修正合集][WebRip][HEVC_AAC][简繁日内封] [复制磁连]"
380393 # title = "[北宇治字组&霜庭云花Sub&氢气烤肉架]【我推的孩子】/【Oshi no Ko】[18][WebRip][HEVC_AAC][繁日内嵌]"
381394 # # print(re.findall(RESOLUTION_RE,title))
382- title = (
383- "[织梦字幕组][尼尔:机械纪元 NieR Automata Ver1.1a][02集][1080P][AVC][简日双语]"
384- )
395+ # title = (
396+ # "[织梦字幕组][尼尔:机械纪元 NieR Automata Ver1.1a][02集][1080P][AVC][简日双语]"
397+ # )
385398 # title = "[ANi] Bakemonogatari / 物语系列 第外季&第怪季 - 06.5 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4][ANi] Bakemonogatari / 物语系列 第外季&第怪季 - 06.5 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4][217.2 MB]"
386399 # title = "ANi] 我獨自升級 - 07.5 [1080P][Baha][WEB-DL][AAC AVC][CHT].mp4"
387400 # title = "[NEO·QSW]古莲泰沙U グレンダイザーU Grendizer U 02[WEBRIP AVC 1080P](搜索用:巨灵神/克雷飞天神)"
@@ -391,28 +404,29 @@ def is_point_5(title: str) -> bool:
391404 # )
392405 # title = "物语系列 S05E06.5.mp4 "
393406 # title = " 【幻月字幕组】【24年日剧】【直到破坏了丈夫的家庭】【第7话】【1080P】【中日双语】.mp4"
394- title = "[LoliHouse] 2.5次元的诱惑 / 2.5-jigen no Ririsa - 01 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕][LoliHouse] 2.5次元的诱惑 / 2.5-jigen no Ririsa - 01 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕][609.59 MB]"
407+ # title = "[LoliHouse] 2.5次元的诱惑 / 2.5-jigen no Ririsa - 01 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕][LoliHouse] 2.5次元的诱惑 / 2.5-jigen no Ririsa - 01 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕][609.59 MB]"
395408 # title = "[ANi] Re:从零开始的异世界生活 第三季 - 01 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4] [复制磁连]"
396409 # title = "[AnimeRep] 蓝箱 / 青之箱 / Blue Box / Ao no Hako- 02 [1080p][简中内嵌]"
397410 # title = "[ANi] Kekkon Surutte Hontou desu ka / 听说你们要结婚!? - 03 [1080P][baha][WEB-DL][AAC AVC][CHT][MP4]"
398- # title = "[DBD-Raws][败犬女主太多了!/Make Heroine ga Oosugiru!/负けヒロインが多すぎる!][07-08TV+特典映像][BOX4][1080P][BDRip][HEVC-10bit][FLACx2][MKV] [复制磁连]"
411+ title = "[DBD-Raws][败犬女主太多了!/Make Heroine ga Oosugiru!/负けヒロインが多すぎる!][07-08TV+特典映像][BOX4][1080P][BDRip][HEVC-10bit][FLACx2][MKV] [复制磁连]"
399412 # title = "[漫猫字幕组&猫恋汉化组] 败犬女主太多了/Make Heroine ga Oosugiru (01-12Fin WEBRIP 1080p AVC AAC MP4 2024年7月 简中) [复制磁连]"
400- title = "[北宇治字幕组&LoliHouse] 地。-关于地球的运动- / Chi. Chikyuu no Undou ni Tsuite 03 [WebRip 1080p HEVC-10bit AAC ASSx2][简繁日内封字幕] [复制磁连]"
413+ # title = "[北宇治字幕组&LoliHouse] 地。-关于地球的运动- / Chi. Chikyuu no Undou ni Tsuite 03 [WebRip 1080p HEVC-10bit AAC ASSx2][简繁日内封字幕] [复制磁连]"
401414 # title = "[Lilith-Raws] Boku no Kokoro no Yabai Yatsu - 01 [Baha][WEB-DL][1080p][AVC AAC][CHT][MP4].mp4"
402415 # title = "[LoliHouse] 关于我转生变成史莱姆这档事 第三季 / Tensei Shitara Slime Datta Ken 3rd Season - 17.5(65.5) [WebRip 1080p HEVC-10bit AAC][简繁内封字幕] [复制磁连]"
403416 # title = "水星的魔女(2022) S00E19.mp4"
404417 # title = "[Billion Meta Lab] 终末列车寻往何方 Shuumatsu Torein Dokoe Iku [12][1080][HEVC 10bit][简繁日内封][END]"
405- # title = " 幻樱字幕组】【4月新番】【古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02】【22】【GB_MP4】【1920X1080】"
418+ # title = "【 幻樱字幕组】【4月新番】【古见同学有交流障碍症 第二季 Komi-san wa, Komyushou Desu. S02】【22】【GB_MP4】【1920X1080】"
406419 # title = "【1月】超超超超超喜欢你的100个女朋友 第二季 07.mp4"
407420 # print(is_vd(title))
408421 # print(is_point_5(title))
409422 # title = "[云歌字幕组][Re:从零开始的异世界生活 第三季 袭击篇][01][HEVC][x265 10bit][1080p][简日双语][招募校对] [复制磁连]"
410- # title = "NEO·QSW]想星的阿克艾利昂 情感神话 想星のアクエリオン Aquarion: Myth of Emotions 02[WEBRIP AVC 1080P](搜索用:想星的大天使)"
423+ # title = "[ NEO·QSW]想星的阿克艾利昂 情感神话 想星のアクエリオン Aquarion: Myth of Emotions 02[WEBRIP AVC 1080P](搜索用:想星的大天使)"
411424 # title = "[SBSUBJ[CONAN][1155][WEBRIP][1080P1[AVC_AAC][CHT_JP](8D4F664C).mp4"
412425 # title = "[TOC] 最弱技能《果实大师》 ~关于能无限食用技能果实(吃了就会死)这件事~ 09 [1080P][AVC AAC][CHT][MP4] [复制磁连]"
413426 # title = "[ANi] 离开 A 级队伍的我,和从前的弟子往迷宫深处迈进 - 08 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4] [复制磁连]"
414427 # title = "【喵萌奶茶屋】★04月新番★[夏日重现/Summer Time Rendering][11][1080p][繁日双语][招募翻译]"
415428 # title = "海盗战记 (2019) S01E01.mp4"
429+ # title = "somethime error"
416430 # print(title)
417431 # print(re.findall(EPISODE_PATTERN, title))
418432 # print(re.findall(SEASON_RE, title))
@@ -433,9 +447,13 @@ def is_point_5(title: str) -> bool:
433447 # title = (
434448 # "[梦蓝字幕组]New Doraemon 哆啦A梦新番[747][2023.02.25][AVC][1080P][GB_JP][MP4]"
435449 # )
436- title = "负けヒロインが多すぎる! (JPBD Vol.1-6 Remux) 败犬女主太多了! 败北女角太多了! Make Heroine ga Oosugiru! Toooooo Many Losing Heroines! [复制磁连]"
437- title = "海盗战记 S01E01.SC.ass"
438- title = "[LoliHouse] 2.5次元的诱惑 / 2.5-jigen no Ririsa - 01 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕].mkv"
450+ # title = "负けヒロインが多すぎる! (JPBD Vol.1-6 Remux) 败犬女主太多了! 败北女角太多了! Make Heroine ga Oosugiru! Toooooo Many Losing Heroines! [复制磁连]"
451+ # title = "海盗战记 S01E01.SC.ass"
452+ # title = "[LoliHouse] 2.5次元的诱惑 / 2.5-jigen no Ririsa - 01 [WebRip 1080p HEVC-10bit AAC][简繁内封字幕].mkv"
453+ # title = "[桜都字幕组&7³ACG] 摇曳露营 第3季/ゆるキャン△ SEASON3/Yuru Camp S03 | 01-12+New Anime 01-03 [简繁字幕] BDrip 1080p AV1 OPUS 2.0 [复制磁连]"
454+ #
455+ #
456+ # print(re.findall(p.GROUP_RE, title))
439457 res = raw_parser (title )
440458 for k , v in res .__dict__ .items ():
441459 print (f"{ k } : { v } " )
0 commit comments