-
Notifications
You must be signed in to change notification settings - Fork 432
Expand file tree
/
Copy pathtitle_parser.py
More file actions
114 lines (104 loc) · 4.09 KB
/
title_parser.py
File metadata and controls
114 lines (104 loc) · 4.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import logging
from module.conf import settings
from module.models import Bangumi
from module.models.bangumi import Episode
from module.parser.analyser import (
OpenAIParser,
mikan_parser,
raw_parser,
tmdb_parser,
torrent_parser,
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class TitleParser:
    """Facade over the analyser functions imported from ``module.parser.analyser``.

    Every method is a ``@staticmethod``; the class holds no state. Several
    methods share a name with the module-level analyser function they wrap.
    Inside a method body the bare name resolves to the module-level import
    (class scope is not searched from within function bodies), so the calls
    below are delegations, not recursion.
    """

    def __init__(self):
        pass

    @staticmethod
    def torrent_parser(
        torrent_path: str,
        torrent_name: str | None = None,
        season: int | None = None,
        file_type: str = "media",
    ):
        """Delegate to the module-level ``torrent_parser``, best effort.

        Args:
            torrent_path: Forwarded unchanged as the first argument.
            torrent_name: Forwarded unchanged; defaults to ``None``.
            season: Forwarded unchanged; defaults to ``None``.
            file_type: Forwarded unchanged; defaults to ``"media"``.

        Returns:
            Whatever the underlying ``torrent_parser`` returns, or ``None``
            when it raises (the error is logged as a warning).
        """
        try:
            return torrent_parser(torrent_path, torrent_name, season, file_type)
        except Exception as e:
            # Deliberately broad: a parse failure here is logged and reported
            # to the caller as None instead of propagating.
            logger.warning(f"Cannot parse {torrent_path} with error {e}")
            return None

    @staticmethod
    async def tmdb_parser(title: str, season: int, language: str):
        """Look up ``title`` through the module-level ``tmdb_parser``.

        Args:
            title: Title to look up; also the fallback title on a miss.
            season: Fallback season, used when there is no match or the
                match has a falsy ``last_season``.
            language: Forwarded to the underlying lookup.

        Returns:
            A 4-tuple ``(title, season, year, poster_link)``. On a match the
            values come from the TMDB result; otherwise the tuple is
            ``(title, season, None, None)``.
        """
        tmdb_info = await tmdb_parser(title, language)
        if not tmdb_info:
            logger.warning(f"Cannot match {title} in TMDB. Use raw title instead.")
            logger.warning("Please change bangumi info manually.")
            return title, season, None, None

        logger.debug("TMDB Matched, official title is %s", tmdb_info.title)
        # A falsy last_season falls back to the caller-supplied season.
        tmdb_season = tmdb_info.last_season or season
        return tmdb_info.title, tmdb_season, tmdb_info.year, tmdb_info.poster_link

    @staticmethod
    async def tmdb_poster_parser(bangumi: Bangumi):
        """Set ``bangumi.poster_link`` from a TMDB lookup of its official title.

        The lookup uses ``bangumi.official_title`` and the language from
        ``settings.rss_parser.language``. ``bangumi`` is mutated in place on
        a match and left untouched (with warnings logged) otherwise.

        Args:
            bangumi: Object whose ``official_title`` is read and whose
                ``poster_link`` is assigned on a match.
        """
        tmdb_info = await tmdb_parser(
            bangumi.official_title, settings.rss_parser.language
        )
        if tmdb_info:
            logger.debug("TMDB Matched, official title is %s", tmdb_info.title)
            bangumi.poster_link = tmdb_info.poster_link
        else:
            logger.warning(
                f"Cannot match {bangumi.official_title} in TMDB. Use raw title instead."
            )
            logger.warning("Please change bangumi info manually.")

    @staticmethod
    def raw_parser(raw: str) -> Bangumi | None:
        """Parse a raw title string into a ``Bangumi``.

        The episode data comes from ``OpenAIParser`` when
        ``settings.experimental_openai.enable`` is set, and from the
        module-level ``raw_parser`` otherwise.

        Args:
            raw: The raw title string to parse.

        Returns:
            A ``Bangumi`` built from the parsed episode, or ``None`` when the
            parser yields no episode, when no title could be extracted, or
            when a ``ValueError``/``AttributeError``/``TypeError`` is raised
            while parsing (logged as a warning).
        """
        language = settings.rss_parser.language
        try:
            # use OpenAI ChatGPT to parse raw title and get structured data
            if settings.experimental_openai.enable:
                kwargs = settings.experimental_openai.dict(exclude={"enable"})
                gpt = OpenAIParser(**kwargs)
                episode_dict = gpt.parse(raw, asdict=True)
                episode = Episode(**episode_dict)
            else:
                episode = raw_parser(raw)
            if episode is None:
                return None

            title_raw = episode.title_en or episode.title_zh or episode.title_jp
            if not title_raw:
                logger.warning("Cannot extract title_raw from '%s', skipping", raw)
                return None

            titles = {
                "zh": episode.title_zh,
                "en": episode.title_en,
                "jp": episode.title_jp,
            }
            # Prefer the configured language, then zh, en, jp. `.get` keeps an
            # unrecognised language setting from raising KeyError, which the
            # except clause below would not catch. title_raw is truthy here,
            # so at least one candidate in the chain is non-empty.
            official_title = (
                titles.get(language) or titles["zh"] or titles["en"] or titles["jp"]
            )

            logger.debug("RAW:%s >> %s", raw, title_raw)
            return Bangumi(
                official_title=official_title,
                title_raw=title_raw,
                season=episode.season,
                season_raw=episode.season_raw,
                group_name=episode.group,
                dpi=episode.resolution,
                source=episode.source,
                subtitle=episode.sub,
                # True unless the parsed episode number is greater than 1.
                eps_collect=not (episode.episode > 1),
                offset=0,
                filter=",".join(settings.rss_parser.filter),
            )
        except (ValueError, AttributeError, TypeError) as e:
            logger.warning(f"Cannot parse '{raw}': {type(e).__name__}: {e}")
            return None

    @staticmethod
    async def mikan_parser(homepage: str) -> tuple[str, str]:
        """Delegate to the module-level ``mikan_parser`` and return its result.

        Args:
            homepage: Forwarded unchanged to the underlying parser.
        """
        return await mikan_parser(homepage)