Skip to content

Commit 924d54d

Browse files
committed
perf(search): 按站点并行过滤搜索结果
1 parent 39f9550 commit 924d54d

2 files changed

Lines changed: 90 additions & 25 deletions

File tree

app/chain/search.py

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,66 @@ def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
592592
torrent_list=torrent_list,
593593
mediainfo=mediainfo) or []
594594

595+
def __do_site_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
    """
    Run the filtering pipeline for the torrents of a single site.

    Two optional stages are applied in sequence, both driven by names taken
    from the enclosing scope: the extra-parameter filter (when
    ``filter_params`` is set) followed by the rule-group filter (when
    ``rule_groups`` is set).

    :param torrent_list: torrents to filter
    :return: torrents left after every enabled stage; a new empty list when
             the input is empty
    """
    # Nothing to do for an empty input
    if not torrent_list:
        return []

    remaining = torrent_list

    # Stage 1: extra-parameter filter, reusing one helper for the whole list
    if filter_params:
        helper = TorrentHelper()
        remaining = [
            item for item in remaining
            if helper.filter_torrent(item, filter_params)
        ]

    # Stage 2: rule-group filter, skipped when stage 1 left nothing behind
    if remaining and rule_groups:
        remaining = __do_filter(remaining)

    return remaining
614+
615+
def __do_parallel_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
    """
    Filter torrents concurrently, one task per site.

    Torrents are grouped by ``(site, site_name)``; each group is passed to
    ``__do_site_filter`` on a thread pool, and progress is reported as the
    groups finish. The result lists the surviving torrents in the order they
    appear in ``torrent_list``.

    :param torrent_list: torrents gathered from all sites
    :return: torrents that passed filtering; ``torrent_list`` itself when it
             is empty or when neither ``filter_params`` nor ``rule_groups``
             is set
    """
    # No input or no filter configured: return the list unchanged
    if not torrent_list or not (filter_params or rule_groups):
        return torrent_list

    # Bucket the torrents by their site key
    site_torrents: Dict[Tuple[Optional[int], Optional[str]], List[TorrentInfo]] = {}
    for item in torrent_list:
        site_torrents.setdefault((item.site, item.site_name), []).append(item)

    # With a single site there is nothing to parallelise; filter inline
    if len(site_torrents) <= 1:
        return __do_site_filter(torrent_list)

    finished_count = 0
    # Per-site results are kept alive until the identity set below is built
    survivors: List[List[TorrentInfo]] = []
    # Never start more workers than sites; use the site count when the
    # configured pool size is falsy
    worker_count = min(len(site_torrents), settings.CONF.threadpool or len(site_torrents))
    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        pending = {}
        for site_key, group in site_torrents.items():
            pending[pool.submit(__do_site_filter, group)] = site_key
        for done in as_completed(pending):
            finished_count += 1
            survivors.append(done.result() or [])
            # This stage reports values up to 50 on the progress scale
            progress.update(
                value=finished_count / len(site_torrents) * 50,
                text=f'正在过滤,已完成 {finished_count} / {len(site_torrents)} 个站点 ...'
            )

    # Match survivors by object identity so the output follows the input order
    kept_ids = set()
    for group in survivors:
        kept_ids.update(map(id, group))
    return [item for item in torrent_list if id(item) in kept_ids]
654+
595655
if not torrents:
596656
logger.warn(f'{keyword or mediainfo.title} 未搜索到资源')
597657
return []
@@ -605,14 +665,14 @@ def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
605665
# 匹配订阅附加参数
606666
if filter_params:
607667
logger.info(f'开始附加参数过滤,附加参数:{filter_params} ...')
608-
torrents = [torrent for torrent in torrents if TorrentHelper().filter_torrent(torrent, filter_params)]
609668
# 开始过滤规则过滤
610669
if rule_groups is None:
611670
# 取搜索过滤规则
612671
rule_groups: List[str] = SystemConfigOper().get(SystemConfigKey.SearchFilterRuleGroups)
613672
if rule_groups:
614673
logger.info(f'开始过滤规则/剧集过滤,使用规则组:{rule_groups} ...')
615-
torrents = __do_filter(torrents)
674+
torrents = __do_parallel_filter(torrents)
675+
if rule_groups:
616676
if not torrents:
617677
logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源')
618678
return []

app/modules/filter/__init__.py

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@
1515

1616
class FilterModule(_ModuleBase):
1717
CONFIG_WATCH = {SystemConfigKey.CustomFilterRules.value}
18-
# 规则解析器
19-
parser: RuleParser = None
20-
# 媒体信息
21-
media: MediaInfo = None
2218

2319
# 保留一份只读内置规则定义,方便查询工具准确区分“内置规则”和“自定义规则”。
2420
builtin_rule_set: Dict[str, dict] = deepcopy(BUILTIN_RULE_SET)
@@ -30,7 +26,6 @@ def __init__(self):
3026
self.rulehelper = RuleHelper()
3127

3228
def init_module(self) -> None:
33-
self.parser = RuleParser()
3429
# 每次重载都先恢复为纯内置规则,避免旧的自定义规则残留在内存里。
3530
self.rule_set = deepcopy(self.builtin_rule_set)
3631
self.__init_custom_rules()
@@ -90,7 +85,7 @@ def filter_torrents(self, rule_groups: List[str],
9085
"""
9186
if not rule_groups:
9287
return torrent_list
93-
self.media = mediainfo
88+
parser = RuleParser()
9489
# 查询规则表详情
9590
groups = self.rulehelper.get_rule_group_by_media(media=mediainfo, group_names=rule_groups)
9691
if groups:
@@ -99,28 +94,33 @@ def filter_torrents(self, rule_groups: List[str],
9994
torrent_list = self.__filter_torrents(
10095
rule_string=group.rule_string,
10196
rule_name=group.name,
102-
torrent_list=torrent_list
97+
torrent_list=torrent_list,
98+
mediainfo=mediainfo,
99+
parser=parser,
103100
)
104101
return torrent_list
105102

106103
def __filter_torrents(self, rule_string: str, rule_name: str,
107-
torrent_list: List[TorrentInfo]) -> List[TorrentInfo]:
104+
torrent_list: List[TorrentInfo],
105+
mediainfo: MediaInfo,
106+
parser: RuleParser) -> List[TorrentInfo]:
108107
"""
109108
过滤种子
110109
"""
111110
# 返回种子列表
112111
ret_torrents = []
113112
for torrent in torrent_list:
114113
# 能命中优先级的才返回
115-
if not self.__get_order(torrent, rule_string):
114+
if not self.__get_order(torrent, rule_string, mediainfo, parser):
116115
logger.debug(f"种子 {torrent.site_name} - {torrent.title} {torrent.description or ''} "
117116
f"不匹配 {rule_name} 过滤规则")
118117
continue
119118
ret_torrents.append(torrent)
120119

121120
return ret_torrents
122121

123-
def __get_order(self, torrent: TorrentInfo, rule_str: str) -> Optional[TorrentInfo]:
122+
def __get_order(self, torrent: TorrentInfo, rule_str: str,
123+
mediainfo: MediaInfo, parser: RuleParser) -> Optional[TorrentInfo]:
124124
"""
125125
获取种子匹配的规则优先级,值越大越优先,未匹配时返回None
126126
"""
@@ -133,8 +133,8 @@ def __get_order(self, torrent: TorrentInfo, rule_str: str) -> Optional[TorrentIn
133133

134134
for rule_group in rule_groups:
135135
# 解析规则组
136-
parsed_group = self.parser.parse(rule_group.strip())
137-
if self.__match_group(torrent, parsed_group.as_list()[0]):
136+
parsed_group = parser.parse(rule_group.strip())
137+
if self.__match_group(torrent, parsed_group.as_list()[0], mediainfo):
138138
# 出现匹配时中断
139139
matched = True
140140
logger.debug(f"种子 {torrent.site_name} - {torrent.title} 优先级为 {100 - res_order + 1}")
@@ -145,27 +145,31 @@ def __get_order(self, torrent: TorrentInfo, rule_str: str) -> Optional[TorrentIn
145145

146146
return None if not matched else torrent
147147

148-
def __match_group(self, torrent: TorrentInfo, rule_group: Union[list, str]) -> Optional[bool]:
148+
def __match_group(self, torrent: TorrentInfo, rule_group: Union[list, str],
149+
mediainfo: MediaInfo) -> Optional[bool]:
149150
"""
150151
判断种子是否匹配规则组
151152
"""
152153
if not isinstance(rule_group, list):
153154
# 不是列表,说明是规则名称
154-
return self.__match_rule(torrent, rule_group)
155+
return self.__match_rule(torrent, rule_group, mediainfo)
155156
elif isinstance(rule_group, list) and len(rule_group) == 1:
156157
# 只有一个规则项
157-
return self.__match_group(torrent, rule_group[0])
158+
return self.__match_group(torrent, rule_group[0], mediainfo)
158159
elif rule_group[0] == "not":
159160
# 非操作
160-
return not self.__match_group(torrent, rule_group[1:])
161+
return not self.__match_group(torrent, rule_group[1:], mediainfo)
161162
elif rule_group[1] == "and":
162163
# 与操作
163-
return self.__match_group(torrent, rule_group[0]) and self.__match_group(torrent, rule_group[2:])
164+
return self.__match_group(torrent, rule_group[0], mediainfo) \
165+
and self.__match_group(torrent, rule_group[2:], mediainfo)
164166
elif rule_group[1] == "or":
165167
# 或操作
166-
return self.__match_group(torrent, rule_group[0]) or self.__match_group(torrent, rule_group[2:])
168+
return self.__match_group(torrent, rule_group[0], mediainfo) \
169+
or self.__match_group(torrent, rule_group[2:], mediainfo)
167170

168-
def __match_rule(self, torrent: TorrentInfo, rule_name: str) -> bool:
171+
def __match_rule(self, torrent: TorrentInfo, rule_name: str,
172+
mediainfo: MediaInfo) -> bool:
169173
"""
170174
判断种子是否匹配规则项
171175
"""
@@ -176,7 +180,7 @@ def __match_rule(self, torrent: TorrentInfo, rule_name: str) -> bool:
176180
# TMDB规则
177181
tmdb = self.rule_set[rule_name].get("tmdb")
178182
# 符合TMDB规则的直接返回True,即不过滤
179-
if tmdb and self.__match_tmdb(tmdb):
183+
if tmdb and self.__match_tmdb(tmdb, mediainfo):
180184
logger.debug(f"种子 {torrent.site_name} - {torrent.title} 符合 {rule_name} 的TMDB规则,匹配成功")
181185
return True
182186
# 匹配项:标题、副标题、标签
@@ -259,18 +263,19 @@ def __match_rule(self, torrent: TorrentInfo, rule_name: str) -> bool:
259263

260264
return True
261265

262-
def __match_tmdb(self, tmdb: dict) -> bool:
266+
@staticmethod
267+
def __match_tmdb(tmdb: dict, mediainfo: MediaInfo) -> bool:
263268
"""
264269
判断种子是否匹配TMDB规则
265270
"""
266271

267272
def __get_media_value(key: str):
268273
try:
269-
return getattr(self.media, key)
274+
return getattr(mediainfo, key)
270275
except ValueError:
271276
return ""
272277

273-
if not self.media:
278+
if not mediainfo:
274279
return False
275280

276281
for attr, value in tmdb.items():

0 commit comments

Comments
 (0)