Skip to content

Commit 90239b0

Browse files
committed
fix(parser): stop destroying titles without a [group] prefix (#1025)
prefix_process called re.sub(f".{re.escape(group)}.", "", raw) unconditionally. When group was empty (torrent titles that don't start with [group]), the pattern degenerated to ".." and every consecutive pair of characters in the title was deleted, leaving a stub that the downstream splitter couldn't turn into title_en/zh/jp. The substitution is now guarded with `if group:`. Titles like "冰之城墙「氷の城壁」The Ramparts of Ice S01E02 1080p 日英双语-多国字幕" and "Girls Band Cry S01E05 ..." now parse correctly. Also updated the test from #764, which had been pinned to the broken behavior. Closes #1025
1 parent 8130789 commit 90239b0

2 files changed

Lines changed: 41 additions & 7 deletions

File tree

backend/src/module/parser/analyser/raw_parser.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,11 @@ def pre_process(raw_name: str) -> str:
5959

6060

6161
def prefix_process(raw: str, group: str) -> str:
62-
raw = re.sub(f".{re.escape(group)}.", "", raw)
62+
# Guard against empty group: without this, the pattern degenerates to ".."
63+
# and every pair of characters gets deleted, destroying titles that lack a
64+
# [group] prefix (#1025).
65+
if group:
66+
raw = re.sub(f".{re.escape(group)}.", "", raw)
6367
raw_process = PREFIX_RE.sub("/", raw)
6468
arg_group = raw_process.split("/")
6569
while "" in arg_group:

backend/src/test/test_raw_parser.py

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ def test_raw_parser():
5656
assert info.episode == 9
5757
assert info.season == 1
5858

59-
content = "[梦蓝字幕组]New Doraemon 哆啦A梦新番[747][2023.02.25][AVC][1080P][GB_JP][MP4]"
59+
content = (
60+
"[梦蓝字幕组]New Doraemon 哆啦A梦新番[747][2023.02.25][AVC][1080P][GB_JP][MP4]"
61+
)
6062
info = raw_parser(content)
6163
assert info.group == "梦蓝字幕组"
6264
assert info.title_zh == "哆啦A梦新番"
@@ -65,7 +67,9 @@ def test_raw_parser():
6567
assert info.episode == 747
6668
assert info.season == 1
6769

68-
content = "[织梦字幕组][尼尔:机械纪元 NieR Automata Ver1.1a][02集][1080P][AVC][简日双语]"
70+
content = (
71+
"[织梦字幕组][尼尔:机械纪元 NieR Automata Ver1.1a][02集][1080P][AVC][简日双语]"
72+
)
6973
info = raw_parser(content)
7074
assert info.group == "织梦字幕组"
7175
assert info.title_zh == "尼尔:机械纪元"
@@ -160,7 +164,9 @@ def test_raw_parser():
160164
assert info.season == 1
161165

162166
# Issue #990: Title starting with number — should not misparse "29" as episode
163-
content = "[ANi] 29 岁单身中坚冒险家的日常 - 07 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"
167+
content = (
168+
"[ANi] 29 岁单身中坚冒险家的日常 - 07 [1080P][Baha][WEB-DL][AAC AVC][CHT][MP4]"
169+
)
164170
info = raw_parser(content)
165171
assert info.group == "ANi"
166172
assert info.title_zh == "29 岁单身中坚冒险家的日常"
@@ -310,8 +316,9 @@ def test_parse_western_format(self):
310316
assert info.resolution == "1080p"
311317
# No brackets → group detection fails
312318
assert info.group == ""
313-
# No CJK chars → no title_zh/jp; EN detection also fails (short segments)
314-
assert info.title_en is None
319+
# After the #1025 fix, prefix_process no longer destroys titles without
320+
# a [group] prefix, so the English title is now extracted correctly.
321+
assert info.title_en == "Girls Band Cry"
315322
assert info.title_zh is None
316323

317324

@@ -323,7 +330,9 @@ class TestIssue986AtlasFormat:
323330
"[阿特拉斯字幕组·雪原市出差所][命运-奇异赝品_Fate/strange Fake][07_神自黄昏归来][简繁日内封PGS][日语配音版_Japanese Dub][Web-DL Remux][1080p AVC AAC]",
324331
]
325332

326-
@pytest.mark.xfail(reason="Atlas bracket-delimited format not supported by TITLE_RE")
333+
@pytest.mark.xfail(
334+
reason="Atlas bracket-delimited format not supported by TITLE_RE"
335+
)
327336
def test_parse_atlas_format(self):
328337
info = raw_parser(self.TITLES[0])
329338
assert info is not None
@@ -362,3 +371,24 @@ def test_parse_cht_title(self):
362371
assert info.source == "Baha"
363372
assert info.sub == "CHT"
364373

374+
375+
class TestIssue1025NoGroupPrefix:
376+
"""Issue #1025: Titles without a [group] prefix must still parse.
377+
378+
prefix_process was calling re.sub(f".{group}.", "", raw) even when
379+
group was empty, which reduced the pattern to `..` and deleted every
380+
pair of characters, leaving a stub like `1` that name_process couldn't
381+
split into en/zh/jp.
382+
"""
383+
384+
def test_mixed_cjk_and_en_without_group(self):
385+
content = (
386+
"冰之城墙「氷の城壁」The Ramparts of Ice S01E02 1080p 日英双语-多国字幕"
387+
)
388+
info = raw_parser(content)
389+
assert info is not None
390+
assert info.episode == 2
391+
assert info.season == 1
392+
# Before the fix all three title fields were None and title_parser
393+
# raised "Cannot extract title_raw". At least one must now be set.
394+
assert any([info.title_en, info.title_zh, info.title_jp])

0 commit comments

Comments
 (0)