Skip to content

Commit 10759de

Browse files
fix: preserve existing subtitles instead of overwriting on move
Subtitles already placed next to a video are now preserved (marked _tested) and the extracted subs/ archive is kept rather than cleaned up. This was always the intended behaviour; moving a new best subtitle should not silently overwrite or discard a previously placed one. ㅤ
1 parent 9d0831a commit 10759de

6 files changed

Lines changed: 145 additions & 34 deletions

File tree

src/subsearch/core/pipeline.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,9 @@ def subtitle_rename(self) -> None:
226226

227227
@run_if_conditions_met
228228
def subtitle_move_best(self, target: Path) -> None:
229-
file_system.move_and_replace(self.bootstrap.autoload_src, target)
229+
file_system.move_best_next_to_video(
230+
self.bootstrap.autoload_src, target, SEARCH_SUBJECT.search_term, WORKSPACE.extraction_directory
231+
)
230232
capture("Best subtitle moved")
231233

232234
@run_if_conditions_met
@@ -288,16 +290,12 @@ def _elapsed(self) -> float:
288290

289291
@run_if_conditions_met
290292
def clean_up(self) -> None:
293+
# subs/ is the kept extraction archive; only tmp_subsearch and its downloads are removed.
291294
file_system.del_directory_content(APP_PATHS.tmp_dir)
292295
tracker = get_file_tracker()
293296
if WORKSPACE.download_directory != Path(""):
294-
tracker.delete_tracked_within(WORKSPACE.extraction_directory, "*.nfo")
295297
tracker.delete_tracked_within(WORKSPACE.download_directory)
296298
tracker.delete_if_tracked(WORKSPACE.download_directory)
297-
if WORKSPACE.extraction_directory.is_dir() and file_system.directory_is_empty(
298-
WORKSPACE.extraction_directory
299-
):
300-
tracker.delete_if_tracked(WORKSPACE.extraction_directory)
301299
capture("Cleanup completed")
302300

303301
def _wait_for_terminal_input(self) -> None:

src/subsearch/io/file_system.py

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import hashlib
12
import json
23
import re
34
import shutil
@@ -60,6 +61,9 @@ def is_zip_payload(chunk: bytes) -> bool:
6061

6162
_HASH_MATCH_PREFIX = "hashmatch__"
6263

64+
# marks a subtitle that was already placed next to the video and later displaced, so it is not re-selected
65+
_TESTED_MARKER = "_tested"
66+
6367

6468
def _cloudflare_block_reason(response: "CurlResponse") -> str:
6569
headers = response.headers
@@ -158,6 +162,24 @@ def _archive_listing(archive: zipfile.ZipFile) -> str:
158162
return "\n".join(f" {info.filename} ({info.file_size} bytes)" for info in archive.infolist())
159163

160164

165+
def content_hash(data: bytes) -> str:
166+
return hashlib.sha256(data).hexdigest()
167+
168+
169+
def file_content_hash(path: Path) -> str:
170+
return content_hash(path.read_bytes())
171+
172+
173+
def _resolve_extraction_target(destination: Path, filename: str, payload: bytes) -> Path | None:
174+
target = destination / Path(filename).name
175+
if not target.exists():
176+
return target
177+
if file_content_hash(target) == content_hash(payload):
178+
capture(f"Skipping {target.name} (identical copy already extracted)", level=LogLevel.DEBUG)
179+
return None
180+
return _next_available_path(destination, target.stem, target.suffix)
181+
182+
161183
def _safe_extract_archive(archive: zipfile.ZipFile, dst: Path, hash_match: bool = False) -> int:
162184
members = archive.infolist()
163185
total_uncompressed = sum(info.file_size for info in members)
@@ -180,12 +202,15 @@ def _safe_extract_archive(archive: zipfile.ZipFile, dst: Path, hash_match: bool
180202
if member_path.suffix.lower() not in _SUBTITLE_EXTENSIONS:
181203
capture(f"Ignoring {member.filename} (not a subtitle file)", level=LogLevel.DEBUG)
182204
continue
183-
extracted_path = Path(archive.extract(member, dst))
184-
if hash_match and not extracted_path.name.startswith(_HASH_MATCH_PREFIX):
185-
renamed_path = extracted_path.with_name(f"{_HASH_MATCH_PREFIX}{extracted_path.name}")
186-
extracted_path.rename(renamed_path)
187-
extracted_path = renamed_path
188-
_track(extracted_path)
205+
payload = archive.read(member)
206+
member_name = Path(member.filename).name
207+
if hash_match and not member_name.startswith(_HASH_MATCH_PREFIX):
208+
member_name = f"{_HASH_MATCH_PREFIX}{member_name}"
209+
target = _resolve_extraction_target(dst, member_name, payload)
210+
if target is None:
211+
continue
212+
target.write_bytes(payload)
213+
_track(target)
189214
extracted_count += 1
190215
return extracted_count
191216

@@ -215,11 +240,17 @@ def extract_subtitle_by_id(subtitle_id: str, src: Path, dst: Path, extension: st
215240
return extracted_count
216241

217242

243+
def _is_tested_subtitle(stem: str) -> bool:
244+
return stem.endswith(_TESTED_MARKER) or f"{_TESTED_MARKER}_v" in stem
245+
246+
218247
def subtitle_files_in(directory: Path) -> list[Path]:
219248
if not directory.is_dir():
220249
return []
221250
return sorted(
222-
file for file in directory.iterdir() if file.is_file() and file.suffix.lower() in _SUBTITLE_EXTENSIONS
251+
file
252+
for file in directory.iterdir()
253+
if file.is_file() and file.suffix.lower() in _SUBTITLE_EXTENSIONS and not _is_tested_subtitle(file.stem)
223254
)
224255

225256

@@ -265,9 +296,21 @@ def move_all(src: Path, dst: Path) -> int:
265296
return moved_count
266297

267298

268-
def move_and_replace(source_file: Path, destination_directory: Path) -> None:
269-
source_file.replace(destination_directory / source_file.name)
270-
capture(f"Moving file: {source_file} -> {destination_directory}")
299+
def move_best_next_to_video(
300+
source_file: Path, destination_directory: Path, video_stem: str, extraction_directory: Path
301+
) -> None:
302+
if _is_tested_subtitle(source_file.stem):
303+
capture(f"Refusing to move already-tested subtitle: {source_file}", level=LogLevel.DEBUG)
304+
return
305+
target = destination_directory / f"{video_stem}{source_file.suffix}"
306+
if target.exists():
307+
preserved = _next_available_path(extraction_directory, f"{video_stem}{_TESTED_MARKER}", target.suffix)
308+
capture(f"Preserving existing subtitle: {target} -> {preserved}")
309+
target.replace(preserved)
310+
_track(preserved)
311+
capture(f"Moving file: {source_file} -> {target}")
312+
source_file.replace(target)
313+
_track(target)
271314

272315

273316
def del_file_type(cwd: Path, extension: str) -> None:

src/subsearch/ui/services/post_processing.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,9 @@ def _count_already_placed(self) -> int:
4646
def _rename_and_place(self) -> int:
4747
target_path = self._resolve_target_path()
4848
renamed = subtitle_selection.autoload_rename(SEARCH_SUBJECT.search_term, self._extraction_dir)
49-
file_system.move_and_replace(renamed, target_path)
50-
placed = (target_path / renamed.name).exists()
51-
if not placed:
49+
placed_name = f"{SEARCH_SUBJECT.search_term}{renamed.suffix}"
50+
file_system.move_best_next_to_video(renamed, target_path, SEARCH_SUBJECT.search_term, self._extraction_dir)
51+
if not (target_path / placed_name).exists():
5252
self._log_no_files(0)
5353
return 0
5454
capture(f"Moved 1 subtitles to {target_path}")

tests/test_file_system.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,74 @@ def test_extract_files_in_dir_returns_extracted_subtitle_count(tmp_path) -> None
116116
assert sorted(path.name for path in destination.iterdir()) == ["movie.ass", "movie.srt"]
117117

118118

119+
def test_extract_skips_identical_content_already_in_destination(tmp_path) -> None:
120+
body = b"1\n00:00:00,000 --> 00:00:01,000\nfoo\n"
121+
destination = tmp_path / "subs"
122+
destination.mkdir()
123+
(destination / "movie.srt").write_bytes(body)
124+
with zipfile.ZipFile(tmp_path / "download.zip", "w") as archive:
125+
archive.writestr("movie.srt", body)
126+
127+
extracted_count = file_system.extract_files_in_dir(tmp_path, destination)
128+
129+
assert extracted_count == 0
130+
assert [path.name for path in destination.iterdir()] == ["movie.srt"]
131+
132+
133+
def test_extract_versions_same_name_different_content(tmp_path) -> None:
134+
destination = tmp_path / "subs"
135+
destination.mkdir()
136+
(destination / "movie.srt").write_bytes(b"existing content\n")
137+
with zipfile.ZipFile(tmp_path / "download.zip", "w") as archive:
138+
archive.writestr("movie.srt", b"different content\n")
139+
140+
extracted_count = file_system.extract_files_in_dir(tmp_path, destination)
141+
142+
assert extracted_count == 1
143+
assert sorted(path.name for path in destination.iterdir()) == ["movie.srt", "movie_v1.srt"]
144+
145+
146+
def test_move_best_uses_video_name_and_preserves_existing_into_subs(tmp_path) -> None:
147+
video_directory = tmp_path / "video"
148+
subs_directory = tmp_path / "subs"
149+
video_directory.mkdir()
150+
subs_directory.mkdir()
151+
video_stem = "the.foo.bar.2021.1080p.web.h264-foobar"
152+
(video_directory / f"{video_stem}.srt").write_text("old subtitle\n", encoding="utf-8")
153+
source = _make_srt(subs_directory, "downloaded.srt")
154+
155+
file_system.move_best_next_to_video(source, video_directory, video_stem, subs_directory)
156+
157+
assert [path.name for path in video_directory.glob("*.srt")] == [f"{video_stem}.srt"]
158+
assert (video_directory / f"{video_stem}.srt").read_text(encoding="utf-8").startswith("1\n")
159+
assert (subs_directory / f"{video_stem}_tested.srt").read_text(encoding="utf-8") == "old subtitle\n"
160+
161+
162+
def test_move_best_refuses_to_move_a_tested_subtitle(tmp_path) -> None:
163+
video_directory = tmp_path / "video"
164+
subs_directory = tmp_path / "subs"
165+
video_directory.mkdir()
166+
subs_directory.mkdir()
167+
video_stem = "the.foo.bar.2021.1080p.web.h264-foobar"
168+
tested_source = _make_srt(subs_directory, f"{video_stem}_tested_v1.srt")
169+
170+
file_system.move_best_next_to_video(tested_source, video_directory, video_stem, subs_directory)
171+
172+
assert list(video_directory.glob("*.srt")) == []
173+
assert tested_source.exists()
174+
175+
176+
def test_tested_subtitles_are_excluded_from_candidates(tmp_path) -> None:
177+
video_stem = "the.foo.bar.2021.1080p.web.h264-foobar"
178+
_make_srt(tmp_path, f"{video_stem}.srt")
179+
_make_srt(tmp_path, f"{video_stem}_tested.srt")
180+
_make_srt(tmp_path, f"{video_stem}_tested_v1.srt")
181+
182+
candidates = [path.name for path in file_system.subtitle_files_in(tmp_path)]
183+
184+
assert candidates == [f"{video_stem}.srt"]
185+
186+
119187
def test_extract_subtitle_by_id_unpacks_only_the_matching_archive(tmp_path) -> None:
120188
downloads = tmp_path / "downloads"
121189
downloads.mkdir()

tests/test_release_validation_body.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,35 +16,35 @@ def validation() -> ReleaseValidation:
1616

1717

1818
def test_block_inserted_into_empty_body(validation: ReleaseValidation) -> None:
19-
block = validation._body_block("abc123", "passed", "999")
19+
block = validation._body_block("passed", "999")
2020
body = validation._replace_body_block("", block)
2121
assert body.startswith(validation.BODY_BLOCK_START)
2222
assert "passed" in body
2323

2424

2525
def test_block_prepended_above_existing_body(validation: ReleaseValidation) -> None:
2626
existing = "#### Features:\n- thing"
27-
body = validation._replace_body_block(existing, validation._body_block("abc123", "passed", "999"))
27+
body = validation._replace_body_block(existing, validation._body_block("passed", "999"))
2828
assert body.startswith(validation.BODY_BLOCK_START)
2929
assert body.rstrip().endswith("- thing")
3030

3131

3232
def test_block_replaced_in_place_without_duplication(validation: ReleaseValidation) -> None:
33-
first = validation._replace_body_block("- thing", validation._body_block("abc123", "passed", "999"))
34-
second = validation._replace_body_block(first, validation._body_block("def456", "failed", "1000"))
33+
first = validation._replace_body_block("- thing", validation._body_block("passed", "999"))
34+
second = validation._replace_body_block(first, validation._body_block("failed", "1000"))
3535
assert second.count(validation.BODY_BLOCK_START) == 1
36-
assert "def456" in second and "failed" in second
37-
assert "abc123" not in second
36+
assert "1000" in second and "failed" in second
37+
assert "999" not in second
3838
assert second.rstrip().endswith("- thing")
3939

4040

4141
def test_jobs_preserves_validation_block_across_regeneration(validation: ReleaseValidation) -> None:
4242
pull_request = OpenMainPullRequest()
43-
existing = validation._replace_body_block("old changelog", validation._body_block("abc123", "passed", "999"))
43+
existing = validation._replace_body_block("old changelog", validation._body_block("passed", "999"))
4444
regenerated = pull_request._pr_body("#### NEW CHANGELOG", existing)
4545
assert regenerated.startswith(pull_request.VALIDATION_BLOCK_START)
4646
assert regenerated.count(pull_request.VALIDATION_BLOCK_START) == 1
47-
assert "abc123" in regenerated
47+
assert "999" in regenerated
4848
assert "#### NEW CHANGELOG" in regenerated
4949

5050

tests/test_ui_post_processing.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ def _run_unpack_worker(monkeypatch, rename: bool, extracted_count: int):
3333
monkeypatch.setattr(
3434
post_processing.subtitle_selection, "autoload_rename", lambda search_term, src: src / "renamed.srt"
3535
)
36-
monkeypatch.setattr(post_processing.file_system, "move_and_replace", lambda src, dst: None)
36+
monkeypatch.setattr(
37+
post_processing.file_system, "move_best_next_to_video", lambda src, dst, video_stem, subs_dir: None
38+
)
3739
monkeypatch.setattr(post_processing.file_system, "create_path_from_string", lambda *args, **kwargs: Path("target"))
3840
worker = post_processing._PostProcessWorker(rename=rename, store=SettingsStore(), subtitle=_make_subtitle())
3941
return worker.execute()
@@ -79,16 +81,16 @@ def test_rename_and_place_delivers_to_target(monkeypatch, tmp_path, post_process
7981
monkeypatch.setattr(
8082
post_processing.subtitle_selection, "autoload_rename", lambda search_term, src: src / "renamed.srt"
8183
)
82-
placed: list[tuple[Path, Path]] = []
84+
placed: list[tuple[Path, Path, str]] = []
8385

84-
def fake_move(source_file, destination_directory):
85-
placed.append((source_file, destination_directory))
86-
(destination_directory / source_file.name).write_text("subtitle")
86+
def fake_move(source_file, destination_directory, video_stem, subs_dir):
87+
placed.append((source_file, destination_directory, video_stem))
88+
(destination_directory / f"{video_stem}{source_file.suffix}").write_text("subtitle")
8789

88-
monkeypatch.setattr(post_processing.file_system, "move_and_replace", fake_move)
90+
monkeypatch.setattr(post_processing.file_system, "move_best_next_to_video", fake_move)
8991
monkeypatch.setattr(post_processing.file_system, "create_path_from_string", lambda *args, **kwargs: target_dir)
9092
worker = post_processing._PostProcessWorker(rename=True, store=SettingsStore(), subtitle=_make_subtitle())
9193
delivered = worker.execute()
9294

9395
assert delivered == 1
94-
assert placed == [(Path("subs") / "renamed.srt", target_dir)]
96+
assert placed == [(Path("subs") / "renamed.srt", target_dir, post_processing.SEARCH_SUBJECT.search_term)]

0 commit comments

Comments
 (0)