Skip to content

Commit 16ae3d8

Browse files
beaufour and claude committed
Add local-first resume checking for faster download resumption
Add fast local-first checking to skip already-downloaded photos without making any API calls:

- Add _check_file_exists_by_id() to check for existing files by photo ID
- Supports id, id_and_title, and title_and_id naming patterns
- Check runs before any API calls are made

This significantly speeds up resuming large downloads (17k+ photos) where most files already exist locally.

Closes #63

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 6babd7f commit 16ae3d8

File tree

1 file changed

+38
-0
lines changed

1 file changed

+38
-0
lines changed

flickr_download/flick_download.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,35 @@ def _load_defaults() -> Dict[str, Any]:
9393
return {}
9494

9595

96+
def _check_file_exists_by_id(dirname: str, photo_id: str) -> Optional[str]:
    """Check if a file for a given photo ID already exists in the directory.

    This allows fast local-first checking without API calls when using
    ID-based naming (id, id_and_title, title_and_id).

    Two filename shapes are recognised:

    * names that *start* with the photo ID (``<id>...``), as long as the
      character right after the ID is not another digit, so that ID ``123``
      does not match a file belonging to photo ``1234``;
    * names that contain ``-<id>.`` (``<something>-<id>.<ext>``).

    Only regular files are reported; directories are ignored.

    :param dirname: directory to check
    :param photo_id: the photo ID to look for
    :returns: path to existing file if found, None otherwise
    """
    import glob

    photo_id = str(photo_id)
    if not photo_id:
        # An empty ID would turn the first pattern into "*" and match everything.
        return None

    # Escape glob metacharacters so that e.g. a directory called "[2020] Trip"
    # is matched literally instead of being parsed as a character class.
    safe_dir = glob.escape(os.fspath(dirname))
    safe_id = glob.escape(photo_id)

    # Check for files that start with the photo ID (for id and id_and_title naming).
    # Results are sorted so the returned path is deterministic.
    for candidate in sorted(glob.glob(os.path.join(safe_dir, f"{safe_id}*"))):
        remainder = os.path.basename(candidate)[len(photo_id):]
        if remainder[:1].isdigit():
            # The ID is only a prefix of a longer numeric ID -> different photo.
            # NOTE(review): a filename suffix that itself starts with a digit would
            # also be rejected here; that only costs the caller its regular
            # (slower) existence check, whereas a false positive would silently
            # skip a photo that was never downloaded.
            continue
        if os.path.isfile(candidate):
            return candidate

    # Also check for files that contain the photo ID (for title_and_id naming).
    # This is more expensive so only do it if the first pattern didn't match.
    for candidate in sorted(glob.glob(os.path.join(safe_dir, f"*-{safe_id}.*"))):
        if os.path.isfile(candidate):
            return candidate

    return None
123+
124+
96125
def _get_metadata_db(dirname: str) -> sqlite3.Connection:
97126
conn = sqlite3.connect(Path(dirname) / ".metadata.db")
98127
conn.execute(
@@ -207,6 +236,7 @@ def do_download_photo(
207236
:param metadata_db: optional metadata database to record downloads
208237
in
209238
"""
239+
# 1. Local-first check: metadata DB (no API call)
210240
if metadata_db:
211241
if metadata_db.execute(
212242
"SELECT * FROM downloads WHERE photo_id = ? AND size_label = ? AND suffix = ?",
@@ -215,6 +245,14 @@ def do_download_photo(
215245
logging.info("Skipping download of already downloaded photo with ID: %s", photo.id)
216246
return
217247

248+
# 2. Local-first check: file exists by ID pattern (no API call)
249+
# This helps with resuming large downloads without needing metadata_db
250+
existing_file = _check_file_exists_by_id(dirname, photo.id)
251+
if existing_file and not save_json:
252+
# Found existing file and not saving JSON, can skip entirely
253+
logging.info("Skipping %s, as it exists already (local-first check)", existing_file)
254+
return
255+
218256
fname = get_full_path(dirname, get_filename(pset, photo, suffix))
219257
try:
220258
fname = photo._getOutputFilename(fname, size_label)

0 commit comments

Comments
 (0)