Skip to content

Commit 62be606

Browse files
beaufourclaude and Claude committed
Optimize photoset downloads with bulk metadata fetch
- Use extras parameter in getPhotos() to fetch URLs and metadata in bulk
- Add helper functions to extract URLs from extras and download directly
- Skip per-photo API calls when pre-fetched data is available
- Reduces API calls from 3-4 per photo to 0 additional calls

This significantly speeds up large photoset downloads and reduces the likelihood of hitting API rate limits.

Closes #64

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 6babd7f commit 62be606

File tree

2 files changed

+105
-10
lines changed

2 files changed

+105
-10
lines changed

flickr_download/flick_download.py

Lines changed: 103 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from typing import Any, Dict, Optional
1313

1414
import flickr_api as Flickr
15+
import requests
1516
import yaml
1617
from flickr_api.flickrerrors import FlickrAPIError, FlickrError
1718
from flickr_api.objects import Person, Photo, Photoset, Walker
@@ -37,6 +38,23 @@
3738
CONFIG_FILE = "~/.flickr_download"
3839
OAUTH_TOKEN_FILE = "~/.flickr_token"
3940

41+
# Extras requested in the bulk getPhotos() call; pre-fetching these avoids the
# 3-4 per-photo API calls otherwise needed for URLs and metadata.
# See: https://www.flickr.com/services/api/flickr.photosets.getPhotos.html
EXTRAS_FOR_BULK_FETCH = "url_o,url_l,url_c,url_z,url_m,url_s,original_format,date_taken,media"

# Size labels mapped to their extras URL keys, listed largest to smallest so
# the fallback order below can be derived directly from this mapping.
SIZE_TO_URL_KEY: Dict[str, str] = {
    "Original": "url_o",
    "Large": "url_l",
    "Medium 800": "url_c",
    "Medium 640": "url_z",
    "Medium": "url_m",
    "Small": "url_s",
}

# All URL keys in descending size order, used as the fallback search order
# when no specific size was requested (dicts preserve insertion order).
URL_KEYS_BY_SIZE = list(SIZE_TO_URL_KEY.values())
57+
4058

4159
def _init(key: str, secret: str, oauth: bool) -> bool:
4260
"""Initialize API.
@@ -93,6 +111,57 @@ def _load_defaults() -> Dict[str, Any]:
93111
return {}
94112

95113

114+
def _get_url_from_extras(photo: Photo, size_label: Optional[str]) -> Optional[str]:
    """Pick a download URL out of a photo's pre-fetched extras data.

    :param photo: photo object carrying extras data from a bulk getPhotos() call
    :param size_label: requested size label, or None to take the largest available
    :returns: URL string, or None when the extras contain no usable URL
    """
    if not size_label:
        # No explicit size requested: walk the known keys largest-first and
        # return the first URL present in the extras.
        for key in URL_KEYS_BY_SIZE:
            candidate = photo.get(key)
            if candidate:
                return str(candidate)
        return None
    # A specific size was asked for: honor only that exact size; the caller
    # falls back to the regular per-photo API path when it is missing.
    key = SIZE_TO_URL_KEY.get(size_label)
    candidate = photo.get(key) if key else None
    return str(candidate) if candidate else None
135+
136+
137+
def _get_extension_from_url(url: str) -> str:
138+
"""Extract file extension from URL.
139+
140+
:param url: URL string
141+
:returns: extension including dot (e.g., '.jpg')
142+
"""
143+
# Remove query string if present
144+
path = url.split("?")[0]
145+
# Get extension from path
146+
ext = os.path.splitext(path)[1]
147+
return ext if ext else ".jpg"
148+
149+
150+
def _download_file(url: str, fname: str) -> None:
    """Download a file directly from a URL to a local path.

    :param url: URL to download from
    :param fname: local filename to save to
    :raises requests.RequestException: on HTTP or network failure
    :raises IOError: on local write failure
    """
    # Stream in chunks so large originals are never held fully in memory, and
    # use the response as a context manager so the connection is released even
    # when the body is not fully consumed (e.g. a write error mid-download).
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(fname, "wb") as out:
            for chunk in response.iter_content(chunk_size=8192):
                # iter_content can yield empty keep-alive chunks; skip them.
                if chunk:
                    out.write(chunk)
163+
164+
96165
def _get_metadata_db(dirname: str) -> sqlite3.Connection:
97166
conn = sqlite3.connect(Path(dirname) / ".metadata.db")
98167
conn.execute(
@@ -142,7 +211,9 @@ def download_list(
142211
:param save_json: save photo info as .json file
143212
"""
144213

145-
photos = Walker(pset.getPhotos)
214+
# Use extras parameter to fetch metadata in bulk (reduces per-photo API calls)
215+
# This fetches URLs, original format, date taken, etc. in the initial paginated call
216+
photos = Walker(pset.getPhotos, extras=EXTRAS_FOR_BULK_FETCH)
146217

147218
suffix = f" ({size_label})" if size_label else ""
148219

@@ -215,12 +286,22 @@ def do_download_photo(
215286
logging.info("Skipping download of already downloaded photo with ID: %s", photo.id)
216287
return
217288

289+
# Try to get URL from extras first (avoids API call for file extension)
290+
prefetched_url = _get_url_from_extras(photo, size_label)
291+
218292
fname = get_full_path(dirname, get_filename(pset, photo, suffix))
219-
try:
220-
fname = photo._getOutputFilename(fname, size_label)
221-
except (OSError, FlickrError) as ex:
222-
logging.error("Error getting photo info for %s: %s", photo.id, ex)
223-
return
293+
if prefetched_url:
294+
# We can determine extension from URL without an API call
295+
ext = _get_extension_from_url(prefetched_url)
296+
fname = fname + ext
297+
logging.debug("Using extension %s from pre-fetched URL for %s", ext, photo.id)
298+
else:
299+
# Fall back to API call to get filename/extension
300+
try:
301+
fname = photo._getOutputFilename(fname, size_label)
302+
except (OSError, FlickrError) as ex:
303+
logging.error("Error getting photo info for %s: %s", photo.id, ex)
304+
return
224305
json_fname = fname + ".json"
225306

226307
if not photo["loaded"]:
@@ -252,7 +333,9 @@ def do_download_photo(
252333
except Exception:
253334
logging.warning("Trouble saving photo info: %s", sys.exc_info())
254335

255-
if not size_label:
336+
if not size_label and not prefetched_url:
337+
# Only check size label if we don't have a pre-fetched URL
338+
# (pre-fetched URL means we already have a valid download URL)
256339
try:
257340
largest_size = photo._getLargestSizeLabel()
258341
except (OSError, FlickrError) as ex:
@@ -275,12 +358,22 @@ def do_download_photo(
275358
return
276359

277360
try:
278-
photo.save(fname, size_label)
361+
# Use pre-fetched URL if available (avoids additional API calls)
362+
if prefetched_url:
363+
logging.debug("Using pre-fetched URL for %s", photo.id)
364+
_download_file(prefetched_url, fname)
365+
else:
366+
# Fall back to standard save method (makes additional API calls)
367+
logging.debug("Falling back to standard save for %s", photo.id)
368+
photo.save(fname, size_label)
279369
except IOError as ex:
280-
logging.error("IO error saving photo: %s", ex)
370+
logging.error("IO error saving photo %s: %s", fname, ex)
281371
return
282372
except FlickrError as ex:
283-
logging.error("Flickr error saving photo: %s", ex)
373+
logging.error("Flickr error saving photo %s: %s", fname, ex)
374+
return
375+
except requests.RequestException as ex:
376+
logging.error("Network error saving photo %s: %s", fname, ex)
284377
return
285378

286379
# Set file times to when the photo was taken

tests/test_flick_download.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ def test_skip_existing_file(self) -> None:
203203
mock_photo.__getitem__ = Mock(return_value=True) # loaded = True
204204
mock_photo._getOutputFilename = Mock(return_value=str(existing_file))
205205
mock_photo.save = Mock()
206+
# Return None for URL extras to trigger fallback to _getOutputFilename
207+
mock_photo.get = Mock(return_value=None)
206208

207209
mock_pset = Mock()
208210
mock_pset.title = "Test Set"

0 commit comments

Comments
 (0)