-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path00-get-blog-images.py
More file actions
33 lines (27 loc) · 1.06 KB
/
00-get-blog-images.py
File metadata and controls
33 lines (27 loc) · 1.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Download individual page images from the URLs listed in
# extra_img_Blog_orig/67-img-to-download_Blog.csv.
# + Tab-delimited file with columns: name, url
# + Save each image as "page_<name>.jpg"
import os
import pandas as pd
import utils.utilities as utl
# Location of the tab-delimited download list (columns: "name", "url").
# img_root is also handed to utl.download_url below -- presumably as the
# destination directory; confirm against utils.utilities.
img_root = "extra_img_Blog_orig"
img_loc = "67-img-to-download_Blog.csv"
file_path = os.path.join(img_root, img_loc)

logger = utl.init_logger()

# Read every column as text so that a numeric-looking name (e.g. "007") is
# used verbatim in the output filename instead of being parsed as a number.
df = pd.read_csv(file_path, delimiter='\t', dtype=str)

# Running totals reported in the summary at the end of the run.
dl_good = 0
dl_fail = 0

# For each row, download the URL and save it as "page_<name>.jpg".
for filename, url in zip(df['name'], df['url']):
    # Empty cells are read as NaN; count such rows as failures instead of
    # requesting the URL "nan" or writing "page_nan.jpg".
    if pd.isna(filename) or pd.isna(url):
        logger.error(
            f"Skipping row with missing name or url: "
            f"name={filename!r}, url={url!r}"
        )
        dl_fail += 1
        continue

    try:
        utl.download_url(url, img_root, f"page_{filename}.jpg")
    except Exception as e:
        # Best-effort batch: log the failure and carry on with the next row.
        logger.error(f"Failed to download {url}: {e}")
        dl_fail += 1
    else:
        dl_good += 1

logger.info(f"Completed processing {len(df)} images from file \"{file_path}\".")
logger.info(f":: Downloaded {dl_good} images successfully, {dl_fail} failures.")