Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.ipynb_checkpoints/
imagenet_scraper.log
stats.csv
28 changes: 15 additions & 13 deletions downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
parser.add_argument('-number_of_classes', default = 10, type=int)
parser.add_argument('-images_per_class', default = 10, type=int)
parser.add_argument('-data_root', default='' , type=str)
parser.add_argument('-data_folder', default='imagenet_images' , type=str)
parser.add_argument('-use_class_list', default=False,type=lambda x: (str(x).lower() == 'true'))
parser.add_argument('-class_list', default=[], nargs='*')
parser.add_argument('-debug', default=False,type=lambda x: (str(x).lower() == 'true'))
Expand All @@ -26,16 +27,15 @@
args, args_other = parser.parse_known_args()

if args.debug:
logging.basicConfig(filename='imagenet_scarper.log', level=logging.DEBUG)
logging.basicConfig(filename='imagenet_scraper.log', level=logging.DEBUG)

if len(args.data_root) == 0:
logging.error("-data_root is required to run downloader!")
exit()

if not os.path.isdir(args.data_root):
logging.error(f'folder {args.data_root} does not exist! please provide existing folder in -data_root arg!')
exit()

os.makedirs(args.data_root)
logging.warning(f'folder {args.data_root} did not exist! creating folder..')

IMAGENET_API_WNID_TO_URLS = lambda wnid: f'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={wnid}'

Expand All @@ -55,7 +55,7 @@
for item in args.class_list:
classes_to_scrape.append(item)
if item not in class_info_dict:
logging.error(f'Class {item} not found in ImageNete')
logging.error(f'Class {item} not found in ImageNet')
exit()

elif args.use_class_list == False:
Expand Down Expand Up @@ -83,10 +83,11 @@
print("Picked the following clases:")
print([ class_info_dict[class_wnid]['class_name'] for class_wnid in classes_to_scrape ])

imagenet_images_folder = os.path.join(args.data_root, 'imagenet_images')
imagenet_images_folder = os.path.join(args.data_root, args.data_folder)
if not os.path.isdir(imagenet_images_folder):
os.mkdir(imagenet_images_folder)

else:
logging.warning(f'folder {args.data_folder} already exists! New images will be appended...')

scraping_stats = dict(
all=dict(
Expand Down Expand Up @@ -331,9 +332,10 @@ def finish(status):

urls = [url.decode('utf-8') for url in resp.content.splitlines()]

#for url in urls:
# get_image(url)

print(f"Multiprocessing workers: {args.multiprocessing_workers}")
with Pool(processes=args.multiprocessing_workers) as p:
p.map(get_image,urls)
if args.multiprocessing_workers==0:
for url in urls:
get_image(url)
else:
print(f"Multiprocessing workers: {args.multiprocessing_workers}")
with Pool(processes=args.multiprocessing_workers) as p:
p.map(get_image,urls)
Loading