-
-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathmain.py
80 lines (57 loc) · 2.61 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import requests
import shutil
from bs4 import BeautifulSoup
from termcolor import colored
output_dir = 'output'
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
}
def createOutputFolderIfNotExist():
if not os.path.exists(output_dir):
os.makedirs(output_dir)
def welcomePage():
url_to_grab = input("Enter your URL, where you want to grab the images:\t")
user_dir = input('In Which directory, you want to put the images?\t')
start_num = int(input('From which page to grab START? (default: 0):\t') or '0')
end_num = int(input('Till which page to grab: END?\t'))
per_page_item = int(input('Page Per Items count (default: 10):\t') or '10')
directory = f'{output_dir}/{user_dir}'
if not os.path.exists(directory):
os.makedirs(directory)
pages = []
for i in range(start_num, end_num, per_page_item):
pages.append(i + per_page_item)
for page in pages:
url = f'{url_to_grab}&start=' + str(page)
print(f' + {colored("Getting", "green")} this URL: {colored(url, "blue")}')
spider(url, directory)
def spider(url, directory):
image_count = 1
# while page <= max_pages:
# url = 'https://www.inssia.com/viewtopic.php?f=35&t=23XXX&start=' + str(page)
sourcecode = requests.get(url, headers=headers)
plaintext = sourcecode.text
soup = BeautifulSoup(plaintext, "lxml")
for tag in soup.findAll('img', {"class": "postimage"}):
link = tag.get('src') # get the link
# Check if the tag is in expect format
# del tag['src']
# if tag.attrs != {';': '', 'alt': '', 'border': '0'}:
# continue
filename = link.strip('/').rsplit('/', 1)[-1] # to get the correct file name
res = requests.get(link, headers=headers, stream=True) # use requests to get the content of the images
if res.status_code == 200:
with open(f'{directory}/{filename}', 'wb') as f:
shutil.copyfileobj(res.raw, f)
# f.write(image) # write the image into a file
print(
f'{colored(f" ---#{image_count} SUCCESS:", "green")}'
f' - Image successfully Downloaded: {colored(filename, "blue")}')
image_count += 1
else:
print(f'{colored(" ---ERROR:", "red")} - Image Could not be retrieved: {colored(filename, "blue")}')
print(colored(f"Total Images found on {url} is: {image_count}", "yellow"))
if __name__ == '__main__':
createOutputFolderIfNotExist()
welcomePage()