6
6
7
7
output_dir = 'output'
8
8
9
+ headers = {
10
+ "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
11
+ }
12
+
9
13
10
14
def createOutputFolderIfNotExist ():
11
15
if not os .path .exists (output_dir ):
@@ -40,7 +44,7 @@ def spider(url, directory):
40
44
41
45
# while page <= max_pages:
42
46
# url = 'https://www.inssia.com/viewtopic.php?f=35&t=23XXX&start=' + str(page)
43
- sourcecode = requests .get (url )
47
+ sourcecode = requests .get (url , headers = headers )
44
48
plaintext = sourcecode .text
45
49
soup = BeautifulSoup (plaintext , "lxml" )
46
50
@@ -54,20 +58,21 @@ def spider(url, directory):
54
58
55
59
filename = link .strip ('/' ).rsplit ('/' , 1 )[- 1 ] # to get the correct file name
56
60
57
- res = requests .get (link , stream = True ) # use requests to get the content of the images
61
+ res = requests .get (link , headers = headers , stream = True ) # use requests to get the content of the images
58
62
59
63
if res .status_code == 200 :
60
64
with open (f'{ directory } /{ filename } ' , 'wb' ) as f :
61
65
shutil .copyfileobj (res .raw , f )
62
66
# f.write(image) # write the image into a file
63
67
print (
64
- f'{ colored (f" ---#{ image_count } SUCCESS:" , "green" )} - Image successfully Downloaded: { colored (filename , "blue" )} ' )
68
+ f'{ colored (f" ---#{ image_count } SUCCESS:" , "green" )} '
69
+ f' - Image successfully Downloaded: { colored (filename , "blue" )} ' )
65
70
66
71
image_count += 1
67
72
else :
68
- print (f'{ colored (" ---ERROR:" , "red" )} - Image Could not be retrieved' )
73
+ print (f'{ colored (" ---ERROR:" , "red" )} - Image Could not be retrieved: { colored ( filename , "blue" ) } ' )
69
74
70
- print (f' Total Images found on { url } is: { colored ( image_count , "orange" ) } ' )
75
+ print (colored ( f" Total Images found on { url } is: { image_count } " , "yellow" ) )
71
76
72
77
73
78
if __name__ == '__main__' :
0 commit comments