Skip to content

Commit

Permalink
fix: Added User-Agent to all requests
Browse files Browse the repository at this point in the history
Errors are now reported correctly (closes #2)
  • Loading branch information
anburocky3 committed Oct 9, 2022
1 parent 82d0eb2 commit abf5cfe
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

output_dir = 'output'

headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
}


def createOutputFolderIfNotExist():
if not os.path.exists(output_dir):
Expand Down Expand Up @@ -40,7 +44,7 @@ def spider(url, directory):

# while page <= max_pages:
# url = 'https://www.inssia.com/viewtopic.php?f=35&t=23XXX&start=' + str(page)
sourcecode = requests.get(url)
sourcecode = requests.get(url, headers=headers)
plaintext = sourcecode.text
soup = BeautifulSoup(plaintext, "lxml")

Expand All @@ -54,20 +58,21 @@ def spider(url, directory):

filename = link.strip('/').rsplit('/', 1)[-1] # to get the correct file name

res = requests.get(link, stream=True) # use requests to get the content of the images
res = requests.get(link, headers=headers, stream=True) # use requests to get the content of the images

if res.status_code == 200:
with open(f'{directory}/{filename}', 'wb') as f:
shutil.copyfileobj(res.raw, f)
# f.write(image) # write the image into a file
print(
f'{colored(f" ---#{image_count} SUCCESS:", "green")} - Image successfully Downloaded: {colored(filename, "blue")}')
f'{colored(f" ---#{image_count} SUCCESS:", "green")}'
f' - Image successfully Downloaded: {colored(filename, "blue")}')

image_count += 1
else:
print(f'{colored(" ---ERROR:", "red")} - Image Could not be retrieved')
print(f'{colored(" ---ERROR:", "red")} - Image Could not be retrieved: {colored(filename, "blue")}')

print(f'Total Images found on {url} is: {colored(image_count, "orange")}')
print(colored(f"Total Images found on {url} is: {image_count}", "yellow"))


if __name__ == '__main__':
Expand Down

0 comments on commit abf5cfe

Please sign in to comment.