-
-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 82d0eb2
Showing
4 changed files
with
207 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# ide | ||
.idea/ | ||
|
||
# generated folder | ||
/output |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Forum Image Grabber (Inssia) | ||
|
||
A simple tool that downloads all images from the forum thread URL you provide. | ||
|
||
> Some of the content found on the platform listed below is 18+. If you are under-age, stop here. It's not for you. | ||
## Screenshots | ||
|
||
## How to use it? | ||
|
||
1. [Fork the repository]() and run the `main.py` using the following command | ||
|
||
```shell | ||
python main.py # you have to have python 3 installed. | ||
``` | ||
|
||
2. It will ask you for the thread URL, the output folder name, the page range, etc. Enter that information correctly. | ||
|
||
| Inputs | Sample | DataType | | ||
| ------------- | ------------- | ----| | ||
| - Enter your **URL**, where you want to grab the images: | Any Post thread link from inssia.com | _string_ | | ||
| - In Which **directory**, you want to put the images? | Appropriate folder title | _string_ | | ||
| - From which page to grab **START**? (default: 0): | Any numeric value or skip (defaults to '0') | _integer_ | | ||
| - Till which page to grab: **END**? | Any numeric value | _integer_ | | ||
| - Page Per Items count (default: 10): | Any numeric value or skip (defaults to '10') | _integer_ | | ||
|
||
3. It will then download the images for you automatically. | ||
|
||
### License: MIT + (use it with care) | ||
|
||
### Tested platforms | ||
|
||
- inssia.com | ||
|
||
### Authors & Maintainer | ||
|
||
- [Anbuselvan Rocky](https://fb.me/anburocky3) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import os | ||
import requests | ||
import shutil | ||
from bs4 import BeautifulSoup | ||
from termcolor import colored | ||
|
||
# Root folder (relative to the current working directory) that receives
# every download.
output_dir = 'output'


def createOutputFolderIfNotExist():
    """Create the ``output_dir`` folder unless it already exists.

    ``exist_ok=True`` makes the call idempotent and removes the window
    between a separate existence check and the creation, in which another
    process could create the folder and make ``os.makedirs`` raise
    ``FileExistsError``.
    """
    os.makedirs(output_dir, exist_ok=True)
|
||
|
||
def welcomePage():
    """Ask for the scrape parameters and download every requested page.

    Reads five answers from standard input (thread URL, target sub-folder,
    start offset, end offset, offset step), creates
    ``<output_dir>/<sub-folder>`` and calls ``spider`` once per generated
    page URL.

    Raises:
        ValueError: if a numeric answer is not an integer, or if the
            items-per-page step is not a positive number.
    """
    url_to_grab = input("Enter your URL, where you want to grab the images:\t").strip()
    user_dir = input('In Which directory, you want to put the images?\t').strip()
    # An empty answer falls back to the default shown in the prompt.
    start_num = int(input('From which page to grab START? (default: 0):\t') or '0')
    end_num = int(input('Till which page to grab: END?\t'))
    per_page_item = int(input('Page Per Items count (default: 10):\t') or '10')

    # range() rejects a zero step with an unrelated-looking message and a
    # negative step silently yields no pages; fail early with a clear error.
    if per_page_item <= 0:
        raise ValueError('Page Per Items count must be a positive integer')

    directory = f'{output_dir}/{user_dir}'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)

    # NOTE(review): every offset is shifted by one step, so the page at
    # ``start_num`` itself is never requested while the one at the upper
    # bound is. Kept as-is to preserve current behaviour; confirm whether
    # the first page is meant to be skipped.
    pages = [
        offset + per_page_item
        for offset in range(start_num, end_num, per_page_item)
    ]

    # Only use '&' when the URL already carries a query string; otherwise the
    # 'start' parameter has to open the query with '?'.
    separator = '&' if '?' in url_to_grab else '?'

    for page in pages:
        url = f'{url_to_grab}{separator}start={page}'
        print(f' + {colored("Getting", "green")} this URL: {colored(url, "blue")}')
        spider(url, directory)
|
||
|
||
def spider(url, directory):
    """Download every ``<img class="postimage">`` found on *url*.

    The page is fetched with ``requests``, parsed with BeautifulSoup (lxml
    parser) and each matching image is streamed into
    ``<directory>/<last path segment of the image URL>``. Progress and the
    per-page total are printed to standard output.

    Args:
        url: Address of the forum page to scan.
        directory: Existing folder that receives the image files.

    Returns:
        None.
    """
    from urllib.parse import urljoin

    # Without a timeout, requests waits indefinitely on a stalled server.
    request_timeout = 30  # seconds
    # Number of images saved so far; starts at 0 so the final total is exact.
    image_count = 0

    sourcecode = requests.get(url, timeout=request_timeout)
    soup = BeautifulSoup(sourcecode.text, "lxml")

    for tag in soup.find_all('img', {"class": "postimage"}):
        src = tag.get('src')
        if not src:
            # An <img> without a src has nothing to download.
            continue

        # Resolve relative sources against the page URL so requests receives
        # an absolute address.
        link = urljoin(url, src)
        filename = link.strip('/').rsplit('/', 1)[-1]  # last path segment

        try:
            # The context manager releases the streamed connection even when
            # the download is skipped or fails.
            with requests.get(link, stream=True, timeout=request_timeout) as res:
                if res.status_code != 200:
                    print(f'{colored(" ---ERROR:", "red")} - Image Could not be retrieved')
                    continue

                # res.raw is the undecoded socket stream; ask urllib3 to undo
                # gzip/deflate transfer encoding before it is written to disk.
                res.raw.decode_content = True
                with open(f'{directory}/{filename}', 'wb') as f:
                    shutil.copyfileobj(res.raw, f)
        except requests.RequestException:
            # One unreachable image must not abort the rest of the page.
            print(f'{colored(" ---ERROR:", "red")} - Image Could not be retrieved')
            continue

        image_count += 1
        print(
            f'{colored(f" ---#{image_count} SUCCESS:", "green")} - Image successfully Downloaded: {colored(filename, "blue")}')

    # "orange" is not a termcolor colour name and raised KeyError here;
    # "yellow" is the closest supported one.
    print(f'Total Images found on {url} is: {colored(image_count, "yellow")}')
|
||
|
||
if __name__ == "__main__":
    # Script entry point: make sure the download root exists, then run the
    # interactive prompt that drives the scraper.
    createOutputFolderIfNotExist()
    welcomePage()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
art==5.2 | ||
astroid==2.4.2 | ||
async-generator==1.10 | ||
attrs==21.2.0 | ||
autopep8==1.5.5 | ||
awsebcli==3.20.2 | ||
beautifulsoup4==4.9.3 | ||
botocore==1.21.44 | ||
cement==2.8.2 | ||
certifi==2020.12.5 | ||
cffi==1.15.0 | ||
cfscrape==2.1.1 | ||
chardet==3.0.4 | ||
charset-normalizer==2.0.6 | ||
click==7.1.1 | ||
colorama==0.4.3 | ||
colored==1.4.2 | ||
configparser==5.0.2 | ||
cryptography==35.0.0 | ||
distro==1.5.0 | ||
docopt==0.6.2 | ||
et-xmlfile==1.0.1 | ||
excel2json-3==0.1.6 | ||
fire==0.4.0 | ||
future==0.16.0 | ||
gpsoauth==1.0.0 | ||
h11==0.12.0 | ||
idna==2.8 | ||
isort==5.7.0 | ||
jdcal==1.4.1 | ||
jmespath==0.10.0 | ||
keyring==9.3.1 | ||
keyrings.alt==3.2.0 | ||
lazy-object-proxy==1.4.3 | ||
lxml==4.6.2 | ||
mccabe==0.6.1 | ||
MouseInfo==0.1.3 | ||
numpy==1.21.0 | ||
openpyxl==3.0.6 | ||
outcome==1.1.0 | ||
pandas==1.2.5 | ||
pathspec==0.5.9 | ||
pdfminer.six==20200517 | ||
pdfplumber==0.5.25 | ||
Pillow==8.1.0 | ||
pipreqs==0.4.10 | ||
PyAutoGUI==0.9.53 | ||
pycodestyle==2.6.0 | ||
pycparser==2.20 | ||
pycryptodome==3.10.1 | ||
pycryptodomex==3.10.1 | ||
pyfiglet==0.8.post1 | ||
PyGetWindow==0.0.9 | ||
pyicloud==0.10.2 | ||
pylint==2.6.0 | ||
PyMsgBox==1.0.9 | ||
pyOpenSSL==21.0.0 | ||
pyperclip==1.8.2 | ||
pypiwin32==223 | ||
PyRect==0.1.4 | ||
PyScreeze==0.1.28 | ||
python-dateutil==2.8.1 | ||
pytweening==1.0.4 | ||
pytz==2021.1 | ||
pywin32==301 | ||
pywin32-ctypes==0.2.0 | ||
PyYAML==5.4.1 | ||
requests==2.26.0 | ||
selenium==4.0.0 | ||
semantic-version==2.8.5 | ||
six==1.14.0 | ||
sniffio==1.2.0 | ||
sortedcontainers==2.3.0 | ||
soupsieve==2.1 | ||
tabula-py==2.2.0 | ||
termcolor==1.1.0 | ||
terminaltables==3.1.0 | ||
text2art==0.2.0 | ||
toml==0.10.2 | ||
tqdm==4.60.0 | ||
trio==0.19.0 | ||
trio-websocket==0.9.2 | ||
tzlocal==2.0.0 | ||
urllib3==1.26.6 | ||
Wand==0.6.5 | ||
wcwidth==0.1.9 | ||
wget==3.2 | ||
wrapt==1.12.1 | ||
wsproto==1.0.0 | ||
xlrd==2.0.1 | ||
yarg==0.1.9 |