Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
anburocky3 committed Sep 21, 2022
0 parents commit 82d0eb2
Show file tree
Hide file tree
Showing 4 changed files with 207 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# ide
.idea/

# generated folder
/output
36 changes: 36 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Forum Image Grabber (Inssia)

A simple tool that downloads all images from the URL provided.

> Some of the content found on the platform below is 18+ and if you are under age, STOP — it's not for you.
## Screenshots

## How to use it?

1. [Fork the repository]() and run the `main.py` using the following command

```shell
python main.py # you have to have python 3 installed.
```

2. It will ask you for the URL, the output folder name, the page range, etc. Enter that information correctly.

| Inputs | Sample | DataType |
| ------------- | ------------- | ----|
| - Enter your **URL**, where you want to grab the images: | Any Post thread link from inssia.com | _string_ |
| - In Which **directory**, you want to put the images? | Appropriate folder title | _string_ |
| - From which page to grab **START**? (default: 0): | Any numeric value or skip (defaults to '0') | _integer_ |
| - Till which page to grab: **END**? | Any numeric value | _integer_ |
| - Page Per Items count (default: 10): | Any numeric value or skip (defaults to '10') | _integer_ |

3. It will download it for you. Automatically.

### License: MIT + (use it with care)

### Tested platforms

- inssia.com

### Authors & Maintainer

- [Anbuselvan Rocky](https://fb.me/anburocky3)
75 changes: 75 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import os
import requests
import shutil
from bs4 import BeautifulSoup
from termcolor import colored

output_dir = 'output'  # root folder that holds every per-thread download directory


def createOutputFolderIfNotExist():
    """Create the shared output root folder if it does not already exist."""
    # exist_ok avoids the race between a separate existence check and creation.
    os.makedirs(output_dir, exist_ok=True)


def welcomePage():
    """Interactively collect grab settings, then download each result page.

    Prompts for the thread URL, a subdirectory name (created under
    ``output_dir`` if missing), and the pagination bounds, then calls
    ``spider`` once per generated page URL.
    """
    url_to_grab = input("Enter your URL, where you want to grab the images:\t")
    user_dir = input('In Which directory, you want to put the images?\t')
    # Blank input falls back to the documented defaults via ``or``.
    start_num = int(input('From which page to grab START? (default: 0):\t') or '0')
    end_num = int(input('Till which page to grab: END?\t'))
    per_page_item = int(input('Page Per Items count (default: 10):\t') or '10')

    directory = f'{output_dir}/{user_dir}'

    if not os.path.exists(directory):
        os.makedirs(directory)

    pages = []

    # Build the list of "&start=" offsets to append to the thread URL.
    # NOTE(review): the first offset emitted is start_num + per_page_item,
    # so the page at offset start_num itself is never fetched — confirm
    # whether skipping the initial offset is intentional.
    for i in range(start_num, end_num, per_page_item):
        pages.append(i + per_page_item)

    for page in pages:
        url = f'{url_to_grab}&start=' + str(page)
        print(f' + {colored("Getting", "green")} this URL: {colored(url, "blue")}')
        spider(url, directory)


def spider(url, directory):
    """Download every post image found at *url* into *directory*.

    Fetches the page, finds all ``<img class="postimage">`` tags and
    streams each image into ``directory`` under its original file name,
    printing per-image progress and a final total.
    """
    image_count = 0  # number of images successfully saved from this page

    sourcecode = requests.get(url)
    plaintext = sourcecode.text
    soup = BeautifulSoup(plaintext, "lxml")

    for tag in soup.findAll('img', {"class": "postimage"}):
        link = tag.get('src')  # the image URL

        # The last path component is the file name on the remote server.
        filename = link.strip('/').rsplit('/', 1)[-1]

        # stream=True lets shutil copy the body without loading it all in RAM.
        res = requests.get(link, stream=True)

        if res.status_code == 200:
            # BUG FIX: previously opened f'{directory}/(unknown)', so every
            # image overwrote a single file; save under the real file name.
            with open(f'{directory}/{filename}', 'wb') as f:
                shutil.copyfileobj(res.raw, f)

            image_count += 1
            print(
                f'{colored(f" ---#{image_count} SUCCESS:", "green")} - Image successfully Downloaded: {colored(filename, "blue")}')
        else:
            print(f'{colored(" ---ERROR:", "red")} - Image Could not be retrieved')

    # BUG FIX: "orange" is not a termcolor color (raised KeyError); also
    # image_count previously started at 1, over-reporting the total by one.
    print(f'Total Images found on {url} is: {colored(image_count, "yellow")}')


if __name__ == '__main__':
    # Entry point: ensure the output root exists, then run the interactive flow.
    createOutputFolderIfNotExist()
    welcomePage()
91 changes: 91 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
art==5.2
astroid==2.4.2
async-generator==1.10
attrs==21.2.0
autopep8==1.5.5
awsebcli==3.20.2
beautifulsoup4==4.9.3
botocore==1.21.44
cement==2.8.2
certifi==2020.12.5
cffi==1.15.0
cfscrape==2.1.1
chardet==3.0.4
charset-normalizer==2.0.6
click==7.1.1
colorama==0.4.3
colored==1.4.2
configparser==5.0.2
cryptography==35.0.0
distro==1.5.0
docopt==0.6.2
et-xmlfile==1.0.1
excel2json-3==0.1.6
fire==0.4.0
future==0.16.0
gpsoauth==1.0.0
h11==0.12.0
idna==2.8
isort==5.7.0
jdcal==1.4.1
jmespath==0.10.0
keyring==9.3.1
keyrings.alt==3.2.0
lazy-object-proxy==1.4.3
lxml==4.6.2
mccabe==0.6.1
MouseInfo==0.1.3
numpy==1.21.0
openpyxl==3.0.6
outcome==1.1.0
pandas==1.2.5
pathspec==0.5.9
pdfminer.six==20200517
pdfplumber==0.5.25
Pillow==8.1.0
pipreqs==0.4.10
PyAutoGUI==0.9.53
pycodestyle==2.6.0
pycparser==2.20
pycryptodome==3.10.1
pycryptodomex==3.10.1
pyfiglet==0.8.post1
PyGetWindow==0.0.9
pyicloud==0.10.2
pylint==2.6.0
PyMsgBox==1.0.9
pyOpenSSL==21.0.0
pyperclip==1.8.2
pypiwin32==223
PyRect==0.1.4
PyScreeze==0.1.28
python-dateutil==2.8.1
pytweening==1.0.4
pytz==2021.1
pywin32==301
pywin32-ctypes==0.2.0
PyYAML==5.4.1
requests==2.26.0
selenium==4.0.0
semantic-version==2.8.5
six==1.14.0
sniffio==1.2.0
sortedcontainers==2.3.0
soupsieve==2.1
tabula-py==2.2.0
termcolor==1.1.0
terminaltables==3.1.0
text2art==0.2.0
toml==0.10.2
tqdm==4.60.0
trio==0.19.0
trio-websocket==0.9.2
tzlocal==2.0.0
urllib3==1.26.6
Wand==0.6.5
wcwidth==0.1.9
wget==3.2
wrapt==1.12.1
wsproto==1.0.0
xlrd==2.0.1
yarg==0.1.9

0 comments on commit 82d0eb2

Please sign in to comment.