
Commit 82d0eb2

Commit message: init (0 parents)

File tree

- .gitignore
- README.md
- main.py
- requirements.txt

4 files changed: +207, −0 lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
# ide
.idea/

# generated folder
/output

README.md

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
# Forum Image Grabber (Inssia)

A simple tool that downloads all images from the forum thread URL you provide.

> Some of the content on the platform below is 18+. If you are underage, stop here; it is not for you.

## Screenshots

## How to use it?

1. [Fork the repository]() and run `main.py` using the following command

```shell
python main.py  # requires Python 3 to be installed
```

2. It will prompt you for the URL, the output directory, the page range and so on; enter that information when asked (a sample session is sketched after this file).

| Input prompt | Sample | Data type |
| ------------- | ------------- | ---- |
| Enter the URL you want to grab images from: | Any post/thread link from inssia.com | _string_ |
| In which directory do you want to put the images? | An appropriate folder name | _string_ |
| From which page to START grabbing? (default: 0): | Any numeric value, or skip (defaults to '0') | _integer_ |
| Till which page to grab (END)? | Any numeric value | _integer_ |
| Items per page count (default: 10): | Any numeric value, or skip (defaults to '10') | _integer_ |

3. It will download the images for you automatically.

### License: MIT (use it with care)

### Tested platforms

- inssia.com

### Authors & Maintainer

- [Anbuselvan Rocky](https://fb.me/anburocky3)
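For illustration, a run of the tool might look like the session below. The thread URL, folder name, and page numbers are placeholders chosen for this sketch, not values taken from the commit.

```shell
$ python main.py
Enter the URL you want to grab images from:     https://www.inssia.com/viewtopic.php?f=35&t=23XXX
In which directory do you want to put the images?       my-thread
From which page to START grabbing? (default: 0):        0
Till which page to grab (END)?  30
Items per page count (default: 10):     10
 + Getting this URL: https://www.inssia.com/viewtopic.php?f=35&t=23XXX&start=10
```

With these placeholder inputs the downloaded images would end up under `output/my-thread/`.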

main.py

Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
import os
import shutil

import requests
from bs4 import BeautifulSoup
from termcolor import colored

output_dir = 'output'


def createOutputFolderIfNotExist():
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)


def welcomePage():
    url_to_grab = input("Enter the URL you want to grab images from:\t")
    user_dir = input('In which directory do you want to put the images?\t')
    start_num = int(input('From which page to START grabbing? (default: 0):\t') or '0')
    end_num = int(input('Till which page to grab (END)?\t'))
    per_page_item = int(input('Items per page count (default: 10):\t') or '10')

    directory = f'{output_dir}/{user_dir}'

    if not os.path.exists(directory):
        os.makedirs(directory)

    # Build the phpBB-style &start= offsets, one per forum page to fetch.
    pages = []
    for i in range(start_num, end_num, per_page_item):
        pages.append(i + per_page_item)

    for page in pages:
        url = f'{url_to_grab}&start={page}'
        print(f' + {colored("Getting", "green")} this URL: {colored(url, "blue")}')
        spider(url, directory)


def spider(url, directory):
    image_count = 0

    # while page <= max_pages:
    #     url = 'https://www.inssia.com/viewtopic.php?f=35&t=23XXX&start=' + str(page)
    sourcecode = requests.get(url)
    plaintext = sourcecode.text
    soup = BeautifulSoup(plaintext, "lxml")

    for tag in soup.find_all('img', {"class": "postimage"}):
        link = tag.get('src')  # image URL

        # Check whether the tag is in the expected format
        # del tag['src']
        # if tag.attrs != {';': '', 'alt': '', 'border': '0'}:
        #     continue

        filename = link.strip('/').rsplit('/', 1)[-1]  # last path segment = file name

        res = requests.get(link, stream=True)  # stream the image content

        if res.status_code == 200:
            with open(f'{directory}/{filename}', 'wb') as f:
                shutil.copyfileobj(res.raw, f)  # write the image to a file
            image_count += 1
            print(
                f'{colored(f" ---#{image_count} SUCCESS:", "green")} - Image successfully downloaded: {colored(filename, "blue")}')
        else:
            print(f'{colored(" ---ERROR:", "red")} - Image could not be retrieved')

    print(f'Total images downloaded from {url}: {colored(image_count, "yellow")}')


if __name__ == '__main__':
    createOutputFolderIfNotExist()
    welcomePage()
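The least obvious part of `main.py` is the page-offset loop in `welcomePage()`; below is a minimal, self-contained sketch of that computation (the input values 0, 30 and 10 are illustrative assumptions, not taken from the commit).

```python
# Stand-alone sketch of the &start= offset computation from welcomePage().
# The input values below are illustrative placeholders.
start_num, end_num, per_page_item = 0, 30, 10

pages = []
for i in range(start_num, end_num, per_page_item):
    pages.append(i + per_page_item)

# Each offset is appended to the thread URL as &start=<offset>.
# With these inputs pages == [10, 20, 30]; note that offset 0 (the first
# forum page) is never produced by this loop.
print(pages)  # [10, 20, 30]
```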

requirements.txt

Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
art==5.2
astroid==2.4.2
async-generator==1.10
attrs==21.2.0
autopep8==1.5.5
awsebcli==3.20.2
beautifulsoup4==4.9.3
botocore==1.21.44
cement==2.8.2
certifi==2020.12.5
cffi==1.15.0
cfscrape==2.1.1
chardet==3.0.4
charset-normalizer==2.0.6
click==7.1.1
colorama==0.4.3
colored==1.4.2
configparser==5.0.2
cryptography==35.0.0
distro==1.5.0
docopt==0.6.2
et-xmlfile==1.0.1
excel2json-3==0.1.6
fire==0.4.0
future==0.16.0
gpsoauth==1.0.0
h11==0.12.0
idna==2.8
isort==5.7.0
jdcal==1.4.1
jmespath==0.10.0
keyring==9.3.1
keyrings.alt==3.2.0
lazy-object-proxy==1.4.3
lxml==4.6.2
mccabe==0.6.1
MouseInfo==0.1.3
numpy==1.21.0
openpyxl==3.0.6
outcome==1.1.0
pandas==1.2.5
pathspec==0.5.9
pdfminer.six==20200517
pdfplumber==0.5.25
Pillow==8.1.0
pipreqs==0.4.10
PyAutoGUI==0.9.53
pycodestyle==2.6.0
pycparser==2.20
pycryptodome==3.10.1
pycryptodomex==3.10.1
pyfiglet==0.8.post1
PyGetWindow==0.0.9
pyicloud==0.10.2
pylint==2.6.0
PyMsgBox==1.0.9
pyOpenSSL==21.0.0
pyperclip==1.8.2
pypiwin32==223
PyRect==0.1.4
PyScreeze==0.1.28
python-dateutil==2.8.1
pytweening==1.0.4
pytz==2021.1
pywin32==301
pywin32-ctypes==0.2.0
PyYAML==5.4.1
requests==2.26.0
selenium==4.0.0
semantic-version==2.8.5
six==1.14.0
sniffio==1.2.0
sortedcontainers==2.3.0
soupsieve==2.1
tabula-py==2.2.0
termcolor==1.1.0
terminaltables==3.1.0
text2art==0.2.0
toml==0.10.2
tqdm==4.60.0
trio==0.19.0
trio-websocket==0.9.2
tzlocal==2.0.0
urllib3==1.26.6
Wand==0.6.5
wcwidth==0.1.9
wget==3.2
wrapt==1.12.1
wsproto==1.0.0
xlrd==2.0.1
yarg==0.1.9
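The README does not mention installing these dependencies; the usual way to install a pinned list like this one (standard pip usage, not a command stated in the commit) would be:

```shell
# Install the pinned dependencies before running main.py (assumes pip for Python 3 is on PATH).
pip install -r requirements.txt
```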
