init

anburocky3 · anburocky3 · commit 82d0eb28d108 · 2022-09-21T19:08:40.000+05:30
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+# ide
+.idea/
+
+# generated folder
+/output
diff --git a/README.md b/README.md
@@ -0,0 +1,36 @@
+# Forum Image Grabber (Inssia)
+
+A simple tool that downloads all images from the forum thread URL you provide.
+
+> Some of the content found on the platform listed below is 18+. If you are under age, stop here: it is not for you.
+## Screenshots
+
+## How to use it?
+
+1. [Fork the repository]() and run `main.py` with the following command:
+
+```shell
+python main.py # you have to have python 3 installed.
+```
+
+2. It will ask you for the URL, the output folder name, the page range, etc. Enter this information correctly.
+
+| Inputs  | Sample | DataType |
+   | ------------- | ------------- | ----|
+| - Enter your **URL**, where you want to grab the images:  | Any Post thread link from inssia.com   | _string_ |
+| - In Which **directory**, you want to put the images?  | Appropriate folder title | _string_  |
+| - From which page to grab **START**? (default: 0):  | Any numeric value or skip (defaults to '0') | _integer_ |
+| - Till which page to grab: **END**?  | Any numeric value | _integer_ |
+| - Page Per Items count (default: 10):  | Any numeric value or skip (defaults to '10') | _integer_ |
+
+3. It will then download the images for you automatically.
+
+### License: MIT + (use it with care)
+
+### Tested platforms
+
+- inssia.com
+
+### Authors & Maintainer
+
+- [Anbuselvan Rocky](https://fb.me/anburocky3)
diff --git a/main.py b/main.py
@@ -0,0 +1,75 @@
+import os
+import requests
+import shutil
+from bs4 import BeautifulSoup
+from termcolor import colored
+
+output_dir = 'output'
+
+
+def createOutputFolderIfNotExist():
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+
+def welcomePage():
+    url_to_grab = input("Enter your URL, where you want to grab the images:\t")
+    user_dir = input('In Which directory, you want to put the images?\t')
+    start_num = int(input('From which page to grab START? (default: 0):\t') or '0')
+    end_num = int(input('Till which page to grab: END?\t'))
+    per_page_item = int(input('Page Per Items count (default: 10):\t') or '10')
+
+    directory = f'{output_dir}/{user_dir}'
+
+    if not os.path.exists(directory):
+        os.makedirs(directory)
+
+    pages = []
+
+    for i in range(start_num, end_num, per_page_item):
+        pages.append(i + per_page_item)
+
+    for page in pages:
+        url = f'{url_to_grab}&start=' + str(page)
+        print(f' + {colored("Getting", "green")} this URL: {colored(url, "blue")}')
+        spider(url, directory)
+
+
+def spider(url, directory):
+    image_count = 1
+
+    # while page <= max_pages:
+    # url = 'https://www.inssia.com/viewtopic.php?f=35&t=23XXX&start=' + str(page)
+    sourcecode = requests.get(url)
+    plaintext = sourcecode.text
+    soup = BeautifulSoup(plaintext, "lxml")
+
+    for tag in soup.findAll('img', {"class": "postimage"}):
+        link = tag.get('src')  # get the link
+
+        # Check if the tag is in expect format
+        # del tag['src']
+        # if tag.attrs != {';': '', 'alt': '', 'border': '0'}:
+        #     continue
+
+        filename = link.strip('/').rsplit('/', 1)[-1]  # to get the correct file name
+
+        res = requests.get(link, stream=True)  # use requests to get the content of the images
+
+        if res.status_code == 200:
+            with open(f'{directory}/{filename}', 'wb') as f:
+                shutil.copyfileobj(res.raw, f)
+                # f.write(image)  # write the image into a file
+                print(
+                    f'{colored(f"    ---#{image_count} SUCCESS:", "green")} - Image successfully Downloaded: {colored(filename, "blue")}')
+
+            image_count += 1
+        else:
+            print(f'{colored("    ---ERROR:", "red")} - Image Could not be retrieved')
+
+    print(f'Total Images found on {url} is: {colored(image_count, "orange")}')
+
+
+if __name__ == '__main__':
+    createOutputFolderIfNotExist()
+    welcomePage()
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,91 @@
+art==5.2
+astroid==2.4.2
+async-generator==1.10
+attrs==21.2.0
+autopep8==1.5.5
+awsebcli==3.20.2
+beautifulsoup4==4.9.3
+botocore==1.21.44
+cement==2.8.2
+certifi==2020.12.5
+cffi==1.15.0
+cfscrape==2.1.1
+chardet==3.0.4
+charset-normalizer==2.0.6
+click==7.1.1
+colorama==0.4.3
+colored==1.4.2
+configparser==5.0.2
+cryptography==35.0.0
+distro==1.5.0
+docopt==0.6.2
+et-xmlfile==1.0.1
+excel2json-3==0.1.6
+fire==0.4.0
+future==0.16.0
+gpsoauth==1.0.0
+h11==0.12.0
+idna==2.8
+isort==5.7.0
+jdcal==1.4.1
+jmespath==0.10.0
+keyring==9.3.1
+keyrings.alt==3.2.0
+lazy-object-proxy==1.4.3
+lxml==4.6.2
+mccabe==0.6.1
+MouseInfo==0.1.3
+numpy==1.21.0
+openpyxl==3.0.6
+outcome==1.1.0
+pandas==1.2.5
+pathspec==0.5.9
+pdfminer.six==20200517
+pdfplumber==0.5.25
+Pillow==8.1.0
+pipreqs==0.4.10
+PyAutoGUI==0.9.53
+pycodestyle==2.6.0
+pycparser==2.20
+pycryptodome==3.10.1
+pycryptodomex==3.10.1
+pyfiglet==0.8.post1
+PyGetWindow==0.0.9
+pyicloud==0.10.2
+pylint==2.6.0
+PyMsgBox==1.0.9
+pyOpenSSL==21.0.0
+pyperclip==1.8.2
+pypiwin32==223
+PyRect==0.1.4
+PyScreeze==0.1.28
+python-dateutil==2.8.1
+pytweening==1.0.4
+pytz==2021.1
+pywin32==301
+pywin32-ctypes==0.2.0
+PyYAML==5.4.1
+requests==2.26.0
+selenium==4.0.0
+semantic-version==2.8.5
+six==1.14.0
+sniffio==1.2.0
+sortedcontainers==2.3.0
+soupsieve==2.1
+tabula-py==2.2.0
+termcolor==1.1.0
+terminaltables==3.1.0
+text2art==0.2.0
+toml==0.10.2
+tqdm==4.60.0
+trio==0.19.0
+trio-websocket==0.9.2
+tzlocal==2.0.0
+urllib3==1.26.6
+Wand==0.6.5
+wcwidth==0.1.9
+wget==3.2
+wrapt==1.12.1
+wsproto==1.0.0
+xlrd==2.0.1
+yarg==0.1.9

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +# ide
 +.idea/
++
 +# generated folder
 +/output