This repository was archived by the owner on Apr 4, 2024. It is now read-only.

Commit 2938987

Add Gather Company Employee
Add SVN Finder
Update URL Request
Rework Code
Fix small bug
Update Harvest Public Document Regex
Add version for updater
1 parent 1d32eb1 commit 2938987

13 files changed: 271 additions & 19 deletions

Belati.py

Lines changed: 49 additions & 12 deletions
@@ -31,17 +31,22 @@
 import time
 import dns.resolver
 import tldextract
-from plugins.check_domain import CheckDomain
+
+from plugins.about_project import AboutProject
 from plugins.banner_grab import BannerGrab
-from plugins.logger import Logger
+from plugins.check_domain import CheckDomain
+from plugins.common_service_check import CommonServiceCheck
+from plugins.gather_company import GatherCompany
+from plugins.git_finder import GitFinder
 from plugins.harvest_email import HarvestEmail
 from plugins.harvest_public_document import HarvestPublicDocument
-from plugins.scan_nmap import ScanNmap
-from plugins.wappalyzer import Wappalyzer
-from plugins.git_finder import GitFinder
+from plugins.logger import Logger
 from plugins.robots_scraper import RobotsScraper
-from plugins.about_project import AboutProject
+from plugins.scan_nmap import ScanNmap
+from plugins.svn_finder import SVNFinder
 from plugins.url_request import URLRequest
+from plugins.wappalyzer import Wappalyzer
+
 from lib.Sublist3r import sublist3r
 from lib.CheckMyUsername.check_my_username import CheckMyUsername
 from dnsknife.scanner import Scanner
@@ -66,7 +71,7 @@ def __init__(self):
         parser.add_argument('-d', action='store', dest='domain' , help='Perform OSINT from Domain e.g petruknisme.com(without protocol http/https)')
         parser.add_argument('-u', action='store', dest='username' , help='Perform OSINT from username e.g petruknisme')
         parser.add_argument('-e', action='store', dest='email' , help='Perform OSINT from email address')
-        parser.add_argument('-c', action='store', dest='orgcomp' , help='Perform OSINT from Organization or Company Name')
+        parser.add_argument('-c', action='store', dest='orgcomp' , help='Perform OSINT from Organization or Company Name, use double quote')
         parser.add_argument('-o', action='store', dest='output_files' , help='Save log for output files')
         parser.add_argument('--db-file', action='store', dest='db_file_location' , help='Specify Database File Location(SQLite3)')
         parser.add_argument('--single-proxy', action='store', dest='single_proxy', help='Proxy support with single IP (ex: http://127.0.0.1:8080)' )
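Because `-c` stores the whole organization name in a single `orgcomp` value, multi-word names have to be quoted so the shell passes them as one argument, e.g. `python Belati.py -c "Acme Corporation"` (the company name here is only an illustration).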
@@ -102,10 +107,10 @@ def __init__(self):
         proxy = self.multiple_proxy_list

         extract_domain = tldextract.extract(domain)
+
         self.check_domain(self.url_req.ssl_checker(domain), proxy)
         self.banner_grab(self.url_req.ssl_checker(domain), proxy)

-
         if extract_domain.subdomain == "":
             self.robots_scraper(self.url_req.ssl_checker(domain), proxy)
             self.enumerate_subdomains(domain, proxy)
@@ -125,7 +130,10 @@ def __init__(self):
         if username is not None:
             self.username_checker(username)

-        if email or orgcomp is not None:
+        if orgcomp is not None:
+            self.gather_company(orgcomp, proxy)
+
+        if email is not None:
             log.console_log("This feature will be coming soon. Be patient :)")

         log.console_log("{}All done sir! All log saved in log directory and dowloaded file saved in belatiFiles {}".format(Y, W))
@@ -142,6 +150,7 @@ def show_banner(self):
 | $$$$$$$/| $$$$$$$$| $$$$$$$$| $$ | $$ | $$ /$$$$$$
 |_______/ |________/|________/|__/ |__/ |__/ |______/

+The Traditional Swiss Army Knife for OSINT

 =[ {} {} by {}]=

@@ -188,13 +197,18 @@ def enumerate_subdomains(self, domain_name, proxy):
             self.robots_scraper(self.url_req.ssl_checker(subdomain), proxy)
             self.wappalyzing_webpage(subdomain)
             self.public_git_finder(subdomain, proxy)
+            self.public_svn_finder(subdomain, proxy)
             try:
                 subdomain_ip_list.append(socket.gethostbyname(subdomain))
             except socket.gaierror:
                 pass

         subdomain_ip_listFix = list(set(subdomain_ip_list))

+        # check common service port TODO
+        #for ipaddress in subdomain_ip_listFix:
+            #self.common_service_check(ipaddress)
+
         for ipaddress in subdomain_ip_listFix:
             self.service_scanning(ipaddress)

@@ -300,15 +314,38 @@ def public_git_finder(self, domain, proxy_address):
         log.console_log("{}[*] Checking Public GIT Directory on domain {}{}".format(G, domain, W))
         git_finder = GitFinder()
         if git_finder.check_git(domain, proxy_address) == True:
-            log.console_log("{}[+] Gotcha! You are in luck boy!{}".format(G, W))
+            log.console_log("{}[+] Gotcha! You are in luck, boy![{}/.git/]{}".format(Y, domain, W))
+
+    def public_svn_finder(self, domain, proxy_address):
+        log.console_log("{}[*] Checking Public SVN Directory on domain {}{}".format(G, domain, W))
+        svn_finder = SVNFinder()
+        if svn_finder.check_svn(domain, proxy_address) == 403:
+            log.console_log("{}[+] Um... Forbidden :( {}".format(Y, W))
+        if svn_finder.check_svn(domain, proxy_address) == 200:
+            log.console_log("{}[+] Gotcha! You are in luck, boy![{}/.svn/]{}".format(Y, domain, W))

     def robots_scraper(self, domain, proxy_address):
         scraper = RobotsScraper()
         data = scraper.check_robots(domain, proxy_address)
-        if data is not None and data.code == 200:
-            log.console_log("{}[+] Found interesting robots.txt content on domain {}:{}".format(Y, domain, W))
+        if data is not None and isinstance(data, int) == False and data.code == 200:
+            log.console_log("{}[+] Found interesting robots.txt[ {} ] =>{}".format(Y, domain, W))
             log.console_log(data.read())

+    def gather_company(self, company_name, proxy_address):
+        log.console_log("{}[+] Gathering Company Employee {} -> {}".format(G, W, company_name))
+        gather_company = GatherCompany()
+        gather_company.crawl_company_employee(company_name, proxy_address)
+
+    def check_update(self, version):
+        log.console_log("{} Checking Version Update for Belati... {}".format(G, W))
+        # TODO
+
+
+    def common_service_check(self, host):
+        log.console_log("{}[*] Checking Common Service Check on host {}{}".format(G, host, W))
+        service_check = CommonServiceCheck()
+        service_check.check_available_service(host)
+
     def check_python_version(self):
         if sys.version[:3] == "2.7" or "2" in sys.version[:3]:
             log.console_log("{}[*] Python version OK! {}{}".format(G, sys.version[:6], W))

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+Changelog:
+
+v0.2.0-dev:
+
+Add Gather Company Employee
+Add SVN Finder
+Update URL Request
+Rework Code
+Fix small bug
+Update Harvest Public Document Regex
+Add version for updater

README.md

Lines changed: 14 additions & 0 deletions
@@ -1,4 +1,6 @@
 # Belati
+Belati - The Traditional Swiss Army Knife For OSINT
+
 Belati is tool for Collecting Public Data & Public Document from Website and other service for OSINT purpose. This tools is inspired by Foca and Datasploit for OSINT :)

 ## Why I Made this?
@@ -17,7 +19,10 @@ Just for learning stuff and OSINT purpose. Correct me if i'm wrong
 - Fake and Random User Agent ( Prevent from blocking )
 - Proxy Support for Harvesting Emails and Documents
 - Public Git Finder in domain/subdomain
+- Public SVN Finder in domain/subdomain
 - Robot.txt Scraper in domain/subdomain
+- Gather Public Company Employee
+

 ## TODO
 - Automatic OSINT with Username and Email support
@@ -32,6 +37,7 @@ Just for learning stuff and OSINT purpose. Correct me if i'm wrong
 - Web version with Django
 - Scanning Report export to PDF
 - domain or subdomain reputation checker
+- Reporting Support to JSON, PDF

 ## Install/Usage
 ```
@@ -85,13 +91,21 @@ yum install gcc gmp gmp-devel python-devel
 - Sublist3r
 - Subbrute
 - nmap
+- git

 ## Notice
 I'm using PyWhois Library, Sublist3r, MailHarvester, Emingoo as part of my code. This tool is for educational purposes only. Any damage you make will not affect the author. Do It With Your Own Risk

 ## Author
 Aan Wahyu a.k.a Petruknisme(https://petruknisme.com)

+## Thanks To
+
+Thanks to PyWhois Library, Sublist3r, MailHarvester, Emingoo for being part of my code. Also thanks to Hispagatos, Infosec-ninjas, eCHo, RNDC( Research and development center ) and all other people who are inspiring this project :)
+
+Thanks to Echo-Zine Staff for approving my Ezine : http://ezine.echo.or.id/issue31/005.txt - Belati : Collecting Public Data & Public Document for OSINT Purpose - Petruknisme
+
+
 ## License
 Belati is licensed under GPL V2. You can use, modify, or redistribute this tool under the terms of GNU General Public License (GPLv2).

plugins/about_project.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ class AboutProject(object):
     def __init__(self):
         self.__info__ = 'Collecting Public Data & Public Document for OSINT purpose'
         self.__author__ = 'Petruknisme'
-        self.__version__ = 'v0.1.8-dev'
+        self.__version__ = 'v0.2.0-dev'
         self.__name__= "Belati"
         self.__giturl__ = "https://github.com/aancw/Belati"
         self.__authorurl__ = "https://petruknisme.com"

plugins/common_service_check.py

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Belati is tool for Collecting Public Data & Public Document from Website and other service for OSINT purpose.
+# This tools is inspired by Foca and Datasploit for OSINT
+# Copyright (C) 2017 cacaddv@gmail.com (Petruknisme a.k.a Aan Wahyu)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This file is part of Belati project
+
+import sys, socket, errno
+from logger import Logger
+from url_request import URLRequest
+
+
+# Console color
+G = '\033[92m' # green
+Y = '\033[93m' # yellow
+B = '\033[94m' # blue
+R = '\033[91m' # red
+W = '\033[0m' # white
+
+url_req = URLRequest()
+log = Logger()
+
+class CommonServiceCheck(object):
+    ## STILL NOT ACCURATE!
+    def check_available_service(self, host):
+        list_available_port = []
+        list_common_port = [21,22,23,25,53,80,110,111,135,139,143,443,445,993,995,1723,3306,3389,5900,8080]
+        for port in list_common_port:
+            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            try:
+                s.connect((host, port))
+                if port == 80:
+                    data = url_req.header_info("http://" + host, "")
+                    log.console_log("Found HTPP Service : ({} OPEN)".format(str(port)) )
+                    log.console_log("\n{}".format(data))
+                elif port == 443:
+                    data = url_req.header_info("https://" + host, "")
+                else:
+                    print("port :" + str(port) + " OPEN! " + s.recv(4096))
+            except socket.error as e:
+                if e.errno == errno.ECONNREFUSED or e.errno == 113:
+                    pass
+                else:
+                    print("port :" + str(port) + str(e) + "closed")
+            s.close()
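As the `## STILL NOT ACCURATE!` comment admits, the sockets above have no timeout, so a filtered port or a service that never sends a banner leaves `s.connect()` or `s.recv(4096)` hanging (the HTTP log message also reads "HTPP"). For comparison, a minimal timeout-based sketch that only tests whether a TCP connection succeeds, independent of the plugin's helpers:

```python
import socket

COMMON_PORTS = [21, 22, 23, 25, 53, 80, 110, 143, 443, 445, 3306, 3389, 8080]

def probe_open_ports(host, ports=COMMON_PORTS, timeout=2.0):
    """Return the ports on `host` that accept a TCP connection within `timeout` seconds."""
    open_ports = []
    for port in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(timeout)                    # never block longer than `timeout`
        try:
            if s.connect_ex((host, port)) == 0:  # 0 means the TCP handshake succeeded
                open_ports.append(port)
        finally:
            s.close()
    return open_ports

# e.g. probe_open_ports("scanme.nmap.org") might return [22, 80]
```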

plugins/gather_company.py

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Belati is tool for Collecting Public Data & Public Document from Website and other service for OSINT purpose.
+# This tools is inspired by Foca and Datasploit for OSINT
+# Copyright (C) 2017 cacaddv@gmail.com (Petruknisme a.k.a Aan Wahyu)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This file is part of Belati project
+
+import re,sys
+from bs4 import BeautifulSoup
+from logger import Logger
+from url_request import URLRequest
+
+# Console color
+G = '\033[92m' # green
+Y = '\033[93m' # yellow
+B = '\033[94m' # blue
+R = '\033[91m' # red
+W = '\033[0m' # white
+
+url_req = URLRequest()
+log = Logger()
+
+class GatherCompany(object):
+    def crawl_company_employee(self, company_name, proxy_address):
+        comp_strip = company_name.replace(" ", "+")
+        url = 'https://www.google.com/search?q={}+site:linkedin.com&num=200'.format(comp_strip)
+
+        data = url_req.standart_request(url, proxy_address)
+
+        soup = BeautifulSoup( data, 'html.parser' )
+        company_linkedin_url_list = []
+
+        #Getting all h3 tags with class 'r'
+        scrap_container = soup.find_all('div', class_='rc')
+        for rc in scrap_container:
+            soup2 = BeautifulSoup( str(rc), 'html.parser' )
+            url = soup2.find_all('h3', class_= 'r')
+            url_fix = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', str(url))
+            linkedin_url = re.findall(r'(http[s]?://.*\.linkedin\.com/in/.*)', str(url_fix).strip("\'[]")) # filter only *.linked.com
+            company_linkedin_url = re.findall(r'(http[s]?://.*\.linkedin\.com/company/.*)', str(url_fix).strip("\'[]")) # filter only *.linked.com/company
+            job_title = soup2.find_all('div', class_='slp f')
+
+            if company_linkedin_url:
+                company_linkedin_url_list.append(company_linkedin_url)
+
+            # Get data when linkedin url is like this : *.linkedin.com/in
+            if not linkedin_url:
+                pass
+            else:
+                name_fix = re.sub('<[^<]+?>', '', str(rc.h3.a)) # strip all html tags like <em>
+                job_title_fix = re.sub('<[^<]+?>', '', str(job_title)) # strip all html tags like <em>
+                log.console_log("{}[+] --------------------------------------------------- [+]{}".format(Y, W))
+                log.console_log("Name: {}".format( name_fix.replace('| LinkedIn', '') ))
+                log.console_log("Job Title: {}".format( str(job_title_fix.replace('\u200e', ' ')).strip("\'[]") ))
+                log.console_log("Url: {}".format( str(linkedin_url).strip("\'[]") ))
+                log.console_log("{}[+] --------------------------------------------------- [+]{}\n".format(Y, W))
+
+        log.console_log("\n\n{}[+] --------------------------------------------------- [+]{}".format(Y, W))
+        log.console_log("{}[+] Found LinkedIn Company URL: {}".format(Y, W))
+        for url in company_linkedin_url_list:
+            log.console_log("{} {} {}".format(Y, str(url), W))

plugins/harvest_public_document.py

Lines changed: 5 additions & 2 deletions
@@ -20,7 +20,7 @@

 # This file is part of Belati project

-import re, os
+import re, os, errno
 import urllib
 from logger import Logger
 from tqdm import tqdm
@@ -54,7 +54,10 @@ def harvest_public_doc(self, domain, extension, proxy_address):
         total_files = 0
         url = 'https://www.google.com/search?q=site:' + domain + '%20ext:' + extension + '&filter=0&num=200'
         data = data = url_req.standart_request(url, proxy_address)
-        regex = "(?P<url>https?://[^:]+\.%s)" % extension
+        # Re<url>https?:\/\/[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+.pdf
+        # (?P<url>https?://[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+\.pdf)
+        # "(?P<url>https?://[^:]+\.%s)" % extension
+        regex = "(?P<url>https?://[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+\.{})".format(extension)
         data = re.findall(regex, data)
         list_files_download = list(set(data))
         total_files = str(len(list_files_download))
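The old class `[^:]+` accepted any character except a colon, including quotes, spaces, and HTML, so a match could run past the end of the real URL; the replacement whitelists the characters a document URL may contain, so matches stop at the surrounding markup. A quick standalone check of the updated regex, with an invented domain and file names:

```python
import re

extension = "pdf"
regex = "(?P<url>https?://[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+\.{})".format(extension)

sample = ('<a href="https://docs.example.com/files/annual-report%202017.pdf">a</a> '
          '<a href="https://docs.example.com/files/budget[v2].pdf">b</a>')
print(re.findall(regex, sample))
# ['https://docs.example.com/files/annual-report%202017.pdf',
#  'https://docs.example.com/files/budget[v2].pdf']
```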

plugins/logger.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@

 # This file is part of Belati project

-import sys, os
+import sys, os, errno
 import logging
 import time

plugins/robots_scraper.py

Lines changed: 2 additions & 1 deletion
@@ -30,6 +30,7 @@ def check_robots(self, domain_name, proxy_address):
         url_request = "{}/robots.txt".format(domain_name, proxy_address)
         data = url_req.just_url_open(url_request, proxy_address)
         if data is not "" and data is not "notexist":
-            return data
+            if data.getcode() == 200 and data.getcode() != 302:
+                return data
     except:
         pass
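In the new guard, `data.getcode() == 200 and data.getcode() != 302` is doubly protective: a response whose code equals 200 can never also be 302, so the second comparison is redundant and the effective behaviour is simply to return the response only for a real 200.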
