This repository was archived by the owner on Apr 4, 2024. It is now read-only.

Commit 2938987

Add Gather Company Employee
Add SVN Finder
Update URL Request
Rework Code
Fix small bug
Update Harvest Public Document Regex
Add version for updater
1 parent 1d32eb1 commit 2938987

13 files changed: 271 additions & 19 deletions

Belati.py

Lines changed: 49 additions & 12 deletions
@@ -31,17 +31,22 @@
 import time
 import dns.resolver
 import tldextract
-from plugins.check_domain import CheckDomain
+
+from plugins.about_project import AboutProject
 from plugins.banner_grab import BannerGrab
-from plugins.logger import Logger
+from plugins.check_domain import CheckDomain
+from plugins.common_service_check import CommonServiceCheck
+from plugins.gather_company import GatherCompany
+from plugins.git_finder import GitFinder
 from plugins.harvest_email import HarvestEmail
 from plugins.harvest_public_document import HarvestPublicDocument
-from plugins.scan_nmap import ScanNmap
-from plugins.wappalyzer import Wappalyzer
-from plugins.git_finder import GitFinder
+from plugins.logger import Logger
 from plugins.robots_scraper import RobotsScraper
-from plugins.about_project import AboutProject
+from plugins.scan_nmap import ScanNmap
+from plugins.svn_finder import SVNFinder
 from plugins.url_request import URLRequest
+from plugins.wappalyzer import Wappalyzer
+
 from lib.Sublist3r import sublist3r
 from lib.CheckMyUsername.check_my_username import CheckMyUsername
 from dnsknife.scanner import Scanner
@@ -66,7 +71,7 @@ def __init__(self):
         parser.add_argument('-d', action='store', dest='domain' , help='Perform OSINT from Domain e.g petruknisme.com(without protocol http/https)')
         parser.add_argument('-u', action='store', dest='username' , help='Perform OSINT from username e.g petruknisme')
         parser.add_argument('-e', action='store', dest='email' , help='Perform OSINT from email address')
-        parser.add_argument('-c', action='store', dest='orgcomp' , help='Perform OSINT from Organization or Company Name')
+        parser.add_argument('-c', action='store', dest='orgcomp' , help='Perform OSINT from Organization or Company Name, use double quote')
         parser.add_argument('-o', action='store', dest='output_files' , help='Save log for output files')
         parser.add_argument('--db-file', action='store', dest='db_file_location' , help='Specify Database File Location(SQLite3)')
         parser.add_argument('--single-proxy', action='store', dest='single_proxy', help='Proxy support with single IP (ex: http://127.0.0.1:8080)' )
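Because `-c` stores the whole organization name in a single `orgcomp` value, multi-word names have to be quoted so the shell passes them as one argument, e.g. `python Belati.py -c "Acme Corporation"` (the company name here is only an illustration).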
@@ -102,10 +107,10 @@ def __init__(self):
         proxy = self.multiple_proxy_list

         extract_domain = tldextract.extract(domain)
+
         self.check_domain(self.url_req.ssl_checker(domain), proxy)
         self.banner_grab(self.url_req.ssl_checker(domain), proxy)

-
         if extract_domain.subdomain == "":
             self.robots_scraper(self.url_req.ssl_checker(domain), proxy)
             self.enumerate_subdomains(domain, proxy)
@@ -125,7 +130,10 @@ def __init__(self):
         if username is not None:
             self.username_checker(username)

-        if email or orgcomp is not None:
+        if orgcomp is not None:
+            self.gather_company(orgcomp, proxy)
+
+        if email is not None:
             log.console_log("This feature will be coming soon. Be patient :)")

         log.console_log("{}All done sir! All log saved in log directory and dowloaded file saved in belatiFiles {}".format(Y, W))
@@ -142,6 +150,7 @@ def show_banner(self):
 | $$$$$$$/| $$$$$$$$| $$$$$$$$| $$ | $$ | $$ /$$$$$$
 |_______/ |________/|________/|__/ |__/ |__/ |______/

+The Traditional Swiss Army Knife for OSINT

 =[ {} {} by {}]=

@@ -188,13 +197,18 @@ def enumerate_subdomains(self, domain_name, proxy):
             self.robots_scraper(self.url_req.ssl_checker(subdomain), proxy)
             self.wappalyzing_webpage(subdomain)
             self.public_git_finder(subdomain, proxy)
+            self.public_svn_finder(subdomain, proxy)
             try:
                 subdomain_ip_list.append(socket.gethostbyname(subdomain))
             except socket.gaierror:
                 pass

         subdomain_ip_listFix = list(set(subdomain_ip_list))

+        # check common service port TODO
+        #for ipaddress in subdomain_ip_listFix:
+            #self.common_service_check(ipaddress)
+
         for ipaddress in subdomain_ip_listFix:
             self.service_scanning(ipaddress)

@@ -300,15 +314,38 @@ def public_git_finder(self, domain, proxy_address):
         log.console_log("{}[*] Checking Public GIT Directory on domain {}{}".format(G, domain, W))
         git_finder = GitFinder()
         if git_finder.check_git(domain, proxy_address) == True:
-            log.console_log("{}[+] Gotcha! You are in luck boy!{}".format(G, W))
+            log.console_log("{}[+] Gotcha! You are in luck, boy![{}/.git/]{}".format(Y, domain, W))
+
+    def public_svn_finder(self, domain, proxy_address):
+        log.console_log("{}[*] Checking Public SVN Directory on domain {}{}".format(G, domain, W))
+        svn_finder = SVNFinder()
+        if svn_finder.check_svn(domain, proxy_address) == 403:
+            log.console_log("{}[+] Um... Forbidden :( {}".format(Y, W))
+        if svn_finder.check_svn(domain, proxy_address) == 200:
+            log.console_log("{}[+] Gotcha! You are in luck, boy![{}/.svn/]{}".format(Y, domain, W))

     def robots_scraper(self, domain, proxy_address):
         scraper = RobotsScraper()
         data = scraper.check_robots(domain, proxy_address)
-        if data is not None and data.code == 200:
-            log.console_log("{}[+] Found interesting robots.txt content on domain {}:{}".format(Y, domain, W))
+        if data is not None and isinstance(data, int) == False and data.code == 200:
+            log.console_log("{}[+] Found interesting robots.txt[ {} ] =>{}".format(Y, domain, W))
             log.console_log(data.read())

+    def gather_company(self, company_name, proxy_address):
+        log.console_log("{}[+] Gathering Company Employee {} -> {}".format(G, W, company_name))
+        gather_company = GatherCompany()
+        gather_company.crawl_company_employee(company_name, proxy_address)
+
+    def check_update(self, version):
+        log.console_log("{} Checking Version Update for Belati... {}".format(G, W))
+        # TODO
+
+
+    def common_service_check(self, host):
+        log.console_log("{}[*] Checking Common Service Check on host {}{}".format(G, host, W))
+        service_check = CommonServiceCheck()
+        service_check.check_available_service(host)
+
     def check_python_version(self):
         if sys.version[:3] == "2.7" or "2" in sys.version[:3]:
             log.console_log("{}[*] Python version OK! {}{}".format(G, sys.version[:6], W))

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+Changelog:
+
+v0.2.0-dev:
+
+Add Gather Company Employee
+Add SVN Finder
+Update URL Request
+Rework Code
+Fix small bug
+Update Harvest Public Document Regex
+Add version for updater

README.md

Lines changed: 14 additions & 0 deletions
@@ -1,4 +1,6 @@
 # Belati
+Belati - The Traditional Swiss Army Knife For OSINT
+
 Belati is tool for Collecting Public Data & Public Document from Website and other service for OSINT purpose. This tools is inspired by Foca and Datasploit for OSINT :)

 ## Why I Made this?
@@ -17,7 +19,10 @@ Just for learning stuff and OSINT purpose. Correct me if i'm wrong
 - Fake and Random User Agent ( Prevent from blocking )
 - Proxy Support for Harvesting Emails and Documents
 - Public Git Finder in domain/subdomain
+- Public SVN Finder in domain/subdomain
 - Robot.txt Scraper in domain/subdomain
+- Gather Public Company Employee
+

 ## TODO
 - Automatic OSINT with Username and Email support
@@ -32,6 +37,7 @@ Just for learning stuff and OSINT purpose. Correct me if i'm wrong
 - Web version with Django
 - Scanning Report export to PDF
 - domain or subdomain reputation checker
+- Reporting Support to JSON, PDF

 ## Install/Usage
 ```
@@ -85,13 +91,21 @@ yum install gcc gmp gmp-devel python-devel
 - Sublist3r
 - Subbrute
 - nmap
+- git

 ## Notice
 I'm using PyWhois Library, Sublist3r, MailHarvester, Emingoo as part of my code. This tool is for educational purposes only. Any damage you make will not affect the author. Do It With Your Own Risk

 ## Author
 Aan Wahyu a.k.a Petruknisme(https://petruknisme.com)

+## Thanks To
+
+Thanks to PyWhois Library, Sublist3r, MailHarvester, Emingoo for being part of my code. Also thanks to Hispagatos, Infosec-ninjas, eCHo, RNDC( Research and development center ) and all other people who are inspiring this project :)
+
+Thanks to Echo-Zine Staff for approving my Ezine : http://ezine.echo.or.id/issue31/005.txt - Belati : Collecting Public Data & Public Document for OSINT Purpose - Petruknisme
+
+
 ## License
 Belati is licensed under GPL V2. You can use, modify, or redistribute this tool under the terms of GNU General Public License (GPLv2).

plugins/about_project.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ class AboutProject(object):
     def __init__(self):
         self.__info__ = 'Collecting Public Data & Public Document for OSINT purpose'
         self.__author__ = 'Petruknisme'
-        self.__version__ = 'v0.1.8-dev'
+        self.__version__ = 'v0.2.0-dev'
         self.__name__= "Belati"
         self.__giturl__ = "https://github.com/aancw/Belati"
         self.__authorurl__ = "https://petruknisme.com"

plugins/common_service_check.py

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Belati is tool for Collecting Public Data & Public Document from Website and other service for OSINT purpose.
+# This tools is inspired by Foca and Datasploit for OSINT
+# Copyright (C) 2017 cacaddv@gmail.com (Petruknisme a.k.a Aan Wahyu)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This file is part of Belati project
+
+import sys, socket, errno
+from logger import Logger
+from url_request import URLRequest
+
+
+# Console color
+G = '\033[92m' # green
+Y = '\033[93m' # yellow
+B = '\033[94m' # blue
+R = '\033[91m' # red
+W = '\033[0m' # white
+
+url_req = URLRequest()
+log = Logger()
+
+class CommonServiceCheck(object):
+    ## STILL NOT ACCURATE!
+    def check_available_service(self, host):
+        list_available_port = []
+        list_common_port = [21,22,23,25,53,80,110,111,135,139,143,443,445,993,995,1723,3306,3389,5900,8080]
+        for port in list_common_port:
+            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            try:
+                s.connect((host, port))
+                if port == 80:
+                    data = url_req.header_info("http://" + host, "")
+                    log.console_log("Found HTPP Service : ({} OPEN)".format(str(port)) )
+                    log.console_log("\n{}".format(data))
+                elif port == 443:
+                    data = url_req.header_info("https://" + host, "")
+                else:
+                    print("port :" + str(port) + " OPEN! " + s.recv(4096))
+            except socket.error as e:
+                if e.errno == errno.ECONNREFUSED or e.errno == 113:
+                    pass
+                else:
+                    print("port :" + str(port) + str(e) + "closed")
+            s.close()
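As the `## STILL NOT ACCURATE!` comment admits, the sockets above have no timeout, so a filtered port or a service that never sends a banner leaves `s.connect()` or `s.recv(4096)` hanging (the HTTP log message also reads "HTPP"). For comparison, a minimal timeout-based sketch that only tests whether a TCP connection succeeds, independent of the plugin's helpers:

```python
import socket

COMMON_PORTS = [21, 22, 23, 25, 53, 80, 110, 143, 443, 445, 3306, 3389, 8080]

def probe_open_ports(host, ports=COMMON_PORTS, timeout=2.0):
    """Return the ports on `host` that accept a TCP connection within `timeout` seconds."""
    open_ports = []
    for port in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(timeout)                    # never block longer than `timeout`
        try:
            if s.connect_ex((host, port)) == 0:  # 0 means the TCP handshake succeeded
                open_ports.append(port)
        finally:
            s.close()
    return open_ports

# e.g. probe_open_ports("scanme.nmap.org") might return [22, 80]
```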

plugins/gather_company.py

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Belati is tool for Collecting Public Data & Public Document from Website and other service for OSINT purpose.
+# This tools is inspired by Foca and Datasploit for OSINT
+# Copyright (C) 2017 cacaddv@gmail.com (Petruknisme a.k.a Aan Wahyu)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This file is part of Belati project
+
+import re,sys
+from bs4 import BeautifulSoup
+from logger import Logger
+from url_request import URLRequest
+
+# Console color
+G = '\033[92m' # green
+Y = '\033[93m' # yellow
+B = '\033[94m' # blue
+R = '\033[91m' # red
+W = '\033[0m' # white
+
+url_req = URLRequest()
+log = Logger()
+
+class GatherCompany(object):
+    def crawl_company_employee(self, company_name, proxy_address):
+        comp_strip = company_name.replace(" ", "+")
+        url = 'https://www.google.com/search?q={}+site:linkedin.com&num=200'.format(comp_strip)
+
+        data = url_req.standart_request(url, proxy_address)
+
+        soup = BeautifulSoup( data, 'html.parser' )
+        company_linkedin_url_list = []
+
+        #Getting all h3 tags with class 'r'
+        scrap_container = soup.find_all('div', class_='rc')
+        for rc in scrap_container:
+            soup2 = BeautifulSoup( str(rc), 'html.parser' )
+            url = soup2.find_all('h3', class_= 'r')
+            url_fix = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', str(url))
+            linkedin_url = re.findall(r'(http[s]?://.*\.linkedin\.com/in/.*)', str(url_fix).strip("\'[]")) # filter only *.linked.com
+            company_linkedin_url = re.findall(r'(http[s]?://.*\.linkedin\.com/company/.*)', str(url_fix).strip("\'[]")) # filter only *.linked.com/company
+            job_title = soup2.find_all('div', class_='slp f')
+
+            if company_linkedin_url:
+                company_linkedin_url_list.append(company_linkedin_url)
+
+            # Get data when linkedin url is like this : *.linkedin.com/in
+            if not linkedin_url:
+                pass
+            else:
+                name_fix = re.sub('<[^<]+?>', '', str(rc.h3.a)) # strip all html tags like <em>
+                job_title_fix = re.sub('<[^<]+?>', '', str(job_title)) # strip all html tags like <em>
+                log.console_log("{}[+] --------------------------------------------------- [+]{}".format(Y, W))
+                log.console_log("Name: {}".format( name_fix.replace('| LinkedIn', '') ))
+                log.console_log("Job Title: {}".format( str(job_title_fix.replace('\u200e', ' ')).strip("\'[]") ))
+                log.console_log("Url: {}".format( str(linkedin_url).strip("\'[]") ))
+                log.console_log("{}[+] --------------------------------------------------- [+]{}\n".format(Y, W))
+
+        log.console_log("\n\n{}[+] --------------------------------------------------- [+]{}".format(Y, W))
+        log.console_log("{}[+] Found LinkedIn Company URL: {}".format(Y, W))
+        for url in company_linkedin_url_list:
+            log.console_log("{} {} {}".format(Y, str(url), W))

plugins/harvest_public_document.py

Lines changed: 5 additions & 2 deletions
@@ -20,7 +20,7 @@

 # This file is part of Belati project

-import re, os
+import re, os, errno
 import urllib
 from logger import Logger
 from tqdm import tqdm
@@ -54,7 +54,10 @@ def harvest_public_doc(self, domain, extension, proxy_address):
         total_files = 0
         url = 'https://www.google.com/search?q=site:' + domain + '%20ext:' + extension + '&filter=0&num=200'
         data = data = url_req.standart_request(url, proxy_address)
-        regex = "(?P<url>https?://[^:]+\.%s)" % extension
+        # Re<url>https?:\/\/[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+.pdf
+        # (?P<url>https?://[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+\.pdf)
+        # "(?P<url>https?://[^:]+\.%s)" % extension
+        regex = "(?P<url>https?://[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+\.{})".format(extension)
         data = re.findall(regex, data)
         list_files_download = list(set(data))
         total_files = str(len(list_files_download))
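The old class `[^:]+` accepted any character except a colon, including quotes, spaces, and HTML, so a match could run past the end of the real URL; the replacement whitelists the characters a document URL may contain, so matches stop at the surrounding markup. A quick standalone check of the updated regex, with an invented domain and file names:

```python
import re

extension = "pdf"
regex = "(?P<url>https?://[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+\.{})".format(extension)

sample = ('<a href="https://docs.example.com/files/annual-report%202017.pdf">a</a> '
          '<a href="https://docs.example.com/files/budget[v2].pdf">b</a>')
print(re.findall(regex, sample))
# ['https://docs.example.com/files/annual-report%202017.pdf',
#  'https://docs.example.com/files/budget[v2].pdf']
```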

plugins/logger.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@

 # This file is part of Belati project

-import sys, os
+import sys, os, errno
 import logging
 import time

plugins/robots_scraper.py

Lines changed: 2 additions & 1 deletion
@@ -30,6 +30,7 @@ def check_robots(self, domain_name, proxy_address):
         url_request = "{}/robots.txt".format(domain_name, proxy_address)
         data = url_req.just_url_open(url_request, proxy_address)
         if data is not "" and data is not "notexist":
-            return data
+            if data.getcode() == 200 and data.getcode() != 302:
+                return data
     except:
         pass
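In the new guard, `data.getcode() == 200 and data.getcode() != 302` is doubly protective: a response whose code equals 200 can never also be 302, so the second comparison is redundant and the effective behaviour is simply to return the response only for a real 200.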
