Skip to content

Commit 246d9aa

Browse files
authored
Merge pull request #30 from MikeMeliz/25-typeerror-nonetype
BugFix: NoneType Error when using Input File
2 parents be355be + e09b4bc commit 246d9aa

File tree

4 files changed

+21
-12
lines changed

4 files changed

+21
-12
lines changed

README.md

+3
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,9 @@ Feel free to contribute on this project! Just fork it, make any change on your f
216216

217217
## Changelog
218218
```shell
219+
v1.31:
220+
* Fixed Input Link NoneType Error
221+
* Fixed name mismatch
219222
v1.3:
220223
* Make yara search optional
221224
v1.21:

modules/checker.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ def url_canon(website, verbose):
1717
:param verbose: Boolean - Verbose logging switch.
1818
:return: String 'website' - normalised result.
1919
"""
20-
if not website.startswith("http"):
21-
if not website.startswith("www."):
22-
website = "www." + website
23-
if verbose:
24-
print(("## URL fixed: " + website))
25-
website = "http://" + website
20+
if not website.startswith("https"):
21+
# if not website.startswith("www."):
22+
# website = "www." + website
23+
# if verbose:
24+
# print(("## URL fixed: " + website))
25+
website = "https://" + website
2626
if verbose:
2727
print(("## URL fixed: " + website))
2828
return website

modules/extractor.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
from bs4 import BeautifulSoup
1313

14+
from modules.checker import url_canon
15+
1416

1517
def text(response=None):
1618
""" Removes all the garbage from the HTML and takes only text elements
@@ -122,17 +124,20 @@ def intermex(input_file, yara):
122124
try:
123125
with open(input_file, 'r') as file:
124126
for line in file:
125-
content = urllib.request.urlopen(line).read()
127+
website = url_canon(line, 0)
128+
try:
129+
content = urllib.request.urlopen(website).read()
130+
except (HTTPError, URLError, InvalidURL) as err:
131+
print(f"## ERROR: {err}. URL: " + website)
132+
continue
126133
if yara is not None:
127134
full_match_keywords = check_yara(raw=content, yara=yara)
128135

129136
if len(full_match_keywords) == 0:
130137
print(f"No matches in: {line}")
131138
print(content)
132-
except (HTTPError, URLError, InvalidURL) as err:
133-
print(f"Request Error: {err}")
134139
except IOError as err:
135-
print(f"Error: {err}\n## Not valid file")
140+
print(f"ERROR: {err}\n## Not valid file. File tried: " + input_file)
136141

137142

138143
def outex(website, output_file, out_path, yara):

torcrawl.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,8 @@ def main():
175175
website = ''
176176
out_path = ''
177177

178-
if len(args.url) > 0:
178+
if args.input: pass
179+
elif len(args.url) > 0:
179180
website = url_canon(args.url, args.verbose)
180181
if args.folder is not None:
181182
out_path = folder(args.folder, args.verbose)
@@ -196,7 +197,7 @@ def main():
196197

197198
if args.verbose:
198199
check_ip()
199-
print(('## URL: ' + args.url))
200+
if args.url: print(('## URL: ' + args.url))
200201

201202
if args.crawl:
202203
crawler = Crawler(website, c_depth, c_pause, out_path, args.log,

0 commit comments

Comments (0)