Skip to content

Commit 246d9aa

Browse files
authored
Merge pull request #30 from MikeMeliz/25-typeerror-nonetype
BugFix: NoneType Error when using Input File
2 parents be355be + e09b4bc commit 246d9aa

File tree

4 files changed

+21
-12
lines changed

4 files changed

+21
-12
lines changed

README.md

+3
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,9 @@ Feel free to contribute on this project! Just fork it, make any change on your f
216216

217217
## Changelog
218218
```shell
219+
v1.31:
220+
* Fixed Input Link NoneType Error
221+
* Fixed name mismatch
219222
v1.3:
220223
* Make yara search optional
221224
v1.21:

modules/checker.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ def url_canon(website, verbose):
1717
:param verbose: Boolean - Verbose logging switch.
1818
:return: String 'website' - normalised result.
1919
"""
20-
if not website.startswith("http"):
21-
if not website.startswith("www."):
22-
website = "www." + website
23-
if verbose:
24-
print(("## URL fixed: " + website))
25-
website = "http://" + website
20+
if not website.startswith("https"):
21+
# if not website.startswith("www."):
22+
# website = "www." + website
23+
# if verbose:
24+
# print(("## URL fixed: " + website))
25+
website = "https://" + website
2626
if verbose:
2727
print(("## URL fixed: " + website))
2828
return website

modules/extractor.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
from bs4 import BeautifulSoup
1313

14+
from modules.checker import url_canon
15+
1416

1517
def text(response=None):
1618
""" Removes all the garbage from the HTML and takes only text elements
@@ -122,17 +124,20 @@ def intermex(input_file, yara):
122124
try:
123125
with open(input_file, 'r') as file:
124126
for line in file:
125-
content = urllib.request.urlopen(line).read()
127+
website = url_canon(line, 0)
128+
try:
129+
content = urllib.request.urlopen(website).read()
130+
except (HTTPError, URLError, InvalidURL) as err:
131+
print(f"## ERROR: {err}. URL: " + website)
132+
continue
126133
if yara is not None:
127134
full_match_keywords = check_yara(raw=content, yara=yara)
128135

129136
if len(full_match_keywords) == 0:
130137
print(f"No matches in: {line}")
131138
print(content)
132-
except (HTTPError, URLError, InvalidURL) as err:
133-
print(f"Request Error: {err}")
134139
except IOError as err:
135-
print(f"Error: {err}\n## Not valid file")
140+
print(f"ERROR: {err}\n## Not valid file. File tried: " + input_file)
136141

137142

138143
def outex(website, output_file, out_path, yara):

torcrawl.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,8 @@ def main():
175175
website = ''
176176
out_path = ''
177177

178-
if len(args.url) > 0:
178+
if args.input: pass
179+
elif len(args.url) > 0:
179180
website = url_canon(args.url, args.verbose)
180181
if args.folder is not None:
181182
out_path = folder(args.folder, args.verbose)
@@ -196,7 +197,7 @@ def main():
196197

197198
if args.verbose:
198199
check_ip()
199-
print(('## URL: ' + args.url))
200+
if args.url: print(('## URL: ' + args.url))
200201

201202
if args.crawl:
202203
crawler = Crawler(website, c_depth, c_pause, out_path, args.log,

0 commit comments

Comments (0)